mung language changes

--HG--
branch : mung
This commit is contained in:
cecilkorik 2010-11-27 22:46:34 +00:00
parent b11356b0c3
commit fe0ea2a7f9
4 changed files with 1125 additions and 264 deletions

View file

@ -9,16 +9,35 @@ endwhile
bytecode: bytecode:
startblock 1 startblock 1
stack_literal_int 1 stack_literal_int 1 1
exit_true 1 exit_true 1 0
stack_literal_str "message" stack_literal_str "message" 1
set_var arg set_var arg 1
discard_stack 1 discard_stack 1 0
get_var arg get_var arg 1
stack_literal_obj #7407 stack_literal_obj #7407 2
call_builtin send call_builtin send 1
discard_stack 1 discard_stack 1 0
stack_literal_float 1.0 stack_literal_float 1.0 1
call_builtin suspend call_builtin suspend 1
discard_stack 1 discard_stack 1 0
endblock 1 endblock 1
other stuff:
var = 1 == 0 + 1
list = [1, 2, @[3, 4], 5]
literal 1
literal 2
literal 3
literal 4
literal 2
makelist
flatten
literal 5
literal 4 <-- the length
makelist 4

File diff suppressed because it is too large Load diff

View file

@ -1,54 +0,0 @@
class VMType(object):
    """Base class shared by the VM value wrapper classes in this module."""
class VMInteger(VMType):
    """VM value wrapper that stores ``int(value)`` in ``self.value``."""

    def __init__(self, value):
        as_int = int(value)
        self.value = as_int
class VMFloat(VMType):
    """VM value wrapper that stores ``float(value)`` in ``self.value``."""

    def __init__(self, value):
        as_float = float(value)
        self.value = as_float
class VMTable(VMType):
    """VM value wrapper that stores ``dict(value)`` (a new dict) in ``self.value``."""

    def __init__(self, value):
        as_dict = dict(value)
        self.value = as_dict
class VMString(VMType):
    """VM value wrapper holding a ``unicode`` string in ``self.value``."""

    def __init__(self, value):
        # Anything that is not already unicode is stringified and decoded as
        # ASCII; bytes that cannot be decoded are dropped ('ignore').
        if not isinstance(value, unicode):
            value = unicode(str(value), 'ascii', 'ignore')
        self.value = value
class VMObjRef(VMType):
    """VM value wrapper holding an ``ObjRef`` in ``self.value``.

    ``value`` may be an existing ``ObjRef`` (stored as-is) or a float/int,
    which is truncated with ``int()`` and wrapped in a new ``ObjRef``.

    Raises:
        TypeError: if ``value`` is neither an ``ObjRef`` nor a float/int.
    """

    def __init__(self, value):
        if isinstance(value, ObjRef):
            self.value = value
        elif isinstance(value, (float, int)):
            self.value = ObjRef(int(value))
        else:
            # Call-style raise: same behavior as the old ``raise X, msg``
            # statement, but matches coerce() and is valid on Python 2 and 3.
            raise TypeError("Attempted to create VMObjRef with invalid object reference: %r" % (value,))
def coerce(value):
    """Wrap a plain Python value in the matching VM wrapper class.

    The checks run in a fixed order: int, tuple/list, unicode, dict, ObjRef,
    float, and finally ``None`` (which becomes ``VMInteger(0)``).

    Raises:
        TypeError: if ``value`` matches none of the handled types.
    """
    if isinstance(value, int):
        return VMInteger(value)
    if isinstance(value, (tuple, list)):
        # NOTE(review): VMList is not defined in the visible part of this
        # file -- confirm where it comes from.
        return VMList(list(value))
    if isinstance(value, unicode):
        return VMString(value)
    if isinstance(value, dict):
        return VMTable(value)
    if isinstance(value, ObjRef):
        return VMObjRef(value)
    if isinstance(value, float):
        return VMFloat(value)
    # Identity test: ``== None`` would call a custom __eq__ on arbitrary
    # objects and could misclassify them as None.
    if value is None:
        return VMInteger(0)
    raise TypeError("Unknown type %s cannot be coerced to VMType" % (type(value),))
def uncoerce(value):
    """Return the raw value stored in a VM wrapper (its ``value`` attribute).

    Asserts that ``value`` is a ``VMType`` instance.
    """
    assert isinstance(value, VMType)
    unwrapped = value.value
    return unwrapped

281
parse.py
View file

@ -1,103 +1,192 @@
from pyparsing import * from pyparsing import *
from language import *
def enum_parse():
    """Parse ``enum NAME { A [= N], ... }[;]`` declarations from ``test.enum``.

    Prints one ``ENUM_ENTRY = value`` line per entry. An entry with an
    explicit ``= N`` resets the running value to ``N``; every entry after it
    gets the previous value plus one. The running value restarts at 0 for
    each top-level parse result.
    """
    # Read through a context manager so the handle is closed even when
    # parsing raises (previously it was only closed after a successful run).
    with open('test.enum', 'r') as fd:
        source = fd.read()

    # syntax we don't want to see in the final parse tree
    _lcurl = Suppress('{')
    _rcurl = Suppress('}')
    _equal = Suppress('=')
    _comma = Suppress(',')
    _semi = Suppress(';')
    _enum = Suppress('enum')

    identifier = Word(alphas, alphanums + '_')
    integer = Word(nums)
    enumValue = Group(identifier('name') + Optional(_equal + integer('value')))
    enumList = Group(enumValue + ZeroOrMore(_comma + enumValue))
    enum = _enum + identifier('enum') + _lcurl + enumList('list') + _rcurl + Optional(_semi)
    enumlist = ZeroOrMore(enum)

    # NOTE(review): this iterates the top-level tokens and assumes each one
    # exposes ``.enum`` and yields entries with ``.name``/``.value`` --
    # confirm against how pyparsing groups the results of ``enumlist``.
    for item in enumlist.parseString(source, parseAll=True):
        next_id = 0  # renamed from ``id`` to avoid shadowing the builtin
        for entry in item:
            if entry.value != '':
                next_id = int(entry.value)
            # Single parenthesized argument: prints the same text on Python 2
            # and is also valid Python 3 syntax.
            print('%s_%s = %d' % (item.enum.upper(), entry.name.upper(), next_id))
            next_id += 1
def parse_fourfn():
    """Return the arithmetic expression grammar, building it on first use.

    The grammar is cached in the module-level name ``bnf``; when ``bnf`` is
    already truthy it is returned unchanged.
    """
    global bnf
    if bnf:
        return bnf

    decimal_point = Literal(".")
    exp_marker = CaselessLiteral("E")
    fnumber = Combine(
        Word("+-" + nums, nums)
        + Optional(decimal_point + Optional(Word(nums)))
        + Optional(exp_marker + Word("+-" + nums, nums))
    )
    ident = Word(alphas, alphas + nums + "_$")

    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    addop = Literal("+") | Literal("-")
    multop = Literal("*") | Literal("/")
    expop = Literal("^")
    pi = CaselessLiteral("PI")

    expr = Forward()
    # An operand is a constant, a number, or a function call ident(expr).
    operand = (pi | exp_marker | fnumber | ident + lpar + expr + rpar).setParseAction(pushFirst)
    grouped = lpar + expr.suppress() + rpar
    atom = (Optional("-") + operand | grouped).setParseAction(pushUMinus)

    # By defining exponentiation as "atom [ ^ factor ]..." instead of
    # "atom [ ^ atom ]...", we get right-to-left exponents instead of
    # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
    factor = Forward()
    factor << (atom + ZeroOrMore((expop + factor).setParseAction(pushFirst)))

    term = factor + ZeroOrMore((multop + factor).setParseAction(pushFirst))
    expr << (term + ZeroOrMore((addop + term).setParseAction(pushFirst)))

    bnf = expr
    return bnf
def moo_parse(): class Parser(object):
fd = open('test.moo', 'r') def __init__(self):
data = fd.read() self.parser = None
fd.close() self.init_parser()
point = Literal( "." ) def nest(self, tokens):
return [list(tokens)]
integer = Word( "+-"+nums, nums ) def init_parser(self):
fnumber = Combine( integer + """ phase 1:
Optional( point + Optional( Word( nums ) ) ) + most important part is to build the meta-parser for "expr". expr represents any atomic action that returns a value, and the bulk of
Optional( CaselessLiteral('e') + Word( "+-"+nums, nums ) ) ) the code in any program will consist primarily of exprs and flow control. expr is heavily recursive, because most types of expr can
ident = Word(alphas, alphas+nums+"_") take another expr as an input value.
"""
plus = Literal( "+" ) point = Literal( "." )
minus = Literal( "-" ) plus = Literal( "+" )
mult = Literal( "*" ) minus = Literal( "-" )
div = Literal( "/" ) mult = Literal( "*" )
lpar = Literal( "(" ).suppress() div = Literal( "/" )
rpar = Literal( ")" ).suppress() lpar = Literal( "(" ).suppress()
addop = plus | minus rpar = Literal( ")" ).suppress()
multop = mult | div llbr = Literal( "[" ).suppress()
expop = Literal( "^" ) rlbr = Literal( "]" ).suppress()
addop = plus | minus
expr = Forward() multop = mult | div
atom = (Optional("-") + ( fnumber | ident + lpar + expr + rpar ).setParseAction( pushFirst ) | ( lpar + expr.suppress() + rpar )).setParseAction(pushUMinus) expop = Literal( "^" )
quote = Literal( '"' )
# by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-righ excl = Literal( "!" )
# that is, 2^3^2 = 2^(3^2), not (2^3)^2. call = Literal( ":" )
factor = Forward() endl = Literal( ";" )
factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( pushFirst ) ) lisep = Literal( "," ).suppress()
objn = Literal( "#" )
term = factor + ZeroOrMore( ( multop + factor ).setParseAction( pushFirst ) ) ref = Literal( "$" )
expr << term + ZeroOrMore( ( addop + term ).setParseAction( pushFirst ) ) assign = Literal( "=" )
flatten = Literal( "@" )
neg = excl.copy()
# Remainder of the "phase 1" expression grammar followed by the "phase 2"
# flow-control grammar; the finished top-level element is stored in self.parser.
expr = Forward()
# ident and variable accept the same token shape (letter or "_" first, then
# letters, digits or "_") but are given different parse actions.
ident = Word(alphas+"_", alphas+nums+"_")
ident.setParseAction(VMIdent.parse)
variable = Word(alphas+"_", alphas+nums+"_")
variable.setParseAction(VMVariable.parse)
# integer: first character is a sign or digit, remaining characters are digits.
integer = Word( "+-"+nums, nums )
# fnumber: integer part with optional fraction and optional exponent, merged
# into a single token by Combine.
fnumber = Combine( integer +
Optional( point + Optional( Word( nums ) ) ) +
Optional( CaselessLiteral('e') + Word( "+-"+nums, nums ) ) )
# objref: "#" followed by a (possibly signed) run of digits.
objref = objn + Word( "+-"+nums, nums )
objref.setParseAction(VMObjRef.parse)
# coreref: "$" followed by an identifier.
coreref = (ref + ident)
coreref.setParseAction(VMCoreRef.parse)
# bexpr: a parenthesized expression, wrapped in its own list by self.nest.
bexpr = (lpar + expr + rpar).setParseAction(self.nest)
objrefexpr = bexpr | coreref | variable | objref
identexpr = bexpr | ident
propref = (objrefexpr + point + ident).setParseAction(VMPropRef.parse) | coreref
fileref = (objrefexpr + excl + ident).setParseAction(VMFileRef.parse)
argspec = delimitedList(expr)
argspec.setParseAction(StackToList.parse)
funccall = objrefexpr + call + identexpr + lpar + argspec + rpar
# NOTE(review): `integer` is also a component of `fnumber` above and only
# receives its parse action here -- verify whether VMInteger.parse then also
# runs on the integer part inside fnumber's Combine.
fnumber.setParseAction(VMFloat.parse)
integer.setParseAction(VMInteger.parse)
funccall.setParseAction(CallFunc.parse)
stringlit = QuotedString(quoteChar='"', escChar='\\').setParseAction(VMString.parse)
atom = Forward()
bifunction = (ident + lpar + argspec + rpar).setParseAction(CallBuiltin.parse)
# flatexpr: an expression optionally prefixed with "@".
flatexpr = Optional(flatten) + expr
flatexpr.setParseAction(Flatten.parse)
listlit = llbr + Optional(flatexpr) + ZeroOrMore(lisep + flatexpr) + rlbr
# NOTE(review): `|` picks the first alternative that matches; `integer` is
# listed before `fnumber`, so for input like "1.5" the integer "1" presumably
# wins -- confirm that float literals are reachable.
literal = integer | fnumber | stringlit | listlit | objref
# NOTE(review): propref, literal, bexpr and variable all precede funccall and
# fileref here and can each match the leading object expression on their own
# -- confirm funccall/fileref are reachable with first-match alternation.
atom << (Optional(minus) + ZeroOrMore(neg) + (propref | literal | bifunction | bexpr | variable | funccall | fileref)).setParseAction(UnaryOp.parse)
# by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
# that is, 2^3^2 = 2^(3^2), not (2^3)^2.
factor = Forward()
factor << atom + ZeroOrMore( (expop + factor).setParseAction(ArithExp.parse) )
term = factor + ZeroOrMore( (multop + factor).setParseAction(ArithMul.parse) )
#term.setParseAction(self.nest)
mathexpr = term + ZeroOrMore( (addop + term).setParseAction(ArithAdd.parse) )
#mathexpr.setParseAction(self.nest)
# Comparison operators. NOTE(review): the names opgteq/oplteq look swapped
# relative to the literals they hold ('<=' / '>='); both are only used via
# opcmp below, so the grammar itself is unaffected.
opeq = Literal('==')
opneq = Literal('!=')
opgteq = Literal('<=')
oplteq = Literal('>=')
oplt = Literal('<')
opgt = Literal('>')
opin = Keyword('in')
# Two-character operators are listed before '<' and '>' so they are tried first.
opcmp = opeq | opneq | opgteq | oplteq | oplt | opgt | opin
# At most one comparison per eqexpr (Optional, not ZeroOrMore).
eqexpr = mathexpr + Optional( (opcmp + mathexpr).setParseAction(BoolCompare.parse) )
# Boolean connectives, each available as a symbol or a keyword.
opand = Literal('&&') | Keyword('and')
opor = Literal('||') | Keyword('or')
opxor = Literal('~~') | Keyword('xor')
opbool = opand | opor | opxor
boolexpr = eqexpr + ZeroOrMore( (opbool + eqexpr).setParseAction(BoolLogic.parse) )
# An expression is an optional "<assignable> =" prefix followed by a boolean expression.
assignable = variable | propref | fileref
assignexpr = Optional(assignable + assign) + boolexpr
expr << assignexpr.setParseAction(Assignment.parse)
""" phase 2:
now that expr is built, we can move on to handling flow control statements, and after that the structure of the program
is mostly defined
"""
# Opening, middle and closing keywords of each flow-control construct.
ifstart = (Keyword("if") + bexpr)
ifelseif = (Keyword("elseif") + bexpr)
ifelse = Keyword("else")
ifend = Keyword("endif")
trystart = Keyword("try")
tryexcept = (Keyword("except") + variable)
tryelse = Keyword("else")
tryfinally = Keyword("finally")
tryend = Keyword("endtry")
whilestart = (Keyword("while") + bexpr)
whileend = Keyword("endwhile")
forstart = (Keyword("for") + variable + Keyword("in") + bexpr)
forend = Keyword("endfor")
# NOTE(review): LoopBreak/LoopContinue are passed directly as parse actions,
# while every other parse action in this method is `<Class>.parse` -- confirm
# this is intended.
kwdbreak = Keyword("break").setParseAction(LoopBreak)
kwdcontinue = Keyword("continue").setParseAction(LoopContinue)
kwdreturn = Keyword("return")
rtnexpr = (kwdreturn + expr).setParseAction(KeywordReturn.parse)
# NOTE(review): `expr` is tried before `rtnexpr`, and `variable` has no keyword
# exclusion visible here, so the word "return" can presumably match as a
# variable first -- confirm rtnexpr is reachable.
line = expr | rtnexpr
# lline additionally allows the loop-control keywords.
lline = expr | rtnexpr | kwdcontinue | kwdbreak
# Statement runs: each statement must be followed by endl (";").
exprblock = ZeroOrMore(line + endl)
lexprblock = ZeroOrMore(lline + endl)
block = Forward()
lblock = Forward()
ifblock = ifstart + block + ZeroOrMore(ifelseif + block) + Optional(ifelse + block) + ifend
tryblock = trystart + block + Optional(tryexcept + block + Optional(tryelse + block)) + Optional(tryfinally + block) + tryend
iflblock = ifstart + lblock + ZeroOrMore(ifelseif + lblock) + Optional(ifelse + lblock) + ifend
# The finally part uses `block`, not `lblock`, matching the comment below
# about try-finally not allowing loop-control keywords.
trylblock = trystart + lblock + Optional(tryexcept + lblock + Optional(tryelse + lblock)) + Optional(tryfinally + block) + tryend
whileblock = whilestart + lblock + whileend
forblock = forstart + lblock + forend
ifblock.setParseAction(IfBlock.parse)
tryblock.setParseAction(TryBlock.parse)
iflblock.setParseAction(IfBlock.parse)
trylblock.setParseAction(TryBlock.parse)
whileblock.setParseAction(WhileBlock.parse)
forblock.setParseAction(ForeachBlock.parse)
# blocks are used for code blocks that are outside a loop. Inside a loop, all code blocks are lblocks
# which allow loop-control keywords like break and continue (except try-finally, it wouldn't make sense)
# NOTE(review): as written each block holds at most one compound statement
# (Optional) between two runs of simple statements -- confirm that two
# consecutive compound statements in one block are meant to be rejected.
block << (exprblock + Optional(ifblock | tryblock | whileblock | forblock) + exprblock)
lblock << (lexprblock + Optional(iflblock | trylblock | whileblock | forblock) + lexprblock)
block.setParseAction(self.nest)
lblock.setParseAction(self.nest)
# The ";" terminator gets its parse action here, after it was used in the
# statement runs above.
endl.setParseAction(DiscardStack.parse)
# The finished top-level grammar element used by parse().
self.parser = block
#print argspec.parseString("hello(hi.xyz)", parseAll=True)
#print block.parseString(u"hi.xyz + #555.test;", parseAll=True)
def parse(self, data):
    """Parse ``data`` with ``self.parser`` and return the optimized result.

    The whole input must match (``parseAll=True``); the parse result is
    passed through ``optimizer.optimize`` before being returned.
    """
    return optimizer.optimize(self.parser.parseString(data, parseAll=True))
def test(self):
    """Parse the file ``test.moo`` (decoded as UTF-8) and print the result."""
    #print self.parse(u"if (1) #740.xyz + -hello.world; endif")
    # Use a context manager so the file handle is closed deterministically
    # instead of being left to the garbage collector.
    with open("test.moo", "r") as source_file:
        raw = source_file.read()
    data = unicode(raw, 'utf-8')
    # Single parenthesized argument: same output as the Python 2 print
    # statement, and also valid Python 3 syntax.
    print(self.parse(data))
# Script entry point: build a Parser and run its self-test on test.moo.
if __name__ == "__main__":
    Parser().test()