mung language changes

--HG--
branch : mung
This commit is contained in:
cecilkorik 2010-11-27 22:46:34 +00:00
parent b11356b0c3
commit fe0ea2a7f9
4 changed files with 1125 additions and 264 deletions

View file

@ -9,16 +9,35 @@ endwhile
bytecode:
startblock 1
stack_literal_int 1
exit_true 1
stack_literal_str "message"
set_var arg
discard_stack 1
get_var arg
stack_literal_obj #7407
call_builtin send
discard_stack 1
stack_literal_float 1.0
call_builtin suspend
discard_stack 1
endblock 1
stack_literal_int 1 1
exit_true 1 0
stack_literal_str "message" 1
set_var arg 1
discard_stack 1 0
get_var arg 1
stack_literal_obj #7407 2
call_builtin send 1
discard_stack 1 0
stack_literal_float 1.0 1
call_builtin suspend 1
discard_stack 1 0
endblock 1
other stuff:
var = 1 == 0 + 1
list = [1, 2, @[3, 4], 5]
literal 1
literal 2
literal 3
literal 4
literal 2
makelist
flatten
literal 5
literal 4 <-- the length
makelist 4

File diff suppressed because it is too large Load diff

View file

@ -1,54 +0,0 @@
class VMType(object):
    """Base class shared by the value wrappers defined in this module.

    It carries no behaviour of its own; subclasses store the wrapped Python
    value in a ``value`` attribute.
    """
class VMInteger(VMType):
    """Wrapper whose ``value`` is the argument converted with ``int()``."""

    def __init__(self, value):
        # int() raises for anything it cannot convert; that error propagates.
        converted = int(value)
        self.value = converted
class VMFloat(VMType):
    """Wrapper whose ``value`` is the argument converted with ``float()``."""

    def __init__(self, value):
        # float() raises for anything it cannot convert; that error propagates.
        converted = float(value)
        self.value = converted
class VMTable(VMType):
    """Wrapper whose ``value`` is a new ``dict`` built from the argument."""

    def __init__(self, value):
        # dict() makes a shallow copy, so the wrapper does not share the
        # caller's mapping object.
        table = dict(value)
        self.value = table
class VMString(VMType):
    """Wrapper whose ``value`` is always a ``unicode`` string."""

    def __init__(self, value):
        # Anything that is not already unicode goes through str() and is then
        # decoded as ASCII; bytes that are not valid ASCII are dropped
        # ('ignore') rather than raising.
        if not isinstance(value, unicode):
            value = unicode(str(value), 'ascii', 'ignore')
        self.value = value
class VMObjRef(VMType):
    """Wrapper whose ``value`` is an ``ObjRef``.

    NOTE(review): ``ObjRef`` is not defined or imported in the visible part
    of this file -- confirm where it comes from.
    """

    def __init__(self, value):
        """Store ``value`` as an ``ObjRef``.

        An existing ``ObjRef`` is stored as-is; a float or int is truncated
        with ``int()`` and passed to ``ObjRef``.

        Raises:
            TypeError: if ``value`` is neither an ``ObjRef`` nor a number.
        """
        if isinstance(value, ObjRef):
            self.value = value
        elif isinstance(value, (float, int)):
            self.value = ObjRef(int(value))
        else:
            # Call-style raise: same behaviour as the old ``raise E, msg``
            # statement form, but also valid syntax on Python 3.
            raise TypeError("Attempted to create VMObjRef with invalid object reference: %r" % (value,))
def coerce(value):
    """Wrap a plain Python value in the matching VMType subclass.

    Mapping, checked in this order:
      int (including bool, which subclasses int) -> VMInteger
      tuple / list                               -> VMList (of a list copy)
      unicode                                    -> VMString
      dict                                       -> VMTable
      ObjRef                                     -> VMObjRef
      float                                      -> VMFloat
      None                                       -> VMInteger(0)

    Raises:
        TypeError: for any other type.

    NOTE(review): ``VMList`` and ``ObjRef`` are not defined in the visible
    part of this file -- confirm they are in scope at runtime.
    """
    if isinstance(value, int):
        return VMInteger(value)
    elif isinstance(value, (tuple, list)):
        return VMList(list(value))
    elif isinstance(value, unicode):
        return VMString(value)
    elif isinstance(value, dict):
        return VMTable(value)
    elif isinstance(value, ObjRef):
        return VMObjRef(value)
    elif isinstance(value, float):
        return VMFloat(value)
    elif value is None:
        # Identity test: ``== None`` would invoke a user-defined __eq__ and
        # could treat an arbitrary object as None.
        return VMInteger(0)
    else:
        raise TypeError("Unknown type %s cannot be coerced to VMType" % (type(value),))
def uncoerce(value):
    """Return the plain Python value stored inside a VMType wrapper.

    The argument is checked with ``assert``, so a non-VMType argument raises
    AssertionError (unless assertions are disabled).
    """
    assert isinstance(value, VMType)
    raw = value.value
    return raw

283
parse.py
View file

@ -1,103 +1,192 @@
from pyparsing import *
from language import *
def enum_parse():
    """Parse ``enum`` declarations from the file ``test.enum`` and print them.

    For each entry a line ``<ENUM>_<NAME> = <number>`` is printed. Numbering
    starts at 0 for every parsed item, jumps to an explicit ``= value`` when
    an entry has one, and increases by one after each entry.
    """
    # syntax we don't want to see in the final parse tree
    _lcurl = Suppress('{')
    _rcurl = Suppress('}')
    _equal = Suppress('=')
    _comma = Suppress(',')
    _semi = Suppress(';')
    _enum = Suppress('enum')

    identifier = Word(alphas, alphanums + '_')
    integer = Word(nums)
    enumValue = Group(identifier('name') + Optional(_equal + integer('value')))
    enumList = Group(enumValue + ZeroOrMore(_comma + enumValue))
    enum = _enum + identifier('enum') + _lcurl + enumList('list') + _rcurl + Optional(_semi)
    enumlist = ZeroOrMore(enum)

    # The context manager closes the file even if reading raises; the text is
    # read up front so the handle is not held open while parsing and printing.
    with open('test.enum', 'r') as fd:
        source = fd.read()

    # NOTE(review): parseAll=True requires the whole file to consist of enum
    # declarations, and this loop iterates the top-level tokens directly
    # (not ``item.list``) -- confirm this is the intended traversal.
    for item in enumlist.parseString(source, parseAll=True):
        next_id = 0
        for entry in item:
            if entry.value != '':
                next_id = int(entry.value)
            print('%s_%s = %d' % (item.enum.upper(), entry.name.upper(), next_id))
            next_id += 1
def parse_fourfn():
    """Return the arithmetic-expression grammar, building it on first use.

    The grammar is kept in the module-level name ``bnf``; when that already
    holds a truthy value it is returned unchanged.
    """
    global bnf
    if bnf:
        return bnf

    point = Literal(".")
    e = CaselessLiteral("E")
    sign_or_digit = "+-" + nums
    fnumber = Combine(
        Word(sign_or_digit, nums)
        + Optional(point + Optional(Word(nums)))
        + Optional(e + Word(sign_or_digit, nums))
    )
    ident = Word(alphas, alphas + nums + "_$")

    lpar = Literal("(").suppress()
    rpar = Literal(")").suppress()
    addop = Literal("+") | Literal("-")
    multop = Literal("*") | Literal("/")
    expop = Literal("^")
    pi = CaselessLiteral("PI")

    expr = Forward()
    # A constant, a number, or a function call ``ident(expr)``.
    operand = (pi | e | fnumber | ident + lpar + expr + rpar).setParseAction(pushFirst)
    # A parenthesised sub-expression.
    grouped = lpar + expr.suppress() + rpar
    atom = (Optional("-") + operand | grouped).setParseAction(pushUMinus)

    # by defining exponentiation as "atom [ ^ factor ]..." instead of
    # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
    # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
    factor = Forward()
    factor << atom + ZeroOrMore((expop + factor).setParseAction(pushFirst))
    term = factor + ZeroOrMore((multop + factor).setParseAction(pushFirst))
    expr << term + ZeroOrMore((addop + term).setParseAction(pushFirst))

    bnf = expr
    return bnf
class Parser(object):
    """Builds a pyparsing grammar once and parses program text with it.

    The finished top-level grammar element is stored in ``self.parser``.
    """

    def __init__(self):
        self.parser = None
        self.init_parser()

    def nest(self, tokens):
        """Parse action: wrap the matched tokens in a single nested list."""
        return [list(tokens)]

    def init_parser(self):
        """Construct the grammar and store its root in ``self.parser``.

        Phase 1:
        most important part is to build the meta-parser for "expr". expr
        represents any atomic action that returns a value, and the bulk of
        the code in any program will consist primarily of exprs and flow
        control. expr is heavily recursive, because most types of expr can
        take another expr as an input value.

        Phase 2:
        flow-control statements and the block structure built on top of expr.
        """
        point = Literal(".")
        plus = Literal("+")
        minus = Literal("-")
        mult = Literal("*")
        div = Literal("/")
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        llbr = Literal("[").suppress()
        rlbr = Literal("]").suppress()
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        excl = Literal("!")
        call = Literal(":")
        endl = Literal(";")
        lisep = Literal(",").suppress()
        objn = Literal("#")
        ref = Literal("$")
        assign = Literal("=")
        flatten = Literal("@")
        # Separate copy of "!" so it can be used as the unary-negation prefix
        # independently of the "!" used in file references.
        neg = excl.copy()

        expr = Forward()
        ident = Word(alphas+"_", alphas+nums+"_")
        ident.setParseAction(VMIdent.parse)
        variable = Word(alphas+"_", alphas+nums+"_")
        variable.setParseAction(VMVariable.parse)
        integer = Word("+-"+nums, nums)
        fnumber = Combine(integer +
                          Optional(point + Optional(Word(nums))) +
                          Optional(CaselessLiteral('e') + Word("+-"+nums, nums)))
        objref = objn + Word("+-"+nums, nums)
        objref.setParseAction(VMObjRef.parse)
        coreref = (ref + ident)
        coreref.setParseAction(VMCoreRef.parse)
        bexpr = (lpar + expr + rpar).setParseAction(self.nest)
        objrefexpr = bexpr | coreref | variable | objref
        identexpr = bexpr | ident
        propref = (objrefexpr + point + ident).setParseAction(VMPropRef.parse) | coreref
        fileref = (objrefexpr + excl + ident).setParseAction(VMFileRef.parse)
        argspec = delimitedList(expr)
        argspec.setParseAction(StackToList.parse)
        funccall = objrefexpr + call + identexpr + lpar + argspec + rpar
        fnumber.setParseAction(VMFloat.parse)
        integer.setParseAction(VMInteger.parse)
        funccall.setParseAction(CallFunc.parse)
        stringlit = QuotedString(quoteChar='"', escChar='\\').setParseAction(VMString.parse)
        atom = Forward()
        bifunction = (ident + lpar + argspec + rpar).setParseAction(CallBuiltin.parse)
        flatexpr = Optional(flatten) + expr
        flatexpr.setParseAction(Flatten.parse)
        listlit = llbr + Optional(flatexpr) + ZeroOrMore(lisep + flatexpr) + rlbr
        # NOTE(review): ``|`` takes the first alternative that matches, and
        # ``integer`` is listed before ``fnumber`` -- confirm that input such
        # as "1.5" is parsed the way you intend.
        literal = integer | fnumber | stringlit | listlit | objref
        atom << (Optional(minus) + ZeroOrMore(neg) + (propref | literal | bifunction | bexpr | variable | funccall | fileref)).setParseAction(UnaryOp.parse)

        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
        # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor << atom + ZeroOrMore((expop + factor).setParseAction(ArithExp.parse))
        term = factor + ZeroOrMore((multop + factor).setParseAction(ArithMul.parse))
        mathexpr = term + ZeroOrMore((addop + term).setParseAction(ArithAdd.parse))

        # Two-character comparison operators are listed before "<" and ">"
        # so the longer spelling is tried first.
        opeq = Literal('==')
        opneq = Literal('!=')
        oplteq = Literal('<=')
        opgteq = Literal('>=')
        oplt = Literal('<')
        opgt = Literal('>')
        opin = Keyword('in')
        opcmp = opeq | opneq | oplteq | opgteq | oplt | opgt | opin
        eqexpr = mathexpr + Optional((opcmp + mathexpr).setParseAction(BoolCompare.parse))
        opand = Literal('&&') | Keyword('and')
        opor = Literal('||') | Keyword('or')
        opxor = Literal('~~') | Keyword('xor')
        opbool = opand | opor | opxor
        boolexpr = eqexpr + ZeroOrMore((opbool + eqexpr).setParseAction(BoolLogic.parse))
        assignable = variable | propref | fileref
        assignexpr = Optional(assignable + assign) + boolexpr
        expr << assignexpr.setParseAction(Assignment.parse)

        # phase 2:
        # now that expr is built, we can move on to handling flow control
        # statements, and after that the structure of the program is mostly
        # defined
        ifstart = (Keyword("if") + bexpr)
        ifelseif = (Keyword("elseif") + bexpr)
        ifelse = Keyword("else")
        ifend = Keyword("endif")
        trystart = Keyword("try")
        tryexcept = (Keyword("except") + variable)
        tryelse = Keyword("else")
        tryfinally = Keyword("finally")
        tryend = Keyword("endtry")
        whilestart = (Keyword("while") + bexpr)
        whileend = Keyword("endwhile")
        forstart = (Keyword("for") + variable + Keyword("in") + bexpr)
        forend = Keyword("endfor")
        # NOTE(review): every other parse action here is a ``.parse``
        # attribute; these two pass the bare names -- confirm that is
        # intentional.
        kwdbreak = Keyword("break").setParseAction(LoopBreak)
        kwdcontinue = Keyword("continue").setParseAction(LoopContinue)
        kwdreturn = Keyword("return")
        rtnexpr = (kwdreturn + expr).setParseAction(KeywordReturn.parse)
        line = expr | rtnexpr
        lline = expr | rtnexpr | kwdcontinue | kwdbreak
        exprblock = ZeroOrMore(line + endl)
        lexprblock = ZeroOrMore(lline + endl)
        block = Forward()
        lblock = Forward()
        ifblock = ifstart + block + ZeroOrMore(ifelseif + block) + Optional(ifelse + block) + ifend
        tryblock = trystart + block + Optional(tryexcept + block + Optional(tryelse + block)) + Optional(tryfinally + block) + tryend
        iflblock = ifstart + lblock + ZeroOrMore(ifelseif + lblock) + Optional(ifelse + lblock) + ifend
        trylblock = trystart + lblock + Optional(tryexcept + lblock + Optional(tryelse + lblock)) + Optional(tryfinally + block) + tryend
        whileblock = whilestart + lblock + whileend
        forblock = forstart + lblock + forend
        ifblock.setParseAction(IfBlock.parse)
        tryblock.setParseAction(TryBlock.parse)
        iflblock.setParseAction(IfBlock.parse)
        trylblock.setParseAction(TryBlock.parse)
        whileblock.setParseAction(WhileBlock.parse)
        forblock.setParseAction(ForeachBlock.parse)

        # blocks are used for code blocks that are outside a loop. Inside a
        # loop, all code blocks are lblocks which allow loop-control keywords
        # like break and continue (except try-finally, it wouldn't make sense)
        block << (exprblock + Optional(ifblock | tryblock | whileblock | forblock) + exprblock)
        lblock << (lexprblock + Optional(iflblock | trylblock | whileblock | forblock) + lexprblock)
        block.setParseAction(self.nest)
        lblock.setParseAction(self.nest)
        endl.setParseAction(DiscardStack.parse)
        self.parser = block

    def parse(self, data):
        """Parse ``data`` completely and return the optimizer's result.

        The whole input must match the grammar (``parseAll=True``).

        NOTE(review): ``optimizer`` is not defined in the visible part of this
        file; presumably it comes from ``from language import *`` -- confirm.
        """
        rv = self.parser.parseString(data, parseAll=True)
        return optimizer.optimize(rv)

    def test(self):
        """Parse the file ``test.moo`` (decoded as UTF-8) and print the result."""
        # The context manager closes the file even if reading or decoding raises.
        with open("test.moo", "r") as fd:
            data = unicode(fd.read(), 'utf-8')
        print(self.parse(data))
# Module-level arithmetic grammar built from pyparsing elements.
# NOTE(review): pushFirst and pushUMinus are not defined in the visible part
# of this file -- confirm they are in scope before this code runs.
point = Literal( "." )
integer = Word( "+-"+nums, nums )
# A number: integer part, optional fraction, optional exponent, combined into one token.
fnumber = Combine( integer +
Optional( point + Optional( Word( nums ) ) ) +
Optional( CaselessLiteral('e') + Word( "+-"+nums, nums ) ) )
ident = Word(alphas, alphas+nums+"_")
plus = Literal( "+" )
minus = Literal( "-" )
mult = Literal( "*" )
div = Literal( "/" )
# Parentheses are matched but suppressed from the parse results.
lpar = Literal( "(" ).suppress()
rpar = Literal( ")" ).suppress()
addop = plus | minus
multop = mult | div
expop = Literal( "^" )
# expr is declared first as a Forward so atom can refer to it recursively.
expr = Forward()
# atom: optional leading "-" followed by a number or ident(expr) call, or a parenthesised expr.
atom = (Optional("-") + ( fnumber | ident + lpar + expr + rpar ).setParseAction( pushFirst ) | ( lpar + expr.suppress() + rpar )).setParseAction(pushUMinus)
# by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
# that is, 2^3^2 = 2^(3^2), not (2^3)^2.
factor = Forward()
factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( pushFirst ) )
term = factor + ZeroOrMore( ( multop + factor ).setParseAction( pushFirst ) )
expr << term + ZeroOrMore( ( addop + term ).setParseAction( pushFirst ) )
if __name__ == "__main__":
    # Script entry point: build a Parser and run its self-test, which parses
    # test.moo and prints the result.
    p = Parser()
    p.test()