mung language changes

--HG--
branch : mung
This commit is contained in:
cecilkorik 2010-11-27 22:46:34 +00:00
parent b11356b0c3
commit fe0ea2a7f9
4 changed files with 1125 additions and 264 deletions

View file

@ -9,16 +9,35 @@ endwhile
bytecode: bytecode:
startblock 1 startblock 1
stack_literal_int 1 stack_literal_int 1 1
exit_true 1 exit_true 1 0
stack_literal_str "message" stack_literal_str "message" 1
set_var arg set_var arg 1
discard_stack 1 discard_stack 1 0
get_var arg get_var arg 1
stack_literal_obj #7407 stack_literal_obj #7407 2
call_builtin send call_builtin send 1
discard_stack 1 discard_stack 1 0
stack_literal_float 1.0 stack_literal_float 1.0 1
call_builtin suspend call_builtin suspend 1
discard_stack 1 discard_stack 1 0
endblock 1 endblock 1
other stuff:
var = 1 == 0 + 1
list = [1, 2, @[3, 4], 5]
literal 1
literal 2
literal 3
literal 4
literal 2
makelist
flatten
literal 5
literal 4 <-- the length
makelist 4

File diff suppressed because it is too large Load diff

View file

@ -1,54 +0,0 @@
class VMType(object):
    """Common base class for every value type wrapped for the VM."""
class VMInteger(VMType):
    """VM value holding a Python int."""

    def __init__(self, value):
        # int() normalizes whatever was passed in (and raises if it cannot).
        as_int = int(value)
        self.value = as_int
class VMFloat(VMType):
    """VM value holding a Python float."""

    def __init__(self, value):
        # float() normalizes whatever was passed in (and raises if it cannot).
        as_float = float(value)
        self.value = as_float
class VMTable(VMType):
    """VM value holding a Python dict."""

    def __init__(self, value):
        # dict() makes a new top-level mapping from the argument.
        as_dict = dict(value)
        self.value = as_dict
class VMString(VMType):
    """VM value holding a unicode string."""

    def __init__(self, value):
        if not isinstance(value, unicode):
            # Anything else is stringified and decoded as ASCII; bytes that
            # are not valid ASCII are dropped ('ignore').
            value = unicode(str(value), 'ascii', 'ignore')
        self.value = value
class VMObjRef(VMType):
    """VM value holding an object reference.

    An existing ``ObjRef`` is stored unchanged; an int or float is converted
    with ``int()`` and wrapped in a new ``ObjRef``.

    Raises:
        TypeError: if ``value`` is neither an ``ObjRef`` nor an int/float.
    """

    def __init__(self, value):
        if isinstance(value, ObjRef):
            self.value = value
        elif isinstance(value, (float, int)):
            # Numeric ids are normalized to int before being wrapped.
            self.value = ObjRef(int(value))
        else:
            # Call-style raise is valid on both Python 2 and Python 3.
            raise TypeError("Attempted to create VMObjRef with invalid object reference: %r" % (value,))
def coerce(value):
    """Wrap a native Python value in the matching VMType subclass.

    The checks run in a fixed order: int, tuple/list, unicode, dict, ObjRef,
    float, then None.

    Args:
        value: the native value to wrap.

    Returns:
        A VMType instance wrapping ``value``. ``None`` becomes
        ``VMInteger(0)``.

    Raises:
        TypeError: if ``value`` is of a type not handled above.
    """
    if isinstance(value, int):
        return VMInteger(value)
    if isinstance(value, (tuple, list)):
        # NOTE(review): VMList is not defined in the visible part of this
        # file -- confirm where it is expected to come from.
        return VMList(list(value))
    if isinstance(value, unicode):
        return VMString(value)
    if isinstance(value, dict):
        return VMTable(value)
    if isinstance(value, ObjRef):
        return VMObjRef(value)
    if isinstance(value, float):
        return VMFloat(value)
    if value is None:
        # Identity test: None is a singleton, and `==` could be hijacked by a
        # custom __eq__. None is represented as integer zero.
        return VMInteger(0)
    raise TypeError("Unknown type %s cannot be coerced to VMType" % (type(value),))
def uncoerce(value):
    """Return the native Python value stored inside a VMType wrapper.

    Asserts that ``value`` is a VMType instance before unwrapping it.
    """
    assert isinstance(value, VMType)
    unwrapped = value.value
    return unwrapped

283
parse.py
View file

@ -1,103 +1,192 @@
from pyparsing import * from pyparsing import *
from language import *
def enum_parse():
    """Parse the enum declarations in ``test.enum`` and print constants.

    For every parsed entry a line ``ENUM_NAME = value`` is printed (both
    names upper-cased). The value counter starts at 0 for each item, is reset
    by an explicit ``= value`` on an entry, and is incremented after every
    entry.
    """
    # syntax we don't want to see in the final parse tree
    _lcurl = Suppress('{')
    _rcurl = Suppress('}')
    _equal = Suppress('=')
    _comma = Suppress(',')
    _semi = Suppress(';')
    _enum = Suppress('enum')

    identifier = Word(alphas, alphanums + '_')
    integer = Word(nums)
    enumValue = Group(identifier('name') + Optional(_equal + integer('value')))
    enumList = Group(enumValue + ZeroOrMore(_comma + enumValue))
    enum = _enum + identifier('enum') + _lcurl + enumList('list') + _rcurl + Optional(_semi)
    enumlist = ZeroOrMore(enum)

    # The context manager closes the file even if reading raises.
    with open('test.enum', 'r') as fd:
        source = fd.read()

    # parseAll=True requires the whole input to match the enum grammar.
    # NOTE(review): `enum` is not wrapped in Group(), so iterating the result
    # may yield flat tokens rather than one item per enum -- confirm that
    # `item.enum` / `entry.value` resolve as intended.
    for item in enumlist.parseString(source, parseAll=True):
        next_id = 0
        for entry in item:
            if entry.value != '':
                next_id = int(entry.value)
            print('%s_%s = %d' % (item.enum.upper(), entry.name.upper(), next_id))
            next_id += 1
def parse_fourfn(): class Parser(object):
global bnf def __init__(self):
if not bnf: self.parser = None
point = Literal( "." ) self.init_parser()
e = CaselessLiteral( "E" )
fnumber = Combine( Word( "+-"+nums, nums ) +
Optional( point + Optional( Word( nums ) ) ) +
Optional( e + Word( "+-"+nums, nums ) ) )
ident = Word(alphas, alphas+nums+"_$")
plus = Literal( "+" )
minus = Literal( "-" )
mult = Literal( "*" )
div = Literal( "/" )
lpar = Literal( "(" ).suppress()
rpar = Literal( ")" ).suppress()
addop = plus | minus
multop = mult | div
expop = Literal( "^" )
pi = CaselessLiteral( "PI" )
expr = Forward()
atom = (Optional("-") + ( pi | e | fnumber | ident + lpar + expr + rpar ).setParseAction( pushFirst ) | ( lpar + expr.suppress() + rpar )).setParseAction(pushUMinus)
# by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
# that is, 2^3^2 = 2^(3^2), not (2^3)^2.
factor = Forward()
factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( pushFirst ) )
term = factor + ZeroOrMore( ( multop + factor ).setParseAction( pushFirst ) )
expr << term + ZeroOrMore( ( addop + term ).setParseAction( pushFirst ) )
bnf = expr
return bnf
def nest(self, tokens):
    """Parse action: return the matched tokens wrapped as one nested list."""
    grouped = list(tokens)
    return [grouped]
def init_parser(self):
# Builds the pyparsing grammar in two phases (expressions, then flow control
# and blocks) and stores the top-level `block` rule in self.parser.
""" phase 1:
most important part is to build the meta-parser for "expr". expr represents any atomic action that returns a value, and the bulk of
the code in any program will consist primarily of exprs and flow control. expr is heavily recursive, because most types of expr can
take another expr as an input value.
"""
# Punctuation and operator tokens. The ones built with .suppress() are
# matched but left out of the parse results.
point = Literal( "." )
plus = Literal( "+" )
minus = Literal( "-" )
mult = Literal( "*" )
div = Literal( "/" )
lpar = Literal( "(" ).suppress()
rpar = Literal( ")" ).suppress()
llbr = Literal( "[" ).suppress()
rlbr = Literal( "]" ).suppress()
addop = plus | minus
multop = mult | div
expop = Literal( "^" )
quote = Literal( '"' )
excl = Literal( "!" )
call = Literal( ":" )
endl = Literal( ";" )
lisep = Literal( "," ).suppress()
objn = Literal( "#" )
ref = Literal( "$" )
assign = Literal( "=" )
flatten = Literal( "@" )
# neg is a separate copy of the "!" token, used for unary negation in atom.
neg = excl.copy()
# NOTE(review): the next four lines appear to have text from another column
# of a side-by-side diff (an old moo_parse body) fused in front of the real
# statements -- confirm against the actual file.
def moo_parse(): expr = Forward()
fd = open('test.moo', 'r') ident = Word(alphas+"_", alphas+nums+"_")
data = fd.read() ident.setParseAction(VMIdent.parse)
fd.close() variable = Word(alphas+"_", alphas+nums+"_")
variable.setParseAction(VMVariable.parse)
integer = Word( "+-"+nums, nums )
fnumber = Combine( integer +
Optional( point + Optional( Word( nums ) ) ) +
Optional( CaselessLiteral('e') + Word( "+-"+nums, nums ) ) )
objref = objn + Word( "+-"+nums, nums )
objref.setParseAction(VMObjRef.parse)
coreref = (ref + ident)
coreref.setParseAction(VMCoreRef.parse)
# bexpr: a parenthesized expression, nested into its own list by self.nest.
bexpr = (lpar + expr + rpar).setParseAction(self.nest)
objrefexpr = bexpr | coreref | variable | objref
identexpr = bexpr | ident
propref = (objrefexpr + point + ident).setParseAction(VMPropRef.parse) | coreref
fileref = (objrefexpr + excl + ident).setParseAction(VMFileRef.parse)
argspec = delimitedList(expr)
argspec.setParseAction(StackToList.parse)
funccall = objrefexpr + call + identexpr + lpar + argspec + rpar
fnumber.setParseAction(VMFloat.parse)
# NOTE(review): `integer` is also embedded in `fnumber` above, so this parse
# action presumably also fires while a float is being matched -- confirm.
integer.setParseAction(VMInteger.parse)
funccall.setParseAction(CallFunc.parse)
stringlit = QuotedString(quoteChar='"', escChar='\\').setParseAction(VMString.parse)
atom = Forward()
bifunction = (ident + lpar + argspec + rpar).setParseAction(CallBuiltin.parse)
flatexpr = Optional(flatten) + expr
flatexpr.setParseAction(Flatten.parse)
listlit = llbr + Optional(flatexpr) + ZeroOrMore(lisep + flatexpr) + rlbr
# NOTE(review): `|` tries alternatives in order and `integer` precedes
# `fnumber`, so input such as 1.5 presumably matches `integer` first --
# confirm this ordering is intended.
literal = integer | fnumber | stringlit | listlit | objref
atom << (Optional(minus) + ZeroOrMore(neg) + (propref | literal | bifunction | bexpr | variable | funccall | fileref)).setParseAction(UnaryOp.parse)
# by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
# that is, 2^3^2 = 2^(3^2), not (2^3)^2.
factor = Forward()
factor << atom + ZeroOrMore( (expop + factor).setParseAction(ArithExp.parse) )
term = factor + ZeroOrMore( (multop + factor).setParseAction(ArithMul.parse) )
#term.setParseAction(self.nest)
mathexpr = term + ZeroOrMore( (addop + term).setParseAction(ArithAdd.parse) )
#mathexpr.setParseAction(self.nest)
# Comparison operators. The two-character forms are listed in opcmp before
# the one-character '<' and '>'.
opeq = Literal('==')
opneq = Literal('!=')
# NOTE(review): the names opgteq/oplteq look swapped relative to their
# literals; they are only used through the combined `opcmp` below.
opgteq = Literal('<=')
oplteq = Literal('>=')
oplt = Literal('<')
opgt = Literal('>')
opin = Keyword('in')
opcmp = opeq | opneq | opgteq | oplteq | oplt | opgt | opin
# At most one comparison per eqexpr (Optional, not ZeroOrMore).
eqexpr = mathexpr + Optional( (opcmp + mathexpr).setParseAction(BoolCompare.parse) )
opand = Literal('&&') | Keyword('and')
opor = Literal('||') | Keyword('or')
opxor = Literal('~~') | Keyword('xor')
opbool = opand | opor | opxor
boolexpr = eqexpr + ZeroOrMore( (opbool + eqexpr).setParseAction(BoolLogic.parse) )
assignable = variable | propref | fileref
assignexpr = Optional(assignable + assign) + boolexpr
# Close the recursion: expr was declared as a Forward and is defined here.
expr << assignexpr.setParseAction(Assignment.parse)
""" phase 2:
now that expr is built, we can move on to handling flow control statements, and after that the structure of the program
is mostly defined
"""
ifstart = (Keyword("if") + bexpr)
ifelseif = (Keyword("elseif") + bexpr)
ifelse = Keyword("else")
ifend = Keyword("endif")
trystart = Keyword("try")
tryexcept = (Keyword("except") + variable)
tryelse = Keyword("else")
tryfinally = Keyword("finally")
tryend = Keyword("endtry")
whilestart = (Keyword("while") + bexpr)
whileend = Keyword("endwhile")
forstart = (Keyword("for") + variable + Keyword("in") + bexpr)
forend = Keyword("endfor")
# NOTE(review): these two pass the class itself as the parse action, while
# the other rules pass a `.parse` attribute -- confirm this is intended.
kwdbreak = Keyword("break").setParseAction(LoopBreak)
kwdcontinue = Keyword("continue").setParseAction(LoopContinue)
kwdreturn = Keyword("return")
rtnexpr = (kwdreturn + expr).setParseAction(KeywordReturn.parse)
# line: statements allowed anywhere; lline: additionally allows the
# loop-control keywords continue/break.
line = expr | rtnexpr
lline = expr | rtnexpr | kwdcontinue | kwdbreak
exprblock = ZeroOrMore(line + endl)
lexprblock = ZeroOrMore(lline + endl)
block = Forward()
lblock = Forward()
ifblock = ifstart + block + ZeroOrMore(ifelseif + block) + Optional(ifelse + block) + ifend
tryblock = trystart + block + Optional(tryexcept + block + Optional(tryelse + block)) + Optional(tryfinally + block) + tryend
iflblock = ifstart + lblock + ZeroOrMore(ifelseif + lblock) + Optional(ifelse + lblock) + ifend
# The finally clause below uses `block` rather than `lblock` (see the comment
# before the block/lblock definitions further down).
trylblock = trystart + lblock + Optional(tryexcept + lblock + Optional(tryelse + lblock)) + Optional(tryfinally + block) + tryend
whileblock = whilestart + lblock + whileend
forblock = forstart + lblock + forend
ifblock.setParseAction(IfBlock.parse)
tryblock.setParseAction(TryBlock.parse)
iflblock.setParseAction(IfBlock.parse)
trylblock.setParseAction(TryBlock.parse)
whileblock.setParseAction(WhileBlock.parse)
forblock.setParseAction(ForeachBlock.parse)
# blocks are used for code blocks that are outside a loop. Inside a loop, all code blocks are lblocks
# which allow loop-control keywords like break and continue (except try-finally, it wouldn't make sense)
block << (exprblock + Optional(ifblock | tryblock | whileblock | forblock) + exprblock)
lblock << (lexprblock + Optional(iflblock | trylblock | whileblock | forblock) + lexprblock)
block.setParseAction(self.nest)
lblock.setParseAction(self.nest)
# The ";" terminator token gets the DiscardStack parse action.
endl.setParseAction(DiscardStack.parse)
# The top-level rule used by parse() is a non-loop block.
self.parser = block
#print argspec.parseString("hello(hi.xyz)", parseAll=True)
#print block.parseString(u"hi.xyz + #555.test;", parseAll=True)
def parse(self, data):
    """Parse ``data`` with the grammar in ``self.parser`` and optimize it.

    The whole input must match (``parseAll=True``); the parse result is
    passed through ``optimizer.optimize`` and that return value is returned.
    """
    return optimizer.optimize(self.parser.parseString(data, parseAll=True))
def test(self):
    """Parse the contents of ``test.moo`` (decoded as UTF-8) and print the result."""
    # The context manager closes the file even if reading raises; the
    # original left the handle open.
    with open("test.moo", "r") as source_file:
        data = unicode(source_file.read(), 'utf-8')
    print(self.parse(data))
point = Literal( "." ) if __name__ == "__main__":
p = Parser()
integer = Word( "+-"+nums, nums ) p.test()
fnumber = Combine( integer +
Optional( point + Optional( Word( nums ) ) ) +
Optional( CaselessLiteral('e') + Word( "+-"+nums, nums ) ) )
ident = Word(alphas, alphas+nums+"_")
plus = Literal( "+" )
minus = Literal( "-" )
mult = Literal( "*" )
div = Literal( "/" )
lpar = Literal( "(" ).suppress()
rpar = Literal( ")" ).suppress()
addop = plus | minus
multop = mult | div
expop = Literal( "^" )
expr = Forward()
atom = (Optional("-") + ( fnumber | ident + lpar + expr + rpar ).setParseAction( pushFirst ) | ( lpar + expr.suppress() + rpar )).setParseAction(pushUMinus)
# by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
# that is, 2^3^2 = 2^(3^2), not (2^3)^2.
factor = Forward()
factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( pushFirst ) )
term = factor + ZeroOrMore( ( multop + factor ).setParseAction( pushFirst ) )
expr << term + ZeroOrMore( ( addop + term ).setParseAction( pushFirst ) )