mung language changes
--HG-- branch : mung
This commit is contained in:
parent
b11356b0c3
commit
fe0ea2a7f9
4 changed files with 1125 additions and 264 deletions
45
langdoc.txt
45
langdoc.txt
|
@ -9,16 +9,35 @@ endwhile
|
||||||
bytecode:
|
bytecode:
|
||||||
|
|
||||||
startblock 1
|
startblock 1
|
||||||
stack_literal_int 1
|
stack_literal_int 1 1
|
||||||
exit_true 1
|
exit_true 1 0
|
||||||
stack_literal_str "message"
|
stack_literal_str "message" 1
|
||||||
set_var arg
|
set_var arg 1
|
||||||
discard_stack 1
|
discard_stack 1 0
|
||||||
get_var arg
|
get_var arg 1
|
||||||
stack_literal_obj #7407
|
stack_literal_obj #7407 2
|
||||||
call_builtin send
|
call_builtin send 1
|
||||||
discard_stack 1
|
discard_stack 1 0
|
||||||
stack_literal_float 1.0
|
stack_literal_float 1.0 1
|
||||||
call_builtin suspend
|
call_builtin suspend 1
|
||||||
discard_stack 1
|
discard_stack 1 0
|
||||||
endblock 1
|
endblock 1
|
||||||
|
|
||||||
|
other stuff:
|
||||||
|
|
||||||
|
var = 1 == 0 + 1
|
||||||
|
|
||||||
|
|
||||||
|
list = [1, 2, @[3, 4], 5]
|
||||||
|
|
||||||
|
literal 1
|
||||||
|
literal 2
|
||||||
|
literal 3
|
||||||
|
literal 4
|
||||||
|
literal 2
|
||||||
|
makelist
|
||||||
|
flatten
|
||||||
|
literal 5
|
||||||
|
literal 4 <-- the length
|
||||||
|
makelist 4
|
||||||
|
|
||||||
|
|
1007
language.py
1007
language.py
File diff suppressed because it is too large
Load diff
|
@ -1,54 +0,0 @@
|
||||||
|
|
||||||
|
|
||||||
class VMType(object):
|
|
||||||
pass
|
|
||||||
|
|
||||||
class VMInteger(VMType):
|
|
||||||
def __init__(self, value):
|
|
||||||
self.value = int(value)
|
|
||||||
class VMFloat(VMType):
|
|
||||||
def __init__(self, value):
|
|
||||||
self.value = float(value)
|
|
||||||
class VMTable(VMType):
|
|
||||||
def __init__(self, value):
|
|
||||||
self.value = dict(value)
|
|
||||||
|
|
||||||
class VMString(VMType):
|
|
||||||
def __init__(self, value):
|
|
||||||
if isinstance(value, unicode):
|
|
||||||
self.value = value
|
|
||||||
else:
|
|
||||||
self.value = unicode(str(value), 'ascii', 'ignore')
|
|
||||||
|
|
||||||
class VMObjRef(VMType):
|
|
||||||
def __init__(self, value):
|
|
||||||
if isinstance(value, ObjRef):
|
|
||||||
self.value = value
|
|
||||||
elif isinstance(value, (float, int)):
|
|
||||||
self.value = ObjRef(int(value))
|
|
||||||
else:
|
|
||||||
raise TypeError, "Attempted to create VMObjRef with invalid object reference: %r" % (value,)
|
|
||||||
|
|
||||||
|
|
||||||
def coerce(value):
|
|
||||||
if isinstance(value, int):
|
|
||||||
return VMInteger(value)
|
|
||||||
elif isinstance(value, (tuple, list)):
|
|
||||||
return VMList(list(value))
|
|
||||||
elif isinstance(value, unicode):
|
|
||||||
return VMString(value)
|
|
||||||
elif isinstance(value, dict):
|
|
||||||
return VMTable(value)
|
|
||||||
elif isinstance(value, ObjRef):
|
|
||||||
return VMObjRef(value)
|
|
||||||
elif isinstance(value, float):
|
|
||||||
return VMFloat(value)
|
|
||||||
elif value == None:
|
|
||||||
return VMInteger(0)
|
|
||||||
else:
|
|
||||||
raise TypeError("Unknown type %s cannot be coerced to VMType" % (type(value),))
|
|
||||||
|
|
||||||
|
|
||||||
def uncoerce(value):
|
|
||||||
assert isinstance(value, VMType)
|
|
||||||
return value.value
|
|
283
parse.py
283
parse.py
|
@ -1,103 +1,192 @@
|
||||||
from pyparsing import *
|
from pyparsing import *
|
||||||
|
from language import *
|
||||||
|
|
||||||
def enum_parse():
|
|
||||||
fd = open('test.enum', 'r')
|
|
||||||
|
|
||||||
# syntax we don't want to see in the final parse tree
|
|
||||||
_lcurl = Suppress('{')
|
|
||||||
_rcurl = Suppress('}')
|
|
||||||
_equal = Suppress('=')
|
|
||||||
_comma = Suppress(',')
|
|
||||||
_semi = Suppress(';')
|
|
||||||
_enum = Suppress('enum')
|
|
||||||
|
|
||||||
identifier = Word(alphas,alphanums+'_')
|
|
||||||
integer = Word(nums)
|
|
||||||
enumValue = Group(identifier('name') + Optional(_equal + integer('value')))
|
|
||||||
enumList = Group(enumValue + ZeroOrMore(_comma + enumValue))
|
|
||||||
enum = _enum + identifier('enum') + _lcurl + enumList('list') + _rcurl + Optional(_semi)
|
|
||||||
|
|
||||||
enumlist = ZeroOrMore(enum)
|
|
||||||
|
|
||||||
#print enumlist.parseString(fd.read(), parseAll=True)
|
|
||||||
# find instances of enums ignoring other syntax
|
|
||||||
for item in enumlist.parseString(fd.read(), parseAll=True):
|
|
||||||
id = 0
|
|
||||||
for entry in item:
|
|
||||||
if entry.value != '':
|
|
||||||
id = int(entry.value)
|
|
||||||
print '%s_%s = %d' % (item.enum.upper(),entry.name.upper(),id)
|
|
||||||
id += 1
|
|
||||||
|
|
||||||
fd.close()
|
|
||||||
|
|
||||||
def parse_fourfn():
|
class Parser(object):
|
||||||
global bnf
|
def __init__(self):
|
||||||
if not bnf:
|
self.parser = None
|
||||||
point = Literal( "." )
|
self.init_parser()
|
||||||
e = CaselessLiteral( "E" )
|
|
||||||
fnumber = Combine( Word( "+-"+nums, nums ) +
|
|
||||||
Optional( point + Optional( Word( nums ) ) ) +
|
|
||||||
Optional( e + Word( "+-"+nums, nums ) ) )
|
|
||||||
ident = Word(alphas, alphas+nums+"_$")
|
|
||||||
|
|
||||||
plus = Literal( "+" )
|
|
||||||
minus = Literal( "-" )
|
|
||||||
mult = Literal( "*" )
|
|
||||||
div = Literal( "/" )
|
|
||||||
lpar = Literal( "(" ).suppress()
|
|
||||||
rpar = Literal( ")" ).suppress()
|
|
||||||
addop = plus | minus
|
|
||||||
multop = mult | div
|
|
||||||
expop = Literal( "^" )
|
|
||||||
pi = CaselessLiteral( "PI" )
|
|
||||||
|
|
||||||
expr = Forward()
|
|
||||||
atom = (Optional("-") + ( pi | e | fnumber | ident + lpar + expr + rpar ).setParseAction( pushFirst ) | ( lpar + expr.suppress() + rpar )).setParseAction(pushUMinus)
|
|
||||||
|
|
||||||
# by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
|
|
||||||
# that is, 2^3^2 = 2^(3^2), not (2^3)^2.
|
|
||||||
factor = Forward()
|
|
||||||
factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( pushFirst ) )
|
|
||||||
|
|
||||||
term = factor + ZeroOrMore( ( multop + factor ).setParseAction( pushFirst ) )
|
|
||||||
expr << term + ZeroOrMore( ( addop + term ).setParseAction( pushFirst ) )
|
|
||||||
bnf = expr
|
|
||||||
return bnf
|
|
||||||
|
|
||||||
|
def nest(self, tokens):
|
||||||
|
return [list(tokens)]
|
||||||
|
|
||||||
|
def init_parser(self):
|
||||||
|
""" phase 1:
|
||||||
|
most important part is to build the meta-parser for "expr". expr represents any atomic action that returns a value, and the bulk of
|
||||||
|
the code in any program will consist primarily of exprs and flow control. expr is heavily recursive, because most types of expr can
|
||||||
|
take another expr as an input value.
|
||||||
|
"""
|
||||||
|
point = Literal( "." )
|
||||||
|
plus = Literal( "+" )
|
||||||
|
minus = Literal( "-" )
|
||||||
|
mult = Literal( "*" )
|
||||||
|
div = Literal( "/" )
|
||||||
|
lpar = Literal( "(" ).suppress()
|
||||||
|
rpar = Literal( ")" ).suppress()
|
||||||
|
llbr = Literal( "[" ).suppress()
|
||||||
|
rlbr = Literal( "]" ).suppress()
|
||||||
|
addop = plus | minus
|
||||||
|
multop = mult | div
|
||||||
|
expop = Literal( "^" )
|
||||||
|
quote = Literal( '"' )
|
||||||
|
excl = Literal( "!" )
|
||||||
|
call = Literal( ":" )
|
||||||
|
endl = Literal( ";" )
|
||||||
|
lisep = Literal( "," ).suppress()
|
||||||
|
objn = Literal( "#" )
|
||||||
|
ref = Literal( "$" )
|
||||||
|
assign = Literal( "=" )
|
||||||
|
flatten = Literal( "@" )
|
||||||
|
neg = excl.copy()
|
||||||
|
|
||||||
|
|
||||||
def moo_parse():
|
expr = Forward()
|
||||||
fd = open('test.moo', 'r')
|
ident = Word(alphas+"_", alphas+nums+"_")
|
||||||
data = fd.read()
|
ident.setParseAction(VMIdent.parse)
|
||||||
fd.close()
|
variable = Word(alphas+"_", alphas+nums+"_")
|
||||||
|
variable.setParseAction(VMVariable.parse)
|
||||||
|
|
||||||
|
|
||||||
|
integer = Word( "+-"+nums, nums )
|
||||||
|
fnumber = Combine( integer +
|
||||||
|
Optional( point + Optional( Word( nums ) ) ) +
|
||||||
|
Optional( CaselessLiteral('e') + Word( "+-"+nums, nums ) ) )
|
||||||
|
objref = objn + Word( "+-"+nums, nums )
|
||||||
|
objref.setParseAction(VMObjRef.parse)
|
||||||
|
coreref = (ref + ident)
|
||||||
|
coreref.setParseAction(VMCoreRef.parse)
|
||||||
|
bexpr = (lpar + expr + rpar).setParseAction(self.nest)
|
||||||
|
objrefexpr = bexpr | coreref | variable | objref
|
||||||
|
identexpr = bexpr | ident
|
||||||
|
propref = (objrefexpr + point + ident).setParseAction(VMPropRef.parse) | coreref
|
||||||
|
fileref = (objrefexpr + excl + ident).setParseAction(VMFileRef.parse)
|
||||||
|
|
||||||
|
argspec = delimitedList(expr)
|
||||||
|
argspec.setParseAction(StackToList.parse)
|
||||||
|
funccall = objrefexpr + call + identexpr + lpar + argspec + rpar
|
||||||
|
|
||||||
|
fnumber.setParseAction(VMFloat.parse)
|
||||||
|
integer.setParseAction(VMInteger.parse)
|
||||||
|
funccall.setParseAction(CallFunc.parse)
|
||||||
|
|
||||||
|
stringlit = QuotedString(quoteChar='"', escChar='\\').setParseAction(VMString.parse)
|
||||||
|
|
||||||
|
atom = Forward()
|
||||||
|
bifunction = (ident + lpar + argspec + rpar).setParseAction(CallBuiltin.parse)
|
||||||
|
|
||||||
|
flatexpr = Optional(flatten) + expr
|
||||||
|
flatexpr.setParseAction(Flatten.parse)
|
||||||
|
listlit = llbr + Optional(flatexpr) + ZeroOrMore(lisep + flatexpr) + rlbr
|
||||||
|
literal = integer | fnumber | stringlit | listlit | objref
|
||||||
|
|
||||||
|
atom << (Optional(minus) + ZeroOrMore(neg) + (propref | literal | bifunction | bexpr | variable | funccall | fileref)).setParseAction(UnaryOp.parse)
|
||||||
|
|
||||||
|
|
||||||
|
# by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
|
||||||
|
# that is, 2^3^2 = 2^(3^2), not (2^3)^2.
|
||||||
|
factor = Forward()
|
||||||
|
factor << atom + ZeroOrMore( (expop + factor).setParseAction(ArithExp.parse) )
|
||||||
|
|
||||||
|
term = factor + ZeroOrMore( (multop + factor).setParseAction(ArithMul.parse) )
|
||||||
|
#term.setParseAction(self.nest)
|
||||||
|
mathexpr = term + ZeroOrMore( (addop + term).setParseAction(ArithAdd.parse) )
|
||||||
|
#mathexpr.setParseAction(self.nest)
|
||||||
|
|
||||||
|
opeq = Literal('==')
|
||||||
|
opneq = Literal('!=')
|
||||||
|
opgteq = Literal('<=')
|
||||||
|
oplteq = Literal('>=')
|
||||||
|
oplt = Literal('<')
|
||||||
|
opgt = Literal('>')
|
||||||
|
opin = Keyword('in')
|
||||||
|
|
||||||
|
opcmp = opeq | opneq | opgteq | oplteq | oplt | opgt | opin
|
||||||
|
eqexpr = mathexpr + Optional( (opcmp + mathexpr).setParseAction(BoolCompare.parse) )
|
||||||
|
|
||||||
|
opand = Literal('&&') | Keyword('and')
|
||||||
|
opor = Literal('||') | Keyword('or')
|
||||||
|
opxor = Literal('~~') | Keyword('xor')
|
||||||
|
|
||||||
|
opbool = opand | opor | opxor
|
||||||
|
boolexpr = eqexpr + ZeroOrMore( (opbool + eqexpr).setParseAction(BoolLogic.parse) )
|
||||||
|
|
||||||
|
|
||||||
|
assignable = variable | propref | fileref
|
||||||
|
assignexpr = Optional(assignable + assign) + boolexpr
|
||||||
|
expr << assignexpr.setParseAction(Assignment.parse)
|
||||||
|
|
||||||
|
|
||||||
|
""" phase 2:
|
||||||
|
now that expr is built, we can move on to handling flow control statements, and after that the structure of the program
|
||||||
|
is mostly defined
|
||||||
|
"""
|
||||||
|
|
||||||
|
ifstart = (Keyword("if") + bexpr)
|
||||||
|
ifelseif = (Keyword("elseif") + bexpr)
|
||||||
|
ifelse = Keyword("else")
|
||||||
|
ifend = Keyword("endif")
|
||||||
|
trystart = Keyword("try")
|
||||||
|
tryexcept = (Keyword("except") + variable)
|
||||||
|
tryelse = Keyword("else")
|
||||||
|
tryfinally = Keyword("finally")
|
||||||
|
tryend = Keyword("endtry")
|
||||||
|
whilestart = (Keyword("while") + bexpr)
|
||||||
|
whileend = Keyword("endwhile")
|
||||||
|
forstart = (Keyword("for") + variable + Keyword("in") + bexpr)
|
||||||
|
forend = Keyword("endfor")
|
||||||
|
|
||||||
|
kwdbreak = Keyword("break").setParseAction(LoopBreak)
|
||||||
|
kwdcontinue = Keyword("continue").setParseAction(LoopContinue)
|
||||||
|
kwdreturn = Keyword("return")
|
||||||
|
|
||||||
|
rtnexpr = (kwdreturn + expr).setParseAction(KeywordReturn.parse)
|
||||||
|
line = expr | rtnexpr
|
||||||
|
lline = expr | rtnexpr | kwdcontinue | kwdbreak
|
||||||
|
exprblock = ZeroOrMore(line + endl)
|
||||||
|
lexprblock = ZeroOrMore(lline + endl)
|
||||||
|
|
||||||
|
block = Forward()
|
||||||
|
lblock = Forward()
|
||||||
|
ifblock = ifstart + block + ZeroOrMore(ifelseif + block) + Optional(ifelse + block) + ifend
|
||||||
|
tryblock = trystart + block + Optional(tryexcept + block + Optional(tryelse + block)) + Optional(tryfinally + block) + tryend
|
||||||
|
iflblock = ifstart + lblock + ZeroOrMore(ifelseif + lblock) + Optional(ifelse + lblock) + ifend
|
||||||
|
trylblock = trystart + lblock + Optional(tryexcept + lblock + Optional(tryelse + lblock)) + Optional(tryfinally + block) + tryend
|
||||||
|
whileblock = whilestart + lblock + whileend
|
||||||
|
forblock = forstart + lblock + forend
|
||||||
|
|
||||||
|
ifblock.setParseAction(IfBlock.parse)
|
||||||
|
tryblock.setParseAction(TryBlock.parse)
|
||||||
|
iflblock.setParseAction(IfBlock.parse)
|
||||||
|
trylblock.setParseAction(TryBlock.parse)
|
||||||
|
whileblock.setParseAction(WhileBlock.parse)
|
||||||
|
forblock.setParseAction(ForeachBlock.parse)
|
||||||
|
|
||||||
|
# blocks are used for code blocks that are outside a loop. Inside a loop, all code blocks are lblocks
|
||||||
|
# which allow loop-control keywords like break and continue (except inside a try's finally clause, where they would not make sense)
|
||||||
|
|
||||||
|
block << (exprblock + Optional(ifblock | tryblock | whileblock | forblock) + exprblock)
|
||||||
|
lblock << (lexprblock + Optional(iflblock | trylblock | whileblock | forblock) + lexprblock)
|
||||||
|
|
||||||
|
block.setParseAction(self.nest)
|
||||||
|
lblock.setParseAction(self.nest)
|
||||||
|
|
||||||
|
endl.setParseAction(DiscardStack.parse)
|
||||||
|
self.parser = block
|
||||||
|
#print argspec.parseString("hello(hi.xyz)", parseAll=True)
|
||||||
|
#print block.parseString(u"hi.xyz + #555.test;", parseAll=True)
|
||||||
|
|
||||||
|
def parse(self, data):
|
||||||
|
rv = self.parser.parseString(data, parseAll=True)
|
||||||
|
|
||||||
|
return optimizer.optimize(rv)
|
||||||
|
|
||||||
|
|
||||||
|
def test(self):
|
||||||
|
#print self.parse(u"if (1) #740.xyz + -hello.world; endif")
|
||||||
|
|
||||||
|
data = unicode(open("test.moo", "r").read(), 'utf-8')
|
||||||
|
print self.parse(data)
|
||||||
|
|
||||||
point = Literal( "." )
|
if __name__ == "__main__":
|
||||||
|
p = Parser()
|
||||||
integer = Word( "+-"+nums, nums )
|
p.test()
|
||||||
fnumber = Combine( integer +
|
|
||||||
Optional( point + Optional( Word( nums ) ) ) +
|
|
||||||
Optional( CaselessLiteral('e') + Word( "+-"+nums, nums ) ) )
|
|
||||||
ident = Word(alphas, alphas+nums+"_")
|
|
||||||
|
|
||||||
plus = Literal( "+" )
|
|
||||||
minus = Literal( "-" )
|
|
||||||
mult = Literal( "*" )
|
|
||||||
div = Literal( "/" )
|
|
||||||
lpar = Literal( "(" ).suppress()
|
|
||||||
rpar = Literal( ")" ).suppress()
|
|
||||||
addop = plus | minus
|
|
||||||
multop = mult | div
|
|
||||||
expop = Literal( "^" )
|
|
||||||
|
|
||||||
expr = Forward()
|
|
||||||
atom = (Optional("-") + ( fnumber | ident + lpar + expr + rpar ).setParseAction( pushFirst ) | ( lpar + expr.suppress() + rpar )).setParseAction(pushUMinus)
|
|
||||||
|
|
||||||
# by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
|
|
||||||
# that is, 2^3^2 = 2^(3^2), not (2^3)^2.
|
|
||||||
factor = Forward()
|
|
||||||
factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( pushFirst ) )
|
|
||||||
|
|
||||||
term = factor + ZeroOrMore( ( multop + factor ).setParseAction( pushFirst ) )
|
|
||||||
expr << term + ZeroOrMore( ( addop + term ).setParseAction( pushFirst ) )
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Add table
Reference in a new issue