mung language changes
--HG-- branch : mung
This commit is contained in:
parent
b11356b0c3
commit
fe0ea2a7f9
4 changed files with 1125 additions and 264 deletions
45
langdoc.txt
45
langdoc.txt
|
@ -9,16 +9,35 @@ endwhile
|
|||
bytecode:
|
||||
|
||||
startblock 1
|
||||
stack_literal_int 1
|
||||
exit_true 1
|
||||
stack_literal_str "message"
|
||||
set_var arg
|
||||
discard_stack 1
|
||||
get_var arg
|
||||
stack_literal_obj #7407
|
||||
call_builtin send
|
||||
discard_stack 1
|
||||
stack_literal_float 1.0
|
||||
call_builtin suspend
|
||||
discard_stack 1
|
||||
endblock 1
|
||||
stack_literal_int 1 1
|
||||
exit_true 1 0
|
||||
stack_literal_str "message" 1
|
||||
set_var arg 1
|
||||
discard_stack 1 0
|
||||
get_var arg 1
|
||||
stack_literal_obj #7407 2
|
||||
call_builtin send 1
|
||||
discard_stack 1 0
|
||||
stack_literal_float 1.0 1
|
||||
call_builtin suspend 1
|
||||
discard_stack 1 0
|
||||
endblock 1
|
||||
|
||||
other stuff:
|
||||
|
||||
var = 1 == 0 + 1
|
||||
|
||||
|
||||
list = [1, 2, @[3, 4], 5]
|
||||
|
||||
literal 1
|
||||
literal 2
|
||||
literal 3
|
||||
literal 4
|
||||
literal 2
|
||||
makelist
|
||||
flatten
|
||||
literal 5
|
||||
literal 4 <-- the length
|
||||
makelist 4
|
||||
|
||||
|
|
1007
language.py
1007
language.py
File diff suppressed because it is too large
Load diff
|
@ -1,54 +0,0 @@
|
|||
|
||||
|
||||
class VMType(object):
    """Common base class of every VM value wrapper defined in this module.

    Each subclass stores the wrapped Python value in ``self.value``.
    """
    pass


class VMInteger(VMType):
    """VM wrapper around an integer."""

    def __init__(self, value):
        # int() accepts ints, floats (truncated) and numeric strings.
        self.value = int(value)


class VMFloat(VMType):
    """VM wrapper around a floating point number."""

    def __init__(self, value):
        self.value = float(value)


class VMTable(VMType):
    """VM wrapper around a dictionary."""

    def __init__(self, value):
        # dict() copies a mapping or builds one from key/value pairs, so the
        # wrapper never aliases the caller's dictionary object.
        self.value = dict(value)


class VMString(VMType):
    """VM wrapper around a unicode string (Python 2 ``unicode``)."""

    def __init__(self, value):
        if isinstance(value, unicode):
            self.value = value
        else:
            # Anything else is stringified and decoded as ASCII; bytes that
            # are not valid ASCII are silently dropped ('ignore').
            self.value = unicode(str(value), 'ascii', 'ignore')


class VMObjRef(VMType):
    """VM wrapper around an object reference.

    NOTE(review): ``ObjRef`` is not defined or imported in the visible part
    of this file -- confirm where it comes from.

    Raises:
        TypeError: if ``value`` is neither an ``ObjRef`` nor a number.
    """

    def __init__(self, value):
        if isinstance(value, ObjRef):
            self.value = value
        elif isinstance(value, (float, int)):
            # Numeric ids are truncated to int before building the ObjRef.
            self.value = ObjRef(int(value))
        else:
            # Call-style raise is valid on both Python 2 and Python 3; the
            # original ``raise TypeError, "..."`` form is Python 2 only.
            raise TypeError("Attempted to create VMObjRef with invalid object reference: %r" % (value,))


def coerce(value):
    """Wrap a plain Python value in the matching ``VMType`` subclass.

    ``None`` is mapped to ``VMInteger(0)``.

    NOTE(review): ``VMList`` is referenced here but is not defined in the
    visible part of this file -- confirm it exists before relying on the
    tuple/list branch.

    Raises:
        TypeError: if no wrapper exists for ``type(value)``.
    """
    if isinstance(value, int):
        return VMInteger(value)
    elif isinstance(value, (tuple, list)):
        return VMList(list(value))
    elif isinstance(value, unicode):
        return VMString(value)
    elif isinstance(value, dict):
        return VMTable(value)
    elif isinstance(value, ObjRef):
        return VMObjRef(value)
    elif isinstance(value, float):
        return VMFloat(value)
    elif value is None:
        # Identity test: an object with a permissive __eq__ must not be
        # mistaken for None.
        return VMInteger(0)
    else:
        raise TypeError("Unknown type %s cannot be coerced to VMType" % (type(value),))


def uncoerce(value):
    """Return the plain Python value stored inside a ``VMType`` wrapper."""
    assert isinstance(value, VMType)
    return value.value
|
283
parse.py
283
parse.py
|
@ -1,103 +1,192 @@
|
|||
from pyparsing import *
|
||||
from language import *
|
||||
|
||||
def enum_parse():
    """Parse ``enum`` declarations from ``test.enum`` and print constants.

    For every entry one line of the form ``ENUMNAME_ENTRYNAME = <number>``
    is printed.  An entry with an explicit ``= <integer>`` resets the
    running counter to that integer; every entry then increments it by one.
    """
    # syntax we don't want to see in the final parse tree
    _lcurl = Suppress('{')
    _rcurl = Suppress('}')
    _equal = Suppress('=')
    _comma = Suppress(',')
    _semi = Suppress(';')
    _enum = Suppress('enum')

    identifier = Word(alphas, alphanums + '_')
    integer = Word(nums)
    enumValue = Group(identifier('name') + Optional(_equal + integer('value')))
    enumList = Group(enumValue + ZeroOrMore(_comma + enumValue))
    enum = _enum + identifier('enum') + _lcurl + enumList('list') + _rcurl + Optional(_semi)

    enumlist = ZeroOrMore(enum)

    # Read the input up front inside a context manager so the file handle is
    # released even if parsing raises.
    with open('test.enum', 'r') as fd:
        source = fd.read()

    # find instances of enums ignoring other syntax
    for item in enumlist.parseString(source, parseAll=True):
        next_id = 0  # renamed from ``id`` to avoid shadowing the builtin
        for entry in item:
            if entry.value != '':
                next_id = int(entry.value)
            # Parenthesised single-argument form prints identically on
            # Python 2 and Python 3.
            print('%s_%s = %d' % (item.enum.upper(), entry.name.upper(), next_id))
            next_id += 1
|
||||
|
||||
def parse_fourfn():
    """Return the pyparsing grammar for arithmetic expressions.

    The grammar is built on first use and stored in the module-level global
    ``bnf``; later calls return that same object.

    NOTE(review): ``bnf`` must already exist (falsy) at module level, and
    ``pushFirst`` / ``pushUMinus`` must be defined elsewhere -- none of them
    is visible in this part of the file.
    """
    global bnf
    if not bnf:
        point = Literal(".")
        e = CaselessLiteral("E")
        fnumber = Combine(Word("+-" + nums, nums) +
                          Optional(point + Optional(Word(nums))) +
                          Optional(e + Word("+-" + nums, nums)))
        ident = Word(alphas, alphas + nums + "_$")

        plus = Literal("+")
        minus = Literal("-")
        mult = Literal("*")
        div = Literal("/")
        lpar = Literal("(").suppress()
        rpar = Literal(")").suppress()
        addop = plus | minus
        multop = mult | div
        expop = Literal("^")
        pi = CaselessLiteral("PI")

        expr = Forward()
        atom = (Optional("-") + (pi | e | fnumber | ident + lpar + expr + rpar).setParseAction(pushFirst) | (lpar + expr.suppress() + rpar)).setParseAction(pushUMinus)

        # by defining exponentiation as "atom [ ^ factor ]..." instead of
        # "atom [ ^ atom ]...", we get right-to-left exponents, instead of
        # left-to-right; that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor << atom + ZeroOrMore((expop + factor).setParseAction(pushFirst))

        term = factor + ZeroOrMore((multop + factor).setParseAction(pushFirst))
        expr << term + ZeroOrMore((addop + term).setParseAction(pushFirst))
        bnf = expr
    return bnf
|
||||
class Parser(object):
|
||||
def __init__(self):
|
||||
self.parser = None
|
||||
self.init_parser()
|
||||
|
||||
def nest(self, tokens):
|
||||
return [list(tokens)]
|
||||
|
||||
def init_parser(self):
|
||||
""" phase 1:
|
||||
most important part is to build the meta-parser for "expr". expr represents any atomic action that returns a value, and the bulk of
|
||||
the code in any program will consist primarily of exprs and flow control. expr is heavily recursive, because most types of expr can
|
||||
take another expr as an input value.
|
||||
"""
|
||||
point = Literal( "." )
|
||||
plus = Literal( "+" )
|
||||
minus = Literal( "-" )
|
||||
mult = Literal( "*" )
|
||||
div = Literal( "/" )
|
||||
lpar = Literal( "(" ).suppress()
|
||||
rpar = Literal( ")" ).suppress()
|
||||
llbr = Literal( "[" ).suppress()
|
||||
rlbr = Literal( "]" ).suppress()
|
||||
addop = plus | minus
|
||||
multop = mult | div
|
||||
expop = Literal( "^" )
|
||||
quote = Literal( '"' )
|
||||
excl = Literal( "!" )
|
||||
call = Literal( ":" )
|
||||
endl = Literal( ";" )
|
||||
lisep = Literal( "," ).suppress()
|
||||
objn = Literal( "#" )
|
||||
ref = Literal( "$" )
|
||||
assign = Literal( "=" )
|
||||
flatten = Literal( "@" )
|
||||
neg = excl.copy()
|
||||
|
||||
|
||||
def moo_parse():
|
||||
fd = open('test.moo', 'r')
|
||||
data = fd.read()
|
||||
fd.close()
|
||||
expr = Forward()
|
||||
ident = Word(alphas+"_", alphas+nums+"_")
|
||||
ident.setParseAction(VMIdent.parse)
|
||||
variable = Word(alphas+"_", alphas+nums+"_")
|
||||
variable.setParseAction(VMVariable.parse)
|
||||
|
||||
|
||||
integer = Word( "+-"+nums, nums )
|
||||
fnumber = Combine( integer +
|
||||
Optional( point + Optional( Word( nums ) ) ) +
|
||||
Optional( CaselessLiteral('e') + Word( "+-"+nums, nums ) ) )
|
||||
objref = objn + Word( "+-"+nums, nums )
|
||||
objref.setParseAction(VMObjRef.parse)
|
||||
coreref = (ref + ident)
|
||||
coreref.setParseAction(VMCoreRef.parse)
|
||||
bexpr = (lpar + expr + rpar).setParseAction(self.nest)
|
||||
objrefexpr = bexpr | coreref | variable | objref
|
||||
identexpr = bexpr | ident
|
||||
propref = (objrefexpr + point + ident).setParseAction(VMPropRef.parse) | coreref
|
||||
fileref = (objrefexpr + excl + ident).setParseAction(VMFileRef.parse)
|
||||
|
||||
argspec = delimitedList(expr)
|
||||
argspec.setParseAction(StackToList.parse)
|
||||
funccall = objrefexpr + call + identexpr + lpar + argspec + rpar
|
||||
|
||||
fnumber.setParseAction(VMFloat.parse)
|
||||
integer.setParseAction(VMInteger.parse)
|
||||
funccall.setParseAction(CallFunc.parse)
|
||||
|
||||
stringlit = QuotedString(quoteChar='"', escChar='\\').setParseAction(VMString.parse)
|
||||
|
||||
atom = Forward()
|
||||
bifunction = (ident + lpar + argspec + rpar).setParseAction(CallBuiltin.parse)
|
||||
|
||||
flatexpr = Optional(flatten) + expr
|
||||
flatexpr.setParseAction(Flatten.parse)
|
||||
listlit = llbr + Optional(flatexpr) + ZeroOrMore(lisep + flatexpr) + rlbr
|
||||
literal = integer | fnumber | stringlit | listlit | objref
|
||||
|
||||
atom << (Optional(minus) + ZeroOrMore(neg) + (propref | literal | bifunction | bexpr | variable | funccall | fileref)).setParseAction(UnaryOp.parse)
|
||||
|
||||
|
||||
# by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
|
||||
# that is, 2^3^2 = 2^(3^2), not (2^3)^2.
|
||||
factor = Forward()
|
||||
factor << atom + ZeroOrMore( (expop + factor).setParseAction(ArithExp.parse) )
|
||||
|
||||
term = factor + ZeroOrMore( (multop + factor).setParseAction(ArithMul.parse) )
|
||||
#term.setParseAction(self.nest)
|
||||
mathexpr = term + ZeroOrMore( (addop + term).setParseAction(ArithAdd.parse) )
|
||||
#mathexpr.setParseAction(self.nest)
|
||||
|
||||
opeq = Literal('==')
|
||||
opneq = Literal('!=')
|
||||
opgteq = Literal('<=')
|
||||
oplteq = Literal('>=')
|
||||
oplt = Literal('<')
|
||||
opgt = Literal('>')
|
||||
opin = Keyword('in')
|
||||
|
||||
opcmp = opeq | opneq | opgteq | oplteq | oplt | opgt | opin
|
||||
eqexpr = mathexpr + Optional( (opcmp + mathexpr).setParseAction(BoolCompare.parse) )
|
||||
|
||||
opand = Literal('&&') | Keyword('and')
|
||||
opor = Literal('||') | Keyword('or')
|
||||
opxor = Literal('~~') | Keyword('xor')
|
||||
|
||||
opbool = opand | opor | opxor
|
||||
boolexpr = eqexpr + ZeroOrMore( (opbool + eqexpr).setParseAction(BoolLogic.parse) )
|
||||
|
||||
|
||||
assignable = variable | propref | fileref
|
||||
assignexpr = Optional(assignable + assign) + boolexpr
|
||||
expr << assignexpr.setParseAction(Assignment.parse)
|
||||
|
||||
|
||||
""" phase 2:
|
||||
now that expr is built, we can move on to handling flow control statements, and after that the structure of the program
|
||||
is mostly defined
|
||||
"""
|
||||
|
||||
ifstart = (Keyword("if") + bexpr)
|
||||
ifelseif = (Keyword("elseif") + bexpr)
|
||||
ifelse = Keyword("else")
|
||||
ifend = Keyword("endif")
|
||||
trystart = Keyword("try")
|
||||
tryexcept = (Keyword("except") + variable)
|
||||
tryelse = Keyword("else")
|
||||
tryfinally = Keyword("finally")
|
||||
tryend = Keyword("endtry")
|
||||
whilestart = (Keyword("while") + bexpr)
|
||||
whileend = Keyword("endwhile")
|
||||
forstart = (Keyword("for") + variable + Keyword("in") + bexpr)
|
||||
forend = Keyword("endfor")
|
||||
|
||||
kwdbreak = Keyword("break").setParseAction(LoopBreak)
|
||||
kwdcontinue = Keyword("continue").setParseAction(LoopContinue)
|
||||
kwdreturn = Keyword("return")
|
||||
|
||||
rtnexpr = (kwdreturn + expr).setParseAction(KeywordReturn.parse)
|
||||
line = expr | rtnexpr
|
||||
lline = expr | rtnexpr | kwdcontinue | kwdbreak
|
||||
exprblock = ZeroOrMore(line + endl)
|
||||
lexprblock = ZeroOrMore(lline + endl)
|
||||
|
||||
block = Forward()
|
||||
lblock = Forward()
|
||||
ifblock = ifstart + block + ZeroOrMore(ifelseif + block) + Optional(ifelse + block) + ifend
|
||||
tryblock = trystart + block + Optional(tryexcept + block + Optional(tryelse + block)) + Optional(tryfinally + block) + tryend
|
||||
iflblock = ifstart + lblock + ZeroOrMore(ifelseif + lblock) + Optional(ifelse + lblock) + ifend
|
||||
trylblock = trystart + lblock + Optional(tryexcept + lblock + Optional(tryelse + lblock)) + Optional(tryfinally + block) + tryend
|
||||
whileblock = whilestart + lblock + whileend
|
||||
forblock = forstart + lblock + forend
|
||||
|
||||
ifblock.setParseAction(IfBlock.parse)
|
||||
tryblock.setParseAction(TryBlock.parse)
|
||||
iflblock.setParseAction(IfBlock.parse)
|
||||
trylblock.setParseAction(TryBlock.parse)
|
||||
whileblock.setParseAction(WhileBlock.parse)
|
||||
forblock.setParseAction(ForeachBlock.parse)
|
||||
|
||||
# blocks are used for code blocks that are outside a loop. Inside a loop, all code blocks are lblocks
|
||||
# which allow loop-control keywords like break and continue (except try-finally, it wouldn't make sense)
|
||||
|
||||
block << (exprblock + Optional(ifblock | tryblock | whileblock | forblock) + exprblock)
|
||||
lblock << (lexprblock + Optional(iflblock | trylblock | whileblock | forblock) + lexprblock)
|
||||
|
||||
block.setParseAction(self.nest)
|
||||
lblock.setParseAction(self.nest)
|
||||
|
||||
endl.setParseAction(DiscardStack.parse)
|
||||
self.parser = block
|
||||
#print argspec.parseString("hello(hi.xyz)", parseAll=True)
|
||||
#print block.parseString(u"hi.xyz + #555.test;", parseAll=True)
|
||||
|
||||
def parse(self, data):
|
||||
rv = self.parser.parseString(data, parseAll=True)
|
||||
|
||||
return optimizer.optimize(rv)
|
||||
|
||||
|
||||
def test(self):
|
||||
#print self.parse(u"if (1) #740.xyz + -hello.world; endif")
|
||||
|
||||
data = unicode(open("test.moo", "r").read(), 'utf-8')
|
||||
print self.parse(data)
|
||||
|
||||
point = Literal( "." )
|
||||
|
||||
integer = Word( "+-"+nums, nums )
|
||||
fnumber = Combine( integer +
|
||||
Optional( point + Optional( Word( nums ) ) ) +
|
||||
Optional( CaselessLiteral('e') + Word( "+-"+nums, nums ) ) )
|
||||
ident = Word(alphas, alphas+nums+"_")
|
||||
|
||||
plus = Literal( "+" )
|
||||
minus = Literal( "-" )
|
||||
mult = Literal( "*" )
|
||||
div = Literal( "/" )
|
||||
lpar = Literal( "(" ).suppress()
|
||||
rpar = Literal( ")" ).suppress()
|
||||
addop = plus | minus
|
||||
multop = mult | div
|
||||
expop = Literal( "^" )
|
||||
|
||||
expr = Forward()
|
||||
atom = (Optional("-") + ( fnumber | ident + lpar + expr + rpar ).setParseAction( pushFirst ) | ( lpar + expr.suppress() + rpar )).setParseAction(pushUMinus)
|
||||
|
||||
# by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
|
||||
# that is, 2^3^2 = 2^(3^2), not (2^3)^2.
|
||||
factor = Forward()
|
||||
factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( pushFirst ) )
|
||||
|
||||
term = factor + ZeroOrMore( ( multop + factor ).setParseAction( pushFirst ) )
|
||||
expr << term + ZeroOrMore( ( addop + term ).setParseAction( pushFirst ) )
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
p = Parser()
|
||||
p.test()
|
||||
|
|
Loading…
Add table
Reference in a new issue