mung language changes

--HG-- branch : mung
2010-11-27 22:46:34 +00:00 · 2010-11-27 22:46:34 +00:00 · fe0ea2a7f9
commit fe0ea2a7f9
parent b11356b0c3
4 changed files with 1125 additions and 264 deletions
--- a/langdoc.txt
+++ b/langdoc.txt
@ -9,16 +9,35 @@ endwhile
 bytecode:
 startblock 1
-stack_literal_int 1
+stack_literal_int 1 1
-exit_true 1
+exit_true 1 0
-stack_literal_str "message"
+stack_literal_str "message" 1
-set_var arg
+set_var arg 1
-discard_stack 1
+discard_stack 1 0
-get_var arg
+get_var arg 1
-stack_literal_obj #7407
+stack_literal_obj #7407 2
-call_builtin send
+call_builtin send 1
-discard_stack 1
+discard_stack 1 0
-stack_literal_float 1.0
+stack_literal_float 1.0 1
-call_builtin suspend
+call_builtin suspend 1
-discard_stack 1
+discard_stack 1 0
-endblock 1
+endblock 1
 other stuff:
 var = 1 == 0 + 1
 list = [1, 2, @[3, 4], 5]
 literal 1
 literal 2
 	literal 3
 	literal 4
 	literal 2
 	makelist
 	flatten
 literal 5
 literal 4 <-- the length
 makelist 4
--- a/language.py
+++ b/language.py
--- a/language_types.py
+++ b/language_types.py
@ -1,54 +0,0 @@
 class VMType(object):
 	"""Base class shared by the VM value wrappers; uncoerce() asserts its argument is one."""
 	pass
 class VMInteger(VMType):
 	"""VM value wrapper that stores an integer in ``self.value``."""
 	def __init__(self, value):
 		# int() converts the argument and raises if it cannot be converted
 		self.value = int(value)
 class VMFloat(VMType):
 	"""VM value wrapper that stores a float in ``self.value``."""
 	def __init__(self, value):
 		# float() converts the argument and raises if it cannot be converted
 		self.value = float(value)
 class VMTable(VMType):
 	"""VM value wrapper that stores a dict in ``self.value``."""
 	def __init__(self, value):
 		# dict() builds a new (shallow) dict, so the wrapper does not alias the caller's mapping
 		self.value = dict(value)
 class VMString(VMType):
 	"""VM value wrapper that stores text as a ``unicode`` object in ``self.value``."""
 	def __init__(self, value):
 		"""Keep ``unicode`` input untouched; convert everything else to ``unicode`` first."""
 		if not isinstance(value, unicode):
 			# stringify, then decode as ASCII; 'ignore' drops bytes that do not decode
 			value = unicode(str(value), 'ascii', 'ignore')
 		self.value = value
 class VMObjRef(VMType):
 	"""VM value wrapper that stores an ``ObjRef`` in ``self.value``."""
 	def __init__(self, value):
 		"""Accept an ``ObjRef`` as-is, or build one from an int/float.
 		Raises TypeError for any other kind of value.
 		"""
 		if isinstance(value, ObjRef):
 			self.value = value
 		elif isinstance(value, (float, int)):
 			# numeric input is passed through int() before being wrapped in ObjRef
 			self.value = ObjRef(int(value))
 		else:
 			# call form of raise: same exception type and message as before, written
 			# the same way as the raise in coerce()
 			raise TypeError("Attempted to create VMObjRef with invalid object reference: %r" % (value,))
 def coerce(value):
 	"""Wrap a native Python value in the matching VM wrapper.
 	Mapping, checked in this order: int -> VMInteger, tuple/list -> VMList,
 	unicode -> VMString, dict -> VMTable, ObjRef -> VMObjRef, float -> VMFloat,
 	None -> VMInteger(0). Raises TypeError for any other type.
 	"""
 	if isinstance(value, int):
 		return VMInteger(value)
 	if isinstance(value, (tuple, list)):
 		# NOTE(review): VMList is not defined in the part of the file visible here -- confirm it exists
 		return VMList(list(value))
 	if isinstance(value, unicode):
 		return VMString(value)
 	if isinstance(value, dict):
 		return VMTable(value)
 	if isinstance(value, ObjRef):
 		return VMObjRef(value)
 	if isinstance(value, float):
 		return VMFloat(value)
 	if value is None:
 		# identity test on purpose: an object whose __eq__ claims equality with None
 		# must fall through to the TypeError instead of silently becoming 0
 		return VMInteger(0)
 	raise TypeError("Unknown type %s cannot be coerced to VMType" % (type(value),))
 def uncoerce(value):
 	"""Return the native value stored in a VMType wrapper (its ``value`` attribute)."""
 	# NOTE: the type check is an assert, so it is skipped when Python runs with -O
 	assert isinstance(value, VMType)
 	return value.value
--- a/parse.py
+++ b/parse.py
@ -1,103 +1,192 @@
 from pyparsing import *
 from language import *
 def enum_parse():
 	"""Print a ``ENUM_MEMBER = value`` line for every member of each enum in ``test.enum``.
 	A member with an explicit ``= N`` takes that value; otherwise it takes the previous
 	member's value plus one, starting from 0 for each enum. The whole file must consist
 	of enum declarations (``parseAll=True``), otherwise pyparsing raises a parse error.
 	"""
 	# syntax we don't want to see in the final parse tree
 	_lcurl = Suppress('{')
 	_rcurl = Suppress('}')
 	_equal = Suppress('=')
 	_comma = Suppress(',')
 	_semi = Suppress(';')
 	_enum = Suppress('enum')
 	identifier = Word(alphas, alphanums+'_')
 	integer = Word(nums)
 	enumValue = Group(identifier('name') + Optional(_equal + integer('value')))
 	enumList = Group(enumValue + ZeroOrMore(_comma + enumValue))
 	# Each declaration is wrapped in Group so that one parse-result item corresponds to
 	# one enum and carries its own 'enum' and 'list' names. Ungrouped, the results are
 	# flat and iterating them yields the bare name string, which has no .value/.name.
 	enum = Group(_enum + identifier('enum') + _lcurl + enumList('list') + _rcurl + Optional(_semi))
 	enumlist = ZeroOrMore(enum)
 	# read the text up front so the file is closed even when parsing fails
 	fd = open('test.enum', 'r')
 	try:
 		source = fd.read()
 	finally:
 		fd.close()
 	for item in enumlist.parseString(source, parseAll=True):
 		next_value = 0
 		for entry in item.list:
 			# a missing results name reads back as '' in pyparsing
 			if entry.value != '':
 				next_value = int(entry.value)
 			print('%s_%s = %d' % (item.enum.upper(), entry.name.upper(), next_value))
 			next_value += 1
-def parse_fourfn():	
+class Parser(object):
-    global bnf
+	def __init__(self):
-    if not bnf:
+		self.parser = None
-        point = Literal( "." )
+		self.init_parser()
        e     = CaselessLiteral( "E" )
        fnumber = Combine( Word( "+-"+nums, nums ) + 
                           Optional( point + Optional( Word( nums ) ) ) +
                           Optional( e + Word( "+-"+nums, nums ) ) )
        ident = Word(alphas, alphas+nums+"_$")
        plus  = Literal( "+" )
        minus = Literal( "-" )
        mult  = Literal( "*" )
        div   = Literal( "/" )
        lpar  = Literal( "(" ).suppress()
        rpar  = Literal( ")" ).suppress()
        addop  = plus | minus
        multop = mult | div
        expop = Literal( "^" )
        pi    = CaselessLiteral( "PI" )
        expr = Forward()
        atom = (Optional("-") + ( pi | e | fnumber | ident + lpar + expr + rpar ).setParseAction( pushFirst ) | ( lpar + expr.suppress() + rpar )).setParseAction(pushUMinus) 
        # by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
        # that is, 2^3^2 = 2^(3^2), not (2^3)^2.
        factor = Forward()
        factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( pushFirst ) )
        term = factor + ZeroOrMore( ( multop + factor ).setParseAction( pushFirst ) )
        expr << term + ZeroOrMore( ( addop + term ).setParseAction( pushFirst ) )
        bnf = expr
    return bnf
 	def nest(self, tokens):
 		"""Parse action: return the matched tokens wrapped as a single nested list."""
 		grouped = list(tokens)
 		return [grouped]
 	def init_parser(self):
 		""" phase 1:
 		most important part is to build the meta-parser for "expr". expr represents any atomic action that returns a value, and the bulk of
 		the code in any program will consist primarily of exprs and flow control. expr is heavily recursive, because most types of expr can
 		take another expr as an input value.
 		"""
 		point = Literal( "." )
 		plus  = Literal( "+" )
 		minus = Literal( "-" )
 		mult  = Literal( "*" )
 		div   = Literal( "/" )
 		lpar  = Literal( "(" ).suppress()
 		rpar  = Literal( ")" ).suppress()
 		llbr  = Literal( "[" ).suppress()
 		rlbr  = Literal( "]" ).suppress()
 		addop  = plus | minus
 		multop = mult | div
 		expop = Literal( "^" )
 		quote = Literal( '"' )
 		excl   = Literal( "!" )
 		call  = Literal( ":" )
 		endl  = Literal( ";" )
 		lisep = Literal( "," ).suppress()
 		objn  = Literal( "#" )
 		ref   = Literal( "$" )
 		assign = Literal( "=" )
 		flatten = Literal( "@" )
 		neg = excl.copy()
-def moo_parse():
+		expr = Forward()
-	fd = open('test.moo', 'r')
+		ident = Word(alphas+"_", alphas+nums+"_")
-	data = fd.read()	
+		ident.setParseAction(VMIdent.parse)
-	fd.close()
+		variable = Word(alphas+"_", alphas+nums+"_")
 		variable.setParseAction(VMVariable.parse)
 		integer = Word( "+-"+nums, nums )
 		fnumber = Combine( integer + 
 						   Optional( point + Optional( Word( nums ) ) ) +
 						   Optional( CaselessLiteral('e') + Word( "+-"+nums, nums ) ) )
 		objref = objn + Word( "+-"+nums, nums )
 		objref.setParseAction(VMObjRef.parse)
 		coreref = (ref + ident)
 		coreref.setParseAction(VMCoreRef.parse)
 		bexpr = (lpar + expr + rpar).setParseAction(self.nest)
 		objrefexpr = bexpr | coreref | variable | objref
 		identexpr = bexpr | ident
 		propref = (objrefexpr + point + ident).setParseAction(VMPropRef.parse) | coreref
 		fileref = (objrefexpr + excl + ident).setParseAction(VMFileRef.parse)
 		argspec = delimitedList(expr)
 		argspec.setParseAction(StackToList.parse)
 		funccall = objrefexpr + call + identexpr + lpar + argspec + rpar
 		fnumber.setParseAction(VMFloat.parse)
 		integer.setParseAction(VMInteger.parse)
 		funccall.setParseAction(CallFunc.parse)
 		stringlit = QuotedString(quoteChar='"', escChar='\\').setParseAction(VMString.parse)
 		atom = Forward()
 		bifunction = (ident + lpar + argspec + rpar).setParseAction(CallBuiltin.parse)
 		flatexpr = Optional(flatten) + expr
 		flatexpr.setParseAction(Flatten.parse)
 		listlit = llbr + Optional(flatexpr) + ZeroOrMore(lisep + flatexpr) + rlbr
 		literal = integer | fnumber | stringlit | listlit | objref
 		atom << (Optional(minus) + ZeroOrMore(neg) + (propref | literal | bifunction | bexpr | variable | funccall | fileref)).setParseAction(UnaryOp.parse)
 		# by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
 		# that is, 2^3^2 = 2^(3^2), not (2^3)^2.
 		factor = Forward()
 		factor << atom + ZeroOrMore( (expop + factor).setParseAction(ArithExp.parse) )
 		term = factor + ZeroOrMore( (multop + factor).setParseAction(ArithMul.parse) )
 		#term.setParseAction(self.nest)
 		mathexpr = term + ZeroOrMore( (addop + term).setParseAction(ArithAdd.parse) )
 		#mathexpr.setParseAction(self.nest)
 		opeq = Literal('==')
 		opneq = Literal('!=')
 		opgteq = Literal('<=')
 		oplteq = Literal('>=')
 		oplt = Literal('<')
 		opgt = Literal('>')
 		opin = Keyword('in')
 		opcmp = opeq | opneq | opgteq | oplteq | oplt | opgt | opin
 		eqexpr = mathexpr + Optional( (opcmp + mathexpr).setParseAction(BoolCompare.parse) )
 		opand = Literal('&&') | Keyword('and')
 		opor = Literal('||') | Keyword('or')
 		opxor = Literal('~~') | Keyword('xor')
 		opbool = opand | opor | opxor
 		boolexpr = eqexpr + ZeroOrMore( (opbool + eqexpr).setParseAction(BoolLogic.parse) )
 		assignable = variable | propref | fileref
 		assignexpr = Optional(assignable + assign) + boolexpr
 		expr << assignexpr.setParseAction(Assignment.parse)
 		""" phase 2:
 		now that expr is built, we can move on to handling flow control statements, and after that the structure of the program
 		is mostly defined
 		"""
 		ifstart = (Keyword("if") + bexpr)
 		ifelseif = (Keyword("elseif") + bexpr)
 		ifelse = Keyword("else")
 		ifend = Keyword("endif")
 		trystart = Keyword("try")
 		tryexcept = (Keyword("except") + variable)
 		tryelse = Keyword("else")
 		tryfinally = Keyword("finally")
 		tryend = Keyword("endtry")
 		whilestart = (Keyword("while") + bexpr)
 		whileend = Keyword("endwhile")
 		forstart = (Keyword("for") + variable + Keyword("in") + bexpr)
 		forend = Keyword("endfor")
 		kwdbreak = Keyword("break").setParseAction(LoopBreak)
 		kwdcontinue = Keyword("continue").setParseAction(LoopContinue)
 		kwdreturn = Keyword("return")
 		rtnexpr = (kwdreturn + expr).setParseAction(KeywordReturn.parse)
 		line = expr | rtnexpr
 		lline = expr | rtnexpr | kwdcontinue | kwdbreak
 		exprblock = ZeroOrMore(line + endl)
 		lexprblock = ZeroOrMore(lline + endl)
 		block = Forward()
 		lblock = Forward()
 		ifblock = ifstart + block + ZeroOrMore(ifelseif + block) + Optional(ifelse + block) + ifend
 		tryblock = trystart + block + Optional(tryexcept + block + Optional(tryelse + block)) + Optional(tryfinally + block) + tryend
 		iflblock = ifstart + lblock + ZeroOrMore(ifelseif + lblock) + Optional(ifelse + lblock) + ifend
 		trylblock = trystart + lblock + Optional(tryexcept + lblock + Optional(tryelse + lblock)) + Optional(tryfinally + block) + tryend
 		whileblock = whilestart + lblock + whileend
 		forblock = forstart + lblock + forend
 		ifblock.setParseAction(IfBlock.parse)
 		tryblock.setParseAction(TryBlock.parse)
 		iflblock.setParseAction(IfBlock.parse)
 		trylblock.setParseAction(TryBlock.parse)
 		whileblock.setParseAction(WhileBlock.parse)
 		forblock.setParseAction(ForeachBlock.parse)
 		# blocks are used for code blocks that are outside a loop. Inside a loop, all code blocks are lblocks
 		# which allow loop-control keywords like break and continue (except try-finally, it wouldn't make sense)
 		block << (exprblock + Optional(ifblock | tryblock | whileblock | forblock) + exprblock)
 		lblock << (lexprblock + Optional(iflblock | trylblock | whileblock | forblock) + lexprblock)
 		block.setParseAction(self.nest)
 		lblock.setParseAction(self.nest)
 		endl.setParseAction(DiscardStack.parse)
 		self.parser = block
 		#print argspec.parseString("hello(hi.xyz)", parseAll=True)
 		#print block.parseString(u"hi.xyz + #555.test;", parseAll=True)
 	def parse(self, data):
 		"""Parse ``data`` with the grammar in ``self.parser`` and return the optimizer's output.
 		The whole input must match (``parseAll=True``).
 		"""
 		return optimizer.optimize(self.parser.parseString(data, parseAll=True))
 	def test(self):
 		"""Parse the contents of ``test.moo`` (decoded as UTF-8) and print the result."""
 		fd = open("test.moo", "r")
 		try:
 			# close the handle explicitly instead of leaving it to garbage collection
 			raw = fd.read()
 		finally:
 			fd.close()
 		print(self.parse(unicode(raw, 'utf-8')))
-    point = Literal( "." )
+if __name__ == "__main__":
-
+	p = Parser()
-	integer = Word( "+-"+nums, nums )
+	p.test()
    fnumber = Combine( integer + 
                       Optional( point + Optional( Word( nums ) ) ) +
                       Optional( CaselessLiteral('e') + Word( "+-"+nums, nums ) ) )
    ident = Word(alphas, alphas+nums+"_")
    plus  = Literal( "+" )
    minus = Literal( "-" )
    mult  = Literal( "*" )
    div   = Literal( "/" )
    lpar  = Literal( "(" ).suppress()
    rpar  = Literal( ")" ).suppress()
    addop  = plus | minus
    multop = mult | div
    expop = Literal( "^" )
    expr = Forward()
    atom = (Optional("-") + ( fnumber | ident + lpar + expr + rpar ).setParseAction( pushFirst ) | ( lpar + expr.suppress() + rpar )).setParseAction(pushUMinus) 
    # by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-right
    # that is, 2^3^2 = 2^(3^2), not (2^3)^2.
    factor = Forward()
    factor << atom + ZeroOrMore( ( expop + factor ).setParseAction( pushFirst ) )
    term = factor + ZeroOrMore( ( multop + factor ).setParseAction( pushFirst ) )
    expr << term + ZeroOrMore( ( addop + term ).setParseAction( pushFirst ) )