From 25db8c81e0a9eb2c9f727bf2a450136aa6ae6376 Mon Sep 17 00:00:00 2001 From: cecilkorik Date: Tue, 23 Apr 2019 16:29:46 -0700 Subject: [PATCH] made pbkdf2 funuctional in python 3 started refactoring other code to work in python 3 --HG-- branch : mung --- .hgignore | 3 + builtins.py => bi.py | 4 +- builtins_code.py => bi_code.py | 0 bytecode.py | 14 +- language_types.py | 10 +- listener.py | 6 +- parse.py | 393 +++++++++++++++++---------------- pbkdf2.py | 22 +- 8 files changed, 233 insertions(+), 219 deletions(-) rename builtins.py => bi.py (98%) rename builtins_code.py => bi_code.py (100%) diff --git a/.hgignore b/.hgignore index 9e32b48..2ab5a5b 100644 --- a/.hgignore +++ b/.hgignore @@ -1,2 +1,5 @@ syntax: glob *.pyc +bin +lib +include diff --git a/builtins.py b/bi.py similarity index 98% rename from builtins.py rename to bi.py index 8285bba..cd5f5b5 100755 --- a/builtins.py +++ b/bi.py @@ -1,4 +1,4 @@ -from builtins_code import bi +from bi_code import bi __all__ = ['builtin_map'] @@ -113,4 +113,4 @@ listbi_map.update({ 'len': bi.list_len, 'sort': bi.list_sort, }) -""" \ No newline at end of file +""" diff --git a/builtins_code.py b/bi_code.py similarity index 100% rename from builtins_code.py rename to bi_code.py diff --git a/bytecode.py b/bytecode.py index 4954c90..9a68b72 100644 --- a/bytecode.py +++ b/bytecode.py @@ -1,5 +1,5 @@ from language_tools import * -from builtins import builtin_map +from bi import builtin_map import optimizer @@ -11,7 +11,7 @@ def coerce(value): return VMInteger(value) elif isinstance(value, (tuple, list)): return VMList(list(value)) - elif isinstance(value, unicode): + elif isinstance(value, str): return VMString(value) elif isinstance(value, dict): return VMTable(value) @@ -298,7 +298,7 @@ class UnaryOp(CodeOp): rv = [] ops = [] for t in tokens: - if isinstance(t, (str, unicode)) and t in UnaryOp.map: + if isinstance(t, (str, bytes)) and t in UnaryOp.map: ops.append(UnaryOp(UnaryOp.map[t])) else: rv.append(t) @@ -524,7 +524,7 @@ class WhileBlock(CodeOp): def parse(tokens): rv = WhileBlock() - for i in xrange(1, len(tokens)): + for i in range(1, len(tokens)): tok = tokens[i] if rv.cond == None: rv.cond = tok @@ -583,7 +583,7 @@ class TryBlock(CodeOp): rv = TryBlock() active_tok = None - for i in xrange(1, len(tokens)): + for i in range(1, len(tokens)): tok = tokens[i] if tok in ("try", "except", "else", "finally"): active_tok = tok @@ -677,7 +677,7 @@ class ForeachBlock(CodeOp): def parse(tokens): rv = ForeachBlock() - for i in xrange(1, len(tokens)): + for i in range(1, len(tokens)): tok = tokens[i] if rv.var == None: rv.var = tok @@ -717,7 +717,7 @@ class IfBlock(CodeOp): conds = [] tok_count = 0 active_tok = None - for i in xrange(len(tokens)): + for i in range(len(tokens)): tok = tokens[i] if tok == "endif": diff --git a/language_types.py b/language_types.py index 7eeb111..4f5234c 100755 --- a/language_types.py +++ b/language_types.py @@ -12,7 +12,7 @@ def disallow_keywords(tokens,keywords=None): if isinstance(t, VMIdent): if t.name in keywords: raise ParseException("Restricted keyword: %s" % (t.name,)) - elif isinstance(t, unicode): + elif isinstance(t, str): tstr = t.encode('ascii', 'ignore') if tstr in keywords: raise ParseException("Restricted keyword: %s" % (tstr,)) @@ -98,10 +98,10 @@ class VMTablePair(VMType): class VMString(VMType): def __init__(self, value): VMType.__init__(self) - if isinstance(value, unicode): + if isinstance(value, str): self.value = value else: - self.value = unicode(str(value), 'ascii', 'ignore') + self.value = str(value, 'ascii', 'ignore') def __repr__(self): return "\"%s\"" % (repr(self.value)[1:].strip("'").replace("\\'", "'").replace('"', '\\"'),) @@ -138,7 +138,7 @@ class VMIdent(VMRef): self.name = name def bytecode(self): - return [StackLiteral(unicode(self.name))] + return [StackLiteral(str(self.name))] @staticmethod @tokenparser @@ -156,7 +156,7 @@ class VMVariable(VMRef): self.name = name def ref(self): - return [StackLiteral(unicode(self.name))] + return [StackLiteral(str(self.name))] def bytecode(self): return codejoin(self.ref(), GetVariable()) diff --git a/listener.py b/listener.py index 6fa6082..9acbd3d 100644 --- a/listener.py +++ b/listener.py @@ -92,12 +92,12 @@ class Connection(object): data = self.conn.recv(bytes) if self.input_encoding == 'raw': - return unicode(self.escape(data), 'ascii') + return str(self.escape(data), 'ascii') else: - return unicode(data, self.input_encoding, 'ignore') + return str(data, self.input_encoding, 'ignore') def send(self, data): - assert isinstance(data, unicode) + assert isinstance(data, str) try: encoded = data.encode(self.output_encoding, 'replace') except UnicodeEncodeError: diff --git a/parse.py b/parse.py index a4a01ec..3c9eb1a 100755 --- a/parse.py +++ b/parse.py @@ -3,208 +3,215 @@ from language import * class Parser(object): - def __init__(self): - self.parser = None - self.init_parser() - - def nest(self, tokens): - return [list(tokens)] - - def init_parser(self): - """ phase 1: - most important part is to build the meta-parser for "expr". expr represents any atomic action that returns a value, and the bulk of - the code in any program will consist primarily of exprs and flow control. expr is heavily recursive, because most types of expr can - take another expr as an input value. - """ - point = Literal( "." ) - plus = Literal( "+" ) - minus = Literal( "-" ) - mult = Literal( "*" ) - div = Literal( "/" ) - lpar = Literal( "(" ).suppress() - rpar = Literal( ")" ).suppress() - llbr = Literal( "[" ).suppress() - rlbr = Literal( "]" ).suppress() - addop = plus | minus - multop = mult | div - expop = Literal( "^" ) - quote = Literal( '"' ) - excl = Literal( "!" ) - call = Literal( ":" ) - endl = Literal( ";" ) - lisep = Literal( "," ).suppress() - objn = Literal( "#" ) - ref = Literal( "$" ) - assign = Literal( "=" ) - flatten = Literal( "@" ) - neg = excl.copy() - + def __init__(self): + self.parser = None + self.init_parser() + + def nest(self, tokens): + return [list(tokens)] + + def init_parser(self): + """ phase 1: + most important part is to build the meta-parser for "expr". expr represents any atomic action that returns a value, and the bulk of + the code in any program will consist primarily of exprs and flow control. expr is heavily recursive, because most types of expr can + take another expr as an input value. + """ + point = Literal( "." ) + plus = Literal( "+" ) + minus = Literal( "-" ) + mult = Literal( "*" ) + div = Literal( "/" ) + lpar = Literal( "(" ).suppress() + rpar = Literal( ")" ).suppress() + llbr = Literal( "[" ).suppress() + rlbr = Literal( "]" ).suppress() + addop = plus | minus + multop = mult | div + expop = Literal( "^" ) + quote = Literal( '"' ) + excl = Literal( "!" ) + call = Literal( ":" ) + endl = Literal( ";" ) + lisep = Literal( "," ).suppress() + objn = Literal( "#" ) + ref = Literal( "$" ) + assign = Literal( "=" ) + flatten = Literal( "@" ) + neg = excl.copy() + - expr = Forward() - ident = Word(alphas+"_", alphas+nums+"_") - ident.setParseAction(VMIdent.parse) - variable = Word(alphas+"_", alphas+nums+"_") - variable.setParseAction(VMVariable.parse) - - - integer = Word( "+-"+nums, nums ) - fnumber = Combine( integer + - Optional( point + Optional( Word( nums ) ) ) + - Optional( CaselessLiteral('e') + Word( "+-"+nums, nums ) ) ) - objref = objn + Word( "+-"+nums, nums ) - objref.setParseAction(VMObjRef.parse) - coreref = (ref + ident) - coreref.setParseAction(VMCoreRef.parse) - bexpr = (lpar + expr + rpar).setParseAction(self.nest) - objrefexpr = bexpr | coreref | variable | objref - identexpr = bexpr | ident - propref = (objrefexpr + point + ident).setParseAction(VMPropRef.parse) | coreref - fileref = (objrefexpr + excl + ident).setParseAction(VMFileRef.parse) + expr = Forward() + ident = Word(alphas+"_", alphas+nums+"_") + ident.setParseAction(VMIdent.parse) + variable = Word(alphas+"_", alphas+nums+"_") + variable.setParseAction(VMVariable.parse) + + + integer = Word( "+-"+nums, nums ) + fnumber = Combine( integer + + Optional( point + Optional( Word( nums ) ) ) + + Optional( CaselessLiteral('e') + Word( "+-"+nums, nums ) ) ) + objref = objn + Word( "+-"+nums, nums ) + objref.setParseAction(VMObjRef.parse) + coreref = (ref + ident) + coreref.setParseAction(VMCoreRef.parse) + bexpr = (lpar + expr + rpar).setParseAction(self.nest) + objrefexpr = bexpr | coreref | variable | objref + identexpr = bexpr | ident + propref = (objrefexpr + point + ident).setParseAction(VMPropRef.parse) | coreref + fileref = (objrefexpr + excl + ident).setParseAction(VMFileRef.parse) - argspec = Optional(delimitedList(expr)) - argspec.setParseAction(StackToList.parse) - funccall = objrefexpr + call + identexpr + lpar + argspec + rpar - - fnumber.setParseAction(VMFloat.parse) - integer.setParseAction(VMInteger.parse) - funccall.setParseAction(CallFunc.parse) - - stringlit = QuotedString(quoteChar='"', escChar='\\').setParseAction(VMString.parse) - - atom = Forward() - bifunction = (ident + lpar + argspec + rpar).setParseAction(CallBuiltin.parse) - - flatexpr = Optional(flatten) + expr - flatexpr.setParseAction(Flatten.parse) - listlit = llbr + Optional(flatexpr) + ZeroOrMore(lisep + flatexpr) + rlbr - literal = integer | fnumber | stringlit | listlit | objref - - atom << (Optional(minus) + ZeroOrMore(neg) + (propref | literal | bifunction | bexpr | variable | funccall | fileref)).setParseAction(UnaryOp.parse) - - - # by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-righ - # that is, 2^3^2 = 2^(3^2), not (2^3)^2. - factor = Forward() - factor << atom + ZeroOrMore( (expop + factor).setParseAction(ArithExp.parse) ) - - term = factor + ZeroOrMore( (multop + factor).setParseAction(ArithMul.parse) ) - #term.setParseAction(self.nest) - mathexpr = term + ZeroOrMore( (addop + term).setParseAction(ArithAdd.parse) ) - #mathexpr.setParseAction(self.nest) - - opeq = Literal('==') - opneq = Literal('!=') - opgteq = Literal('<=') - oplteq = Literal('>=') - oplt = Literal('<') - opgt = Literal('>') - opin = Keyword('in') - - opcmp = opeq | opneq | opgteq | oplteq | oplt | opgt | opin - eqexpr = mathexpr + Optional( (opcmp + mathexpr).setParseAction(BoolCompare.parse) ) - - opand = Literal('&&') | Keyword('and') - opor = Literal('||') | Keyword('or') - opxor = Literal('~~') | Keyword('xor') - - opbool = opand | opor | opxor - boolexpr = eqexpr + ZeroOrMore( (opbool + eqexpr).setParseAction(BoolLogic.parse) ) - - - assignable = variable | propref | fileref - assignexpr = Optional(assignable + assign) + boolexpr - expr << assignexpr.setParseAction(Assignment.parse) - - - """ phase 2: - now that expr is built, we can move on to handling flow control statements, and after that the structure of the program - is mostly defined - """ - - ifstart = (Keyword("if") + bexpr) - ifelseif = (Keyword("elseif") + bexpr) - ifelse = Keyword("else") - ifend = Keyword("endif") - trystart = Keyword("try") - tryexcept = (Keyword("except") + variable) - tryelse = Keyword("else") - tryfinally = Keyword("finally") - tryend = Keyword("endtry") - whilestart = (Keyword("while") + bexpr) - whileend = Keyword("endwhile") - forstart = (Keyword("for") + variable + Keyword("in") + bexpr) - forend = Keyword("endfor") + argspec = Optional(delimitedList(expr)) + argspec.setParseAction(StackToList.parse) + funccall = objrefexpr + call + identexpr + lpar + argspec + rpar + + fnumber.setParseAction(VMFloat.parse) + integer.setParseAction(VMInteger.parse) + funccall.setParseAction(CallFunc.parse) + + stringlit = QuotedString(quoteChar='"', escChar='\\').setParseAction(VMString.parse) + + atom = Forward() + bifunction = (ident + lpar + argspec + rpar).setParseAction(CallBuiltin.parse) + + flatexpr = Optional(flatten) + expr + flatexpr.setParseAction(Flatten.parse) + listlit = llbr + Optional(flatexpr) + ZeroOrMore(lisep + flatexpr) + rlbr + literal = integer | fnumber | stringlit | listlit | objref + + atom << (Optional(minus) + ZeroOrMore(neg) + (propref | literal | bifunction | bexpr | variable | funccall | fileref)).setParseAction(UnaryOp.parse) + atom = atom.streamline() + + + # by defining exponentiation as "atom [ ^ factor ]..." instead of "atom [ ^ atom ]...", we get right-to-left exponents, instead of left-to-righ + # that is, 2^3^2 = 2^(3^2), not (2^3)^2. + factor = Forward() + factor << atom + ZeroOrMore( (expop + factor).setParseAction(ArithExp.parse) ) + factor = factor.streamline() + + term = factor + ZeroOrMore( (multop + factor).setParseAction(ArithMul.parse) ) + #term.setParseAction(self.nest) + mathexpr = term + ZeroOrMore( (addop + term).setParseAction(ArithAdd.parse) ) + #mathexpr.setParseAction(self.nest) + + opeq = Literal('==') + opneq = Literal('!=') + opgteq = Literal('<=') + oplteq = Literal('>=') + oplt = Literal('<') + opgt = Literal('>') + opin = Keyword('in') + + opcmp = opeq | opneq | opgteq | oplteq | oplt | opgt | opin + eqexpr = mathexpr + Optional( (opcmp + mathexpr).setParseAction(BoolCompare.parse) ) + + opand = Literal('&&') | Keyword('and') + opor = Literal('||') | Keyword('or') + opxor = Literal('~~') | Keyword('xor') + + opbool = opand | opor | opxor + boolexpr = eqexpr + ZeroOrMore( (opbool + eqexpr).setParseAction(BoolLogic.parse) ) + + + assignable = variable | propref | fileref + assignexpr = Optional(assignable + assign) + boolexpr + expr << assignexpr.setParseAction(Assignment.parse) + expr = expr.streamline() - kwdbreak = Keyword("break").setParseAction(LoopBreak) - kwdcontinue = Keyword("continue").setParseAction(LoopContinue) - kwdreturn = Keyword("return") + + + """ phase 2: + now that expr is built, we can move on to handling flow control statements, and after that the structure of the program + is mostly defined + """ + + ifstart = (Keyword("if") + bexpr) + ifelseif = (Keyword("elseif") + bexpr) + ifelse = Keyword("else") + ifend = Keyword("endif") + trystart = Keyword("try") + tryexcept = (Keyword("except") + variable) + tryelse = Keyword("else") + tryfinally = Keyword("finally") + tryend = Keyword("endtry") + whilestart = (Keyword("while") + bexpr) + whileend = Keyword("endwhile") + forstart = (Keyword("for") + variable + Keyword("in") + bexpr) + forend = Keyword("endfor") - rtnexpr = (kwdreturn + expr).setParseAction(KeywordReturn.parse) - line = expr | rtnexpr - lline = expr | rtnexpr | kwdcontinue | kwdbreak - exprblock = ZeroOrMore(line + endl) - lexprblock = ZeroOrMore(lline + endl) + kwdbreak = Keyword("break").setParseAction(LoopBreak) + kwdcontinue = Keyword("continue").setParseAction(LoopContinue) + kwdreturn = Keyword("return") - block = Forward() - lblock = Forward() - ifblock = ifstart + block + ZeroOrMore(ifelseif + block) + Optional(ifelse + block) + ifend - tryblock = trystart + block + Optional(tryexcept + block + Optional(tryelse + block)) + Optional(tryfinally + block) + tryend - iflblock = ifstart + lblock + ZeroOrMore(ifelseif + lblock) + Optional(ifelse + lblock) + ifend - trylblock = trystart + lblock + Optional(tryexcept + lblock + Optional(tryelse + lblock)) + Optional(tryfinally + block) + tryend - whileblock = whilestart + lblock + whileend - forblock = forstart + lblock + forend - - ifblock.setParseAction(IfBlock.parse) - tryblock.setParseAction(TryBlock.parse) - iflblock.setParseAction(IfBlock.parse) - trylblock.setParseAction(TryBlock.parse) - whileblock.setParseAction(WhileBlock.parse) - forblock.setParseAction(ForeachBlock.parse) - - # blocks are used for code blocks that are outside a loop. Inside a loop, all code blocks are lblocks - # which allow loop-control keywords like break and continue (except try-finally, it wouldn't make sense) - - block << (exprblock + Optional(ifblock | tryblock | whileblock | forblock) + exprblock) - lblock << (lexprblock + Optional(iflblock | trylblock | whileblock | forblock) + lexprblock) - - block.setParseAction(self.nest) - lblock.setParseAction(self.nest) + rtnexpr = (kwdreturn + expr).setParseAction(KeywordReturn.parse) + line = expr | rtnexpr + lline = expr | rtnexpr | kwdcontinue | kwdbreak + exprblock = ZeroOrMore(line + endl) + lexprblock = ZeroOrMore(lline + endl) - endl.setParseAction(DiscardStack.parse) - self.parser = block - #print(argspec.parseString("hello(hi.xyz)", parseAll=True)) - #print(block.parseString(u"hi.xyz + #555.test;", parseAll=True)) - #print(block.parseString("""serverlog();""")) + block = Forward() + lblock = Forward() + ifblock = ifstart + block + ZeroOrMore(ifelseif + block) + Optional(ifelse + block) + ifend + tryblock = trystart + block + Optional(tryexcept + block + Optional(tryelse + block)) + Optional(tryfinally + block) + tryend + iflblock = ifstart + lblock + ZeroOrMore(ifelseif + lblock) + Optional(ifelse + lblock) + ifend + trylblock = trystart + lblock + Optional(tryexcept + lblock + Optional(tryelse + lblock)) + Optional(tryfinally + block) + tryend + whileblock = whilestart + lblock + whileend + forblock = forstart + lblock + forend + + ifblock.setParseAction(IfBlock.parse) + tryblock.setParseAction(TryBlock.parse) + iflblock.setParseAction(IfBlock.parse) + trylblock.setParseAction(TryBlock.parse) + whileblock.setParseAction(WhileBlock.parse) + forblock.setParseAction(ForeachBlock.parse) + + # blocks are used for code blocks that are outside a loop. Inside a loop, all code blocks are lblocks + # which allow loop-control keywords like break and continue (except try-finally, it wouldn't make sense) + + block << (exprblock + Optional(ifblock | tryblock | whileblock | forblock) + exprblock) + lblock << (lexprblock + Optional(iflblock | trylblock | whileblock | forblock) + lexprblock) - def parse(self, data): - rv = self.parser.parseString(data, parseAll=True) - - return optimizer.optimize(rv) + block = block.streamline() + lblock = lblock.streamline() + + block.setParseAction(self.nest) + lblock.setParseAction(self.nest) - def parse_command(self, line): - ls = line.split(' ') - cmd = ls[0] - argstr = ' '.join(ls[1:]) - vars = { - 'cmdstr': line, - 'cmd': cmd, - 'argstr': argstr, - 'args': [x.strip() for x in ls[1:] if x.strip() != ''] - } - - return [cmd, vars] + endl.setParseAction(DiscardStack.parse) + self.parser = block + #print(argspec.parseString("hello(hi.xyz)", parseAll=True)) + #print(block.parseString(u"hi.xyz + #555.test;", parseAll=True)) + #print(block.parseString("""serverlog();""")) - def test(self): - #print(self.parse(u"if (1) #740.xyz + -hello.world; endif")) - - data = unicode(open("test.moo", "r").read(), 'utf-8') - rv = self.parse(data) - print(rv) - return rv - - + def parse(self, data): + rv = self.parser.parseString(data, parseAll=True) + + return optimizer.optimize(rv) + + def parse_command(self, line): + ls = line.split(' ') + cmd = ls[0] + argstr = ' '.join(ls[1:]) + vars = { + 'cmdstr': line, + 'cmd': cmd, + 'argstr': argstr, + 'args': [x.strip() for x in ls[1:] if x.strip() != ''] + } + + return [cmd, vars] + + def test(self): + #print(self.parse(u"if (1) #740.xyz + -hello.world; endif")) + + data = open("test.moo", "r", encoding="utf-8").read() + rv = self.parse(data) + print(rv) + return rv + + static_parser = Parser() if __name__ == "__main__": - p = Parser() - p.test() + p = Parser() + p.test() diff --git a/pbkdf2.py b/pbkdf2.py index 624c7e4..4c75d8b 100644 --- a/pbkdf2.py +++ b/pbkdf2.py @@ -44,7 +44,8 @@ import hmac import hashlib from struct import Struct from operator import xor -from itertools import izip, starmap +from itertools import starmap +import binascii _pack_int = Struct('>I').pack @@ -52,7 +53,7 @@ _pack_int = Struct('>I').pack def pbkdf2_hex(data, salt, iterations=1000, keylen=24, hashfunc=None): """Like :func:`pbkdf2_bin` but returns a hex encoded string.""" - return pbkdf2_bin(data, salt, iterations, keylen, hashfunc).encode('hex') + return str(binascii.hexlify(pbkdf2_bin(data, salt, iterations, keylen, hashfunc)), 'ascii') def pbkdf2_bin(data, salt, iterations=1000, keylen=24, hashfunc=None): @@ -61,26 +62,29 @@ def pbkdf2_bin(data, salt, iterations=1000, keylen=24, hashfunc=None): key of `keylen` bytes. By default SHA-256 is used as hash function, a different hashlib `hashfunc` can be provided. """ + + bchr = lambda v: bytes((v,)) + hashfunc = hashfunc or hashlib.sha256 mac = hmac.new(data, None, hashfunc) def _pseudorandom(x, mac=mac): h = mac.copy() h.update(x) - return map(ord, h.digest()) + return h.digest() buf = [] - for block in xrange(1, -(-keylen // mac.digest_size) + 1): + for block in range(1, -(-keylen // mac.digest_size) + 1): rv = u = _pseudorandom(salt + _pack_int(block)) - for i in xrange(iterations - 1): - u = _pseudorandom(''.join(map(chr, u))) - rv = starmap(xor, izip(rv, u)) + for i in range(iterations - 1): + u = _pseudorandom(b''.join(map(bchr, u))) + rv = starmap(xor, zip(rv, u)) buf.extend(rv) - return ''.join(map(chr, buf))[:keylen] + return b''.join(map(bchr, buf))[:keylen] def test(): failed = [] def check(data, salt, iterations, keylen, expected): - rv = pbkdf2_hex(data, salt, iterations, keylen) + rv = pbkdf2_hex(bytes(data, "utf-8"), bytes(salt, "utf-8"), iterations, keylen, hashlib.sha1) if rv != expected: print('Test failed:') print(' Expected: %s' % expected)