Fixed assembly source parsing

This commit is contained in:
Maurizio Porrato 2019-11-01 07:57:49 +00:00
parent 57404c6fe8
commit aa8c19c50a
2 changed files with 123 additions and 59 deletions

173
dasm.py
View File

@ -1,10 +1,9 @@
#!/usr/bin/env python3
from rply import LexerGenerator, ParserGenerator
from rply import LexerGenerator, ParserGenerator, Token
# Token rules for the assembler source are registered on this generator.
_lexer = LexerGenerator()
# Dot-prefixed directive keywords.
_lexer.add("DIRECTIVE", r"\.org|\.entry|\.dw|\.ds|\.dsz|\.dsp")
# Mnemonics are split into three token types; the grammar in this file
# follows OPCODE2 with two operands, OPCODE1 with one and OPCODE0 with none.
_lexer.add("OPCODE2", r"SET|ADD|SUB|MUL|MLI|DIV|MOD|MDI|AND|BOR|XOR|SHR|ASR|SHL|IFB|IFC|IFE|IFN|IFG|IFA|IFL|IFU|ADX|SBX|STI|STD")
_lexer.add("OPCODE1", r"JSR|INT|IAG|IAS|RFI|IAQ|HWN|HWQ|HWI")
_lexer.add("OPCODE0", r"HLT")
@ -15,81 +14,133 @@ _lexer.add("COMMA", r",")
# Punctuation tokens.
_lexer.add("COLON", r":")
_lexer.add("OPENSB", r"\[")
_lexer.add("CLOSESB", r"\]")
# NOTE(review): BINOP also matches + - * /, the same characters as the
# PLUS/MINUS/STAR/SLASH rules registered further down -- confirm which set
# is meant to be active.
_lexer.add("BINOP", r"[+*/-]|>>|<<")
_lexer.add("OPENRB", r"\(")
_lexer.add("CLOSERB", r"\)")
# Numeric literal: 0x-prefixed hexadecimal or plain decimal digits.
_lexer.add("NUMBER", r"0x[0-9a-fA-F]+|[0-9]+")
# Identifier: letters, '.', '_' or '@', followed by the same set plus digits.
_lexer.add("SYMBOL", r"[a-zA-Z._@][a-zA-Z._@0-9]*")
#_lexer.add("EOL", r"([;#][^\n]*)?(\r?\n)+")
#_lexer.ignore(r"[ \t]+")
# NOTE(review): this ignore pattern matches newlines and ';'/'#' comments,
# which the EOL rule below matches as well -- confirm both are intended.
_lexer.ignore(r"\s+|([;#][^\n]*)?\n")
# One token type per arithmetic operator.
_lexer.add("PLUS", r"\+")
_lexer.add("MINUS", r"-")
_lexer.add("STAR", r"\*")
_lexer.add("SLASH", r"/")
# EOL: one or more line ends, each optionally preceded by a ';' or '#' comment.
_lexer.add("EOL", r"(([;#][^\n]*)?(\r?\n))+")
# Spaces and tabs are skipped.
_lexer.ignore(r"[ \t]+")
#_lexer.ignore(r"\s+|([;#][^\n]*)?\n")
# Build the lexer from the rules registered above.
lexer = _lexer.build()
# Parser generator fed with the name of every rule known to the lexer.
# NOTE(review): `_parser` is assigned twice; the second assignment, which adds
# the precedence table, replaces the first.
_parser = ParserGenerator([x.name for x in lexer.rules])
_parser = ParserGenerator([x.name for x in lexer.rules],
# PLUS/MINUS and STAR/SLASH are each declared left-associative.
precedence=[
("left", ["PLUS", "MINUS"]),
("left", ["STAR", "SLASH"])
]
)
@_parser.production("expression : NUMBER")
def expression_number(p):
    """Convert a NUMBER token into a Python int.

    Text starting with ``0x`` is read as hexadecimal, anything else as
    decimal.
    """
    literal = p[0].getstr()
    base = 16 if literal.startswith('0x') else 10
    return int(literal, base)
@_parser.production("asmcode : asmline")
def asmcode1(p):
    """Start the list of parsed lines with the first ``asmline`` value."""
    first_line = p[0]
    return [first_line]
@_parser.production("expression : SYMBOL")
def expression_symbol(p):
    """Wrap the text of a SYMBOL token in a ``('SYMBOL', name)`` tuple."""
    name = p[0].getstr()
    return ('SYMBOL', name)
@_parser.production("asmcode : asmcode asmline")
def asmcode2(p):
    """Return the lines collected so far extended with the new ``asmline``."""
    collected = p[0]
    new_line = p[1]
    return collected + [new_line]
@_parser.production("asmline : SYMBOL asminst EOL")
def asmline_label(p):
    """Build a ``(label, instruction)`` pair for a labelled line without a colon."""
    label = p[0].getstr()
    instruction = p[1]
    return (label, instruction)
@_parser.production("asmline : SYMBOL COLON asminst EOL")
def asmline_label_colon(p):
    """Build a ``(label, instruction)`` pair for a ``label: instruction`` line."""
    label = p[0].getstr()
    instruction = p[2]  # p[1] is the COLON token
    return (label, instruction)
@_parser.production("asmline : asminst EOL")
def asmline_asminst(p):
    """Build a ``(None, instruction)`` pair for a line that has no label."""
    instruction = p[0]
    return (None, instruction)
@_parser.production("asmline : SYMBOL COLON EOL")
@_parser.production("asmline : SYMBOL EOL")
def asmline_label_colon(p):
    """Build a ``(label, None)`` pair for a line that only carries a label."""
    label = p[0].getstr()
    return (label, None)
@_parser.production("asmline : EOL")
def asmline_empty(p):
    """Return ``(None, None)`` for a line consisting only of an EOL token."""
    label = None
    instruction = None
    return (label, instruction)
@_parser.production("asminst : OPCODE0")
def opcode0(p):
    """Build an ``(opcode, None, None)`` triple for an operand-less instruction."""
    mnemonic = p[0]
    return (mnemonic, None, None)
@_parser.production("asminst : OPCODE1 am")
def opcode1(p):
    """Build an ``(opcode, operand, None)`` triple for a one-operand instruction."""
    mnemonic = p[0]
    operand = p[1]
    return (mnemonic, operand, None)
@_parser.production("asminst : OPCODE2 am COMMA am")
def opcode2(p):
    """Build an ``(opcode, first, second)`` triple for a two-operand instruction."""
    mnemonic = p[0]
    first_operand = p[1]
    second_operand = p[3]  # p[2] is the COMMA token
    return (mnemonic, first_operand, second_operand)
@_parser.production("am : OPENSB GREG PLUS expression CLOSESB")
@_parser.production("am : OPENSB GREG MINUS expression CLOSESB")
def am_ind_reg_disp(p):
    """Handle ``[GREG + expression]`` and ``[GREG - expression]`` operands.

    Returns a ``(register, sign, expression)`` triple; the brackets are
    dropped.
    """
    register = p[1]
    sign = p[2]
    displacement = p[3]
    return (register, sign, displacement)
@_parser.production("am : OPENSB GREG CLOSESB")
@_parser.production("am : OPENSB expression CLOSESB")
def am_ind(p):
    """Handle ``[GREG]`` and ``[expression]`` operands, returning the bracketed item."""
    inner = p[1]
    return inner
@_parser.production("am : expression")
def am_lit(p):
    """Pass a bare expression operand through unchanged."""
    value = p[0]
    return value
@_parser.production("am : GREG")
@_parser.production("am : SREG")
@_parser.production("am : STACK")
def am_reg_stack(p):
    """Pass a GREG, SREG or STACK operand token through unchanged."""
    token = p[0]
    return token
@_parser.production("expression : OPENRB expression CLOSERB")
def expression_parens(p):
    """Return the value of a parenthesised expression, dropping the parentheses."""
    inner = p[1]
    return inner
@_parser.production("expression : expression BINOP expression")
def do_binop(op, a, b):
    """Evaluate a constant binary operation on two numeric operands.

    Args:
        op: Operator token type: "PLUS", "MINUS", "STAR" or "SLASH".
        a: Left operand, either an int or numeric text (decimal, or
            hexadecimal with a ``0x`` prefix as accepted by the NUMBER rule).
        b: Right operand, in the same forms as ``a``.

    Returns:
        The integer result. "SLASH" uses floor division so the result stays
        an int whose ``str()`` is again valid NUMBER text.

    Raises:
        ValueError: If ``op`` is not a known operator or an operand cannot be
            parsed as a number.
        ZeroDivisionError: If ``op`` is "SLASH" and ``b`` is zero.
    """
    def to_int(value):
        # The NUMBER rule accepts 0x-prefixed hex, which plain int() rejects.
        if isinstance(value, str):
            if value.strip().lstrip("+-").lower().startswith("0x"):
                return int(value, 16)
        return int(value)

    an = to_int(a)
    bn = to_int(b)
    if op == "PLUS":
        return an + bn
    elif op == "MINUS":
        return an - bn
    elif op == "STAR":
        return an * bn
    elif op == "SLASH":
        # True division would produce a float such as 3.5.
        return an // bn
    raise ValueError("unsupported binary operator: %r" % (op,))
@_parser.production("expression : expression binop expression")
def expression_binop(p):
    """Reduce ``expression binop expression``, folding constants when possible.

    When both operands are NUMBER tokens the operation is evaluated with
    ``do_binop`` and a new NUMBER token carrying the result text is returned,
    positioned at the left operand. Otherwise the unevaluated
    ``(op, a, b)`` tuple is returned.
    """
    a, op, b = p

    def is_number(node):
        # Operands may be tokens or already-reduced tuples; only tokens
        # expose gettokentype().
        gettokentype = getattr(node, "gettokentype", None)
        return gettokentype is not None and gettokentype() == "NUMBER"

    if is_number(a) and is_number(b):
        result = do_binop(op.gettokentype(), a.getstr(), b.getstr())
        return Token("NUMBER", str(result), a.getsourcepos())
    return (op, a, b)
@_parser.production("value : GREG")
@_parser.production("value : SREG")
def value_reg(p):
    """Pass a GREG or SREG token through as the value.

    Both productions have a single right-hand-side symbol, so the token is
    ``p[0]``; indexing ``p[1]`` would be out of range.
    """
    return p[0]
@_parser.production("register : GREG")
@_parser.production("register : SREG")
@_parser.production("register : STACK")
def expression_register(p):
    """Pass a GREG, SREG or STACK token through as the ``register`` value."""
    token = p[0]
    return token
@_parser.production("value : OPENSB GREG CLOSESB")
def value_greg_indirect(p):
    """Placeholder for ``[GREG]`` values; produces no result yet."""
    return None
@_parser.production("expression : register")
def expression_number(p):
    """Pass a ``register`` value through as an expression."""
    register = p[0]
    return register
@_parser.production("value : OPENSB GREG BINOP expression CLOSESB")
def value_greg_indirect_offset(p):
    """Placeholder for ``[GREG BINOP expression]`` values; produces no result yet."""
    return None
@_parser.production("expression : NUMBER")
def expression_number(p):
    """Pass the NUMBER token through unchanged as the expression value."""
    number_token = p[0]
    return number_token
@_parser.production("code : OPCODE0")
def opcode0(p):
    """Placeholder for an operand-less ``code`` rule; produces no result yet."""
    return None
@_parser.production("code : OPCODE1 value")
def opcode1(p):
    """Placeholder for a one-operand ``code`` rule; produces no result yet."""
    return None
@_parser.production("code : OPCODE2 value COMMA value")
def opcode2(p):
    """Placeholder for a two-operand ``code`` rule; produces no result yet."""
    return None
@_parser.production("directive : DIRECTIVE expression")
def directive1(p):
    """Placeholder for a directive with one expression argument; produces no result yet."""
    return None
@_parser.production("source : LABEL")
def source_label(p):
    """Placeholder for a ``source`` made of a LABEL; produces no result yet."""
    return None
@_parser.production("source : code")
def source_code(p):
    """Placeholder for a ``source`` made of a ``code`` item; produces no result yet."""
    return None
@_parser.production("source : directive")
def source_directive(p):
    """Placeholder for a ``source`` made of a ``directive``; produces no result yet."""
    return None
@_parser.production("binop : PLUS")
@_parser.production("binop : MINUS")
@_parser.production("binop : STAR")
@_parser.production("binop : SLASH")
def expression_parens(p):
    """Pass the PLUS, MINUS, STAR or SLASH token through as the ``binop`` value."""
    operator_token = p[0]
    return operator_token
# Build the parser from the productions registered on `_parser`.
parser = _parser.build()
@ -97,9 +148,13 @@ parser = _parser.build()
def assemble(filename):
    """Lex and parse an assembly source file, printing the results.

    Prints the length of the source text, then each token with its source
    position, then every item of the parse result.

    Args:
        filename: Path of the assembly source file to read.
    """
    with open(filename, "r") as source:
        text = source.read()

    print(len(text))
    for tok in lexer.lex(text):
        print(tok.source_pos, tok)

    # Lex a second time into a list before handing the tokens to the parser.
    token_list = list(lexer.lex(text))
    parsed = parser.parse(iter(token_list))
    for entry in parsed:
        print(entry)
if __name__ == '__main__':

9
samples/sample.asm Normal file
View File

@ -0,0 +1,9 @@
;
SET A, 0
l1: SET B, 1
l2 ADD A, B
XOR [A+1+2+3], [100+23]
XOR [B], 465
INT 1+2
;