Assembler: handle negative numbers, keep track of source code position, add support for case-insensitive symbols, add O as an alias for the EX register
This commit is contained in:
parent
69966cdcda
commit
04d09c68dc
106
asm.py
106
asm.py
|
@ -5,6 +5,8 @@ from rply import ParserGenerator, LexerGenerator
|
||||||
import re
|
import re
|
||||||
import struct
|
import struct
|
||||||
|
|
||||||
|
CASE_INSENSITIVE = True
|
||||||
|
|
||||||
OPS = {
|
OPS = {
|
||||||
'ADD': lambda a, b: a + b,
|
'ADD': lambda a, b: a + b,
|
||||||
'SUBTRACT': lambda a, b: a - b,
|
'SUBTRACT': lambda a, b: a - b,
|
||||||
|
@ -30,10 +32,36 @@ OPCODES = {
|
||||||
|
|
||||||
REGISTERS = {
|
REGISTERS = {
|
||||||
'A': 0x00, 'B': 0x01, 'C': 0x02, 'X': 0x03, 'Y': 0x04, 'Z': 0x05,
|
'A': 0x00, 'B': 0x01, 'C': 0x02, 'X': 0x03, 'Y': 0x04, 'Z': 0x05,
|
||||||
'I': 0x06, 'J': 0x07, 'SP': 0x1b, 'PC': 0x1c, 'EX': 0x1d
|
'I': 0x06, 'J': 0x07, 'SP': 0x1b, 'PC': 0x1c, 'EX': 0x1d,
|
||||||
|
'O': 0x1d # For compatibility with specs v1.1
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
class SymbolTable:
    """Mapping of symbol names to values with optional case-insensitive lookup.

    When ``ignorecase`` is true, names that differ only by case refer to the
    same entry; the spelling used on first assignment is the one stored and
    the one shown by ``str()``. When it is false, names are matched exactly
    for both reads and writes.
    """

    def __init__(self, ignorecase=False):
        self.ignorecase = ignorecase
        # Values keyed by the spelling under which each symbol is stored.
        self.symbols = {}
        # Upper-cased name -> stored spelling; consulted only when
        # ignorecase is set.
        self.orig = {}

    def _resolve(self, key):
        """Return the spelling under which *key* is stored.

        In case-insensitive mode an unknown name raises ``KeyError``; in
        case-sensitive mode the key is returned unchanged.
        """
        if self.ignorecase:
            return self.orig[key.upper()]
        return key

    def __getitem__(self, key):
        """Return the value bound to *key*; raise ``KeyError`` if unbound."""
        return self.symbols[self._resolve(key)]

    def __setitem__(self, key, value):
        """Bind *key* to *value*, folding case only when ignorecase is set."""
        if self.ignorecase:
            # Reuse the first-seen spelling so that differently-cased
            # assignments update a single entry instead of creating a new one.
            key = self.orig.setdefault(key.upper(), key)
        self.symbols[key] = value

    def __contains__(self, key):
        """Return True if *key* is bound, using the same case rules as lookup."""
        try:
            return self._resolve(key) in self.symbols
        except KeyError:
            return False

    def __str__(self):
        """Return one ``name: value`` line per symbol, sorted by stored name."""
        return '\n'.join(
            f"{k}: {v}" for k, v in sorted(self.symbols.items()))
|
||||||
|
|
||||||
|
|
||||||
class ASM(SimpleNamespace):
|
class ASM(SimpleNamespace):
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -102,7 +130,7 @@ class Expr(SimpleNamespace):
|
||||||
return self.value
|
return self.value
|
||||||
else:
|
else:
|
||||||
return OPS[self.op](self.l.eval(ctx), self.r.eval(ctx))
|
return OPS[self.op](self.l.eval(ctx), self.r.eval(ctx))
|
||||||
def simplify(self, ctx={}):
|
def simplify(self, ctx=SymbolTable(CASE_INSENSITIVE)):
|
||||||
if self.op == 'NUMBER':
|
if self.op == 'NUMBER':
|
||||||
return self
|
return self
|
||||||
elif self.op == 'SYMBOL':
|
elif self.op == 'SYMBOL':
|
||||||
|
@ -129,16 +157,18 @@ class Indirect(SimpleNamespace):
|
||||||
|
|
||||||
lg = LexerGenerator()
|
lg = LexerGenerator()
|
||||||
|
|
||||||
|
STARTOP = r'(?<![a-zA-Z0-9_\.\$])('
|
||||||
|
ENDOP = r')(?![a-zA-Z0-9_\.\$])'
|
||||||
tokens = [
|
tokens = [
|
||||||
('OP2', '|'.join([x for x, (_, e) in OPCODES.items() if e is None])),
|
('OP2', STARTOP+'|'.join([x for x, (_, e) in OPCODES.items() if e is None])+ENDOP),
|
||||||
('OP1', '|'.join([x for x, (_, e) in OPCODES.items() if e is not None])),
|
('OP1', STARTOP+'|'.join([x for x, (_, e) in OPCODES.items() if e is not None])+ENDOP),
|
||||||
('DIR1', r'\.org'),
|
('DIR1', STARTOP+r'\.org'+ENDOP),
|
||||||
('DIRN', r'\.(data|word)|DAT'),
|
('DIRN', STARTOP+r'\.(data|word)|DAT'+ENDOP),
|
||||||
('REG', '|'.join(REGISTERS.keys())),
|
('REG', STARTOP+'|'.join(REGISTERS.keys())+ENDOP),
|
||||||
('PUSH', r'PUSH'),
|
('PUSH', STARTOP+r'PUSH'+ENDOP),
|
||||||
('POP', r'POP'),
|
('POP', STARTOP+r'POP'+ENDOP),
|
||||||
('PEEK', r'PEEK'),
|
('PEEK', STARTOP+r'PEEK'+ENDOP),
|
||||||
('PICK', r'PICK'),
|
('PICK', STARTOP+r'PICK'+ENDOP),
|
||||||
('ADD', r'\+'),
|
('ADD', r'\+'),
|
||||||
('COMMA', r','),
|
('COMMA', r','),
|
||||||
('SUBTRACT', r'-'),
|
('SUBTRACT', r'-'),
|
||||||
|
@ -149,8 +179,8 @@ tokens = [
|
||||||
('EOL', r'[\n\r]+'),
|
('EOL', r'[\n\r]+'),
|
||||||
('COLON', r':'),
|
('COLON', r':'),
|
||||||
('STRLIT', r'"([^"\\]|\\.)*"'),
|
('STRLIT', r'"([^"\\]|\\.)*"'),
|
||||||
('NUMBER', r'0x[0-9a-fA-F]+|\$[0-9a-fA-F]+|0b[01]+|0[0-7]+|\d+'),
|
('NUMBER', STARTOP+r'-?(0x[0-9a-fA-F]+|\$[0-9a-fA-F]+|0b[01]+|0[0-7]+|\d+)'+ENDOP),
|
||||||
('SYMBOL', r'[a-zA-Z_][0-9a-zA-Z_]*')
|
('SYMBOL', STARTOP+r'[a-zA-Z_][0-9a-zA-Z_]*'+ENDOP)
|
||||||
]
|
]
|
||||||
|
|
||||||
for name, regex in tokens:
|
for name, regex in tokens:
|
||||||
|
@ -210,21 +240,21 @@ def line(p):
|
||||||
|
|
||||||
@pg.production("op : OP2 arg_b COMMA arg_a")
|
@pg.production("op : OP2 arg_b COMMA arg_a")
|
||||||
def op_op2(p):
|
def op_op2(p):
|
||||||
return ASM(label=None, op=p[0].getstr(), b=p[1], a=p[3])
|
return ASM(label=None, op=p[0].getstr().upper(), b=p[1], a=p[3], pos=p[0].getsourcepos())
|
||||||
|
|
||||||
@pg.production("op : OP1 arg_a")
|
@pg.production("op : OP1 arg_a")
|
||||||
# Some source code has a comma before the argument
|
# Some source code has a comma before the argument
|
||||||
@pg.production("op : OP1 COMMA arg_a")
|
@pg.production("op : OP1 COMMA arg_a")
|
||||||
def op_op1(p):
|
def op_op1(p):
|
||||||
return ASM(label=None, op=p[0].getstr(), a=p[-1], b=None)
|
return ASM(label=None, op=p[0].getstr().upper(), a=p[-1], b=None, pos=p[0].getsourcepos())
|
||||||
|
|
||||||
@pg.production("op : DIRN exprlist")
|
@pg.production("op : DIRN exprlist")
|
||||||
def op_dirn(p):
|
def op_dirn(p):
|
||||||
return Directive(label=None, directive=p[0].getstr(), args=p[1])
|
return Directive(label=None, directive=p[0].getstr().upper(), args=p[1], pos=p[0].getsourcepos())
|
||||||
|
|
||||||
@pg.production("op : DIR1 expr")
|
@pg.production("op : DIR1 expr")
|
||||||
def op_dir1(p):
|
def op_dir1(p):
|
||||||
return Directive(label=None, directive=p[0].getstr(), args=p[1])
|
return Directive(label=None, directive=p[0].getstr().upper(), args=p[1], pos=p[0].getsourcepos())
|
||||||
|
|
||||||
@pg.production("exprlist : exprlist COMMA expr")
|
@pg.production("exprlist : exprlist COMMA expr")
|
||||||
@pg.production("exprlist : exprlist COMMA string")
|
@pg.production("exprlist : exprlist COMMA string")
|
||||||
|
@ -246,7 +276,7 @@ def arg_a_pop(p):
|
||||||
|
|
||||||
@pg.production("arg_a : SBO REG ADD ADD SBC")
|
@pg.production("arg_a : SBO REG ADD ADD SBC")
|
||||||
def arg_a_pop_explicit(p):
|
def arg_a_pop_explicit(p):
|
||||||
if p[1].getstr() != 'SP':
|
if p[1].getstr().upper() != 'SP':
|
||||||
raise SyntaxError()
|
raise SyntaxError()
|
||||||
return Indirect(reg='SP', disp=0, sp='inc')
|
return Indirect(reg='SP', disp=0, sp='inc')
|
||||||
|
|
||||||
|
@ -260,17 +290,17 @@ def arg_b_push(p):
|
||||||
|
|
||||||
@pg.production("arg_b : SBO SUBTRACT SUBTRACT REG SBC")
|
@pg.production("arg_b : SBO SUBTRACT SUBTRACT REG SBC")
|
||||||
def arg_b_push_explicit(p):
|
def arg_b_push_explicit(p):
|
||||||
if p[3].getstr() != 'SP':
|
if p[3].getstr().upper() != 'SP':
|
||||||
raise SyntaxError()
|
raise SyntaxError()
|
||||||
return Indirect(reg='SP', disp=0, sp='dec')
|
return Indirect(reg='SP', disp=0, sp='dec')
|
||||||
|
|
||||||
@pg.production("arg : REG")
|
@pg.production("arg : REG")
|
||||||
def arg(p):
|
def arg(p):
|
||||||
return Register(name=p[0].getstr())
|
return Register(name=p[0].getstr().upper())
|
||||||
|
|
||||||
@pg.production("arg : SBO REG SBC")
|
@pg.production("arg : SBO REG SBC")
|
||||||
def arg_ind_reg(p):
|
def arg_ind_reg(p):
|
||||||
return Indirect(reg=p[1], disp=0)
|
return Indirect(reg=p[1].getstr().upper(), disp=0)
|
||||||
|
|
||||||
@pg.production("arg : SBO expr SBC")
|
@pg.production("arg : SBO expr SBC")
|
||||||
def arg_ind(p):
|
def arg_ind(p):
|
||||||
|
@ -285,7 +315,7 @@ def arg_ind_reg_disp(p):
|
||||||
if type(t) == Expr:
|
if type(t) == Expr:
|
||||||
disp = t
|
disp = t
|
||||||
elif t.gettokentype() == 'REG':
|
elif t.gettokentype() == 'REG':
|
||||||
reg = t.getstr()
|
reg = t.getstr().upper()
|
||||||
return Indirect(reg=reg, disp=disp)
|
return Indirect(reg=reg, disp=disp)
|
||||||
|
|
||||||
@pg.production("arg : PEEK")
|
@pg.production("arg : PEEK")
|
||||||
|
@ -307,6 +337,10 @@ def arg_expr(p):
|
||||||
def expr_op(p):
|
def expr_op(p):
|
||||||
return Expr(op=p[1].gettokentype(), l=p[0], r=p[2]).simplify()
|
return Expr(op=p[1].gettokentype(), l=p[0], r=p[2]).simplify()
|
||||||
|
|
||||||
|
@pg.production("expr : SUBTRACT expr")
def nexpr_op(p):
    """Handle unary minus by rewriting ``-expr`` as ``0 - expr``.

    ``p[1]`` is the operand expression; the resulting subtraction is passed
    through ``simplify()`` before being returned.
    """
    zero = Expr(op='NUMBER', value=0)
    negated = Expr(op='SUBTRACT', l=zero, r=p[1])
    return negated.simplify()
|
||||||
|
|
||||||
@pg.production("string : STRLIT")
|
@pg.production("string : STRLIT")
|
||||||
def strlit(p):
|
def strlit(p):
|
||||||
# TODO: handle escapes
|
# TODO: handle escapes
|
||||||
|
@ -316,6 +350,10 @@ def strlit(p):
|
||||||
def expr_num(p):
|
def expr_num(p):
|
||||||
base = 10
|
base = 10
|
||||||
text = p[0].getstr()
|
text = p[0].getstr()
|
||||||
|
negate = False
|
||||||
|
if text.startswith('-'):
|
||||||
|
negate = True
|
||||||
|
text = text[1:]
|
||||||
if text.startswith('0b'):
|
if text.startswith('0b'):
|
||||||
base = 2
|
base = 2
|
||||||
text = text[2:]
|
text = text[2:]
|
||||||
|
@ -328,7 +366,10 @@ def expr_num(p):
|
||||||
elif text.startswith('0') and len(text) > 1:
|
elif text.startswith('0') and len(text) > 1:
|
||||||
base = 8
|
base = 8
|
||||||
text = text[1:]
|
text = text[1:]
|
||||||
return Expr(op='NUMBER', value=int(text, base))
|
value = int(text, base)
|
||||||
|
if negate:
|
||||||
|
value = -value
|
||||||
|
return Expr(op='NUMBER', value=value)
|
||||||
|
|
||||||
@pg.production("expr : SYMBOL")
|
@pg.production("expr : SYMBOL")
|
||||||
def expr_sym(p):
|
def expr_sym(p):
|
||||||
|
@ -367,25 +408,29 @@ def assemble(ctx, inst):
|
||||||
return inst.code()
|
return inst.code()
|
||||||
|
|
||||||
|
|
||||||
|
class AssemblerError(Exception):
    """Raised when assembly fails, e.g. an emitted word is out of 16-bit bounds.

    Derives from ``Exception`` (not ``BaseException``) so that ordinary
    ``except Exception`` handlers see it, as with other application errors.
    """
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
import sys
|
import sys
|
||||||
sym = {}
|
sym = SymbolTable(CASE_INSENSITIVE)
|
||||||
sym['.addr'] = 0
|
sym['.addr'] = 0
|
||||||
insns = []
|
insns = []
|
||||||
for filename in sys.argv[1:]:
|
for filename in sys.argv[1:]:
|
||||||
with open(filename, 'r') as sourcefile:
|
with open(filename, 'r') as sourcefile:
|
||||||
code = parser.parse(lexer.lex(sourcefile.read()))
|
code = parser.parse(lexer.lex(sourcefile.read()))
|
||||||
for inst in code: # pylint: disable=E1133
|
for inst in code: # pylint: disable=E1133
|
||||||
# print(sym['.addr'], inst)
|
print(sym['.addr'], inst)
|
||||||
a = sym['.addr']
|
a = sym['.addr']
|
||||||
c = assemble(sym, inst)
|
c = assemble(sym, inst)
|
||||||
if c is not None:
|
if c is not None:
|
||||||
insns.append((a, c))
|
insns.append((inst.pos, a, c))
|
||||||
print(sym)
|
print(sym)
|
||||||
print(insns)
|
print(insns)
|
||||||
sym['.addr'] = 0
|
sym['.addr'] = 0
|
||||||
binimage = b''
|
binimage = b''
|
||||||
for a, c in insns: # pylint: disable=E1133
|
for pos, a, c in insns: # pylint: disable=E1133
|
||||||
words = []
|
words = []
|
||||||
for w in c:
|
for w in c:
|
||||||
if type(w) == int:
|
if type(w) == int:
|
||||||
|
@ -393,10 +438,13 @@ if __name__ == '__main__':
|
||||||
else:
|
else:
|
||||||
words.append(w.eval(sym))
|
words.append(w.eval(sym))
|
||||||
if words:
|
if words:
|
||||||
print(["%04x" % x for x in words])
|
print(pos,["%04x" % x for x in words])
|
||||||
for w in words:
|
for w in words:
|
||||||
|
if w < 0:
|
||||||
|
w = (1<<16)+w
|
||||||
|
if w < 0 or w > 0xffff:
|
||||||
|
raise AssemblerError("Value out of bounds: "+str(w))
|
||||||
binimage += struct.pack("<H", w)
|
binimage += struct.pack("<H", w)
|
||||||
#binimage += struct.pack(">H", w)
|
|
||||||
outfilename = filename[:filename.rfind('.')]+'.bin'
|
outfilename = filename[:filename.rfind('.')]+'.bin'
|
||||||
with open(outfilename, 'wb') as binfile:
|
with open(outfilename, 'wb') as binfile:
|
||||||
binfile.write(binimage)
|
binfile.write(binimage)
|
||||||
|
|
Loading…
Reference in New Issue