#!/usr/bin/env python3
"""Two-pass assembler whose opcode/operand tables match the DCPU-16 encoding.

Provenance: this file is the ``asm.py`` created by the patch
"Initial assembler implementation" (commit
4036e04c836eb14a88cd2dc0c4e4ab4bf670b6c8, Maurizio Porrato,
Thu, 2 Jan 2020 13:21:38 +0100).

Pipeline:

1. ``lexer`` / ``parser`` (built with rply) turn source text into a list of
   ``ASM`` instructions and ``Directive`` pseudo-instructions.
2. ``assemble(ctx, inst)`` (pass 1) assigns an address to every label.
3. ``assemble(ctx, inst, 2)`` (pass 2) resolves symbols and returns the
   encoded words of each instruction.

Instruction word layout (as built by ``ASM.code``): ``aaaaaabbbbbooooo``;
instructions whose ``OPCODES`` entry has a second element put that value in
the ``b`` field and take a single ``a`` operand.
"""

from types import SimpleNamespace
from rply import ParserGenerator, LexerGenerator

# Binary operators usable inside expressions, keyed by lexer token name.
OPS = {
    'ADD': lambda a, b: a + b,
    'SUBTRACT': lambda a, b: a - b,
    'MULTIPLY': lambda a, b: a * b,
    # Floor division: every assembled value must stay an integer word.
    'DIVIDE': lambda a, b: a // b,
}

# mnemonic -> (opcode bits, fixed b-field or None).
# A non-None second element marks a one-operand instruction whose b field is
# hard-wired to that value.
OPCODES = {
    'SET': (0x01, None), 'ADD': (0x02, None), 'SUB': (0x03, None), 'MUL': (0x04, None),
    'MLI': (0x05, None), 'DIV': (0x06, None), 'DVI': (0x07, None), 'MOD': (0x08, None),
    'MDI': (0x09, None), 'AND': (0x0a, None), 'BOR': (0x0b, None), 'XOR': (0x0c, None),
    'SHR': (0x0d, None), 'ASR': (0x0e, None), 'SHL': (0x0f, None), 'IFB': (0x10, None),
    'IFC': (0x11, None), 'IFE': (0x12, None), 'IFN': (0x13, None), 'IFG': (0x14, None),
    'IFA': (0x15, None), 'IFL': (0x16, None), 'IFU': (0x17, None), 'ADX': (0x1a, None),
    'SBX': (0x1b, None), 'STI': (0x1e, None), 'STD': (0x1f, None),

    'JSR': (0x00, 0x01), 'INT': (0x00, 0x08), 'IAG': (0x00, 0x09), 'IAS': (0x00, 0x0a),
    'RFI': (0x00, 0x0b), 'IAQ': (0x00, 0x0c), 'HWN': (0x00, 0x10), 'HWQ': (0x00, 0x11),
    'HWI': (0x00, 0x12),

    'JMP': (0x01, 0x1c),  # Alias for SET PC, a
}

# register name -> operand value bits.
REGISTERS = {
    'A': 0x00, 'B': 0x01, 'C': 0x02, 'X': 0x03, 'Y': 0x04, 'Z': 0x05,
    'I': 0x06, 'J': 0x07, 'SP': 0x1b, 'PC': 0x1c, 'EX': 0x1d,
}

# Extra words are emitted as 16-bit quantities.
_WORD_MASK = 0xffff


class ASM(SimpleNamespace):
    """One machine instruction.

    Attributes set by the parser: ``label`` (str or None), ``op`` (mnemonic),
    ``a`` and ``b`` (operands: ``Register``, ``Indirect``, ``Expr`` or, once
    resolved by ``assemble``, ``int``; ``b`` is None for one-operand ops).
    ``assemble`` may additionally set ``long_a`` to pin the ``a`` literal to
    its two-word form.
    """

    @staticmethod
    def _value(arg):
        """Return the integer behind *arg* (an int or a resolved ``Expr``).

        Raises:
            ValueError: if *arg* is an ``Expr`` that still contains symbols.
        """
        if isinstance(arg, Expr):
            if arg.op != 'NUMBER':
                raise ValueError(f"unresolved expression: {arg!r}")
            return arg.value
        return arg

    @staticmethod
    def _needs_extra_word(arg, is_a, force_long=False):
        """Tell whether operand *arg* occupies an additional word.

        This is the single sizing rule shared by ``addr`` and ``words`` so the
        two can never disagree.  It deliberately depends only on information
        that is identical in both assembler passes:

        * an unresolved ``Expr`` literal always reserves a word;
        * an indirect operand reserves a word unless its displacement is the
          plain integer 0 (i.e. no displacement was written in the source).
        """
        if isinstance(arg, Expr):
            if arg.op != 'NUMBER':
                return True
            arg = arg.value
        if isinstance(arg, int):
            return force_long or not (is_a and -1 <= arg <= 30)
        if isinstance(arg, Indirect):
            if arg.reg is None:
                return True
            if getattr(arg, 'sp', None) is not None:
                return False  # PUSH / POP
            return not (isinstance(arg.disp, int) and arg.disp == 0)
        return False

    @staticmethod
    def addr(arg, is_a, force_long=False):
        """Encode one operand.

        Args:
            arg: ``Register``, ``Indirect``, ``int`` or resolved ``Expr``.
            is_a: True when encoding the ``a`` operand (only ``a`` may use
                the short inline literal form for -1..30 and POP).
            force_long: encode an ``a`` literal with an extra word even if it
                would fit the short form (keeps pass 1 sizes valid).

        Returns:
            ``(bits, extra)`` where ``extra`` is the additional word (masked
            to 16 bits) or None.

        Raises:
            SyntaxError: PUSH/POP used in the wrong operand position, or an
                indirect access through PC or EX.
            ValueError: an expression is still unresolved.
            TypeError: *arg* is not a supported operand type.
        """
        if isinstance(arg, Register):
            return (REGISTERS[arg.name], None)

        extra = ASM._needs_extra_word(arg, is_a, force_long)

        if isinstance(arg, (Expr, int)):
            value = ASM._value(arg)
            if extra:
                return (0x1f, value & _WORD_MASK)
            return (value + 0x21, None)

        if isinstance(arg, Indirect):
            if arg.reg is None:
                return (0x1e, ASM._value(arg.disp) & _WORD_MASK)
            if arg.reg == 'SP':
                sp = getattr(arg, 'sp', None)
                if sp is not None:
                    if (sp, is_a) not in (('inc', True), ('dec', False)):
                        raise SyntaxError("POP is only valid as a, PUSH only as b")
                    return (0x18, None)
                if extra:
                    return (0x1a, ASM._value(arg.disp) & _WORD_MASK)
                return (0x19, None)
            if arg.reg in ('PC', 'EX'):
                raise SyntaxError(f"cannot address memory through {arg.reg}")
            if extra:
                return (0x10 + REGISTERS[arg.reg],
                        ASM._value(arg.disp) & _WORD_MASK)
            return (0x08 + REGISTERS[arg.reg], None)

        raise TypeError(f"unsupported operand: {arg!r}")

    def code(self):
        """Return the list of words encoding this instruction.

        The extra word of ``a`` (if any) precedes the extra word of ``b``.
        """
        opcode, fixed_b = OPCODES[self.op]
        a_bits, a_extra = self.addr(self.a, True, getattr(self, 'long_a', False))
        if fixed_b is not None:
            b_bits, b_extra = fixed_b, None
        else:
            b_bits, b_extra = self.addr(self.b, False)
        encoded = [opcode | (a_bits << 10) | (b_bits << 5)]
        encoded.extend(e for e in (a_extra, b_extra) if e is not None)
        return encoded

    def words(self):
        """Return the instruction length in words (1 to 3).

        Works on unresolved operands, so it can be used during pass 1, and
        always agrees with ``len(self.code())``.
        """
        count = 1
        if self._needs_extra_word(self.a, True, getattr(self, 'long_a', False)):
            count += 1
        if self.b is not None and self._needs_extra_word(self.b, False):
            count += 1
        return count


class Expr(SimpleNamespace):
    """Expression tree node.

    ``op`` is ``'NUMBER'`` (with ``value``), ``'SYMBOL'`` (with ``name``) or a
    key of ``OPS`` (with sub-expressions ``l`` and ``r``).
    """

    def eval(self, ctx):
        """Evaluate against symbol table *ctx*; KeyError on unknown symbols."""
        if self.op == 'SYMBOL':
            return ctx[self.name]
        if self.op == 'NUMBER':
            return self.value
        return OPS[self.op](self.l.eval(ctx), self.r.eval(ctx))

    def simplify(self, ctx=None):
        """Return an equivalent expression with constant sub-trees folded.

        Symbols found in *ctx* are replaced by numbers; unknown symbols are
        left in place.
        """
        if ctx is None:
            ctx = {}
        if self.op == 'NUMBER':
            return self
        if self.op == 'SYMBOL':
            try:
                return Expr(op='NUMBER', value=ctx[self.name])
            except KeyError:
                return self
        folded = Expr(op=self.op, l=self.l.simplify(ctx), r=self.r.simplify(ctx))
        if folded.l.op == 'NUMBER' and folded.r.op == 'NUMBER':
            return Expr(op='NUMBER', value=folded.eval(ctx))
        return folded


class Directive(SimpleNamespace):
    """Assembler directive: ``label``, ``directive`` (text or None), ``args``.

    ``directive`` is None for a line that only defines a label.
    """


class Register(SimpleNamespace):
    """Direct register operand; ``name`` is a key of ``REGISTERS``."""


class Indirect(SimpleNamespace):
    """Memory operand ``[reg + disp]``.

    ``reg`` is a register name or None (absolute address), ``disp`` is the
    integer 0 when no displacement was written, otherwise an ``Expr``.
    The optional ``sp`` attribute is ``'inc'`` (POP) or ``'dec'`` (PUSH).
    """


def _keywords(words):
    """Build a regex matching any of *words* as a whole word.

    rply picks the first rule that matches, so without the word boundaries a
    symbol such as ``ADDRESS`` or ``BUFFER`` would be split into a keyword
    followed by the rest of the name.
    """
    return r'\b(?:' + '|'.join(words) + r')\b'


lg = LexerGenerator()

tokens = [
    ('OP2', _keywords([x for x, (_, e) in OPCODES.items() if e is None])),
    ('OP1', _keywords([x for x, (_, e) in OPCODES.items() if e is not None])),
    ('DIR1', r'\.org\b'),
    ('DIRN', r'\.(?:data|word)\b|\bDAT\b'),
    ('REG', _keywords(REGISTERS.keys())),
    ('PUSH', r'\bPUSH\b'),
    ('POP', r'\bPOP\b'),
    ('PEEK', r'\bPEEK\b'),
    ('PICK', r'\bPICK\b'),
    ('ADD', r'\+'),
    ('COMMA', r','),
    ('SUBTRACT', r'-'),
    ('MULTIPLY', r'\*'),
    ('DIVIDE', r'/'),
    ('SBO', r'\['),
    ('SBC', r']'),
    ('EOL', r'[\n\r]+'),
    ('COLON', r':'),
    ('STRLIT', r'"([^"\\]|\\.)*"'),
    ('NUMBER', r'0x[0-9a-fA-F]+|\$[0-9a-fA-F]+|0b[01]+|0[0-7]+|\d+'),
    ('SYMBOL', r'[a-zA-Z_][0-9a-zA-Z_]*'),
]

for name, regex in tokens:
    lg.add(name, regex)

# Newlines are significant (EOL), so only horizontal whitespace is skipped.
lg.ignore(r'[ \t\v\f]+')
# Comments start with ';' or '#' and run to the end of the line.
lg.ignore(r'[;#].*')


pg = ParserGenerator(
    [x for x, _ in tokens],
    precedence=[
        ('left', ['ADD', 'SUBTRACT']),
        ('left', ['MULTIPLY', 'DIVIDE']),
    ]
)


@pg.production("main : lines")
def main(p):
    """Return the list of parsed instructions/directives."""
    return p[0]


@pg.production("lines : lines line")
def lines_lines(p):
    """Append a line, dropping empty ones (``line`` returned None)."""
    if p[1] is None:
        return p[0]
    return p[0] + [p[1]]


@pg.production("lines : line")
def lines_line(p):
    """Start the list with the first line, unless that line is empty."""
    if p[0] is None:
        return []
    return [p[0]]


@pg.production("lines : none")
def lines_empty(p):
    """Empty input."""
    return []


@pg.production("none :")
def none(p):
    """Empty production."""
    return None


@pg.production("line : COLON SYMBOL op EOL")
@pg.production("line : COLON SYMBOL EOL")
@pg.production("line : SYMBOL op EOL")
@pg.production("line : SYMBOL COLON op EOL")
@pg.production("line : op EOL")
@pg.production("line : SYMBOL EOL")
@pg.production("line : SYMBOL COLON EOL")
@pg.production("line : EOL")
def line(p):
    """Attach the optional label to the line's operation.

    Returns the ``ASM``/``Directive`` of the line; a label-only line yields a
    no-op ``Directive`` (``directive=None``) so the label is still defined;
    a blank line yields None.
    """
    label = None
    for item in p:
        if isinstance(item, (ASM, Directive)):
            item.label = label
            return item
        if item.gettokentype() == 'SYMBOL':
            label = item.getstr()
    if label is not None:
        return Directive(label=label, directive=None, args=[])
    return None


@pg.production("op : OP2 arg_b COMMA arg_a")
def op_op2(p):
    """Two-operand instruction: ``OP b, a``."""
    return ASM(label=None, op=p[0].getstr(), b=p[1], a=p[3])


@pg.production("op : OP1 arg_a")
def op_op1(p):
    """One-operand instruction: ``OP a``."""
    return ASM(label=None, op=p[0].getstr(), a=p[1], b=None)


@pg.production("op : DIRN exprlist")
def op_dirn(p):
    """Data directive taking a list of expressions/strings."""
    return Directive(label=None, directive=p[0].getstr(), args=p[1])


@pg.production("op : DIR1 expr")
def op_dir1(p):
    """Directive taking a single expression (``.org``)."""
    return Directive(label=None, directive=p[0].getstr(), args=p[1])


@pg.production("exprlist : exprlist COMMA expr")
@pg.production("exprlist : exprlist COMMA string")
def exprlist_exprlist(p):
    """Extend a comma-separated list."""
    return p[0] + [p[2]]


@pg.production("exprlist : expr")
@pg.production("exprlist : string")
def exprlist_expr(p):
    """Start a comma-separated list."""
    return [p[0]]


@pg.production("arg_a : arg")
def arg_a_arg(p):
    """Generic operand in the ``a`` position."""
    return p[0]


@pg.production("arg_a : POP")
def arg_a_pop(p):
    """``POP`` operand."""
    return Indirect(reg='SP', disp=0, sp='inc')


@pg.production("arg_a : SBO REG ADD ADD SBC")
def arg_a_pop_explicit(p):
    """``[SP++]`` spelling of POP; any other register is rejected."""
    if p[1].getstr() != 'SP':
        raise SyntaxError()
    return Indirect(reg='SP', disp=0, sp='inc')


@pg.production("arg_b : arg")
def arg_b_arg(p):
    """Generic operand in the ``b`` position."""
    return p[0]


@pg.production("arg_b : PUSH")
def arg_b_push(p):
    """``PUSH`` operand."""
    return Indirect(reg='SP', disp=0, sp='dec')


@pg.production("arg_b : SBO SUBTRACT SUBTRACT REG SBC")
def arg_b_push_explicit(p):
    """``[--SP]`` spelling of PUSH; any other register is rejected."""
    if p[3].getstr() != 'SP':
        raise SyntaxError()
    return Indirect(reg='SP', disp=0, sp='dec')


@pg.production("arg : REG")
def arg(p):
    """Direct register operand."""
    return Register(name=p[0].getstr())


@pg.production("arg : SBO REG SBC")
def arg_ind_reg(p):
    """``[REG]`` operand."""
    # Store the register *name*: ASM.addr compares against strings and
    # indexes REGISTERS with it.
    return Indirect(reg=p[1].getstr(), disp=0)


@pg.production("arg : SBO expr SBC")
def arg_ind(p):
    """``[expr]`` absolute memory operand."""
    return Indirect(reg=None, disp=p[1])


@pg.production("arg : SBO REG ADD expr SBC")
@pg.production("arg : SBO expr ADD REG SBC")
def arg_ind_reg_disp(p):
    """``[REG + expr]`` / ``[expr + REG]`` operand."""
    reg = None
    disp = 0
    for item in p:
        if isinstance(item, Expr):
            disp = item
        elif item.gettokentype() == 'REG':
            reg = item.getstr()
    return Indirect(reg=reg, disp=disp)


@pg.production("arg : PEEK")
def arg_peek(p):
    """``PEEK`` operand (same as ``[SP]``)."""
    return Indirect(reg='SP', disp=0)


@pg.production("arg : PICK expr")
def arg_pick(p):
    """``PICK n`` operand (same as ``[SP + n]``)."""
    return Indirect(reg='SP', disp=p[1])


@pg.production("arg : expr")
def arg_expr(p):
    """Literal operand."""
    return p[0]  # FIXME (carried over from the original; intent not stated)


@pg.production("expr : expr ADD expr")
@pg.production("expr : expr SUBTRACT expr")
@pg.production("expr : expr MULTIPLY expr")
@pg.production("expr : expr DIVIDE expr")
def expr_op(p):
    """Binary operation, constant-folded when both sides are numbers."""
    return Expr(op=p[1].gettokentype(), l=p[0], r=p[2]).simplify()


@pg.production("string : STRLIT")
def strlit(p):
    """String literal with the surrounding quotes removed."""
    # TODO: handle escapes
    return p[0].getstr()[1:-1]


@pg.production("expr : NUMBER")
def expr_num(p):
    """Numeric literal: ``0b``, ``0x``/``$``, leading-zero octal or decimal."""
    base = 10
    text = p[0].getstr()
    if text.startswith('0b'):
        base = 2
        text = text[2:]
    elif text.startswith('0x'):
        base = 16
        text = text[2:]
    elif text.startswith('$'):
        base = 16
        text = text[1:]
    elif text.startswith('0') and len(text) > 1:
        base = 8
        text = text[1:]
    return Expr(op='NUMBER', value=int(text, base))


@pg.production("expr : SYMBOL")
def expr_sym(p):
    """Symbol reference, resolved later against the symbol table."""
    return Expr(op='SYMBOL', name=p[0].getstr())


lexer = lg.build()
parser = pg.build()


def _resolve_operand(ctx, operand, tolerate_missing):
    """Resolve the symbols of one operand against *ctx*.

    A literal ``Expr`` is replaced by its integer value.  The displacement of
    an ``Indirect`` is replaced by a ``NUMBER`` expression (it stays an
    ``Expr`` on purpose so the operand keeps the same size in both passes).
    Unknown symbols are ignored when *tolerate_missing* is true, otherwise
    the ``KeyError`` propagates.
    """
    try:
        if isinstance(operand, Expr):
            return operand.eval(ctx)
        if isinstance(operand, Indirect) and isinstance(operand.disp, Expr):
            operand.disp = Expr(op='NUMBER', value=operand.disp.eval(ctx))
    except KeyError:
        if not tolerate_missing:
            raise
    return operand


def assemble(ctx, inst, step=1):
    """Process one parsed line.

    Args:
        ctx: symbol table; ``ctx['.addr']`` is the current address and is
            advanced by the size of *inst*.
        inst: ``ASM``, ``Directive`` or None (ignored).
        step: 1 for the label-collecting pass (unknown symbols tolerated),
            anything else for the encoding pass.

    Returns:
        The encoded words for an ``ASM`` when ``step != 1``; None otherwise
        (data directives are only printed).

    Raises:
        KeyError: an undefined symbol is referenced when ``step != 1``.
    """
    if inst is None:
        return None
    first_pass = step == 1

    if inst.label is not None:
        if first_pass and inst.label in ctx:
            print(f"Redefining symbol {inst.label}")
        ctx[inst.label] = ctx['.addr']

    if isinstance(inst, Directive):
        if inst.directive == ".org":
            ctx['.addr'] = inst.args.eval(ctx)
        elif inst.directive in (".data", ".word", "DAT"):
            al = []
            for a in inst.args:
                if isinstance(a, str):
                    al.extend([ord(x) for x in a])
                elif first_pass:
                    al.append(a)  # only the count matters in pass 1
                else:
                    al.append(a.eval(ctx))
            if step > 1:
                print(ctx['.addr'], al)
            ctx['.addr'] += len(al)
        return None

    if isinstance(inst, ASM):
        inst.a = _resolve_operand(ctx, inst.a, first_pass)
        inst.b = _resolve_operand(ctx, inst.b, first_pass)
        if first_pass and isinstance(inst.a, Expr):
            # Sized as two words now; pin that so a small resolved value does
            # not shrink the instruction in pass 2 and shift later labels.
            inst.long_a = True

        if inst.b is not None:
            print(f"{ctx['.addr']} {inst.op} {inst.b}, {inst.a} [len={inst.words()}]")
        else:
            print(f"{ctx['.addr']} {inst.op} {inst.a} [len={inst.words()}]")

        ctx['.addr'] += inst.words()

        if not first_pass:
            return inst.code()
    return None


def _run(filenames):
    """Assemble each file in *filenames*, printing a listing to stdout.

    The symbol table is shared by all files.  Each file's second pass starts
    at the address where its first pass started, so both passes agree.
    NOTE(review): this treats multiple files as contiguous; confirm that is
    the intended multi-file behaviour.
    """
    sym = {'.addr': 0}
    for filename in filenames:
        with open(filename, 'r') as sourcefile:
            # The grammar needs every line EOL-terminated; the extra newline
            # covers files whose last line lacks one.
            code = parser.parse(lexer.lex(sourcefile.read() + '\n'))
        start = sym['.addr']
        for inst in code:
            assemble(sym, inst)
        print(sym)
        sym['.addr'] = start
        for inst in code:
            a = assemble(sym, inst, 2)
            if a is not None:
                print(["%04x" % x for x in a])


if __name__ == '__main__':
    import sys
    _run(sys.argv[1:])