#!/usr/bin/env python3 from types import SimpleNamespace from rply import ParserGenerator, LexerGenerator import struct OPS = { 'ADD': lambda a, b: a + b, 'SUBTRACT': lambda a, b: a - b, 'MULTIPLY': lambda a, b: a * b, 'DIVIDE': lambda a, b: a / b } OPCODES = { 'SET': (0x01, None), 'ADD': (0x02, None), 'SUB': (0x03, None), 'MUL': (0x04, None), 'MLI': (0x05, None), 'DIV': (0x06, None), 'DVI': (0x07, None), 'MOD': (0x08, None), 'MDI': (0x09, None), 'AND': (0x0a, None), 'BOR': (0x0b, None), 'XOR': (0x0c, None), 'SHR': (0x0d, None), 'ASR': (0x0e, None), 'SHL': (0x0f, None), 'IFB': (0x10, None), 'IFC': (0x11, None), 'IFE': (0x12, None), 'IFN': (0x13, None), 'IFG': (0x14, None), 'IFA': (0x15, None), 'IFL': (0x16, None), 'IFU': (0x17, None), 'ADX': (0x1a, None), 'SBX': (0x1b, None), 'STI': (0x1e, None), 'STD': (0x1f, None), 'JSR': (0x00, 0x01), 'INT': (0x00, 0x08), 'IAG': (0x00, 0x09), 'IAS': (0x00, 0x0a), 'RFI': (0x00, 0x0b), 'IAQ': (0x00, 0x0c), 'HWN': (0x00, 0x10), 'HWQ': (0x00, 0x11), 'HWI': (0x00, 0x12), 'JMP': (0x01, 0x1c) # Alias for SET PC, a } REGISTERS = { 'A': 0x00, 'B': 0x01, 'C': 0x02, 'X': 0x03, 'Y': 0x04, 'Z': 0x05, 'I': 0x06, 'J': 0x07, 'SP': 0x1b, 'PC': 0x1c, 'EX': 0x1d } class ASM(SimpleNamespace): @staticmethod def addr(arg, is_a): if type(arg) == Expr: return (0x1f, arg) if type(arg) == Register: return (REGISTERS[arg.name], None) elif type(arg) == int: if is_a and (-1 <= arg <= 30): return (arg + 0x21, None) else: return (0x1f, arg) elif type(arg) == Indirect: if arg.reg is None: return (0x1e, arg.disp) else: if arg.reg == 'SP': sp = getattr(arg, 'sp', None) if sp is None: if arg.disp == 0: return (0x19, None) else: return (0x1a, arg.disp) else: if (sp, is_a) not in (('inc', True), ('dec', False)): raise SyntaxError() else: return (0x18, None) elif arg.reg in ('PC', 'EX'): raise SyntaxError() else: if hasattr(type(arg.reg), "getstr"): r = arg.reg.getstr() else: r = arg.reg print("REG: "+r) if arg.disp == 0: return (0x08+REGISTERS[r], None) else: return (0x10+REGISTERS[r], arg.disp) def code(self): o, b = OPCODES[self.op] a_bits, a_extra = self.addr(self.a, True) if b is not None: b_bits, b_extra = b, None else: b_bits, b_extra = self.addr(self.b, False) r = [o | (a_bits << 10) | (b_bits << 5)] for e in a_extra, b_extra: if e is not None: r.append(e) print("Assembing: %s %r, %r -> %r" % (self.op, self.b, self.a, r)) return r def words(self): return len(self.code()) class Expr(SimpleNamespace): def eval(self, ctx): if self.op == 'SYMBOL': return ctx[self.name] elif self.op == 'NUMBER': return self.value else: return OPS[self.op](self.l.eval(ctx), self.r.eval(ctx)) def simplify(self, ctx={}): if self.op == 'NUMBER': return self elif self.op == 'SYMBOL': try: return Expr(op='NUMBER', value=ctx[self.name]) except: return self else: e = Expr(op=self.op, l=self.l.simplify(ctx), r=self.r.simplify(ctx)) if e.l.op == 'NUMBER' and e.r.op == 'NUMBER': return Expr(op='NUMBER', value=e.eval(ctx)) else: return e class Directive(SimpleNamespace): pass class Register(SimpleNamespace): pass class Indirect(SimpleNamespace): pass lg = LexerGenerator() tokens = [ ('OP2', '|'.join([x for x, (_, e) in OPCODES.items() if e is None])), ('OP1', '|'.join([x for x, (_, e) in OPCODES.items() if e is not None])), ('DIR1', r'\.org'), ('DIRN', r'\.(data|word)|DAT'), ('REG', '|'.join(REGISTERS.keys())), ('PUSH', r'PUSH'), ('POP', r'POP'), ('PEEK', r'PEEK'), ('PICK', r'PICK'), ('ADD', r'\+'), ('COMMA', r','), ('SUBTRACT', r'-'), ('MULTIPLY', r'\*'), ('DIVIDE', r'/'), ('SBO', r'\['), ('SBC', r']'), ('EOL', r'[\n\r]+'), ('COLON', r':'), ('STRLIT', r'"([^"\\]|\\.)*"'), ('NUMBER', r'0x[0-9a-fA-F]+|\$[0-9a-fA-F]+|0b[01]+|0[0-7]+|\d+'), ('SYMBOL', r'[a-zA-Z_][0-9a-zA-Z_]*') ] for name, regex in tokens: lg.add(name, regex) #lg.ignore(r'\s+') lg.ignore(r'[ \t\v\f]+') lg.ignore(r'[;#].*') pg = ParserGenerator( [x for x, _ in tokens], precedence=[ ('left', ['ADD', 'SUBTRACT']), ('left', ['MULTIPLY', 'DIVIDE']), ] ) @pg.production("main : lines") def main(p): return p[0] @pg.production("lines : lines line") def lines_lines(p): return p[0] + [p[1]] @pg.production("lines : line") def lines_line(p): return [p[0]] @pg.production("lines : none") def lines_empty(p): return [] @pg.production("none :") def none(p): return None @pg.production("line : COLON SYMBOL op EOL") @pg.production("line : COLON SYMBOL EOL") @pg.production("line : SYMBOL op EOL") @pg.production("line : SYMBOL COLON op EOL") @pg.production("line : op EOL") @pg.production("line : SYMBOL EOL") @pg.production("line : SYMBOL COLON EOL") @pg.production("line : EOL") def line(p): label = None instr = Directive(label=None, directive=None, args=None) for t in p: if type(t) in (ASM, Directive): instr = t elif t.gettokentype() == 'SYMBOL': label = t.getstr() instr.label = label return instr @pg.production("op : OP2 arg_b COMMA arg_a") def op_op2(p): return ASM(label=None, op=p[0].getstr(), b=p[1], a=p[3]) @pg.production("op : OP1 arg_a") def op_op1(p): return ASM(label=None, op=p[0].getstr(), a=p[1], b=None) @pg.production("op : DIRN exprlist") def op_dirn(p): return Directive(label=None, directive=p[0].getstr(), args=p[1]) @pg.production("op : DIR1 expr") def op_dir1(p): return Directive(label=None, directive=p[0].getstr(), args=p[1]) @pg.production("exprlist : exprlist COMMA expr") @pg.production("exprlist : exprlist COMMA string") def exprlist_exprlist(p): return p[0]+[p[2]] @pg.production("exprlist : expr") @pg.production("exprlist : string") def exprlist_expr(p): return [p[0]] @pg.production("arg_a : arg") def arg_a_arg(p): return p[0] @pg.production("arg_a : POP") def arg_a_pop(p): return Indirect(reg='SP', disp=0, sp='inc') @pg.production("arg_a : SBO REG ADD ADD SBC") def arg_a_pop_explicit(p): if p[1].getstr() != 'SP': raise SyntaxError() return Indirect(reg='SP', disp=0, sp='inc') @pg.production("arg_b : arg") def arg_b_arg(p): return p[0] @pg.production("arg_b : PUSH") def arg_b_push(p): return Indirect(reg='SP', disp=0, sp='dec') @pg.production("arg_b : SBO SUBTRACT SUBTRACT REG SBC") def arg_b_push_explicit(p): if p[3].getstr() != 'SP': raise SyntaxError() return Indirect(reg='SP', disp=0, sp='dec') @pg.production("arg : REG") def arg(p): return Register(name=p[0].getstr()) @pg.production("arg : SBO REG SBC") def arg_ind_reg(p): return Indirect(reg=p[1], disp=0) @pg.production("arg : SBO expr SBC") def arg_ind(p): return Indirect(reg=None, disp=p[1]) @pg.production("arg : SBO REG ADD expr SBC") @pg.production("arg : SBO expr ADD REG SBC") def arg_ind_reg_disp(p): reg = None disp = 0 for t in p: if type(t) == Expr: disp = t elif t.gettokentype() == 'REG': reg = t.getstr() return Indirect(reg=reg, disp=disp) @pg.production("arg : PEEK") def arg_peek(p): return Indirect(reg=Register(name='SP'), disp=0) @pg.production("arg : PICK expr") def arg_pick(p): return Indirect(reg=Register(name='SP'), disp=p[1]) @pg.production("arg : expr") def arg_expr(p): return p[0] # FIXME @pg.production("expr : expr ADD expr") @pg.production("expr : expr SUBTRACT expr") @pg.production("expr : expr MULTIPLY expr") @pg.production("expr : expr DIVIDE expr") def expr_op(p): return Expr(op=p[1].gettokentype(), l=p[0], r=p[2]).simplify() @pg.production("string : STRLIT") def strlit(p): # TODO: handle escapes return p[0].getstr()[1:-1] @pg.production("expr : NUMBER") def expr_num(p): base = 10 text = p[0].getstr() if text.startswith('0b'): base = 2 text = text[2:] elif text.startswith('0x'): base = 16 text = text[2:] elif text.startswith('$'): base = 16 text = text[1:] elif text.startswith('0') and len(text) > 1: base = 8 text = text[1:] return Expr(op='NUMBER', value=int(text, base)) @pg.production("expr : SYMBOL") def expr_sym(p): return Expr(op='SYMBOL', name=p[0].getstr()) lexer = lg.build() parser = pg.build() def assemble(ctx, inst): if inst.label is not None: ctx[inst.label] = ctx['.addr'] if type(inst) == Directive: if inst.directive == ".org": ctx['.addr'] = inst.args.eval(ctx) return None elif inst.directive in (".data", ".word", "DAT"): al = [] for a in inst.args: if type(a) == str: al.extend([ord(x) for x in a]) else: al.append(a.simplify(ctx)) ctx['.addr'] += len(al) return al elif type(inst) == ASM: if type(inst.a) == Expr: inst.a = inst.a.simplify(ctx) if type(inst.b) == Expr: inst.b = inst.b.simplify(ctx) if inst.b is not None: print(f"{ctx['.addr']} {inst.op} {inst.b}, {inst.a} [len={inst.words()}]") else: print(f"{ctx['.addr']} {inst.op} {inst.a} [len={inst.words()}]") ctx['.addr'] += inst.words() return inst.code() if __name__ == '__main__': import sys sym = {} sym['.addr'] = 0 insns = [] for filename in sys.argv[1:]: with open(filename, 'r') as sourcefile: code = parser.parse(lexer.lex(sourcefile.read())) for inst in code: # pylint: disable=E1133 # print(sym['.addr'], inst) a = sym['.addr'] c = assemble(sym, inst) if c is not None: insns.append((a, c)) print(sym) print(insns) sym['.addr'] = 0 binimage = b'' for a, c in insns: # pylint: disable=E1133 words = [] for w in c: if type(w) == int: words.append(w) else: words.append(w.eval(sym)) if words: print(["%04x" % x for x in words]) for w in words: binimage += struct.pack("