Assembler: handle negative numbers, keep track of source code position, add support for case insensitive symbols, add alias for O register

2021-02-13 14:26:01 +00:00 · 2021-02-13 14:26:01 +00:00 · 04d09c68dc
parent 69966cdcda
commit 04d09c68dc
1 changed files with 77 additions and 29 deletions
--- a/asm.py
+++ b/asm.py
@ -5,6 +5,8 @@ from rply import ParserGenerator, LexerGenerator
 import re
 import struct
 CASE_INSENSITIVE = True
 OPS = {
    'ADD': lambda a, b: a + b,
    'SUBTRACT': lambda a, b: a - b,
@ -30,10 +32,36 @@ OPCODES = {
 REGISTERS = {
    'A': 0x00, 'B': 0x01, 'C': 0x02, 'X': 0x03, 'Y': 0x04, 'Z': 0x05,
-    'I': 0x06, 'J': 0x07, 'SP': 0x1b, 'PC': 0x1c, 'EX': 0x1d
+    'I': 0x06, 'J': 0x07, 'SP': 0x1b, 'PC': 0x1c, 'EX': 0x1d,
    'O': 0x1d # For compatibility with specs v1.1
 }
 class SymbolTable:
    """Symbol-name to value mapping with optional case-insensitive lookup.

    When ``ignorecase`` is true, names differing only in letter case refer
    to the same entry; the spelling used by the first assignment is kept as
    the canonical key (and is what ``str()`` prints).  When ``ignorecase``
    is false the table behaves like a plain dict keyed by the exact name.
    """
    def __init__(self, ignorecase=False):
        """Create an empty table.

        ignorecase -- fold names with ``str.upper()`` before matching.
        """
        self.ignorecase = ignorecase
        # Canonical spelling -> value.
        self.symbols = {}
        # Upper-cased name -> canonical spelling (only used when folding).
        self.orig = {}
    def _resolve(self, key):
        """Return the canonical spelling for *key*, or *key* itself if unseen."""
        if not self.ignorecase:
            return key
        return self.orig.get(key.upper(), key)
    def __getitem__(self, key):
        """Return the value bound to *key*; raise KeyError(key) if unbound."""
        return self.symbols[self._resolve(key)]
    def __setitem__(self, key, value):
        """Bind *key* to *value*, reusing an existing case-variant if folding."""
        if self.ignorecase:
            # setdefault records the first spelling seen and returns that same
            # spelling for every later case-variant of the name.
            key = self.orig.setdefault(key.upper(), key)
        self.symbols[key] = value
    def __contains__(self, key):
        """Return True if *key* (case-folded when enabled) has a value."""
        return self._resolve(key) in self.symbols
    def __str__(self):
        """Return one ``name: value`` line per symbol, sorted by name."""
        return '\n'.join([
            f"{k}: {v}" for k, v in sorted(self.symbols.items())])
 class ASM(SimpleNamespace):
    @staticmethod
@ -102,7 +130,7 @@ class Expr(SimpleNamespace):
            return self.value
        else:
            return OPS[self.op](self.l.eval(ctx), self.r.eval(ctx))
-    def simplify(self, ctx={}):
+    def simplify(self, ctx=SymbolTable(CASE_INSENSITIVE)):
        if self.op == 'NUMBER':
            return self
        elif self.op == 'SYMBOL':
@ -129,16 +157,18 @@ class Indirect(SimpleNamespace):
 lg = LexerGenerator()
 STARTOP = r'(?<![a-zA-Z0-9_\.\$])('
 ENDOP = r')(?![a-zA-Z0-9_\.\$])'
 tokens = [
-    ('OP2', '|'.join([x for x, (_, e) in OPCODES.items() if e is None])),
+    ('OP2', STARTOP+'|'.join([x for x, (_, e) in OPCODES.items() if e is None])+ENDOP),
-    ('OP1', '|'.join([x for x, (_, e) in OPCODES.items() if e is not None])),
+    ('OP1', STARTOP+'|'.join([x for x, (_, e) in OPCODES.items() if e is not None])+ENDOP),
-    ('DIR1', r'\.org'),
+    ('DIR1', STARTOP+r'\.org'+ENDOP),
-    ('DIRN', r'\.(data|word)|DAT'),
+    ('DIRN', STARTOP+r'\.(data|word)|DAT'+ENDOP),
-    ('REG', '|'.join(REGISTERS.keys())),
+    ('REG', STARTOP+'|'.join(REGISTERS.keys())+ENDOP),
-    ('PUSH', r'PUSH'),
+    ('PUSH', STARTOP+r'PUSH'+ENDOP),
-    ('POP', r'POP'),
+    ('POP', STARTOP+r'POP'+ENDOP),
-    ('PEEK', r'PEEK'),
+    ('PEEK', STARTOP+r'PEEK'+ENDOP),
-    ('PICK', r'PICK'),
+    ('PICK', STARTOP+r'PICK'+ENDOP),
    ('ADD', r'\+'),
    ('COMMA', r','),
    ('SUBTRACT', r'-'),
@ -149,8 +179,8 @@ tokens = [
    ('EOL', r'[\n\r]+'),
    ('COLON', r':'),
    ('STRLIT', r'"([^"\\]|\\.)*"'),
-    ('NUMBER', r'0x[0-9a-fA-F]+|\$[0-9a-fA-F]+|0b[01]+|0[0-7]+|\d+'),
+    ('NUMBER', STARTOP+r'-?(0x[0-9a-fA-F]+|\$[0-9a-fA-F]+|0b[01]+|0[0-7]+|\d+)'+ENDOP),
-    ('SYMBOL', r'[a-zA-Z_][0-9a-zA-Z_]*')
+    ('SYMBOL', STARTOP+r'[a-zA-Z_][0-9a-zA-Z_]*'+ENDOP)
 ]
 for name, regex in tokens:
@ -210,21 +240,21 @@ def line(p):
@pg.production("op : OP2 arg_b COMMA arg_a")
 def op_op2(p):
-    return ASM(label=None, op=p[0].getstr(), b=p[1], a=p[3])
+    return ASM(label=None, op=p[0].getstr().upper(), b=p[1], a=p[3], pos=p[0].getsourcepos())
@pg.production("op : OP1 arg_a")
 # Some source code has a comma before the argument
@pg.production("op : OP1 COMMA arg_a")
 def op_op1(p):
-    return ASM(label=None, op=p[0].getstr(), a=p[-1], b=None)
+    return ASM(label=None, op=p[0].getstr().upper(), a=p[-1], b=None, pos=p[0].getsourcepos())
@pg.production("op : DIRN exprlist")
 def op_dirn(p):
-    return Directive(label=None, directive=p[0].getstr(), args=p[1])
+    return Directive(label=None, directive=p[0].getstr().upper(), args=p[1], pos=p[0].getsourcepos())
@pg.production("op : DIR1 expr")
 def op_dir1(p):
-    return Directive(label=None, directive=p[0].getstr(), args=p[1])
+    return Directive(label=None, directive=p[0].getstr().upper(), args=p[1], pos=p[0].getsourcepos())
@pg.production("exprlist : exprlist COMMA expr")
@pg.production("exprlist : exprlist COMMA string")
@ -246,7 +276,7 @@ def arg_a_pop(p):
@pg.production("arg_a : SBO REG ADD ADD SBC")
 def arg_a_pop_explicit(p):
-    if p[1].getstr() != 'SP':
+    if p[1].getstr().upper() != 'SP':
        raise SyntaxError()
    return Indirect(reg='SP', disp=0, sp='inc')
@ -260,17 +290,17 @@ def arg_b_push(p):
@pg.production("arg_b : SBO SUBTRACT SUBTRACT REG SBC")
 def arg_b_push_explicit(p):
-    if p[3].getstr() != 'SP':
+    if p[3].getstr().upper() != 'SP':
        raise SyntaxError()
    return Indirect(reg='SP', disp=0, sp='dec')
@pg.production("arg : REG")
 def arg(p):
-    return Register(name=p[0].getstr())
+    return Register(name=p[0].getstr().upper())
@pg.production("arg : SBO REG SBC")
 def arg_ind_reg(p):
-    return Indirect(reg=p[1], disp=0)
+    return Indirect(reg=p[1].getstr().upper(), disp=0)
@pg.production("arg : SBO expr SBC")
 def arg_ind(p):
@ -285,7 +315,7 @@ def arg_ind_reg_disp(p):
        if type(t) == Expr:
            disp = t
        elif t.gettokentype() == 'REG':
-            reg = t.getstr()
+            reg = t.getstr().upper()
    return Indirect(reg=reg, disp=disp)
@pg.production("arg : PEEK")
@ -307,6 +337,10 @@ def arg_expr(p):
 def expr_op(p):
    return Expr(op=p[1].gettokentype(), l=p[0], r=p[2]).simplify()
@pg.production("expr : SUBTRACT expr")
 def nexpr_op(p):
    """Handle unary minus by rewriting ``-expr`` as ``0 - expr``.

    ``p[1]`` is the operand expression matched after the SUBTRACT token.
    The resulting SUBTRACT node is passed through ``simplify()`` before
    being returned.
    """
    return Expr(op='SUBTRACT', l=Expr(op='NUMBER', value=0), r=p[1]).simplify()
@pg.production("string : STRLIT")
 def strlit(p):
    # TODO: handle escapes
@ -316,6 +350,10 @@ def strlit(p):
 def expr_num(p):
    base = 10
    text = p[0].getstr()
    negate = False
    if text.startswith('-'):
        negate = True
        text = text[1:]
    if text.startswith('0b'):
        base = 2
        text = text[2:]
@ -328,7 +366,10 @@ def expr_num(p):
    elif text.startswith('0') and len(text) > 1:
        base = 8
        text = text[1:]
-    return Expr(op='NUMBER', value=int(text, base))
+    value = int(text, base)
    if negate:
        value = -value
    return Expr(op='NUMBER', value=value)
@pg.production("expr : SYMBOL")
 def expr_sym(p):
@ -367,25 +408,29 @@ def assemble(ctx, inst):
        return inst.code()
 class AssemblerError(Exception):
    """Error detected while assembling (e.g. an emitted word out of range).

    Derives from ``Exception`` rather than ``BaseException`` so that generic
    ``except Exception`` handlers catch it like any other application error;
    ``BaseException`` is meant for interpreter-exit signals such as
    ``KeyboardInterrupt`` and ``SystemExit``.
    """
 if __name__ == '__main__':
    import sys
-    sym = {}
+    sym = SymbolTable(CASE_INSENSITIVE)
    sym['.addr'] = 0
    insns = []
    for filename in sys.argv[1:]:
        with open(filename, 'r') as sourcefile:
            code = parser.parse(lexer.lex(sourcefile.read()))
        for inst in code:   # pylint: disable=E1133
-            # print(sym['.addr'], inst)
+            print(sym['.addr'], inst)
            a = sym['.addr']
            c = assemble(sym, inst)
            if c is not None:
-                insns.append((a, c))
+                insns.append((inst.pos, a, c))
        print(sym)
        print(insns)
        sym['.addr'] = 0
        binimage = b''
-        for a, c in insns:   # pylint: disable=E1133
+        for pos, a, c in insns:   # pylint: disable=E1133
            words = []
            for w in c:
                if type(w) == int:
@ -393,10 +438,13 @@ if __name__ == '__main__':
                else:
                    words.append(w.eval(sym))
            if words:
-                print(["%04x" % x for x in words])
+                print(pos,["%04x" % x for x in words])
                for w in words:
                    if w < 0:
                        w = (1<<16)+w
                    if w < 0 or w > 0xffff:
                        raise AssemblerError("Value out of bounds: "+str(w))
                    binimage += struct.pack("<H", w)
                    #binimage += struct.pack(">H", w)
        outfilename = filename[:filename.rfind('.')]+'.bin'
        with open(outfilename, 'wb') as binfile:
            binfile.write(binimage)