# dcpu16/asm.py

#!/usr/bin/env python3

from types import SimpleNamespace
from rply import ParserGenerator, LexerGenerator
import re
import struct

# Passed to SymbolTable() to select whether symbol lookups ignore case.
CASE_INSENSITIVE = True

# Binary operators available in assembler expressions, keyed by the lexer
# token type of the operator.  Every value produced by the assembler ends up
# as an integer machine word, so division is integer (floor) division; true
# division would yield floats that can neither be formatted with "%04x" nor
# packed as 16-bit words.
OPS = {
    'ADD': lambda a, b: a + b,
    'SUBTRACT': lambda a, b: a - b,
    'MULTIPLY': lambda a, b: a * b,
    'DIVIDE': lambda a, b: a // b,
}

# Two-operand instructions: mnemonic -> value of the low opcode field.
_BASIC_OPCODES = {
    'SET': 0x01, 'ADD': 0x02, 'SUB': 0x03, 'MUL': 0x04,
    'MLI': 0x05, 'DIV': 0x06, 'DVI': 0x07, 'MOD': 0x08,
    'MDI': 0x09, 'AND': 0x0a, 'BOR': 0x0b, 'XOR': 0x0c,
    'SHR': 0x0d, 'ASR': 0x0e, 'SHL': 0x0f, 'IFB': 0x10,
    'IFC': 0x11, 'IFE': 0x12, 'IFN': 0x13, 'IFG': 0x14,
    'IFA': 0x15, 'IFL': 0x16, 'IFU': 0x17, 'ADX': 0x1a,
    'SBX': 0x1b, 'STI': 0x1e, 'STD': 0x1f,
}

# One-operand instructions: the low opcode field is 0 and the value below is
# placed in the field that would otherwise hold the encoded b operand.
_SPECIAL_OPCODES = {
    'JSR': 0x01, 'INT': 0x08, 'IAG': 0x09, 'IAS': 0x0a,
    'RFI': 0x0b, 'IAQ': 0x0c, 'HWN': 0x10, 'HWQ': 0x11,
    'HWI': 0x12,
}

# mnemonic -> (opcode, fixed b field).  The second element is None for
# instructions that take an explicit b operand.  Key order is preserved
# because the lexer builds its opcode patterns by iterating this dict.
OPCODES = {
    **{mnemonic: (code, None) for mnemonic, code in _BASIC_OPCODES.items()},
    **{mnemonic: (0x00, code) for mnemonic, code in _SPECIAL_OPCODES.items()},
    'JMP': (0x01, 0x1c),  # Alias for SET PC, a
}

# Register name -> operand encoding used for direct register access.
REGISTERS = {
    # General purpose registers are numbered consecutively from 0.
    **{name: number for number, name in enumerate('ABCXYZIJ')},
    'SP': 0x1b,
    'PC': 0x1c,
    'EX': 0x1d,
    'O': 0x1d,  # For compatibility with specs v1.1
}


class SymbolTable:
    """Mapping from symbol names to values with optional case folding.

    When ``ignorecase`` is true, names that differ only in case refer to the
    same entry; the spelling used for the first assignment is the one kept
    in ``symbols`` (and shown by ``str()``).  When it is false, names are
    fully case-sensitive.
    """

    def __init__(self, ignorecase=False):
        self.ignorecase = ignorecase
        # Values keyed by the stored spelling of each symbol.
        self.symbols = {}
        # Upper-cased name -> spelling first seen for that name.
        self.orig = {}

    def __getitem__(self, key):
        """Return the value bound to *key*; raise KeyError if it is unknown."""
        if self.ignorecase:
            key = self.orig[key.upper()]
        return self.symbols[key]

    def __setitem__(self, key, value):
        """Bind *key* to *value*, replacing any previous binding."""
        first_spelling = self.orig.setdefault(key.upper(), key)
        # Only redirect to the first spelling when case is being ignored;
        # otherwise 'foo' and 'FOO' must stay distinct entries so that
        # __getitem__ (which looks names up verbatim) can find both.
        if self.ignorecase:
            key = first_spelling
        self.symbols[key] = value

    def __contains__(self, key):
        """Return True if *key* has a binding under the table's case rule."""
        if self.ignorecase:
            return key.upper() in self.orig
        return key in self.symbols

    def __str__(self):
        return '\n'.join([
            f"{k}: {v}" for k, v in sorted(self.symbols.items())])


class ASM(SimpleNamespace):
    """A single DCPU-16 instruction as produced by the grammar actions.

    Attributes set by the parser: ``label``, ``op`` (upper-case mnemonic, a
    key of ``OPCODES``), ``a`` and ``b`` (operands; ``b`` is ``None`` for
    one-operand instructions) and ``pos`` (source position).
    """

    @staticmethod
    def addr(arg, is_a):
        """Encode one operand.

        Args:
            arg: an ``Expr``, ``Register``, ``Indirect`` or plain ``int``.
            is_a: True when encoding the ``a`` operand.  Only then may a
                plain ``int`` in -1..30 use the inline short-literal form.

        Returns:
            ``(bits, extra)`` where ``bits`` is the operand field value and
            ``extra`` is the additional word that follows the instruction
            (an ``int`` or ``Expr``), or ``None`` when there is none.

        Raises:
            SyntaxError: for indirect addressing through PC, EX or O.
            TypeError: when ``arg`` is not one of the supported kinds.
        """
        if type(arg) == Expr:
            return (0x1f, arg)
        if type(arg) == Register:
            return (REGISTERS[arg.name], None)
        if type(arg) == int:
            if is_a and (-1 <= arg <= 30):
                return (arg + 0x21, None)
            return (0x1f, arg)
        if type(arg) == Indirect:
            if arg.reg is None:
                return (0x1e, arg.disp)
            # The register may arrive as a plain name, a Register node or a
            # lexer token; reduce all three to an upper-case name so the
            # comparisons and REGISTERS lookups below behave the same.
            reg = arg.reg
            if type(reg) == Register:
                reg = reg.name
            elif hasattr(type(reg), "getstr"):
                reg = reg.getstr()
            reg = reg.upper()
            if reg == 'SP':
                if getattr(arg, 'sp', None) is not None:
                    # PUSH / POP share one encoding.
                    return (0x18, None)
                if arg.disp == 0:
                    return (0x19, None)
                return (0x1a, arg.disp)
            if reg in ('PC', 'EX', 'O'):
                # 'O' is an alias of EX in REGISTERS, so it is rejected too.
                raise SyntaxError(
                    "register %s cannot be used for indirect addressing" % reg)
            print("REG: "+reg)
            if arg.disp == 0:
                return (0x08+REGISTERS[reg], None)
            return (0x10+REGISTERS[reg], arg.disp)
        raise TypeError("cannot encode operand %r" % (arg,))

    def code(self):
        """Return the instruction as a list of words.

        The first element is the instruction word; it is followed by the
        extra word of operand ``a`` and then that of operand ``b`` when
        present.  Extra words may still be unevaluated ``Expr`` objects.
        """
        o, b = OPCODES[self.op]
        a_bits, a_extra = self.addr(self.a, True)
        if b is not None:
            # One-operand instruction: the b field is fixed by the opcode table.
            b_bits, b_extra = b, None
        else:
            b_bits, b_extra = self.addr(self.b, False)
        r = [o | (a_bits << 10) | (b_bits << 5)]
        for e in a_extra, b_extra:
            if e is not None:
                r.append(e)
        print("Assembing: %s %r, %r -> %r" % (self.op, self.b, self.a, r))
        return r

    def words(self):
        """Return the number of words the instruction occupies."""
        return len(self.code())


class Expr(SimpleNamespace):
    """Node of an assembler expression tree.

    ``op`` selects the node kind: ``'NUMBER'`` nodes carry ``value``,
    ``'SYMBOL'`` nodes carry ``name``, and every other ``op`` is a key of
    ``OPS`` applied to the sub-expressions ``l`` and ``r``.
    """

    def eval(self, ctx):
        """Return the value of the expression.

        ``ctx`` maps symbol names to values; looking up an unknown symbol
        propagates the mapping's lookup error.
        """
        if self.op == 'SYMBOL':
            return ctx[self.name]
        elif self.op == 'NUMBER':
            return self.value
        else:
            return OPS[self.op](self.l.eval(ctx), self.r.eval(ctx))

    def simplify(self, ctx=None):
        """Return an equivalent expression with known parts folded.

        Symbols found in ``ctx`` become NUMBER nodes and operators whose
        operands are both numbers are evaluated.  Symbols missing from
        ``ctx`` are left in place so they can be resolved later.  With no
        ``ctx`` no symbol is resolved and only constant folding happens.
        """
        if ctx is None:
            # A fresh empty mapping per call instead of a default object
            # created once at definition time and shared by all calls.
            ctx = {}
        if self.op == 'NUMBER':
            return self
        elif self.op == 'SYMBOL':
            try:
                return Expr(op='NUMBER', value=ctx[self.name])
            except LookupError:
                # Not defined (yet): keep the symbolic reference.
                return self
        else:
            e = Expr(op=self.op, l=self.l.simplify(ctx), r=self.r.simplify(ctx))
            if e.l.op == 'NUMBER' and e.r.op == 'NUMBER':
                return Expr(op='NUMBER', value=e.eval(ctx))
            else:
                return e

class Directive(SimpleNamespace):
    """Assembler directive, or a placeholder for a line with no instruction.

    The grammar actions set ``label``, ``directive`` and ``args``; directives
    parsed from source additionally carry ``pos``.
    """

class Register(SimpleNamespace):
    """Direct register operand; ``name`` holds the upper-case register name."""

class Indirect(SimpleNamespace):
    """Memory operand.

    ``reg`` is the base register (``None`` for a plain address), ``disp`` the
    displacement, and the optional ``sp`` marks the stack push/pop forms.
    """


# Lexer definition.  The rules below are registered in list order; keep
# keyword-like rules (opcodes, directives, registers, stack words) ahead of
# the generic SYMBOL rule so that they are not tokenised as plain symbols.
lg = LexerGenerator()

# STARTOP/ENDOP wrap a pattern in a group guarded by a negative lookbehind
# and lookahead: the match may not be directly preceded or followed by a
# letter, digit, '_', '.' or '$', i.e. it must be a whole word.
STARTOP = r'(?<![a-zA-Z0-9_\.\$])('
ENDOP = r')(?![a-zA-Z0-9_\.\$])'
tokens = [
    # OP2: mnemonics whose OPCODES entry has no fixed b field (two operands).
    ('OP2', STARTOP+'|'.join([x for x, (_, e) in OPCODES.items() if e is None])+ENDOP),
    # OP1: mnemonics with a fixed b field (one operand), including JMP.
    ('OP1', STARTOP+'|'.join([x for x, (_, e) in OPCODES.items() if e is not None])+ENDOP),
    # DIR1 takes a single expression, DIRN a comma separated list.
    ('DIR1', STARTOP+r'\.org'+ENDOP),
    ('DIRN', STARTOP+r'\.(data|word)|DAT'+ENDOP),
    ('REG', STARTOP+'|'.join(REGISTERS.keys())+ENDOP),
    ('PUSH', STARTOP+r'PUSH'+ENDOP),
    ('POP', STARTOP+r'POP'+ENDOP),
    ('PEEK', STARTOP+r'PEEK'+ENDOP),
    ('PICK', STARTOP+r'PICK'+ENDOP),
    # Operator token names double as keys of OPS.
    ('ADD', r'\+'),
    ('COMMA', r','),
    ('SUBTRACT', r'-'),
    ('MULTIPLY', r'\*'),
    ('DIVIDE', r'/'),
    # SBO / SBC: opening and closing square bracket.
    ('SBO', r'\['),
    ('SBC', r']'),
    # A run of line terminators is a single EOL token.
    ('EOL', r'[\n\r]+'),
    ('COLON', r':'),
    # Double-quoted string; backslash escapes are accepted by the pattern.
    ('STRLIT', r'"([^"\\]|\\.)*"'),
    # Hex (0x.. or $..), binary (0b..), octal (leading 0) or decimal.
    ('NUMBER', STARTOP+r'-?(0x[0-9a-fA-F]+|\$[0-9a-fA-F]+|0b[01]+|0[0-7]+|\d+)'+ENDOP),
    ('SYMBOL', STARTOP+r'[a-zA-Z_][0-9a-zA-Z_]*'+ENDOP)
]

# Every rule is matched case-insensitively.
for name, regex in tokens:
    lg.add(name, regex, re.IGNORECASE)

# Only horizontal whitespace is skipped: line terminators must reach the
# parser as EOL tokens, which is why the broader \s+ rule is disabled.
#lg.ignore(r'\s+')
lg.ignore(r'[ \t\v\f]+')
# Comments start with ';' or '#' and run to the end of the line.
lg.ignore(r'[;#].*')


# Parser generator over every token name declared for the lexer.  The two
# precedence levels make all four arithmetic operators left-associative,
# with the multiplicative pair listed after the additive pair.
pg = ParserGenerator(
    [token_name for token_name, _pattern in tokens],
    precedence=[
        ('left', ['ADD', 'SUBTRACT']),
        ('left', ['MULTIPLY', 'DIVIDE']),
    ],
)

@pg.production("main : lines")
def main(p):
    """Start rule: the program is the list built by the ``lines`` rule."""
    (program,) = p
    return program

@pg.production("lines : lines line")
def lines_lines(p):
    """Return the lines parsed so far followed by the newly parsed line."""
    earlier, latest = p
    return [*earlier, latest]

@pg.production("lines : line")
def lines_line(p):
    """Start a list of lines from a single parsed line."""
    (only_line,) = p
    return [only_line]

@pg.production("lines : none")
def lines_empty(p):
    """An empty program yields an empty list of lines."""
    return list()

@pg.production("none :")
def none(p):
    """Empty production; its value is ``None``."""

@pg.production("line : COLON SYMBOL op EOL")
@pg.production("line : COLON SYMBOL EOL")
@pg.production("line : SYMBOL op EOL")
@pg.production("line : SYMBOL COLON op EOL")
@pg.production("line : op EOL")
@pg.production("line : SYMBOL EOL")
@pg.production("line : SYMBOL COLON EOL")
@pg.production("line : EOL")
def line(p):
    """Build the node for one source line.

    A line is an optional label (``:name``, ``name:`` or a bare ``name``)
    followed by an optional instruction or directive.  Lines without an
    instruction produce an empty ``Directive`` so that the label, if any,
    still has a node to live on.
    """
    statement = Directive(label=None, directive=None, args=None)
    label_name = None
    for item in p:
        if type(item) in (ASM, Directive):
            # Already reduced by one of the ``op`` productions.
            statement = item
            continue
        if item.gettokentype() == 'SYMBOL':
            label_name = item.getstr()
    statement.label = label_name
    return statement

@pg.production("op : OP2 arg_b COMMA arg_a")
def op_op2(p):
    """Two-operand instruction written as ``OP b, a``."""
    mnemonic, operand_b, _comma, operand_a = p
    return ASM(
        label=None,
        op=mnemonic.getstr().upper(),
        b=operand_b,
        a=operand_a,
        pos=mnemonic.getsourcepos(),
    )

@pg.production("op : OP1 arg_a")
# Some source code has a comma before the argument
@pg.production("op : OP1 COMMA arg_a")
def op_op1(p):
    """One-operand instruction; the operand is always the last item."""
    mnemonic = p[0]
    operand = p[-1]
    return ASM(
        label=None,
        op=mnemonic.getstr().upper(),
        a=operand,
        b=None,
        pos=mnemonic.getsourcepos(),
    )

@pg.production("op : DIRN exprlist")
def op_dirn(p):
    """Directive taking a list of expressions and/or strings."""
    keyword, values = p
    return Directive(
        label=None,
        directive=keyword.getstr().upper(),
        args=values,
        pos=keyword.getsourcepos(),
    )

@pg.production("op : DIR1 expr")
def op_dir1(p):
    """Directive taking exactly one expression."""
    keyword, value = p
    return Directive(
        label=None,
        directive=keyword.getstr().upper(),
        args=value,
        pos=keyword.getsourcepos(),
    )

@pg.production("exprlist : exprlist COMMA expr")
@pg.production("exprlist : exprlist COMMA string")
def exprlist_exprlist(p):
    """Return the list so far with the item after the comma appended."""
    collected, _comma, item = p
    return [*collected, item]

@pg.production("exprlist : expr")
@pg.production("exprlist : string")
def exprlist_expr(p):
    """Start an expression list from its first item."""
    (first_item,) = p
    return [first_item]

@pg.production("arg_a : arg")
def arg_a_arg(p):
    """Any generic operand is accepted in the ``a`` position."""
    (operand,) = p
    return operand

@pg.production("arg : POP")
def arg_pop(p):
    """``POP`` keyword: stack operand marked as post-increment."""
    operand = Indirect(reg='SP', disp=0, sp='inc')
    return operand

@pg.production("arg_a : SBO REG ADD ADD SBC")
def arg_a_pop_explicit(p):
    """``[SP++]`` spelling of POP; any other register is a syntax error."""
    register_name = p[1].getstr().upper()
    if register_name == 'SP':
        return Indirect(reg='SP', disp=0, sp='inc')
    raise SyntaxError()

@pg.production("arg_b : arg")
def arg_b_arg(p):
    """Any generic operand is accepted in the ``b`` position."""
    (operand,) = p
    return operand

@pg.production("arg : PUSH")
def arg_push(p):
    """``PUSH`` keyword: stack operand marked as pre-decrement."""
    operand = Indirect(reg='SP', disp=0, sp='dec')
    return operand

@pg.production("arg : SBO SUBTRACT SUBTRACT REG SBC")
def arg_push_explicit(p):
    """``[--SP]`` spelling of PUSH; any other register is a syntax error."""
    register_name = p[3].getstr().upper()
    if register_name == 'SP':
        return Indirect(reg='SP', disp=0, sp='dec')
    raise SyntaxError()

@pg.production("arg : REG")
def arg(p):
    """Direct register operand; the name is normalised to upper case."""
    (token,) = p
    register_name = token.getstr().upper()
    return Register(name=register_name)

@pg.production("arg : SBO REG SBC")
def arg_ind_reg(p):
    """``[REG]``: memory at the address held in a register."""
    _open, token, _close = p
    return Indirect(reg=token.getstr().upper(), disp=0)

@pg.production("arg : SBO expr SBC")
def arg_ind(p):
    """``[expr]``: memory at the address given by an expression."""
    _open, address, _close = p
    return Indirect(reg=None, disp=address)

@pg.production("arg : SBO REG ADD expr SBC")
@pg.production("arg : SBO expr ADD REG SBC")
def arg_ind_reg_disp(p):
    """``[REG + expr]`` or ``[expr + REG]``: register plus displacement."""
    base_register, displacement = None, 0
    # The register and the expression may appear in either order, so each
    # item is classified by what it is rather than by its position.
    for item in p:
        if type(item) == Expr:
            displacement = item
            continue
        if item.gettokentype() == 'REG':
            base_register = item.getstr().upper()
    return Indirect(reg=base_register, disp=displacement)

@pg.production("arg : PEEK")
def arg_peek(p):
    """``PEEK`` keyword: same operand as ``[SP]``.

    The register is given as the plain name ``'SP'``, like every other
    production that builds an ``Indirect``; ``ASM.addr`` compares ``reg``
    against register-name strings, so a ``Register`` object here would not
    be recognised as the stack pointer.
    """
    return Indirect(reg='SP', disp=0)

@pg.production("arg : PICK expr")
def arg_pick(p):
    """``PICK n`` keyword: same operand as ``[SP + n]``.

    The register is given as the plain name ``'SP'``, like every other
    production that builds an ``Indirect``; ``ASM.addr`` compares ``reg``
    against register-name strings, so a ``Register`` object here would not
    be recognised as the stack pointer.
    """
    return Indirect(reg='SP', disp=p[1])

@pg.production("arg : expr")
def arg_expr(p):
    """A bare expression used as an operand is passed through unchanged."""
    (expression,) = p
    # FIXME marker carried over from the original code (no details given).
    return expression

@pg.production("expr : expr ADD expr")
@pg.production("expr : expr SUBTRACT expr")
@pg.production("expr : expr MULTIPLY expr")
@pg.production("expr : expr DIVIDE expr")
def expr_op(p):
    """Binary operation; the operator's token type becomes the node's op."""
    left, operator_token, right = p
    node = Expr(op=operator_token.gettokentype(), l=left, r=right)
    # Fold immediately when both sides are already numbers.
    return node.simplify()

@pg.production("expr : SUBTRACT expr")
def nexpr_op(p):
    """Unary minus, represented as ``0 - expr``."""
    _minus, operand = p
    zero = Expr(op='NUMBER', value=0)
    return Expr(op='SUBTRACT', l=zero, r=operand).simplify()

@pg.production("string : STRLIT")
def strlit(p):
    """Return the text of a string literal without its surrounding quotes."""
    (token,) = p
    quoted = token.getstr()
    # TODO: handle escapes
    return quoted[1:-1]

@pg.production("expr : NUMBER")
def expr_num(p):
    """Convert a NUMBER token into a NUMBER expression node.

    Accepted forms: ``0x..`` or ``$..`` (hex), ``0b..`` (binary), a leading
    ``0`` followed by more digits (octal) and plain decimal, each with an
    optional leading ``-``.

    Raises:
        ValueError: if the digits are not valid for the selected base.
    """
    # The lexer matches case-insensitively, so the base prefix may be
    # written in upper case (0X1F, 0B101); fold it before inspecting it.
    text = p[0].getstr().lower()
    negate = text.startswith('-')
    if negate:
        text = text[1:]
    if text.startswith('0b'):
        base, digits = 2, text[2:]
    elif text.startswith('0x'):
        base, digits = 16, text[2:]
    elif text.startswith('$'):
        base, digits = 16, text[1:]
    elif text.startswith('0') and len(text) > 1:
        base, digits = 8, text[1:]
    else:
        base, digits = 10, text
    value = int(digits, base)
    return Expr(op='NUMBER', value=-value if negate else value)

@pg.production("expr : SYMBOL")
def expr_sym(p):
    """Reference to a symbol, kept with the spelling used in the source."""
    (token,) = p
    return Expr(op='SYMBOL', name=token.getstr())

# Build the lexer and parser from the rules and productions registered
# above; both objects are used by the command line entry point.
lexer = lg.build()
parser = pg.build()

def assemble(ctx, inst):
    """Assemble one parsed line and advance the location counter.

    Args:
        ctx: symbol table; the current address is stored under ``'.addr'``.
        inst: an ``ASM`` or ``Directive`` node produced by the parser.

    Returns:
        A list of words for the line (each an ``int`` or an ``Expr`` still
        waiting for a symbol), or ``None`` when the line emits nothing.

    A label on the line is bound to the address in effect before the line
    is processed.
    """
    if inst.label is not None:
        ctx[inst.label] = ctx['.addr']
    if type(inst) == Directive:
        # The grammar actions upper-case directive names, so compare in
        # upper case; label-only and blank lines have no directive at all.
        directive = inst.directive.upper() if inst.directive else None
        if directive == ".ORG":
            ctx['.addr'] = inst.args.eval(ctx)
            return None
        if directive in (".DATA", ".WORD", "DAT"):
            al = []
            for a in inst.args:
                if type(a) == str:
                    # One word per character of a string literal.
                    al.extend([ord(x) for x in a])
                else:
                    al.append(a.simplify(ctx))
            ctx['.addr'] += len(al)
            return al
        return None
    if type(inst) == ASM:
        if type(inst.a) == Expr:
            inst.a = inst.a.simplify(ctx)
        if type(inst.b) == Expr:
            inst.b = inst.b.simplify(ctx)
        # Encode once and reuse the result for the size and return value.
        code = inst.code()
        if inst.b is not None:
            print(f"{ctx['.addr']} {inst.op} {inst.b}, {inst.a} [len={len(code)}]")
        else:
            print(f"{ctx['.addr']} {inst.op} {inst.a} [len={len(code)}]")

        ctx['.addr'] += len(code)
        return code
    return None


class AssemblerError(Exception):
    """Raised when the assembled output is invalid (e.g. a word out of range).

    Derives from ``Exception`` rather than ``BaseException`` so that ordinary
    ``except Exception`` handlers treat it like any other program error.
    """


if __name__ == '__main__':
    # Command line entry point: assemble every file named on the command
    # line into a little-endian binary image written next to it as
    # <name>.bin.
    import os
    import sys

    for filename in sys.argv[1:]:
        # Each input file is assembled on its own: the location counter
        # restarts at 0 for every file, so symbols and emitted words must
        # not carry over from a previous file either.
        sym = SymbolTable(CASE_INSENSITIVE)
        sym['.addr'] = 0
        insns = []
        with open(filename, 'r') as sourcefile:
            source = sourcefile.read()
        # Every grammar line ends in EOL; add one so that a final line
        # without a trailing newline still parses.
        code = parser.parse(lexer.lex(source + '\n'))

        # Pass 1: assign addresses and collect words; forward references
        # stay as unevaluated expressions.
        for inst in code:   # pylint: disable=E1133
            print(sym['.addr'], inst)
            a = sym['.addr']
            c = assemble(sym, inst)
            if c is not None:
                insns.append((inst.pos, a, c))
        print(sym)
        print(insns)
        sym['.addr'] = 0

        # Pass 2: all labels are known now, so resolve what is left and
        # pack the words.
        binimage = bytearray()
        for pos, a, c in insns:   # pylint: disable=E1133
            words = [w if type(w) == int else w.eval(sym) for w in c]
            if words:
                print(pos, ["%04x" % x for x in words])
                for w in words:
                    if w < 0:
                        # Two's complement representation of negatives.
                        w = (1 << 16) + w
                    if w < 0 or w > 0xffff:
                        raise AssemblerError("Value out of bounds: "+str(w))
                    binimage += struct.pack("<H", w)
        # splitext leaves names without an extension intact and ignores
        # dots in directory names.
        outfilename = os.path.splitext(filename)[0] + '.bin'
        with open(outfilename, 'wb') as binfile:
            binfile.write(binimage)