# chipty5/ch8asm.py

#!/usr/bin/env python3

from typing import NamedTuple
import re, struct


# One lexical token plus the place it was found.  Fields, in order:
#   type     - name of the TOKENS pattern that matched (e.g. "MNEMONIC")
#   value    - matched text; assemble_instruction() replaces it with an int
#              once a SYMBOL/STAR token has been resolved
#   filename - file the token came from
#   line     - line number inside that file
#   column   - offset of the match inside the (comment-stripped) line
Token = NamedTuple(
    "Token",
    [
        ("type", str),
        ("value", str),
        ("filename", str),
        ("line", int),
        ("column", int),
    ],
)


# Negative lookahead placed after every keyword-like pattern: the keyword,
# register or hex number only counts when it is NOT immediately followed by a
# character that could continue a symbol.  Without it "FF" lexes as the
# register F followed by the number F, and labels such as "START", "draw" or
# "VALUE" are chopped into a register/number prefix plus a leftover symbol.
_NOT_IN_WORD = r"(?![a-zA-Z_.0-9])"

_MNEMONICS = [
    "CLS",
    "RET",
    "SYS",
    "JP",
    "CALL",
    "SE",
    "LD",
    "ADD",
    "AND",
    "OR",
    "XOR",
    "SUBN",
    "SUB",
    "SHR",
    "SHL",
    "SNE",
    "RND",
    "DRW",
    "SKP",
    "SKNP",
    "DATA",
    "EXIT",
]

# (token name, regex) pairs.  Order matters: the alternatives are tried left to
# right, so a word that is both a mnemonic/register and a valid hex number
# (e.g. "ADD", "B", "F") is never seen as a NUMBER, and an all-hex word such as
# "abc" is a NUMBER rather than a SYMBOL.
TOKENS = [
    ("MNEMONIC", "(?:" + "|".join(_MNEMONICS) + ")" + _NOT_IN_WORD),
    ("VREG", r"V[0-9a-fA-F]" + _NOT_IN_WORD),
    # "R" is listed so that the `LD Vx, R` form handled by the encoder can
    # actually be tokenized.  "[I]" needs no lookahead: it ends in a bracket.
    ("PREG", r"(?:ST|DT|F|B|I|K|R)" + _NOT_IN_WORD + r"|\[I\]"),
    ("NUMBER", r"[0-9a-fA-F]{1,4}" + _NOT_IN_WORD),
    ("SYMBOL", r"[a-zA-Z_.]+[a-zA-Z_.0-9]*"),
    ("COMMA", r","),
    ("COLON", r":"),
    ("STAR", r"\*"),
    ("EQUAL", r"="),
    ("SPACE", r"[ \t]+"),
    # Catch-all so that unexpected characters surface as tokens instead of
    # being skipped silently by finditer().
    ("MISMATCH", r"."),
]

# One alternation of named groups; `match.lastgroup` gives the token name.
TOK_RE = re.compile("|".join(f"(?P<{tn}>{tp})" for tn, tp in TOKENS))


def tokenize_line(line, filename="", lineno=-1):
    """Split one line of assembly source into a list of Token objects.

    Everything from the first ";" onwards is treated as a comment and
    discarded.  SPACE tokens are dropped; every other match of TOK_RE
    (including MISMATCH) is kept, tagged with `filename`, `lineno` and the
    column at which it starts.

    Returns an empty list when nothing is left after removing the comment.
    """
    code = line.split(";", 1)[0]
    if not code:
        return []
    return [
        Token(
            type=found.lastgroup,
            value=found.group(),
            filename=filename,
            line=lineno,
            column=found.start(),
        )
        for found in TOK_RE.finditer(code)
        if found.lastgroup != "SPACE"
    ]


def tokenize_file(filename):
    """Yield the token list of every line in `filename`, in file order.

    Lines are numbered from 1.  Blank and comment-only lines are not skipped
    here; they are yielded as whatever tokenize_line() returns for them.
    """
    with open(filename, "r") as source:
        for number, text in enumerate(source, start=1):
            yield tokenize_line(text, filename, number)


class ParseError(Exception):
    """Raised when assembly source cannot be turned into an opcode."""


def ensure_size(val, bits):
    """Return the numeric value of a literal token, checked against a bit width.

    Args:
        val: a token of type "NUMBER" or "SYMBOL".  Its `value` is either a
            string of hexadecimal digits or an int (the latter is what a
            resolved symbol carries).
        bits: number of bits available for the value in the opcode.

    Returns:
        The value as an int, guaranteed to satisfy 0 <= value < 2**bits.

    Raises:
        ParseError: if the token is not a literal, its text is not valid
            hexadecimal, or the value does not fit in `bits` bits.
    """
    if val.type not in ("NUMBER", "SYMBOL"):
        raise ParseError(f"Literal value expected, found {val} instead")
    raw = val.value
    if isinstance(raw, str):
        try:
            number = int(raw, 16)
        except ValueError:
            # e.g. a SYMBOL that was never defined: report it as a source
            # error rather than leaking a bare ValueError to the caller.
            raise ParseError(f"Invalid hexadecimal literal: {val}") from None
    else:
        number = raw
    if number < 0:
        raise ParseError(f"Negative value not allowed: {val}")
    if number >= (1 << bits):
        raise ParseError(f"Value too large for {bits} bits: {val}")
    return number


def vreg_index(tok):
    """Return the register number of a VREG token.

    The number is the last character of the token text read as a single
    hexadecimal digit.

    Raises:
        ParseError: if `tok` is not a VREG token.
    """
    if tok.type == "VREG":
        return int(tok.value[-1], 16)
    raise ParseError(f"V register expected, found {tok} instead")


# Opcode templates; register numbers and immediates are OR-ed into them.
_OPCODES_NO_OPERAND = {"CLS": 0x00E0, "RET": 0x00EE, "EXIT": 0x00FD}
_OPCODES_ADDRESS = {"SYS": 0x0000, "JP": 0x1000, "CALL": 0x2000}
_OPCODES_KEY_SKIP = {"SKP": 0xE09E, "SKNP": 0xE0A1}
_OPCODES_SHIFT = {"SHR": 0x8006, "SHL": 0x800E}
_OPCODES_REG_REG = {
    "OR": 0x8001,
    "AND": 0x8002,
    "XOR": 0x8003,
    "SUB": 0x8005,
    "SUBN": 0x8007,
    "SHR": 0x8006,
    "SHL": 0x800E,
}
# mnemonic -> (register/register form, register/immediate form)
_OPCODES_COMPARE = {"SE": (0x5000, 0x3000), "SNE": (0x9000, 0x4000)}
# LD Vx, <special register>
_OPCODES_LD_VX_PREG = {"DT": 0xF007, "K": 0xF00A, "[I]": 0xF065, "R": 0xF085}
# LD <special register>, Vx  ("R" is the store counterpart of LD Vx, R)
_OPCODES_LD_PREG_VX = {
    "DT": 0xF015,
    "ST": 0xF018,
    "F": 0xF029,
    "B": 0xF033,
    "[I]": 0xF055,
    "R": 0xF075,
}


def _encode_one_operand(mnemonic, op1):
    """Encode an instruction made of a mnemonic and a single operand."""
    if mnemonic in _OPCODES_ADDRESS:
        return _OPCODES_ADDRESS[mnemonic] | ensure_size(op1, 12)
    if mnemonic == "DATA":
        return ensure_size(op1, 16)
    if mnemonic in _OPCODES_KEY_SKIP:
        return _OPCODES_KEY_SKIP[mnemonic] | (vreg_index(op1) << 8)
    if mnemonic in _OPCODES_SHIFT:
        # Single-operand shift: the register is used as both x and y.
        reg = vreg_index(op1)
        return _OPCODES_SHIFT[mnemonic] | reg << 8 | reg << 4
    raise ParseError("Invalid number of operands for " + mnemonic)


def _encode_ld(op1, op2):
    """Encode the two-operand forms of LD."""
    if op1.type == "VREG":
        x = vreg_index(op1) << 8
        if op2.type == "VREG":
            return 0x8000 | x | vreg_index(op2) << 4
        if op2.type == "PREG" and op2.value in _OPCODES_LD_VX_PREG:
            return _OPCODES_LD_VX_PREG[op2.value] | x
        if op2.type == "NUMBER":
            return 0x6000 | x | ensure_size(op2, 8)
    elif op1.type == "PREG":
        if op1.value == "I":
            return 0xA000 | ensure_size(op2, 12)
        if op1.value in _OPCODES_LD_PREG_VX:
            return _OPCODES_LD_PREG_VX[op1.value] | (vreg_index(op2) << 8)
    raise ParseError(f"Invalid operands for LD: {op1}, {op2}")


def _encode_two_operands(mnemonic, op1, op2):
    """Encode an instruction made of a mnemonic and two operands."""
    if mnemonic in _OPCODES_COMPARE:
        reg_form, imm_form = _OPCODES_COMPARE[mnemonic]
        if op2.type == "VREG":
            return reg_form | vreg_index(op1) << 8 | vreg_index(op2) << 4
        return imm_form | vreg_index(op1) << 8 | ensure_size(op2, 8)
    if mnemonic == "LD":
        return _encode_ld(op1, op2)
    if mnemonic == "ADD":
        if op1.type == "VREG":
            if op2.type == "VREG":
                return 0x8004 | vreg_index(op1) << 8 | vreg_index(op2) << 4
            if op2.type == "NUMBER":
                return 0x7000 | vreg_index(op1) << 8 | ensure_size(op2, 8)
        elif op1.value == "I":
            return 0xF01E | vreg_index(op2) << 8
        raise ParseError(f"Invalid operands for ADD: {op1}, {op2}")
    if mnemonic in _OPCODES_REG_REG:
        return _OPCODES_REG_REG[mnemonic] | vreg_index(op1) << 8 | vreg_index(op2) << 4
    if mnemonic == "JP":
        if op1.type != "VREG" or op1.value != "V0":
            raise ParseError(f"Register V0 expected. {op1} found instead")
        # The address is the SECOND operand; the first one is always V0.
        return 0xB000 | ensure_size(op2, 12)
    if mnemonic == "RND":
        return 0xC000 | vreg_index(op1) << 8 | ensure_size(op2, 8)
    raise ParseError("Invalid number of operands for " + mnemonic)


def assemble_instruction(tl, symbols):
    """Encode one instruction as a 16-bit opcode.

    Args:
        tl: either an int (an already-encoded opcode, returned unchanged) or
            a list of tokens starting with a MNEMONIC, commas removed.
            SYMBOL and STAR tokens found in `symbols` are replaced in place
            by NUMBER tokens carrying the symbol's value.
        symbols: mapping of symbol name (including "*") to its int value.

    Returns:
        The opcode as an int.  If a SYMBOL/STAR token is not (yet) present in
        `symbols`, the token list `tl` itself is returned so the caller can
        retry once more symbols are known.

    Raises:
        ParseError: if the line does not start with a mnemonic, has the wrong
            number of operands, or uses an operand combination that has no
            encoding.
    """
    if isinstance(tl, int):
        return tl
    head = tl[0]
    if head.type != "MNEMONIC":
        raise ParseError("Mnemonic was expected but found " + repr(head))

    # Substitute known symbols; bail out at the first unknown one so that the
    # caller can try again in a later pass.
    for i, tok in enumerate(tl):
        if tok.type in ("SYMBOL", "STAR"):
            if tok.value not in symbols:
                return tl
            tl[i] = tok._replace(type="NUMBER", value=symbols[tok.value])

    mnemonic = head.value
    operands = tl[1:]
    if not operands:
        if mnemonic in _OPCODES_NO_OPERAND:
            return _OPCODES_NO_OPERAND[mnemonic]
    elif len(operands) == 1:
        return _encode_one_operand(mnemonic, operands[0])
    elif len(operands) == 2:
        return _encode_two_operands(mnemonic, operands[0], operands[1])
    elif len(operands) == 3 and mnemonic == "DRW":
        vx, vy, height = operands
        return (
            0xD000
            | vreg_index(vx) << 8
            | vreg_index(vy) << 4
            | ensure_size(height, 4)
        )
    raise ParseError("Invalid number of operands for " + mnemonic)


def assemble(filename):
    """Assemble `filename` in two passes and yield one opcode per instruction.

    First pass: every line is tokenized, labels and `name = value`
    definitions are recorded in the symbol table, and each instruction is
    encoded if all its symbols are already known; otherwise its token list is
    kept for later.  The symbol "*" always holds the address of the
    instruction being assembled; addresses start at 0x200 and advance by two
    per instruction.

    Second pass: the kept token lists are encoded again now that every label
    is known.

    Yields:
        An int for each instruction.  If a symbol is still undefined after
        the second pass, the unresolved token list is yielded instead so the
        caller can report it.

    Raises:
        ParseError: on malformed lines (propagated from the encoder), or when
            a `name =` definition has no value.
    """
    base = 0x200
    symbols = {}
    opcodes = []
    for tokline in tokenize_file(filename):
        if not tokline:
            continue
        symbols["*"] = base + len(opcodes) * 2
        if tokline[0].type == "SYMBOL":
            sym = tokline[0].value
            tokline = tokline[1:]
            # A label may stand alone on its line, so every look at
            # tokline[0] has to check that something is left first.
            if tokline and tokline[0].type == "COLON":
                tokline = tokline[1:]
            if tokline and tokline[0].type == "EQUAL":
                if len(tokline) < 2:
                    raise ParseError(f"Value expected after '=' for symbol {sym}")
                symbols[sym] = ensure_size(tokline[1], 32)
                continue
            symbols[sym] = symbols["*"]
        tokline = [t for t in tokline if t.type != "COMMA"]
        if not tokline:
            continue
        opcodes.append(assemble_instruction(tokline, symbols))

    # Second pass: ints pass straight through, pending token lists are
    # retried with the complete symbol table.
    addr = base
    for pending in opcodes:
        symbols["*"] = addr
        yield assemble_instruction(pending, symbols)
        addr += 2


if __name__ == "__main__":
    import os
    import sys

    # Assemble every file named on the command line into <name>.ch8.
    for fn in sys.argv[1:]:
        # splitext only looks at the last path component, so "./prog" or
        # "dir.v2/prog" keep their directory part intact.
        ofn = os.path.splitext(fn)[0] + ".ch8"
        if (
            os.path.exists(fn)
            and os.path.exists(ofn)
            and os.path.samefile(fn, ofn)
        ):
            sys.exit(f"{fn}: output would overwrite the input file")

        # Assemble completely before touching the output file, so a failed
        # run never leaves a truncated or partial .ch8 behind.
        words = list(assemble(fn))
        unresolved = [word for word in words if not isinstance(word, int)]
        if unresolved:
            for word in unresolved:
                print(word)
            sys.exit(f"{fn}: instruction(s) with undefined symbols")

        with open(ofn, "wb") as of:
            # Opcodes are stored as big-endian 16-bit words.
            of.write(b"".join(struct.pack(">H", word) for word in words))