#!/usr/bin/env python3
"""Small two-pass assembler that emits big-endian 16-bit words.

Each source line may carry an optional label (``name:``), a constant
definition (``name = value``), or one instruction.  Numbers are written in
hexadecimal without a prefix, ``;`` starts a comment and ``*`` stands for the
address of the current instruction.  The first word is placed at ``ORIGIN``.

NOTE(review): the opcode layout and the ``.ch8`` output extension suggest the
CHIP-8 / SUPER-CHIP instruction set - confirm against the intended target.
"""

import os
import re
import struct
import sys
from typing import Dict, Iterable, Iterator, List, NamedTuple, Union


class Token(NamedTuple):
    """One lexical token together with its position in the source."""

    type: str  # name of the TOKENS entry that matched
    # Matched text; replaced by an int once a symbol has been resolved.
    value: Union[str, int]
    filename: str
    line: int
    column: int  # 0-based offset of the match inside the line


# Token table.  The patterns are combined into a single alternation, so the
# FIRST entry that matches at a position wins and order is significant.
# NOTE: no pattern is anchored on a word boundary.  An identifier that begins
# with a mnemonic, a register name or a hex digit is therefore split into
# several tokens (e.g. "data" -> NUMBER "da" + SYMBOL "ta").  Label names have
# to avoid such prefixes.
TOKENS = [
    (
        "MNEMONIC",
        "|".join(
            [
                "CLS",
                "RET",
                "SYS",
                "JP",
                "CALL",
                "SE",
                "LD",
                "ADD",
                "AND",
                "OR",
                "XOR",
                "SUBN",  # must precede "SUB" or it could never match
                "SUB",
                "SHR",
                "SHL",
                "SNE",
                "RND",
                "DRW",
                "SKP",
                "SKNP",
                "DATA",
                "EXIT",
            ]
        ),
    ),
    ("VREG", r"V[0-9a-fA-F]"),
    ("PREG", r"ST|DT|F|B|\[I\]|I|K"),
    ("NUMBER", r"[0-9a-fA-F]{1,4}"),
    ("SYMBOL", r"[a-zA-Z_.]+[a-zA-Z_.0-9]*"),
    ("COMMA", r","),
    ("COLON", r":"),
    ("STAR", r"\*"),
    ("EQUAL", r"="),
    ("SPACE", r"[ \t]+"),
    ("MISMATCH", r"."),
]

TOK_RE = re.compile("|".join([f"(?P<{tn}>{tp})" for tn, tp in TOKENS]))

# Address assigned to the first assembled word.
ORIGIN = 0x200


def tokenize_line(line: str, filename: str = "", lineno: int = -1) -> List[Token]:
    """Split one source line into tokens.

    Everything from the first ``;`` onwards is discarded as a comment and
    SPACE tokens are dropped.  Characters that match no other pattern are
    returned as MISMATCH tokens; a trailing newline produces no token at all
    because ``.`` does not match it.

    Args:
        line: Raw source line.
        filename: Recorded in every token, for diagnostics only.
        lineno: Recorded in every token, for diagnostics only.

    Returns:
        The tokens in source order; an empty list for blank/comment lines.
    """
    code = line.split(";", 1)[0]
    return [
        Token(
            type=match.lastgroup,
            value=match.group(),
            filename=filename,
            line=lineno,
            column=match.start(),
        )
        for match in TOK_RE.finditer(code)
        if match.lastgroup != "SPACE"
    ]


def tokenize_file(filename: str) -> Iterator[List[Token]]:
    """Yield the token list of every line of *filename* (numbered from 1)."""
    with open(filename, "r") as f:
        for lineno, line in enumerate(f, 1):
            yield tokenize_line(line, filename, lineno)


class ParseError(Exception):
    """Raised for any syntactic or semantic error in the assembly source."""


def ensure_size(val: Token, bits: int) -> int:
    """Return the numeric value of *val* after checking it fits in *bits* bits.

    A string value is parsed as hexadecimal; an int value (a symbol that has
    already been resolved) is used as is.

    Raises:
        ParseError: if the token is neither NUMBER nor SYMBOL, if its text is
            not a hexadecimal number (e.g. an unresolved symbol name), or if
            the value needs more than *bits* bits.
    """
    if val.type not in ("NUMBER", "SYMBOL"):
        raise ParseError(f"Literal value expected, found {val} instead")
    if isinstance(val.value, str):
        try:
            v = int(val.value, 16)
        except ValueError:
            # Previously leaked out as a bare ValueError.
            raise ParseError(
                f"Undefined symbol or invalid number: {val}"
            ) from None
    else:
        v = val.value
    if v >= (1 << bits):
        raise ParseError(f"Value to large for {bits} bits: {val}")
    return v


def vreg_index(tok: Token) -> int:
    """Return the register number (0-15) encoded in a VREG token.

    Raises:
        ParseError: if *tok* is not a VREG token.
    """
    if tok.type != "VREG":
        raise ParseError(f"V register expected, found {tok} instead")
    return int(tok.value[-1], 16)


# Mnemonics that take no operand.
_NO_OPERAND = {"CLS": 0x00E0, "RET": 0x00EE, "EXIT": 0x00FD}
# Mnemonics whose single operand is a 12-bit value.
_ADDR_OPERAND = {"SYS": 0x0000, "JP": 0x1000, "CALL": 0x2000}
# Mnemonics whose single operand is one V register.
_SINGLE_VREG = {"SKP": 0xE09E, "SKNP": 0xE0A1}
# Mnemonics that accept exactly "Vx, Vy".
_VREG_PAIR = {
    "OR": 0x8001,
    "AND": 0x8002,
    "XOR": 0x8003,
    "SUB": 0x8005,
    "SUBN": 0x8007,
    "SHR": 0x8006,
    "SHL": 0x800E,
}
# "LD Vx, <special>" encodings.  "R" is kept from the original code although
# the PREG pattern never produces it, so that entry is currently unreachable.
_LD_VREG_FROM = {"DT": 0xF007, "K": 0xF00A, "[I]": 0xF065, "R": 0xF085}
# "LD <special>, Vx" encodings ("LD I, value" is handled separately).
_LD_VREG_TO = {
    "DT": 0xF015,
    "ST": 0xF018,
    "F": 0xF029,
    "B": 0xF033,
    "[I]": 0xF055,
}


def _vreg_pair(base: int, vx: Token, vy: Token) -> int:
    """OR the indices of *vx* and *vy* into bits 8-11 and 4-7 of *base*."""
    return base | vreg_index(vx) << 8 | vreg_index(vy) << 4


def _encode_one_operand(mnemonic: str, op1: Token) -> int:
    """Encode an instruction written with exactly one operand."""
    if mnemonic in _ADDR_OPERAND:
        return _ADDR_OPERAND[mnemonic] | ensure_size(op1, 12)
    if mnemonic == "DATA":
        return ensure_size(op1, 16)
    if mnemonic in _SINGLE_VREG:
        return _SINGLE_VREG[mnemonic] | (vreg_index(op1) << 8)
    if mnemonic in ("SHR", "SHL"):
        # One-operand shift: the register is used for both register fields.
        return _vreg_pair(_VREG_PAIR[mnemonic], op1, op1)
    raise ParseError("Invalid number of operands for " + mnemonic)


def _encode_ld(op1: Token, op2: Token) -> int:
    """Encode the two-operand ``LD`` forms."""
    if op1.type == "VREG":
        if op2.type == "VREG":
            return _vreg_pair(0x8000, op1, op2)
        if op2.type == "PREG":
            if op2.value not in _LD_VREG_FROM:
                raise ParseError(f"Cannot load a V register from {op2}")
            return _LD_VREG_FROM[op2.value] | (vreg_index(op1) << 8)
        if op2.type == "NUMBER":
            return 0x6000 | vreg_index(op1) << 8 | ensure_size(op2, 8)
        raise ParseError(f"Invalid source operand for LD: {op2}")
    if op1.type == "PREG":
        if op1.value == "I":
            return 0xA000 | ensure_size(op2, 12)
        if op1.value not in _LD_VREG_TO:
            raise ParseError(f"Cannot load into {op1}")
        return _LD_VREG_TO[op1.value] | (vreg_index(op2) << 8)
    raise ParseError(f"Invalid destination operand for LD: {op1}")


def _encode_two_operands(mnemonic: str, op1: Token, op2: Token) -> int:
    """Encode an instruction written with exactly two operands."""
    if mnemonic in _VREG_PAIR:
        return _vreg_pair(_VREG_PAIR[mnemonic], op1, op2)
    if mnemonic == "SE":
        if op2.type == "VREG":
            return _vreg_pair(0x5000, op1, op2)
        return 0x3000 | vreg_index(op1) << 8 | ensure_size(op2, 8)
    if mnemonic == "SNE":
        if op2.type == "VREG":
            return _vreg_pair(0x9000, op1, op2)
        return 0x4000 | vreg_index(op1) << 8 | ensure_size(op2, 8)
    if mnemonic == "LD":
        return _encode_ld(op1, op2)
    if mnemonic == "ADD":
        if op1.type == "VREG":
            if op2.type == "VREG":
                return _vreg_pair(0x8004, op1, op2)
            if op2.type == "NUMBER":
                return 0x7000 | vreg_index(op1) << 8 | ensure_size(op2, 8)
            raise ParseError(f"Invalid source operand for ADD: {op2}")
        if op1.value == "I":
            return 0xF01E | vreg_index(op2) << 8
        raise ParseError(f"Invalid destination operand for ADD: {op1}")
    if mnemonic == "JP":
        if op1.type != "VREG" or op1.value != "V0":
            raise ParseError(f"Register V0 expected. {op1} found instead")
        # The address is the SECOND operand.  The original passed op1 (the V0
        # token) to ensure_size, so this form could never be assembled.
        return 0xB000 | ensure_size(op2, 12)
    if mnemonic == "RND":
        return 0xC000 | vreg_index(op1) << 8 | ensure_size(op2, 8)
    # Previously such lines were silently returned as if they were waiting
    # for a symbol; they are plain errors.
    raise ParseError("Invalid number of operands for " + mnemonic)


def assemble_instruction(
    tl: Union[int, List[Token]], symbols: Dict[str, int]
) -> Union[int, List[Token]]:
    """Encode one instruction, or defer it while a symbol is still unknown.

    Args:
        tl: Either an already encoded word (returned unchanged) or the
            instruction's tokens with COMMA tokens removed.  SYMBOL/STAR
            tokens found in *symbols* are replaced IN PLACE by NUMBER tokens.
        symbols: Known symbol values; ``"*"`` holds the current address.

    Returns:
        The 16-bit opcode, or *tl* itself when it still references a symbol
        that is not in *symbols* (the caller retries once all labels are
        known).

    Raises:
        ParseError: if the line does not start with a mnemonic or the
            operands are invalid for that mnemonic.
    """
    if isinstance(tl, int):
        return tl
    if not tl:
        raise ParseError("Empty instruction")
    if tl[0].type != "MNEMONIC":
        raise ParseError("Mnemonic was expected but found " + repr(tl[0]))
    mnemonic = tl[0].value

    for i, tok in enumerate(tl):
        if tok.type in ("SYMBOL", "STAR"):
            if tok.value not in symbols:
                return tl  # forward reference: try again later
            tl[i] = tok._replace(type="NUMBER", value=symbols[tok.value])

    ntok = len(tl)
    if ntok == 1:
        if mnemonic in _NO_OPERAND:
            return _NO_OPERAND[mnemonic]
    elif ntok == 2:
        return _encode_one_operand(mnemonic, tl[1])
    elif ntok == 3:
        return _encode_two_operands(mnemonic, tl[1], tl[2])
    elif ntok == 4 and mnemonic == "DRW":
        return _vreg_pair(0xD000, tl[1], tl[2]) | ensure_size(tl[3], 4)
    raise ParseError("Invalid number of operands for " + mnemonic)


def assemble_tokens(token_lines: Iterable[List[Token]]) -> Iterator[int]:
    """Assemble already tokenized lines and yield the resulting words.

    Pass one records labels/constants and encodes every instruction whose
    symbols are already known; pass two encodes the deferred ones.

    Args:
        token_lines: One token list per source line, as produced by
            ``tokenize_line``.

    Yields:
        One 16-bit word per instruction, in source order.

    Raises:
        ParseError: on malformed lines or symbols that are never defined.
    """
    symbols: Dict[str, int] = {}
    pending: List[Union[int, List[Token]]] = []

    for tokline in token_lines:
        if not tokline:
            continue
        symbols["*"] = ORIGIN + len(pending) * 2
        if tokline[0].type == "SYMBOL":
            head = tokline[0]
            tokline = tokline[1:]
            # The emptiness checks let a label stand alone on a line; the
            # original indexed tokline[0] unconditionally and crashed.
            if tokline and tokline[0].type == "COLON":
                tokline = tokline[1:]
            if tokline and tokline[0].type == "EQUAL":
                if len(tokline) < 2:
                    raise ParseError(f"Value expected after '=' for {head}")
                symbols[head.value] = ensure_size(tokline[1], 32)
                continue
            symbols[head.value] = symbols["*"]
        tokline = [t for t in tokline if t.type != "COMMA"]
        if not tokline:
            continue
        pending.append(assemble_instruction(tokline, symbols))

    for index, item in enumerate(pending):
        symbols["*"] = ORIGIN + index * 2
        word = assemble_instruction(item, symbols)
        if not isinstance(word, int):
            # Still a token list: some symbol was never defined.
            missing = next((t for t in word if t.type == "SYMBOL"), word[0])
            raise ParseError(f"Undefined symbol: {missing}")
        yield word


def assemble(filename: str) -> Iterator[int]:
    """Assemble the source file *filename* and yield its 16-bit words.

    Raises:
        ParseError: on any error in the source (see ``assemble_tokens``).
        OSError: if the file cannot be read.
    """
    yield from assemble_tokens(tokenize_file(filename))


def main(argv=None) -> int:
    """Assemble every file named in *argv* (default: ``sys.argv[1:]``).

    For each input, the output path is the input path with its extension
    replaced by ``.ch8``.  The output is written only after the whole source
    assembled successfully, so a failed run leaves no truncated file behind.

    Returns:
        0 if every file assembled, 1 if any of them had a ParseError.
    """
    args = sys.argv[1:] if argv is None else argv
    status = 0
    for fn in args:
        # splitext only looks at the last path component, unlike the former
        # fn[:fn.rindex(".")] which turned "./prog" into ".ch8".
        ofn = os.path.splitext(fn)[0] + ".ch8"
        try:
            words = list(assemble(fn))
        except ParseError as err:
            print(f"{fn}: {err}", file=sys.stderr)
            status = 1
            continue
        with open(ofn, "wb") as of:
            of.write(b"".join(struct.pack(">H", word) for word in words))
    return status


if __name__ == "__main__":
    sys.exit(main())