chipty5/ch8asm.py

294 lines
9.1 KiB
Python
Executable File

#!/usr/bin/env python3
from typing import NamedTuple
import re, struct
class Token(NamedTuple):
    """One lexical token of assembly source plus where it was found."""

    # Name of the TOK_RE group that matched, e.g. "MNEMONIC" or "VREG".
    type: str
    # Matched text.  When a symbol is resolved during assembly the token is
    # rebuilt as a "NUMBER" whose value holds the symbol's integer instead.
    value: str
    # File name passed to the tokenizer.
    filename: str
    # Line number passed to the tokenizer.
    line: int
    # Offset of the match inside the (comment-stripped) line.
    column: int
# Negative lookahead appended to keyword-like patterns: the match must not be
# followed by a character that could continue an identifier or a hex literal.
# Without it a label such as "delay" was split into NUMBER "de" + SYMBOL "lay"
# and a literal such as "FF" was read as two PREG "F" tokens.
_WORD_END = r"(?![0-9A-Za-z_.])"

# Ordered (group name, pattern) pairs.  The patterns are joined into a single
# alternation, so for a given position the first entry that matches wins.
TOKENS = [
    (
        "MNEMONIC",
        "(?:"
        + "|".join(
            [
                "CLS",
                "RET",
                "SYS",
                "JP",
                "CALL",
                "SE",
                "LD",
                "ADD",
                "AND",
                "OR",
                "XOR",
                "SUBN",
                "SUB",
                "SHR",
                "SHL",
                "SNE",
                "RND",
                "DRW",
                "SKP",
                "SKNP",
                "DATA",
                "EXIT",
            ]
        )
        + ")"
        + _WORD_END,
    ),
    ("VREG", r"V[0-9a-fA-F]" + _WORD_END),
    # "R" is included because assemble_instruction has an "LD Vx, R" form that
    # expects it as a PREG token.  "[I]" ends in a bracket and needs no guard.
    ("PREG", r"(?:ST|DT|F|B|I|K|R)" + _WORD_END + r"|\[I\]"),
    # Hexadecimal literal of any length; ensure_size() enforces the bit width.
    ("NUMBER", r"[0-9a-fA-F]+" + _WORD_END),
    ("SYMBOL", r"[a-zA-Z_.]+[a-zA-Z_.0-9]*"),
    ("COMMA", r","),
    ("COLON", r":"),
    ("STAR", r"\*"),
    ("EQUAL", r"="),
    ("SPACE", r"[ \t]+"),
    # Catch-all so that every character of the line produces some token.
    ("MISMATCH", r"."),
]
TOK_RE = re.compile("|".join(f"(?P<{tn}>{tp})" for tn, tp in TOKENS))
def tokenize_line(line, filename="", lineno=-1):
    """Split one line of assembly source into a list of Token objects.

    Everything from the first ";" onwards is treated as a comment and
    dropped.  Whitespace matches are discarded; every other TOK_RE match
    (including "MISMATCH" for unrecognised characters) is returned in
    source order, tagged with *filename*, *lineno* and its column.
    """
    code, _, _ = line.partition(";")
    if not code:
        return []
    return [
        Token(
            type=found.lastgroup,
            value=found.group(),
            filename=filename,
            line=lineno,
            column=found.start(),
        )
        for found in TOK_RE.finditer(code)
        if found.lastgroup != "SPACE"
    ]
def tokenize_file(filename):
    """Lazily yield the token list of each line of *filename*, in file order.

    Lines are numbered from 1; one list is yielded per line, which may be
    empty.
    """
    with open(filename, "r") as source:
        for number, text in enumerate(source, start=1):
            yield tokenize_line(text, filename, number)
class ParseError(Exception):
    """Raised when a line of assembly source cannot be turned into an opcode."""
def ensure_size(val, bits):
    """Return the integer value of literal token *val* if it fits in *bits* bits.

    Args:
        val: A token of type "NUMBER" or "SYMBOL".  A string value is parsed
            as hexadecimal; an int value (as stored by symbol resolution) is
            used as is.
        bits: Width of the field the value has to fit into.

    Returns:
        The value as an int.

    Raises:
        ParseError: If *val* is not a literal token, its text is not a valid
            hexadecimal number (e.g. a symbol that was never resolved), or
            the value needs more than *bits* bits.
    """
    if val.type not in ("NUMBER", "SYMBOL"):
        raise ParseError(f"Literal value expected, found {val} instead")
    if isinstance(val.value, str):
        try:
            v = int(val.value, 16)
        except ValueError:
            # A SYMBOL token reaching this point was never resolved; report it
            # as a parse problem rather than leaking a bare ValueError.
            raise ParseError(
                f"Undefined symbol or invalid literal: {val}"
            ) from None
    else:
        v = val.value
    if v >= (1 << bits):
        raise ParseError(f"Value to large for {bits} bits: {val}")
    return v
def vreg_index(tok):
    """Return the register number encoded by a "VREG" token such as "VA".

    Raises:
        ParseError: If *tok* is not a "VREG" token.
    """
    if tok.type == "VREG":
        # The register number is the trailing hex digit of the token text.
        return int(tok.value[-1], 16)
    raise ParseError(f"V register expected, found {tok} instead")
def assemble_instruction(tl, symbols):
    """Assemble one tokenised instruction into its 16-bit opcode.

    Args:
        tl: Either an already assembled opcode (int), which is returned
            unchanged, or a list of tokens that starts with a "MNEMONIC"
            token followed by its operands (commas already removed).
            "SYMBOL" and "STAR" tokens found in *symbols* are replaced
            in place by "NUMBER" tokens, so a later call on the same list
            keeps the values bound by the earlier call.
        symbols: Mapping from symbol name (including "*") to its int value.

    Returns:
        The opcode as an int, or the token list *tl* itself when it still
        contains a symbol that is not in *symbols*, so that the caller can
        retry once more symbols are known.

    Raises:
        ParseError: If the line is empty, does not start with a mnemonic, or
            the mnemonic/operand combination is not supported.
    """
    if isinstance(tl, int):
        return tl
    if not tl:
        raise ParseError("Mnemonic was expected but the line is empty")
    if tl[0].type != "MNEMONIC":
        raise ParseError("Mnemonic was expected but found " + repr(tl[0]))
    mnemonic = tl[0].value

    # Substitute known symbols.  Assigning to an existing index does not
    # change the list length, so doing it while enumerating is safe.
    for i, tok in enumerate(tl):
        if tok.type not in ("SYMBOL", "STAR"):
            continue
        if tok.value not in symbols:
            # Forward reference: hand the line back for a later retry.
            return tl
        tl[i] = tok._replace(type="NUMBER", value=symbols[tok.value])

    operands = tl[1:]

    if len(operands) == 0:
        no_operand = {"CLS": 0x00E0, "RET": 0x00EE, "EXIT": 0x00FD}
        if mnemonic in no_operand:
            return no_operand[mnemonic]
        raise ParseError("Invalid number of operands for " + mnemonic)

    if len(operands) == 1:
        (op1,) = operands
        address_ops = {"SYS": 0x0000, "JP": 0x1000, "CALL": 0x2000}
        if mnemonic in address_ops:
            return address_ops[mnemonic] | ensure_size(op1, 12)
        if mnemonic == "DATA":
            return ensure_size(op1, 16)
        key_ops = {"SKP": 0xE09E, "SKNP": 0xE0A1}
        if mnemonic in key_ops:
            return key_ops[mnemonic] | vreg_index(op1) << 8
        shift_ops = {"SHR": 0x8006, "SHL": 0x800E}
        if mnemonic in shift_ops:
            # The one-operand form puts the same register in both fields.
            x = vreg_index(op1)
            return shift_ops[mnemonic] | x << 8 | x << 4
        raise ParseError("Invalid number of operands for " + mnemonic)

    if len(operands) == 2:
        op1, op2 = operands

        if mnemonic in ("SE", "SNE"):
            if mnemonic == "SE":
                reg_base, imm_base = 0x5000, 0x3000
            else:
                reg_base, imm_base = 0x9000, 0x4000
            if op2.type == "VREG":
                return reg_base | vreg_index(op1) << 8 | vreg_index(op2) << 4
            return imm_base | vreg_index(op1) << 8 | ensure_size(op2, 8)

        if mnemonic == "LD":
            if op1.type == "VREG":
                x = vreg_index(op1)
                if op2.type == "VREG":
                    return 0x8000 | x << 8 | vreg_index(op2) << 4
                if op2.type == "NUMBER":
                    return 0x6000 | x << 8 | ensure_size(op2, 8)
                load_from = {
                    "DT": 0xF007,
                    "K": 0xF00A,
                    "[I]": 0xF065,
                    "R": 0xF085,
                }
                if op2.type == "PREG" and op2.value in load_from:
                    return load_from[op2.value] | x << 8
                raise ParseError(f"Invalid source operand for LD: {op2}")
            if op1.type == "PREG":
                if op1.value == "I":
                    return 0xA000 | ensure_size(op2, 12)
                store_to = {
                    "DT": 0xF015,
                    "ST": 0xF018,
                    "F": 0xF029,
                    "B": 0xF033,
                    "[I]": 0xF055,
                }
                if op1.value in store_to:
                    return store_to[op1.value] | vreg_index(op2) << 8
            raise ParseError(f"Invalid destination operand for LD: {op1}")

        if mnemonic == "ADD":
            if op1.type == "VREG":
                if op2.type == "VREG":
                    return 0x8004 | vreg_index(op1) << 8 | vreg_index(op2) << 4
                if op2.type == "NUMBER":
                    return 0x7000 | vreg_index(op1) << 8 | ensure_size(op2, 8)
                raise ParseError(f"Invalid source operand for ADD: {op2}")
            if op1.value == "I":
                return 0xF01E | vreg_index(op2) << 8
            raise ParseError(f"Invalid destination operand for ADD: {op1}")

        register_pair_ops = {
            "OR": 0x8001,
            "AND": 0x8002,
            "XOR": 0x8003,
            "SUB": 0x8005,
            "SHR": 0x8006,
            "SUBN": 0x8007,
            "SHL": 0x800E,
        }
        if mnemonic in register_pair_ops:
            return (
                register_pair_ops[mnemonic]
                | vreg_index(op1) << 8
                | vreg_index(op2) << 4
            )

        if mnemonic == "JP":
            if op1.type != "VREG" or op1.value != "V0":
                raise ParseError(f"Register V0 expected. {op1} found instead")
            # The address is the second operand; the first is always V0.
            return 0xB000 | ensure_size(op2, 12)

        if mnemonic == "RND":
            return 0xC000 | vreg_index(op1) << 8 | ensure_size(op2, 8)

        # Any other mnemonic has no two-operand form.  Raise instead of
        # handing the token list back as if it were a pending forward reference.
        raise ParseError("Invalid number of operands for " + mnemonic)

    if len(operands) == 3:
        if mnemonic == "DRW":
            op1, op2, op3 = operands
            return (
                0xD000
                | vreg_index(op1) << 8
                | vreg_index(op2) << 4
                | ensure_size(op3, 4)
            )
        raise ParseError("Invalid number of operands for " + mnemonic)

    raise ParseError("Invalid number of operands for " + mnemonic)
def assemble(filename):
    """Assemble the source file *filename*, yielding one result per instruction.

    The file is processed in two passes.  The first pass records symbols
    ("name:" / bare "name" labels get the current address, "name = value"
    gets the literal value) and assembles every line it can; lines that
    reference a symbol not yet defined are kept as token lists.  The second
    pass retries those lines with the complete symbol table.

    Instruction addresses start at 0x200 and advance by 2 per instruction;
    the current address is available to the source as "*".

    Yields:
        The int opcode of each instruction in source order.  A line whose
        symbols are still unknown in the second pass is yielded as its token
        list instead.

    Raises:
        ParseError: If a line cannot be assembled, or "=" is not followed by
            a value.
    """
    origin = 0x200
    symbols = {}
    opcodes = []
    for tokline in tokenize_file(filename):
        if not tokline:
            continue
        symbols["*"] = origin + len(opcodes) * 2
        if tokline[0].type == "SYMBOL":
            sym = tokline[0].value
            tokline = tokline[1:]
            # A label may stand alone on its line, so the remainder can be
            # empty at each of the following checks.
            if tokline and tokline[0].type == "COLON":
                tokline = tokline[1:]
            if tokline and tokline[0].type == "EQUAL":
                if len(tokline) < 2:
                    raise ParseError(f"Value expected after '=' for symbol {sym}")
                symbols[sym] = ensure_size(tokline[1], 32)
                continue
            symbols[sym] = symbols["*"]
        tokline = [t for t in tokline if t.type != "COMMA"]
        if not tokline:
            continue
        opcodes.append(assemble_instruction(tokline, symbols))
    for index, pending in enumerate(opcodes):
        symbols["*"] = origin + index * 2
        yield assemble_instruction(pending, symbols)
if __name__ == "__main__":
    import os
    import sys

    # Assemble every file named on the command line into "<name>.ch8".
    for fn in sys.argv[1:]:
        # splitext only considers the last path component, so dots in
        # directory names (e.g. "./prog" or "my.dir/prog") are left alone.
        ofn = os.path.splitext(fn)[0] + ".ch8"
        if os.path.abspath(ofn) == os.path.abspath(fn):
            sys.exit(f"{fn}: output would overwrite the source file")

        # Assemble completely before touching the output file so that a
        # failure does not leave a truncated or partial binary behind.
        words = list(assemble(fn))
        unresolved = [word for word in words if not isinstance(word, int)]
        if unresolved:
            # Non-int results are token lists that could not be assembled;
            # they cannot be packed, so report them and stop.
            for word in unresolved:
                print(word, file=sys.stderr)
            sys.exit(f"{fn}: {len(unresolved)} line(s) could not be assembled")

        with open(ofn, "wb") as of:
            of.write(b"".join(struct.pack(">H", word) for word in words))