commit eeafe0cbb28a941a500bf7fd7e996cc16404feb5 Author: ry755 Date: Wed Jan 26 22:21:21 2022 -0800 Initial commit I should've made a git repo for this much sooner, oops :p diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..f3aac25 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,189 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "block-buffer" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c0940dc441f31689269e10ac70eb1002a3a1d3ad1390e030043662eb7fe4688b" +dependencies = [ + "block-padding", + "byte-tools", + "byteorder", + "generic-array", +] + +[[package]] +name = "block-padding" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa79dedbb091f449f1f39e53edf88d5dbe95f895dae6135a8d7b881fb5af73f5" +dependencies = [ + "byte-tools", +] + +[[package]] +name = "byte-tools" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3b5ca7a04898ad4bcd41c90c5285445ff5b791899bb1b0abdd2a2aa791211d7" + +[[package]] +name = "byteorder" +version = "1.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c189c53d098945499cdfa7ecc63567cf3886b3332b312a5b4585d8d3a6a610" + +[[package]] +name = "digest" +version = "0.8.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3d0c8c8752312f9713efd397ff63acb9f85585afbf179282e720e7704954dd5" +dependencies = [ + "generic-array", +] + +[[package]] +name = "fake-simd" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e88a8acf291dafb59c2d96e8f59828f3838bb1a70398823ade51a84de6a6deed" + +[[package]] +name = "fox32asm" +version = "0.1.0" +dependencies = [ + "lazy_static", + "pest", + "pest_derive", +] + +[[package]] +name = "generic-array" +version = "0.12.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ffdf9f34f1447443d37393cc6c2b8313aebddcd96906caf34e54c68d8e57d7bd" +dependencies = [ + "typenum", +] + +[[package]] +name = "lazy_static" +version = "1.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" + +[[package]] +name = "maplit" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" + +[[package]] +name = "opaque-debug" +version = "0.2.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2839e79665f131bdb5782e51f2c6c9599c133c6098982a54c794358bf432529c" + +[[package]] +name = "pest" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "10f4872ae94d7b90ae48754df22fd42ad52ce740b8f370b03da4835417403e53" +dependencies = [ + "ucd-trie", +] + +[[package]] +name = "pest_derive" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "833d1ae558dc601e9a60366421196a8d94bc0ac980476d0b67e1d0988d72b2d0" +dependencies = [ + "pest", + "pest_generator", +] + +[[package]] +name = "pest_generator" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "99b8db626e31e5b81787b9783425769681b347011cc59471e33ea46d2ea0cf55" +dependencies = [ + "pest", + "pest_meta", + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "pest_meta" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "54be6e404f5317079812fc8f9f5279de376d8856929e21c184ecf6bbd692a11d" +dependencies = [ + "maplit", + "pest", + "sha-1", +] + +[[package]] +name = "proc-macro2" +version = "1.0.32" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ba508cc11742c0dc5c1659771673afbab7a0efab23aa17e854cbab0837ed0b43" +dependencies = [ + "unicode-xid", +] + +[[package]] +name = "quote" +version = "1.0.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "sha-1" +version = "0.8.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f7d94d0bede923b3cea61f3f1ff57ff8cdfd77b400fb8f9998949e0cf04163df" +dependencies = [ + "block-buffer", + "digest", + "fake-simd", + "opaque-debug", +] + +[[package]] +name = "syn" +version = "1.0.81" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2afee18b8beb5a596ecb4a2dce128c719b4ba399d34126b9e4396e3f9860966" +dependencies = [ + "proc-macro2", + "quote", + "unicode-xid", +] + +[[package]] +name = "typenum" +version = "1.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b63708a265f51345575b27fe43f9500ad611579e764c79edbc2037b1121959ec" + +[[package]] +name = "ucd-trie" +version = "0.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "56dee185309b50d1f11bfedef0fe6d036842e3fb77413abef29f8f8d1c5d4c1c" + +[[package]] +name = "unicode-xid" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..965df62 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "fox32asm" +version = "0.1.0" +edition = "2021" + +[dependencies] +lazy_static = "1.4.0" +pest = "2.1.3" +pest_derive = "2.1.0" diff --git a/src/fox32.pest b/src/fox32.pest new file mode 100644 index 0000000..5d2ae0a --- /dev/null +++ b/src/fox32.pest @@ -0,0 +1,145 @@ +WHITESPACE = _{ " " | "\t" | "\n" | "\r" | "\x0B" | "\x0C" | "\xA0" | SPACE_SEPARATOR } +COMMENT = _{ ";" ~ (!"\n" ~ ANY)* } + +assembly = { + SOI ~ (origin | include_bin | data | constant | label | instruction)* ~ EOI +} + +origin = { + origin_padding | + origin_no_padding +} +origin_padding = { "org.pad" ~ operand_value } +origin_no_padding = { "org" ~ operand_value } + +include_bin = { "#include_bin" ~ immediate_str } + +data = { + data_byte | + data_half | + data_word | + data_str +} +data_byte = { "data.8" ~ operand_value } +data_half = { "data.16" ~ operand_value } +data_word = { "data.32" ~ operand_value } +data_str = { "data.str" ~ immediate_str } + +constant = { "const" ~ constant_name ~ operand_value } +constant_name = ${label_name ~ ":"} + +label = ${ label_name ~ ":" } +label_name = @{ label_name_chars+ } +label_name_chars = @{ ASCII_ALPHANUMERIC | "_" } + +operand = { + "[" ~ operand_value_ptr ~ "]" | + operand_value +} +operand_value_ptr = { + operand_value +} +operand_value = { + register | + immediate_bin | + immediate_hex | + immediate_dec | + immediate_char | + label_name +} + +size = @{ + ".8" | + ".16" | + ".32" +} + +condition = @{ + "ifz" | + "ifnz" | + "ifc" | + "ifnc" +} + +instruction = { + condition? ~ instruction_conditional +} + +instruction_conditional = { + instruction_zero | + instruction_one ~ size? ~ operand | + instruction_two ~ size? ~ operand ~ "," ~ operand +} + +instruction_zero = @{ + "nop" | + "halt" | + "brk" | + "reti" | + "ret" | + "ise" | + "icl" +} + +instruction_one = @{ + "inc" | + "dec" | + "not" | + "jmp" | + "call" | + "loop" | + "rjmp" | + "rcall" | + "rloop" | + "push" | + "pop" +} + +instruction_two = @{ + "add" | + "sub" | + "mul" | + "pow" | + "div" | + "rem" | + "and" | + "or" | + "xor" | + "sla" | + "sra" | + "srl" | + "rol" | + "ror" | + "bse" | + "bcl" | + "bts" | + "cmp" | + "movz" | + "mov" | + "rta" | + "in" | + "out" +} + +immediate_bin = ${ "0b" ~ body_bin } +body_bin = @{ ASCII_BIN_DIGIT+ } + +immediate_hex = ${ "0x" ~ body_hex } +body_hex = @{ ASCII_HEX_DIGIT+ } + +immediate_dec = ${ body_dec } +body_dec = @{ ASCII_DIGIT+ } + +immediate_char = ${ "'" ~ body_char ~ "'" } +body_char = @{ '\x00'..'\x7F' } + +immediate_str = ${ "\"" ~ body_str ~ "\"" } +body_str = @{ body_str_chars* } +body_str_chars = { + !("\"" | "\\") ~ ANY + | "\\" ~ ("\"" | "\\" | "/" | "b" | "f" | "n" | "r" | "t") + | "\\" ~ ("u" ~ ASCII_HEX_DIGIT{4}) +} + +register = ${ "r" ~ register_num } +register_num = @{ ASCII_DIGIT+ | "sp" } diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..9ba783c --- /dev/null +++ b/src/main.rs @@ -0,0 +1,1081 @@ +#[macro_use] +extern crate lazy_static; +extern crate pest; +#[macro_use] +extern crate pest_derive; + +use pest::error::Error; +use pest::Parser; +use core::panic; +use std::collections::HashMap; +use std::env; +use std::fs::{canonicalize, read, read_to_string, File}; +use std::fmt::Debug; +use std::path::PathBuf; +use std::process::exit; +use std::rc::Rc; +use std::cell::{Cell, RefCell}; +use std::sync::Mutex; +use std::io::Write; +use std::ops::Deref; + +#[derive(Parser)] +#[grammar = "fox32.pest"] +struct Fox32Parser; + +// this is kinda dumb, but oh well !! +lazy_static! { + static ref SOURCE_PATH: Mutex = Mutex::new(PathBuf::new()); + static ref CURRENT_SIZE: Mutex = Mutex::new(Size::Word); + static ref CURRENT_CONDITION: Mutex = Mutex::new(Condition::Always); + static ref LABEL_TARGETS: Mutex>> = Mutex::new(HashMap::new()); + static ref LABEL_ADDRESSES: Mutex> = Mutex::new(HashMap::new()); +} + +#[derive(Debug, Clone)] +struct BackpatchTarget { + index: usize, + is_relative: bool, + instruction: AssembledInstruction, +} + +impl BackpatchTarget { + fn new(instruction: &AssembledInstruction, index: usize, is_relative: bool) -> BackpatchTarget { + Self { + index, is_relative, + instruction: instruction.clone(), + } + } + + fn write(&self, address: u32) { + let ref instruction = self.instruction; + let mut instruction_data = instruction.borrow_mut(); + + let address_bytes = + if self.is_relative { + (address as i32 - self.instruction.get_address() as i32).to_le_bytes() + } else { + address.to_le_bytes() + }; + + instruction_data[self.index] = address_bytes[0]; + instruction_data[self.index + 1] = address_bytes[1]; + instruction_data[self.index + 2] = address_bytes[2]; + instruction_data[self.index + 3] = address_bytes[3]; + } +} + +fn perform_backpatching(targets: &Vec, address: u32) { + for target in targets { + target.write(address); + } +} + +#[derive(Debug, Clone, Default)] +struct AssembledInstruction { + value: Rc>>, + address: Rc>, +} + +impl AssembledInstruction { + fn new() -> Self { + Self { + value: Rc::default(), + address: Rc::default(), + } + } + + fn get_address(&self) -> u32 { + self.address.get() + } + fn set_address(&self, address: u32) { + self.address.set(address); + } +} + +impl From> for AssembledInstruction { + fn from(data: Vec) -> Self { + Self { + value: Rc::new(RefCell::new(data)), + address: Rc::default(), + } + } +} + +impl From<&[u8]> for AssembledInstruction { + fn from(data: &[u8]) -> Self { + Vec::from(data).into() + } +} + +impl From<[u8; N]> for AssembledInstruction { + fn from(data: [u8; N]) -> Self { + (&data[..]).into() + } +} + +impl Deref for AssembledInstruction { + type Target = RefCell>; + + fn deref(&self) -> &Self::Target { + &self.value + } +} + +unsafe impl Send for AssembledInstruction {} +unsafe impl Sync for AssembledInstruction {} + +#[derive(PartialEq, Debug, Clone, Copy)] +enum InstructionZero { + // no operands + Nop, + Halt, + Brk, + Ret, + Reti, + Ise, + Icl, +} + +#[derive(PartialEq, Debug, Clone, Copy)] +enum InstructionOne { + // one operand + Inc, + Dec, + Not, + Jmp, + Call, + Loop, + Rjmp, + Rcall, + Rloop, + Push, + Pop, +} + +#[derive(PartialEq, Debug, Clone, Copy)] +enum InstructionTwo { + // two operands + Add, + Sub, + Mul, + Pow, + Div, + Rem, + And, + Or, + Xor, + Sla, + Sra, + Srl, + Rol, + Ror, + Bse, + Bcl, + Bts, + Cmp, + Mov, + Movz, + Rta, + In, + Out, +} + +#[derive(PartialEq, Debug, Clone, Copy)] +enum Size { + Byte, + Half, + Word, +} + +#[derive(PartialEq, Debug, Clone, Copy)] +enum Condition { + Always, + Zero, + NotZero, + Carry, + NotCarry, +} + +#[derive(PartialEq, Debug, Clone)] +enum AstNode { + OperationZero { + condition: Condition, + instruction: InstructionZero, + }, + OperationOne { + size: Size, + condition: Condition, + instruction: InstructionOne, + operand: Box, + }, + OperationTwo { + size: Size, + condition: Condition, + instruction: InstructionTwo, + lhs: Box, + rhs: Box, + }, + + Immediate8(u8), + Immediate16(u16), + Immediate32(u32), + Register(u8), + ImmediatePointer(u32), + RegisterPointer(u8), + + Constant { + name: String, + address: u32, + }, + + LabelDefine(String), + LabelOperand { + name: String, + is_relative: bool, + }, + LabelOperandPointer { + name: String, + is_relative: bool, + }, + + DataByte(u8), + DataHalf(u16), + DataWord(u32), + DataStr(String), + + IncludedBinary(Vec), + + Origin(u32), + OriginPadded(u32), +} + +fn format_address_table(m: &HashMap) -> String { + let mut v: Vec<(&String, &u32)> = m.into_iter().collect(); + v.sort_by(|(_, v1), (_, v2)| u32::cmp(v1, v2)); + v.iter().map(|(k, v)| format!("{:#010X?} :: {}", v, k)).collect::>().join("\n") +} + +fn main() { + let args: Vec = env::args().collect(); + if args.len() != 3 { + println!("fox32asm\nUsage: {} ", args[0]); + exit(1); + } + + let input_file_name = &args[1]; + let output_file_name = &args[2]; + + let mut input_file = read_to_string(input_file_name).expect("cannot read file"); + println!("Parsing includes..."); + let mut source_path = canonicalize(&input_file_name).unwrap(); + source_path.pop(); + *SOURCE_PATH.lock().unwrap() = source_path; + for _ in 0..8 { + let loop_file = input_file.clone(); // this is a hack to allow modifying input_file from inside the for loop + for (line_number, text) in loop_file.lines().enumerate() { + match text.trim() { + s if s.starts_with("#include \"") => { + input_file = include_text_file(line_number, text.trim(), input_file); + break; + }, + _ => {} + }; + } + } + + println!("Parsing file..."); + let ast = parse(&input_file); + + let mut instructions: Vec = Vec::new(); + let mut current_address: u32 = 0; + + println!("Assembling..."); + for node in ast.unwrap() { + if let AstNode::LabelDefine(name) = node { + let mut address_table = LABEL_ADDRESSES.lock().unwrap(); + address_table.insert(name.clone(), current_address); + std::mem::drop(address_table); + } else if let AstNode::Constant {name, address} = node { + let mut address_table = LABEL_ADDRESSES.lock().unwrap(); + address_table.insert(name.clone(), address); + std::mem::drop(address_table); + } else if let AstNode::Origin(origin_address) = node { + assert!(origin_address > current_address); + current_address = origin_address; + } else if let AstNode::OriginPadded(origin_address) = node { + assert!(origin_address > current_address); + let difference = (origin_address - current_address) as usize; + current_address = origin_address; + instructions.push(vec![0; difference].into()); + } else if let AstNode::IncludedBinary(binary_vec) = node { + current_address += binary_vec.len() as u32; + instructions.push(binary_vec.into()); + } else { + let instruction = assemble_node(node); + instruction.set_address(current_address); + current_address += instruction.borrow().len() as u32; + instructions.push(instruction); + } + } + + println!("Performing label backpatching..."); + let table = LABEL_TARGETS.lock().unwrap(); + let address_table = LABEL_ADDRESSES.lock().unwrap(); + + //println!("{:#?}", table); + //println!("{:#010X?}", address_table); + + let address_file = format_address_table(&address_table); + println!("{}", address_file); + + for (name, targets) in table.iter() { + perform_backpatching(targets, *address_table.get(name).expect(&format!("Label not found: {}", name))); + } + std::mem::drop(table); + std::mem::drop(address_table); + + let mut binary: Vec = Vec::new(); + for instruction in instructions { + binary.extend_from_slice(&(instruction.borrow())[..]); + } + println!("Final binary size: {} bytes = {:.2} KiB = {:.2} MiB", binary.len(), binary.len() / 1024, binary.len() / 1048576); + + let mut output_file = File::create(output_file_name).unwrap(); + output_file.write_all(&binary).unwrap(); +} + +fn include_text_file(line_number: usize, text: &str, input_file: String) -> String { + //println!("{}, {}", line_number, text); + let path_start_index = text.find("\"").unwrap() + 1; + let path_end_index = text.len() - 1; + let path_string = &text[path_start_index..path_end_index]; + //let path = canonicalize(path_string).expect(&format!("failed to include file \"{}\"", path_string)); + + let mut source_path = SOURCE_PATH.lock().unwrap().clone(); + source_path.push(path_string); + + println!("Including file as text data: {:#?}", source_path.file_name().expect("invalid filename")); + + let mut start_of_original_file = String::new(); + for (i, text) in input_file.lines().enumerate() { + if i < line_number { + start_of_original_file.push_str(text); + start_of_original_file.push('\n'); + } + } + + let mut included_file = read_to_string(source_path).expect("failed to include file"); + included_file.push('\n'); + + let mut end_of_original_file = String::new(); + for (i, text) in input_file.lines().enumerate() { + if i > line_number { + end_of_original_file.push_str(text); + end_of_original_file.push('\n'); + } + } + + let mut final_file = String::new(); + + final_file.push_str(&start_of_original_file); + final_file.push_str(&included_file); + final_file.push_str(&end_of_original_file); + final_file +} + +fn include_binary_file(pair: pest::iterators::Pair) -> AstNode { + let path_string = pair.into_inner().next().unwrap().as_str().trim(); + //let path = canonicalize(path_string).expect(&format!("failed to include file \"{}\"", path_string)); + + let mut source_path = SOURCE_PATH.lock().unwrap().clone(); + source_path.push(path_string); + + println!("Including file as binary data: {:#?}", source_path.file_name().expect("invalid filename")); + + let binary = read(source_path).expect("failed to include file"); + + AstNode::IncludedBinary(binary) +} + +fn parse(source: &str) -> Result, Error> { + let mut ast = vec![]; + let pairs = Fox32Parser::parse(Rule::assembly, source).expect("parse was unsuccessful"); + + for pair in pairs.peek().unwrap().into_inner() { + match pair.as_rule() { + Rule::EOI => break, + _ => ast.push(build_ast_from_expression(pair)), + } + } + + Ok(ast) +} + +fn build_ast_from_expression(pair: pest::iterators::Pair) -> AstNode { + //println!("{:#?}\n\n", pair); // debug + + let pair_rule = pair.as_rule(); + let mut inner_pair = pair.into_inner(); + *CURRENT_CONDITION.lock().unwrap() = Condition::Always; + let mut is_pointer = false; + match inner_pair.peek().unwrap().as_rule() { + Rule::condition => { + *CURRENT_CONDITION.lock().unwrap() = parse_condition(&inner_pair.peek().unwrap()); + inner_pair.next().unwrap(); // jump to the next instruction pair after the condition + } + Rule::operand_value_ptr => { + is_pointer = true; + } + _ => {} + } + + match pair_rule { + Rule::assembly => build_ast_from_expression(inner_pair.next().unwrap()), + Rule::instruction => parse_instruction(inner_pair.next().unwrap()), + Rule::operand => parse_operand(inner_pair.next().unwrap(), is_pointer), + Rule::constant => parse_constant(inner_pair), + Rule::label => parse_label(inner_pair.next().unwrap()), + Rule::data => parse_data(inner_pair.next().unwrap()), + Rule::origin => parse_origin(inner_pair.next().unwrap()), + Rule::include_bin => include_binary_file(inner_pair.next().unwrap()), + _ => todo!("{:#?}", pair_rule), + } +} + +fn parse_constant(pairs: pest::iterators::Pairs) -> AstNode { + let mut pairs = pairs; + let constant_name = pairs.next().unwrap().into_inner().next().unwrap().as_str(); + let operand_pair = pairs.next().unwrap(); + let operand_ast = parse_operand(operand_pair, false); + + if let AstNode::Immediate32(address) = operand_ast { + AstNode::Constant { + name: constant_name.to_string(), + address, + } + } else { + panic!("Constant must be an immediate value"); + } +} + +fn parse_label(pair: pest::iterators::Pair) -> AstNode { + AstNode::LabelDefine(pair.as_str().to_string()) +} + +fn parse_data(pair: pest::iterators::Pair) -> AstNode { + //println!("{:#?}", pair); + match pair.as_rule() { + Rule::data_byte => { + let ast = parse_operand(pair.into_inner().next().unwrap(), false); + let byte = { + if let AstNode::Immediate32(word) = ast { + word as u8 + } else { + unreachable!() + } + }; + AstNode::DataByte(byte) + }, + Rule::data_half => { + let ast = parse_operand(pair.into_inner().next().unwrap(), false); + let word = { + if let AstNode::Immediate32(word) = ast { + word as u16 + } else { + unreachable!() + } + }; + AstNode::DataHalf(word) + }, + Rule::data_word => { + match parse_operand(pair.into_inner().next().unwrap(), false) { + AstNode::Immediate32(word) => AstNode::DataWord(word), + AstNode::LabelOperand {name, is_relative} => AstNode::LabelOperand {name, is_relative}, + _ => unreachable!(), + } + }, + Rule::data_str => { + let string = pair.into_inner().next().unwrap().into_inner().next().unwrap().as_str(); + AstNode::DataStr(string.to_string()) + }, + _ => panic!("Unsupported data: {}", pair.as_str()), + } +} + +fn parse_origin(pair: pest::iterators::Pair) -> AstNode { + //println!("{:#?}", pair); + match pair.as_rule() { + Rule::origin_no_padding => { + let ast = parse_operand(pair.into_inner().next().unwrap(), false); + let address = { + if let AstNode::Immediate32(word) = ast { + word + } else { + unreachable!() + } + }; + AstNode::Origin(address) + }, + Rule::origin_padding => { + let ast = parse_operand(pair.into_inner().next().unwrap(), false); + let address = { + if let AstNode::Immediate32(word) = ast { + word + } else { + unreachable!() + } + }; + AstNode::OriginPadded(address) + }, + _ => panic!("Unsupported origin: {}", pair.as_str()), + } +} + +fn parse_size(pair: &pest::iterators::Pair) -> Size { + match pair.as_str() { + ".8" => Size::Byte, + ".16" => Size::Half, + ".32" => Size::Word, + _ => panic!("Unsupported size: {}", pair.as_str()), + } +} + +fn parse_condition(pair: &pest::iterators::Pair) -> Condition { + match pair.as_str() { + "ifz" => Condition::Zero, + "ifnz" => Condition::NotZero, + "ifc" => Condition::Carry, + "ifnc" => Condition::NotCarry, + _ => panic!("Unsupported condition: {}", pair.as_str()), + } +} + +fn parse_instruction(pair: pest::iterators::Pair) -> AstNode { + //println!("parse_instruction: {:#?}", pair); // debug + let mut size = Size::Word; + let condition = *CURRENT_CONDITION.lock().unwrap(); + match pair.as_rule() { + Rule::instruction_conditional => { + let mut inner_pair = pair.into_inner(); + let instruction_conditional_pair = inner_pair.next().unwrap(); + match instruction_conditional_pair.as_rule() { + Rule::instruction_zero => parse_instruction_zero(instruction_conditional_pair, condition), + Rule::instruction_one => { + if inner_pair.peek().unwrap().as_rule() == Rule::size { + size = parse_size(&inner_pair.next().unwrap()); + } + *CURRENT_SIZE.lock().unwrap() = size; + let operand = inner_pair.next().unwrap(); + let operand_ast = build_ast_from_expression(operand); + parse_instruction_one(instruction_conditional_pair, operand_ast, size, condition) + } + Rule::instruction_two => { + if inner_pair.peek().unwrap().as_rule() == Rule::size { + size = parse_size(&inner_pair.next().unwrap()); + } + *CURRENT_SIZE.lock().unwrap() = size; + let lhs = inner_pair.next().unwrap(); + let rhs = inner_pair.next().unwrap(); + let lhs_ast = build_ast_from_expression(lhs); + let rhs_ast = build_ast_from_expression(rhs); + parse_instruction_two(instruction_conditional_pair, lhs_ast, rhs_ast, size, condition) + } + _ => todo!(), + } + } + _ => panic!("Unsupported instruction type: {:#?}", pair.as_rule()), + } +} + +fn parse_operand(mut pair: pest::iterators::Pair, is_pointer: bool) -> AstNode { + //println!("parse_operand: {:#?}", pair); // debug + let size = *CURRENT_SIZE.lock().unwrap(); + if is_pointer { + // skip past the operand_value_ptr pair and look at its operand_value rule + pair = pair.into_inner().next().unwrap(); + } + match pair.as_rule() { + Rule::operand_value => { + let mut inner_pair = pair.into_inner(); + let operand_value_pair = inner_pair.next().unwrap(); + match operand_value_pair.as_rule() { + Rule::immediate_bin => { + let body_bin_str = operand_value_pair.into_inner().next().unwrap().as_str(); + let immediate = u32::from_str_radix(body_bin_str, 2).unwrap(); + if is_pointer { + AstNode::ImmediatePointer(immediate) + } else { + match size { + Size::Byte => AstNode::Immediate8(immediate as u8), + Size::Half => AstNode::Immediate16(immediate as u16), + Size::Word => AstNode::Immediate32(immediate), + } + } + } + Rule::immediate_hex => { + let body_hex_str = operand_value_pair.into_inner().next().unwrap().as_str(); + let immediate = u32::from_str_radix(body_hex_str, 16).unwrap(); + if is_pointer { + AstNode::ImmediatePointer(immediate) + } else { + match size { + Size::Byte => AstNode::Immediate8(immediate as u8), + Size::Half => AstNode::Immediate16(immediate as u16), + Size::Word => AstNode::Immediate32(immediate), + } + } + } + Rule::immediate_dec => { + let body_dec_str = operand_value_pair.into_inner().next().unwrap().as_str(); + let immediate = body_dec_str.parse::().unwrap(); + if is_pointer { + AstNode::ImmediatePointer(immediate) + } else { + match size { + Size::Byte => AstNode::Immediate8(immediate as u8), + Size::Half => AstNode::Immediate16(immediate as u16), + Size::Word => AstNode::Immediate32(immediate), + } + } + } + Rule::immediate_char => { + let body_char_str = operand_value_pair.into_inner().next().unwrap().as_str(); + let immediate = body_char_str.chars().nth(0).unwrap() as u8 as u32; + if is_pointer { + AstNode::ImmediatePointer(immediate) + } else { + match size { + Size::Byte => AstNode::Immediate8(immediate as u8), + Size::Half => AstNode::Immediate16(immediate as u16), + Size::Word => AstNode::Immediate32(immediate), + } + } + } + Rule::register => { + let register_num_pair = operand_value_pair.into_inner().next().unwrap(); + let register_num = if register_num_pair.as_str() == "sp" { 32 } + else { register_num_pair.as_str().parse::().unwrap() }; + if register_num > 32 { panic!("register number out of range"); } + if is_pointer { + AstNode::RegisterPointer(register_num) + } else { + AstNode::Register(register_num) + } + } + Rule::label_name => { + if is_pointer { + AstNode::LabelOperandPointer { + name: operand_value_pair.as_str().to_string(), + is_relative: false, + } + } else { + AstNode::LabelOperand { + name: operand_value_pair.as_str().to_string(), + is_relative: false, + } + } + } + _ => todo!(), + } + } + _ => panic!(), + } +} + +fn parse_instruction_zero(pair: pest::iterators::Pair, condition: Condition) -> AstNode { + AstNode::OperationZero { + condition: condition, + instruction: match pair.as_str() { + "nop" => InstructionZero::Nop, + "halt" => InstructionZero::Halt, + "brk" => InstructionZero::Brk, + "ret" => InstructionZero::Ret, + "reti" => InstructionZero::Reti, + "ise" => InstructionZero::Ise, + "icl" => InstructionZero::Icl, + _ => panic!("Unsupported conditional instruction (zero): {}", pair.as_str()), + }, + } +} + +fn parse_instruction_one(pair: pest::iterators::Pair, mut operand: AstNode, size: Size, condition: Condition) -> AstNode { + AstNode::OperationOne { + size: size, + condition: condition, + instruction: match pair.as_str() { + "inc" => InstructionOne::Inc, + "dec" => InstructionOne::Dec, + "not" => InstructionOne::Not, + "jmp" => InstructionOne::Jmp, + "call" => InstructionOne::Call, + "loop" => InstructionOne::Loop, + "rjmp" => { + match &mut operand { + &mut AstNode::LabelOperand {name: _, ref mut is_relative} | + &mut AstNode::LabelOperandPointer {name: _, ref mut is_relative} => { + *is_relative = true; + } + _ => {} + } + InstructionOne::Rjmp + }, + "rcall" => { + match &mut operand { + &mut AstNode::LabelOperand {name: _, ref mut is_relative} | + &mut AstNode::LabelOperandPointer {name: _, ref mut is_relative} => { + *is_relative = true; + } + _ => {} + } + InstructionOne::Rcall + }, + "rloop" => { + match &mut operand { + &mut AstNode::LabelOperand {name: _, ref mut is_relative} | + &mut AstNode::LabelOperandPointer {name: _, ref mut is_relative} => { + *is_relative = true; + } + _ => {} + } + InstructionOne::Rloop + }, + "push" => InstructionOne::Push, + "pop" => InstructionOne::Pop, + _ => panic!("Unsupported conditional instruction (one): {}", pair.as_str()), + }, + operand: Box::new(operand), + } +} + +fn parse_instruction_two(pair: pest::iterators::Pair, mut lhs: AstNode, mut rhs: AstNode, size: Size, condition: Condition) -> AstNode { + AstNode::OperationTwo { + size: size, + condition: condition, + instruction: match pair.as_str() { + "add" => InstructionTwo::Add, + "sub" => InstructionTwo::Sub, + "mul" => InstructionTwo::Mul, + "pow" => InstructionTwo::Pow, + "div" => InstructionTwo::Div, + "rem" => InstructionTwo::Rem, + "and" => InstructionTwo::And, + "or" => InstructionTwo::Or, + "xor" => InstructionTwo::Xor, + "sla" => InstructionTwo::Sla, + "sra" => InstructionTwo::Sra, + "srl" => InstructionTwo::Srl, + "rol" => InstructionTwo::Rol, + "ror" => InstructionTwo::Ror, + "bse" => InstructionTwo::Bse, + "bcl" => InstructionTwo::Bcl, + "bts" => InstructionTwo::Bts, + "cmp" => InstructionTwo::Cmp, + "mov" => InstructionTwo::Mov, + "movz" => InstructionTwo::Movz, + "rta" => { + match &mut lhs { + &mut AstNode::LabelOperand {name: _, ref mut is_relative} | + &mut AstNode::LabelOperandPointer {name: _, ref mut is_relative} => { + *is_relative = true; + } + _ => {} + } + match &mut rhs { + &mut AstNode::LabelOperand {name: _, ref mut is_relative} | + &mut AstNode::LabelOperandPointer {name: _, ref mut is_relative} => { + *is_relative = true; + } + _ => {} + } + InstructionTwo::Rta + } + "in" => InstructionTwo::In, + "out" => InstructionTwo::Out, + _ => panic!("Unsupported conditional instruction (two): {}", pair.as_str()), + }, + lhs: Box::new(lhs), + rhs: Box::new(rhs), + } +} + +fn assemble_node(node: AstNode) -> AssembledInstruction { + // if this is data, don't interpret it as an instruction + match node { + AstNode::DataByte(byte) => { + return vec![byte].into(); + }, + AstNode::DataHalf(half) => { + return half.to_le_bytes().into(); + }, + AstNode::DataWord(word) => { + return word.to_le_bytes().into(); + }, + AstNode::DataStr(string) => { + return string.as_bytes().into(); + }, + AstNode::LabelOperand {name, is_relative} => { + // label is used on its own, not as an operand: + // LabelOperand was previously only checked as part of operands + let instruction = AssembledInstruction::new(); + generate_backpatch_immediate(&name, &instruction, is_relative); + return instruction; + }, + _ => {} + } + + let mut instruction_data: Vec = Vec::new(); + + instruction_data.push(condition_source_destination_to_byte(&node)); + instruction_data.push(instruction_to_byte(&node)); + + let mut instruction: AssembledInstruction = instruction_data.into(); + + node_to_immediate_values(&node, &mut instruction); + + instruction +} + +// fn node_to_vec(node: AstNode) -> Vec { +// let mut vec = Vec::::new(); +// let instruction = instruction_to_byte(&node); +// let condition_source_destination = condition_source_destination_to_byte(&node); +// vec.push(condition_source_destination); +// vec.push(instruction); +// node_to_immediate_values(&node, &mut vec); +// vec +// } + +fn size_to_byte(size: &Size) -> u8 { + match size { + Size::Byte => 0b00000000, + Size::Half => 0b01000000, + Size::Word => 0b10000000, + } +} + +fn instruction_to_byte(node: &AstNode) -> u8 { + match node { + AstNode::OperationZero {instruction, ..} => { + match instruction { + InstructionZero::Nop => 0x00 | size_to_byte(&Size::Word), + InstructionZero::Halt => 0x10 | size_to_byte(&Size::Word), + InstructionZero::Brk => 0x20 | size_to_byte(&Size::Word), + InstructionZero::Ret => 0x2A | size_to_byte(&Size::Word), + InstructionZero::Reti => 0x3A | size_to_byte(&Size::Word), + InstructionZero::Ise => 0x0C | size_to_byte(&Size::Word), + InstructionZero::Icl => 0x1C | size_to_byte(&Size::Word), + } + } + AstNode::OperationOne {size, instruction, ..} => { + match instruction { + InstructionOne::Inc => 0x11 | size_to_byte(size), + InstructionOne::Dec => 0x31 | size_to_byte(size), + InstructionOne::Not => 0x33 | size_to_byte(size), + InstructionOne::Jmp => 0x08 | size_to_byte(size), + InstructionOne::Call => 0x18 | size_to_byte(size), + InstructionOne::Loop => 0x28 | size_to_byte(size), + InstructionOne::Rjmp => 0x09 | size_to_byte(size), + InstructionOne::Rcall => 0x19 | size_to_byte(size), + InstructionOne::Rloop => 0x29 | size_to_byte(size), + InstructionOne::Push => 0x0A | size_to_byte(size), + InstructionOne::Pop => 0x1A | size_to_byte(size), + } + } + AstNode::OperationTwo {size, instruction, ..} => { + match instruction { + InstructionTwo::Add => 0x01 | size_to_byte(size), + InstructionTwo::Sub => 0x21 | size_to_byte(size), + InstructionTwo::Mul => 0x02 | size_to_byte(size), + InstructionTwo::Pow => 0x12 | size_to_byte(size), + InstructionTwo::Div => 0x22 | size_to_byte(size), + InstructionTwo::Rem => 0x32 | size_to_byte(size), + InstructionTwo::And => 0x03 | size_to_byte(size), + InstructionTwo::Or => 0x13 | size_to_byte(size), + InstructionTwo::Xor => 0x23 | size_to_byte(size), + InstructionTwo::Sla => 0x04 | size_to_byte(size), + InstructionTwo::Sra => 0x05 | size_to_byte(size), + InstructionTwo::Srl => 0x15 | size_to_byte(size), + InstructionTwo::Rol => 0x24 | size_to_byte(size), + InstructionTwo::Ror => 0x25 | size_to_byte(size), + InstructionTwo::Bse => 0x06 | size_to_byte(size), + InstructionTwo::Bcl => 0x16 | size_to_byte(size), + InstructionTwo::Bts => 0x26 | size_to_byte(size), + InstructionTwo::Cmp => 0x07 | size_to_byte(size), + InstructionTwo::Mov => 0x17 | size_to_byte(size), + InstructionTwo::Movz => 0x27 | size_to_byte(size), + InstructionTwo::Rta => 0x39 | size_to_byte(size), + InstructionTwo::In => 0x0B | size_to_byte(size), + InstructionTwo::Out => 0x1B | size_to_byte(size), + } + } + _ => panic!("Attempting to parse a non-instruction AST node as an instruction: {:#?}", node), + } +} + +fn condition_source_destination_to_byte(node: &AstNode) -> u8 { + let source: u8 = match node { + AstNode::OperationZero {..} => 0x00, + AstNode::OperationOne {operand, ..} => { + match operand.as_ref() { + AstNode::Register(_) => 0x00, + AstNode::RegisterPointer(_) => 0x01, + AstNode::Immediate8(_) | AstNode::Immediate16(_) | AstNode::Immediate32(_) | AstNode::LabelOperand {..} => 0x02, + AstNode::ImmediatePointer(_) | AstNode::LabelOperandPointer {..} => 0x03, + _ => panic!("Attempting to parse a non-instruction AST node as an instruction: {:#?}", node), + } + } + AstNode::OperationTwo {rhs, ..} => { + match rhs.as_ref() { + AstNode::Register(_) => 0x00, + AstNode::RegisterPointer(_) => 0x01, + AstNode::Immediate8(_) | AstNode::Immediate16(_) | AstNode::Immediate32(_) | AstNode::LabelOperand {..} => 0x02, + AstNode::ImmediatePointer(_) | AstNode::LabelOperandPointer {..} => 0x03, + _ => panic!("Attempting to parse a non-instruction AST node as an instruction: {:#?}", node), + } + } + _ => panic!("Attempting to parse a non-instruction AST node as an instruction: {:#?}", node), + }; + let destination: u8 = match node { + AstNode::OperationZero {..} => 0x00, + AstNode::OperationOne {..} => 0x00, + AstNode::OperationTwo {lhs, ..} => { + match lhs.as_ref() { + AstNode::Register(_) => 0x00, + AstNode::RegisterPointer(_) => 0x04, + AstNode::ImmediatePointer(_) | AstNode::LabelOperandPointer {..} => 0x08, + _ => panic!("Attempting to parse a non-instruction AST node as an instruction: {:#?}", node), + } + } + _ => panic!("Attempting to parse a non-instruction AST node as an instruction: {:#?}", node), + }; + let condition: u8 = match node { + AstNode::OperationZero {condition, ..} => { + match condition { + Condition::Always => 0x00, + Condition::Zero => 0x10, + Condition::NotZero => 0x20, + Condition::Carry => 0x30, + Condition::NotCarry => 0x40, + } + } + AstNode::OperationOne {condition, ..} => { + match condition { + Condition::Always => 0x00, + Condition::Zero => 0x10, + Condition::NotZero => 0x20, + Condition::Carry => 0x30, + Condition::NotCarry => 0x40, + } + } + AstNode::OperationTwo {condition, ..} => { + match condition { + Condition::Always => 0x00, + Condition::Zero => 0x10, + Condition::NotZero => 0x20, + Condition::Carry => 0x30, + Condition::NotCarry => 0x40, + } + } + _ => panic!("Attempting to parse a non-instruction AST node as an instruction: {:#?}", node), + }; + condition | source | destination +} + +fn generate_backpatch_immediate(name: &String, instruction: &AssembledInstruction, is_relative: bool) { + let index = instruction.borrow().len(); + { + let mut vec = instruction.borrow_mut(); + for _ in 0..4 { + vec.push(0xAB); + } + } + let mut table = LABEL_TARGETS.lock().unwrap(); + let targets = { + if let Some(targets) = table.get_mut(name) { + targets + } else { + table.insert(name.clone(), Vec::new()); + table.get_mut(name).unwrap() + } + }; + targets.push(BackpatchTarget::new(instruction, index, is_relative)); +} + +fn node_to_immediate_values(node: &AstNode, instruction: &AssembledInstruction) { + { + let mut vec = instruction.borrow_mut(); + + match node { + AstNode::OperationZero {..} => {} + + AstNode::OperationOne {operand, ..} => { + match *operand.as_ref() { + AstNode::Register (register) => vec.push(register), + AstNode::RegisterPointer(register) => vec.push(register), + + AstNode::Immediate8 (immediate) => vec.push(immediate), + AstNode::Immediate16 (immediate) => vec.extend_from_slice(&immediate.to_le_bytes()), + AstNode::Immediate32 (immediate) => vec.extend_from_slice(&immediate.to_le_bytes()), + AstNode::ImmediatePointer(immediate) => vec.extend_from_slice(&immediate.to_le_bytes()), + + AstNode::LabelOperand {ref name, is_relative} | + AstNode::LabelOperandPointer {ref name, is_relative} => { + std::mem::drop(vec); + generate_backpatch_immediate(name, instruction, is_relative); + } + + _ => panic!("Attempting to parse a non-instruction AST node as an instruction: {:#?}", node), + } + } + + AstNode::OperationTwo {rhs, ..} => { + match *rhs.as_ref() { + AstNode::Register (register) => vec.push(register), + AstNode::RegisterPointer(register) => vec.push(register), + + AstNode::Immediate8 (immediate) => vec.push(immediate), + AstNode::Immediate16 (immediate) => vec.extend_from_slice(&immediate.to_le_bytes()), + AstNode::Immediate32 (immediate) => vec.extend_from_slice(&immediate.to_le_bytes()), + AstNode::ImmediatePointer(immediate) => vec.extend_from_slice(&immediate.to_le_bytes()), + + AstNode::LabelOperand {ref name, is_relative} | + AstNode::LabelOperandPointer {ref name, is_relative} => { + std::mem::drop(vec); + generate_backpatch_immediate(name, instruction, is_relative); + } + + _ => panic!("Attempting to parse a non-instruction AST node as an instruction: {:#?}", node), + } + } + + _ => panic!("Attempting to parse a non-instruction AST node as an instruction: {:#?}", node), + } + } + + let mut vec = instruction.borrow_mut(); + + match node { + AstNode::OperationZero {..} => {} + AstNode::OperationOne {..} => {} + + AstNode::OperationTwo {lhs, ..} => { + match *lhs.as_ref() { + AstNode::Register (register) => vec.push(register), + AstNode::RegisterPointer(register) => vec.push(register), + + AstNode::ImmediatePointer(immediate) => vec.extend_from_slice(&immediate.to_le_bytes()), + + AstNode::LabelOperand {ref name, is_relative} | + AstNode::LabelOperandPointer {ref name, is_relative} => { + std::mem::drop(vec); + generate_backpatch_immediate(name, instruction, is_relative); + } + + _ => panic!("Attempting to parse a non-instruction AST node as an instruction: {:#?}", node), + } + } + + _ => panic!("Attempting to parse a non-instruction AST node as an instruction: {:#?}", node), + }; +} \ No newline at end of file