From ae015197ebebdd13c51eabf5c5c065a87faa974f Mon Sep 17 00:00:00 2001 From: Valentin B Date: Tue, 4 Jan 2022 21:02:30 +0100 Subject: [PATCH] faucon-asm: Introduce higher-level wrappers over syntax constructs from the parser --- Cargo.lock | 13 +- faucon-asm/Cargo.toml | 1 + faucon-asm/src/assembler.rs | 2 + faucon-asm/src/assembler/error.rs | 49 ++++- faucon-asm/src/assembler/interner.rs | 7 +- faucon-asm/src/assembler/lexer.rs | 89 ++++++++- faucon-asm/src/assembler/parser.rs | 99 ++++++--- faucon-asm/src/assembler/span.rs | 74 +++++-- faucon-asm/src/assembler/syntax.rs | 288 +++++++++++++++++++++++++++ 9 files changed, 553 insertions(+), 69 deletions(-) create mode 100644 faucon-asm/src/assembler/syntax.rs diff --git a/Cargo.lock b/Cargo.lock index 3ddece2cd4..ce198e7b3f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,7 +150,7 @@ dependencies = [ "eyre", "indenter", "once_cell", - "owo-colors", + "owo-colors 1.3.0", "tracing-error", ] @@ -161,7 +161,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6eee477a4a8a72f4addd4de416eb56d54bc307b284d6601bafdee1f4ea462d1" dependencies = [ "once_cell", - "owo-colors", + "owo-colors 1.3.0", "tracing-core", "tracing-error", ] @@ -215,7 +215,7 @@ dependencies = [ "faucon-asm", "faucon-emu", "nom 6.2.1", - "owo-colors", + "owo-colors 1.3.0", "paste", "rustyline", "rustyline-derive", @@ -232,6 +232,7 @@ dependencies = [ "nom 7.1.0", "nom_locate", "num-traits 0.2.14", + "owo-colors 3.2.0", ] [[package]] @@ -430,6 +431,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2386b4ebe91c2f7f51082d4cefa145d030e33a1842a96b12e4885cc3c01f7a55" +[[package]] +name = "owo-colors" +version = "3.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "20448fd678ec04e6ea15bbe0476874af65e98a01515d667aa49f1434dc44ebf4" + [[package]] name = "paste" version = "1.0.5" diff --git a/faucon-asm/Cargo.toml b/faucon-asm/Cargo.toml index 7f8a616717..0c84e0268a 100644 --- a/faucon-asm/Cargo.toml +++ b/faucon-asm/Cargo.toml @@ -14,3 +14,4 @@ ahash = "0.7" nom = "7.1" nom_locate = "4.0" num-traits = "0.2" +owo-colors = "3.2" diff --git a/faucon-asm/src/assembler.rs b/faucon-asm/src/assembler.rs index ca30917963..0d00041ad0 100644 --- a/faucon-asm/src/assembler.rs +++ b/faucon-asm/src/assembler.rs @@ -8,3 +8,5 @@ mod lexer; mod parser; mod span; + +mod syntax; diff --git a/faucon-asm/src/assembler/error.rs b/faucon-asm/src/assembler/error.rs index f86e7a8b99..02195e1db7 100644 --- a/faucon-asm/src/assembler/error.rs +++ b/faucon-asm/src/assembler/error.rs @@ -1,7 +1,9 @@ +use std::fmt; + use nom::Finish; +use owo_colors::OwoColorize; use super::{ - lexer::Token, parser, span::{Span, Spanned}, }; @@ -13,7 +15,7 @@ use super::{ #[derive(Debug)] pub struct AssemblerError { span: Span, - line: String, + quoted: (String, bool), // Quoted string + whether it is a fragment of a full line. msg: String, } @@ -26,14 +28,29 @@ impl AssemblerError { .take_while(|&c| !c.is_whitespace() && c != ';') .count(), ); - let line = nom_span + let quoted = nom_span .extra .extract_line(nom_span.location_offset()) .to_owned(); Self { span, - line, + quoted: (quoted, false), + msg: msg.to_string(), + } + } + + pub(crate) fn custom(input: &str, span: Spanned, msg: S) -> Self { + let span = span.into_span(); + let quoted = format!( + "{dots}{line}{dots}", + dots = "...".blue(), + line = &input[&span] + ); + + Self { + span, + quoted: (quoted, true), msg: msg.to_string(), } } @@ -43,9 +60,9 @@ impl AssemblerError { &self.span } - /// Gets the source line in which the error occurred. - pub fn line(&self) -> &str { - &self.line + /// Gets the quoted string that caused the error. + pub fn quoted(&self) -> &str { + &self.quoted.0 } /// Gets the message of this error which provides further details. @@ -55,11 +72,25 @@ impl AssemblerError { // Consumes nom's IResult from the tokenization step and checks for errors. pub(crate) fn check_tokenization<'t>( - result: nom::IResult, Vec>>>, - ) -> Result>>, Self> { + result: nom::IResult, Vec>>, + ) -> Result>, Self> { match result.finish() { Ok((_, tokens)) => Ok(tokens), Err(e) => Err(Self::new(e.input, "Unparseable tokens detected")), } } } + +impl fmt::Display for AssemblerError { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "at line {}, column {}: {}", + self.span().line(), + self.span().column(), + self.msg + ) + } +} + +impl std::error::Error for AssemblerError {} diff --git a/faucon-asm/src/assembler/interner.rs b/faucon-asm/src/assembler/interner.rs index f8971645f6..c4c550de1d 100644 --- a/faucon-asm/src/assembler/interner.rs +++ b/faucon-asm/src/assembler/interner.rs @@ -1,7 +1,6 @@ -use std::collections::HashMap; use std::mem; -use ahash::RandomState; +use ahash::AHashMap; // https://matklad.github.io/2020/03/22/fast-simple-rust-interner.html // https://www.reddit.com/r/rust/comments/fn1jxf/blog_post_fast_and_simple_rust_interner/ @@ -14,7 +13,7 @@ impl FileId { } pub struct Interner { - map: HashMap<&'static str, FileId, RandomState>, + map: AHashMap<&'static str, FileId>, vec: Vec<&'static str>, buf: String, full: Vec, @@ -25,7 +24,7 @@ impl Interner { cap = cap.next_power_of_two(); Interner { - map: HashMap::default(), + map: AHashMap::new(), vec: Vec::new(), buf: String::with_capacity(cap), full: Vec::new(), diff --git a/faucon-asm/src/assembler/lexer.rs b/faucon-asm/src/assembler/lexer.rs index 238e12f69c..e2fb72e476 100644 --- a/faucon-asm/src/assembler/lexer.rs +++ b/faucon-asm/src/assembler/lexer.rs @@ -1,3 +1,5 @@ +use num_traits::{cast, NumCast}; + use super::{error::AssemblerError, interner::FileId, parser, span::Spanned}; use crate::{ isa::InstructionKind, @@ -6,13 +8,13 @@ use crate::{ }; #[derive(Clone, Debug, PartialEq)] -pub enum Token<'s> { +pub enum Token<'t> { // -0x42, 0x33, ... SignedInteger(i32), // 0xFF, 0b1101, ... UnsignedInteger(u32), // "I'm a string" - StrLiteral(&'s str), + StrLiteral(&'t str), // 9:17 BitField(u32, u32), @@ -23,16 +25,91 @@ pub enum Token<'s> { // D[$r5 + $r4], I[$r0] Memory(MemoryAccess), // #a, ##symbol - Symbol(&'s str, bool), + Symbol(&'t str, bool), // .align - Directive(&'s str), + Directive(&'t str), // label: - Label(&'s str), + Label(&'t str), // MOV.H Mnemonic(InstructionKind, OperandSize), } -pub fn tokenize(input: &str, file: FileId) -> Result>>, AssemblerError> { +impl<'t> Token<'t> { + pub fn try_as_int(this: Spanned) -> Result, Spanned> { + this.try_map(|t| match t { + Token::UnsignedInteger(i) => cast::<_, I>(i).ok_or(t), + Token::SignedInteger(i) => cast::<_, I>(i).ok_or(t), + _ => Err(t), + }) + } + + pub fn try_as_str(this: Spanned) -> Result, Spanned> { + this.try_map(|t| match t { + Token::StrLiteral(s) => Ok(s), + _ => Err(t), + }) + } + + pub fn try_as_bitfield(this: Spanned) -> Result, Spanned> { + this.try_map(|t| match t { + Token::BitField(start, end) => Ok((start, end)), + _ => Err(t), + }) + } + + pub fn try_as_flag(this: Spanned) -> Result, Spanned> { + this.try_map(|t| match t { + Token::Flag(f) => Ok(f), + _ => Err(t), + }) + } + + pub fn try_as_register(this: Spanned) -> Result, Spanned> { + this.try_map(|t| match t { + Token::Register(reg) => Ok(reg), + _ => Err(t), + }) + } + + pub fn try_as_memory(this: Spanned) -> Result, Spanned> { + this.try_map(|t| match t { + Token::Memory(mem) => Ok(mem), + _ => Err(t), + }) + } + + pub fn try_as_symbol(this: Spanned) -> Result, Spanned> { + this.try_map(|t| match t { + Token::Symbol(ident, phys) => Ok((ident, phys)), + _ => Err(t), + }) + } + + pub fn try_as_directive(this: Spanned) -> Result, Spanned> { + this.try_map(|t| match t { + Token::Directive(d) => Ok(d), + _ => Err(t), + }) + } + + pub fn try_as_label(this: Spanned) -> Result, Spanned> { + this.try_map(|t| match t { + Token::Label(l) => Ok(l), + _ => Err(t), + }) + } + + pub fn try_as_mnemonic( + this: Spanned, + ) -> Result, Spanned> { + this.try_map(|t| match t { + Token::Mnemonic(kind, size) => Ok((kind, size)), + _ => Err(t), + }) + } +} + +pub fn tokenize(input: &str, file: FileId) -> Result>, AssemblerError> { let result = parser::start(file, parser::do_parse)(input); AssemblerError::check_tokenization(result) } diff --git a/faucon-asm/src/assembler/parser.rs b/faucon-asm/src/assembler/parser.rs index 6eff66292b..d3752fcb03 100644 --- a/faucon-asm/src/assembler/parser.rs +++ b/faucon-asm/src/assembler/parser.rs @@ -33,6 +33,55 @@ impl<'c> ContextData<'c> { } } +// A raw assembly statement. +// +// This essentially represents an individual line in the source code +// consisting of an optional label and/or an optional expression. +// +// This is produced by the parser during tokenization. +#[derive(Clone, Debug)] +pub struct Statement<'ctx> { + pub label: Option>>, // Token::Label. + pub expr: Option>, +} + +// A raw assembly expression. +// +// This is either a directive or an instruction mnemonic along with +// all its data/operands. +// +// This is produced by the parser during tokenization. +#[derive(Clone, Debug)] +pub struct Expression<'ctx> { + pub expr: Spanned>, // Token::Directive or Token::Mnemonic. + pub data: Vec>>, // Any other yet unmentioned Tokens. +} + +impl<'ctx> Statement<'ctx> { + pub fn new() -> Self { + Self { + label: None, + expr: None, + } + } + + #[must_use = "this returns the modified statement rather than altering the old value"] + pub fn with_label(mut self, label: Spanned>) -> Self { + self.label = Some(label); + self + } + + #[must_use = "this returns the modified statement rather than altering the old value"] + pub fn with_expr( + mut self, + expr: Spanned>, + data: Vec>>, + ) -> Self { + self.expr = Some(Expression { expr, data }); + self + } +} + pub fn start<'c, T, P>( file: FileId, mut parser: P, @@ -44,20 +93,13 @@ where } // *separator_list*? ( *statement* *separator_list* )* *eof* -pub fn do_parse(input: NomSpan<'_>) -> IResult, Vec>>> { +pub fn do_parse(input: NomSpan<'_>) -> IResult, Vec>> { let (input, _) = opt(separator_list)(input)?; let (input, result) = fold_many0( pair(statement, separator_list), Vec::new, - |mut acc, ((label, inst), _)| { - if let Some(l) = label { - acc.push(l); - } - if let Some(i) = inst { - acc.push(i.0); - acc.extend(i.1); - } - + |mut acc, (stmt, _)| { + acc.push(stmt); acc }, )(input)?; @@ -67,22 +109,27 @@ pub fn do_parse(input: NomSpan<'_>) -> IResult, Vec, -) -> IResult< - NomSpan<'_>, - ( - Option>>, - Option<(Spanned>, Vec>>)>, - ), -> { - pair( - opt(ws0(label_decl)), - opt(pair( - expression, - many0(preceded(many1(whitespace), operand)), - )), +fn statement(input: NomSpan<'_>) -> IResult, Statement<'_>> { + map( + pair( + opt(ws0(label_decl)), + opt(pair( + expression, + many0(preceded(many1(whitespace), operand)), + )), + ), + |(label, expr)| { + let mut stmt = Statement::new(); + + if let Some(label) = label { + stmt = stmt.with_label(label); + } + if let Some((expr, data)) = expr { + stmt = stmt.with_expr(expr, data); + } + + stmt + }, )(input) } diff --git a/faucon-asm/src/assembler/span.rs b/faucon-asm/src/assembler/span.rs index e9e2cc8b90..3984d33c0c 100644 --- a/faucon-asm/src/assembler/span.rs +++ b/faucon-asm/src/assembler/span.rs @@ -7,72 +7,85 @@ use super::{interner::FileId, parser::NomSpan}; pub struct Span { file_id: FileId, line: u32, - start_index: usize, - end_index: usize, + column: usize, + start: usize, + end: usize, } impl Span { pub const DUMMY: Span = Span { file_id: FileId::DUMMY, line: 0, - start_index: 0, - end_index: 0, + column: 0, + start: 0, + end: 0, }; pub fn new>( file_id: ID, line: u32, - start_index: usize, - end_index: usize, + column: usize, + start: usize, + end: usize, ) -> Self { - Span { + Self { file_id: file_id.into(), line, - start_index, - end_index, + column, + start, + end, } } pub fn from_nom(span: &NomSpan<'_>, width: usize) -> Self { - let start_index = span.naive_get_utf8_column(); + let start = span.location_offset(); Self { file_id: span.extra.file_id, line: span.location_line(), - start_index, - end_index: start_index + width, + column: span.naive_get_utf8_column(), + start, + end: start + width, } } pub fn start(&self) -> usize { - self.start_index + self.start } pub fn end(&self) -> usize { - self.end_index + self.end + } + + pub fn line(&self) -> u32 { + self.line + } + + pub fn column(&self) -> usize { + self.column } pub fn width(&self) -> usize { - self.end_index - self.start_index + self.end - self.start } pub fn as_range(&self) -> Range { - self.start_index..self.end_index + self.start..self.end } } impl fmt::Debug for Span { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { f.debug_tuple("Span") - .field(&format_args!("{}..{}", self.start_index, self.end_index)) + .field(&format_args!("{}..{}", self.start, self.end)) .finish() } } -impl Index for str { +impl Index<&Span> for str { type Output = str; - fn index(&self, span: Span) -> &Self::Output { - self.index(span.start_index..span.end_index) + fn index(&self, span: &Span) -> &Self::Output { + self.index(span.start..span.end) } } @@ -100,6 +113,17 @@ impl Spanned { } } + pub fn try_map(self, f: F) -> Result, Spanned> + where + F: FnOnce(T) -> Result, + { + let Self { node, span } = self; + + f(node) + .map(|node| Spanned { node, span }) + .map_err(|node| Spanned { node, span }) + } + pub fn parse<'a>( mut parser: impl FnMut(NomSpan<'a>) -> nom::IResult, T>, ) -> impl FnMut(NomSpan<'a>) -> nom::IResult, Spanned> { @@ -124,13 +148,21 @@ impl Spanned { pub fn span(&self) -> &Span { &self.span } + + pub fn into_node(self) -> T { + self.node + } + + pub fn into_span(self) -> Span { + self.span + } } impl Clone for Spanned { fn clone(&self) -> Self { Self { node: self.node.clone(), - span: self.span.clone(), + span: self.span, } } } diff --git a/faucon-asm/src/assembler/syntax.rs b/faucon-asm/src/assembler/syntax.rs new file mode 100644 index 0000000000..ed6f59c8ab --- /dev/null +++ b/faucon-asm/src/assembler/syntax.rs @@ -0,0 +1,288 @@ +use num_traits::NumCast; + +use super::{error::AssemblerError, lexer::Token, parser, span::Spanned}; +use crate::{isa::InstructionKind, opcode::OperandSize}; + +pub struct Statement<'ctx> { + label: Option>, + expr: Option>, +} + +impl<'ctx> Statement<'ctx> { + fn from_parser( + input: &'ctx str, + stmt: parser::Statement<'ctx>, + ) -> Result { + let label = stmt.label.map(|label| { + Token::try_as_str(label) + .unwrap_or_else(|e| panic!("Parser anomaly: {:?} at label position", e)) + }); + let expr = match stmt.expr { + Some(expr) => { + let parser::Expression { expr, data } = expr; + Some(match Token::try_as_mnemonic(expr) { + Ok(mnemonic) => Expression::Instruction(Instruction { + mnemonic, + operands: data, + }), + Err(t) => Expression::Directive(parse_directive(input, t, data.into_iter())?), + }) + } + None => None, + }; + + Ok(Self { label, expr }) + } +} + +pub enum Expression<'ctx> { + Directive(Directive<'ctx>), + Instruction(Instruction<'ctx>), +} + +// Representation of the different Falcon security modes. +// TODO: Move this elsewhere? +#[derive(Clone, Copy, Debug, PartialEq, Eq)] +pub enum SecurityMode { + // The Falcon No Secure mode. This is the only mode unsigned + // microcode may execute in directly. + None, + // The Falcon Light Secure mode. Must be entered from Heavy + // Secure context and grants debugging possibilities for + // secure code. + Light, + // The Falcon Heavy Secure mode. Microcode in this mode is + // granted the highest possible set of privileges while, at + // the same time, all debugging features are disabled. + Heavy, +} + +// Assembler directives supported in the Falcon assembly language. +#[derive(Clone, Debug, PartialEq)] +pub enum Directive<'ctx> { + // Specifies alignment of the following code to a given byte boundary. + Align(u32), + // Inserts a byte literal at the current position in code. + Byte(u8), + // Assigns a numeric constant to a symbol with the chosen name. + Equ(&'ctx str, u32), + // Inserts a halfword literal at the current position in code. + // + // Uses little endian byte ordering. + HalfWord(u16), + // Includes another assembler source file as an additional translation + // unit into a source file. + Include(&'ctx str), + // Inserts a word literal at the current position in code. + // + // Uses little endian byte ordering. + Word(u32), + // Declares a code section with a name and an optional start address. + Section(SecurityMode, &'ctx str, Option), + // Skips bytes to set the program counter to the supplied value + // relative to the current section. + Size(u32), + // Skips the given amount of bytes in code and optionally fills them + // with a supplied value. + Skip(u32, Option), + // Inserts a string literal at the current position in code. + Str(&'ctx str), +} + +// Falcon assembly instruction which is lowered into machine code. +// +// This only stores the necessary information for instruction selection to +// enumerate possible forms and find the one that matches the operand list. +#[derive(Clone, Debug, PartialEq)] +pub struct Instruction<'ctx> { + // The instruction mnemonic. + pub mnemonic: Spanned<(InstructionKind, OperandSize)>, + // Various kinds of operands validated during instruction selection. + pub operands: Vec>>, +} + +pub fn parse_integer<'ctx, I: Iterator>>, U: NumCast>( + input: &'ctx str, + iter: &mut I, +) -> Result, AssemblerError> { + match iter.next() { + Some(t) => Token::try_as_int(t) + .map(|u| Some(u.into_node())) + .map_err(|e| AssemblerError::custom(input, e, "Expected integer operand")), + None => Ok(None), + } +} + +pub fn parse_string<'ctx, I: Iterator>>>( + input: &'ctx str, + iter: &mut I, +) -> Result, AssemblerError> { + match iter.next() { + Some(t) => Token::try_as_str(t) + .map(|s| Some(s.into_node())) + .map_err(|e| AssemblerError::custom(input, e, "Expected string literal operand")), + None => Ok(None), + } +} + +fn parse_directive<'ctx, I: Iterator>>>( + input: &'ctx str, + dir: Spanned>, + mut iter: I, +) -> Result, AssemblerError> { + match Token::try_as_directive(dir) { + Ok(dir) => match *dir.node() { + "align" => { + let align = parse_integer(input, &mut iter)?.ok_or_else(|| { + AssemblerError::custom( + input, + dir, + "`align` requires integer operand denoting alignment", + ) + })?; + + Ok(Directive::Align(align)) + } + "byte" => { + let value = parse_integer(input, &mut iter)?.ok_or_else(|| { + AssemblerError::custom( + input, + dir, + "`byte` requires integer operand denoting a value", + ) + })?; + + Ok(Directive::Byte(value)) + } + "equ" => { + let symbol = parse_string(input, &mut iter)?.ok_or_else(|| { + AssemblerError::custom( + input, + dir.clone(), + "`equ` requires string literal denoting symbol", + ) + })?; + let value = parse_integer(input, &mut iter)?.ok_or_else(|| { + AssemblerError::custom( + input, + dir, + "`equ` requires integer operand denoting constant value after symbol", + ) + })?; + + Ok(Directive::Equ(symbol, value)) + } + "halfword" => { + let value = parse_integer(input, &mut iter)?.ok_or_else(|| { + AssemblerError::custom( + input, + dir, + "`halfword` requires integer operand denoting a value", + ) + })?; + + Ok(Directive::HalfWord(value)) + } + "include" => { + let path = parse_string(input, &mut iter)?.ok_or_else(|| { + AssemblerError::custom( + input, + dir.clone(), + "`include` requires string literal denoting file path to include", + ) + })?; + + Ok(Directive::Include(path)) + } + "word" => { + let value = parse_integer(input, &mut iter)?.ok_or_else(|| { + AssemblerError::custom( + input, + dir, + "`word` requires integer operand denoting a value", + ) + })?; + + Ok(Directive::Word(value)) + } + "nsection" => { + let name = parse_string(input, &mut iter)?.ok_or_else(|| { + AssemblerError::custom( + input, + dir.clone(), + "`nsection` requires string literal denoting section name", + ) + })?; + let addr = parse_integer(input, &mut iter)?; + + Ok(Directive::Section(SecurityMode::None, name, addr)) + } + "lsection" => { + let name = parse_string(input, &mut iter)?.ok_or_else(|| { + AssemblerError::custom( + input, + dir.clone(), + "`lsection` requires string literal denoting section name", + ) + })?; + let addr = parse_integer(input, &mut iter)?; + + Ok(Directive::Section(SecurityMode::Light, name, addr)) + } + "hsection" => { + let name = parse_string(input, &mut iter)?.ok_or_else(|| { + AssemblerError::custom( + input, + dir.clone(), + "`hsection` requires string literal denoting section name", + ) + })?; + let addr = parse_integer(input, &mut iter)?; + + Ok(Directive::Section(SecurityMode::Heavy, name, addr)) + } + "size" => { + let size = parse_integer(input, &mut iter)?.ok_or_else(|| { + AssemblerError::custom( + input, + dir, + "`size` requires integer operand denoting position to pad to", + ) + })?; + + Ok(Directive::Size(size)) + } + "skip" => { + let amount = parse_integer(input, &mut iter)?.ok_or_else(|| { + AssemblerError::custom( + input, + dir, + "`skip` requires integer operand denoting bytes to skip", + ) + })?; + let fill = parse_integer(input, &mut iter)?; + + Ok(Directive::Skip(amount, fill)) + } + "str" => { + let lit = parse_string(input, &mut iter)?.ok_or_else(|| { + AssemblerError::custom( + input, + dir.clone(), + "`str` requires string literal to insert", + ) + })?; + + Ok(Directive::Str(lit)) + } + + _ => Err(AssemblerError::custom(input, dir, "Unknown directive")), + }, + + Err(e) => Err(AssemblerError::custom( + input, + e, + "Expected assembler directive at this position", + )), + } +}