Skip to content

Commit

Permalink
faucon-asm: Introduce higher-level wrappers over syntax constructs fr…
Browse files Browse the repository at this point in the history
…om the parser
  • Loading branch information
vbe0201 committed Jan 4, 2022
1 parent 63b250f commit ae01519
Show file tree
Hide file tree
Showing 9 changed files with 553 additions and 69 deletions.
13 changes: 10 additions & 3 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions faucon-asm/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,4 @@ ahash = "0.7"
nom = "7.1"
nom_locate = "4.0"
num-traits = "0.2"
owo-colors = "3.2"
2 changes: 2 additions & 0 deletions faucon-asm/src/assembler.rs
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ mod lexer;
mod parser;

mod span;

mod syntax;
49 changes: 40 additions & 9 deletions faucon-asm/src/assembler/error.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
use std::fmt;

use nom::Finish;
use owo_colors::OwoColorize;

use super::{
lexer::Token,
parser,
span::{Span, Spanned},
};
Expand All @@ -13,7 +15,7 @@ use super::{
#[derive(Debug)]
pub struct AssemblerError {
span: Span,
line: String,
quoted: (String, bool), // Quoted string + whether it is a fragment of a full line.
msg: String,
}

Expand All @@ -26,14 +28,29 @@ impl AssemblerError {
.take_while(|&c| !c.is_whitespace() && c != ';')
.count(),
);
let line = nom_span
let quoted = nom_span
.extra
.extract_line(nom_span.location_offset())
.to_owned();

Self {
span,
line,
quoted: (quoted, false),
msg: msg.to_string(),
}
}

/// Creates an error that quotes only the part of `input` covered by `span`.
///
/// The quoted text is the slice `input[&span]` surrounded by blue-colored
/// `...` markers, and it is stored with the fragment flag set to `true`.
/// `msg` is converted with `to_string` and kept as the error message.
pub(crate) fn custom<S: ToString, T>(input: &str, span: Spanned<T>, msg: S) -> Self {
    // Drop the spanned value; only the location information is needed here.
    let span = span.into_span();
    let fragment = &input[&span];
    let quoted = format!("{dots}{line}{dots}", dots = "...".blue(), line = fragment);

    Self {
        span,
        quoted: (quoted, true),
        msg: msg.to_string(),
    }
}
Expand All @@ -43,9 +60,9 @@ impl AssemblerError {
&self.span
}

/// Gets the source line in which the error occurred.
pub fn line(&self) -> &str {
&self.line
/// Gets the quoted string that caused the error.
pub fn quoted(&self) -> &str {
&self.quoted.0
}

/// Gets the message of this error which provides further details.
Expand All @@ -55,11 +72,25 @@ impl AssemblerError {

// Consumes nom's IResult from the tokenization step and checks for errors.
pub(crate) fn check_tokenization<'t>(
result: nom::IResult<parser::NomSpan<'t>, Vec<Spanned<Token<'t>>>>,
) -> Result<Vec<Spanned<Token<'t>>>, Self> {
result: nom::IResult<parser::NomSpan<'t>, Vec<parser::Statement<'t>>>,
) -> Result<Vec<parser::Statement<'t>>, Self> {
match result.finish() {
Ok((_, tokens)) => Ok(tokens),
Err(e) => Err(Self::new(e.input, "Unparseable tokens detected")),
}
}
}

impl fmt::Display for AssemblerError {
    /// Writes the error as `at line <line>, column <column>: <message>`.
    ///
    /// Line and column are taken from the error's span; the quoted source
    /// text is not part of this output.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let location = self.span();
        let (line, column) = (location.line(), location.column());

        write!(f, "at line {}, column {}: {}", line, column, self.msg)
    }
}

// Empty impl: no trait methods are overridden, so the default
// `std::error::Error` method implementations are used.
impl std::error::Error for AssemblerError {}
7 changes: 3 additions & 4 deletions faucon-asm/src/assembler/interner.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use std::collections::HashMap;
use std::mem;

use ahash::RandomState;
use ahash::AHashMap;

// https://matklad.github.io/2020/03/22/fast-simple-rust-interner.html
// https://www.reddit.com/r/rust/comments/fn1jxf/blog_post_fast_and_simple_rust_interner/
Expand All @@ -14,7 +13,7 @@ impl FileId {
}

pub struct Interner {
map: HashMap<&'static str, FileId, RandomState>,
map: AHashMap<&'static str, FileId>,
vec: Vec<&'static str>,
buf: String,
full: Vec<String>,
Expand All @@ -25,7 +24,7 @@ impl Interner {
cap = cap.next_power_of_two();

Interner {
map: HashMap::default(),
map: AHashMap::new(),
vec: Vec::new(),
buf: String::with_capacity(cap),
full: Vec::new(),
Expand Down
89 changes: 83 additions & 6 deletions faucon-asm/src/assembler/lexer.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use num_traits::{cast, NumCast};

use super::{error::AssemblerError, interner::FileId, parser, span::Spanned};
use crate::{
isa::InstructionKind,
Expand All @@ -6,13 +8,13 @@ use crate::{
};

#[derive(Clone, Debug, PartialEq)]
pub enum Token<'s> {
pub enum Token<'t> {
// -0x42, 0x33, ...
SignedInteger(i32),
// 0xFF, 0b1101, ...
UnsignedInteger(u32),
// "I'm a string"
StrLiteral(&'s str),
StrLiteral(&'t str),
// 9:17
BitField(u32, u32),

Expand All @@ -23,16 +25,91 @@ pub enum Token<'s> {
// D[$r5 + $r4], I[$r0]
Memory(MemoryAccess),
// #a, ##symbol
Symbol(&'s str, bool),
Symbol(&'t str, bool),
// .align
Directive(&'s str),
Directive(&'t str),
// label:
Label(&'s str),
Label(&'t str),
// MOV.H
Mnemonic(InstructionKind, OperandSize),
}

pub fn tokenize(input: &str, file: FileId) -> Result<Vec<Spanned<Token<'_>>>, AssemblerError> {
impl<'t> Token<'t> {
    /// Attempts to turn a spanned integer token into a spanned value of type `I`.
    ///
    /// Both `UnsignedInteger` and `SignedInteger` are accepted. The closure
    /// yields the original token as the error when the token is not an
    /// integer or when `cast` cannot represent the value as `I`.
    pub fn try_as_int<I: NumCast>(this: Spanned<Self>) -> Result<Spanned<I>, Spanned<Self>> {
        this.try_map(|token| {
            let converted = match token {
                Token::UnsignedInteger(value) => cast::<_, I>(value),
                Token::SignedInteger(value) => cast::<_, I>(value),
                _ => None,
            };
            converted.ok_or(token)
        })
    }

    /// Attempts to extract the contents of a `StrLiteral` token.
    ///
    /// Any other token is yielded back as the error.
    pub fn try_as_str(this: Spanned<Self>) -> Result<Spanned<&'t str>, Spanned<Self>> {
        this.try_map(|token| {
            if let Token::StrLiteral(text) = token {
                Ok(text)
            } else {
                Err(token)
            }
        })
    }

    /// Attempts to extract the two values of a `BitField` token as a tuple.
    ///
    /// Any other token is yielded back as the error.
    pub fn try_as_bitfield(this: Spanned<Self>) -> Result<Spanned<(u32, u32)>, Spanned<Self>> {
        this.try_map(|token| {
            if let Token::BitField(start, end) = token {
                Ok((start, end))
            } else {
                Err(token)
            }
        })
    }

    /// Attempts to extract the value of a `Flag` token.
    ///
    /// Any other token is yielded back as the error.
    pub fn try_as_flag(this: Spanned<Self>) -> Result<Spanned<u8>, Spanned<Self>> {
        this.try_map(|token| {
            if let Token::Flag(flag) = token {
                Ok(flag)
            } else {
                Err(token)
            }
        })
    }

    /// Attempts to extract the `Register` carried by a `Register` token.
    ///
    /// Any other token is yielded back as the error.
    pub fn try_as_register(this: Spanned<Self>) -> Result<Spanned<Register>, Spanned<Self>> {
        this.try_map(|token| {
            if let Token::Register(register) = token {
                Ok(register)
            } else {
                Err(token)
            }
        })
    }

    /// Attempts to extract the `MemoryAccess` carried by a `Memory` token.
    ///
    /// Any other token is yielded back as the error.
    pub fn try_as_memory(this: Spanned<Self>) -> Result<Spanned<MemoryAccess>, Spanned<Self>> {
        this.try_map(|token| {
            if let Token::Memory(access) = token {
                Ok(access)
            } else {
                Err(token)
            }
        })
    }

    /// Attempts to extract the identifier and boolean flag of a `Symbol` token.
    ///
    /// Any other token is yielded back as the error.
    pub fn try_as_symbol(this: Spanned<Self>) -> Result<Spanned<(&'t str, bool)>, Spanned<Self>> {
        this.try_map(|token| {
            if let Token::Symbol(ident, phys) = token {
                Ok((ident, phys))
            } else {
                Err(token)
            }
        })
    }

    /// Attempts to extract the name carried by a `Directive` token.
    ///
    /// Any other token is yielded back as the error.
    pub fn try_as_directive(this: Spanned<Self>) -> Result<Spanned<&'t str>, Spanned<Self>> {
        this.try_map(|token| {
            if let Token::Directive(name) = token {
                Ok(name)
            } else {
                Err(token)
            }
        })
    }

    /// Attempts to extract the name carried by a `Label` token.
    ///
    /// Any other token is yielded back as the error.
    pub fn try_as_label(this: Spanned<Self>) -> Result<Spanned<&'t str>, Spanned<Self>> {
        this.try_map(|token| {
            if let Token::Label(name) = token {
                Ok(name)
            } else {
                Err(token)
            }
        })
    }

    /// Attempts to extract the instruction kind and operand size of a
    /// `Mnemonic` token.
    ///
    /// Any other token is yielded back as the error.
    pub fn try_as_mnemonic(
        this: Spanned<Self>,
    ) -> Result<Spanned<(InstructionKind, OperandSize)>, Spanned<Self>> {
        this.try_map(|token| {
            if let Token::Mnemonic(kind, size) = token {
                Ok((kind, size))
            } else {
                Err(token)
            }
        })
    }
}

/// Parses `input`, associated with the file id `file`, into a list of statements.
///
/// The parser is run via `parser::start` with `parser::do_parse`, and its
/// result is converted by `AssemblerError::check_tokenization`, which turns a
/// parser failure into an `AssemblerError`.
pub fn tokenize(input: &str, file: FileId) -> Result<Vec<parser::Statement<'_>>, AssemblerError> {
    AssemblerError::check_tokenization(parser::start(file, parser::do_parse)(input))
}
Loading

0 comments on commit ae01519

Please sign in to comment.