Parser mostly done for now

This commit is contained in:
Lumi Kalt 2024-01-21 01:21:37 +00:00
parent eb23e06fec
commit fad3e6d361
7 changed files with 449 additions and 12 deletions

121
Cargo.lock generated
View file

@ -2,6 +2,127 @@
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3 version = 3
[[package]]
name = "codespan-reporting"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3538270d33cc669650c4b093848450d380def10c331d38c768e34cac80576e6e"
dependencies = [
"termcolor",
"unicode-width",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "613f8cc01fe9cf1a3eb3d7f488fd2fa8388403e97039e2f73692932e291a770d"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "248e3bacc7dc6baa3b21e405ee045c3047101a49145e7e9eca583ab4c2ca5345"
[[package]]
name = "either"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a26ae43d7bcc3b814de94796a5e736d4029efb0ee900c12e2d54c993ad1a1e07"
[[package]]
name = "itertools"
version = "0.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "25db6b064527c5d482d0423354fcd07a89a2dfe07b67892e62411946db7f07b0"
dependencies = [
"either",
]
[[package]]
name = "rayon"
version = "1.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fa7237101a77a10773db45d62004a272517633fbcc3df19d96455ede1122e051"
dependencies = [
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1465873a3dfdaa8ae7cb14b4383657caab0b3e8a0aa9ae8e04b044854c8dfce2"
dependencies = [
"crossbeam-deque",
"crossbeam-utils",
]
[[package]] [[package]]
name = "riscv_interpreter" name = "riscv_interpreter"
version = "0.1.0" version = "0.1.0"
dependencies = [
"codespan-reporting",
"itertools",
"rayon",
]
[[package]]
name = "termcolor"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755"
dependencies = [
"winapi-util",
]
[[package]]
name = "unicode-width"
version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f29e6f9198ba0d26b4c9f07dbe6f9ed633e1f3d5b8b414090084349e46a52596"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"

View file

@ -2,3 +2,8 @@
name = "riscv_interpreter" name = "riscv_interpreter"
version = "0.1.0" version = "0.1.0"
edition = "2021" edition = "2021"
[dependencies]
codespan-reporting = { version = "0.11.1", features = ["ascii-only"] }
itertools = "0.12.0"
rayon = "1.8.1"

View file

@ -1,3 +1,79 @@
use std::collections::HashMap;
/// Mutable interpreter state: the register file, the label table and the stack.
#[derive(Debug)]
pub struct Env {
    /// ABI register names (`zero`, `ra`, `sp`, ...) mapped to their `xN` index.
    register_alias: HashMap<String, usize>,
    /// Label name -> value recorded through [`Env::add_label`].
    labels: HashMap<String, usize>,
    /// Current contents of `x0..=x31`.
    registers: [i64; 32],
    stack: Vec<i64>, // TODO: Find the size of the stack
}

impl Default for Env {
    fn default() -> Self {
        Self::new()
    }
}

impl Env {
    /// Creates an environment with every register zeroed, no labels and an
    /// empty stack.
    pub fn new() -> Self {
        // ABI names ordered by register number: the position in this array is
        // the `xN` index the alias maps to.
        const ALIASES: [&str; 32] = [
            "zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3",
            "a4", "a5", "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11",
            "t3", "t4", "t5", "t6",
        ];

        let register_alias = ALIASES
            .iter()
            .enumerate()
            .map(|(index, alias)| (alias.to_string(), index))
            .collect::<HashMap<_, _>>();

        Self {
            register_alias,
            labels: HashMap::new(),
            registers: [0; 32],
            stack: Vec::new(),
        }
    }

    /// Writes `value` into register `xN` where `N == reg`.
    ///
    /// Writes to `x0` are discarded: RISC-V hard-wires that register to zero.
    ///
    /// # Panics
    ///
    /// Panics if `reg` is not in `0..32`.
    pub fn set_register(&mut self, reg: usize, value: i64) {
        // Index first so an out-of-range `reg` still panics, even for a
        // value that would otherwise be dropped.
        let slot = &mut self.registers[reg];
        if reg != 0 {
            *slot = value;
        }
    }

    /// Reads register `xN` where `N == reg`.
    ///
    /// # Panics
    ///
    /// Panics if `reg` is not in `0..32`.
    pub fn get_register(&self, reg: usize) -> i64 {
        self.registers[reg]
    }

    /// Resolves an ABI alias such as `"sp"` to its register index, or `None`
    /// when the name is not a known alias.
    pub fn alias_to_register(&self, reg: &str) -> Option<usize> {
        self.register_alias.get(reg).copied()
    }

    /// Records `label` with the given `value`, replacing any earlier entry
    /// with the same name.
    pub fn add_label(&mut self, label: &str, value: usize) {
        self.labels.insert(label.to_string(), value);
    }

    /// Looks up the value recorded for `label`, if any.
    pub fn get_label(&self, label: &str) -> Option<usize> {
        self.labels.get(label).copied()
    }
}

View file

@ -1,19 +1,26 @@
use std::fmt::{self, Display, Formatter}; use std::fmt::{self, Display, Formatter};
/// Errors raised while turning source text into tokens.
///
/// The `Display` output is the short description shown to the user; the
/// spelling of the existing messages is kept as-is.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SyntaxErr {
    /// A comma that is not followed by another operand.
    TraillingComma,
    /// false for '(' true for ')'
    UnmatchedParen(bool),
    /// A character that cannot start or continue any token.
    UnexpectedChar,
    /// An operand appeared before any opcode; the payload is the kind of the
    /// offending token (it is interpolated into the message).
    OutsideOp(String),
}

impl Display for SyntaxErr {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        match self {
            SyntaxErr::TraillingComma => write!(f, "trailling comma"),
            SyntaxErr::UnmatchedParen(_) => write!(f, "unmatched parenthesis"),
            SyntaxErr::UnexpectedChar => write!(f, "unexpected character"),
            SyntaxErr::OutsideOp(kind) => write!(f, "{kind} before opcode"),
        }
    }
}
#[derive(Debug, Clone)]
pub enum RuntimeErr { pub enum RuntimeErr {
InvalidRegister(String), InvalidRegister(String),
UnexpectedImmediate, UnexpectedImmediate,

View file

@ -1,3 +1,9 @@
use rayon::prelude::*;
use riscv_interpreter::parser::{parse, Token};
/// Reads `test.s` from the current directory, parses it and pretty-prints the
/// source next to the resulting tokens (or the parse errors).
fn main() {
    // A missing or unreadable file is an ordinary user error, not a bug:
    // report it and exit with a failure status instead of panicking.
    let input = match std::fs::read_to_string("test.s") {
        Ok(input) => input,
        Err(err) => {
            eprintln!("failed to read test.s: {err}");
            std::process::exit(1);
        }
    };
    let tokens = parse(&input);
    println!("{:#?} -> {:#?}", input, tokens);
}

View file

@ -1,23 +1,245 @@
pub enum Expr { /// TODO: Strings, Symbols
use crate::err::SyntaxErr;
use itertools::Itertools;
use rayon::prelude::*;
#[derive(Debug, Clone)]
pub enum Token {
/// ' ', '\t', '\r'
Spacing,
/// \# blablabla,
Comment,
/// 1, 2, -1 /// 1, 2, -1
Immediate(i64), Immediate(i64),
/// zero, r1, pc /// zero, r1, pc
///
/// Technically also label references and symbols, but we'll handle those later
Register(String), Register(String),
/// add, xor, j /// add, xor, j
Op(Vec<Expr>), Op(String, Vec<(Token, Loc)>),
/// <label>: /// <label>:
LabelDef(String), LabelDef(String),
/// j <label> /// 0(a0)
LabelRef(String), Memory(Box<Token>, Option<Box<Token>>),
/// symbol
Symbol(String),
/// "string"
String(String),
/// Error token
Error(ParseErr),
} }
pub struct Location { impl Token {
pub fn kind(&self) -> &'static str {
use Token::*;
match self {
Spacing => "spacing",
Comment => "comment",
Immediate(_) => "immediate",
Register(_) => "register",
Op(_, _) => "op",
LabelDef(_) => "label def",
Memory(_, _) => "memory",
Symbol(_) => "symbol",
String(_) => "string",
Error(_) => "error",
}
}
}
/// A parse failure: the error itself, the location at which it was detected,
/// the tokens parsed up to that point, and an optional message to display to
/// the user.
type ParseErr = (SyntaxErr, Loc, Vec<(Token, Loc)>, Option<String>);
/// Position of a token inside the source text.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Loc {
    /// Line number of the token.
    pub line: usize,
    /// Column at which the token starts.
    pub col: usize,
    /// Offset, within the line, of the token's first character.
    pub start: usize,
    /// Offset, within the line, of the token's last character.
    pub end: usize,
}
pub fn parse(input: &str) -> Result<Expr, ()> { fn parse_line(input: &str, line: usize) -> Result<Vec<(Token, Loc)>, ParseErr> {
todo!() let mut loc = Loc {
line,
col: 1,
start: 0,
end: 0,
};
let mut tokens: Vec<(Token, Loc)> = Vec::new();
let mut chars = input.chars().peekable();
use Token::*;
while let Some(c) = chars.next() {
let token = match c {
'\t' => {
// TODO: Make a flag to set the tab size
loc.col += 3;
Spacing
}
' ' => Spacing,
'#' => {
while let Some(_) = chars.peek() {
chars.next();
loc.end += 1;
}
Comment
}
'0'..='9' => {
let mut num = c.to_string();
while let Some('0'..='9') = chars.peek() {
num.push(chars.next().unwrap());
loc.end += 1;
}
Immediate(num.parse().unwrap())
}
'-' => {
let mut num = c.to_string();
while let Some('0'..='9') = chars.peek() {
num.push(chars.next().unwrap());
}
Immediate(num.parse().unwrap())
}
'(' => {
let imm;
if let Some((Immediate(_), _)) = tokens.last() {
imm = Box::new(tokens.pop().unwrap());
loc.start = imm.1.start;
loc.col = imm.1.col;
} else {
return Err((
SyntaxErr::UnexpectedChar,
loc.clone(),
tokens.clone(),
Some("a memory index must be of the form imm(reg) or imm".to_string()),
));
}
let mut reg = std::string::String::new();
while let Some(' ') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') =
chars.peek()
{
reg.push(chars.next().unwrap());
}
if chars.next() != Some(')') {
return Err((
SyntaxErr::UnmatchedParen(false),
loc.clone(),
tokens.clone(),
None,
));
}
loc.end += 2;
Memory(
Box::new(imm.0),
Some(Box::new(Register(reg.trim().to_string()))),
)
}
')' => {
return Err((
SyntaxErr::UnmatchedParen(true),
loc.clone(),
tokens.clone(),
None,
))
}
// Opcode or Label definition
'a'..='z' | 'A'..='Z' | '_' => {
dbg!("op");
let mut str = c.to_string();
while let Some('a'..='z') | Some('A'..='Z') | Some('_') | Some('0'..='9') =
chars.peek()
{
str.push(chars.next().unwrap());
loc.end += 1;
}
if let Some(':') = chars.peek() {
chars.next();
loc.end += 1;
LabelDef(str[..str.len()].to_string())
} else if let Some((Op(_, _), _)) = tokens.get(tokens.len() - 2) {
// These Registers may actually be label references or symbols, but there's ambiguity
// between them and registers, so we'll just assume they're registers for now
Register(str)
} else {
Op(str, vec![])
}
}
_ => return Err((SyntaxErr::UnexpectedChar, loc.clone(), tokens.clone(), None)),
};
tokens.push((token, loc.clone()));
loc.end += 1;
loc.col += loc.end - loc.start;
loc.col;
loc.start = loc.end;
}
Ok(tokens
.into_iter()
.filter(|(token, _)| !matches!(token, Token::Spacing))
.group_by(|(token, _)| {
matches!(
token,
Op(_, _) | Immediate(_) | Register(_) | Memory(_, _) | Symbol(_) | String(_)
)
})
.into_iter()
.flat_map(|group| {
let (is_op, group) = group;
if is_op {
let group = group.collect::<Vec<_>>();
let (op, loc) = dbg!(dbg!(group[0].clone()));
let (op, mut args) = match op {
Op(op, args) => (op, args),
_ => {
return vec![(
Token::Error((
SyntaxErr::OutsideOp(op.kind().to_string()),
loc.clone(),
group.clone(),
None,
)),
loc.clone(),
)]
}
};
args.extend_from_slice(&group[1..]);
vec![(Op(op, args), loc)]
} else {
group.collect::<Vec<_>>()
}
})
.collect::<Vec<_>>())
}
/// Parse the input
///
/// Returns a vector of tokens and their locations, if successful, or an error vector
/// containing the error, the location of the error, the tokens parsed up to that point,
/// and an optional message to display to the users
pub fn parse(input: &str) -> Result<Vec<(Token, Loc)>, Vec<ParseErr>> {
let parsed_lines = input
.lines()
.enumerate()
.par_bridge()
.map(|(i, line)| parse_line(line, i + 1))
.collect::<Vec<_>>();
let (ok, err) = parsed_lines
.into_par_iter()
.partition::<Vec<Result<_, _>>, Vec<Result<_, _>>, _>(|line| matches!(line, Ok(_)));
dbg!(&err);
if err.is_empty() {
Ok(ok.into_par_iter().flat_map(|line| line.unwrap()).collect())
} else {
dbg!("err");
Err(err.into_par_iter().map(|line| line.unwrap_err()).collect())
}
} }

2
test.s
View file

@ -1 +1 @@
add r1 r2 r1 a: lw a0 1(sp)