Report misuse of registers before opcodes and enforce proper register naming

This commit is contained in:
Lumi Kalt 2024-01-21 14:52:58 +00:00
parent 9a931b4b8f
commit 5939a1c89b
5 changed files with 53 additions and 34 deletions

View file

@ -5,7 +5,7 @@ pub struct Env {
pub register_alias: HashMap<String, usize>,
labels: HashMap<String, usize>,
registers: [i64; 32],
stack: Vec<i64>, // TODO: Find the size of the stack
pub stack: Vec<i64>, // TODO: Find the size of the stack
}
impl Env {
@ -69,12 +69,20 @@ impl Env {
self.register_alias.get(reg).copied()
}
pub fn xn_to_register(&self, reg: &str) -> Option<usize> {
if reg.starts_with("x") {
reg[1..].parse::<usize>().ok()
if reg == "x0" {
Some(0)
} else if reg.starts_with("x") && !reg[1..].starts_with("0") {
match reg[1..].parse::<usize>() {
Ok(n) if n < 32 => Some(n),
_ => None,
}
} else {
None
}
}
/// Reports whether `reg` names a register, either through the alias table
/// or as a numeric `xN` name.
pub fn is_valid_register(&self, reg: &str) -> bool {
    // The alias lookup goes first; the `xN` form is only tried when it misses.
    match self.alias_to_register(reg) {
        Some(_) => true,
        None => self.xn_to_register(reg).is_some(),
    }
}
pub fn add_label(&mut self, label: &str, value: usize) {
self.labels.insert(label.to_string(), value);

View file

@ -30,7 +30,7 @@ impl SyntaxErr {
SyntaxErr::UnmatchedParen(true) => "add '(' before the register name".to_string(),
SyntaxErr::UnexpectedChar => "ensure the input is well-formed".to_string(),
SyntaxErr::OutsideOp(kind) => format!("add '{}'s only after an opcode", kind),
SyntaxErr::MemoryInvalidRegister => "valid registers are of the form xN, 0 <= N < 32, or the standard aliases".to_string(),
SyntaxErr::MemoryInvalidRegister => "registers are either xN (N < 32 with no leading 0) or the standard aliases".to_string(),
}
}
}

View file

@ -1,5 +1,5 @@
use codespan_reporting::{
diagnostic::{self, Diagnostic, Label},
diagnostic::{Diagnostic, Label},
files::SimpleFile,
term::{
self,
@ -25,7 +25,7 @@ fn main() -> anyhow::Result<()> {
Err(errs) => {
for err in errs {
let start = err.1.start;
let end = err.1.end;
let end = err.1.end + 1;
let diagnostic = Diagnostic::error()
.with_message("Syntax Error")

View file

@ -53,7 +53,6 @@ type ParseErr = (SyntaxErr, Loc, Vec<(Token, Loc)>, Option<String>);
/// Position of a token or parse error within the assembly source.
///
/// `parse_line` pairs every token it produces with a `Loc`, and every
/// `ParseErr` carries one so the error can be pointed at in the input.
#[derive(Debug, Clone, Copy)]
pub struct Loc {
/// Line value handed to `parse_line` by its caller.
// NOTE(review): whether this is 0- or 1-based is not visible here; confirm at the call site.
pub line: usize,
// NOTE(review): other hunks of this change build `Loc { line, start, end }`
// without `col`, so this field looks like the side of the diff being
// removed; confirm before relying on it.
pub col: usize,
/// Offset within the line where the span begins; `parse_line` starts it at 0
/// and moves it up to `end` after each token is pushed.
pub start: usize,
/// Offset within the line where the span ends; `parse_line` bumps it by one
/// for each character it consumes.
// NOTE(review): `main` reports `end + 1` as the diagnostic end, which suggests
// this is an inclusive bound; confirm.
pub end: usize,
}
@ -61,7 +60,6 @@ pub struct Loc {
fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>, ParseErr> {
let mut loc = Loc {
line,
col: 1,
start: 0,
end: 0,
};
@ -73,12 +71,7 @@ fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>,
while let Some(c) = chars.next() {
let token = match c {
'\t' => {
// TODO: Make a flag to set the tab size
loc.col += 3;
Spacing
}
' ' => Spacing,
'\t' | ' ' => Spacing,
'#' => {
while let Some(_) = chars.peek() {
@ -94,7 +87,20 @@ fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>,
num.push(chars.next().unwrap());
loc.end += 1;
}
if let Some('(') | Some(' ') = chars.peek() {
Immediate(num.parse().unwrap())
} else {
return Err((
SyntaxErr::UnexpectedChar,
Loc {
line,
start: loc.end + 1,
end: loc.end + 1,
},
tokens.clone(),
None,
));
}
}
'-' => {
let mut num = c.to_string();
@ -104,14 +110,12 @@ fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>,
Immediate(num.parse().unwrap())
}
'(' => {
let start = loc.start + 1;
let col = loc.col + 1;
let start = loc.start + 2;
let imm;
if let Some((Immediate(_), _)) = tokens.last() {
imm = Box::new(tokens.pop().unwrap());
loc.start = imm.1.start;
loc.col = imm.1.col;
} else {
return Err((
SyntaxErr::UnexpectedChar,
@ -131,15 +135,10 @@ fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>,
let end = loc.end + 1;
let reg = reg.trim();
if env.alias_to_register(reg).is_none() && env.xn_to_register(reg).is_none() {
if !env.is_valid_register(reg) {
return Err((
SyntaxErr::MemoryInvalidRegister,
Loc {
line,
col,
start,
end,
},
Loc { line, start, end },
tokens.clone(),
None,
));
@ -170,9 +169,8 @@ fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>,
// Opcode or Label definition
'a'..='z' | 'A'..='Z' | '_' => {
dbg!("op");
let mut str = c.to_string();
while let Some('a'..='z') | Some('A'..='Z') | Some('_') | Some('0'..='9') =
while let Some('a'..='z') | Some('A'..='Z') | Some('_') | Some('0'..='9') | Some('.') =
chars.peek()
{
str.push(chars.next().unwrap());
@ -185,8 +183,24 @@ fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>,
} else if let Some((Op(_, _), _)) = tokens.get(tokens.len() - 2) {
// These Registers may actually be label references or symbols, but there's ambiguity
// between them and registers, so we'll just assume they're registers for now
Register(str)
Register(str.trim().to_owned())
} else {
if env.is_valid_register(&str) {
return Err((
SyntaxErr::OutsideOp("register".to_string()),
loc.clone(),
tokens.clone(),
None,
));
}
if str.trim().contains(|c: char| !c.is_alphabetic() && c != '.') {
return Err((
SyntaxErr::UnexpectedChar,
dbg!(loc.clone()),
tokens.clone(),
Some("opcodes must only contain ascii letters".to_string()),
));
}
Op(str, vec![])
}
}
@ -194,8 +208,6 @@ fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>,
};
tokens.push((token, loc.clone()));
loc.end += 1;
loc.col += loc.end - loc.start;
loc.col;
loc.start = loc.end;
}
@ -213,7 +225,7 @@ fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>,
let (is_op, group) = group;
if is_op {
let group = group.collect::<Vec<_>>();
let (op, loc) = dbg!(dbg!(group[0].clone()));
let (op, loc) = group[0].clone();
let (op, mut args) = match op {
Op(op, args) => (op, args),
// because any register/symbol/label def is interpreted as an Op by default, this only
@ -251,7 +263,7 @@ fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>,
///
/// Returns a vector of tokens and their locations, if successful, or an error vector
/// containing the error, the location of the error, the tokens parsed up to that point,
/// and an optional message to display to the users
/// and an optional message to display to the users for each line with an error
pub fn parse(env: &Env, input: &str) -> Result<Vec<(Token, Loc)>, Vec<ParseErr>> {
let parsed_lines = input
.lines()
@ -268,7 +280,6 @@ pub fn parse(env: &Env, input: &str) -> Result<Vec<(Token, Loc)>, Vec<ParseErr>>
if err.is_empty() {
Ok(ok.into_par_iter().flat_map(|line| line.unwrap()).collect())
} else {
dbg!("err");
Err(err.into_par_iter().map(|line| line.unwrap_err()).collect())
}
}

2
test.s
View file

@ -1 +1 @@
lw a0 -4(sp)
x0 li 8(x0)