Report misuse of registers before opcodes and enforce proper register naming

This commit is contained in:
Lumi Kalt 2024-01-21 14:52:58 +00:00
parent 9a931b4b8f
commit 5939a1c89b
5 changed files with 53 additions and 34 deletions

View file

@ -5,7 +5,7 @@ pub struct Env {
pub register_alias: HashMap<String, usize>, pub register_alias: HashMap<String, usize>,
labels: HashMap<String, usize>, labels: HashMap<String, usize>,
registers: [i64; 32], registers: [i64; 32],
stack: Vec<i64>, // TODO: Find the size of the stack pub stack: Vec<i64>, // TODO: Find the size of the stack
} }
impl Env { impl Env {
@ -69,12 +69,20 @@ impl Env {
self.register_alias.get(reg).copied() self.register_alias.get(reg).copied()
} }
pub fn xn_to_register(&self, reg: &str) -> Option<usize> { pub fn xn_to_register(&self, reg: &str) -> Option<usize> {
if reg.starts_with("x") { if reg == "x0" {
reg[1..].parse::<usize>().ok() Some(0)
} else if reg.starts_with("x") && !reg[1..].starts_with("0") {
match reg[1..].parse::<usize>() {
Ok(n) if n < 32 => Some(n),
_ => None,
}
} else { } else {
None None
} }
} }
pub fn is_valid_register(&self, reg: &str) -> bool {
self.alias_to_register(reg).or_else(|| self.xn_to_register(reg)).is_some()
}
pub fn add_label(&mut self, label: &str, value: usize) { pub fn add_label(&mut self, label: &str, value: usize) {
self.labels.insert(label.to_string(), value); self.labels.insert(label.to_string(), value);

View file

@ -30,7 +30,7 @@ impl SyntaxErr {
SyntaxErr::UnmatchedParen(true) => "add '(' before the register name".to_string(), SyntaxErr::UnmatchedParen(true) => "add '(' before the register name".to_string(),
SyntaxErr::UnexpectedChar => "ensure the input is well-formed".to_string(), SyntaxErr::UnexpectedChar => "ensure the input is well-formed".to_string(),
SyntaxErr::OutsideOp(kind) => format!("add '{}'s only after an opcode", kind), SyntaxErr::OutsideOp(kind) => format!("add '{}'s only after an opcode", kind),
SyntaxErr::MemoryInvalidRegister => "valid registers are of the form xN, 0 <= N < 32, or the standard aliases".to_string(), SyntaxErr::MemoryInvalidRegister => "registers are either xN (N < 32 with no leading 0) or the standard aliases".to_string(),
} }
} }
} }

View file

@ -1,5 +1,5 @@
use codespan_reporting::{ use codespan_reporting::{
diagnostic::{self, Diagnostic, Label}, diagnostic::{Diagnostic, Label},
files::SimpleFile, files::SimpleFile,
term::{ term::{
self, self,
@ -25,7 +25,7 @@ fn main() -> anyhow::Result<()> {
Err(errs) => { Err(errs) => {
for err in errs { for err in errs {
let start = err.1.start; let start = err.1.start;
let end = err.1.end; let end = err.1.end + 1;
let diagnostic = Diagnostic::error() let diagnostic = Diagnostic::error()
.with_message("Syntax Error") .with_message("Syntax Error")

View file

@ -53,7 +53,6 @@ type ParseErr = (SyntaxErr, Loc, Vec<(Token, Loc)>, Option<String>);
#[derive(Debug, Clone, Copy)] #[derive(Debug, Clone, Copy)]
pub struct Loc { pub struct Loc {
pub line: usize, pub line: usize,
pub col: usize,
pub start: usize, pub start: usize,
pub end: usize, pub end: usize,
} }
@ -61,7 +60,6 @@ pub struct Loc {
fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>, ParseErr> { fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>, ParseErr> {
let mut loc = Loc { let mut loc = Loc {
line, line,
col: 1,
start: 0, start: 0,
end: 0, end: 0,
}; };
@ -73,12 +71,7 @@ fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>,
while let Some(c) = chars.next() { while let Some(c) = chars.next() {
let token = match c { let token = match c {
'\t' => { '\t' | ' ' => Spacing,
// TODO: Make a flag to set the tab size
loc.col += 3;
Spacing
}
' ' => Spacing,
'#' => { '#' => {
while let Some(_) = chars.peek() { while let Some(_) = chars.peek() {
@ -94,7 +87,20 @@ fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>,
num.push(chars.next().unwrap()); num.push(chars.next().unwrap());
loc.end += 1; loc.end += 1;
} }
Immediate(num.parse().unwrap()) if let Some('(') | Some(' ') = chars.peek() {
Immediate(num.parse().unwrap())
} else {
return Err((
SyntaxErr::UnexpectedChar,
Loc {
line,
start: loc.end + 1,
end: loc.end + 1,
},
tokens.clone(),
None,
));
}
} }
'-' => { '-' => {
let mut num = c.to_string(); let mut num = c.to_string();
@ -104,14 +110,12 @@ fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>,
Immediate(num.parse().unwrap()) Immediate(num.parse().unwrap())
} }
'(' => { '(' => {
let start = loc.start + 1; let start = loc.start + 2;
let col = loc.col + 1;
let imm; let imm;
if let Some((Immediate(_), _)) = tokens.last() { if let Some((Immediate(_), _)) = tokens.last() {
imm = Box::new(tokens.pop().unwrap()); imm = Box::new(tokens.pop().unwrap());
loc.start = imm.1.start; loc.start = imm.1.start;
loc.col = imm.1.col;
} else { } else {
return Err(( return Err((
SyntaxErr::UnexpectedChar, SyntaxErr::UnexpectedChar,
@ -131,15 +135,10 @@ fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>,
let end = loc.end + 1; let end = loc.end + 1;
let reg = reg.trim(); let reg = reg.trim();
if env.alias_to_register(reg).is_none() && env.xn_to_register(reg).is_none() { if !env.is_valid_register(reg) {
return Err(( return Err((
SyntaxErr::MemoryInvalidRegister, SyntaxErr::MemoryInvalidRegister,
Loc { Loc { line, start, end },
line,
col,
start,
end,
},
tokens.clone(), tokens.clone(),
None, None,
)); ));
@ -170,9 +169,8 @@ fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>,
// Opcode or Label definition // Opcode or Label definition
'a'..='z' | 'A'..='Z' | '_' => { 'a'..='z' | 'A'..='Z' | '_' => {
dbg!("op");
let mut str = c.to_string(); let mut str = c.to_string();
while let Some('a'..='z') | Some('A'..='Z') | Some('_') | Some('0'..='9') = while let Some('a'..='z') | Some('A'..='Z') | Some('_') | Some('0'..='9') | Some('.') =
chars.peek() chars.peek()
{ {
str.push(chars.next().unwrap()); str.push(chars.next().unwrap());
@ -185,8 +183,24 @@ fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>,
} else if let Some((Op(_, _), _)) = tokens.get(tokens.len() - 2) { } else if let Some((Op(_, _), _)) = tokens.get(tokens.len() - 2) {
// These Registers may actually be label references or symbols, but there's ambiguity // These Registers may actually be label references or symbols, but there's ambiguity
// between them and registers, so we'll just assume they're registers for now // between them and registers, so we'll just assume they're registers for now
Register(str) Register(str.trim().to_owned())
} else { } else {
if env.is_valid_register(&str) {
return Err((
SyntaxErr::OutsideOp("register".to_string()),
loc.clone(),
tokens.clone(),
None,
));
}
if str.trim().contains(|c: char| !c.is_alphabetic() && c != '.') {
return Err((
SyntaxErr::UnexpectedChar,
dbg!(loc.clone()),
tokens.clone(),
Some("opcodes must only contain ascii letters".to_string()),
));
}
Op(str, vec![]) Op(str, vec![])
} }
} }
@ -194,8 +208,6 @@ fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>,
}; };
tokens.push((token, loc.clone())); tokens.push((token, loc.clone()));
loc.end += 1; loc.end += 1;
loc.col += loc.end - loc.start;
loc.col;
loc.start = loc.end; loc.start = loc.end;
} }
@ -213,7 +225,7 @@ fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>,
let (is_op, group) = group; let (is_op, group) = group;
if is_op { if is_op {
let group = group.collect::<Vec<_>>(); let group = group.collect::<Vec<_>>();
let (op, loc) = dbg!(dbg!(group[0].clone())); let (op, loc) = group[0].clone();
let (op, mut args) = match op { let (op, mut args) = match op {
Op(op, args) => (op, args), Op(op, args) => (op, args),
// because any register/symbol/label def is interpreted as an Op by default, this only // because any register/symbol/label def is interpreted as an Op by default, this only
@ -251,7 +263,7 @@ fn parse_line(env: &Env, input: &str, line: usize) -> Result<Vec<(Token, Loc)>,
/// ///
/// Returns a vector of tokens and their locations, if successful, or an error vector /// Returns a vector of tokens and their locations, if successful, or an error vector
/// containing the error, the location of the error, the tokens parsed up to that point, /// containing the error, the location of the error, the tokens parsed up to that point,
/// and an optional message to display to the users /// and an optional message to display to the users for each line with an error
pub fn parse(env: &Env, input: &str) -> Result<Vec<(Token, Loc)>, Vec<ParseErr>> { pub fn parse(env: &Env, input: &str) -> Result<Vec<(Token, Loc)>, Vec<ParseErr>> {
let parsed_lines = input let parsed_lines = input
.lines() .lines()
@ -268,7 +280,6 @@ pub fn parse(env: &Env, input: &str) -> Result<Vec<(Token, Loc)>, Vec<ParseErr>>
if err.is_empty() { if err.is_empty() {
Ok(ok.into_par_iter().flat_map(|line| line.unwrap()).collect()) Ok(ok.into_par_iter().flat_map(|line| line.unwrap()).collect())
} else { } else {
dbg!("err");
Err(err.into_par_iter().map(|line| line.unwrap_err()).collect()) Err(err.into_par_iter().map(|line| line.unwrap_err()).collect())
} }
} }

2
test.s
View file

@ -1 +1 @@
lw a0 -4(sp) x0 li 8(x0)