collapse some tokens into each other in the lexer and create a very basic, incomplete validator with unit tests

This commit is contained in:
abbie 2024-01-16 21:42:50 +00:00
parent 117aa64c0a
commit aa1408eaf3
Signed by: threeoh6000
GPG key ID: 801FE4AD456E922C
8 changed files with 127 additions and 29 deletions

2
Cargo.lock generated
View file

@ -4,4 +4,4 @@ version = 3
[[package]] [[package]]
name = "frostwalker" name = "frostwalker"
version = "0.0.2" version = "0.0.3"

View file

@ -1,6 +1,6 @@
[package] [package]
name = "frostwalker" name = "frostwalker"
version = "0.0.2" version = "0.0.3"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View file

@ -1,5 +1,7 @@
# Frostwalker # Frostwalker
A TOML-like configuration language parser for Rust. At present, the crate has a lexer with unit tests written for common cases. A validator and parser are planned to be added. *Frostwalker is not stable nor ready for dependency development yet. Unless you plan to implement your own formatter, please wait for 0.1.0 to release. 0.0 versions are placeholders and will not be tagged.*
A TOML-like configuration language parser for Rust. At present, the crate has a lexer and an incomplete validator with unit tests written for common cases. A formatter will eventually be added.
## Justification ## Justification
The Rust crate `toml` pulls in 8 other crates to do its job, including `serde`. While this may not be irksome to most Rust developers, I'm used to using severely underpowered Intel hardware, so low compile times are a big focus for me, and a crate made by me that requires only the standard library is preferable to one that requires other dependencies. The Rust crate `toml` pulls in 8 other crates to do its job, including `serde`. While this may not be irksome to most Rust developers, I'm used to using severely underpowered Intel hardware, so low compile times are a big focus for me, and a crate made by me that requires only the standard library is preferable to one that requires other dependencies.

View file

@ -11,11 +11,9 @@ pub enum Class {
IDENTIFIER, IDENTIFIER,
SEPARATOR, SEPARATOR,
EQUALS, EQUALS,
STRING, LITERAL,
INTEGER,
NEWLINE, NEWLINE,
TRUE, BOOLEAN,
FALSE,
UNKNOWN, UNKNOWN,
} }
@ -66,21 +64,21 @@ pub fn tokenize(source: &str) -> Vec<Token> {
} }
if words[i] == "true" || words[i] == "TRUE" { if words[i] == "true" || words[i] == "TRUE" {
tree.push(Token { class: Class::TRUE, value: None }); tree.push(Token { class: Class::BOOLEAN, value: Some("true".to_string()) });
added = true; added = true;
i = i + 1; i = i + 1;
continue; continue;
} }
if words[i] == "false" || words[i] == "FALSE" { if words[i] == "false" || words[i] == "FALSE" {
tree.push(Token { class: Class::FALSE, value: None }); tree.push(Token { class: Class::BOOLEAN, value: Some("false".to_string()) });
added = true; added = true;
i = i + 1; i = i + 1;
continue; continue;
} }
if words[i].parse::<i32>().is_ok() { if words[i].parse::<i32>().is_ok() {
tree.push(Token { class: Class::INTEGER, value: Some(words[i].to_string()) }); tree.push(Token { class: Class::LITERAL, value: Some(words[i].to_string()) });
added = true; added = true;
i = i + 1; i = i + 1;
continue; continue;
@ -112,7 +110,7 @@ pub fn tokenize(source: &str) -> Vec<Token> {
if words[i].ends_with("\"") { if words[i].ends_with("\"") {
words[i] = words[i].chars().next().map(|c| &words[i][c.len_utf8()..]).unwrap_or(""); words[i] = words[i].chars().next().map(|c| &words[i][c.len_utf8()..]).unwrap_or("");
words[i] = words[i].chars().next_back().map(|_| &words[i][..words[i].len()-1]).unwrap_or(""); words[i] = words[i].chars().next_back().map(|_| &words[i][..words[i].len()-1]).unwrap_or("");
tree.push(Token { class: Class::STRING, value: Some(words[i].replace("\\\"", "\"").to_string()) }); tree.push(Token { class: Class::LITERAL, value: Some(words[i].replace("\\\"", "\"").to_string()) });
added = true; added = true;
i = i + 1; i = i + 1;
continue; continue;
@ -129,7 +127,7 @@ pub fn tokenize(source: &str) -> Vec<Token> {
} }
built_string = format!("{} {}", built_string, words[i].replace("\\\"", "\"")); built_string = format!("{} {}", built_string, words[i].replace("\\\"", "\""));
} }
tree.push(Token { class: Class::STRING, value: Some(built_string) }); tree.push(Token { class: Class::LITERAL, value: Some(built_string) });
added = true; added = true;
i = i + 1; i = i + 1;
continue; continue;

View file

@ -6,7 +6,7 @@ fn single_key() {
let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) };
let op = Token { class: Class::EQUALS, value: None }; let op = Token { class: Class::EQUALS, value: None };
let strn = Token { class: Class::STRING, value: Some("value".to_string()) }; let strn = Token { class: Class::LITERAL, value: Some("value".to_string()) };
let manual_tree = vec![id, op, strn]; let manual_tree = vec![id, op, strn];
assert_eq!(tree, manual_tree); assert_eq!(tree, manual_tree);
} }
@ -18,9 +18,9 @@ fn single_key_array() {
let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) };
let op = Token { class: Class::EQUALS, value: None }; let op = Token { class: Class::EQUALS, value: None };
let t1 = Token { class: Class::SEPARATOR, value: Some("[".to_string()) }; let t1 = Token { class: Class::SEPARATOR, value: Some("[".to_string()) };
let strn = Token { class: Class::STRING, value: Some("\"value".to_string()) }; let strn = Token { class: Class::LITERAL, value: Some("\"value".to_string()) };
let t2 = Token { class: Class::SEPARATOR, value: Some(",".to_string()) }; let t2 = Token { class: Class::SEPARATOR, value: Some(",".to_string()) };
let strn2 = Token { class: Class::INTEGER, value: Some("6".to_string()) }; let strn2 = Token { class: Class::LITERAL, value: Some("6".to_string()) };
let t3 = Token { class: Class::SEPARATOR, value: Some("]".to_string()) }; let t3 = Token { class: Class::SEPARATOR, value: Some("]".to_string()) };
let manual_tree = vec![id, op, t1, strn, t2, strn2, t3]; let manual_tree = vec![id, op, t1, strn, t2, strn2, t3];
assert_eq!(tree, manual_tree); assert_eq!(tree, manual_tree);
@ -43,7 +43,7 @@ fn single_key_escaped_double_quote_in_string() {
let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) };
let op = Token { class: Class::EQUALS, value: None }; let op = Token { class: Class::EQUALS, value: None };
let strn = Token { class: Class::STRING, value: Some("\"value".to_string()) }; let strn = Token { class: Class::LITERAL, value: Some("\"value".to_string()) };
let manual_tree = vec![id, op, strn]; let manual_tree = vec![id, op, strn];
assert_eq!(tree, manual_tree); assert_eq!(tree, manual_tree);
} }
@ -54,11 +54,11 @@ fn integers() {
let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) };
let op = Token { class: Class::EQUALS, value: None }; let op = Token { class: Class::EQUALS, value: None };
let strn = Token { class: Class::INTEGER, value: Some("123567".to_string()) }; let strn = Token { class: Class::LITERAL, value: Some("123567".to_string()) };
let nl = Token { class: Class::NEWLINE, value: None }; let nl = Token { class: Class::NEWLINE, value: None };
let id2 = Token { class: Class::IDENTIFIER, value: Some("key2".to_string()) }; let id2 = Token { class: Class::IDENTIFIER, value: Some("key2".to_string()) };
let op2 = Token { class: Class::EQUALS, value: None }; let op2 = Token { class: Class::EQUALS, value: None };
let strn2 = Token { class: Class::INTEGER, value: Some("-400".to_string()) }; let strn2 = Token { class: Class::LITERAL, value: Some("-400".to_string()) };
let manual_tree = vec![id, op, strn, nl, id2, op2, strn2]; let manual_tree = vec![id, op, strn, nl, id2, op2, strn2];
assert_eq!(tree, manual_tree); assert_eq!(tree, manual_tree);
} }
@ -69,11 +69,11 @@ fn booleans() {
let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) };
let op = Token { class: Class::EQUALS, value: None }; let op = Token { class: Class::EQUALS, value: None };
let strn = Token { class: Class::TRUE, value: None }; let strn = Token { class: Class::BOOLEAN, value: Some("true".to_string()) };
let nl = Token { class: Class::NEWLINE, value: None }; let nl = Token { class: Class::NEWLINE, value: None };
let id2 = Token { class: Class::IDENTIFIER, value: Some("key2".to_string()) }; let id2 = Token { class: Class::IDENTIFIER, value: Some("key2".to_string()) };
let op2 = Token { class: Class::EQUALS, value: None }; let op2 = Token { class: Class::EQUALS, value: None };
let strn2 = Token { class: Class::FALSE, value: None }; let strn2 = Token { class: Class::BOOLEAN, value: Some("false".to_string()) };
let manual_tree = vec![id, op, strn, nl, id2, op2, strn2]; let manual_tree = vec![id, op, strn, nl, id2, op2, strn2];
assert_eq!(tree, manual_tree); assert_eq!(tree, manual_tree);
} }
@ -84,7 +84,7 @@ fn single_key_space_in_string() {
let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) };
let op = Token { class: Class::EQUALS, value: None }; let op = Token { class: Class::EQUALS, value: None };
let strn = Token { class: Class::STRING, value: Some("a b c".to_string()) }; let strn = Token { class: Class::LITERAL, value: Some("a b c".to_string()) };
let manual_tree = vec![id, op, strn]; let manual_tree = vec![id, op, strn];
assert_eq!(tree, manual_tree); assert_eq!(tree, manual_tree);
} }
@ -95,7 +95,7 @@ fn single_key_double_space_in_string() {
let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) };
let op = Token { class: Class::EQUALS, value: None }; let op = Token { class: Class::EQUALS, value: None };
let strn = Token { class: Class::STRING, value: Some("a b c d".to_string()) }; let strn = Token { class: Class::LITERAL, value: Some("a b c d".to_string()) };
let manual_tree = vec![id, op, strn]; let manual_tree = vec![id, op, strn];
assert_eq!(tree, manual_tree); assert_eq!(tree, manual_tree);
} }
@ -108,13 +108,13 @@ fn triple_key() {
let nl2 = Token { class: Class::NEWLINE, value: None }; let nl2 = Token { class: Class::NEWLINE, value: None };
let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) };
let op = Token { class: Class::EQUALS, value: None }; let op = Token { class: Class::EQUALS, value: None };
let strn = Token { class: Class::STRING, value: Some("value".to_string()) }; let strn = Token { class: Class::LITERAL, value: Some("value".to_string()) };
let id2 = Token { class: Class::IDENTIFIER, value: Some("key2".to_string()) }; let id2 = Token { class: Class::IDENTIFIER, value: Some("key2".to_string()) };
let op2 = Token { class: Class::EQUALS, value: None }; let op2 = Token { class: Class::EQUALS, value: None };
let strn2 = Token { class: Class::STRING, value: Some("value2".to_string()) }; let strn2 = Token { class: Class::LITERAL, value: Some("value2".to_string()) };
let id3 = Token { class: Class::IDENTIFIER, value: Some("key3".to_string()) }; let id3 = Token { class: Class::IDENTIFIER, value: Some("key3".to_string()) };
let op3 = Token { class: Class::EQUALS, value: None }; let op3 = Token { class: Class::EQUALS, value: None };
let strn3 = Token { class: Class::STRING, value: Some("value3".to_string()) }; let strn3 = Token { class: Class::LITERAL, value: Some("value3".to_string()) };
let manual_tree = vec![id, op, strn, nl, id2, op2, strn2, nl2, id3, op3, strn3]; let manual_tree = vec![id, op, strn, nl, id2, op2, strn2, nl2, id3, op3, strn3];
assert_eq!(tree, manual_tree); assert_eq!(tree, manual_tree);
@ -126,7 +126,7 @@ fn comment() {
let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) };
let op = Token { class: Class::EQUALS, value: None }; let op = Token { class: Class::EQUALS, value: None };
let strn = Token { class: Class::STRING, value: Some("value".to_string()) }; let strn = Token { class: Class::LITERAL, value: Some("value".to_string()) };
let manual_tree = vec![id, op, strn]; let manual_tree = vec![id, op, strn];
assert_eq!(tree, manual_tree); assert_eq!(tree, manual_tree);
@ -138,7 +138,7 @@ fn inline_comment() {
let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) };
let op = Token { class: Class::EQUALS, value: None }; let op = Token { class: Class::EQUALS, value: None };
let strn = Token { class: Class::STRING, value: Some("value".to_string()) }; let strn = Token { class: Class::LITERAL, value: Some("value".to_string()) };
let manual_tree = vec![id, op, strn]; let manual_tree = vec![id, op, strn];
assert_eq!(tree, manual_tree); assert_eq!(tree, manual_tree);
@ -150,7 +150,7 @@ fn inline_comment_no_spaces() {
let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) };
let op = Token { class: Class::EQUALS, value: None }; let op = Token { class: Class::EQUALS, value: None };
let strn = Token { class: Class::STRING, value: Some("value".to_string()) }; let strn = Token { class: Class::LITERAL, value: Some("value".to_string()) };
let manual_tree = vec![id, op, strn]; let manual_tree = vec![id, op, strn];
assert_eq!(tree, manual_tree); assert_eq!(tree, manual_tree);
@ -162,7 +162,7 @@ fn quoted_identifier() {
let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) };
let op = Token { class: Class::EQUALS, value: None }; let op = Token { class: Class::EQUALS, value: None };
let strn = Token { class: Class::STRING, value: Some("value".to_string()) }; let strn = Token { class: Class::LITERAL, value: Some("value".to_string()) };
let manual_tree = vec![id, op, strn]; let manual_tree = vec![id, op, strn];
assert_eq!(tree, manual_tree); assert_eq!(tree, manual_tree);

View file

@ -1,4 +1,8 @@
pub mod lexer; pub mod lexer;
pub mod validator;
#[cfg(test)] #[cfg(test)]
mod lexer_tests; mod lexer_tests;
#[cfg(test)]
mod validator_tests;

55
src/validator.rs Normal file
View file

@ -0,0 +1,55 @@
use super::lexer::{Token, Class};
/// The kind of token the validator requires next while walking a token stream.
///
/// The `Debug` rendering of a variant is interpolated verbatim into the
/// validator's error messages, so the variant names are user-visible text.
#[derive(Debug, PartialEq)]
enum ExpectedClass {
    /// A key name is expected; this is the state at the start of every line.
    IDENTIFIER,
    /// An `=` is expected, directly after a key name.
    EQUALS,
    /// A line break is expected, directly after a value.
    NEWLINE,
    /// A value is expected, directly after `=`. Only literal and boolean
    /// tokens are accepted in this state so far.
    LITERALORSEPARATOR,
}
/// Checks that a token stream is a sequence of `IDENTIFIER = value` lines.
///
/// Each entry must be an identifier, an equals sign and a literal or boolean
/// value, and consecutive entries must be separated by at least one newline.
/// Newlines that appear where a new entry could start (leading newlines and
/// blank lines between entries) are skipped and only advance the line counter.
///
/// # Arguments
///
/// * `tree` - the tokens to check, in source order.
///
/// # Returns
///
/// `None` when the stream is valid, otherwise `Some(message)` describing the
/// first problem found and the 1-based line it occurred on. A stream that
/// ends in the middle of an entry (after the identifier or after `=`) is
/// reported as an error rather than silently accepted.
// The `&Vec<Token>` parameter type is kept as-is so existing callers are unaffected.
pub fn validate(tree: &Vec<Token>) -> Option<String> {
    let mut expected_token = ExpectedClass::IDENTIFIER;
    let mut line = 1;

    for token in tree {
        expected_token = match (&token.class, &expected_token) {
            // Tokens the lexer could not classify are invalid in every position.
            (Class::UNKNOWN, _) => {
                return Some(format!(
                    "Invalid token {} at line {}.",
                    token.value.as_deref().unwrap_or("None"),
                    line
                ));
            }
            // A newline at the start of a line is a blank line, and a newline
            // after a value ends the entry. Either way the next line starts fresh.
            (Class::NEWLINE, ExpectedClass::IDENTIFIER | ExpectedClass::NEWLINE) => {
                line += 1;
                ExpectedClass::IDENTIFIER
            }
            (Class::IDENTIFIER, ExpectedClass::IDENTIFIER) => ExpectedClass::EQUALS,
            (Class::EQUALS, ExpectedClass::EQUALS) => ExpectedClass::LITERALORSEPARATOR,
            (Class::LITERAL | Class::BOOLEAN, ExpectedClass::LITERALORSEPARATOR) => {
                ExpectedClass::NEWLINE
            }
            // Any other combination is a token in the wrong place.
            (found, expected) => {
                return Some(format!(
                    "{:?} found where {:?} expected at line {}.",
                    found, expected, line
                ));
            }
        };
    }

    match expected_token {
        // The stream stopped part-way through an entry (`key` or `key =`).
        ExpectedClass::EQUALS | ExpectedClass::LITERALORSEPARATOR => Some(format!(
            "End of input found where {:?} expected at line {}.",
            expected_token, line
        )),
        // Ending after a complete entry or on an empty line is fine.
        ExpectedClass::IDENTIFIER | ExpectedClass::NEWLINE => None,
    }
}

39
src/validator_tests.rs Normal file
View file

@ -0,0 +1,39 @@
use super::validator;
use super::lexer::{Token, Class};
/// A lone `key = literal` token sequence must validate without an error.
#[test]
fn single_key() {
    let tokens = vec![
        Token { class: Class::IDENTIFIER, value: Some(String::from("key")) },
        Token { class: Class::EQUALS, value: None },
        Token { class: Class::LITERAL, value: Some(String::from("10")) },
    ];

    assert!(validator::validate(&tokens).is_none());
}
/// Two `key = literal` entries separated by a newline must validate without
/// an error.
#[test]
fn triple_key() {
    let tokens = vec![
        // First entry.
        Token { class: Class::IDENTIFIER, value: Some(String::from("key")) },
        Token { class: Class::EQUALS, value: None },
        Token { class: Class::LITERAL, value: Some(String::from("10")) },
        // Line break between the entries.
        Token { class: Class::NEWLINE, value: None },
        // Second entry.
        Token { class: Class::IDENTIFIER, value: Some(String::from("key2")) },
        Token { class: Class::EQUALS, value: None },
        Token { class: Class::LITERAL, value: Some(String::from("13")) },
    ];

    assert!(validator::validate(&tokens).is_none());
}
/// Two entries with no newline between them must be rejected, and the error
/// must name the identifier that appeared where a newline was required.
#[test]
fn triple_key_no_newline() {
    let tokens = vec![
        // First entry.
        Token { class: Class::IDENTIFIER, value: Some(String::from("key")) },
        Token { class: Class::EQUALS, value: None },
        Token { class: Class::LITERAL, value: Some(String::from("10")) },
        // Second entry follows immediately, with no NEWLINE token in between.
        Token { class: Class::IDENTIFIER, value: Some(String::from("key2")) },
        Token { class: Class::EQUALS, value: None },
        Token { class: Class::LITERAL, value: Some(String::from("13")) },
    ];

    let outcome = validator::validate(&tokens);

    assert_eq!(
        outcome.as_deref(),
        Some("IDENTIFIER found where NEWLINE expected at line 1.")
    );
}