update README, add array-related tokens to lexer and create related tests

This commit is contained in:
abbie 2024-01-16 20:25:09 +00:00
parent 6b2cfdf187
commit 3744490034
Signed by: threeoh6000
GPG key ID: 801FE4AD456E922C
5 changed files with 81 additions and 26 deletions

2
Cargo.lock generated
View file

@ -4,4 +4,4 @@ version = 3
[[package]] [[package]]
name = "frostwalker" name = "frostwalker"
version = "0.0.0" version = "0.0.1"

View file

@ -1,6 +1,6 @@
[package] [package]
name = "frostwalker" name = "frostwalker"
version = "0.0.0" version = "0.0.1"
edition = "2021" edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

View file

@ -1,5 +1,5 @@
# Frostwalker # Frostwalker
A TOML-like configuration language parser for Rust. At present, the crate has a work in progress lexer with unit tests written for common cases. A validator and parser are planned to be added. A TOML-like configuration language parser for Rust. At present, the crate has a lexer with unit tests written for common cases. A validator and parser are planned to be added.
## Justification ## Justification
The Rust crate `toml` pulls in 8 other crates to do its job, including `serde`. While this may not be irksome to most Rust developers, I'm used to using severely underpowered Intel hardware, so low compile times are a big focus for me; a crate made by me that requires only the standard library is preferable to one that requires other dependencies. The Rust crate `toml` pulls in 8 other crates to do its job, including `serde`. While this may not be irksome to most Rust developers, I'm used to using severely underpowered Intel hardware, so low compile times are a big focus for me; a crate made by me that requires only the standard library is preferable to one that requires other dependencies.
@ -16,3 +16,4 @@ I also use a TOML-like language instead of TOML directly as I don't need all the
* The equals sign * The equals sign
* Booleans * Booleans
* Integers * Integers
* Arrays (single depth only)

View file

@ -9,35 +9,62 @@ pub struct Token {
#[derive(PartialEq)] #[derive(PartialEq)]
/// Lexical category carried by every `Token` the lexer emits.
///
/// Some classes carry their text in `Token::value`; others leave it `None`.
pub enum Class { pub enum Class {
// A bare key name such as `key`; the name is stored in the token's value.
IDENTIFIER, IDENTIFIER,
// Array punctuation: `[`, `]` and `,`; the symbol itself is stored in the value.
SEPERATOR, SEPARATOR,
// The `=` sign; the tests build it with no value.
EQUALS, EQUALS,
// A quoted string; the value holds the text with the surrounding quotes removed.
STRING, STRING,
// An integer literal such as `6` or `-400`; the value holds the digits as text.
INTEGER, INTEGER,
// Marks the end of a source line that produced tokens; carries no value.
NEWLINE, NEWLINE,
// Boolean literal `true` / `TRUE`; carries no value.
TRUE, TRUE,
// Boolean literal false; carries no value.
FALSE, FALSE,
// A word the lexer did not recognise (e.g. `^`); the raw word is stored in the value.
UNKNOWN,
} }
/// British-spelling alias for `tokenize`.
///
/// Forwards `source` untouched and returns exactly what `tokenize` returns.
pub fn tokenise(source: &str) -> Option<Vec<Token>> { pub fn tokenise(source: &str) -> Vec<Token> {
return tokenize(source); return tokenize(source);
} }
pub fn tokenize(source: &str) -> Option<Vec<Token>> { pub fn tokenize(source: &str) -> Vec<Token> {
let lines: Vec<&str> = source.lines().collect(); let lines: Vec<&str> = source.lines().collect();
let lines_len = lines.len(); let lines_len = lines.len();
println!("{:#?}", lines);
let mut tree: Vec<Token> = vec![]; let mut tree: Vec<Token> = vec![];
for line in lines { for line in lines {
let mut added = false; let mut added = false;
let mut words: Vec<&str> = line.split(" ").collect(); let mut words: Vec<&str> = line.split(" ").collect();
let mut i = 0; let mut i = 0;
while i < words.len() { while i < words.len() {
if words[i].ends_with(",") && words[i-1] == "[" {
words[i] = words[i].chars().next_back().map(|_| &words[i][..words[i].len()-1]).unwrap_or("");
words.insert(i+1, ",");
}
if words[i] == "," {
tree.push(Token { class: Class::SEPARATOR, value: Some(",".to_string()) });
added = true;
i = i + 1;
continue;
}
if words[i].replace(" ","") == "" { if words[i].replace(" ","") == "" {
i = i + 1; i = i + 1;
continue; continue;
} }
if words[i] == "[" {
tree.push(Token { class: Class::SEPARATOR, value: Some("[".to_string()) });
added = true;
i = i + 1;
continue;
}
if words[i] == "]" {
tree.push(Token { class: Class::SEPARATOR, value: Some("]".to_string()) });
added = true;
i = i + 1;
continue;
}
if words[i] == "true" || words[i] == "TRUE" { if words[i] == "true" || words[i] == "TRUE" {
tree.push(Token { class: Class::TRUE, value: None }); tree.push(Token { class: Class::TRUE, value: None });
added = true; added = true;
@ -85,22 +112,22 @@ pub fn tokenize(source: &str) -> Option<Vec<Token>> {
if words[i].ends_with("\"") { if words[i].ends_with("\"") {
words[i] = words[i].chars().next().map(|c| &words[i][c.len_utf8()..]).unwrap_or(""); words[i] = words[i].chars().next().map(|c| &words[i][c.len_utf8()..]).unwrap_or("");
words[i] = words[i].chars().next_back().map(|_| &words[i][..words[i].len()-1]).unwrap_or(""); words[i] = words[i].chars().next_back().map(|_| &words[i][..words[i].len()-1]).unwrap_or("");
tree.push(Token { class: Class::STRING, value: Some(words[i].to_string()) }); tree.push(Token { class: Class::STRING, value: Some(words[i].replace("\\\"", "\"").to_string()) });
added = true; added = true;
i = i + 1; i = i + 1;
continue; continue;
} else { } else {
words[i] = words[i].chars().next().map(|c| &words[i][c.len_utf8()..]).unwrap_or(""); words[i] = words[i].chars().next().map(|c| &words[i][c.len_utf8()..]).unwrap_or("");
let mut built_string = words[i].to_string(); let mut built_string = words[i].replace("\\\"", "\"").to_string();
loop { loop {
i = i + 1; i = i + 1;
if words[i].ends_with("\"") { if words[i].ends_with("\"") {
words[i] = words[i].chars().next_back().map(|_| &words[i][..words[i].len()-1]).unwrap_or(""); words[i] = words[i].chars().next_back().map(|_| &words[i][..words[i].len()-1]).unwrap_or("");
built_string = format!("{} {}", built_string, words[i]); built_string = format!("{} {}", built_string, words[i].replace("\\\"", "\""));
break; break;
} }
built_string = format!("{} {}", built_string, words[i]); built_string = format!("{} {}", built_string, words[i].replace("\\\"", "\""));
} }
tree.push(Token { class: Class::STRING, value: Some(built_string) }); tree.push(Token { class: Class::STRING, value: Some(built_string) });
added = true; added = true;
@ -113,7 +140,8 @@ pub fn tokenize(source: &str) -> Option<Vec<Token>> {
break; break;
} }
return None; tree.push(Token { class: Class::UNKNOWN, value: Some(words[i].to_string()) });
i = i + 1;
} }
if lines_len > 1 && added { if lines_len > 1 && added {
tree.push(Token { class: Class::NEWLINE, value: None }); tree.push(Token { class: Class::NEWLINE, value: None });
@ -124,5 +152,5 @@ pub fn tokenize(source: &str) -> Option<Vec<Token>> {
tree.pop(); tree.pop();
} }
return Some(tree); return tree;
} }

View file

@ -12,7 +12,33 @@ mod lexer_tests {
let op = Token { class: Class::EQUALS, value: None }; let op = Token { class: Class::EQUALS, value: None };
let strn = Token { class: Class::STRING, value: Some("value".to_string()) }; let strn = Token { class: Class::STRING, value: Some("value".to_string()) };
let manual_tree = vec![id, op, strn]; let manual_tree = vec![id, op, strn];
assert_eq!(tree.unwrap(), manual_tree); assert_eq!(tree, manual_tree);
}
/// Lexes a single `key = [ ... ]` assignment holding a string and an integer.
///
/// The input is `key = [ "\"value", 6 ]`: the string element contains an
/// escaped quote and has the separating comma glued to its closing quote.
/// The expected stream shows the comma split off into its own SEPARATOR token
/// and the `\"` escape reduced to a plain `"` in the STRING value.
#[test]
fn single_key_array() {
let tree = lexer::tokenize("key = [ \"\\\"value\", 6 ]");
let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) };
let op = Token { class: Class::EQUALS, value: None };
// Opening bracket of the array.
let t1 = Token { class: Class::SEPARATOR, value: Some("[".to_string()) };
// First element: the unescaped string `"value` (leading quote is part of the text).
let strn = Token { class: Class::STRING, value: Some("\"value".to_string()) };
// Comma between the two elements.
let t2 = Token { class: Class::SEPARATOR, value: Some(",".to_string()) };
// Second element; despite the `strn2` name this is an INTEGER token.
let strn2 = Token { class: Class::INTEGER, value: Some("6".to_string()) };
// Closing bracket of the array.
let t3 = Token { class: Class::SEPARATOR, value: Some("]".to_string()) };
let manual_tree = vec![id, op, t1, strn, t2, strn2, t3];
assert_eq!(tree, manual_tree);
}
/// Checks that an unrecognised word still yields a token stream.
///
/// For `key = ^` the lexer is expected to emit an UNKNOWN token carrying the
/// raw text `^` after the identifier and the equals sign.
#[test]
fn single_key_unknown_token() {
let tree = lexer::tokenize("key = ^");
let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) };
let op = Token { class: Class::EQUALS, value: None };
// Despite the `strn` name this is the UNKNOWN token holding the unrecognised word.
let strn = Token { class: Class::UNKNOWN, value: Some("^".to_string()) };
let manual_tree = vec![id, op, strn];
assert_eq!(tree, manual_tree);
} }
#[test] #[test]
@ -21,9 +47,9 @@ mod lexer_tests {
let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) };
let op = Token { class: Class::EQUALS, value: None }; let op = Token { class: Class::EQUALS, value: None };
let strn = Token { class: Class::STRING, value: Some("\\\"value".to_string()) }; let strn = Token { class: Class::STRING, value: Some("\"value".to_string()) };
let manual_tree = vec![id, op, strn]; let manual_tree = vec![id, op, strn];
assert_eq!(tree.unwrap(), manual_tree); assert_eq!(tree, manual_tree);
} }
#[test] #[test]
@ -38,7 +64,7 @@ mod lexer_tests {
let op2 = Token { class: Class::EQUALS, value: None }; let op2 = Token { class: Class::EQUALS, value: None };
let strn2 = Token { class: Class::INTEGER, value: Some("-400".to_string()) }; let strn2 = Token { class: Class::INTEGER, value: Some("-400".to_string()) };
let manual_tree = vec![id, op, strn, nl, id2, op2, strn2]; let manual_tree = vec![id, op, strn, nl, id2, op2, strn2];
assert_eq!(tree.unwrap(), manual_tree); assert_eq!(tree, manual_tree);
} }
#[test] #[test]
@ -53,7 +79,7 @@ mod lexer_tests {
let op2 = Token { class: Class::EQUALS, value: None }; let op2 = Token { class: Class::EQUALS, value: None };
let strn2 = Token { class: Class::FALSE, value: None }; let strn2 = Token { class: Class::FALSE, value: None };
let manual_tree = vec![id, op, strn, nl, id2, op2, strn2]; let manual_tree = vec![id, op, strn, nl, id2, op2, strn2];
assert_eq!(tree.unwrap(), manual_tree); assert_eq!(tree, manual_tree);
} }
#[test] #[test]
@ -64,7 +90,7 @@ mod lexer_tests {
let op = Token { class: Class::EQUALS, value: None }; let op = Token { class: Class::EQUALS, value: None };
let strn = Token { class: Class::STRING, value: Some("a b c".to_string()) }; let strn = Token { class: Class::STRING, value: Some("a b c".to_string()) };
let manual_tree = vec![id, op, strn]; let manual_tree = vec![id, op, strn];
assert_eq!(tree.unwrap(), manual_tree); assert_eq!(tree, manual_tree);
} }
#[test] #[test]
@ -75,7 +101,7 @@ mod lexer_tests {
let op = Token { class: Class::EQUALS, value: None }; let op = Token { class: Class::EQUALS, value: None };
let strn = Token { class: Class::STRING, value: Some("a b c d".to_string()) }; let strn = Token { class: Class::STRING, value: Some("a b c d".to_string()) };
let manual_tree = vec![id, op, strn]; let manual_tree = vec![id, op, strn];
assert_eq!(tree.unwrap(), manual_tree); assert_eq!(tree, manual_tree);
} }
#[test] #[test]
@ -95,7 +121,7 @@ mod lexer_tests {
let strn3 = Token { class: Class::STRING, value: Some("value3".to_string()) }; let strn3 = Token { class: Class::STRING, value: Some("value3".to_string()) };
let manual_tree = vec![id, op, strn, nl, id2, op2, strn2, nl2, id3, op3, strn3]; let manual_tree = vec![id, op, strn, nl, id2, op2, strn2, nl2, id3, op3, strn3];
assert_eq!(tree.unwrap(), manual_tree); assert_eq!(tree, manual_tree);
} }
#[test] #[test]
@ -107,7 +133,7 @@ mod lexer_tests {
let strn = Token { class: Class::STRING, value: Some("value".to_string()) }; let strn = Token { class: Class::STRING, value: Some("value".to_string()) };
let manual_tree = vec![id, op, strn]; let manual_tree = vec![id, op, strn];
assert_eq!(tree.unwrap(), manual_tree); assert_eq!(tree, manual_tree);
} }
#[test] #[test]
@ -119,7 +145,7 @@ mod lexer_tests {
let strn = Token { class: Class::STRING, value: Some("value".to_string()) }; let strn = Token { class: Class::STRING, value: Some("value".to_string()) };
let manual_tree = vec![id, op, strn]; let manual_tree = vec![id, op, strn];
assert_eq!(tree.unwrap(), manual_tree); assert_eq!(tree, manual_tree);
} }
#[test] #[test]
@ -131,7 +157,7 @@ mod lexer_tests {
let strn = Token { class: Class::STRING, value: Some("value".to_string()) }; let strn = Token { class: Class::STRING, value: Some("value".to_string()) };
let manual_tree = vec![id, op, strn]; let manual_tree = vec![id, op, strn];
assert_eq!(tree.unwrap(), manual_tree); assert_eq!(tree, manual_tree);
} }
#[test] #[test]
@ -143,7 +169,7 @@ mod lexer_tests {
let strn = Token { class: Class::STRING, value: Some("value".to_string()) }; let strn = Token { class: Class::STRING, value: Some("value".to_string()) };
let manual_tree = vec![id, op, strn]; let manual_tree = vec![id, op, strn];
assert_eq!(tree.unwrap(), manual_tree); assert_eq!(tree, manual_tree);
} }
#[test] #[test]
@ -155,7 +181,7 @@ mod lexer_tests {
let op2 = Token { class: Class::EQUALS, value: None }; let op2 = Token { class: Class::EQUALS, value: None };
let manual_tree = vec![id, op, op2]; let manual_tree = vec![id, op, op2];
assert_eq!(tree.unwrap(), manual_tree); assert_eq!(tree, manual_tree);
} }
} }