From aa1408eaf314d236f822e30aa78cfaa5acaecb92 Mon Sep 17 00:00:00 2001 From: threeoh6000 Date: Tue, 16 Jan 2024 21:42:50 +0000 Subject: [PATCH] collapse some tokens into each other in the lexer and create a very basic, incomplete validator with unit tests --- Cargo.lock | 2 +- Cargo.toml | 2 +- README.md | 4 ++- src/lexer.rs | 16 ++++++------ src/lexer_tests.rs | 34 +++++++++++++------------- src/lib.rs | 4 +++ src/validator.rs | 55 ++++++++++++++++++++++++++++++++++++++++++ src/validator_tests.rs | 39 ++++++++++++++++++++++++++++++ 8 files changed, 127 insertions(+), 29 deletions(-) create mode 100644 src/validator.rs create mode 100644 src/validator_tests.rs diff --git a/Cargo.lock b/Cargo.lock index 872d1e6..778e664 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,4 +4,4 @@ version = 3 [[package]] name = "frostwalker" -version = "0.0.2" +version = "0.0.3" diff --git a/Cargo.toml b/Cargo.toml index 9efd551..0febb4d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "frostwalker" -version = "0.0.2" +version = "0.0.3" edition = "2021" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html diff --git a/README.md b/README.md index 5591939..6b818d2 100644 --- a/README.md +++ b/README.md @@ -1,5 +1,7 @@ # Frostwalker -A TOML-like configuration language parser for Rust. At present, the crate has a lexer with unit tests written for common cases. A validator and parser are planned to be added. +*Frostwalker is neither stable nor ready for dependency development yet. Unless you plan to implement your own formatter, please wait for 0.1.0 to be released. 0.0 versions are placeholders and will not be tagged.* + +A TOML-like configuration language parser for Rust. At present, the crate has a lexer and an incomplete validator with unit tests written for common cases. A formatter will eventually be added. ## Justification The Rust crate `toml` pulls in 8 other crates to do its job, including `serde`. 
While this may not be irksome to basically most Rust developers, I'm used to using severely underpowered Intel hardware so low compile times is a big focus for me so a crate made by me that requires only the standard library compared to one that requires other dependencies is preferrable. diff --git a/src/lexer.rs b/src/lexer.rs index ec3c9d1..ff52821 100644 --- a/src/lexer.rs +++ b/src/lexer.rs @@ -11,11 +11,9 @@ pub enum Class { IDENTIFIER, SEPARATOR, EQUALS, - STRING, - INTEGER, + LITERAL, NEWLINE, - TRUE, - FALSE, + BOOLEAN, UNKNOWN, } @@ -66,21 +64,21 @@ pub fn tokenize(source: &str) -> Vec { } if words[i] == "true" || words[i] == "TRUE" { - tree.push(Token { class: Class::TRUE, value: None }); + tree.push(Token { class: Class::BOOLEAN, value: Some("true".to_string()) }); added = true; i = i + 1; continue; } if words[i] == "false" || words[i] == "FALSE" { - tree.push(Token { class: Class::FALSE, value: None }); + tree.push(Token { class: Class::BOOLEAN, value: Some("false".to_string()) }); added = true; i = i + 1; continue; } if words[i].parse::().is_ok() { - tree.push(Token { class: Class::INTEGER, value: Some(words[i].to_string()) }); + tree.push(Token { class: Class::LITERAL, value: Some(words[i].to_string()) }); added = true; i = i + 1; continue; @@ -112,7 +110,7 @@ pub fn tokenize(source: &str) -> Vec { if words[i].ends_with("\"") { words[i] = words[i].chars().next().map(|c| &words[i][c.len_utf8()..]).unwrap_or(""); words[i] = words[i].chars().next_back().map(|_| &words[i][..words[i].len()-1]).unwrap_or(""); - tree.push(Token { class: Class::STRING, value: Some(words[i].replace("\\\"", "\"").to_string()) }); + tree.push(Token { class: Class::LITERAL, value: Some(words[i].replace("\\\"", "\"").to_string()) }); added = true; i = i + 1; continue; @@ -129,7 +127,7 @@ pub fn tokenize(source: &str) -> Vec { } built_string = format!("{} {}", built_string, words[i].replace("\\\"", "\"")); } - tree.push(Token { class: Class::STRING, value: Some(built_string) }); + 
tree.push(Token { class: Class::LITERAL, value: Some(built_string) }); added = true; i = i + 1; continue; diff --git a/src/lexer_tests.rs b/src/lexer_tests.rs index e7ea07e..e81ca62 100644 --- a/src/lexer_tests.rs +++ b/src/lexer_tests.rs @@ -6,7 +6,7 @@ fn single_key() { let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let op = Token { class: Class::EQUALS, value: None }; - let strn = Token { class: Class::STRING, value: Some("value".to_string()) }; + let strn = Token { class: Class::LITERAL, value: Some("value".to_string()) }; let manual_tree = vec![id, op, strn]; assert_eq!(tree, manual_tree); } @@ -18,9 +18,9 @@ fn single_key_array() { let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let op = Token { class: Class::EQUALS, value: None }; let t1 = Token { class: Class::SEPARATOR, value: Some("[".to_string()) }; - let strn = Token { class: Class::STRING, value: Some("\"value".to_string()) }; + let strn = Token { class: Class::LITERAL, value: Some("\"value".to_string()) }; let t2 = Token { class: Class::SEPARATOR, value: Some(",".to_string()) }; - let strn2 = Token { class: Class::INTEGER, value: Some("6".to_string()) }; + let strn2 = Token { class: Class::LITERAL, value: Some("6".to_string()) }; let t3 = Token { class: Class::SEPARATOR, value: Some("]".to_string()) }; let manual_tree = vec![id, op, t1, strn, t2, strn2, t3]; assert_eq!(tree, manual_tree); @@ -43,7 +43,7 @@ fn single_key_escaped_double_quote_in_string() { let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let op = Token { class: Class::EQUALS, value: None }; - let strn = Token { class: Class::STRING, value: Some("\"value".to_string()) }; + let strn = Token { class: Class::LITERAL, value: Some("\"value".to_string()) }; let manual_tree = vec![id, op, strn]; assert_eq!(tree, manual_tree); } @@ -54,11 +54,11 @@ fn integers() { let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let op = Token { 
class: Class::EQUALS, value: None }; - let strn = Token { class: Class::INTEGER, value: Some("123567".to_string()) }; + let strn = Token { class: Class::LITERAL, value: Some("123567".to_string()) }; let nl = Token { class: Class::NEWLINE, value: None }; let id2 = Token { class: Class::IDENTIFIER, value: Some("key2".to_string()) }; let op2 = Token { class: Class::EQUALS, value: None }; - let strn2 = Token { class: Class::INTEGER, value: Some("-400".to_string()) }; + let strn2 = Token { class: Class::LITERAL, value: Some("-400".to_string()) }; let manual_tree = vec![id, op, strn, nl, id2, op2, strn2]; assert_eq!(tree, manual_tree); } @@ -69,11 +69,11 @@ fn booleans() { let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let op = Token { class: Class::EQUALS, value: None }; - let strn = Token { class: Class::TRUE, value: None }; + let strn = Token { class: Class::BOOLEAN, value: Some("true".to_string()) }; let nl = Token { class: Class::NEWLINE, value: None }; let id2 = Token { class: Class::IDENTIFIER, value: Some("key2".to_string()) }; let op2 = Token { class: Class::EQUALS, value: None }; - let strn2 = Token { class: Class::FALSE, value: None }; + let strn2 = Token { class: Class::BOOLEAN, value: Some("false".to_string()) }; let manual_tree = vec![id, op, strn, nl, id2, op2, strn2]; assert_eq!(tree, manual_tree); } @@ -84,7 +84,7 @@ fn single_key_space_in_string() { let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let op = Token { class: Class::EQUALS, value: None }; - let strn = Token { class: Class::STRING, value: Some("a b c".to_string()) }; + let strn = Token { class: Class::LITERAL, value: Some("a b c".to_string()) }; let manual_tree = vec![id, op, strn]; assert_eq!(tree, manual_tree); } @@ -95,7 +95,7 @@ fn single_key_double_space_in_string() { let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let op = Token { class: Class::EQUALS, value: None }; - let strn = Token { class: 
Class::STRING, value: Some("a b c d".to_string()) }; + let strn = Token { class: Class::LITERAL, value: Some("a b c d".to_string()) }; let manual_tree = vec![id, op, strn]; assert_eq!(tree, manual_tree); } @@ -108,13 +108,13 @@ fn triple_key() { let nl2 = Token { class: Class::NEWLINE, value: None }; let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let op = Token { class: Class::EQUALS, value: None }; - let strn = Token { class: Class::STRING, value: Some("value".to_string()) }; + let strn = Token { class: Class::LITERAL, value: Some("value".to_string()) }; let id2 = Token { class: Class::IDENTIFIER, value: Some("key2".to_string()) }; let op2 = Token { class: Class::EQUALS, value: None }; - let strn2 = Token { class: Class::STRING, value: Some("value2".to_string()) }; + let strn2 = Token { class: Class::LITERAL, value: Some("value2".to_string()) }; let id3 = Token { class: Class::IDENTIFIER, value: Some("key3".to_string()) }; let op3 = Token { class: Class::EQUALS, value: None }; - let strn3 = Token { class: Class::STRING, value: Some("value3".to_string()) }; + let strn3 = Token { class: Class::LITERAL, value: Some("value3".to_string()) }; let manual_tree = vec![id, op, strn, nl, id2, op2, strn2, nl2, id3, op3, strn3]; assert_eq!(tree, manual_tree); @@ -126,7 +126,7 @@ fn comment() { let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let op = Token { class: Class::EQUALS, value: None }; - let strn = Token { class: Class::STRING, value: Some("value".to_string()) }; + let strn = Token { class: Class::LITERAL, value: Some("value".to_string()) }; let manual_tree = vec![id, op, strn]; assert_eq!(tree, manual_tree); @@ -138,7 +138,7 @@ fn inline_comment() { let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let op = Token { class: Class::EQUALS, value: None }; - let strn = Token { class: Class::STRING, value: Some("value".to_string()) }; + let strn = Token { class: Class::LITERAL, value: 
Some("value".to_string()) }; let manual_tree = vec![id, op, strn]; assert_eq!(tree, manual_tree); @@ -150,7 +150,7 @@ fn inline_comment_no_spaces() { let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let op = Token { class: Class::EQUALS, value: None }; - let strn = Token { class: Class::STRING, value: Some("value".to_string()) }; + let strn = Token { class: Class::LITERAL, value: Some("value".to_string()) }; let manual_tree = vec![id, op, strn]; assert_eq!(tree, manual_tree); @@ -162,7 +162,7 @@ fn quoted_identifier() { let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; let op = Token { class: Class::EQUALS, value: None }; - let strn = Token { class: Class::STRING, value: Some("value".to_string()) }; + let strn = Token { class: Class::LITERAL, value: Some("value".to_string()) }; let manual_tree = vec![id, op, strn]; assert_eq!(tree, manual_tree); diff --git a/src/lib.rs b/src/lib.rs index 73e423c..d3fbffd 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,4 +1,8 @@ pub mod lexer; +pub mod validator; #[cfg(test)] mod lexer_tests; + +#[cfg(test)] +mod validator_tests; diff --git a/src/validator.rs b/src/validator.rs new file mode 100644 index 0000000..eec7b92 --- /dev/null +++ b/src/validator.rs @@ -0,0 +1,55 @@ +use super::lexer::{Token, Class}; + +#[derive(Debug)] +#[derive(PartialEq)] +enum ExpectedClass { + IDENTIFIER, + EQUALS, + NEWLINE, + LITERALORSEPARATOR, +} + +pub fn validate(tree: &Vec) -> Option { + let mut expected_token: ExpectedClass = ExpectedClass::IDENTIFIER; + let mut i = 0; + let mut line = 1; + while i < tree.len() { + if tree[i].class == Class::NEWLINE && i == 0 { + i = i + 1; + line = line + 1; + continue; + } + + if tree[i].class == Class::NEWLINE && expected_token == ExpectedClass::NEWLINE { + i = i + 1; + line = line + 1; + expected_token = ExpectedClass::IDENTIFIER; + continue; + } + + if tree[i].class == Class::UNKNOWN { + return Some(format!("Invalid token {} at line {}.", 
tree[i].value.clone().unwrap_or("None".to_string()), line)); + } + + if tree[i].class == Class::IDENTIFIER && expected_token == ExpectedClass::IDENTIFIER { + i = i + 1; + expected_token = ExpectedClass::EQUALS; + continue; + } + + if tree[i].class == Class::EQUALS && expected_token == ExpectedClass::EQUALS { + i = i + 1; + expected_token = ExpectedClass::LITERALORSEPARATOR; + continue; + } + + if (tree[i].class == Class::LITERAL || tree[i].class == Class::BOOLEAN) && expected_token == ExpectedClass::LITERALORSEPARATOR { + i = i + 1; + expected_token = ExpectedClass::NEWLINE; + continue; + } + + return Some(format!("{:?} found where {:?} expected at line {}.", tree[i].class, expected_token, line)); + } + return None; +} diff --git a/src/validator_tests.rs b/src/validator_tests.rs new file mode 100644 index 0000000..5ba51a8 --- /dev/null +++ b/src/validator_tests.rs @@ -0,0 +1,39 @@ +use super::validator; +use super::lexer::{Token, Class}; + +#[test] +fn single_key() { + let t1 = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; + let t2 = Token { class: Class::EQUALS, value: None }; + let t3 = Token { class: Class::LITERAL, value: Some("10".to_string()) }; + let result = validator::validate(&vec![t1, t2, t3]); + + assert_eq!(result.is_none(), true); +} + +#[test] +fn triple_key() { + let t1 = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) }; + let t2 = Token { class: Class::EQUALS, value: None }; + let t3 = Token { class: Class::LITERAL, value: Some("10".to_string()) }; + let nl = Token { class: Class::NEWLINE, value: None }; + let t4 = Token { class: Class::IDENTIFIER, value: Some("key2".to_string()) }; + let t5 = Token { class: Class::EQUALS, value: None }; + let t6 = Token { class: Class::LITERAL, value: Some("13".to_string()) }; + let result = validator::validate(&vec![t1, t2, t3, nl, t4, t5, t6]); + + assert_eq!(result.is_none(), true); +} + +#[test] +fn triple_key_no_newline() { + let t1 = Token { class: Class::IDENTIFIER, 
value: Some("key".to_string()) }; + let t2 = Token { class: Class::EQUALS, value: None }; + let t3 = Token { class: Class::LITERAL, value: Some("10".to_string()) }; + let t4 = Token { class: Class::IDENTIFIER, value: Some("key2".to_string()) }; + let t5 = Token { class: Class::EQUALS, value: None }; + let t6 = Token { class: Class::LITERAL, value: Some("13".to_string()) }; + let result = validator::validate(&vec![t1, t2, t3, t4, t5, t6]); + + assert_eq!(result.unwrap_or("".to_string()), "IDENTIFIER found where NEWLINE expected at line 1.".to_string()); +}