From 374449003465b420f3aaee8af379eee9bb969b59 Mon Sep 17 00:00:00 2001
From: threeoh6000
Date: Tue, 16 Jan 2024 20:25:09 +0000
Subject: [PATCH] update README, add array-related tokens to lexer, and create
 related tests

---
 Cargo.lock   |  2 +-
 Cargo.toml   |  2 +-
 README.md    |  3 ++-
 src/lexer.rs | 48 ++++++++++++++++++++++++++++++++++++++----------
 src/lib.rs   | 52 +++++++++++++++++++++++++++++++++++++++-------------
 5 files changed, 81 insertions(+), 26 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index acdddfc..e5a8fa0 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -4,4 +4,4 @@ version = 3
 
 [[package]]
 name = "frostwalker"
-version = "0.0.0"
+version = "0.0.1"
diff --git a/Cargo.toml b/Cargo.toml
index 4c09023..84bf36e 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "frostwalker"
-version = "0.0.0"
+version = "0.0.1"
 edition = "2021"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
diff --git a/README.md b/README.md
index 6eedd04..5591939 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,5 @@
 # Frostwalker
-A TOML-like configuration language parser for Rust. At present, the crate has a work in progress lexer with unit tests written for common cases. A validator and parser are planned to be added.
+A TOML-like configuration language parser for Rust. At present, the crate has a lexer with unit tests written for common cases. A validator and parser are planned.
 
 ## Justification
 The Rust crate `toml` pulls in 8 other crates to do its job, including `serde`. While this may not be irksome to most Rust developers, I'm used to severely underpowered Intel hardware, so low compile times are a big focus for me; a crate that requires only the standard library is preferable to one that pulls in other dependencies.
@@ -16,3 +16,4 @@ I also use a TOML-like language instead of TOML directly as I don't need all the
 * The equals sign
 * Booleans
 * Integers
+* Arrays (single depth only)
diff --git a/src/lexer.rs b/src/lexer.rs
index ef484be..ec3c9d1 100644
--- a/src/lexer.rs
+++ b/src/lexer.rs
@@ -9,35 +9,62 @@ pub struct Token {
 #[derive(PartialEq)]
 pub enum Class {
     IDENTIFIER,
-    SEPERATOR,
+    SEPARATOR,
     EQUALS,
     STRING,
     INTEGER,
     NEWLINE,
     TRUE,
     FALSE,
+    UNKNOWN,
 }
 
-pub fn tokenise(source: &str) -> Option<Vec<Token>> {
+pub fn tokenise(source: &str) -> Vec<Token> {
     return tokenize(source);
 }
 
-pub fn tokenize(source: &str) -> Option<Vec<Token>> {
+pub fn tokenize(source: &str) -> Vec<Token> {
     let lines: Vec<&str> = source.lines().collect();
     let lines_len = lines.len();
-    println!("{:#?}", lines);
     let mut tree: Vec<Token> = vec![];
+
     for line in lines {
         let mut added = false;
         let mut words: Vec<&str> = line.split(" ").collect();
         let mut i = 0;
         while i < words.len() {
+            if i > 0 && words[i].ends_with(",") && words[i-1] == "[" {
+                words[i] = words[i].chars().next_back().map(|_| &words[i][..words[i].len()-1]).unwrap_or("");
+                words.insert(i+1, ",");
+            }
+
+            if words[i] == "," {
+                tree.push(Token { class: Class::SEPARATOR, value: Some(",".to_string()) });
+                added = true;
+                i = i + 1;
+                continue;
+            }
+
             if words[i].replace(" ","") == "" {
                 i = i + 1;
                 continue;
             }
 
+            if words[i] == "[" {
+                tree.push(Token { class: Class::SEPARATOR, value: Some("[".to_string()) });
+                added = true;
+                i = i + 1;
+                continue;
+            }
+
+            if words[i] == "]" {
+                tree.push(Token { class: Class::SEPARATOR, value: Some("]".to_string()) });
+                added = true;
+                i = i + 1;
+                continue;
+            }
+
             if words[i] == "true" || words[i] == "TRUE" {
                 tree.push(Token { class: Class::TRUE, value: None });
                 added = true;
@@ -85,22 +112,22 @@ pub fn tokenize(source: &str) -> Option<Vec<Token>> {
             if words[i].ends_with("\"") {
                 words[i] = words[i].chars().next().map(|c| &words[i][c.len_utf8()..]).unwrap_or("");
                 words[i] = words[i].chars().next_back().map(|_| &words[i][..words[i].len()-1]).unwrap_or("");
-                tree.push(Token { class: Class::STRING, value: Some(words[i].to_string()) });
+                tree.push(Token { class: Class::STRING, value: Some(words[i].replace("\\\"", "\"").to_string()) });
                 added = true;
                 i = i + 1;
                 continue;
             } else {
                 words[i] = words[i].chars().next().map(|c| &words[i][c.len_utf8()..]).unwrap_or("");
-                let mut built_string = words[i].to_string();
+                let mut built_string = words[i].replace("\\\"", "\"").to_string();
                 loop {
                     i = i + 1;
                     if words[i].ends_with("\"") {
                         words[i] = words[i].chars().next_back().map(|_| &words[i][..words[i].len()-1]).unwrap_or("");
-                        built_string = format!("{} {}", built_string, words[i]);
+                        built_string = format!("{} {}", built_string, words[i].replace("\\\"", "\""));
                         break;
                     }
-                    built_string = format!("{} {}", built_string, words[i]);
+                    built_string = format!("{} {}", built_string, words[i].replace("\\\"", "\""));
                 }
                 tree.push(Token { class: Class::STRING, value: Some(built_string) });
                 added = true;
@@ -113,7 +140,8 @@ pub fn tokenize(source: &str) -> Option<Vec<Token>> {
                 break;
             }
 
-            return None;
+            tree.push(Token { class: Class::UNKNOWN, value: Some(words[i].to_string()) });
+            i = i + 1;
         }
         if lines_len > 1 && added {
             tree.push(Token { class: Class::NEWLINE, value: None });
@@ -124,5 +152,5 @@ pub fn tokenize(source: &str) -> Option<Vec<Token>> {
         tree.pop();
     }
 
-    return Some(tree);
+    return tree;
 }
diff --git a/src/lib.rs b/src/lib.rs
index 76f9297..220b882 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -12,7 +12,33 @@ mod lexer_tests {
         let op = Token { class: Class::EQUALS, value: None };
         let strn = Token { class: Class::STRING, value: Some("value".to_string()) };
         let manual_tree = vec![id, op, strn];
-        assert_eq!(tree.unwrap(), manual_tree);
+        assert_eq!(tree, manual_tree);
+    }
+
+    #[test]
+    fn single_key_array() {
+        let tree = lexer::tokenize("key = [ \"\\\"value\", 6 ]");
+
+        let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) };
+        let op = Token { class: Class::EQUALS, value: None };
+        let t1 = Token { class: Class::SEPARATOR, value: Some("[".to_string()) };
+        let strn = Token { class: Class::STRING, value: Some("\"value".to_string()) };
+        let t2 = Token { class: Class::SEPARATOR, value: Some(",".to_string()) };
+        let int1 = Token { class: Class::INTEGER, value: Some("6".to_string()) };
+        let t3 = Token { class: Class::SEPARATOR, value: Some("]".to_string()) };
+        let manual_tree = vec![id, op, t1, strn, t2, int1, t3];
+        assert_eq!(tree, manual_tree);
+    }
+
+    #[test]
+    fn single_key_unknown_token() {
+        let tree = lexer::tokenize("key = ^");
+
+        let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) };
+        let op = Token { class: Class::EQUALS, value: None };
+        let unk = Token { class: Class::UNKNOWN, value: Some("^".to_string()) };
+        let manual_tree = vec![id, op, unk];
+        assert_eq!(tree, manual_tree);
     }
 
     #[test]
@@ -21,9 +47,9 @@
 
         let id = Token { class: Class::IDENTIFIER, value: Some("key".to_string()) };
         let op = Token { class: Class::EQUALS, value: None };
-        let strn = Token { class: Class::STRING, value: Some("\\\"value".to_string()) };
+        let strn = Token { class: Class::STRING, value: Some("\"value".to_string()) };
         let manual_tree = vec![id, op, strn];
-        assert_eq!(tree.unwrap(), manual_tree);
+        assert_eq!(tree, manual_tree);
     }
 
     #[test]
@@ -38,7 +64,7 @@
         let op2 = Token { class: Class::EQUALS, value: None };
         let strn2 = Token { class: Class::INTEGER, value: Some("-400".to_string()) };
         let manual_tree = vec![id, op, strn, nl, id2, op2, strn2];
-        assert_eq!(tree.unwrap(), manual_tree);
+        assert_eq!(tree, manual_tree);
     }
 
     #[test]
@@ -53,7 +79,7 @@
         let op2 = Token { class: Class::EQUALS, value: None };
         let strn2 = Token { class: Class::FALSE, value: None };
         let manual_tree = vec![id, op, strn, nl, id2, op2, strn2];
-        assert_eq!(tree.unwrap(), manual_tree);
+        assert_eq!(tree, manual_tree);
     }
 
     #[test]
@@ -64,7 +90,7 @@
         let op = Token { class: Class::EQUALS, value: None };
         let strn = Token { class: Class::STRING, value: Some("a b c".to_string()) };
         let manual_tree = vec![id, op, strn];
-        assert_eq!(tree.unwrap(), manual_tree);
+        assert_eq!(tree, manual_tree);
     }
 
     #[test]
@@ -75,7 +101,7 @@
         let op = Token { class: Class::EQUALS, value: None };
         let strn = Token { class: Class::STRING, value: Some("a b c d".to_string()) };
         let manual_tree = vec![id, op, strn];
-        assert_eq!(tree.unwrap(), manual_tree);
+        assert_eq!(tree, manual_tree);
     }
 
     #[test]
@@ -95,7 +121,7 @@
         let strn3 = Token { class: Class::STRING, value: Some("value3".to_string()) };
 
         let manual_tree = vec![id, op, strn, nl, id2, op2, strn2, nl2, id3, op3, strn3];
-        assert_eq!(tree.unwrap(), manual_tree);
+        assert_eq!(tree, manual_tree);
     }
 
     #[test]
@@ -107,7 +133,7 @@
         let strn = Token { class: Class::STRING, value: Some("value".to_string()) };
 
         let manual_tree = vec![id, op, strn];
-        assert_eq!(tree.unwrap(), manual_tree);
+        assert_eq!(tree, manual_tree);
     }
 
     #[test]
@@ -119,7 +145,7 @@
         let strn = Token { class: Class::STRING, value: Some("value".to_string()) };
 
         let manual_tree = vec![id, op, strn];
-        assert_eq!(tree.unwrap(), manual_tree);
+        assert_eq!(tree, manual_tree);
     }
 
     #[test]
@@ -131,7 +157,7 @@
         let strn = Token { class: Class::STRING, value: Some("value".to_string()) };
 
         let manual_tree = vec![id, op, strn];
-        assert_eq!(tree.unwrap(), manual_tree);
+        assert_eq!(tree, manual_tree);
     }
 
     #[test]
@@ -143,7 +169,7 @@
         let strn = Token { class: Class::STRING, value: Some("value".to_string()) };
 
         let manual_tree = vec![id, op, strn];
-        assert_eq!(tree.unwrap(), manual_tree);
+        assert_eq!(tree, manual_tree);
     }
 
     #[test]
@@ -155,7 +181,7 @@
         let op2 = Token { class: Class::EQUALS, value: None };
 
         let manual_tree = vec![id, op, op2];
-        assert_eq!(tree.unwrap(), manual_tree);
+        assert_eq!(tree, manual_tree);
     }
 }
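
Example usage: a minimal sketch of driving the lexer once this patch is applied. It assumes `lexer` is exposed as a public module of the `frostwalker` crate with public `Token` fields, as the tests above suggest; the `main` harness and input literals are illustrative only, and the expected token stream follows the `single_key_array` and `single_key_unknown_token` tests.

    use frostwalker::lexer::{self, Class};

    fn main() {
        // Arrays now lex into SEPARATOR tokens for "[", "," and "]", and
        // tokenize() returns Vec<Token> directly rather than Option<Vec<Token>>.
        let tokens = lexer::tokenize("key = [ \"value\", 6 ]");

        // Expected stream: IDENTIFIER("key"), EQUALS, SEPARATOR("["),
        // STRING("value"), SEPARATOR(","), INTEGER("6"), SEPARATOR("]").
        assert_eq!(tokens.len(), 7);

        // Unclassifiable input no longer aborts lexing with None; it is kept
        // as an UNKNOWN token for the planned validator to reject later.
        let bad = lexer::tokenize("key = ^");
        assert!(bad.iter().any(|t| t.class == Class::UNKNOWN));
    }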