From 38052a43676b511ededd7a1b83d91f29b63f48e6 Mon Sep 17 00:00:00 2001 From: Tristan Smith Date: Mon, 16 Sep 2024 00:27:18 -0400 Subject: [PATCH] parsed syntax keywords, simplified language, refactored current tests --- Cargo.lock | 4 +- Cargo.toml | 8 +- src/compiler/codegen.rs | 3 + src/{interpreter => compiler}/mod.rs | 0 src/interpreter/eval.rs | 3 - src/lexer/lexer.rs | 213 +++++++-------------------- src/lexer/token.rs | 97 ++++-------- src/lib.rs | 2 +- src/main.rs | 2 +- tests/lexer_tests.rs | 116 ++++++++++----- 10 files changed, 169 insertions(+), 279 deletions(-) create mode 100644 src/compiler/codegen.rs rename src/{interpreter => compiler}/mod.rs (100%) delete mode 100644 src/interpreter/eval.rs diff --git a/Cargo.lock b/Cargo.lock index b0e5d81..d06f066 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3,5 +3,5 @@ version = 3 [[package]] -name = "fiddle" -version = "0.1.0" +name = "fddl" +version = "0.0.2" diff --git a/Cargo.toml b/Cargo.toml index f21e4e5..cb6a6ef 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,9 +1,9 @@ [package] -name = "fiddle" -version = "0.1.0" +name = "fddl" +version = "0.0.2" edition = "2021" -authors = ["Tristan Smith "] +authors = ["Tristan Smith "] description = "A small programming language written in Rust." -license = "BSD-3-Clause" +license = "MIT" [dependencies] diff --git a/src/compiler/codegen.rs b/src/compiler/codegen.rs new file mode 100644 index 0000000..3e107d9 --- /dev/null +++ b/src/compiler/codegen.rs @@ -0,0 +1,3 @@ +// Placeholder for compiler implementation + +// not even close yet \ No newline at end of file diff --git a/src/interpreter/mod.rs b/src/compiler/mod.rs similarity index 100% rename from src/interpreter/mod.rs rename to src/compiler/mod.rs diff --git a/src/interpreter/eval.rs b/src/interpreter/eval.rs deleted file mode 100644 index b992674..0000000 --- a/src/interpreter/eval.rs +++ /dev/null @@ -1,3 +0,0 @@ -// Placeholder for interpreter implementation - -// not even close yet \ No newline at end of file diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs index 7bad899..3216b3b 100644 --- a/src/lexer/lexer.rs +++ b/src/lexer/lexer.rs @@ -35,7 +35,7 @@ impl Lexer { let c = self.advance(); match c { - '#' => self.handle_comment_or_doc(), + // Single-character tokens '(' => Some(Token::LeftParen), ')' => Some(Token::RightParen), '{' => Some(Token::LeftBrace), @@ -46,57 +46,41 @@ impl Lexer { '+' => Some(Token::Plus), ';' => Some(Token::Semicolon), '*' => Some(Token::Star), - '%' => Some(Token::Percent), - '^' => Some(Token::Caret), '~' => { if self.match_char('=') { Some(Token::TildeEqual) } else { Some(Token::Tilde) } + }, + '/' => { + if self.match_char('/') { + // Line comment starting with // + self.line_comment() // Generate Comment token + } else { + Some(Token::Slash) + } }, - '`' => Some(Token::Backtick), - '$' => Some(Token::Dollar), - '@' => Some(Token::At), - '?' => Some(Token::Question), + '#' => { + // Line comment starting with # + self.line_comment() // Generate Comment token + }, + + // One or two character tokens '!' => { if self.match_char('=') { Some(Token::BangEqual) } else { - Some(Token::Exclamation) - } - }, - '|' => { - if self.match_char('|') { - Some(Token::DoublePipe) - } else { - Some(Token::Pipe) - } - }, - '&' => { - if self.match_char('&') { - Some(Token::DoubleAmpersand) - } else { - Some(Token::Ampersand) + None // Or handle as an error or another token if needed } }, '=' => { if self.match_char('=') { Some(Token::EqualEqual) - } else if self.match_char('>') { - Some(Token::FatArrow) } else { Some(Token::Equal) } }, - ':' => { - if self.match_char('=') { - Some(Token::ColonEqual) - } else { - // Handle single ':' if needed - Some(Token::Colon) - } - }, '<' => { if self.match_char('=') { Some(Token::LessEqual) @@ -111,26 +95,20 @@ impl Lexer { Some(Token::Greater) } }, - '/' => { - if self.match_char('/') { - // It's a comment, consume until end of line - let mut comment = String::new(); - while self.peek() != '\n' && !self.is_at_end() { - comment.push(self.advance()); - } - Some(Token::Comment(comment)) - } else { - Some(Token::Slash) - } - }, - ' ' | '\r' | '\t' => None, // Ignore whitespace + + // Whitespace + ' ' | '\r' | '\t' => None, '\n' => { self.line += 1; None }, + + // Literals '"' => self.string(), c if c.is_ascii_digit() => self.number(), c if self.is_alpha(c) => self.identifier(), + + // Any other character _ => { eprintln!("Unexpected character '{}' on line {}", c, self.line); None @@ -139,18 +117,17 @@ impl Lexer { } // Helper methods - // function to consume the current character + // Consume the current character and return it fn advance(&mut self) -> char { - let c = if self.is_at_end() { - '\0' - } else { - self.source[self.current] - }; + if self.is_at_end() { + return '\0'; + } + let c = self.source[self.current]; self.current += 1; c } - // function to parse the current character if it matches the expected character + // Check if the current character matches the expected character fn match_char(&mut self, expected: char) -> bool { if self.is_at_end() { return false; @@ -164,7 +141,7 @@ impl Lexer { true } - // function to parse the current character without consuming it + // Look at current character fn peek(&self) -> char { if self.is_at_end() { '\0' @@ -173,7 +150,7 @@ impl Lexer { } } - // function to parse the next character without consuming it + // Look ahead by one character fn peek_next(&self) -> char { if self.current + 1 >= self.source.len() { '\0' @@ -182,12 +159,12 @@ impl Lexer { } } - // function to check if we've reached the end of the source + // Check if we have reached the end of the source fn is_at_end(&self) -> bool { self.current >= self.source.len() } - // Function to handle different token types + // Function to handle string literals fn string(&mut self) -> Option { while self.peek() != '"' && !self.is_at_end() { if self.peek() == '\n' { @@ -196,7 +173,6 @@ impl Lexer { self.advance(); } - // Check if we've reached the end without finding a closing quote if self.is_at_end() { eprintln!("Unterminated string on line {}", self.line); return None; @@ -235,7 +211,7 @@ impl Lexer { Some(Token::Number(value)) } - // Function to handle identifiers + // Function to handle identifiers and keywords fn identifier(&mut self) -> Option { while self.is_alphanumeric(self.peek()) || self.peek() == '_' { self.advance(); @@ -248,132 +224,43 @@ impl Lexer { // Check for reserved keywords let token = match text.as_str() { "and" => Token::And, - "class" => Token::Class, - "else" => Token::Else, - "false" => Token::False, - "func" => Token::Func, - "for" => Token::For, - "if" => Token::If, - "nil" => Token::Nil, "or" => Token::Or, - "print" => Token::Print, - "return" => Token::Return, - "super" => Token::Super, - "this" => Token::This, + "if" => Token::If, + "else" => Token::Else, "true" => Token::True, + "false" => Token::False, "let" => Token::Let, - "while" => Token::While, "const" => Token::Const, - "define" => Token::Define, - "lambda" => Token::Lambda, - "match" => Token::Match, - "case" => Token::Case, - "switch" => Token::Switch, - "until" => Token::Until, - "repeat" => Token::Repeat, - "unless" => Token::Unless, - "yes" => Token::Yes, - "no" => Token::No, - "on" => Token::On, - "off" => Token::Off, - "module" => Token::Module, + "func" => Token::Func, + "return" => Token::Return, + "for" => Token::For, + "while" => Token::While, + "print" => Token::Print, + "pub" => Token::Pub, + "sym" => Token::Sym, _ => Token::Identifier(text), }; Some(token) } - // Function to check if a character is an alphabetic character or an underscore fn is_alpha(&self, c: char) -> bool { c.is_alphabetic() || c == '_' } - // Function to check if a character is an alphanumeric character or an underscore fn is_alphanumeric(&self, c: char) -> bool { c.is_alphanumeric() || c == '_' } - // Function to handle comments and documentation - fn line_comment(&mut self) { - while self.peek() != '\n' && !self.is_at_end() { - self.advance(); - } - } - - // Function to handle block comments - fn block_comment(&mut self) { - while !self.is_at_end() { - if self.peek() == '*' && self.peek_next() == '/' { - self.advance(); - self.advance(); - break; - } else { - if self.peek() == '\n' { - self.line += 1; - } - self.advance(); - } - } - } - - // Function to handle comments and documentation - fn handle_comment_or_doc(&mut self) -> Option { - // We have matched one '#' character so far - let mut count = 1; - - // Count additional consecutive '#' characters - while self.match_char('#') { - count += 1; - } - - // Check for an exclamation mark after the '#' characters - let has_exclamation = self.match_char('!'); - - match (count, has_exclamation) { - (1, _) => { - // Single '#' - Line comment - self.line_comment(); - None - } - (2, true) => { - // '##!' - Module-level documentation comment - self.doc_comment("module") - } - (2, false) => { - // '##' - Block comment - self.block_comment(); - None - } - (3, _) => { - // '###' - Item-level documentation comment - self.doc_comment("item") - } - (n, _) if n >= 4 => { - // '####' or more - Block comment - self.block_comment(); - None - } - _ => { - // Fallback to line comment - self.line_comment(); - None - } - } - } - - // Function to handle documentation comments - fn doc_comment(&mut self, _kind: &str) -> Option { + // Function to handle line comments + fn line_comment(&mut self) -> Option{ let mut comment = String::new(); + while self.peek() != '\n' && !self.is_at_end() { comment.push(self.advance()); } - - // Consume the newline character - if self.peek() == '\n' { - self.advance(); - } - - Some(Token::DocComment(comment.trim().to_string())) + + Some(Token::Comment(comment)) } - } + diff --git a/src/lexer/token.rs b/src/lexer/token.rs index e905f32..0210082 100644 --- a/src/lexer/token.rs +++ b/src/lexer/token.rs @@ -1,43 +1,26 @@ #[derive(Debug, PartialEq, Clone)] pub enum Token { // Single-character tokens - LeftParen, // ( - RightParen, // ) - LeftBrace, // { - RightBrace, // } - Comma, // , - Dot, // . - Minus, // - - Plus, // + - Semicolon, // ; - Colon, // : - Slash, // / - Star, // * - Percent, // % - Caret, // ^ - Tilde, // ~ - Backtick, // ` - Dollar, // $ - At, // @ - // Hash, // # - Question, // ? - Exclamation, // ! - Pipe, // | - Ampersand, // & - - // one or two character tokens - BangEqual, // != - Equal, // = - EqualEqual, // == - Greater, // > + LeftParen, // ( + RightParen, // ) + LeftBrace, // { + RightBrace, // } + Comma, // , + Dot, // . + Minus, // - + Plus, // + + Semicolon, // ; + Slash, // / + Star, // * + Equal, // = + BangEqual, // != + EqualEqual, // == + Greater, // > GreaterEqual, // >= - Less, // < - LessEqual, // <= - FatArrow, // => - ColonEqual, // := - TildeEqual, // ~= - DoublePipe, // || - DoubleAmpersand, // && + Less, // < + LessEqual, // <= + Tilde, // ~ + TildeEqual, // ~= // Literals Identifier(String), @@ -46,39 +29,23 @@ pub enum Token { // Keywords And, - Class, - Else, - False, - Func, - For, - If, - Nil, Or, - Print, - Return, - Super, - This, + If, + Else, True, + False, Let, - While, Const, - Define, - Lambda, - Match, - Case, - Switch, - Until, - Repeat, - Unless, - Yes, - No, - On, - Off, - Module, + Func, + Return, + For, + While, + Print, + Pub, + Sym, - // Documentation and comments - DocComment(String), // ##!, ### - Comment(String), // # + // Comments + Comment(String), EOF, -} \ No newline at end of file +} diff --git a/src/lib.rs b/src/lib.rs index 9a09c5f..50e73f0 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,3 @@ pub mod lexer; pub mod parser; -pub mod interpreter; \ No newline at end of file +pub mod compiler; \ No newline at end of file diff --git a/src/main.rs b/src/main.rs index c051a33..6337449 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,6 +1,6 @@ mod lexer; mod parser; -mod interpreter; +mod compiler; use std::env; use std::fs; diff --git a/tests/lexer_tests.rs b/tests/lexer_tests.rs index 03b0bf7..4772a56 100644 --- a/tests/lexer_tests.rs +++ b/tests/lexer_tests.rs @@ -1,5 +1,5 @@ -use fiddle::lexer::Lexer; -use fiddle::lexer::token::Token; +use fddl::lexer::Lexer; +use fddl::lexer::token::Token; #[test] fn test_single_tokens() { @@ -23,62 +23,94 @@ fn test_single_tokens() { } #[test] -fn test_identifier_and_keywords() { - let source = String::from("let $varName := 123; "); +fn test_keywords_and_identifiers() { + let source = String::from("sym myVar = 123;"); let mut lexer = Lexer::new(source); let tokens = lexer.scan_tokens(); assert_eq!( tokens, vec![ - Token::Let, - Token::Dollar, - Token::Identifier("varName".to_string()), - Token::ColonEqual, + Token::Sym, + Token::Identifier("myVar".to_string()), + Token::Equal, Token::Number(123.0), Token::Semicolon, Token::EOF ] ); + println!("{:?}", tokens); +} + +#[test] +fn test_pub_keyword() { + let source = String::from("pub func example() { return 42; }"); + let mut lexer = Lexer::new(source); + let tokens = lexer.scan_tokens(); + + assert_eq!( + tokens, + vec![ + Token::Pub, + Token::Func, + Token::Identifier("example".to_string()), + Token::LeftParen, + Token::RightParen, + Token::LeftBrace, + Token::Return, + Token::Number(42.0), + Token::Semicolon, + Token::RightBrace, + Token::EOF + ] + ); +} + +#[test] +fn test_comments() { + let source = String::from("# This is a comment\nlet a = 5;"); + let mut lexer = Lexer::new(source); + let tokens = lexer.scan_tokens(); + + assert_eq!( + tokens, + vec![ + Token::Comment(" This is a comment".to_string()), + Token::Let, + Token::Identifier("a".to_string()), + Token::Equal, + Token::Number(5.0), + Token::Semicolon, + Token::EOF + ] + ); } #[test] -fn test_doc_comments() { - let source = String::from("##! Module documentation -module test { - ### Function documentation - func example() { - # Regular comment - return 42; - } -} -"); +fn test_operators_and_comparison() { + let source = String::from("a >= 10 != b == 5;"); let mut lexer = Lexer::new(source); let tokens = lexer.scan_tokens(); - println!("Tokens: {:?}", tokens); - - assert_eq!(tokens[0], Token::DocComment("Module documentation".to_string())); - assert_eq!(tokens[1], Token::Module); - assert_eq!(tokens[2], Token::Identifier("test".to_string())); - assert_eq!(tokens[3], Token::LeftBrace); - assert_eq!(tokens[4], Token::DocComment("Function documentation".to_string())); - assert_eq!(tokens[5], Token::Func); - assert_eq!(tokens[6], Token::Identifier("example".to_string())); - assert_eq!(tokens[7], Token::LeftParen); - assert_eq!(tokens[8], Token::RightParen); - assert_eq!(tokens[9], Token::LeftBrace); - assert_eq!(tokens[10], Token::Return); - assert_eq!(tokens[11], Token::Number(42.0)); - assert_eq!(tokens[12], Token::Semicolon); - assert_eq!(tokens[13], Token::RightBrace); // Closes function body - assert_eq!(tokens[14], Token::RightBrace); // Closes module - assert_eq!(tokens[15], Token::EOF); + assert_eq!( + tokens, + vec![ + Token::Identifier("a".to_string()), + Token::GreaterEqual, + Token::Number(10.0), + Token::BangEqual, + Token::Identifier("b".to_string()), + Token::EqualEqual, + Token::Number(5.0), + Token::Semicolon, + Token::EOF + ] + ); } #[test] fn test_tilde_operator() { - let source = String::from("if (a ~= b) { ~c }"); + let source = String::from("if (a != b) { let c = ~5; }"); let mut lexer = Lexer::new(source); let tokens = lexer.scan_tokens(); @@ -88,14 +120,18 @@ fn test_tilde_operator() { Token::If, Token::LeftParen, Token::Identifier("a".to_string()), - Token::TildeEqual, + Token::BangEqual, Token::Identifier("b".to_string()), Token::RightParen, Token::LeftBrace, - Token::Tilde, + Token::Let, Token::Identifier("c".to_string()), + Token::Equal, + Token::Tilde, + Token::Number(5.0), + Token::Semicolon, Token::RightBrace, Token::EOF ] ); -} +} \ No newline at end of file