From 38052a43676b511ededd7a1b83d91f29b63f48e6 Mon Sep 17 00:00:00 2001
From: Tristan Smith <tristan.smith@pm.me>
Date: Mon, 16 Sep 2024 00:27:18 -0400
Subject: [PATCH] parsed syntax keywords, simplified language, refactored
 current tests

---
 Cargo.lock                           |   4 +-
 Cargo.toml                           |   8 +-
 src/compiler/codegen.rs              |   3 +
 src/{interpreter => compiler}/mod.rs |   0
 src/interpreter/eval.rs              |   3 -
 src/lexer/lexer.rs                   | 213 +++++++--------------------
 src/lexer/token.rs                   |  97 ++++--------
 src/lib.rs                           |   2 +-
 src/main.rs                          |   2 +-
 tests/lexer_tests.rs                 | 116 ++++++++++-----
 10 files changed, 169 insertions(+), 279 deletions(-)
 create mode 100644 src/compiler/codegen.rs
 rename src/{interpreter => compiler}/mod.rs (100%)
 delete mode 100644 src/interpreter/eval.rs

diff --git a/Cargo.lock b/Cargo.lock
index b0e5d81..d06f066 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3,5 +3,5 @@
 version = 3
 
 [[package]]
-name = "fiddle"
-version = "0.1.0"
+name = "fddl"
+version = "0.0.2"
diff --git a/Cargo.toml b/Cargo.toml
index f21e4e5..cb6a6ef 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,9 +1,9 @@
 [package]
-name = "fiddle"
-version = "0.1.0"
+name = "fddl"
+version = "0.0.2"
 edition = "2021"
-authors = ["Tristan Smith <tristan.smith@pm.me>"]
+authors = ["Tristan Smith <tristan@fddl.dev>"]
 description = "A small programming language written in Rust."
-license = "BSD-3-Clause"
+license = "MIT"
 
 [dependencies]
diff --git a/src/compiler/codegen.rs b/src/compiler/codegen.rs
new file mode 100644
index 0000000..3e107d9
--- /dev/null
+++ b/src/compiler/codegen.rs
@@ -0,0 +1,3 @@
+// Placeholder for compiler implementation
+
+// not even close yet
\ No newline at end of file
diff --git a/src/interpreter/mod.rs b/src/compiler/mod.rs
similarity index 100%
rename from src/interpreter/mod.rs
rename to src/compiler/mod.rs
diff --git a/src/interpreter/eval.rs b/src/interpreter/eval.rs
deleted file mode 100644
index b992674..0000000
--- a/src/interpreter/eval.rs
+++ /dev/null
@@ -1,3 +0,0 @@
-// Placeholder for interpreter implementation
-
-// not even close yet
\ No newline at end of file
diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs
index 7bad899..3216b3b 100644
--- a/src/lexer/lexer.rs
+++ b/src/lexer/lexer.rs
@@ -35,7 +35,7 @@ impl Lexer {
         let c = self.advance();
 
         match c {
-            '#' => self.handle_comment_or_doc(),
+            // Single-character tokens
             '(' => Some(Token::LeftParen),
             ')' => Some(Token::RightParen),
             '{' => Some(Token::LeftBrace),
@@ -46,57 +46,41 @@ impl Lexer {
             '+' => Some(Token::Plus),
             ';' => Some(Token::Semicolon),
             '*' => Some(Token::Star),
-            '%' => Some(Token::Percent),
-            '^' => Some(Token::Caret),  
             '~' => {
                 if self.match_char('=') {
                     Some(Token::TildeEqual)
                 } else {
                     Some(Token::Tilde)
                 }
+            }, 
+            '/' => {
+                if self.match_char('/') {
+                    // Line comment starting with //
+                    self.line_comment() // Generate Comment token
+                } else {
+                    Some(Token::Slash)
+                }
             },
-            '`' => Some(Token::Backtick),
-            '$' => Some(Token::Dollar),
-            '@' => Some(Token::At),
-            '?' => Some(Token::Question),
+            '#' => {
+                // Line comment starting with #
+                self.line_comment() // Generate Comment token
+            },
+
+            // One or two character tokens
             '!' => {
                 if self.match_char('=') {
                     Some(Token::BangEqual)
                 } else {
-                    Some(Token::Exclamation)
-                }
-            },
-            '|' => {
-                if self.match_char('|') {
-                    Some(Token::DoublePipe)
-                } else {
-                    Some(Token::Pipe)
-                }
-            },
-            '&' => {
-                if self.match_char('&') {
-                    Some(Token::DoubleAmpersand)
-                } else {
-                    Some(Token::Ampersand)
+                    None // Lone '!' yields no token and, unlike the catch-all arm, reports no error
                 }
             },
             '=' => {
                 if self.match_char('=') {
                     Some(Token::EqualEqual)
-                } else if self.match_char('>') {
-                    Some(Token::FatArrow)
                 } else {
                     Some(Token::Equal)
                 }
             },
-            ':' => {
-                if self.match_char('=') {
-                    Some(Token::ColonEqual)
-                } else {
-                    // Handle single ':' if needed
-                    Some(Token::Colon)
-                }
-            },
             '<' => {
                 if self.match_char('=') {
                     Some(Token::LessEqual)
@@ -111,26 +95,20 @@ impl Lexer {
                     Some(Token::Greater)
                 }
             },
-            '/' => {
-                if self.match_char('/') {
-                    // It's a comment, consume until end of line
-                    let mut comment = String::new();
-                    while self.peek() != '\n' && !self.is_at_end() {
-                        comment.push(self.advance());
-                    }
-                    Some(Token::Comment(comment))
-                } else {
-                    Some(Token::Slash)
-                }
-                },
-            ' ' | '\r' | '\t' => None, // Ignore whitespace
+
+            // Whitespace
+            ' ' | '\r' | '\t' => None,
             '\n' => {
                 self.line += 1;
                 None
             },
+
+            // Literals
             '"' => self.string(),
             c if c.is_ascii_digit() => self.number(),
             c if self.is_alpha(c) => self.identifier(),
+
+            // Any other character
             _ => {
                 eprintln!("Unexpected character '{}' on line {}", c, self.line);
                 None
@@ -139,18 +117,17 @@ impl Lexer {
     }
 
     // Helper methods
-    // function to consume the current character
+    // Consume the current character and return it
     fn advance(&mut self) -> char {
-        let c = if self.is_at_end() {
-            '\0'
-        } else {
-            self.source[self.current]
-        };
+        if self.is_at_end() {
+            return '\0';
+        }
+        let c = self.source[self.current];
         self.current += 1;
         c
     }
 
-    // function to parse the current character if it matches the expected character
+    // Check if the current character matches the expected character
     fn match_char(&mut self, expected: char) -> bool {
         if self.is_at_end() {
             return false;
@@ -164,7 +141,7 @@ impl Lexer {
         true
     }
 
-    // function to parse the current character without consuming it
+    // Look at the current character without consuming it
     fn peek(&self) -> char {
         if self.is_at_end() {
             '\0'
@@ -173,7 +150,7 @@ impl Lexer {
         }
     }
 
-    // function to parse the next character without consuming it
+    // Look at the character after the current one without consuming anything
     fn peek_next(&self) -> char {
         if self.current + 1 >= self.source.len() {
             '\0'
@@ -182,12 +159,12 @@ impl Lexer {
         }
     }
 
-    // function to check if we've reached the end of the source
+    // Check if we have reached the end of the source
     fn is_at_end(&self) -> bool {
         self.current >= self.source.len()
     }
 
-    // Function to handle different token types
+    // Function to handle string literals
     fn string(&mut self) -> Option<Token> {
         while self.peek() != '"' && !self.is_at_end() {
             if self.peek() == '\n' {
@@ -196,7 +173,6 @@ impl Lexer {
             self.advance();
         }
 
-        // Check if we've reached the end without finding a closing quote
         if self.is_at_end() {
             eprintln!("Unterminated string on line {}", self.line);
             return None;
@@ -235,7 +211,7 @@ impl Lexer {
         Some(Token::Number(value))
     }
 
-    // Function to handle identifiers
+    // Function to handle identifiers and keywords
     fn identifier(&mut self) -> Option<Token> {
         while self.is_alphanumeric(self.peek()) || self.peek() == '_' {
             self.advance();
@@ -248,132 +224,43 @@ impl Lexer {
         // Check for reserved keywords
         let token = match text.as_str() {
             "and" => Token::And,
-            "class" => Token::Class,
-            "else" => Token::Else,
-            "false" => Token::False,
-            "func" => Token::Func,
-            "for" => Token::For,
-            "if" => Token::If,
-            "nil" => Token::Nil,
             "or" => Token::Or,
-            "print" => Token::Print,
-            "return" => Token::Return,
-            "super" => Token::Super,
-            "this" => Token::This,
+            "if" => Token::If,
+            "else" => Token::Else,
             "true" => Token::True,
+            "false" => Token::False,
             "let" => Token::Let,
-            "while" => Token::While,
             "const" => Token::Const,
-            "define" => Token::Define,
-            "lambda" => Token::Lambda,
-            "match" => Token::Match,
-            "case" => Token::Case,
-            "switch" => Token::Switch,
-            "until" => Token::Until,
-            "repeat" => Token::Repeat,
-            "unless" => Token::Unless,
-            "yes" => Token::Yes,
-            "no" => Token::No,
-            "on" => Token::On,
-            "off" => Token::Off,
-            "module" => Token::Module,
+            "func" => Token::Func,
+            "return" => Token::Return,
+            "for" => Token::For,
+            "while" => Token::While,
+            "print" => Token::Print,
+            "pub" => Token::Pub,
+            "sym" => Token::Sym,
             _ => Token::Identifier(text),
         };
 
         Some(token)
     }
 
-    // Function to check if a character is an alphabetic character or an underscore
     fn is_alpha(&self, c: char) -> bool {
         c.is_alphabetic() || c == '_'
     }
 
-    // Function to check if a character is an alphanumeric character or an underscore
     fn is_alphanumeric(&self, c: char) -> bool {
         c.is_alphanumeric() || c == '_'
     }
 
-    // Function to handle comments and documentation
-    fn line_comment(&mut self) {
-        while self.peek() != '\n' && !self.is_at_end() {
-            self.advance();
-        }
-    }
-
-    // Function to handle block comments
-    fn block_comment(&mut self) {
-        while !self.is_at_end() {
-            if self.peek() == '*' && self.peek_next() == '/' {
-                self.advance();
-                self.advance();
-                break;
-            } else {
-                if self.peek() == '\n' {
-                    self.line += 1;
-                }
-                self.advance();
-            }
-        }
-    }
-
-    // Function to handle comments and documentation
-    fn handle_comment_or_doc(&mut self) -> Option<Token> {
-        // We have matched one '#' character so far
-        let mut count = 1;
-    
-        // Count additional consecutive '#' characters
-        while self.match_char('#') {
-            count += 1;
-        }
-    
-        // Check for an exclamation mark after the '#' characters
-        let has_exclamation = self.match_char('!');
-    
-        match (count, has_exclamation) {
-            (1, _) => {
-                // Single '#' - Line comment
-                self.line_comment();
-                None
-            }
-            (2, true) => {
-                // '##!' - Module-level documentation comment
-                self.doc_comment("module")
-            }
-            (2, false) => {
-                // '##' - Block comment
-                self.block_comment();
-                None
-            }
-            (3, _) => {
-                // '###' - Item-level documentation comment
-                self.doc_comment("item")
-            }
-            (n, _) if n >= 4 => {
-                // '####' or more - Block comment
-                self.block_comment();
-                None
-            }
-            _ => {
-                // Fallback to line comment
-                self.line_comment();
-                None
-            }
-        }
-    }
-                     
-    // Function to handle documentation comments
-    fn doc_comment(&mut self, _kind: &str) -> Option<Token> {
+    // Function to handle line comments
+    fn line_comment(&mut self) -> Option<Token>{
         let mut comment = String::new();
+
         while self.peek() != '\n' && !self.is_at_end() {
             comment.push(self.advance());
         }
-    
-        // Consume the newline character
-        if self.peek() == '\n' {
-            self.advance();
-        }
-    
-        Some(Token::DocComment(comment.trim().to_string()))
+
+        Some(Token::Comment(comment))
     }
-    
 }
+
diff --git a/src/lexer/token.rs b/src/lexer/token.rs
index e905f32..0210082 100644
--- a/src/lexer/token.rs
+++ b/src/lexer/token.rs
@@ -1,43 +1,26 @@
 #[derive(Debug, PartialEq, Clone)]
 pub enum Token {
     // Single-character tokens
-    LeftParen, // (
-    RightParen, // )
-    LeftBrace, // {
-    RightBrace, // }
-    Comma, // ,
-    Dot, // .
-    Minus, // -
-    Plus, // +
-    Semicolon, // ;
-    Colon, // :
-    Slash, // /
-    Star, // *
-    Percent, // %
-    Caret, // ^
-    Tilde, // ~
-    Backtick, // `
-    Dollar, // $
-    At, // @
-    //                      Hash, // #
-    Question, // ?
-    Exclamation, // !
-    Pipe, // |
-    Ampersand, // &
-
-    // one or two character tokens
-    BangEqual, // !=
-    Equal, // =
-    EqualEqual, // ==
-    Greater, // >
+    LeftParen,    // (
+    RightParen,   // )
+    LeftBrace,    // {
+    RightBrace,   // }
+    Comma,        // ,
+    Dot,          // .
+    Minus,        // -
+    Plus,         // +
+    Semicolon,    // ;
+    Slash,        // /
+    Star,         // *
+    Equal,        // =
+    BangEqual,    // !=
+    EqualEqual,   // ==
+    Greater,      // >
     GreaterEqual, // >=
-    Less, // <
-    LessEqual, // <=
-    FatArrow, // =>
-    ColonEqual, // :=
-    TildeEqual, // ~=
-    DoublePipe, // ||
-    DoubleAmpersand, // &&
+    Less,         // <
+    LessEqual,    // <=
+    Tilde,        // ~
+    TildeEqual,   // ~=
 
     // Literals
     Identifier(String),
@@ -46,39 +29,23 @@ pub enum Token {
 
     // Keywords
     And,
-    Class,
-    Else,
-    False,
-    Func,
-    For,
-    If,
-    Nil,
     Or,
-    Print,
-    Return,
-    Super,
-    This,
+    If,
+    Else,
     True,
+    False,
     Let,
-    While,
     Const,
-    Define,
-    Lambda,
-    Match,
-    Case,
-    Switch,
-    Until,
-    Repeat,
-    Unless,
-    Yes,
-    No,
-    On,
-    Off,
-    Module,
+    Func,
+    Return,
+    For,
+    While,
+    Print,
+    Pub,
+    Sym,
 
-    // Documentation and comments
-    DocComment(String), // ##!, ###
-    Comment(String), // #
+    // Comments
+    Comment(String),
 
     EOF,
-}
\ No newline at end of file
+}
diff --git a/src/lib.rs b/src/lib.rs
index 9a09c5f..50e73f0 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,3 @@
 pub mod lexer;
 pub mod parser;
-pub mod interpreter;
\ No newline at end of file
+pub mod compiler;
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
index c051a33..6337449 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,6 @@
 mod lexer;
 mod parser;
-mod interpreter;
+mod compiler;
 
 use std::env;
 use std::fs;
diff --git a/tests/lexer_tests.rs b/tests/lexer_tests.rs
index 03b0bf7..4772a56 100644
--- a/tests/lexer_tests.rs
+++ b/tests/lexer_tests.rs
@@ -1,5 +1,5 @@
-use fiddle::lexer::Lexer;
-use fiddle::lexer::token::Token;
+use fddl::lexer::Lexer;
+use fddl::lexer::token::Token;
 
 #[test]
 fn test_single_tokens() {
@@ -23,62 +23,94 @@ fn test_single_tokens() {
 }
 
 #[test]
-fn test_identifier_and_keywords() {
-    let source = String::from("let $varName := 123; ");
+fn test_keywords_and_identifiers() {
+    let source = String::from("sym myVar = 123;");
     let mut lexer = Lexer::new(source);
     let tokens = lexer.scan_tokens();
 
     assert_eq!(
         tokens,
         vec![
-            Token::Let,
-            Token::Dollar,
-            Token::Identifier("varName".to_string()),
-            Token::ColonEqual,
+            Token::Sym,
+            Token::Identifier("myVar".to_string()),
+            Token::Equal,
             Token::Number(123.0),
             Token::Semicolon,
             Token::EOF
         ]
     );
+    println!("{:?}", tokens);
+}
+
+#[test]
+fn test_pub_keyword() {
+    let source = String::from("pub func example() { return 42; }");
+    let mut lexer = Lexer::new(source);
+    let tokens = lexer.scan_tokens();
+
+    assert_eq!(
+        tokens,
+        vec![
+            Token::Pub,
+            Token::Func,
+            Token::Identifier("example".to_string()),
+            Token::LeftParen,
+            Token::RightParen,
+            Token::LeftBrace,
+            Token::Return,
+            Token::Number(42.0),
+            Token::Semicolon,
+            Token::RightBrace,
+            Token::EOF
+        ]
+    );
+}
+
+#[test]
+fn test_comments() {
+    let source = String::from("# This is a comment\nlet a = 5;");
+    let mut lexer = Lexer::new(source);
+    let tokens = lexer.scan_tokens();
+
+    assert_eq!(
+        tokens,
+        vec![
+            Token::Comment(" This is a comment".to_string()),
+            Token::Let,
+            Token::Identifier("a".to_string()),
+            Token::Equal,
+            Token::Number(5.0),
+            Token::Semicolon,
+            Token::EOF
+        ]
+    );
 }
 
 #[test]
-fn test_doc_comments() {
-    let source = String::from("##! Module documentation
-module test {
-    ### Function documentation
-    func example() {
-        # Regular comment
-        return 42;
-    }
-}      
-");
+fn test_operators_and_comparison() {
+    let source = String::from("a >= 10 != b == 5;");
     let mut lexer = Lexer::new(source);
     let tokens = lexer.scan_tokens();
 
-    println!("Tokens: {:?}", tokens);
-
-    assert_eq!(tokens[0], Token::DocComment("Module documentation".to_string()));
-    assert_eq!(tokens[1], Token::Module);
-    assert_eq!(tokens[2], Token::Identifier("test".to_string()));
-    assert_eq!(tokens[3], Token::LeftBrace);
-    assert_eq!(tokens[4], Token::DocComment("Function documentation".to_string()));
-    assert_eq!(tokens[5], Token::Func);
-    assert_eq!(tokens[6], Token::Identifier("example".to_string()));
-    assert_eq!(tokens[7], Token::LeftParen);
-    assert_eq!(tokens[8], Token::RightParen);
-    assert_eq!(tokens[9], Token::LeftBrace);
-    assert_eq!(tokens[10], Token::Return);
-    assert_eq!(tokens[11], Token::Number(42.0));
-    assert_eq!(tokens[12], Token::Semicolon);
-    assert_eq!(tokens[13], Token::RightBrace); // Closes function body
-    assert_eq!(tokens[14], Token::RightBrace); // Closes module
-    assert_eq!(tokens[15], Token::EOF);
+    assert_eq!(
+        tokens,
+        vec![
+            Token::Identifier("a".to_string()),
+            Token::GreaterEqual,
+            Token::Number(10.0),
+            Token::BangEqual,
+            Token::Identifier("b".to_string()),
+            Token::EqualEqual,
+            Token::Number(5.0),
+            Token::Semicolon,
+            Token::EOF
+        ]
+    );
 }
 
 #[test]
 fn test_tilde_operator() {
-    let source = String::from("if (a ~= b) { ~c }");
+    let source = String::from("if (a != b) { let c = ~5; }");
     let mut lexer = Lexer::new(source);
     let tokens = lexer.scan_tokens();
 
@@ -88,14 +120,18 @@ fn test_tilde_operator() {
             Token::If,
             Token::LeftParen,
             Token::Identifier("a".to_string()),
-            Token::TildeEqual,
+            Token::BangEqual,
             Token::Identifier("b".to_string()),
             Token::RightParen,
             Token::LeftBrace,
-            Token::Tilde,
+            Token::Let,
             Token::Identifier("c".to_string()),
+            Token::Equal,
+            Token::Tilde,
+            Token::Number(5.0),
+            Token::Semicolon,
             Token::RightBrace,
             Token::EOF
         ]
     );
-}
+}
\ No newline at end of file