parsed syntax keywords, simplified language, refactored current tests

2024-12-25 13:50:26 +00:00 · 2024-09-16 00:27:18 -04:00 · 2024-09-16 00:27:18 -04:00 · 38052a4367
commit 38052a4367
parent d9ac25fe4a
10 changed files with 169 additions and 279 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3,5 +3,5 @@
 version = 3

 [[package]]
-name = "fiddle"
-version = "0.1.0"
+name = "fddl"
+version = "0.0.2"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,9 +1,9 @@
 [package]
-name = "fiddle"
-version = "0.1.0"
+name = "fddl"
+version = "0.0.2"
 edition = "2021"
-authors = ["Tristan Smith <tristan.smith@pm.me>"]
+authors = ["Tristan Smith <tristan@fddl.dev>"]
 description = "A small programming language written in Rust."
-license = "BSD-3-Clause"
+license = "MIT"

 [dependencies]
--- a/src/compiler/codegen.rs
+++ b/src/compiler/codegen.rs
@@ -0,0 +1,3 @@
+// Placeholder for the compiler's code generation module.
+
+// Nothing is implemented here yet.
--- a/src/interpreter/mod.rs
+++ b/src/interpreter/mod.rs
--- a/src/interpreter/eval.rs
+++ b/src/interpreter/eval.rs
@@ -1,3 +0,0 @@
-// Placeholder for interpreter implementation
-
-// not even close yet
--- a/src/lexer/lexer.rs
+++ b/src/lexer/lexer.rs
@@ -35,7 +35,7 @@ impl Lexer {
        let c = self.advance();

        match c {
-            '#' => self.handle_comment_or_doc(),
+            // Single-character tokens
            '(' => Some(Token::LeftParen),
            ')' => Some(Token::RightParen),
            '{' => Some(Token::LeftBrace),
@@ -46,8 +46,6 @@ impl Lexer {
            '+' => Some(Token::Plus),
            ';' => Some(Token::Semicolon),
            '*' => Some(Token::Star),
-            '%' => Some(Token::Percent),
-            '^' => Some(Token::Caret),  
            '~' => {
                if self.match_char('=') {
                    Some(Token::TildeEqual)
@@ -55,48 +53,34 @@ impl Lexer {
                    Some(Token::Tilde)
                }
            }, 
-            '`' => Some(Token::Backtick),
-            '$' => Some(Token::Dollar),
-            '@' => Some(Token::At),
-            '?' => Some(Token::Question),
+            '/' => {
+                if self.match_char('/') {
+                    // Line comment starting with //
+                    self.line_comment() // Generate Comment token
+                } else {
+                    Some(Token::Slash)
+                }
+            },
+            '#' => {
+                // Line comment starting with #
+                self.line_comment() // Generate Comment token
+            },
+
+            // One or two character tokens
            '!' => {
                if self.match_char('=') {
                    Some(Token::BangEqual)
                } else {
-                    Some(Token::Exclamation)
-                }
-            },
-            '|' => {
-                if self.match_char('|') {
-                    Some(Token::DoublePipe)
-                } else {
-                    Some(Token::Pipe)
-                }
-            },
-            '&' => {
-                if self.match_char('&') {
-                    Some(Token::DoubleAmpersand)
-                } else {
-                    Some(Token::Ampersand)
+                    None // A lone '!' produces no token and no diagnostic; TODO: report an error or add a token
                }
            },
            '=' => {
                if self.match_char('=') {
                    Some(Token::EqualEqual)
-                } else if self.match_char('>') {
-                    Some(Token::FatArrow)
                } else {
                    Some(Token::Equal)
                }
            },
-            ':' => {
-                if self.match_char('=') {
-                    Some(Token::ColonEqual)
-                } else {
-                    // Handle single ':' if needed
-                    Some(Token::Colon)
-                }
-            },
            '<' => {
                if self.match_char('=') {
                    Some(Token::LessEqual)
@@ -111,26 +95,20 @@ impl Lexer {
                    Some(Token::Greater)
                }
            },
-            '/' => {
-                if self.match_char('/') {
-                    // It's a comment, consume until end of line
-                    let mut comment = String::new();
-                    while self.peek() != '\n' && !self.is_at_end() {
-                        comment.push(self.advance());
-                    }
-                    Some(Token::Comment(comment))
-                } else {
-                    Some(Token::Slash)
-                }
-                },
-            ' ' | '\r' | '\t' => None, // Ignore whitespace
+
+            // Whitespace
+            ' ' | '\r' | '\t' => None,
            '\n' => {
                self.line += 1;
                None
            },
+
+            // Literals
            '"' => self.string(),
            c if c.is_ascii_digit() => self.number(),
            c if self.is_alpha(c) => self.identifier(),
+
+            // Any other character
            _ => {
                eprintln!("Unexpected character '{}' on line {}", c, self.line);
                None
@@ -139,18 +117,17 @@ impl Lexer {
    }

    // Helper methods
-    // function to consume the current character
+    // Consume the current character and return it ('\0' at end of input, without advancing)
    fn advance(&mut self) -> char {
-        let c = if self.is_at_end() {
-            '\0'
-        } else {
-            self.source[self.current]
-        };
+        if self.is_at_end() {
+            return '\0';
+        }
+        let c = self.source[self.current];
        self.current += 1;
        c
    }

-    // function to parse the current character if it matches the expected character
+    // Check if the current character matches the expected character
    fn match_char(&mut self, expected: char) -> bool {
        if self.is_at_end() {
            return false;
@@ -164,7 +141,7 @@ impl Lexer {
        true
    }

-    // function to parse the current character without consuming it
+    // Look at the current character without consuming it ('\0' at end of input)
    fn peek(&self) -> char {
        if self.is_at_end() {
            '\0'
@@ -173,7 +150,7 @@ impl Lexer {
        }
    }

-    // function to parse the next character without consuming it
+    // Look at the character after the current one without consuming it ('\0' if there is none)
    fn peek_next(&self) -> char {
        if self.current + 1 >= self.source.len() {
            '\0'
@@ -182,12 +159,12 @@ impl Lexer {
        }
    }

-    // function to check if we've reached the end of the source
+    // Check if we have reached the end of the source
    fn is_at_end(&self) -> bool {
        self.current >= self.source.len()
    }

-    // Function to handle different token types
+    // Function to handle string literals
    fn string(&mut self) -> Option<Token> {
        while self.peek() != '"' && !self.is_at_end() {
            if self.peek() == '\n' {
@@ -196,7 +173,6 @@ impl Lexer {
            self.advance();
        }

-        // Check if we've reached the end without finding a closing quote
        if self.is_at_end() {
            eprintln!("Unterminated string on line {}", self.line);
            return None;
@@ -235,7 +211,7 @@ impl Lexer {
        Some(Token::Number(value))
    }

-    // Function to handle identifiers
+    // Function to handle identifiers and keywords
    fn identifier(&mut self) -> Option<Token> {
        while self.is_alphanumeric(self.peek()) || self.peek() == '_' {
            self.advance();
@@ -248,132 +224,43 @@ impl Lexer {
        // Check for reserved keywords
        let token = match text.as_str() {
            "and" => Token::And,
-            "class" => Token::Class,
-            "else" => Token::Else,
-            "false" => Token::False,
-            "func" => Token::Func,
-            "for" => Token::For,
-            "if" => Token::If,
-            "nil" => Token::Nil,
            "or" => Token::Or,
-            "print" => Token::Print,
-            "return" => Token::Return,
-            "super" => Token::Super,
-            "this" => Token::This,
+            "if" => Token::If,
+            "else" => Token::Else,
            "true" => Token::True,
+            "false" => Token::False,
            "let" => Token::Let,
-            "while" => Token::While,
            "const" => Token::Const,
-            "define" => Token::Define,
-            "lambda" => Token::Lambda,
-            "match" => Token::Match,
-            "case" => Token::Case,
-            "switch" => Token::Switch,
-            "until" => Token::Until,
-            "repeat" => Token::Repeat,
-            "unless" => Token::Unless,
-            "yes" => Token::Yes,
-            "no" => Token::No,
-            "on" => Token::On,
-            "off" => Token::Off,
-            "module" => Token::Module,
+            "func" => Token::Func,
+            "return" => Token::Return,
+            "for" => Token::For,
+            "while" => Token::While,
+            "print" => Token::Print,
+            "pub" => Token::Pub,
+            "sym" => Token::Sym,
            _ => Token::Identifier(text),
        };

        Some(token)
    }

-    // Function to check if a character is an alphabetic character or an underscore
    fn is_alpha(&self, c: char) -> bool {
        c.is_alphabetic() || c == '_'
    }

-    // Function to check if a character is an alphanumeric character or an underscore
    fn is_alphanumeric(&self, c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }

-    // Function to handle comments and documentation
-    fn line_comment(&mut self) {
-        while self.peek() != '\n' && !self.is_at_end() {
-            self.advance();
-        }
-    }
-
-    // Function to handle block comments
-    fn block_comment(&mut self) {
-        while !self.is_at_end() {
-            if self.peek() == '*' && self.peek_next() == '/' {
-                self.advance();
-                self.advance();
-                break;
-            } else {
-                if self.peek() == '\n' {
-                    self.line += 1;
-                }
-                self.advance();
-            }
-        }
-    }
-
-    // Function to handle comments and documentation
-    fn handle_comment_or_doc(&mut self) -> Option<Token> {
-        // We have matched one '#' character so far
-        let mut count = 1;
-    
-        // Count additional consecutive '#' characters
-        while self.match_char('#') {
-            count += 1;
-        }
-    
-        // Check for an exclamation mark after the '#' characters
-        let has_exclamation = self.match_char('!');
-    
-        match (count, has_exclamation) {
-            (1, _) => {
-                // Single '#' - Line comment
-                self.line_comment();
-                None
-            }
-            (2, true) => {
-                // '##!' - Module-level documentation comment
-                self.doc_comment("module")
-            }
-            (2, false) => {
-                // '##' - Block comment
-                self.block_comment();
-                None
-            }
-            (3, _) => {
-                // '###' - Item-level documentation comment
-                self.doc_comment("item")
-            }
-            (n, _) if n >= 4 => {
-                // '####' or more - Block comment
-                self.block_comment();
-                None
-            }
-            _ => {
-                // Fallback to line comment
-                self.line_comment();
-                None
-            }
-        }
-    }
-                     
-    // Function to handle documentation comments
-    fn doc_comment(&mut self, _kind: &str) -> Option<Token> {
+    // Collect the rest of the current line (up to, but not including, '\n') into a Comment token
+    fn line_comment(&mut self) -> Option<Token>{
        let mut comment = String::new();
+
        while self.peek() != '\n' && !self.is_at_end() {
            comment.push(self.advance());
        }

-        // Consume the newline character
-        if self.peek() == '\n' {
-            self.advance();
-        }
-    
-        Some(Token::DocComment(comment.trim().to_string()))
+        Some(Token::Comment(comment))
    }
-    
 }
+
--- a/src/lexer/token.rs
+++ b/src/lexer/token.rs
@@ -1,43 +1,26 @@
 #[derive(Debug, PartialEq, Clone)]
 pub enum Token {
    // Single-character tokens
-    LeftParen, // (
-    RightParen, // )
-    LeftBrace, // {
-    RightBrace, // }
-    Comma, // ,
-    Dot, // .
-    Minus, // -
-    Plus, // +
-    Semicolon, // ;
-    Colon, // :
-    Slash, // /
-    Star, // *
-    Percent, // %
-    Caret, // ^
-    Tilde, // ~
-    Backtick, // `
-    Dollar, // $
-    At, // @
-    //                      Hash, // #
-    Question, // ?
-    Exclamation, // !
-    Pipe, // |
-    Ampersand, // &
-
-    // one or two character tokens
-    BangEqual, // !=
-    Equal, // =
-    EqualEqual, // ==
-    Greater, // >
+    LeftParen,    // (
+    RightParen,   // )
+    LeftBrace,    // {
+    RightBrace,   // }
+    Comma,        // ,
+    Dot,          // .
+    Minus,        // -
+    Plus,         // +
+    Semicolon,    // ;
+    Slash,        // /
+    Star,         // *
+    Equal,        // =
+    BangEqual,    // !=
+    EqualEqual,   // ==
+    Greater,      // >
    GreaterEqual, // >=
-    Less, // <
-    LessEqual, // <=
-    FatArrow, // =>
-    ColonEqual, // :=
-    TildeEqual, // ~=
-    DoublePipe, // ||
-    DoubleAmpersand, // &&
+    Less,         // <
+    LessEqual,    // <=
+    Tilde,        // ~
+    TildeEqual,   // ~=

    // Literals
    Identifier(String),
@@ -46,39 +29,23 @@ pub enum Token {

    // Keywords
    And,
-    Class,
-    Else,
-    False,
-    Func,
-    For,
-    If,
-    Nil,
    Or,
-    Print,
-    Return,
-    Super,
-    This,
+    If,
+    Else,
    True,
+    False,
    Let,
-    While,
    Const,
-    Define,
-    Lambda,
-    Match,
-    Case,
-    Switch,
-    Until,
-    Repeat,
-    Unless,
-    Yes,
-    No,
-    On,
-    Off,
-    Module,
+    Func,
+    Return,
+    For,
+    While,
+    Print,
+    Pub,
+    Sym,

-    // Documentation and comments
-    DocComment(String), // ##!, ###
-    Comment(String), // #
+    // Comments
+    Comment(String),

    EOF,
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,3 @@
 pub mod lexer;
 pub mod parser;
-pub mod interpreter;
+pub mod compiler;
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,6 @@
 mod lexer;
 mod parser;
-mod interpreter;
+mod compiler;

 use std::env;
 use std::fs;
--- a/tests/lexer_tests.rs
+++ b/tests/lexer_tests.rs
@@ -1,5 +1,5 @@
-use fiddle::lexer::Lexer;
-use fiddle::lexer::token::Token;
+use fddl::lexer::Lexer;
+use fddl::lexer::token::Token;

 #[test]
 fn test_single_tokens() {
@@ -23,62 +23,94 @@ fn test_single_tokens() {
 }

 #[test]
-fn test_identifier_and_keywords() {
-    let source = String::from("let $varName := 123; ");
+fn test_keywords_and_identifiers() {
+    let source = String::from("sym myVar = 123;");
    let mut lexer = Lexer::new(source);
    let tokens = lexer.scan_tokens();

    assert_eq!(
        tokens,
        vec![
-            Token::Let,
-            Token::Dollar,
-            Token::Identifier("varName".to_string()),
-            Token::ColonEqual,
+            Token::Sym,
+            Token::Identifier("myVar".to_string()),
+            Token::Equal,
            Token::Number(123.0),
            Token::Semicolon,
            Token::EOF
        ]
    );
+    println!("{:?}", tokens);
+}
+
+#[test]
+fn test_pub_keyword() {
+    let source = String::from("pub func example() { return 42; }");
+    let mut lexer = Lexer::new(source);
+    let tokens = lexer.scan_tokens();
+
+    assert_eq!(
+        tokens,
+        vec![
+            Token::Pub,
+            Token::Func,
+            Token::Identifier("example".to_string()),
+            Token::LeftParen,
+            Token::RightParen,
+            Token::LeftBrace,
+            Token::Return,
+            Token::Number(42.0),
+            Token::Semicolon,
+            Token::RightBrace,
+            Token::EOF
+        ]
+    );
+}
+
+#[test]
+fn test_comments() {
+    let source = String::from("# This is a comment\nlet a = 5;");
+    let mut lexer = Lexer::new(source);
+    let tokens = lexer.scan_tokens();
+
+    assert_eq!(
+        tokens,
+        vec![
+            Token::Comment(" This is a comment".to_string()),
+            Token::Let,
+            Token::Identifier("a".to_string()),
+            Token::Equal,
+            Token::Number(5.0),
+            Token::Semicolon,
+            Token::EOF
+        ]
+    );
 }

 #[test]
-fn test_doc_comments() {
-    let source = String::from("##! Module documentation
-module test {
-    ### Function documentation
-    func example() {
-        # Regular comment
-        return 42;
-    }
-}      
-");
+fn test_operators_and_comparison() {
+    let source = String::from("a >= 10 != b == 5;");
    let mut lexer = Lexer::new(source);
    let tokens = lexer.scan_tokens();

-    println!("Tokens: {:?}", tokens);
-
-    assert_eq!(tokens[0], Token::DocComment("Module documentation".to_string()));
-    assert_eq!(tokens[1], Token::Module);
-    assert_eq!(tokens[2], Token::Identifier("test".to_string()));
-    assert_eq!(tokens[3], Token::LeftBrace);
-    assert_eq!(tokens[4], Token::DocComment("Function documentation".to_string()));
-    assert_eq!(tokens[5], Token::Func);
-    assert_eq!(tokens[6], Token::Identifier("example".to_string()));
-    assert_eq!(tokens[7], Token::LeftParen);
-    assert_eq!(tokens[8], Token::RightParen);
-    assert_eq!(tokens[9], Token::LeftBrace);
-    assert_eq!(tokens[10], Token::Return);
-    assert_eq!(tokens[11], Token::Number(42.0));
-    assert_eq!(tokens[12], Token::Semicolon);
-    assert_eq!(tokens[13], Token::RightBrace); // Closes function body
-    assert_eq!(tokens[14], Token::RightBrace); // Closes module
-    assert_eq!(tokens[15], Token::EOF);
+    assert_eq!(
+        tokens,
+        vec![
+            Token::Identifier("a".to_string()),
+            Token::GreaterEqual,
+            Token::Number(10.0),
+            Token::BangEqual,
+            Token::Identifier("b".to_string()),
+            Token::EqualEqual,
+            Token::Number(5.0),
+            Token::Semicolon,
+            Token::EOF
+        ]
+    );
 }

 #[test]
 fn test_tilde_operator() {
-    let source = String::from("if (a ~= b) { ~c }");
+    let source = String::from("if (a != b) { let c = ~5; }");
    let mut lexer = Lexer::new(source);
    let tokens = lexer.scan_tokens();

@@ -88,12 +120,16 @@ fn test_tilde_operator() {
            Token::If,
            Token::LeftParen,
            Token::Identifier("a".to_string()),
-            Token::TildeEqual,
+            Token::BangEqual,
            Token::Identifier("b".to_string()),
            Token::RightParen,
            Token::LeftBrace,
-            Token::Tilde,
+            Token::Let,
            Token::Identifier("c".to_string()),
+            Token::Equal,
+            Token::Tilde,
+            Token::Number(5.0),
+            Token::Semicolon,
            Token::RightBrace,
            Token::EOF
        ]