parsed syntax keywords, simplified language, refactored current tests

2024-12-25 21:50:31 +00:00 · 2024-09-16 00:27:18 -04:00 · 2024-09-16 00:27:18 -04:00 · 38052a4367
commit 38052a4367
parent d9ac25fe4a
10 changed files with 169 additions and 279 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -3,5 +3,5 @@
 version = 3
 [[package]]
-name = "fiddle"
+name = "fddl"
-version = "0.1.0"
+version = "0.0.2"
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,9 +1,9 @@
 [package]
-name = "fiddle"
+name = "fddl"
-version = "0.1.0"
+version = "0.0.2"
 edition = "2021"
-authors = ["Tristan Smith <tristan.smith@pm.me>"]
+authors = ["Tristan Smith <tristan@fddl.dev>"]
 description = "A small programming language written in Rust."
-license = "BSD-3-Clause"
+license = "MIT"
 [dependencies]
--- a/src/compiler/codegen.rs
+++ b/src/compiler/codegen.rs
@@ -0,0 +1,3 @@
+// Placeholder for compiler implementation
+
+// not even close yet
--- a/src/interpreter/mod.rs
+++ b/src/interpreter/mod.rs
--- a/src/interpreter/eval.rs
+++ b/src/interpreter/eval.rs
@@ -1,3 +0,0 @@
-// Placeholder for interpreter implementation
-
-// not even close yet
--- a/src/lexer/lexer.rs
+++ b/src/lexer/lexer.rs
@@ -35,7 +35,7 @@ impl Lexer {
        let c = self.advance();
        match c {
-            '#' => self.handle_comment_or_doc(),
+            // Single-character tokens
            '(' => Some(Token::LeftParen),
            ')' => Some(Token::RightParen),
            '{' => Some(Token::LeftBrace),
@@ -46,8 +46,6 @@ impl Lexer {
            '+' => Some(Token::Plus),
            ';' => Some(Token::Semicolon),
            '*' => Some(Token::Star),
            '%' => Some(Token::Percent),
            '^' => Some(Token::Caret),  
            '~' => {
                if self.match_char('=') {
                    Some(Token::TildeEqual)
@@ -55,48 +53,34 @@ impl Lexer {
                    Some(Token::Tilde)
                }
            }, 
-            '`' => Some(Token::Backtick),
+            '/' => {
-            '$' => Some(Token::Dollar),
+                if self.match_char('/') {
-            '@' => Some(Token::At),
+                    // Line comment starting with //
-            '?' => Some(Token::Question),
+                    self.line_comment() // Generate Comment token
                } else {
                    Some(Token::Slash)
                }
            },
            '#' => {
                // Line comment starting with #
                self.line_comment() // Generate Comment token
            },
            // One or two character tokens
            '!' => {
                if self.match_char('=') {
                    Some(Token::BangEqual)
                } else {
-                    Some(Token::Exclamation)
+                    None // Or handle as an error or another token if needed
                }
            },
            '|' => {
                if self.match_char('|') {
                    Some(Token::DoublePipe)
                } else {
                    Some(Token::Pipe)
                }
            },
            '&' => {
                if self.match_char('&') {
                    Some(Token::DoubleAmpersand)
                } else {
                    Some(Token::Ampersand)
                }
            },
            '=' => {
                if self.match_char('=') {
                    Some(Token::EqualEqual)
                } else if self.match_char('>') {
                    Some(Token::FatArrow)
                } else {
                    Some(Token::Equal)
                }
            },
            ':' => {
                if self.match_char('=') {
                    Some(Token::ColonEqual)
                } else {
                    // Handle single ':' if needed
                    Some(Token::Colon)
                }
            },
            '<' => {
                if self.match_char('=') {
                    Some(Token::LessEqual)
@@ -111,26 +95,20 @@ impl Lexer {
                    Some(Token::Greater)
                }
            },
-            '/' => {
+
-                if self.match_char('/') {
+            // Whitespace
-                    // It's a comment, consume until end of line
+            ' ' | '\r' | '\t' => None,
                    let mut comment = String::new();
                    while self.peek() != '\n' && !self.is_at_end() {
                        comment.push(self.advance());
                    }
                    Some(Token::Comment(comment))
                } else {
                    Some(Token::Slash)
                }
                },
            ' ' | '\r' | '\t' => None, // Ignore whitespace
            '\n' => {
                self.line += 1;
                None
            },
            // Literals
            '"' => self.string(),
            c if c.is_ascii_digit() => self.number(),
            c if self.is_alpha(c) => self.identifier(),
            // Any other character
            _ => {
                eprintln!("Unexpected character '{}' on line {}", c, self.line);
                None
@@ -139,18 +117,17 @@ impl Lexer {
    }
    // Helper methods
-    // function to consume the current character
+    // Consume the current character and return it
    fn advance(&mut self) -> char {
-        let c = if self.is_at_end() {
+        if self.is_at_end() {
-            '\0'
+            return '\0';
-        } else {
+        }
-            self.source[self.current]
+        let c = self.source[self.current];
        };
        self.current += 1;
        c
    }
-    // function to parse the current character if it matches the expected character
+    // Check if the current character matches the expected character
    fn match_char(&mut self, expected: char) -> bool {
        if self.is_at_end() {
            return false;
@@ -164,7 +141,7 @@ impl Lexer {
        true
    }
-    // function to parse the current character without consuming it
+    // Look at current character
    fn peek(&self) -> char {
        if self.is_at_end() {
            '\0'
@@ -173,7 +150,7 @@ impl Lexer {
        }
    }
-    // function to parse the next character without consuming it
+    // Look ahead by one character
    fn peek_next(&self) -> char {
        if self.current + 1 >= self.source.len() {
            '\0'
@@ -182,12 +159,12 @@ impl Lexer {
        }
    }
-    // function to check if we've reached the end of the source
+    // Check if we have reached the end of the source
    fn is_at_end(&self) -> bool {
        self.current >= self.source.len()
    }
-    // Function to handle different token types
+    // Function to handle string literals
    fn string(&mut self) -> Option<Token> {
        while self.peek() != '"' && !self.is_at_end() {
            if self.peek() == '\n' {
@@ -196,7 +173,6 @@ impl Lexer {
            self.advance();
        }
        // Check if we've reached the end without finding a closing quote
        if self.is_at_end() {
            eprintln!("Unterminated string on line {}", self.line);
            return None;
@@ -235,7 +211,7 @@ impl Lexer {
        Some(Token::Number(value))
    }
-    // Function to handle identifiers
+    // Function to handle identifiers and keywords
    fn identifier(&mut self) -> Option<Token> {
        while self.is_alphanumeric(self.peek()) || self.peek() == '_' {
            self.advance();
@@ -248,132 +224,43 @@ impl Lexer {
        // Check for reserved keywords
        let token = match text.as_str() {
            "and" => Token::And,
            "class" => Token::Class,
            "else" => Token::Else,
            "false" => Token::False,
            "func" => Token::Func,
            "for" => Token::For,
            "if" => Token::If,
            "nil" => Token::Nil,
            "or" => Token::Or,
-            "print" => Token::Print,
+            "if" => Token::If,
-            "return" => Token::Return,
+            "else" => Token::Else,
            "super" => Token::Super,
            "this" => Token::This,
            "true" => Token::True,
            "false" => Token::False,
            "let" => Token::Let,
            "while" => Token::While,
            "const" => Token::Const,
-            "define" => Token::Define,
+            "func" => Token::Func,
-            "lambda" => Token::Lambda,
+            "return" => Token::Return,
-            "match" => Token::Match,
+            "for" => Token::For,
-            "case" => Token::Case,
+            "while" => Token::While,
-            "switch" => Token::Switch,
+            "print" => Token::Print,
-            "until" => Token::Until,
+            "pub" => Token::Pub,
-            "repeat" => Token::Repeat,
+            "sym" => Token::Sym,
            "unless" => Token::Unless,
            "yes" => Token::Yes,
            "no" => Token::No,
            "on" => Token::On,
            "off" => Token::Off,
            "module" => Token::Module,
            _ => Token::Identifier(text),
        };
        Some(token)
    }
    // Function to check if a character is an alphabetic character or an underscore
    fn is_alpha(&self, c: char) -> bool {
        c.is_alphabetic() || c == '_'
    }
    // Function to check if a character is an alphanumeric character or an underscore
    fn is_alphanumeric(&self, c: char) -> bool {
        c.is_alphanumeric() || c == '_'
    }
-    // Function to handle comments and documentation
+    // Function to handle line comments
-    fn line_comment(&mut self) {
+    fn line_comment(&mut self) -> Option<Token>{
        while self.peek() != '\n' && !self.is_at_end() {
            self.advance();
        }
    }
    // Function to handle block comments
    fn block_comment(&mut self) {
        while !self.is_at_end() {
            if self.peek() == '*' && self.peek_next() == '/' {
                self.advance();
                self.advance();
                break;
            } else {
                if self.peek() == '\n' {
                    self.line += 1;
                }
                self.advance();
            }
        }
    }
    // Function to handle comments and documentation
    fn handle_comment_or_doc(&mut self) -> Option<Token> {
        // We have matched one '#' character so far
        let mut count = 1;
        // Count additional consecutive '#' characters
        while self.match_char('#') {
            count += 1;
        }
        // Check for an exclamation mark after the '#' characters
        let has_exclamation = self.match_char('!');
        match (count, has_exclamation) {
            (1, _) => {
                // Single '#' - Line comment
                self.line_comment();
                None
            }
            (2, true) => {
                // '##!' - Module-level documentation comment
                self.doc_comment("module")
            }
            (2, false) => {
                // '##' - Block comment
                self.block_comment();
                None
            }
            (3, _) => {
                // '###' - Item-level documentation comment
                self.doc_comment("item")
            }
            (n, _) if n >= 4 => {
                // '####' or more - Block comment
                self.block_comment();
                None
            }
            _ => {
                // Fallback to line comment
                self.line_comment();
                None
            }
        }
    }
    // Function to handle documentation comments
    fn doc_comment(&mut self, _kind: &str) -> Option<Token> {
        let mut comment = String::new();
        while self.peek() != '\n' && !self.is_at_end() {
            comment.push(self.advance());
        }
-        // Consume the newline character
+        Some(Token::Comment(comment))
-        if self.peek() == '\n' {
+    }
            self.advance();
 }
        Some(Token::DocComment(comment.trim().to_string()))
    }
 }
--- a/src/lexer/token.rs
+++ b/src/lexer/token.rs
@@ -10,34 +10,17 @@ pub enum Token {
    Minus,        // -
    Plus,         // +
    Semicolon,    // ;
    Colon, // :
    Slash,        // /
    Star,         // *
    Percent, // %
    Caret, // ^
    Tilde, // ~
    Backtick, // `
    Dollar, // $
    At, // @
    //                      Hash, // #
    Question, // ?
    Exclamation, // !
    Pipe, // |
    Ampersand, // &
    // one or two character tokens
    BangEqual, // !=
    Equal,        // =
    BangEqual,    // !=
    EqualEqual,   // ==
    Greater,      // >
    GreaterEqual, // >=
    Less,         // <
    LessEqual,    // <=
-    FatArrow, // =>
+    Tilde,        // ~
    ColonEqual, // :=
    TildeEqual,   // ~=
    DoublePipe, // ||
    DoubleAmpersand, // &&
    // Literals
    Identifier(String),
@@ -46,39 +29,23 @@ pub enum Token {
    // Keywords
    And,
    Class,
    Else,
    False,
    Func,
    For,
    If,
    Nil,
    Or,
-    Print,
+    If,
-    Return,
+    Else,
    Super,
    This,
    True,
    False,
    Let,
    While,
    Const,
-    Define,
+    Func,
-    Lambda,
+    Return,
-    Match,
+    For,
-    Case,
+    While,
-    Switch,
+    Print,
-    Until,
+    Pub,
-    Repeat,
+    Sym,
    Unless,
    Yes,
    No,
    On,
    Off,
    Module,
-    // Documentation and comments
+    // Comments
-    DocComment(String), // ##!, ###
+    Comment(String),
    Comment(String), // #
    EOF,
 }
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,3 +1,3 @@
 pub mod lexer;
 pub mod parser;
-pub mod interpreter;
+pub mod compiler;
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,6 @@
 mod lexer;
 mod parser;
-mod interpreter;
+mod compiler;
 use std::env;
 use std::fs;
--- a/tests/lexer_tests.rs
+++ b/tests/lexer_tests.rs
@@ -1,5 +1,5 @@
-use fiddle::lexer::Lexer;
+use fddl::lexer::Lexer;
-use fiddle::lexer::token::Token;
+use fddl::lexer::token::Token;
 #[test]
 fn test_single_tokens() {
@@ -23,62 +23,94 @@ fn test_single_tokens() {
 }
 #[test]
-fn test_identifier_and_keywords() {
+fn test_keywords_and_identifiers() {
-    let source = String::from("let $varName := 123; ");
+    let source = String::from("sym myVar = 123;");
    let mut lexer = Lexer::new(source);
    let tokens = lexer.scan_tokens();
    assert_eq!(
        tokens,
        vec![
-            Token::Let,
+            Token::Sym,
-            Token::Dollar,
+            Token::Identifier("myVar".to_string()),
-            Token::Identifier("varName".to_string()),
+            Token::Equal,
            Token::ColonEqual,
            Token::Number(123.0),
            Token::Semicolon,
            Token::EOF
        ]
    );
    println!("{:?}", tokens);
 }
 #[test]
 fn test_pub_keyword() {
    let source = String::from("pub func example() { return 42; }");
    let mut lexer = Lexer::new(source);
    let tokens = lexer.scan_tokens();
    assert_eq!(
        tokens,
        vec![
            Token::Pub,
            Token::Func,
            Token::Identifier("example".to_string()),
            Token::LeftParen,
            Token::RightParen,
            Token::LeftBrace,
            Token::Return,
            Token::Number(42.0),
            Token::Semicolon,
            Token::RightBrace,
            Token::EOF
        ]
    );
 }
 #[test]
 fn test_comments() {
    let source = String::from("# This is a comment\nlet a = 5;");
    let mut lexer = Lexer::new(source);
    let tokens = lexer.scan_tokens();
    assert_eq!(
        tokens,
        vec![
            Token::Comment(" This is a comment".to_string()),
            Token::Let,
            Token::Identifier("a".to_string()),
            Token::Equal,
            Token::Number(5.0),
            Token::Semicolon,
            Token::EOF
        ]
    );
 }
 #[test]
-fn test_doc_comments() {
+fn test_operators_and_comparison() {
-    let source = String::from("##! Module documentation
+    let source = String::from("a >= 10 != b == 5;");
 module test {
    ### Function documentation
    func example() {
        # Regular comment
        return 42;
    }
 }      
 ");
    let mut lexer = Lexer::new(source);
    let tokens = lexer.scan_tokens();
-    println!("Tokens: {:?}", tokens);
+    assert_eq!(
-
+        tokens,
-    assert_eq!(tokens[0], Token::DocComment("Module documentation".to_string()));
+        vec![
-    assert_eq!(tokens[1], Token::Module);
+            Token::Identifier("a".to_string()),
-    assert_eq!(tokens[2], Token::Identifier("test".to_string()));
+            Token::GreaterEqual,
-    assert_eq!(tokens[3], Token::LeftBrace);
+            Token::Number(10.0),
-    assert_eq!(tokens[4], Token::DocComment("Function documentation".to_string()));
+            Token::BangEqual,
-    assert_eq!(tokens[5], Token::Func);
+            Token::Identifier("b".to_string()),
-    assert_eq!(tokens[6], Token::Identifier("example".to_string()));
+            Token::EqualEqual,
-    assert_eq!(tokens[7], Token::LeftParen);
+            Token::Number(5.0),
-    assert_eq!(tokens[8], Token::RightParen);
+            Token::Semicolon,
-    assert_eq!(tokens[9], Token::LeftBrace);
+            Token::EOF
-    assert_eq!(tokens[10], Token::Return);
+        ]
-    assert_eq!(tokens[11], Token::Number(42.0));
+    );
    assert_eq!(tokens[12], Token::Semicolon);
    assert_eq!(tokens[13], Token::RightBrace); // Closes function body
    assert_eq!(tokens[14], Token::RightBrace); // Closes module
    assert_eq!(tokens[15], Token::EOF);
 }
 #[test]
 fn test_tilde_operator() {
-    let source = String::from("if (a ~= b) { ~c }");
+    let source = String::from("if (a != b) { let c = ~5; }");
    let mut lexer = Lexer::new(source);
    let tokens = lexer.scan_tokens();
@@ -88,12 +120,16 @@ fn test_tilde_operator() {
            Token::If,
            Token::LeftParen,
            Token::Identifier("a".to_string()),
-            Token::TildeEqual,
+            Token::BangEqual,
            Token::Identifier("b".to_string()),
            Token::RightParen,
            Token::LeftBrace,
-            Token::Tilde,
+            Token::Let,
            Token::Identifier("c".to_string()),
            Token::Equal,
            Token::Tilde,
            Token::Number(5.0),
            Token::Semicolon,
            Token::RightBrace,
            Token::EOF
        ]
		`@ -0,0 +1,3 @@`
							`// Placeholder for compiler implementation`

							`// not even close yet`
		`@ -1,3 +0,0 @@`
			`// Placeholder for interpreter implementation`

			`// not even close yet`