commit e8b8d41baab853600037675a27221ba7288256d1 Author: Tristan Smith Date: Sat Sep 14 23:01:16 2024 -0400 initial commit - lexer builds, tests run diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..b0e5d81 --- /dev/null +++ b/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "fiddle" +version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..f21e4e5 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "fiddle" +version = "0.1.0" +edition = "2021" +authors = ["Tristan Smith "] +description = "A small programming language written in Rust." +license = "BSD-3-Clause" + +[dependencies] diff --git a/readme.md b/readme.md new file mode 100644 index 0000000..970c2a1 --- /dev/null +++ b/readme.md @@ -0,0 +1,75 @@ +# Fiddle Programming Language + +Fiddle is a small programming language inspired by various languages, designed to help learn language implementation concepts in Rust. + +I have, off and on throughout the last 15 or so years, attempted to learn a programming language of some sort. I could always get through the basics, but would get stuck with any real-world projects. And I wouldn't know who to turn to even if I knew where to start. + +So I started learning Rust and really like it. So I've been following some tutorials and the Crafting Interpreters site as guides for this very problematic programming language. + +I like aspects of so many programming languages, but I don't really like any of them, so I always found it hard to pick one and stick with it. But I had the same problem playing World of Warcraft, too. + +So I, like many of you, decided to make a hobby programming language to see what may be able to be done with it. 
+ +## Features + +- Custom syntax with unique operators and keywords +- Documentation comments using `#`, similar to Rust's style +- Lexer and parser built from scratch in Rust + +## Getting Started + +To run the REPL: + +```sh +cargo run +``` + +To run a fiddle script: + +```sh +cargo run path/to/script.fddl +``` + +## Examples + +```sh +##! This is a sample module + +module math { + + ### Computes the square of a number + func square(x) => x ^ 2; +} + +define $number := 5; +print(`The square of $number is ${math.square($number)}`); +``` + +(At least for right now.) + +## License + +This project is licensed under the MIT License. + + +--- + +## **Notes and Next Steps** + +- [x] Added first new set of tokens and features, added the first lexer tests. +- [ ] `parser` module is a placeholder. +- [ ] `interpreter` module is a placeholder. +- [ ] Implement a more robust error handling mechanism instead of using `stderr`. +- [ ] Implement string interpolation (backticks with `$variable`) +- [ ] Continue to expand tests to cover all new syntax and features. 
+
+---
+
+## **Running the Project**
+
+Make sure your project compiles and the tests pass:
+
+```bash
+cargo build
+cargo test
+```
\ No newline at end of file
diff --git a/src/interpreter/eval.rs b/src/interpreter/eval.rs
new file mode 100644
index 0000000..b992674
--- /dev/null
+++ b/src/interpreter/eval.rs
@@ -0,0 +1,3 @@
+// Placeholder for interpreter implementation
+
+// not even close yet
\ No newline at end of file
diff --git a/src/interpreter/mod.rs b/src/interpreter/mod.rs
new file mode 100644
index 0000000..8654f7c
--- /dev/null
+++ b/src/interpreter/mod.rs
@@ -0,0 +1,3 @@
+//pub mod eval;
+
+//pub use eval::*;
\ No newline at end of file
diff --git a/src/lexer/lexer.rs b/src/lexer/lexer.rs
new file mode 100644
index 0000000..98640e7
--- /dev/null
+++ b/src/lexer/lexer.rs
@@ -0,0 +1,399 @@
+use crate::lexer::token::Token;
+
+/// Hand-written scanner that turns fiddle source text into a flat list of tokens.
+pub struct Lexer {
+    /// Source text as individual characters, so positions can be indexed directly.
+    source: Vec<char>,
+    /// Index of the first character of the token currently being scanned.
+    start: usize,
+    /// Index of the next character to be consumed.
+    current: usize,
+    /// Current line number (starts at 1); used in diagnostics.
+    line: usize,
+}
+
+impl Lexer {
+    /// Creates a lexer positioned at the start of `source`.
+    pub fn new(source: String) -> Self {
+        Lexer {
+            source: source.chars().collect(),
+            start: 0,
+            current: 0,
+            line: 1,
+        }
+    }
+
+    /// Scans the whole input and returns its tokens, always ending with `Token::EOF`.
+    ///
+    /// Whitespace and plain `#` comments yield no token; unexpected characters and
+    /// unterminated strings are reported on stderr and skipped.
+    pub fn scan_tokens(&mut self) -> Vec<Token> {
+        let mut tokens = Vec::new();
+
+        while !self.is_at_end() {
+            self.start = self.current;
+            if let Some(token) = self.scan_token() {
+                tokens.push(token);
+            }
+        }
+
+        tokens.push(Token::EOF);
+        tokens
+    }
+
+    /// Scans one lexeme starting at `self.current`.
+    ///
+    /// Returns `None` for input that produces no token (whitespace, newlines,
+    /// non-doc `#` comments, unexpected characters, unterminated strings).
+    fn scan_token(&mut self) -> Option<Token> {
+        let c = self.advance();
+
+        match c {
+            '#' => self.handle_comment_or_doc(),
+            '(' => Some(Token::LeftParen),
+            ')' => Some(Token::RightParen),
+            '{' => Some(Token::LeftBrace),
+            '}' => Some(Token::RightBrace),
+            ',' => Some(Token::Comma),
+            '.' => Some(Token::Dot),
+            '-' => Some(Token::Minus),
+            '+' => Some(Token::Plus),
+            ';' => Some(Token::Semicolon),
+            '*' => Some(Token::Star),
+            '%' => Some(Token::Percent),
+            '^' => Some(Token::Caret),
+            '~' => {
+                if self.match_char('=') {
+                    Some(Token::TildeEqual)
+                } else {
+                    Some(Token::Tilde)
+                }
+            },
+            '`' => Some(Token::Backtick),
+            '$' => Some(Token::Dollar),
+            '@' => Some(Token::At),
+            '?' => Some(Token::Question),
+            '!' => {
+                if self.match_char('=') {
+                    Some(Token::BangEqual)
+                } else {
+                    Some(Token::Exclamation)
+                }
+            },
+            '|' => {
+                if self.match_char('|') {
+                    Some(Token::DoublePipe)
+                } else {
+                    Some(Token::Pipe)
+                }
+            },
+            '&' => {
+                if self.match_char('&') {
+                    Some(Token::DoubleAmpersand)
+                } else {
+                    Some(Token::Ampersand)
+                }
+            },
+            '=' => {
+                if self.match_char('=') {
+                    Some(Token::EqualEqual)
+                } else if self.match_char('>') {
+                    Some(Token::FatArrow)
+                } else {
+                    Some(Token::Equal)
+                }
+            },
+            ':' => {
+                if self.match_char('=') {
+                    Some(Token::ColonEqual)
+                } else {
+                    // Handle single ':' if needed
+                    Some(Token::Colon)
+                }
+            },
+            '<' => {
+                if self.match_char('=') {
+                    Some(Token::LessEqual)
+                } else {
+                    Some(Token::Less)
+                }
+            },
+            '>' => {
+                if self.match_char('=') {
+                    Some(Token::GreaterEqual)
+                } else {
+                    Some(Token::Greater)
+                }
+            },
+            '/' => {
+                if self.match_char('/') {
+                    // It's a comment, consume until end of line
+                    let mut comment = String::new();
+                    while self.peek() != '\n' && !self.is_at_end() {
+                        comment.push(self.advance());
+                    }
+                    Some(Token::Comment(comment))
+                } else {
+                    Some(Token::Slash)
+                }
+            },
+            ' ' | '\r' | '\t' => None, // Ignore whitespace
+            '\n' => {
+                self.line += 1;
+                None
+            },
+            '"' => self.string(),
+            c if c.is_ascii_digit() => self.number(),
+            c if self.is_alpha(c) => self.identifier(),
+            _ => {
+                eprintln!("Unexpected character '{}' on line {}", c, self.line);
+                None
+            }
+        }
+    }
+
+    // Helper methods
+    /// Consumes and returns the next character; at end of input returns `'\0'` (and still advances).
+    fn advance(&mut self) -> char {
+        let c = if self.is_at_end() {
+            '\0'
+        } else {
+            self.source[self.current]
+        };
+        self.current += 1;
+        c
+    }
+
+    /// Consumes the next character only if it equals `expected`; returns whether it did.
+    fn match_char(&mut self, expected: char) -> bool {
+        if self.is_at_end() {
+            return false;
+        }
+
+        if self.source[self.current] != expected {
+            return false;
+        }
+
+        self.current += 1;
+        true
+    }
+
+    /// Returns the next character without consuming it (`'\0'` at end of input).
+    fn peek(&self) -> char {
+        if self.is_at_end() {
+            '\0'
+        } else {
+            self.source[self.current]
+        }
+    }
+
+    /// Returns the character after the next one without consuming anything (`'\0'` if none).
+    fn peek_next(&self) -> char {
+        if self.current + 1 >= self.source.len() {
+            '\0'
+        } else {
+            self.source[self.current + 1]
+        }
+    }
+
+    /// Returns `true` once every character has been consumed.
+    fn is_at_end(&self) -> bool {
+        self.current >= self.source.len()
+    }
+
+    /// Scans a double-quoted string literal; the opening quote is already consumed.
+    ///
+    /// Strings may span lines. An unterminated string is reported on stderr and yields `None`.
+    fn string(&mut self) -> Option<Token> {
+        while self.peek() != '"' && !self.is_at_end() {
+            if self.peek() == '\n' {
+                self.line += 1;
+            }
+            self.advance();
+        }
+
+        // Check if we've reached the end without finding a closing quote
+        if self.is_at_end() {
+            eprintln!("Unterminated string on line {}", self.line);
+            return None;
+        }
+
+        // Consume the closing quote
+        self.advance();
+
+        // Extract the string value
+        let value: String = self.source[self.start + 1..self.current - 1]
+            .iter()
+            .collect();
+        Some(Token::StringLiteral(value))
+    }
+
+    /// Scans a number: ASCII digits with an optional fractional part (`12`, `3.14`).
+    fn number(&mut self) -> Option<Token> {
+        while self.peek().is_ascii_digit() {
+            self.advance();
+        }
+
+        // Look for a fractional part
+        if self.peek() == '.' && self.peek_next().is_ascii_digit() {
+            // Consume the '.'
+            self.advance();
+
+            while self.peek().is_ascii_digit() {
+                self.advance();
+            }
+        }
+
+        let value_str: String = self.source[self.start..self.current]
+            .iter()
+            .collect();
+        // Cannot fail: the lexeme is ASCII digits with an optional `.digits` part.
+        let value = value_str.parse::<f64>().unwrap();
+        Some(Token::Number(value))
+    }
+
+    /// Scans an identifier or reserved keyword; the first character is already consumed.
+    fn identifier(&mut self) -> Option<Token> {
+        while self.is_alphanumeric(self.peek()) {
+            self.advance();
+        }
+
+        let text: String = self.source[self.start..self.current]
+            .iter()
+            .collect();
+
+        // Check for reserved keywords
+        let token = match text.as_str() {
+            "and" => Token::And,
+            "class" => Token::Class,
+            "else" => Token::Else,
+            "false" => Token::False,
+            "func" => Token::Func,
+            "for" => Token::For,
+            "if" => Token::If,
+            "nil" => Token::Nil,
+            "or" => Token::Or,
+            "print" => Token::Print,
+            "return" => Token::Return,
+            "super" => Token::Super,
+            "this" => Token::This,
+            "true" => Token::True,
+            "let" => Token::Let,
+            "while" => Token::While,
+            "const" => Token::Const,
+            "define" => Token::Define,
+            "lambda" => Token::Lambda,
+            "match" => Token::Match,
+            "case" => Token::Case,
+            "switch" => Token::Switch,
+            "until" => Token::Until,
+            "repeat" => Token::Repeat,
+            "unless" => Token::Unless,
+            "yes" => Token::Yes,
+            "no" => Token::No,
+            "on" => Token::On,
+            "off" => Token::Off,
+            "module" => Token::Module,
+            _ => Token::Identifier(text),
+        };
+
+        Some(token)
+    }
+
+    /// Returns `true` if `c` may start an identifier (alphabetic or `_`).
+    fn is_alpha(&self, c: char) -> bool {
+        c.is_alphabetic() || c == '_'
+    }
+
+    /// Returns `true` if `c` may continue an identifier (alphanumeric or `_`).
+    fn is_alphanumeric(&self, c: char) -> bool {
+        c.is_alphanumeric() || c == '_'
+    }
+
+    /// Skips to the end of the current line, leaving the newline unconsumed.
+    fn line_comment(&mut self) {
+        while self.peek() != '\n' && !self.is_at_end() {
+            self.advance();
+        }
+    }
+
+    /// Skips a block comment up to and including the closing `*/`, or to end of input.
+    fn block_comment(&mut self) {
+        while !self.is_at_end() {
+            if self.peek() == '*' && self.peek_next() == '/' {
+                self.advance();
+                self.advance();
+                break;
+            } else {
+                if self.peek() == '\n' {
+                    self.line += 1;
+                }
+                self.advance();
+            }
+        }
+    }
+
+    /// Dispatches on a run of `#` characters (the first one is already consumed):
+    /// `#` line comment, `##!` module doc, `##` block comment, `###` item doc,
+    /// `####` or more block comment. A `!` directly after the run is consumed too.
+    fn handle_comment_or_doc(&mut self) -> Option<Token> {
+        // We have matched one '#' character so far
+        let mut count = 1;
+
+        // Count additional consecutive '#' characters
+        while self.match_char('#') {
+            count += 1;
+        }
+
+        // Check for an exclamation mark after the '#' characters
+        let has_exclamation = self.match_char('!');
+
+        match (count, has_exclamation) {
+            (1, _) => {
+                // Single '#' - Line comment
+                self.line_comment();
+                None
+            }
+            (2, true) => {
+                // '##!' - Module-level documentation comment
+                self.doc_comment("module")
+            }
+            (2, false) => {
+                // '##' - Block comment
+                self.block_comment();
+                None
+            }
+            (3, _) => {
+                // '###' - Item-level documentation comment
+                self.doc_comment("item")
+            }
+            _ => {
+                // '####' or more - Block comment. `count` starts at 1, so all
+                // smaller values are handled by the arms above.
+                self.block_comment();
+                None
+            }
+        }
+    }
+
+
+    /// Reads the rest of the line as a documentation comment and returns it trimmed.
+    ///
+    /// `_kind` ("module" or "item") is currently unused.
+    fn doc_comment(&mut self, _kind: &str) -> Option<Token> {
+        let mut comment = String::new();
+        while self.peek() != '\n' && !self.is_at_end() {
+            comment.push(self.advance());
+        }
+
+        // Consume the newline character
+        if self.peek() == '\n' {
+            self.advance();
+            // This newline never reaches `scan_token`, so count it here to keep
+            // line numbers in later diagnostics accurate.
+            self.line += 1;
+        }
+
+        Some(Token::DocComment(comment.trim().to_string()))
+    }
+
+}
diff --git a/src/lexer/mod.rs b/src/lexer/mod.rs
new file mode 100644
index 0000000..0230a2f
--- /dev/null
+++ b/src/lexer/mod.rs
@@ -0,0 +1,5 @@
+pub mod lexer;
+pub mod token;
+
+pub use lexer::Lexer;
+// pub use token::Token;
\ No newline at end of file
diff --git a/src/lexer/token.rs b/src/lexer/token.rs
new file mode 100644
index 0000000..e905f32
--- /dev/null
+++ b/src/lexer/token.rs
@@ -0,0 +1,84 @@
+#[derive(Debug, PartialEq, Clone)]
+pub enum Token {
+    // Single-character tokens
+    LeftParen, // (
+    RightParen, // )
+    LeftBrace, // {
+    RightBrace, // }
+    Comma, // ,
+    Dot, // .
+    Minus, // -
+    Plus, // +
+    Semicolon, // ;
+    Colon, // :
+    Slash, // /
+    Star, // *
+    Percent, // %
+    Caret, // ^
+    Tilde, // ~
+    Backtick, // `
+    Dollar, // $
+    At, // @
+    // Hash, // #
+    Question, // ?
+    Exclamation, // !
+    Pipe, // |
+    Ampersand, // &
+
+    // one or two character tokens
+    BangEqual, // !=
+    Equal, // =
+    EqualEqual, // ==
+    Greater, // >
+    GreaterEqual, // >=
+    Less, // <
+    LessEqual, // <=
+    FatArrow, // =>
+    ColonEqual, // :=
+    TildeEqual, // ~=
+    DoublePipe, // ||
+    DoubleAmpersand, // &&
+
+    // Literals
+    Identifier(String),
+    StringLiteral(String),
+    Number(f64),
+
+    // Keywords
+    And,
+    Class,
+    Else,
+    False,
+    Func,
+    For,
+    If,
+    Nil,
+    Or,
+    Print,
+    Return,
+    Super,
+    This,
+    True,
+    Let,
+    While,
+    Const,
+    Define,
+    Lambda,
+    Match,
+    Case,
+    Switch,
+    Until,
+    Repeat,
+    Unless,
+    Yes,
+    No,
+    On,
+    Off,
+    Module,
+
+    // Documentation and comments
+    DocComment(String), // ##!, ###
+    Comment(String), // `//` line comment (plain `#` comments produce no token)
+
+    EOF,
+}
\ No newline at end of file
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..8100f1d
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,5 @@
+pub mod lexer;
+pub mod parser;
+pub mod interpreter;
+
+// ohhhhh, this file puts your created files together
\ No newline at end of file
diff --git a/src/main.rs b/src/main.rs
new file mode 100644
index 0000000..163f311
--- /dev/null
+++ b/src/main.rs
@@ -0,0 +1,69 @@
+mod lexer;
+mod parser;
+mod interpreter;
+
+use std::env;
+use std::fs;
+use std::io::{self, Write};
+
+use lexer::Lexer;
+
+/// Entry point: runs the script named by the first argument, or starts the REPL.
+fn main() {
+    let args: Vec<String> = env::args().collect();
+
+    if args.len() > 1 {
+        // If a file is provided, run it
+        run_file(&args[1]);
+    } else {
+        // Otherwise start the REPL
+        run_repl();
+    }
+}
+
+/// Runs the interactive prompt: reads one line at a time and prints its tokens.
+///
+/// The loop ends when stdin reaches end-of-file (for example Ctrl-D) or a read fails.
+fn run_repl() {
+    println!("fiddle REPL");
+    loop {
+        print!("> ");
+        io::stdout().flush().unwrap();
+
+        let mut buffer = String::new();
+        // `read_line` returns Ok(0) at end-of-file; without this check the loop
+        // would spin forever re-printing the prompt once stdin is closed.
+        match io::stdin().read_line(&mut buffer) {
+            Ok(0) => break,
+            Ok(_) => {}
+            Err(err) => {
+                eprintln!("Failed to read input: {}", err);
+                break;
+            }
+        }
+
+        if buffer.trim().is_empty() {
+            continue;
+        }
+
+        run(buffer);
+    }
+}
+
+/// Reads the file at `path` and lexes it; panics if the file cannot be read.
+fn run_file(path: &str) {
+    let source = fs::read_to_string(path).expect("Failed to read source file");
+    run(source);
+}
+
+/// Lexes `source` and prints every token, one per line, in `Debug` form.
+fn run(source: String) {
+    let mut lexer = Lexer::new(source);
+    let tokens = lexer.scan_tokens();
+
+    for token in tokens {
+        println!("{:?}", token);
+    }
+
+    // pass tokens to parser and interpreter
+}
\ No newline at end of file
diff --git a/src/parser/ast.rs b/src/parser/ast.rs
new file mode 100644
index 0000000..8567eb3
--- /dev/null
+++ b/src/parser/ast.rs
@@ -0,0 +1,9 @@
+// placeholder for ast definitions
+
+pub enum Expression {
+    // Define expression types
+}
+
+pub enum Statement {
+    // Define statement types
+}
\ No newline at end of file
diff --git a/src/parser/mod.rs b/src/parser/mod.rs
new file mode 100644
index 0000000..949d0c2
--- /dev/null
+++ b/src/parser/mod.rs
@@ -0,0 +1,4 @@
+// pub mod ast;
+
+// pub use ast::*;
+// don't fully understand this re-export
\ No newline at end of file
diff --git a/tests/lexer_tests.rs b/tests/lexer_tests.rs
new file mode 100644
index 0000000..03b0bf7
--- /dev/null
+++ b/tests/lexer_tests.rs
@@ -0,0 +1,101 @@
+use fiddle::lexer::Lexer;
+use fiddle::lexer::token::Token;
+
+#[test]
+fn test_single_tokens() {
+    let source = String::from("()+-*/;");
+    let mut lexer = Lexer::new(source);
+    let tokens = lexer.scan_tokens();
+
+    assert_eq!(
+        tokens,
+        vec![
+            Token::LeftParen,
+            Token::RightParen,
+            Token::Plus,
+            Token::Minus,
+            Token::Star,
+            Token::Slash,
+            Token::Semicolon,
+            Token::EOF
+        ]
+    );
+}
+
+#[test]
+fn test_identifier_and_keywords() {
+    let source = String::from("let $varName := 123; ");
+    let mut lexer = Lexer::new(source);
+    let tokens = lexer.scan_tokens();
+
+    assert_eq!(
+        tokens,
+        vec![
+            Token::Let,
+            Token::Dollar,
+            Token::Identifier("varName".to_string()),
+            Token::ColonEqual,
+            Token::Number(123.0),
+            Token::Semicolon,
+            Token::EOF
+        ]
+    );
+}
+
+#[test]
+fn test_doc_comments() {
+    let source = String::from("##! Module documentation
+module test {
+    ### Function documentation
+    func example() {
+        # Regular comment
+        return 42;
+    }
+}
+");
+    let mut lexer = Lexer::new(source);
+    let tokens = lexer.scan_tokens();
+
+    println!("Tokens: {:?}", tokens);
+
+    assert_eq!(tokens[0], Token::DocComment("Module documentation".to_string()));
+    assert_eq!(tokens[1], Token::Module);
+    assert_eq!(tokens[2], Token::Identifier("test".to_string()));
+    assert_eq!(tokens[3], Token::LeftBrace);
+    assert_eq!(tokens[4], Token::DocComment("Function documentation".to_string()));
+    assert_eq!(tokens[5], Token::Func);
+    assert_eq!(tokens[6], Token::Identifier("example".to_string()));
+    assert_eq!(tokens[7], Token::LeftParen);
+    assert_eq!(tokens[8], Token::RightParen);
+    assert_eq!(tokens[9], Token::LeftBrace);
+    assert_eq!(tokens[10], Token::Return);
+    assert_eq!(tokens[11], Token::Number(42.0));
+    assert_eq!(tokens[12], Token::Semicolon);
+    assert_eq!(tokens[13], Token::RightBrace); // Closes function body
+    assert_eq!(tokens[14], Token::RightBrace); // Closes module
+    assert_eq!(tokens[15], Token::EOF);
+}
+
+#[test]
+fn test_tilde_operator() {
+    let source = String::from("if (a ~= b) { ~c }");
+    let mut lexer = Lexer::new(source);
+    let tokens = lexer.scan_tokens();
+
+    assert_eq!(
+        tokens,
+        vec![
+            Token::If,
+            Token::LeftParen,
+            Token::Identifier("a".to_string()),
+            Token::TildeEqual,
+            Token::Identifier("b".to_string()),
+            Token::RightParen,
+            Token::LeftBrace,
+            Token::Tilde,
+            Token::Identifier("c".to_string()),
+            Token::RightBrace,
+            Token::EOF
+        ]
+    );
+}
diff --git a/tests/parser_tests.rs b/tests/parser_tests.rs
new file mode 100644
index 0000000..e69de29