parsed syntax keywords, simplified language, refactored current tests

This commit is contained in:
Tristan Smith 2024-09-16 00:27:18 -04:00
parent d9ac25fe4a
commit 38052a4367
10 changed files with 169 additions and 279 deletions

4
Cargo.lock generated
View file

@ -3,5 +3,5 @@
version = 3 version = 3
[[package]] [[package]]
name = "fiddle" name = "fddl"
version = "0.1.0" version = "0.0.2"

View file

@ -1,9 +1,9 @@
[package] [package]
name = "fiddle" name = "fddl"
version = "0.1.0" version = "0.0.2"
edition = "2021" edition = "2021"
authors = ["Tristan Smith <tristan.smith@pm.me>"] authors = ["Tristan Smith <tristan@fddl.dev>"]
description = "A small programming language written in Rust." description = "A small programming language written in Rust."
license = "BSD-3-Clause" license = "MIT"
[dependencies] [dependencies]

3
src/compiler/codegen.rs Normal file
View file

@ -0,0 +1,3 @@
// Placeholder for compiler implementation
// not even close yet

View file

@ -1,3 +0,0 @@
// Placeholder for interpreter implementation
// not even close yet

View file

@ -35,7 +35,7 @@ impl Lexer {
let c = self.advance(); let c = self.advance();
match c { match c {
'#' => self.handle_comment_or_doc(), // Single-character tokens
'(' => Some(Token::LeftParen), '(' => Some(Token::LeftParen),
')' => Some(Token::RightParen), ')' => Some(Token::RightParen),
'{' => Some(Token::LeftBrace), '{' => Some(Token::LeftBrace),
@ -46,8 +46,6 @@ impl Lexer {
'+' => Some(Token::Plus), '+' => Some(Token::Plus),
';' => Some(Token::Semicolon), ';' => Some(Token::Semicolon),
'*' => Some(Token::Star), '*' => Some(Token::Star),
'%' => Some(Token::Percent),
'^' => Some(Token::Caret),
'~' => { '~' => {
if self.match_char('=') { if self.match_char('=') {
Some(Token::TildeEqual) Some(Token::TildeEqual)
@ -55,48 +53,34 @@ impl Lexer {
Some(Token::Tilde) Some(Token::Tilde)
} }
}, },
'`' => Some(Token::Backtick), '/' => {
'$' => Some(Token::Dollar), if self.match_char('/') {
'@' => Some(Token::At), // Line comment starting with //
'?' => Some(Token::Question), self.line_comment() // Generate Comment token
} else {
Some(Token::Slash)
}
},
'#' => {
// Line comment starting with #
self.line_comment() // Generate Comment token
},
// One or two character tokens
'!' => { '!' => {
if self.match_char('=') { if self.match_char('=') {
Some(Token::BangEqual) Some(Token::BangEqual)
} else { } else {
Some(Token::Exclamation) None // Or handle as an error or another token if needed
}
},
'|' => {
if self.match_char('|') {
Some(Token::DoublePipe)
} else {
Some(Token::Pipe)
}
},
'&' => {
if self.match_char('&') {
Some(Token::DoubleAmpersand)
} else {
Some(Token::Ampersand)
} }
}, },
'=' => { '=' => {
if self.match_char('=') { if self.match_char('=') {
Some(Token::EqualEqual) Some(Token::EqualEqual)
} else if self.match_char('>') {
Some(Token::FatArrow)
} else { } else {
Some(Token::Equal) Some(Token::Equal)
} }
}, },
':' => {
if self.match_char('=') {
Some(Token::ColonEqual)
} else {
// Handle single ':' if needed
Some(Token::Colon)
}
},
'<' => { '<' => {
if self.match_char('=') { if self.match_char('=') {
Some(Token::LessEqual) Some(Token::LessEqual)
@ -111,26 +95,20 @@ impl Lexer {
Some(Token::Greater) Some(Token::Greater)
} }
}, },
'/' => {
if self.match_char('/') { // Whitespace
// It's a comment, consume until end of line ' ' | '\r' | '\t' => None,
let mut comment = String::new();
while self.peek() != '\n' && !self.is_at_end() {
comment.push(self.advance());
}
Some(Token::Comment(comment))
} else {
Some(Token::Slash)
}
},
' ' | '\r' | '\t' => None, // Ignore whitespace
'\n' => { '\n' => {
self.line += 1; self.line += 1;
None None
}, },
// Literals
'"' => self.string(), '"' => self.string(),
c if c.is_ascii_digit() => self.number(), c if c.is_ascii_digit() => self.number(),
c if self.is_alpha(c) => self.identifier(), c if self.is_alpha(c) => self.identifier(),
// Any other character
_ => { _ => {
eprintln!("Unexpected character '{}' on line {}", c, self.line); eprintln!("Unexpected character '{}' on line {}", c, self.line);
None None
@ -139,18 +117,17 @@ impl Lexer {
} }
// Helper methods // Helper methods
// function to consume the current character // Consume the current character and return it
fn advance(&mut self) -> char { fn advance(&mut self) -> char {
let c = if self.is_at_end() { if self.is_at_end() {
'\0' return '\0';
} else { }
self.source[self.current] let c = self.source[self.current];
};
self.current += 1; self.current += 1;
c c
} }
// function to parse the current character if it matches the expected character // Check if the current character matches the expected character
fn match_char(&mut self, expected: char) -> bool { fn match_char(&mut self, expected: char) -> bool {
if self.is_at_end() { if self.is_at_end() {
return false; return false;
@ -164,7 +141,7 @@ impl Lexer {
true true
} }
// function to parse the current character without consuming it // Look at current character
fn peek(&self) -> char { fn peek(&self) -> char {
if self.is_at_end() { if self.is_at_end() {
'\0' '\0'
@ -173,7 +150,7 @@ impl Lexer {
} }
} }
// function to parse the next character without consuming it // Look ahead by one character
fn peek_next(&self) -> char { fn peek_next(&self) -> char {
if self.current + 1 >= self.source.len() { if self.current + 1 >= self.source.len() {
'\0' '\0'
@ -182,12 +159,12 @@ impl Lexer {
} }
} }
// function to check if we've reached the end of the source // Check if we have reached the end of the source
fn is_at_end(&self) -> bool { fn is_at_end(&self) -> bool {
self.current >= self.source.len() self.current >= self.source.len()
} }
// Function to handle different token types // Function to handle string literals
fn string(&mut self) -> Option<Token> { fn string(&mut self) -> Option<Token> {
while self.peek() != '"' && !self.is_at_end() { while self.peek() != '"' && !self.is_at_end() {
if self.peek() == '\n' { if self.peek() == '\n' {
@ -196,7 +173,6 @@ impl Lexer {
self.advance(); self.advance();
} }
// Check if we've reached the end without finding a closing quote
if self.is_at_end() { if self.is_at_end() {
eprintln!("Unterminated string on line {}", self.line); eprintln!("Unterminated string on line {}", self.line);
return None; return None;
@ -235,7 +211,7 @@ impl Lexer {
Some(Token::Number(value)) Some(Token::Number(value))
} }
// Function to handle identifiers // Function to handle identifiers and keywords
fn identifier(&mut self) -> Option<Token> { fn identifier(&mut self) -> Option<Token> {
while self.is_alphanumeric(self.peek()) || self.peek() == '_' { while self.is_alphanumeric(self.peek()) || self.peek() == '_' {
self.advance(); self.advance();
@ -248,132 +224,43 @@ impl Lexer {
// Check for reserved keywords // Check for reserved keywords
let token = match text.as_str() { let token = match text.as_str() {
"and" => Token::And, "and" => Token::And,
"class" => Token::Class,
"else" => Token::Else,
"false" => Token::False,
"func" => Token::Func,
"for" => Token::For,
"if" => Token::If,
"nil" => Token::Nil,
"or" => Token::Or, "or" => Token::Or,
"print" => Token::Print, "if" => Token::If,
"return" => Token::Return, "else" => Token::Else,
"super" => Token::Super,
"this" => Token::This,
"true" => Token::True, "true" => Token::True,
"false" => Token::False,
"let" => Token::Let, "let" => Token::Let,
"while" => Token::While,
"const" => Token::Const, "const" => Token::Const,
"define" => Token::Define, "func" => Token::Func,
"lambda" => Token::Lambda, "return" => Token::Return,
"match" => Token::Match, "for" => Token::For,
"case" => Token::Case, "while" => Token::While,
"switch" => Token::Switch, "print" => Token::Print,
"until" => Token::Until, "pub" => Token::Pub,
"repeat" => Token::Repeat, "sym" => Token::Sym,
"unless" => Token::Unless,
"yes" => Token::Yes,
"no" => Token::No,
"on" => Token::On,
"off" => Token::Off,
"module" => Token::Module,
_ => Token::Identifier(text), _ => Token::Identifier(text),
}; };
Some(token) Some(token)
} }
// Function to check if a character is an alphabetic character or an underscore
fn is_alpha(&self, c: char) -> bool { fn is_alpha(&self, c: char) -> bool {
c.is_alphabetic() || c == '_' c.is_alphabetic() || c == '_'
} }
// Function to check if a character is an alphanumeric character or an underscore
fn is_alphanumeric(&self, c: char) -> bool { fn is_alphanumeric(&self, c: char) -> bool {
c.is_alphanumeric() || c == '_' c.is_alphanumeric() || c == '_'
} }
// Function to handle comments and documentation // Function to handle line comments
fn line_comment(&mut self) { fn line_comment(&mut self) -> Option<Token>{
while self.peek() != '\n' && !self.is_at_end() {
self.advance();
}
}
// Function to handle block comments
fn block_comment(&mut self) {
while !self.is_at_end() {
if self.peek() == '*' && self.peek_next() == '/' {
self.advance();
self.advance();
break;
} else {
if self.peek() == '\n' {
self.line += 1;
}
self.advance();
}
}
}
// Function to handle comments and documentation
fn handle_comment_or_doc(&mut self) -> Option<Token> {
// We have matched one '#' character so far
let mut count = 1;
// Count additional consecutive '#' characters
while self.match_char('#') {
count += 1;
}
// Check for an exclamation mark after the '#' characters
let has_exclamation = self.match_char('!');
match (count, has_exclamation) {
(1, _) => {
// Single '#' - Line comment
self.line_comment();
None
}
(2, true) => {
// '##!' - Module-level documentation comment
self.doc_comment("module")
}
(2, false) => {
// '##' - Block comment
self.block_comment();
None
}
(3, _) => {
// '###' - Item-level documentation comment
self.doc_comment("item")
}
(n, _) if n >= 4 => {
// '####' or more - Block comment
self.block_comment();
None
}
_ => {
// Fallback to line comment
self.line_comment();
None
}
}
}
// Function to handle documentation comments
fn doc_comment(&mut self, _kind: &str) -> Option<Token> {
let mut comment = String::new(); let mut comment = String::new();
while self.peek() != '\n' && !self.is_at_end() { while self.peek() != '\n' && !self.is_at_end() {
comment.push(self.advance()); comment.push(self.advance());
} }
// Consume the newline character Some(Token::Comment(comment))
if self.peek() == '\n' { }
self.advance();
} }
Some(Token::DocComment(comment.trim().to_string()))
}
}

View file

@ -10,34 +10,17 @@ pub enum Token {
Minus, // - Minus, // -
Plus, // + Plus, // +
Semicolon, // ; Semicolon, // ;
Colon, // :
Slash, // / Slash, // /
Star, // * Star, // *
Percent, // %
Caret, // ^
Tilde, // ~
Backtick, // `
Dollar, // $
At, // @
// Hash, // #
Question, // ?
Exclamation, // !
Pipe, // |
Ampersand, // &
// one or two character tokens
BangEqual, // !=
Equal, // = Equal, // =
BangEqual, // !=
EqualEqual, // == EqualEqual, // ==
Greater, // > Greater, // >
GreaterEqual, // >= GreaterEqual, // >=
Less, // < Less, // <
LessEqual, // <= LessEqual, // <=
FatArrow, // => Tilde, // ~
ColonEqual, // :=
TildeEqual, // ~= TildeEqual, // ~=
DoublePipe, // ||
DoubleAmpersand, // &&
// Literals // Literals
Identifier(String), Identifier(String),
@ -46,39 +29,23 @@ pub enum Token {
// Keywords // Keywords
And, And,
Class,
Else,
False,
Func,
For,
If,
Nil,
Or, Or,
Print, If,
Return, Else,
Super,
This,
True, True,
False,
Let, Let,
While,
Const, Const,
Define, Func,
Lambda, Return,
Match, For,
Case, While,
Switch, Print,
Until, Pub,
Repeat, Sym,
Unless,
Yes,
No,
On,
Off,
Module,
// Documentation and comments // Comments
DocComment(String), // ##!, ### Comment(String),
Comment(String), // #
EOF, EOF,
} }

View file

@ -1,3 +1,3 @@
pub mod lexer; pub mod lexer;
pub mod parser; pub mod parser;
pub mod interpreter; pub mod compiler;

View file

@ -1,6 +1,6 @@
mod lexer; mod lexer;
mod parser; mod parser;
mod interpreter; mod compiler;
use std::env; use std::env;
use std::fs; use std::fs;

View file

@ -1,5 +1,5 @@
use fiddle::lexer::Lexer; use fddl::lexer::Lexer;
use fiddle::lexer::token::Token; use fddl::lexer::token::Token;
#[test] #[test]
fn test_single_tokens() { fn test_single_tokens() {
@ -23,62 +23,94 @@ fn test_single_tokens() {
} }
#[test] #[test]
fn test_identifier_and_keywords() { fn test_keywords_and_identifiers() {
let source = String::from("let $varName := 123; "); let source = String::from("sym myVar = 123;");
let mut lexer = Lexer::new(source); let mut lexer = Lexer::new(source);
let tokens = lexer.scan_tokens(); let tokens = lexer.scan_tokens();
assert_eq!( assert_eq!(
tokens, tokens,
vec![ vec![
Token::Let, Token::Sym,
Token::Dollar, Token::Identifier("myVar".to_string()),
Token::Identifier("varName".to_string()), Token::Equal,
Token::ColonEqual,
Token::Number(123.0), Token::Number(123.0),
Token::Semicolon, Token::Semicolon,
Token::EOF Token::EOF
] ]
); );
println!("{:?}", tokens);
}
#[test]
fn test_pub_keyword() {
let source = String::from("pub func example() { return 42; }");
let mut lexer = Lexer::new(source);
let tokens = lexer.scan_tokens();
assert_eq!(
tokens,
vec![
Token::Pub,
Token::Func,
Token::Identifier("example".to_string()),
Token::LeftParen,
Token::RightParen,
Token::LeftBrace,
Token::Return,
Token::Number(42.0),
Token::Semicolon,
Token::RightBrace,
Token::EOF
]
);
}
#[test]
fn test_comments() {
let source = String::from("# This is a comment\nlet a = 5;");
let mut lexer = Lexer::new(source);
let tokens = lexer.scan_tokens();
assert_eq!(
tokens,
vec![
Token::Comment(" This is a comment".to_string()),
Token::Let,
Token::Identifier("a".to_string()),
Token::Equal,
Token::Number(5.0),
Token::Semicolon,
Token::EOF
]
);
} }
#[test] #[test]
fn test_doc_comments() { fn test_operators_and_comparison() {
let source = String::from("##! Module documentation let source = String::from("a >= 10 != b == 5;");
module test {
### Function documentation
func example() {
# Regular comment
return 42;
}
}
");
let mut lexer = Lexer::new(source); let mut lexer = Lexer::new(source);
let tokens = lexer.scan_tokens(); let tokens = lexer.scan_tokens();
println!("Tokens: {:?}", tokens); assert_eq!(
tokens,
assert_eq!(tokens[0], Token::DocComment("Module documentation".to_string())); vec![
assert_eq!(tokens[1], Token::Module); Token::Identifier("a".to_string()),
assert_eq!(tokens[2], Token::Identifier("test".to_string())); Token::GreaterEqual,
assert_eq!(tokens[3], Token::LeftBrace); Token::Number(10.0),
assert_eq!(tokens[4], Token::DocComment("Function documentation".to_string())); Token::BangEqual,
assert_eq!(tokens[5], Token::Func); Token::Identifier("b".to_string()),
assert_eq!(tokens[6], Token::Identifier("example".to_string())); Token::EqualEqual,
assert_eq!(tokens[7], Token::LeftParen); Token::Number(5.0),
assert_eq!(tokens[8], Token::RightParen); Token::Semicolon,
assert_eq!(tokens[9], Token::LeftBrace); Token::EOF
assert_eq!(tokens[10], Token::Return); ]
assert_eq!(tokens[11], Token::Number(42.0)); );
assert_eq!(tokens[12], Token::Semicolon);
assert_eq!(tokens[13], Token::RightBrace); // Closes function body
assert_eq!(tokens[14], Token::RightBrace); // Closes module
assert_eq!(tokens[15], Token::EOF);
} }
#[test] #[test]
fn test_tilde_operator() { fn test_tilde_operator() {
let source = String::from("if (a ~= b) { ~c }"); let source = String::from("if (a != b) { let c = ~5; }");
let mut lexer = Lexer::new(source); let mut lexer = Lexer::new(source);
let tokens = lexer.scan_tokens(); let tokens = lexer.scan_tokens();
@ -88,12 +120,16 @@ fn test_tilde_operator() {
Token::If, Token::If,
Token::LeftParen, Token::LeftParen,
Token::Identifier("a".to_string()), Token::Identifier("a".to_string()),
Token::TildeEqual, Token::BangEqual,
Token::Identifier("b".to_string()), Token::Identifier("b".to_string()),
Token::RightParen, Token::RightParen,
Token::LeftBrace, Token::LeftBrace,
Token::Tilde, Token::Let,
Token::Identifier("c".to_string()), Token::Identifier("c".to_string()),
Token::Equal,
Token::Tilde,
Token::Number(5.0),
Token::Semicolon,
Token::RightBrace, Token::RightBrace,
Token::EOF Token::EOF
] ]