commit e6ae86f673e797916f88ef0388f966fe67f42530
Author: bigsketti
Date:   Sun Sep 22 11:42:41 2024 -0400

    Initial commit, only the lexer as of now.

diff --git a/headers/lexer.h b/headers/lexer.h
new file mode 100644
index 0000000..09ac0b7
--- /dev/null
+++ b/headers/lexer.h
@@ -0,0 +1,68 @@
+#ifndef LEXER_H
+#define LEXER_H
+
+#include <cstddef>
+#include <string>
+#include <utility>
+
+// Token categories the lexer can produce. Despite the name these are not
+// language keywords; the name is kept so existing callers keep compiling.
+enum class Keywords {
+    identifier,
+    integer,
+    floatpnt,
+    string,
+    character,
+    boolean,
+    plus,
+    minus,
+    multiply,
+    divide,
+    l_Paren,
+    r_Paren,
+    l_Brace,
+    r_Brace,
+    l_Brack,
+    r_Brack,
+    semicolon,
+    end_of_file,
+    unknown
+};
+
+// One lexeme: its category plus the text it was built from.
+struct Token {
+    Keywords type;
+    std::string value;
+
+    Token(Keywords type, std::string value) : type(type), value(std::move(value)) {}
+};
+
+// Splits one input string into Tokens, one per getNextToken() call.
+// A '\0' in the input is treated as the end of the input.
+class Lexer {
+private:
+    std::string stringInput;  // text being tokenized (owned copy)
+    std::size_t position;     // index of currentChar within stringInput
+    char currentChar;         // character under the cursor; '\0' once past the end
+
+    // Advances the cursor by one character.
+    void nextPosition();
+
+    // Moves the cursor past a run of whitespace.
+    void skipWhiteSpace();
+
+    // Consumes a run of decimal digits (integer literal).
+    Token number();
+
+    // Consumes a run of letters and digits (identifier).
+    Token identifier();
+
+public:
+    // Starts lexing at the first character of input, which may be empty.
+    Lexer(const std::string &input);
+
+    // Returns the next token, or Keywords::end_of_file once input is exhausted.
+    Token getNextToken();
+};
+
+#endif
diff --git a/profaneCompiler b/profaneCompiler
new file mode 100755
index 0000000..0cb9e7c
Binary files /dev/null and b/profaneCompiler differ
diff --git a/src/lexer.cpp b/src/lexer.cpp
new file mode 100644
index 0000000..caaedb2
--- /dev/null
+++ b/src/lexer.cpp
@@ -0,0 +1,109 @@
+#include "../headers/lexer.h"
+
+#include <cctype>
+#include <iostream>
+#include <string>
+
+namespace {
+
+// The <cctype> classifiers have undefined behaviour for negative arguments
+// other than EOF, so plain chars go through unsigned char first.
+bool isSpace(char c) { return std::isspace(static_cast<unsigned char>(c)) != 0; }
+bool isDigit(char c) { return std::isdigit(static_cast<unsigned char>(c)) != 0; }
+bool isAlpha(char c) { return std::isalpha(static_cast<unsigned char>(c)) != 0; }
+bool isAlnum(char c) { return std::isalnum(static_cast<unsigned char>(c)) != 0; }
+
+// Maps a single punctuation character to its token category.
+Keywords punctuationKind(char c) {
+    switch (c) {
+        case '+': return Keywords::plus;
+        case '-': return Keywords::minus;
+        case '*': return Keywords::multiply;
+        case '/': return Keywords::divide;
+        case '(': return Keywords::l_Paren;
+        case ')': return Keywords::r_Paren;
+        case '{': return Keywords::l_Brace;
+        case '}': return Keywords::r_Brace;
+        case '[': return Keywords::l_Brack;
+        case ']': return Keywords::r_Brack;
+        case ';': return Keywords::semicolon;
+        default:  return Keywords::unknown;
+    }
+}
+
+}  // namespace
+
+void Lexer::nextPosition() {
+    // Never step further than one past the last character.
+    if (position < stringInput.size()) {
+        ++position;
+    }
+
+    if (position < stringInput.size()) {
+        currentChar = stringInput[position];
+        std::cout << "Advancing to: " << currentChar << " | at position: " << position << std::endl;
+    } else {
+        currentChar = '\0';
+        std::cout << "end of input\n";
+    }
+}
+
+void Lexer::skipWhiteSpace() {
+    // Loop rather than test once, so a whole run of blanks is skipped per call.
+    while (currentChar != '\0' && isSpace(currentChar)) {
+        nextPosition();
+    }
+}
+
+Token Lexer::number() {
+    std::string value;
+
+    while (currentChar != '\0' && isDigit(currentChar)) {
+        value += currentChar;
+        nextPosition();
+    }
+    return Token(Keywords::integer, value);
+}
+
+Token Lexer::identifier() {
+    std::string value;
+
+    while (currentChar != '\0' && isAlnum(currentChar)) {
+        value += currentChar;
+        nextPosition();
+    }
+    return Token(Keywords::identifier, value);
+}
+
+Lexer::Lexer(const std::string &input)
+    : stringInput(input),
+      position(0),
+      currentChar(stringInput.empty() ? '\0' : stringInput[0]) {}
+
+Token Lexer::getNextToken() {
+    while (currentChar != '\0') {
+        std::cout << currentChar << std::endl;
+
+        if (isSpace(currentChar)) {
+            skipWhiteSpace();
+            continue;
+        }
+
+        if (isDigit(currentChar)) {
+            return number();
+        }
+
+        if (isAlpha(currentChar)) {
+            return identifier();
+        }
+
+        // Anything else is a one-character token. Its text is kept even when
+        // the category is unknown, so callers can report the offending char.
+        const char symbol = currentChar;
+        nextPosition();
+        return Token(punctuationKind(symbol), std::string(1, symbol));
+    }
+
+    std::cout << "EOF" << std::endl;
+    return Token(Keywords::end_of_file, "");
+}
diff --git a/src/profaneCompiler.cpp b/src/profaneCompiler.cpp
new file mode 100644
index 0000000..26be998
--- /dev/null
+++ b/src/profaneCompiler.cpp
@@ -0,0 +1,50 @@
+#include <fstream>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "../headers/lexer.h"
+
+// Prints a token as "Type: <numeric category>, Value: <text>".
+std::ostream& operator<<(std::ostream& os, const Token& token) {
+    os << "Type: " << static_cast<int>(token.type) << ", Value: " << token.value;
+    return os;
+}
+
+// Lexes a file line by line and prints every token found.
+// The file is argv[1] when given, otherwise testInput.txt in the working directory.
+int main(int argc, char* argv[]) {
+    const std::string path = (argc > 1) ? argv[1] : "testInput.txt";
+
+    std::ifstream file_in(path);
+    if (!file_in.is_open()) {
+        std::cerr << "File read error" << std::endl;
+        return 1;
+    }
+    std::cout << "file open\n";
+
+    std::vector<Token> tokenVec;
+    std::string line;
+
+    while (std::getline(file_in, line)) {
+        std::cout << "Reading line: " << line << std::endl;
+
+        // Lower-case name so the variable does not hide the Lexer type.
+        Lexer lexer(line);
+
+        for (Token token = lexer.getNextToken();
+             token.type != Keywords::end_of_file;
+             token = lexer.getNextToken()) {
+            std::cout << "Token " << static_cast<int>(token.type) << " | Value: " << token.value << std::endl;
+            tokenVec.push_back(token);
+        }
+    }
+
+    std::cout << "printing all tokens : \n";
+
+    for (const Token& token : tokenVec) {
+        std::cout << token << std::endl;
+    }
+
+    return 0;
+}
diff --git a/testInput.txt b/testInput.txt
new file mode 100644
index 0000000..e9cdae1
--- /dev/null
+++ b/testInput.txt
@@ -0,0 +1,3 @@
+int main() {
+    return 0;
+}
\ No newline at end of file