diff --git a/src/main.rs b/src/main.rs
index 91f98e1..6cfeddc 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -5,7 +5,12 @@ use std::io::Write;
 use std::process;
 
 mod rlox;
+mod scanner;
+mod token_type;
+mod token;
+
 use crate::rlox::RLox;
+use crate::scanner::Scanner;
 
 // Exit codes from #include <sysexits.h>
 const EX_OK: i32 = 0;
@@ -54,10 +59,8 @@ fn run_prompt() -> i32 {
     exit_code
 }
 
-fn run( _script: String ) -> i32 {
+fn run( script: String ) -> i32 {
     let rlox_interpreter = RLox { had_error: false };
-
-
     if rlox_interpreter.had_error { EX_DATAERR } else { EX_OK }
 }
diff --git a/src/rlox.rs b/src/rlox.rs
index 4bb3780..48fb0dd 100644
--- a/src/rlox.rs
+++ b/src/rlox.rs
@@ -1,12 +1,9 @@
-mod token_type;
-mod token;
-mod scanner;
+use crate::scanner::Scanner;
 
 pub struct RLox {
     pub had_error: bool,
 }
-
 impl RLox {
     fn error(&self, line: u32, message: String) {
         self.report(line, String::from(""), message);
@@ -15,4 +12,10 @@ impl RLox {
     fn report(&self, line: u32, place: String, message: String) {
         println!("[line {line}] Error {place}: {message}");
     }
+
+    fn run(&self, src: String) {
+        let mut scanner = Scanner::create_scanner( src );
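+        // Next step, as a sketch only: this assumes scan_tokens() and the
+        // token list will be made accessible, which this patch does not
+        // do yet:
+        //     scanner.scan_tokens();
+        //     for token in &scanner.tokens { println!("{token}"); }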
+    }
 }
diff --git a/src/rlox/scanner.rs b/src/rlox/scanner.rs
deleted file mode 100644
index 2af9d2e..0000000
--- a/src/rlox/scanner.rs
+++ /dev/null
@@ -1,191 +0,0 @@
-use super::token_type::TokenType;
-use super::token::Token;
-use std::collections::HashMap;
-
-
-//scores.insert(String::from("Blue"), 10);
-
-
-fn is_digit( c: char ) -> bool {
-    c>='0' && c<='9'
-}
-
-fn is_alpha( c: char ) -> bool {
-    (c>='a' && c<='z') || (c>='A' && c<='Z') || c=='_'
-}
-
-fn is_alpha_numeric ( c: char ) -> bool {
-    is_digit(c) || is_alpha(c)
-}
-
-struct Scanner {
-    source: Vec<char>,
-    tokens: Vec<Token>,
-
-    start: usize,
-    current: usize,
-    line: u32,
-
-    keywords: HashMap<String, TokenType>,
-}
-
-impl Scanner {
-    fn init_keywords(&mut self) {
-        self.keywords = HashMap::new();
-        self.keywords.insert( String::from("and"), TokenType::And );
-        self.keywords.insert( String::from("class"), TokenType::Class );
-        self.keywords.insert( String::from("else"), TokenType::Else );
-        self.keywords.insert( String::from("false"), TokenType::False );
-        self.keywords.insert( String::from("for"), TokenType::For );
-        self.keywords.insert( String::from("fun"), TokenType::Fun );
-        self.keywords.insert( String::from("if"), TokenType::If );
-        self.keywords.insert( String::from("nil"), TokenType::Nil );
-        self.keywords.insert( String::from("or"), TokenType::Or );
-        self.keywords.insert( String::from("print"), TokenType::Print );
-        self.keywords.insert( String::from("return"), TokenType::Return );
-        self.keywords.insert( String::from("super"), TokenType::Super );
-        self.keywords.insert( String::from("this"), TokenType::This );
-        self.keywords.insert( String::from("true"), TokenType::True );
-        self.keywords.insert( String::from("var"), TokenType::Var );
-        self.keywords.insert( String::from("while"), TokenType::While );
-    }
-
-    fn scan_tokens(&mut self) {
-        while !self.is_at_end() {
-            self.start = self.current;
-            self.scan_token();
-        }
-
-        // Append a final EOF token once there is nothing left to parse
-        self.tokens.push(Token{ token_type: TokenType::Eof, lexeme: String::from(""), literal: String::from(""), line: self.line } );
-    }
-
-    fn is_at_end(&self) -> bool {
-        self.current>= self.source.len()
-    }
-
-    fn scan_token(&mut self) {
-        let c = self.advance();
-        match c {
-            '(' => self.add_simple_token( TokenType::LeftParen ),
-            ')' => self.add_simple_token( TokenType::RightParen ),
-            '{' => self.add_simple_token( TokenType::LeftBrace ),
-            '}' => self.add_simple_token( TokenType::RightBrace ),
-            ',' => self.add_simple_token( TokenType::Comma ),
-            '.' => self.add_simple_token( TokenType::Dot ),
-            '-' => self.add_simple_token( TokenType::Minus ),
-            '+' => self.add_simple_token( TokenType::Plus ),
-            ';' => self.add_simple_token( TokenType::Semicolon ),
-            '*' => self.add_simple_token( TokenType::Star ),
-            '!' => { if self.match_next('=') { self.add_simple_token( TokenType::BangEqual ) } else { self.add_simple_token( TokenType::Bang ) } },
-            '=' => { if self.match_next('=') { self.add_simple_token( TokenType::EqualEqual ) } else { self.add_simple_token( TokenType::Equal ) } },
-            '<' => { if self.match_next('=') { self.add_simple_token( TokenType::LessEqual ) } else { self.add_simple_token( TokenType::Less ) } },
-            '>' => { if self.match_next('=') { self.add_simple_token( TokenType::GreaterEqual ) } else { self.add_simple_token( TokenType::Greater ) } },
-            '/' => { if self.match_next('/') {
-                // Comment: advance to the end of the line without emitting a token
-                while self.peek()!='\n' && !self.is_at_end() {
-                    self.advance();
-                }
-            } else {
-                self.add_simple_token( TokenType::Slash ) }
-            },
-            ' ' => (),
-            '\r' => (),
-            '\t' => (),
-            '\n' => self.line += 1,
-            '"' => self.string(),
-            _ => {
-                if is_digit(c) {
-                    self.number();
-                } else if is_alpha(c) {
-                    self.identifier();
-                } else {
-                    // Error: unknown lexeme
-                }
-            }
-        }
-    }
-
-    fn advance(&mut self) -> char {
-        self.current += 1;
-        self.source[self.current]
-    }
-
-    fn match_next(&mut self, expected: char) -> bool {
-        if self.is_at_end() { return false; }
-        if self.source[self.current]!=expected { return false; }
-
-        self.current += 1;
-        true
-    }
-
-    fn peek(&self) -> char {
-        if self.is_at_end() {
-            '\0'
-        } else {
-            self.source[self.current]
-        }
-    }
-
-    fn peek_next(&self) -> char {
-        if self.current+1 >= self.source.len() {
-            '\0'
-        } else {
-            self.source[self.current + 1]
-        }
-    }
-
-    fn add_simple_token(&mut self, t: TokenType) {
-        self.add_token(t, String::from(""));
-    }
-
-    fn add_token(&mut self, t: TokenType, l: String) {
-        let text = self.source[self.start..self.current].iter().collect();
-        self.tokens.push(Token{ token_type: t, lexeme: text, literal: l, line: self.line } );
-    }
-
-    fn string(&mut self) {
-        // Consume characters until the closing string delimiter or the end of the file
-        while self.peek()!='"' && !self.is_at_end() {
-            if self.peek()=='\n' {
-                self.line += 1; // strings may span multiple lines
-            }
-            self.advance();
-        }
-
-        if self.is_at_end() {
-            // Error: unterminated string
-            return;
-        }
-
-        self.advance(); // Consume the closing delimiter
-
-        self.add_token( TokenType::String, self.source[self.start+1..self.current-1].into_iter().collect() );
-    }
-
-    fn number(&mut self) {
-        while is_digit(self.peek()) {
-            self.advance();
-        }
-
-        if self.peek()=='.' && is_digit(self.peek_next()) {
-            while is_digit(self.peek()) {
-                self.advance();
-            }
-        }
-
-        self.add_token( TokenType::Number, self.source[self.start..self.current].into_iter().collect() ); // The lexeme will still need to be parsed to get the actual numeric value
-    }
-
-    fn identifier(&mut self) {
-        while is_alpha_numeric(self.peek()) {
-            self.advance();
-        }
-
-        let text: String = self.source[self.start..self.current].into_iter().collect();
-        match self.keywords.get(&text) {
-            Some( t ) => { self.add_simple_token( *t ) },
-            None => { self.add_token( TokenType::Identifier, text ) }
-        }
-    }
-}
\ No newline at end of file
diff --git a/src/rlox/token_type.rs b/src/rlox/token_type.rs
deleted file mode 100644
index 5c093e6..0000000
--- a/src/rlox/token_type.rs
+++ /dev/null
@@ -1,106 +0,0 @@
-
-#[derive(Debug, Copy, Clone)]
-pub enum TokenType {
-    // Single-character tokens.
-    LeftParen,
-    RightParen,
-    LeftBrace,
-    RightBrace,
-    Comma,
-    Dot,
-    Minus,
-    Plus,
-    Semicolon,
-    Slash,
-    Star,
-
-    // One or two character tokens.
-    Bang,
-    BangEqual,
-    Equal,
-    EqualEqual,
-    Greater,
-    GreaterEqual,
-    Less,
-    LessEqual,
-
-    // Literals.
-    Identifier,
-    String,
-    Number,
-
-    // Keywords.
-    And,
-    Class,
-    Else,
-    False,
-    Fun,
-    For,
-    If,
-    Nil,
-    Or,
-    Print,
-    Return,
-    Super,
-    This,
-    True,
-    Var,
-    While,
-
-    Eof,
-}
-
-impl std::fmt::Display for crate::rlox::token_type::TokenType {
-    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        match self {
-            // Single-character tokens.
-            TokenType::LeftParen=> write!(f, "LEFT_PAREN"),
-            TokenType::RightParen=> write!(f, "RIGHT_PAREN"),
-            TokenType::LeftBrace=> write!(f, "LEFT_BRACE"),
-            TokenType::RightBrace=> write!(f, "RIGHT_BRACE"),
-            TokenType::Comma=> write!(f, "COMMA"),
-            TokenType::Dot=> write!(f, "DOT"),
-            TokenType::Minus=> write!(f, "MINUS"),
-            TokenType::Plus=> write!(f, "PLUS"),
-            TokenType::Semicolon=> write!(f, "SEMICOLON"),
-            TokenType::Slash=> write!(f, "SLASH"),
-            TokenType::Star=> write!(f, "STAR"),
-
-            // One or two character tokens.
-            TokenType::Bang=> write!(f, "BANG"),
-            TokenType::BangEqual=> write!(f, "BANG_EQUAL"),
-            TokenType::Equal=> write!(f, "EQUAL"),
-            TokenType::EqualEqual=> write!(f, "EQUAL_EQUAL"),
-            TokenType::Greater=> write!(f, "GREATER"),
-            TokenType::GreaterEqual=> write!(f, "GREATER_EQUAL"),
-            TokenType::Less=> write!(f, "LESS"),
-            TokenType::LessEqual=> write!(f, "LESS_EQUAL"),
-
-            // Literals.
-            TokenType::Identifier=> write!(f, "IDENTIFIER"),
-            TokenType::String=> write!(f, "STRING"),
-            TokenType::Number=> write!(f, "NUMBER"),
-
-            // Keywords.
-            TokenType::And=> write!(f, "AND"),
-            TokenType::Class=> write!(f, "CLASS"),
-            TokenType::Else=> write!(f, "ELSE"),
-            TokenType::False=> write!(f, "FALSE"),
-            TokenType::Fun=> write!(f, "FUN"),
-            TokenType::For=> write!(f, "FOR"),
-            TokenType::If=> write!(f, "IF"),
-            TokenType::Nil=> write!(f, "NIL"),
-            TokenType::Or=> write!(f, "OR"),
-            TokenType::Print=> write!(f, "PRINT"),
-            TokenType::Return=> write!(f, "RETURN"),
-            TokenType::Super=> write!(f, "SUPER"),
-            TokenType::This=> write!(f, "THIS"),
-            TokenType::True=> write!(f, "TRUE"),
-            TokenType::Var=> write!(f, "VAR"),
-            TokenType::While=> write!(f, "WHILE"),
-
-            TokenType::Eof=> write!(f, "EOF"),
-        }
-    }
-}
-
diff --git a/src/scanner.rs b/src/scanner.rs
new file mode 100644
index 0000000..acdf6ae
--- /dev/null
+++ b/src/scanner.rs
@@ -0,0 +1,200 @@
+use std::collections::HashMap;
+use crate::token::Token;
+use crate::token_type::TokenType;
+
+fn is_digit(c: char) -> bool {
+    c >= '0' && c <= '9'
+}
+
+fn is_alpha(c: char) -> bool {
+    (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'
+}
+
+fn is_alpha_numeric(c: char) -> bool {
+    is_digit(c) || is_alpha(c)
+}
+
+pub struct Scanner {
+    source: Vec<char>,
+    tokens: Vec<Token>,
+
+    start: usize,
+    current: usize,
+    line: u32,
+
+    keywords: HashMap<String, TokenType>,
+}
+
+impl Scanner {
+    pub fn create_scanner( src: String ) -> Self {
+        let mut scanner = Self {
+            source: src.chars().collect::<Vec<char>>(),
+            tokens: vec![],
+            start: 0,
+            current: 0,
+            line: 1, // line numbering is 1-based for error reporting
+            keywords: HashMap::new()
+        };
+        scanner.init_keywords(); // populate the keyword table; nothing else calls init_keywords()
+        scanner
+    }
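+
+    // Lox's reserved words; identifier() consults this table so that e.g.
+    // "while" scans as TokenType::While rather than as an identifier.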
+    fn init_keywords(&mut self) {
+        self.keywords = HashMap::new();
+        self.keywords.insert(String::from("and"), TokenType::And);
+        self.keywords.insert(String::from("class"), TokenType::Class);
+        self.keywords.insert(String::from("else"), TokenType::Else);
+        self.keywords.insert(String::from("false"), TokenType::False);
+        self.keywords.insert(String::from("for"), TokenType::For);
+        self.keywords.insert(String::from("fun"), TokenType::Fun);
+        self.keywords.insert(String::from("if"), TokenType::If);
+        self.keywords.insert(String::from("nil"), TokenType::Nil);
+        self.keywords.insert(String::from("or"), TokenType::Or);
+        self.keywords.insert(String::from("print"), TokenType::Print);
+        self.keywords.insert(String::from("return"), TokenType::Return);
+        self.keywords.insert(String::from("super"), TokenType::Super);
+        self.keywords.insert(String::from("this"), TokenType::This);
+        self.keywords.insert(String::from("true"), TokenType::True);
+        self.keywords.insert(String::from("var"), TokenType::Var);
+        self.keywords.insert(String::from("while"), TokenType::While);
+    }
+
+    fn scan_tokens(&mut self) {
+        while !self.is_at_end() {
+            self.start = self.current;
+            self.scan_token();
+        }
+
+        // Append a final EOF token once there is nothing left to parse
+        self.tokens.push(Token { token_type: TokenType::Eof, lexeme: String::from(""), literal: String::from(""), line: self.line });
+    }
+
+    fn is_at_end(&self) -> bool {
+        self.current >= self.source.len()
+    }
+
+    fn scan_token(&mut self) {
+        let c = self.advance();
+        match c {
+            '(' => self.add_simple_token(TokenType::LeftParen),
+            ')' => self.add_simple_token(TokenType::RightParen),
+            '{' => self.add_simple_token(TokenType::LeftBrace),
+            '}' => self.add_simple_token(TokenType::RightBrace),
+            ',' => self.add_simple_token(TokenType::Comma),
+            '.' => self.add_simple_token(TokenType::Dot),
+            '-' => self.add_simple_token(TokenType::Minus),
+            '+' => self.add_simple_token(TokenType::Plus),
+            ';' => self.add_simple_token(TokenType::Semicolon),
+            '*' => self.add_simple_token(TokenType::Star),
+            '!' => { if self.match_next('=') { self.add_simple_token(TokenType::BangEqual) } else { self.add_simple_token(TokenType::Bang) } },
+            '=' => { if self.match_next('=') { self.add_simple_token(TokenType::EqualEqual) } else { self.add_simple_token(TokenType::Equal) } },
+            '<' => { if self.match_next('=') { self.add_simple_token(TokenType::LessEqual) } else { self.add_simple_token(TokenType::Less) } },
+            '>' => { if self.match_next('=') { self.add_simple_token(TokenType::GreaterEqual) } else { self.add_simple_token(TokenType::Greater) } },
+            '/' => {
+                if self.match_next('/') {
+                    // Comment: advance to the end of the line without emitting a token
+                    while self.peek() != '\n' && !self.is_at_end() {
+                        self.advance();
+                    }
+                } else {
+                    self.add_simple_token(TokenType::Slash)
+                }
+            },
+            ' ' => (),
+            '\r' => (),
+            '\t' => (),
+            '\n' => self.line += 1,
+            '"' => self.string(),
+            _ => {
+                if is_digit(c) {
+                    self.number();
+                } else if is_alpha(c) {
+                    self.identifier();
+                } else {
+                    // Error: unknown lexeme
+                }
+            }
+        }
+    }
+
+    fn advance(&mut self) -> char {
+        // Return the character at `current`, then step past it; incrementing
+        // first would skip source[0] and read one past the end of the input.
+        let c = self.source[self.current];
+        self.current += 1;
+        c
+    }
+
+    fn match_next(&mut self, expected: char) -> bool {
+        if self.is_at_end() { return false; }
+        if self.source[self.current] != expected { return false; }
+
+        self.current += 1;
+        true
+    }
+
+    fn peek(&self) -> char {
+        if self.is_at_end() {
+            '\0'
+        } else {
+            self.source[self.current]
+        }
+    }
+
+    fn peek_next(&self) -> char {
+        if self.current + 1 >= self.source.len() {
+            '\0'
+        } else {
+            self.source[self.current + 1]
+        }
+    }
+
+    fn add_simple_token(&mut self, t: TokenType) {
+        self.add_token(t, String::from(""));
+    }
+
+    fn add_token(&mut self, t: TokenType, l: String) {
+        let text = self.source[self.start..self.current].iter().collect();
+        self.tokens.push(Token { token_type: t, lexeme: text, literal: l, line: self.line });
+    }
+
+    fn string(&mut self) {
+        // Consume characters until the closing string delimiter or the end of the file
+        while self.peek() != '"' && !self.is_at_end() {
+            if self.peek() == '\n' {
+                self.line += 1; // strings may span multiple lines
+            }
+            self.advance();
+        }
+
+        if self.is_at_end() {
+            // Error: unterminated string
+            return;
+        }
+
+        self.advance(); // Consume the closing delimiter
+
+        self.add_token(TokenType::String, self.source[self.start + 1..self.current - 1].into_iter().collect());
+    }
+
+    fn number(&mut self) {
+        while is_digit(self.peek()) {
+            self.advance();
+        }
+
+        if self.peek() == '.' && is_digit(self.peek_next()) {
+            self.advance(); // consume the "." before scanning the fractional part
+            while is_digit(self.peek()) {
+                self.advance();
+            }
+        }
+
+        self.add_token(TokenType::Number, self.source[self.start..self.current].into_iter().collect()); // The lexeme will still need to be parsed to get the actual numeric value
+    }
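+
+    // Maximal munch: consume as long as the next character can extend the
+    // lexeme, then decide keyword vs. identifier with a single table lookup.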
+    fn identifier(&mut self) {
+        while is_alpha_numeric(self.peek()) {
+            self.advance();
+        }
+
+        let text: String = self.source[self.start..self.current].into_iter().collect();
+        match self.keywords.get(&text) {
+            Some(t) => { self.add_simple_token(*t) },
+            None => { self.add_token(TokenType::Identifier, text) }
+        }
+    }
+}
diff --git a/src/rlox/token.rs b/src/token.rs
similarity index 69%
rename from src/rlox/token.rs
rename to src/token.rs
index 644c5ca..62148ef 100644
--- a/src/rlox/token.rs
+++ b/src/token.rs
@@ -1,4 +1,4 @@
-use super::token_type::TokenType;
+use crate::token_type::TokenType;
 
 #[derive(Debug)]
 pub struct Token {
@@ -10,6 +10,6 @@ pub struct Token {
 impl std::fmt::Display for Token {
     fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
-        write!(f, "{0} {1} {2}",self.token_type, self.lexeme, self.literal)
+        write!(f, "{0} {1} {2}", self.token_type, self.lexeme, self.literal)
     }
-}
\ No newline at end of file
+}
diff --git a/src/token_type.rs b/src/token_type.rs
new file mode 100644
index 0000000..37cd057
--- /dev/null
+++ b/src/token_type.rs
@@ -0,0 +1,106 @@
+
+
+#[derive(Debug, Copy, Clone)]
+pub enum TokenType {
+    // Single-character tokens.
+    LeftParen,
+    RightParen,
+    LeftBrace,
+    RightBrace,
+    Comma,
+    Dot,
+    Minus,
+    Plus,
+    Semicolon,
+    Slash,
+    Star,
+
+    // One or two character tokens.
+    Bang,
+    BangEqual,
+    Equal,
+    EqualEqual,
+    Greater,
+    GreaterEqual,
+    Less,
+    LessEqual,
+
+    // Literals.
+    Identifier,
+    String,
+    Number,
+
+    // Keywords.
+    And,
+    Class,
+    Else,
+    False,
+    Fun,
+    For,
+    If,
+    Nil,
+    Or,
+    Print,
+    Return,
+    Super,
+    This,
+    True,
+    Var,
+    While,
+
+    Eof,
+}
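+
+// The Display impl mirrors the uppercase names that jlox's token dump
+// prints (e.g. LEFT_PAREN), which keeps rlox's output comparable with the
+// book's scanner. (An observation; nothing in this patch relies on it.)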
+impl std::fmt::Display for TokenType {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        match self {
+            // Single-character tokens.
+            TokenType::LeftParen => write!(f, "LEFT_PAREN"),
+            TokenType::RightParen => write!(f, "RIGHT_PAREN"),
+            TokenType::LeftBrace => write!(f, "LEFT_BRACE"),
+            TokenType::RightBrace => write!(f, "RIGHT_BRACE"),
+            TokenType::Comma => write!(f, "COMMA"),
+            TokenType::Dot => write!(f, "DOT"),
+            TokenType::Minus => write!(f, "MINUS"),
+            TokenType::Plus => write!(f, "PLUS"),
+            TokenType::Semicolon => write!(f, "SEMICOLON"),
+            TokenType::Slash => write!(f, "SLASH"),
+            TokenType::Star => write!(f, "STAR"),
+
+            // One or two character tokens.
+            TokenType::Bang => write!(f, "BANG"),
+            TokenType::BangEqual => write!(f, "BANG_EQUAL"),
+            TokenType::Equal => write!(f, "EQUAL"),
+            TokenType::EqualEqual => write!(f, "EQUAL_EQUAL"),
+            TokenType::Greater => write!(f, "GREATER"),
+            TokenType::GreaterEqual => write!(f, "GREATER_EQUAL"),
+            TokenType::Less => write!(f, "LESS"),
+            TokenType::LessEqual => write!(f, "LESS_EQUAL"),
+
+            // Literals.
+            TokenType::Identifier => write!(f, "IDENTIFIER"),
+            TokenType::String => write!(f, "STRING"),
+            TokenType::Number => write!(f, "NUMBER"),
+
+            // Keywords.
+            TokenType::And => write!(f, "AND"),
+            TokenType::Class => write!(f, "CLASS"),
+            TokenType::Else => write!(f, "ELSE"),
+            TokenType::False => write!(f, "FALSE"),
+            TokenType::Fun => write!(f, "FUN"),
+            TokenType::For => write!(f, "FOR"),
+            TokenType::If => write!(f, "IF"),
+            TokenType::Nil => write!(f, "NIL"),
+            TokenType::Or => write!(f, "OR"),
+            TokenType::Print => write!(f, "PRINT"),
+            TokenType::Return => write!(f, "RETURN"),
+            TokenType::Super => write!(f, "SUPER"),
+            TokenType::This => write!(f, "THIS"),
+            TokenType::True => write!(f, "TRUE"),
+            TokenType::Var => write!(f, "VAR"),
+            TokenType::While => write!(f, "WHILE"),
+
+            TokenType::Eof => write!(f, "EOF"),
+        }
+    }
+}
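
How the pieces are meant to fit together, as a sketch (not part of the patch;
it assumes scan_tokens() is made pub and the token list is exposed, neither of
which this diff does yet):

    let mut scanner = Scanner::create_scanner(String::from("var answer = 42;"));
    scanner.scan_tokens();
    // Printing the tokens through their Display impls would then yield lines
    // like "VAR var", "IDENTIFIER answer", "NUMBER 42 42", and "EOF".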