use std::collections::HashMap;

use crate::token::Token;
use crate::token_type::TokenType;

fn is_digit(c: char) -> bool {
    c >= '0' && c <= '9'
}

fn is_alpha(c: char) -> bool {
    (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_'
}

fn is_alpha_numeric(c: char) -> bool {
    is_digit(c) || is_alpha(c)
}

pub struct Scanner {
    source: Vec<char>,
    pub tokens: Vec<Token>,
    start: usize,
    current: usize,
    line: u32,
    keywords: HashMap<String, TokenType>,
    had_error: bool,
}

impl Scanner {
    pub fn new(src: String) -> Self {
        let mut scanner = Self {
            source: src.chars().collect::<Vec<char>>(),
            tokens: vec![],
            start: 0,
            current: 0,
            line: 0,
            keywords: HashMap::new(),
            had_error: false,
        };
        scanner.init_keywords();
        scanner
    }

    fn error(&mut self, line: u32, message: String) {
        self.had_error = true;
        println!("[line {line}] Error: {message}");
    }

    fn init_keywords(&mut self) {
        self.keywords = HashMap::new();
        self.keywords.insert(String::from("and"), TokenType::And);
        self.keywords.insert(String::from("class"), TokenType::Class);
        self.keywords.insert(String::from("else"), TokenType::Else);
        self.keywords.insert(String::from("false"), TokenType::False);
        self.keywords.insert(String::from("for"), TokenType::For);
        self.keywords.insert(String::from("fun"), TokenType::Fun);
        self.keywords.insert(String::from("if"), TokenType::If);
        self.keywords.insert(String::from("nil"), TokenType::Nil);
        self.keywords.insert(String::from("or"), TokenType::Or);
        self.keywords.insert(String::from("print"), TokenType::Print);
        self.keywords.insert(String::from("return"), TokenType::Return);
        self.keywords.insert(String::from("super"), TokenType::Super);
        self.keywords.insert(String::from("this"), TokenType::This);
        self.keywords.insert(String::from("true"), TokenType::True);
        self.keywords.insert(String::from("var"), TokenType::Var);
        self.keywords.insert(String::from("while"), TokenType::While);
    }

    pub fn scan_tokens(&mut self) {
        while !self.is_at_end() {
            self.start = self.current;
            self.scan_token();
        }
        // Append a final EOF token once there is nothing left to scan
        self.tokens.push(Token {
            token_type: TokenType::Eof,
            lexeme: String::from(""),
            literal: String::from(""),
            line: self.line,
        });
    }

    fn is_at_end(&self) -> bool {
        self.current >= self.source.len()
    }

    pub fn scan_token(&mut self) {
        let c = self.advance();
        match c {
            '(' => self.add_simple_token(TokenType::LeftParen),
            ')' => self.add_simple_token(TokenType::RightParen),
            '{' => self.add_simple_token(TokenType::LeftBrace),
            '}' => self.add_simple_token(TokenType::RightBrace),
            ',' => self.add_simple_token(TokenType::Comma),
            '.' => self.add_simple_token(TokenType::Dot),
            '-' => self.add_simple_token(TokenType::Minus),
            '+' => self.add_simple_token(TokenType::Plus),
            ';' => self.add_simple_token(TokenType::Semicolon),
            '*' => self.add_simple_token(TokenType::Star),
            '!' => {
                if self.match_next('=') {
                    self.add_simple_token(TokenType::BangEqual)
                } else {
                    self.add_simple_token(TokenType::Bang)
                }
            },
            '=' => {
                if self.match_next('=') {
                    self.add_simple_token(TokenType::EqualEqual)
                } else {
                    self.add_simple_token(TokenType::Equal)
                }
            },
            '<' => {
                if self.match_next('=') {
                    self.add_simple_token(TokenType::LessEqual)
                } else {
                    self.add_simple_token(TokenType::Less)
                }
            },
            '>' => {
                if self.match_next('=') {
                    self.add_simple_token(TokenType::GreaterEqual)
                } else {
                    self.add_simple_token(TokenType::Greater)
                }
            },
            '/' => {
                if self.match_next('/') {
                    // Comment: advance to the end of the line without emitting a token
                    while self.peek() != '\n' && !self.is_at_end() {
                        self.advance();
                    }
                } else {
                    self.add_simple_token(TokenType::Slash)
                }
            },
            ' ' => (),
            '\r' => (),
            '\t' => (),
            '\n' => self.line += 1,
            '"' => self.string(),
            _ => {
                if is_digit(c) {
                    self.number();
                } else if is_alpha(c) {
                    self.identifier();
                } else {
                    self.error(self.line, String::from("Unexpected token"));
                }
            }
        }
    }

    fn advance(&mut self) -> char {
        self.current += 1;
        self.source[self.current - 1]
    }

    fn match_next(&mut self, expected: char) -> bool {
        if self.is_at_end() {
            return false;
        }
        if self.source[self.current] != expected {
            return false;
        }
        self.current += 1;
        true
    }

    fn peek(&self) -> char {
        if self.is_at_end() {
            '\0'
        } else {
            self.source[self.current]
        }
    }

    fn peek_next(&self) -> char {
        if self.current + 1 >= self.source.len() {
            '\0'
        } else {
            self.source[self.current + 1]
        }
    }

    fn add_simple_token(&mut self, t: TokenType) {
        self.add_token(t, String::from(""));
    }

    fn add_token(&mut self, t: TokenType, l: String) {
        let text = self.source[self.start..self.current].iter().collect();
        self.tokens.push(Token {
            token_type: t,
            lexeme: text,
            literal: l,
            line: self.line,
        });
    }

    fn string(&mut self) {
        // Consume characters until the closing quote or the end of the source
        while self.peek() != '"' && !self.is_at_end() {
            if self.peek() == '\n' {
                self.line += 1; // strings may span multiple lines
            }
            self.advance();
        }
        if self.is_at_end() {
            // Error: unterminated string
            self.error(self.line, String::from("Unterminated string"));
            return;
        }
        self.advance(); // consume the closing quote
        self.add_token(
            TokenType::String,
            self.source[self.start + 1..self.current - 1].iter().collect(),
        );
    }

    fn number(&mut self) {
        while is_digit(self.peek()) {
            self.advance();
        }
        if self.peek() == '.'
            && is_digit(self.peek_next())
        {
            self.advance(); // consume the '.'
            while is_digit(self.peek()) {
                self.advance();
            }
        }
        self.add_token(
            TokenType::Number,
            self.source[self.start..self.current].iter().collect(),
        );
        // The lexeme will still have to be parsed to obtain the actual numeric value
    }

    fn identifier(&mut self) {
        while is_alpha_numeric(self.peek()) {
            self.advance();
        }
        let text: String = self.source[self.start..self.current].iter().collect();
        match self.keywords.get(&text) {
            Some(t) => self.add_simple_token(*t),
            None => self.add_token(TokenType::Identifier, text),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_digit_and_alpha() {
        assert!(is_digit('0'));
        assert_eq!(is_digit('a'), false);
        assert!(is_alpha('a'));
        assert_eq!(is_alpha('🤣'), false);
        assert!(is_alpha_numeric('0'));
        assert!(is_alpha_numeric('a'));
        assert_eq!(is_alpha_numeric('🤣'), false);
    }

    #[test]
    fn test_static_keywords() {
        let s = Scanner::new(String::from(""));
        assert_eq!(s.keywords.len(), 16);
    }

    #[test]
    fn test_is_at_end() {
        let s = Scanner::new(String::from(""));
        assert!(s.is_at_end());
        let mut s2 = Scanner::new(String::from("1+2"));
        assert_eq!(s2.is_at_end(), false);
        s2.current = 3;
        assert!(s2.is_at_end());
    }

    #[test]
    fn test_advance() {
        let mut s = Scanner::new(String::from("1+2"));
        assert_eq!(s.current, 0);
        let c = s.advance();
        assert_eq!(c, '1');
        assert_eq!(s.current, 1);
        let c2 = s.advance();
        assert_eq!(c2, '+');
        assert_eq!(s.current, 2);
    }

    #[test]
    fn test_match_next() {
        let mut s = Scanner::new(String::from("1+2"));
        let is_not_a_match = s.match_next('6');
        assert_eq!(is_not_a_match, false);
        assert_eq!(s.current, 0);
        let is_a_match = s.match_next('1');
        assert_eq!(is_a_match, true);
        assert_eq!(s.current, 1);
    }

    #[test]
    fn test_peek() {
        let mut s = Scanner::new(String::from("1+2"));
        assert_eq!(s.peek(), '1');
        s.advance();
        s.advance();
        s.advance();
        assert!(s.is_at_end());
        assert_eq!(s.peek(), '\0');
    }

    #[test]
    fn test_peek_next() {
        let s = Scanner::new(String::from("1+2"));
        assert_eq!(s.peek_next(), '+');
    }

    #[test]
    fn test_string() {
        let mut s = Scanner::new(String::from("\"hello\""));
        s.advance();
        s.string();
        assert!(s.is_at_end());
    }

    #[test]
    fn test_number() {
        let mut s = Scanner::new(String::from("12345"));
        s.number();
        assert!(s.is_at_end());
    }

    #[test]
    fn test_identifier() {
        let mut s = Scanner::new(String::from("+id"));
        s.identifier();
        assert!(!s.is_at_end());
        s.advance();
        s.identifier();
        assert!(s.is_at_end());
    }

    #[test]
    fn test_add_simple_token() {
        let mut s = Scanner::new(String::from(""));
        s.add_simple_token(TokenType::LeftParen);
        assert_eq!(s.tokens.len(), 1);
    }

    #[test]
    fn test_add_token() {
        let mut s = Scanner::new(String::from(""));
        s.add_token(TokenType::Identifier, String::from("id"));
        assert_eq!(s.tokens.len(), 1);
    }

    #[test]
    fn test_scan_token() {
        let mut s = Scanner::new(String::from("+"));
        s.scan_token();
        assert_eq!(s.tokens.len(), 1);
        assert_eq!(s.tokens[0].token_type, TokenType::Plus);
    }

    #[test]
    fn test_scan_tokens() {
        let mut s = Scanner::new(String::from("1+id-\"toto\""));
        s.scan_tokens();
        assert_eq!(s.tokens.len(), 6);
        assert_eq!(s.tokens[0].token_type, TokenType::Number);
        assert_eq!(s.tokens[1].token_type, TokenType::Plus);
        assert_eq!(s.tokens[2].token_type, TokenType::Identifier);
        assert_eq!(s.tokens[3].token_type, TokenType::Minus);
        assert_eq!(s.tokens[4].token_type, TokenType::String);
        assert_eq!(s.tokens[5].token_type, TokenType::Eof);
        assert_eq!(s.tokens[0].literal, String::from("1"));
        assert_eq!(s.tokens[2].literal, String::from("id"));
        assert_eq!(s.tokens[4].literal, String::from("toto"));
    }
}
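
// An extra end-to-end sketch, added for illustration; it is not part of the original
// test suite. It assumes TokenType derives Copy, PartialEq and Debug (as the existing
// tests and `add_simple_token(*t)` already imply) and exercises the line-comment,
// newline and decimal-number paths of `scan_tokens` in one pass.
#[cfg(test)]
mod usage_example {
    use super::*;

    #[test]
    fn scans_a_small_snippet() {
        // The line comment is skipped, the newline bumps the line counter,
        // and "12.5" is scanned as a single Number token.
        let mut s = Scanner::new(String::from("// a comment\nvar x = 12.5;"));
        s.scan_tokens();
        let types: Vec<TokenType> = s.tokens.iter().map(|t| t.token_type).collect();
        assert_eq!(
            types,
            vec![
                TokenType::Var,
                TokenType::Identifier,
                TokenType::Equal,
                TokenType::Number,
                TokenType::Semicolon,
                TokenType::Eof,
            ]
        );
        assert_eq!(s.tokens[3].literal, String::from("12.5"));
    }
}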