// Files: rlox/src/scanner.rs (354 lines, 11 KiB, Rust)

use std::collections::HashMap;
use crate::token::Token;
use crate::token_type::TokenType;
/// Returns `true` when `c` is one of the ASCII decimal digits `0` through `9`.
fn is_digit(c: char) -> bool {
    matches!(c, '0'..='9')
}
/// Returns `true` when `c` is an ASCII letter (either case) or an underscore.
fn is_alpha(c: char) -> bool {
    c == '_' || c.is_ascii_alphabetic()
}
/// Returns `true` when `c` satisfies either `is_alpha` or `is_digit`.
fn is_alpha_numeric(c: char) -> bool {
    is_alpha(c) || is_digit(c)
}
/// Lexer state: the source characters, the tokens produced so far, and the
/// cursor positions used while scanning.
pub struct Scanner {
/// Source text split into `char`s so it can be indexed by position.
source: Vec<char>,
/// Tokens emitted so far, in source order.
pub tokens: Vec<Token>,
/// Index in `source` of the first character of the lexeme being scanned.
start: usize,
/// Index in `source` of the next character to be consumed.
current: usize,
/// Line counter; incremented each time a newline is consumed.
line: u32,
/// Reserved words mapped to their token types.
keywords: HashMap<String, TokenType>,
/// Set to `true` when an error is reported during scanning.
had_error: bool,
}
/// Scanning logic: walks the stored source characters once and appends the
/// recognised tokens to `tokens`.
impl Scanner {
    /// Builds a scanner over `src` with an empty token list and the
    /// reserved-word table already populated.
    pub fn new(src: String) -> Self {
        let mut scanner = Self {
            source: src.chars().collect(),
            tokens: Vec::new(),
            start: 0,
            current: 0,
            // Line numbers are 1-based so diagnostics match what editors show.
            line: 1,
            keywords: HashMap::new(),
            had_error: false,
        };
        scanner.init_keywords();
        scanner
    }

    /// Returns `true` if at least one lexical error has been reported.
    pub fn had_error(&self) -> bool {
        self.had_error
    }

    /// Marks the scan as failed and prints the diagnostic for `line`.
    fn error(&mut self, line: u32, message: String) {
        self.had_error = true;
        println!("[line {line}] Error : {message}");
    }

    /// Fills `keywords` with the reserved words of the language.
    fn init_keywords(&mut self) {
        const KEYWORDS: [(&str, TokenType); 16] = [
            ("and", TokenType::And),
            ("class", TokenType::Class),
            ("else", TokenType::Else),
            ("false", TokenType::False),
            ("for", TokenType::For),
            ("fun", TokenType::Fun),
            ("if", TokenType::If),
            ("nil", TokenType::Nil),
            ("or", TokenType::Or),
            ("print", TokenType::Print),
            ("return", TokenType::Return),
            ("super", TokenType::Super),
            ("this", TokenType::This),
            ("true", TokenType::True),
            ("var", TokenType::Var),
            ("while", TokenType::While),
        ];
        self.keywords = KEYWORDS
            .iter()
            .map(|&(word, kind)| (String::from(word), kind))
            .collect();
    }

    /// Scans the whole source, then appends a final `Eof` token.
    pub fn scan_tokens(&mut self) {
        while !self.is_at_end() {
            self.start = self.current;
            self.scan_token();
        }
        // Closing token emitted once there is nothing left to scan.
        self.tokens.push(Token {
            token_type: TokenType::Eof,
            lexeme: String::new(),
            literal: String::new(),
            line: self.line,
        });
    }

    /// Returns `true` once every source character has been consumed.
    fn is_at_end(&self) -> bool {
        self.current >= self.source.len()
    }

    /// Scans a single lexeme starting at `current`.
    ///
    /// Whitespace and `//` comments produce no token. An unrecognised
    /// character is reported through `error`. Calling this when the input is
    /// already exhausted does nothing.
    pub fn scan_token(&mut self) {
        // Guard the public entry point: `advance` would index out of bounds.
        if self.is_at_end() {
            return;
        }
        let c = self.advance();
        match c {
            '(' => self.add_simple_token(TokenType::LeftParen),
            ')' => self.add_simple_token(TokenType::RightParen),
            '{' => self.add_simple_token(TokenType::LeftBrace),
            '}' => self.add_simple_token(TokenType::RightBrace),
            ',' => self.add_simple_token(TokenType::Comma),
            '.' => self.add_simple_token(TokenType::Dot),
            '-' => self.add_simple_token(TokenType::Minus),
            '+' => self.add_simple_token(TokenType::Plus),
            ';' => self.add_simple_token(TokenType::Semicolon),
            '*' => self.add_simple_token(TokenType::Star),
            '!' => self.add_operator('=', TokenType::BangEqual, TokenType::Bang),
            '=' => self.add_operator('=', TokenType::EqualEqual, TokenType::Equal),
            '<' => self.add_operator('=', TokenType::LessEqual, TokenType::Less),
            '>' => self.add_operator('=', TokenType::GreaterEqual, TokenType::Greater),
            '/' => {
                if self.match_next('/') {
                    // Comment: skip to the end of the line without emitting a
                    // token. The newline itself is left for the next call so
                    // that it still bumps the line counter.
                    while !self.is_at_end() && self.peek() != '\n' {
                        self.advance();
                    }
                } else {
                    self.add_simple_token(TokenType::Slash);
                }
            }
            ' ' | '\r' | '\t' => {}
            '\n' => self.line += 1,
            '"' => self.string(),
            _ if is_digit(c) => self.number(),
            _ if is_alpha(c) => self.identifier(),
            _ => {
                let line = self.line;
                self.error(line, String::from("Unexpected token"));
            }
        }
    }

    /// Emits `matched` if the next character is `second` (consuming it),
    /// otherwise emits `single`.
    fn add_operator(&mut self, second: char, matched: TokenType, single: TokenType) {
        let kind = if self.match_next(second) { matched } else { single };
        self.add_simple_token(kind);
    }

    /// Consumes and returns the character at `current`.
    fn advance(&mut self) -> char {
        let c = self.source[self.current];
        self.current += 1;
        c
    }

    /// Consumes the next character only if it equals `expected`; returns
    /// whether it did.
    fn match_next(&mut self, expected: char) -> bool {
        if self.source.get(self.current) == Some(&expected) {
            self.current += 1;
            true
        } else {
            false
        }
    }

    /// Returns the next character without consuming it, or `'\0'` at the end.
    fn peek(&self) -> char {
        self.source.get(self.current).copied().unwrap_or('\0')
    }

    /// Returns the character after the next one without consuming anything,
    /// or `'\0'` if there is none.
    fn peek_next(&self) -> char {
        self.source.get(self.current + 1).copied().unwrap_or('\0')
    }

    /// Emits a token of type `t` with an empty literal.
    fn add_simple_token(&mut self, t: TokenType) {
        self.add_token(t, String::new());
    }

    /// Emits a token of type `t` whose lexeme is `source[start..current]` and
    /// whose literal is `l`.
    fn add_token(&mut self, t: TokenType, l: String) {
        let lexeme: String = self.source[self.start..self.current].iter().collect();
        self.tokens.push(Token {
            token_type: t,
            lexeme,
            literal: l,
            line: self.line,
        });
    }

    /// Scans a string literal whose opening quote has already been consumed.
    ///
    /// The literal stored in the token excludes the surrounding quotes. An
    /// unterminated string is reported as an error and produces no token.
    fn string(&mut self) {
        // Consume characters up to the closing quote or the end of input.
        while !self.is_at_end() && self.peek() != '"' {
            if self.peek() == '\n' {
                self.line += 1; // strings may span several lines
            }
            self.advance();
        }
        if self.is_at_end() {
            let line = self.line;
            self.error(line, String::from("Unterminated string"));
            return;
        }
        self.advance(); // consume the closing quote
        let value: String = self.source[self.start + 1..self.current - 1]
            .iter()
            .collect();
        self.add_token(TokenType::String, value);
    }

    /// Scans a number literal: digits, optionally followed by `.` and more
    /// digits. The literal is kept as text; parsing it is left to the caller.
    fn number(&mut self) {
        while is_digit(self.peek()) {
            self.advance();
        }
        // A '.' belongs to the number only when a digit follows it.
        if self.peek() == '.' && is_digit(self.peek_next()) {
            self.advance(); // consume the '.', otherwise the loop below never runs
            while is_digit(self.peek()) {
                self.advance();
            }
        }
        let text: String = self.source[self.start..self.current].iter().collect();
        self.add_token(TokenType::Number, text);
    }

    /// Scans an identifier and emits either the matching keyword token or an
    /// `Identifier` token whose literal is the identifier text.
    fn identifier(&mut self) {
        while is_alpha_numeric(self.peek()) {
            self.advance();
        }
        let text: String = self.source[self.start..self.current].iter().collect();
        match self.keywords.get(&text).copied() {
            Some(kind) => self.add_simple_token(kind),
            None => self.add_token(TokenType::Identifier, text),
        }
    }
}
/// Unit tests for the character classifiers and the `Scanner` methods.
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for building a scanner from a string literal.
    fn scanner(src: &str) -> Scanner {
        Scanner::new(src.to_string())
    }

    #[test]
    fn test_digit_and_alpha() {
        assert!(is_digit('0'));
        assert!(!is_digit('a'));
        assert!(is_alpha('a'));
        assert!(!is_alpha('🤣'));
        assert!(is_alpha_numeric('0'));
        assert!(is_alpha_numeric('a'));
        assert!(!is_alpha_numeric('🤣'));
    }

    #[test]
    fn test_static_keywords() {
        let s = scanner("");
        assert_eq!(s.keywords.len(), 16);
    }

    #[test]
    fn test_is_at_end() {
        let empty = scanner("");
        assert!(empty.is_at_end());

        let mut s = scanner("1+2");
        assert!(!s.is_at_end());
        s.current = 3;
        assert!(s.is_at_end());
    }

    #[test]
    fn test_advance() {
        let mut s = scanner("1+2");
        assert_eq!(s.current, 0);

        assert_eq!(s.advance(), '1');
        assert_eq!(s.current, 1);

        assert_eq!(s.advance(), '+');
        assert_eq!(s.current, 2);
    }

    #[test]
    fn test_match_next() {
        let mut s = scanner("1+2");

        // A mismatch must leave the cursor where it was.
        assert!(!s.match_next('6'));
        assert_eq!(s.current, 0);

        // A match consumes the character.
        assert!(s.match_next('1'));
        assert_eq!(s.current, 1);
    }

    #[test]
    fn test_peek() {
        let mut s = scanner("1+2");
        assert_eq!(s.peek(), '1');
        for _ in 0..3 {
            s.advance();
        }
        assert!(s.is_at_end());
        assert_eq!(s.peek(), '\0');
    }

    #[test]
    fn test_peek_next() {
        let s = scanner("1+2");
        assert_eq!(s.peek_next(), '+');
    }

    #[test]
    fn test_string() {
        let mut s = scanner("\"hello\"");
        s.advance(); // step over the opening quote, as `scan_token` would
        s.string();
        assert!(s.is_at_end());
    }

    #[test]
    fn test_number() {
        let mut s = scanner("12345");
        s.number();
        assert!(s.is_at_end());
    }

    #[test]
    fn test_identifier() {
        let mut s = scanner("+id");

        // '+' is not an identifier character, so nothing is consumed.
        s.identifier();
        assert!(!s.is_at_end());

        s.advance();
        s.identifier();
        assert!(s.is_at_end());
    }

    #[test]
    fn test_add_simple_token() {
        let mut s = scanner("");
        s.add_simple_token(TokenType::LeftParen);
        assert_eq!(s.tokens.len(), 1);
    }

    #[test]
    fn test_add_token() {
        let mut s = scanner("");
        s.add_token(TokenType::Identifier, "id".to_string());
        assert_eq!(s.tokens.len(), 1);
    }

    #[test]
    fn test_scan_token() {
        let mut s = scanner("+");
        s.scan_token();
        assert_eq!(s.tokens.len(), 1);
        assert_eq!(s.tokens[0].token_type, TokenType::Plus);
    }

    #[test]
    fn test_scan_tokens() {
        let mut s = scanner("1+id-\"toto\"");
        s.scan_tokens();

        let kinds: Vec<TokenType> = s.tokens.iter().map(|t| t.token_type).collect();
        assert_eq!(
            kinds,
            vec![
                TokenType::Number,
                TokenType::Plus,
                TokenType::Identifier,
                TokenType::Minus,
                TokenType::String,
                TokenType::Eof,
            ]
        );

        assert_eq!(s.tokens[0].literal, "1");
        assert_eq!(s.tokens[2].literal, "id");
        assert_eq!(s.tokens[4].literal, "toto");
    }
}