354 lines
11 KiB
Rust
354 lines
11 KiB
Rust
use std::collections::HashMap;
|
|
use crate::token::Token;
|
|
use crate::token_type::TokenType;
|
|
|
|
/// Returns `true` when `c` is an ASCII decimal digit (`'0'..='9'`).
///
/// Uses the standard-library predicate instead of a manual range
/// comparison (clippy: `manual_is_ascii_check`).
fn is_digit(c: char) -> bool {
    c.is_ascii_digit()
}
|
|
|
|
/// Returns `true` when `c` may start an identifier: an ASCII letter
/// or an underscore.
///
/// Uses the standard-library predicate instead of two manual range
/// comparisons (clippy: `manual_is_ascii_check`).
fn is_alpha(c: char) -> bool {
    c.is_ascii_alphabetic() || c == '_'
}
|
|
|
|
fn is_alpha_numeric(c: char) -> bool {
|
|
is_digit(c) || is_alpha(c)
|
|
}
|
|
|
|
/// Lexer state: walks the raw character stream and produces `Token`s.
pub struct Scanner {
    // Raw input decoded into chars so we can index by position.
    source: Vec<char>,
    // Tokens produced so far; filled by `scan_tokens`.
    pub tokens: Vec<Token>,

    // Index of the first char of the lexeme being scanned.
    start: usize,
    // Index of the char currently being considered.
    current: usize,
    // Current line number, used for error reporting.
    line: u32,

    // Reserved-word lookup table (identifier text -> keyword token).
    keywords: HashMap<String, TokenType>,
    // Set when a scanning error was reported.
    had_error: bool,
}
|
|
|
|
impl Scanner {
|
|
pub fn new(src: String) -> Self {
|
|
let mut scanner = Self {
|
|
source: src.chars().collect::<Vec<_>>(),
|
|
tokens: vec![],
|
|
start: 0,
|
|
current: 0,
|
|
line: 0,
|
|
keywords: HashMap::new(),
|
|
had_error: false
|
|
};
|
|
scanner.init_keywords();
|
|
scanner
|
|
}
|
|
|
|
fn error(&mut self, line: u32, message: String) {
|
|
self.had_error = true;
|
|
println!("[line {line}] Error : {message}");
|
|
}
|
|
|
|
fn init_keywords(&mut self) {
|
|
self.keywords = HashMap::new();
|
|
self.keywords.insert(String::from("and"), TokenType::And);
|
|
self.keywords.insert(String::from("class"), TokenType::Class);
|
|
self.keywords.insert(String::from("else"), TokenType::Else);
|
|
self.keywords.insert(String::from("false"), TokenType::False);
|
|
self.keywords.insert(String::from("for"), TokenType::For);
|
|
self.keywords.insert(String::from("fun"), TokenType::Fun);
|
|
self.keywords.insert(String::from("if"), TokenType::If);
|
|
self.keywords.insert(String::from("nil"), TokenType::Nil);
|
|
self.keywords.insert(String::from("or"), TokenType::Or);
|
|
self.keywords.insert(String::from("print"), TokenType::Print);
|
|
self.keywords.insert(String::from("return"), TokenType::Return);
|
|
self.keywords.insert(String::from("super"), TokenType::Super);
|
|
self.keywords.insert(String::from("this"), TokenType::This);
|
|
self.keywords.insert(String::from("true"), TokenType::True);
|
|
self.keywords.insert(String::from("var"), TokenType::Var);
|
|
self.keywords.insert(String::from("while"), TokenType::While);
|
|
}
|
|
|
|
pub fn scan_tokens(&mut self) {
|
|
while !self.is_at_end() {
|
|
self.start = self.current;
|
|
self.scan_token();
|
|
}
|
|
|
|
// Ajout d'un token final quand il n'y a plus rien à parser
|
|
self.tokens.push(Token { token_type: TokenType::Eof, lexeme: String::from(""), literal: String::from(""), line: self.line });
|
|
}
|
|
|
|
fn is_at_end(&self) -> bool {
|
|
let at_end = self.current >= self.source.len();
|
|
at_end
|
|
}
|
|
|
|
pub fn scan_token(&mut self) {
|
|
let c = self.advance();
|
|
match c {
|
|
'(' => self.add_simple_token(TokenType::LeftParen),
|
|
')' => self.add_simple_token(TokenType::RightParen),
|
|
'{' => self.add_simple_token(TokenType::LeftBrace),
|
|
'}' => self.add_simple_token(TokenType::RightBrace),
|
|
',' => self.add_simple_token(TokenType::Comma),
|
|
'.' => self.add_simple_token(TokenType::Dot),
|
|
'-' => self.add_simple_token(TokenType::Minus),
|
|
'+' => self.add_simple_token(TokenType::Plus),
|
|
';' => self.add_simple_token(TokenType::Semicolon),
|
|
'*' => self.add_simple_token(TokenType::Star),
|
|
'!' => { if self.match_next('=') { self.add_simple_token(TokenType::BangEqual) } else { self.add_simple_token(TokenType::Bang) } },
|
|
'=' => { if self.match_next('=') { self.add_simple_token(TokenType::EqualEqual) } else { self.add_simple_token(TokenType::Equal) } },
|
|
'<' => { if self.match_next('=') { self.add_simple_token(TokenType::LessEqual) } else { self.add_simple_token(TokenType::Less) } },
|
|
'>' => { if self.match_next('=') { self.add_simple_token(TokenType::GreaterEqual) } else { self.add_simple_token(TokenType::Greater) } },
|
|
'/' => {
|
|
if self.match_next('/') {
|
|
// commentaire : avance jusqu'à la fin de la ligne sans ajouter de token
|
|
while self.peek() != '\n' && !self.is_at_end() {
|
|
self.advance();
|
|
}
|
|
} else {
|
|
self.add_simple_token(TokenType::Slash)
|
|
}
|
|
},
|
|
' ' => (),
|
|
'\r' => (),
|
|
'\t' => (),
|
|
'\n' => self.line += 1,
|
|
'"' => self.string(),
|
|
_ => {
|
|
if is_digit(c) {
|
|
self.number();
|
|
} else if is_alpha(c) {
|
|
self.identifier();
|
|
} else {
|
|
self.error(self.line, String::from("Unexpected token"));
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
fn advance(&mut self) -> char {
|
|
self.current += 1;
|
|
self.source[self.current-1]
|
|
}
|
|
|
|
fn match_next(&mut self, expected: char) -> bool {
|
|
if self.is_at_end() { return false; }
|
|
if self.source[self.current] != expected { return false; }
|
|
|
|
self.current += 1;
|
|
true
|
|
}
|
|
|
|
fn peek(&self) -> char {
|
|
if self.is_at_end() {
|
|
'\0'
|
|
} else {
|
|
self.source[self.current]
|
|
}
|
|
}
|
|
|
|
fn peek_next(&self) -> char {
|
|
if self.current + 1 >= self.source.len() {
|
|
'\0'
|
|
} else {
|
|
self.source[self.current + 1]
|
|
}
|
|
}
|
|
|
|
fn add_simple_token(&mut self, t: TokenType) {
|
|
self.add_token(t, String::from(""));
|
|
}
|
|
|
|
fn add_token(&mut self, t: TokenType, l: String) {
|
|
let text = self.source[self.start..self.current].iter().collect();
|
|
self.tokens.push(Token { token_type: t, lexeme: text, literal: l, line: self.line });
|
|
}
|
|
|
|
fn string(&mut self) {
|
|
// Consomme les caractères jusqu'à trouver le délimiteur de chaînes ou la fin du fichier
|
|
while self.peek() != '"' && !self.is_at_end() {
|
|
if self.peek() == '\n' {
|
|
self.line += 1; // les chaînes peuvent être multilignes
|
|
}
|
|
self.advance();
|
|
}
|
|
|
|
if self.is_at_end() {
|
|
// Erreur : chaîne non terminée
|
|
return;
|
|
}
|
|
|
|
self.advance(); // Consomme le délimiteur final
|
|
|
|
self.add_token(TokenType::String, self.source[self.start + 1..self.current - 1].into_iter().collect());
|
|
}
|
|
|
|
fn number(&mut self) {
|
|
while is_digit(self.peek()) {
|
|
self.advance();
|
|
}
|
|
|
|
if self.peek() == '.' && is_digit(self.peek_next()) {
|
|
while is_digit(self.peek()) {
|
|
self.advance();
|
|
}
|
|
}
|
|
|
|
self.add_token(TokenType::Number, self.source[self.start..self.current].into_iter().collect()); // Il faudra faire un parse sur la chaîne pour connaître la valeur effective
|
|
}
|
|
|
|
fn identifier(&mut self) {
|
|
while is_alpha_numeric(self.peek()) {
|
|
self.advance();
|
|
}
|
|
|
|
let text: String = self.source[self.start..self.current].into_iter().collect();
|
|
match self.keywords.get(&text) {
|
|
Some(t) => { self.add_simple_token(*t) },
|
|
None => { self.add_token(TokenType::Identifier, text) }
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
#[cfg(test)]
mod tests {
    use super::*;

    // Character-class helpers accept/reject the expected chars
    // (including a non-ASCII char, which is neither digit nor alpha).
    #[test]
    fn test_digit_and_alpha() {
        assert!(is_digit('0'));
        assert_eq!(is_digit('a'), false);
        assert!(is_alpha('a'));
        assert_eq!(is_alpha('🤣'), false);
        assert!(is_alpha_numeric('0'));
        assert!(is_alpha_numeric('a'));
        assert_eq!(is_alpha_numeric('🤣'), false);
    }

    // The keyword table is fully populated by the constructor.
    #[test]
    fn test_static_keywords() {
        let s = Scanner::new(String::from(""));
        assert_eq!(s.keywords.len(), 16);
    }

    // End-of-input detection on empty source and after consuming everything.
    #[test]
    fn test_is_at_end() {
        let s = Scanner::new(String::from(""));
        assert!(s.is_at_end());

        let mut s2 = Scanner::new(String::from("1+2"));
        assert_eq!(s2.is_at_end(), false);
        s2.current = 3;
        assert!(s2.is_at_end());
    }

    // `advance` returns the current char and moves the cursor forward.
    #[test]
    fn test_advance() {
        let mut s = Scanner::new(String::from("1+2"));
        assert_eq!(s.current, 0);
        let c = s.advance();
        assert_eq!(c, '1');
        assert_eq!(s.current, 1);
        let c2 = s.advance();
        assert_eq!(c2, '+');
        assert_eq!(s.current, 2);
    }

    // `match_next` consumes only on a match.
    #[test]
    fn test_match_next() {
        let mut s = Scanner::new(String::from("1+2"));

        let is_not_a_match = s.match_next('6');
        assert_eq!(is_not_a_match, false);
        assert_eq!(s.current, 0);

        let is_a_match = s.match_next('1');
        assert_eq!(is_a_match, true);
        assert_eq!(s.current, 1);
    }

    // `peek` never consumes and yields '\0' at end of input.
    #[test]
    fn test_peek() {
        let mut s = Scanner::new(String::from("1+2"));

        assert_eq!(s.peek(), '1');
        s.advance();
        s.advance();
        s.advance();
        assert!(s.is_at_end());
        assert_eq!(s.peek(), '\0');

    }

    // `peek_next` looks one char past the cursor.
    #[test]
    fn test_peek_next() {
        let s = Scanner::new(String::from("1+2"));
        assert_eq!(s.peek_next(), '+');
    }

    // `string` consumes through the closing quote
    // (the opening quote is consumed first, as scan_token would).
    #[test]
    fn test_string() {
        let mut s = Scanner::new(String::from("\"hello\""));
        s.advance();
        s.string();
        assert!(s.is_at_end());
    }

    // `number` consumes a full digit run.
    #[test]
    fn test_number() {
        let mut s = Scanner::new(String::from("12345"));
        s.number();
        assert!(s.is_at_end());
    }

    // `identifier` stops at non-identifier chars and consumes a full name.
    #[test]
    fn test_identifier() {
        let mut s = Scanner::new(String::from("+id"));
        s.identifier();
        assert!(!s.is_at_end());
        s.advance();
        s.identifier();
        assert!(s.is_at_end());
    }

    // Token-appending helpers grow the token list.
    #[test]
    fn test_add_simple_token() {
        let mut s = Scanner::new(String::from(""));
        s.add_simple_token(TokenType::LeftParen);
        assert_eq!(s.tokens.len(), 1);
    }

    #[test]
    fn test_add_token() {
        let mut s = Scanner::new(String::from(""));
        s.add_token(TokenType::Identifier, String::from("id"));
        assert_eq!(s.tokens.len(), 1);
    }

    // A single-char operator becomes one token.
    #[test]
    fn test_scan_token() {
        let mut s = Scanner::new(String::from("+"));
        s.scan_token();
        assert_eq!(s.tokens.len(), 1);
        assert_eq!(s.tokens[0].token_type, TokenType::Plus);
    }

    // End-to-end: mixed expression produces the expected token sequence
    // (including the trailing Eof) with the expected literals.
    #[test]
    fn test_scan_tokens() {
        let mut s = Scanner::new(String::from("1+id-\"toto\""));
        s.scan_tokens();
        assert_eq!(s.tokens.len(), 6);
        assert_eq!(s.tokens[0].token_type, TokenType::Number);
        assert_eq!(s.tokens[1].token_type, TokenType::Plus);
        assert_eq!(s.tokens[2].token_type, TokenType::Identifier);
        assert_eq!(s.tokens[3].token_type, TokenType::Minus);
        assert_eq!(s.tokens[4].token_type, TokenType::String);
        assert_eq!(s.tokens[5].token_type, TokenType::Eof);

        assert_eq!(s.tokens[0].literal, String::from("1"));
        assert_eq!(s.tokens[2].literal, String::from("id"));
        assert_eq!(s.tokens[4].literal, String::from("toto"));

    }

}