From 40b6505f96a1c55515fbba4b14739a053b0cefbb Mon Sep 17 00:00:00 2001
From: kek kek kek
Date: Sat, 28 Oct 2023 22:57:55 +0000
Subject: [PATCH] chore: correct handling of unicode in lexer

---
 compiler/noirc_frontend/src/lexer/lexer.rs | 28 +++++++---------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/compiler/noirc_frontend/src/lexer/lexer.rs b/compiler/noirc_frontend/src/lexer/lexer.rs
index 2576b7a08ab..4a5ea56d8bc 100644
--- a/compiler/noirc_frontend/src/lexer/lexer.rs
+++ b/compiler/noirc_frontend/src/lexer/lexer.rs
@@ -6,17 +6,13 @@ use super::{
 };
 use acvm::FieldElement;
 use noirc_errors::{Position, Span};
-use std::str::Chars;
-use std::{
-    iter::{Peekable, Zip},
-    ops::RangeFrom,
-};
+use std::str::CharIndices;
 
 /// The job of the lexer is to transform an iterator of characters (`char_iter`)
 /// into an iterator of `SpannedToken`. Each `Token` corresponds roughly to 1 word or operator.
 /// Tokens are tagged with their location in the source file (a `Span`) for use in error reporting.
 pub struct Lexer<'a> {
-    char_iter: Peekable<Zip<Chars<'a>, RangeFrom<u32>>>,
+    chars: CharIndices<'a>,
     position: Position,
     done: bool,
     skip_comments: bool,
@@ -41,13 +37,7 @@ impl<'a> Lexer<'a> {
     }
 
     pub fn new(source: &'a str) -> Self {
-        Lexer {
-            // We zip with the character index here to ensure the first char has index 0
-            char_iter: source.chars().zip(0..).peekable(),
-            position: 0,
-            done: false,
-            skip_comments: true,
-        }
+        Lexer { chars: source.char_indices(), position: 0, done: false, skip_comments: true }
     }
 
     pub fn skip_comments(mut self, flag: bool) -> Self {
@@ -57,21 +47,21 @@ impl<'a> Lexer<'a> {
 
     /// Iterates the cursor and returns the char at the new cursor position
     fn next_char(&mut self) -> Option<char> {
-        let (c, index) = self.char_iter.next()?;
-        self.position = index;
-        Some(c)
+        let (position, ch) = self.chars.next()?;
+        self.position = position as u32;
+        Some(ch)
     }
 
     /// Peeks at the next char. Does not iterate the cursor
     fn peek_char(&mut self) -> Option<char> {
-        self.char_iter.peek().map(|(c, _)| *c)
+        self.chars.clone().next().map(|(_, ch)| ch)
     }
 
     /// Peeks at the character two positions ahead. Does not iterate the cursor
     fn peek2_char(&mut self) -> Option<char> {
-        let mut chars = self.char_iter.clone();
+        let mut chars = self.chars.clone();
         chars.next();
-        chars.next().map(|(c, _)| c)
+        chars.next().map(|(_, ch)| ch)
     }
 
     /// Peeks at the next char and returns true if it is equal to the char argument