From 0fa814064460e26a01d3492d6243b015c1bd364a Mon Sep 17 00:00:00 2001
From: dkanus
Date: Tue, 16 Sep 2025 08:19:42 +0700
Subject: [PATCH] Add `EndOfFile` variant to `TokenLocation`

---
 rottlib/src/lexer/iterator.rs | 59 ++++++++++++++++++++---------------
 rottlib/src/lexer/mod.rs      | 21 ++++++++-----
 2 files changed, 47 insertions(+), 33 deletions(-)

diff --git a/rottlib/src/lexer/iterator.rs b/rottlib/src/lexer/iterator.rs
index 4fc991b..b4a0bda 100644
--- a/rottlib/src/lexer/iterator.rs
+++ b/rottlib/src/lexer/iterator.rs
@@ -28,9 +28,7 @@ use super::{TokenLocation, TokenPiece, TokenizedFile};
 #[derive(Clone, Debug)]
 pub struct Tokens<'src> {
     /// [`TokenLocation`] of the next token to be returned.
-    ///
-    /// [`None`] means the iterator has been exhausted.
-    cursor: Option<TokenLocation>,
+    cursor: TokenLocation,
     /// [`TokenizedFile`] whose tokens we're iterating over.
     source_file: &'src TokenizedFile<'src>,
     /// When `true`, whitespace tokens are skipped.
@@ -51,50 +49,57 @@ impl<'src> Tokens<'src> {
 
     // Returns the position of the next new token, skipping carried-over pieces
     // and blank lines.
-    fn advance_position(&self, mut position: TokenLocation) -> Option<TokenLocation> {
-        if let Some(current_line) = self.source_file.lines.get(position.line) {
+    fn advance_position(&self, position: TokenLocation) -> TokenLocation {
+        let TokenLocation::Position {
+            mut line,
+            mut column,
+        } = position
+        else {
+            return TokenLocation::EndOfFile;
+        };
+        if let Some(current_line) = self.source_file.lines.get(line) {
             // `Line::len()` also counts a possible token that continued from
             // the previous line.
-            if position.column + 1 < current_line.len() {
-                position.column += 1;
-                return Some(position);
+            if column + 1 < current_line.len() {
+                column += 1;
+                return TokenLocation::Position { line, column };
             }
         }
         // Current line is exhausted: walk downward until we find the first line
         // that **owns local tokens**, because we only want *new* token,
         // not continued from previous lines (they were already iterated over).
-        position.line += 1;
-        while let Some(next_line) = self.source_file.lines.get(position.line) {
+        line += 1;
+        while let Some(next_line) = self.source_file.lines.get(line) {
             if next_line.local_range().is_some() {
                 // Start at the first *local* token,
                 // skipping any carried-over one
-                position.column = if next_line.continued_from.is_some() {
+                column = if next_line.continued_from.is_some() {
                     1
                 } else {
                     0
                 };
-                return Some(position);
+                return TokenLocation::Position { line, column };
             }
-            position.line += 1; // keep skipping empty / pure-carried lines
+            line += 1; // keep skipping empty / pure-carried lines
         }
         // No more tokens.
-        None
+        TokenLocation::EndOfFile
     }
 
     // Creates a new iterator.
     fn new(source_file: &'src TokenizedFile) -> Tokens<'src> {
         let mut new_iterator = Tokens {
             source_file,
-            cursor: Some(TokenLocation { line: 0, column: 0 }),
+            cursor: TokenLocation::Position { line: 0, column: 0 },
             skip_whitespace: false,
         };
         // We need to land on the first existing token so [`Iterator::next`]
         // can assume cursor is valid.
-        while let Some(token_position) = new_iterator.cursor {
-            if new_iterator.source_file.get(token_position).is_some() {
+        while new_iterator.cursor != TokenLocation::EndOfFile {
+            if new_iterator.source_file.get(new_iterator.cursor).is_some() {
                 break;
             }
-            new_iterator.cursor = new_iterator.advance_position(token_position);
+            new_iterator.cursor = new_iterator.advance_position(new_iterator.cursor);
         }
         new_iterator
     }
@@ -105,16 +110,17 @@ impl<'src> Iterator for Tokens<'src> {
 
     fn next(&mut self) -> Option<Self::Item> {
         // We only ever loop to discard whitespaces when the flag is on
-        loop {
-            let current_cursor = self.cursor?;
-            let token_piece = *self.source_file.get(current_cursor)?;
-            self.cursor = self.advance_position(current_cursor);
+        while self.cursor != TokenLocation::EndOfFile {
+            let token_location = self.cursor;
+            let token_piece = *self.source_file.get(self.cursor)?;
+            self.cursor = self.advance_position(self.cursor);
 
             // Optional whitespace-skip
             if !self.skip_whitespace || !token_piece.token.is_whitespace() {
-                return Some((current_cursor, token_piece));
+                return Some((token_location, token_piece));
             }
         }
+        None
     }
 }
 
@@ -148,8 +154,11 @@ impl<'src> TokenizedFile<'src> {
     /// ```
     #[track_caller]
     pub fn get(&self, position: TokenLocation) -> Option<&TokenPiece> {
-        let line = self.lines.get(position.line)?;
-        let column = position.column;
+        let TokenLocation::Position { line, column } = position else {
+            return None;
+        };
+        let line = self.lines.get(line)?;
+        let column = column;
         if column >= line.len() {
             return None;
         }

diff --git a/rottlib/src/lexer/mod.rs b/rottlib/src/lexer/mod.rs
index 6d34a33..3fb4001 100644
--- a/rottlib/src/lexer/mod.rs
+++ b/rottlib/src/lexer/mod.rs
@@ -67,14 +67,19 @@ pub struct TokenPiece<'src> {
 /// Defines location of a token inside [`TokenizedFile`] in a form convenient
 /// for communicating through LSP.
 #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
-pub struct TokenLocation {
-    /// 0-based line number.
-    pub line: usize,
-    /// 0-based index of a token in the line, possibly including the token that
-    /// has continued from the previous line.
-    ///
-    /// Columns count tokens, not bytes or chars.
-    pub column: usize,
+pub enum TokenLocation {
+    /// Actual position of some token in the file.
+    Position {
+        /// 0-based line number.
+        line: usize,
+        /// 0-based index of a token in the line, possibly including the token that
+        /// has continued from the previous line.
+        ///
+        /// Columns count tokens, not bytes or chars.
+        column: usize,
+    },
+    /// Position at the end of the file.
+    EndOfFile,
 }
 
 /// A tokenized, lossless representation of an UnrealScript source file.
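
Postscript, not part of the patch: since the iterator's cursor changed from
`Option<TokenLocation>` to `TokenLocation`, exhaustion is now signalled by the
`EndOfFile` variant rather than `None`, so call sites match on the enum's
variants. A minimal sketch of such a call site, assuming `lexer` is a public
module of the `rottlib` crate; the `describe` helper is a hypothetical example,
not code from this patch:

    use rottlib::lexer::TokenLocation;

    /// Hypothetical helper: renders a token location for diagnostics.
    fn describe(location: TokenLocation) -> String {
        match location {
            // `column` counts tokens, not bytes or chars.
            TokenLocation::Position { line, column } => {
                format!("line {line}, token {column}")
            }
            TokenLocation::EndOfFile => "end of file".to_string(),
        }
    }

Note on the derives kept on `TokenLocation`: `PartialOrd`/`Ord` compare enum
variants in declaration order, so `EndOfFile` sorts after every
`Position { .. }`, which matches a cursor that only ever advances toward the
end of the file.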