Add EndOfFile variant to TokenLocation
parent 7ed934e2b8
commit 0fa8140644
@@ -28,9 +28,7 @@ use super::{TokenLocation, TokenPiece, TokenizedFile};
 #[derive(Clone, Debug)]
 pub struct Tokens<'src> {
     /// [`TokenLocation`] of the next token to be returned.
-    ///
-    /// [`None`] means the iterator has been exhausted.
-    cursor: Option<TokenLocation>,
+    cursor: TokenLocation,
     /// [`TokenizedFile`] whose tokens we're iterating over.
     source_file: &'src TokenizedFile<'src>,
     /// When `true`, whitespace tokens are skipped.
@@ -51,50 +49,57 @@ impl<'src> Tokens<'src> {
 
     // Returns the position of the next new token, skipping carried-over pieces
     // and blank lines.
-    fn advance_position(&self, mut position: TokenLocation) -> Option<TokenLocation> {
-        if let Some(current_line) = self.source_file.lines.get(position.line) {
+    fn advance_position(&self, position: TokenLocation) -> TokenLocation {
+        let TokenLocation::Position {
+            mut line,
+            mut column,
+        } = position
+        else {
+            return TokenLocation::EndOfFile;
+        };
+        if let Some(current_line) = self.source_file.lines.get(line) {
             // `Line::len()` also counts a possible token that continued from
             // the previous line.
-            if position.column + 1 < current_line.len() {
-                position.column += 1;
-                return Some(position);
+            if column + 1 < current_line.len() {
+                column += 1;
+                return TokenLocation::Position { line, column };
             }
         }
         // Current line is exhausted: walk downward until we find the first line
         // that **owns local tokens**, because we only want *new* tokens,
         // not ones continued from previous lines (they were already iterated over).
-        position.line += 1;
-        while let Some(next_line) = self.source_file.lines.get(position.line) {
+        line += 1;
+        while let Some(next_line) = self.source_file.lines.get(line) {
             if next_line.local_range().is_some() {
                 // Start at the first *local* token,
                 // skipping any carried-over one.
-                position.column = if next_line.continued_from.is_some() {
+                column = if next_line.continued_from.is_some() {
                     1
                 } else {
                     0
                 };
-                return Some(position);
+                return TokenLocation::Position { line, column };
             }
-            position.line += 1; // keep skipping empty / pure-carried lines
+            line += 1; // keep skipping empty / pure-carried lines
         }
         // No more tokens.
-        None
+        TokenLocation::EndOfFile
     }
 
     // Creates a new iterator.
     fn new(source_file: &'src TokenizedFile) -> Tokens<'src> {
         let mut new_iterator = Tokens {
             source_file,
-            cursor: Some(TokenLocation { line: 0, column: 0 }),
+            cursor: TokenLocation::Position { line: 0, column: 0 },
             skip_whitespace: false,
         };
         // We need to land on the first existing token so [`Iterator::next`]
         // can assume the cursor is valid.
-        while let Some(token_position) = new_iterator.cursor {
-            if new_iterator.source_file.get(token_position).is_some() {
+        while new_iterator.cursor != TokenLocation::EndOfFile {
+            if new_iterator.source_file.get(new_iterator.cursor).is_some() {
                 break;
             }
-            new_iterator.cursor = new_iterator.advance_position(token_position);
+            new_iterator.cursor = new_iterator.advance_position(new_iterator.cursor);
         }
        new_iterator
     }
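The new control flow is easier to see in isolation. Below is a minimal, self-contained sketch of the same pattern, not the commit's code: `Loc`, `advance`, and the `Vec<Vec<char>>` stand-in for `TokenizedFile` are hypothetical. It shows how the `let ... else` destructure makes end-of-file absorbing: once the cursor reaches `EndOfFile`, advancing keeps it there.

// Hypothetical, simplified model of `advance_position`: an enum variant
// (`EndOfFile`) replaces `Option` as the "no more tokens" sentinel.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum Loc {
    Position { line: usize, column: usize },
    EndOfFile,
}

fn advance(lines: &[Vec<char>], loc: Loc) -> Loc {
    // Destructure with `let ... else`; end-of-file is absorbing.
    let Loc::Position { mut line, mut column } = loc else {
        return Loc::EndOfFile;
    };
    // Next token on the same line, if any.
    if let Some(current) = lines.get(line) {
        if column + 1 < current.len() {
            column += 1;
            return Loc::Position { line, column };
        }
    }
    // Otherwise walk down, skipping empty lines.
    line += 1;
    while let Some(next) = lines.get(line) {
        if !next.is_empty() {
            return Loc::Position { line, column: 0 };
        }
        line += 1;
    }
    Loc::EndOfFile
}

fn main() {
    let lines = vec![vec!['a', 'b'], vec![], vec!['c']];
    let mut loc = Loc::Position { line: 0, column: 0 };
    loc = advance(&lines, loc); // 0:1 ('b')
    loc = advance(&lines, loc); // skips the empty line, lands on 2:0 ('c')
    assert_eq!(loc, Loc::Position { line: 2, column: 0 });
    assert_eq!(advance(&lines, advance(&lines, loc)), Loc::EndOfFile);
}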
@@ -105,16 +110,17 @@ impl<'src> Iterator for Tokens<'src> {
 
     fn next(&mut self) -> Option<Self::Item> {
         // We only ever loop to discard whitespace when the flag is on.
-        loop {
-            let current_cursor = self.cursor?;
-            let token_piece = *self.source_file.get(current_cursor)?;
-            self.cursor = self.advance_position(current_cursor);
+        while self.cursor != TokenLocation::EndOfFile {
+            let token_location = self.cursor;
+            let token_piece = *self.source_file.get(self.cursor)?;
+            self.cursor = self.advance_position(self.cursor);
 
             // Optional whitespace-skip.
             if !self.skip_whitespace || !token_piece.token.is_whitespace() {
-                return Some((current_cursor, token_piece));
+                return Some((token_location, token_piece));
             }
         }
+        None
     }
 }
 
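The `next` rewrite follows the same shape: loop while the cursor is a real position, advance it, and only yield items that survive the optional filter. Here is a generic, hypothetical stand-in (the `next_kept` name and the slice-based cursor are illustrative, not the commit's API):

// Hypothetical stand-in for the iterator's skip loop: advance a cursor,
// optionally discarding items the predicate matches, and return None
// once the cursor runs off the end.
fn next_kept<T: Copy>(items: &[T], cursor: &mut usize, skip: impl Fn(&T) -> bool) -> Option<T> {
    while *cursor < items.len() {
        let item = items[*cursor];
        *cursor += 1;
        if !skip(&item) {
            return Some(item); // either the filter is off or the item passes
        }
    }
    None // cursor exhausted, mirroring the trailing `None` above
}

fn main() {
    let tokens = ["if", " ", "(", " ", "x"];
    let mut cursor = 0;
    // Skip whitespace-only tokens, as `skip_whitespace = true` would.
    while let Some(tok) = next_kept(&tokens, &mut cursor, |t| t.trim().is_empty()) {
        print!("{tok} "); // prints: if ( x
    }
}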
@@ -148,8 +154,11 @@ impl<'src> TokenizedFile<'src> {
     /// ```
     #[track_caller]
     pub fn get(&self, position: TokenLocation) -> Option<&TokenPiece> {
-        let line = self.lines.get(position.line)?;
-        let column = position.column;
+        let TokenLocation::Position { line, column } = position else {
+            return None;
+        };
+        let line = self.lines.get(line)?;
+        let column = column;
         if column >= line.len() {
             return None;
         }
@@ -67,14 +67,19 @@ pub struct TokenPiece<'src> {
 /// Defines location of a token inside [`TokenizedFile`] in a form convenient
 /// for communicating through LSP.
 #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
-pub struct TokenLocation {
-    /// 0-based line number.
-    pub line: usize,
-    /// 0-based index of a token in the line, possibly including the token that
-    /// has continued from the previous line.
-    ///
-    /// Columns count tokens, not bytes or chars.
-    pub column: usize,
-}
+pub enum TokenLocation {
+    /// Actual position of some token in the file.
+    Position {
+        /// 0-based line number.
+        line: usize,
+        /// 0-based index of a token in the line, possibly including the token that
+        /// has continued from the previous line.
+        ///
+        /// Columns count tokens, not bytes or chars.
+        column: usize,
+    },
+    /// Position at the end of the file.
+    EndOfFile,
+}
 
 /// A tokenized, lossless representation of an UnrealScript source file.
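One detail worth noting: the derive list keeps `PartialOrd`/`Ord`, and `EndOfFile` is declared last, so the derived ordering makes every real position compare less than end-of-file. A standalone illustration (the enum is restated here so the snippet runs on its own; field privacy in the real module would require doing this inside the defining module):

// Restatement of the enum above, so this snippet is self-contained.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
enum TokenLocation {
    Position { line: usize, column: usize },
    EndOfFile,
}

fn main() {
    // Derived `Ord` compares the variant tag first (declaration order),
    // then fields lexicographically, so positions sort by (line, column)
    // and every position sorts before `EndOfFile`.
    let a = TokenLocation::Position { line: 0, column: 5 };
    let b = TokenLocation::Position { line: 1, column: 0 };
    assert!(a < b);
    assert!(b < TokenLocation::EndOfFile);
}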