Add EndOfFile variant to TokenLocation

This commit is contained in:
dkanus 2025-09-16 08:19:42 +07:00
parent 7ed934e2b8
commit 0fa8140644
2 changed files with 47 additions and 33 deletions

View File

@ -28,9 +28,7 @@ use super::{TokenLocation, TokenPiece, TokenizedFile};
#[derive(Clone, Debug)] #[derive(Clone, Debug)]
pub struct Tokens<'src> { pub struct Tokens<'src> {
/// [`TokenLocation`] of the next token to be returned. /// [`TokenLocation`] of the next token to be returned.
/// cursor: TokenLocation,
/// [`None`] means the iterator has been exhausted.
cursor: Option<TokenLocation>,
/// [`TokenizedFile`] whose tokens we're iterating over. /// [`TokenizedFile`] whose tokens we're iterating over.
source_file: &'src TokenizedFile<'src>, source_file: &'src TokenizedFile<'src>,
/// When `true`, whitespace tokens are skipped. /// When `true`, whitespace tokens are skipped.
@ -51,50 +49,57 @@ impl<'src> Tokens<'src> {
// Returns the position of the next new token, skipping carried-over pieces // Returns the position of the next new token, skipping carried-over pieces
// and blank lines. // and blank lines.
fn advance_position(&self, mut position: TokenLocation) -> Option<TokenLocation> { fn advance_position(&self, position: TokenLocation) -> TokenLocation {
if let Some(current_line) = self.source_file.lines.get(position.line) { let TokenLocation::Position {
mut line,
mut column,
} = position
else {
return TokenLocation::EndOfFile;
};
if let Some(current_line) = self.source_file.lines.get(line) {
// `Line::len()` also counts a possible token that continued from // `Line::len()` also counts a possible token that continued from
// the previous line. // the previous line.
if position.column + 1 < current_line.len() { if column + 1 < current_line.len() {
position.column += 1; column += 1;
return Some(position); return TokenLocation::Position { line, column };
} }
} }
// Current line is exhausted: walk downward until we find the first line // Current line is exhausted: walk downward until we find the first line
// that **owns local tokens**, because we only want *new* tokens, // that **owns local tokens**, because we only want *new* tokens,
// not continued from previous lines (they were already iterated over). // not continued from previous lines (they were already iterated over).
position.line += 1; line += 1;
while let Some(next_line) = self.source_file.lines.get(position.line) { while let Some(next_line) = self.source_file.lines.get(line) {
if next_line.local_range().is_some() { if next_line.local_range().is_some() {
// Start at the first *local* token, // Start at the first *local* token,
// skipping any carried-over one // skipping any carried-over one
position.column = if next_line.continued_from.is_some() { column = if next_line.continued_from.is_some() {
1 1
} else { } else {
0 0
}; };
return Some(position); return TokenLocation::Position { line, column };
} }
position.line += 1; // keep skipping empty / pure-carried lines line += 1; // keep skipping empty / pure-carried lines
} }
// No more tokens. // No more tokens.
None TokenLocation::EndOfFile
} }
// Creates a new iterator. // Creates a new iterator.
fn new(source_file: &'src TokenizedFile) -> Tokens<'src> { fn new(source_file: &'src TokenizedFile) -> Tokens<'src> {
let mut new_iterator = Tokens { let mut new_iterator = Tokens {
source_file, source_file,
cursor: Some(TokenLocation { line: 0, column: 0 }), cursor: TokenLocation::Position { line: 0, column: 0 },
skip_whitespace: false, skip_whitespace: false,
}; };
// We need to land on the first existing token so [`Iterator::next`] // We need to land on the first existing token so [`Iterator::next`]
// can assume cursor is valid. // can assume cursor is valid.
while let Some(token_position) = new_iterator.cursor { while new_iterator.cursor != TokenLocation::EndOfFile {
if new_iterator.source_file.get(token_position).is_some() { if new_iterator.source_file.get(new_iterator.cursor).is_some() {
break; break;
} }
new_iterator.cursor = new_iterator.advance_position(token_position); new_iterator.cursor = new_iterator.advance_position(new_iterator.cursor);
} }
new_iterator new_iterator
} }
@ -105,16 +110,17 @@ impl<'src> Iterator for Tokens<'src> {
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
// We only ever loop to discard whitespaces when the flag is on // We only ever loop to discard whitespaces when the flag is on
loop { while self.cursor != TokenLocation::EndOfFile {
let current_cursor = self.cursor?; let token_location = self.cursor;
let token_piece = *self.source_file.get(current_cursor)?; let token_piece = *self.source_file.get(self.cursor)?;
self.cursor = self.advance_position(current_cursor); self.cursor = self.advance_position(self.cursor);
// Optional whitespace-skip // Optional whitespace-skip
if !self.skip_whitespace || !token_piece.token.is_whitespace() { if !self.skip_whitespace || !token_piece.token.is_whitespace() {
return Some((current_cursor, token_piece)); return Some((token_location, token_piece));
} }
} }
None
} }
} }
@ -148,8 +154,11 @@ impl<'src> TokenizedFile<'src> {
/// ``` /// ```
#[track_caller] #[track_caller]
pub fn get(&self, position: TokenLocation) -> Option<&TokenPiece> { pub fn get(&self, position: TokenLocation) -> Option<&TokenPiece> {
let line = self.lines.get(position.line)?; let TokenLocation::Position { line, column } = position else {
let column = position.column; return None;
};
let line = self.lines.get(line)?;
let column = column;
if column >= line.len() { if column >= line.len() {
return None; return None;
} }

View File

@ -67,14 +67,19 @@ pub struct TokenPiece<'src> {
/// Defines location of a token inside [`TokenizedFile`] in a form convenient /// Defines location of a token inside [`TokenizedFile`] in a form convenient
/// for communicating through LSP. /// for communicating through LSP.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct TokenLocation { pub enum TokenLocation {
/// 0-based line number. /// Actual position of some token in the file.
pub line: usize, Position {
/// 0-based index of a token in the line, possibly including the token that /// 0-based line number.
/// has continued from the previous line. line: usize,
/// /// 0-based index of a token in the line, possibly including the token that
/// Columns count tokens, not bytes or chars. /// has continued from the previous line.
pub column: usize, ///
/// Columns count tokens, not bytes or chars.
column: usize,
},
/// Position at the end-of-file.
EndOfFile,
} }
/// A tokenized, lossless representation of an UnrealScript source file. /// A tokenized, lossless representation of an UnrealScript source file.