Fix documentation and comments

dkanus 2025-08-11 03:31:26 +07:00
parent 933722bd42
commit d519ecab2e
2 changed files with 28 additions and 19 deletions


@@ -139,7 +139,7 @@ impl<'src> TokenizedFile<'src> {
/// ## Examples /// ## Examples
/// ///
/// ```rust /// ```rust
/// use mycrate::{TokenizedFile, TokenLocation, Token}; /// use super::{TokenizedFile, TokenLocation, Token};
/// let file = TokenizedFile::from_str("0 / 0"); /// let file = TokenizedFile::from_str("0 / 0");
/// assert_eq!( /// assert_eq!(
/// file.get(TokenLocation { line: 0, column: 2 }).map(|p| p.token), /// file.get(TokenLocation { line: 0, column: 2 }).map(|p| p.token),


@@ -23,11 +23,11 @@
//! compiled with `debug` feature enabled. They live in the [`debug_tools`] //! compiled with `debug` feature enabled. They live in the [`debug_tools`]
//! extension trait, implemented for [`TokenizedFile`]. //! extension trait, implemented for [`TokenizedFile`].
//! //!
//! ``` //! ```rust
//! // bring the trait into scope //! // bring the trait into scope
//! use lexer::DebugTools; //! use lexer::DebugTools;
//! //!
//! let file = TokenizedFile::from_str(src); //! let file = TokenizedFile::from_str("local int myValue;");
//! file.debug_dump(); // pretty-print token layout //! file.debug_dump(); // pretty-print token layout
//! let text = file.to_source(); // reconstruct original text //! let text = file.to_source(); // reconstruct original text
//! ``` //! ```
@@ -64,7 +64,7 @@ pub struct TokenPiece<'src> {
pub length_utf16: usize, pub length_utf16: usize,
} }
/// Defines location of a token inside [`TokenizedFile`] in a way, convenient /// Defines location of a token inside [`TokenizedFile`] in a form convenient
/// for communicating through LSP. /// for communicating through LSP.
#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)] #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord)]
pub struct TokenLocation { pub struct TokenLocation {
@@ -72,6 +72,8 @@ pub struct TokenLocation {
pub line: usize, pub line: usize,
/// 0-based index of a token in the line, possibly including the token that /// 0-based index of a token in the line, possibly including the token that
/// has continued from the previous line. /// has continued from the previous line.
///
/// Columns count tokens, not bytes or chars.
pub column: usize, pub column: usize,
} }
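The token-indexed column is easy to misread as a character offset, so here is a minimal usage sketch. It assumes the crate is the `lexer` crate named in the module docs, that `TokenizedFile` and `TokenLocation` are re-exported at its root, and that `Token` implements `Debug`; whether whitespace gets its own tokens is not shown in this diff, so the sketch prints what it finds rather than asserting a particular lexeme.

```rust
use lexer::{TokenizedFile, TokenLocation};

fn main() {
    let file = TokenizedFile::from_str("local int myValue;");
    // Column 2 addresses the third token of line 0, however many bytes or
    // UTF-16 units the earlier lexemes occupy.
    if let Some(piece) = file.get(TokenLocation { line: 0, column: 2 }) {
        println!("line 0, column 2 -> {:?} ({:?})", piece.token, piece.lexeme);
    }
}
```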
@@ -102,6 +104,10 @@ struct Tokenizer<'src> {
slice_start_index: usize, slice_start_index: usize,
/// When a multi-line token is being scanned, stores the 0-based line /// When a multi-line token is being scanned, stores the 0-based line
/// on which it started; [`None`] otherwise. /// on which it started; [`None`] otherwise.
///
/// `Some(line_idx)` iff the current line is within a multi-line token that
/// started on `line_idx`; it is consumed exactly once by
/// [`Self::commit_current_line`].
multi_line_start: Option<usize>, multi_line_start: Option<usize>,
/// Set to [`true`] if the lexer reported any error tokens. /// Set to [`true`] if the lexer reported any error tokens.
had_errors: bool, had_errors: bool,
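The "consumed exactly once" wording suggests the marker is taken, not merely read. The real `commit_current_line` body is not part of this diff, so the following is only a hedged, self-contained model of that invariant built around `Option::take`.

```rust
// Toy model only: mirrors the `multi_line_start` bookkeeping described in the
// doc comment above, not the crate's actual implementation.
struct LineTracker {
    multi_line_start: Option<usize>,
    committed: Vec<String>,
}

impl LineTracker {
    fn commit_current_line(&mut self) {
        // `take()` reads and clears the marker in one step, so a multi-line
        // token contributes its start line to at most one committed line.
        match self.multi_line_start.take() {
            Some(start) => self.committed.push(format!("spanned from line {start}")),
            None => self.committed.push("local tokens only".to_string()),
        }
    }
}

fn main() {
    let mut tracker = LineTracker { multi_line_start: Some(3), committed: Vec::new() };
    tracker.commit_current_line(); // consumes the marker
    tracker.commit_current_line(); // marker is already gone
    assert_eq!(tracker.committed, ["spanned from line 3", "local tokens only"]);
}
```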
@@ -141,7 +147,7 @@ impl<'src> TokenizedFile<'src> {
/// ```rust /// ```rust
/// let tokenized_file = TokenizedFile::from_str("function test() {}"); /// let tokenized_file = TokenizedFile::from_str("function test() {}");
/// if tokenized_file.has_errors() { /// if tokenized_file.has_errors() {
/// println!("Error while parsing file: {}", path.display()); /// println!("Error while parsing file.");
/// } /// }
/// ``` /// ```
#[inline] #[inline]
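A short sketch tying the two doc examples together: tokenize, then dump the layout only when errors were reported. It assumes the `debug` feature is enabled and that `TokenizedFile` is re-exported at the crate root like `DebugTools`; only names already shown in this diff are used.

```rust
use lexer::{DebugTools, TokenizedFile};

fn main() {
    let file = TokenizedFile::from_str("function test() {}");
    if file.has_errors() {
        // Pretty-print the token layout only when the lexer hit error tokens.
        file.debug_dump();
    }
}
```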
@@ -170,7 +176,7 @@ type TokenIdx = usize;
/// Representation of a single physical line of the source file. /// Representation of a single physical line of the source file.
/// ///
/// [`Range<TokenIndex>`] are used instead of slices to avoid creating /// [`Range<TokenIdx>`] are used instead of slices to avoid creating
/// a self-referential struct (with [`TokenizedFile`]), which Rust forbids. /// a self-referential struct (with [`TokenizedFile`]), which Rust forbids.
#[derive(Clone, Debug, Hash, PartialEq, Eq)] #[derive(Clone, Debug, Hash, PartialEq, Eq)]
struct Line { struct Line {
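The rationale above (index ranges instead of slices, to avoid a self-referential struct) is worth a standalone illustration. This is not the crate's code, just a hedged sketch of the same pattern: the owner keeps one buffer, the per-line data keeps only `Range<usize>` values, and slices are materialized on demand.

```rust
use std::ops::Range;

// Toy stand-ins for TokenPiece / Line, used only to illustrate the pattern.
#[derive(Debug)]
struct Piece(&'static str);

struct File {
    buffer: Vec<Piece>,
    // Storing `Range<usize>` instead of `&[Piece]` keeps `File` free of
    // internal borrows, which Rust's ownership rules would otherwise forbid.
    lines: Vec<Range<usize>>,
}

impl File {
    fn line(&self, idx: usize) -> &[Piece] {
        // The slice is created on demand, borrowing `buffer` only here.
        &self.buffer[self.lines[idx].clone()]
    }
}

fn main() {
    let file = File {
        buffer: vec![Piece("local"), Piece("int"), Piece("i"), Piece(";")],
        lines: vec![0..4],
    };
    println!("{:?}", file.line(0));
}
```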
@@ -214,7 +220,7 @@ impl Line {
/// Returns a range of tokens inside [`TokenizedFile::buffer`] that start /// Returns a range of tokens inside [`TokenizedFile::buffer`] that start
/// on this line. /// on this line.
/// ///
/// [`None`] means there is no such tokens. Otherwise range is guaranteed /// [`None`] means there are no such tokens. Otherwise range is guaranteed
/// to not be empty. /// to not be empty.
#[inline] #[inline]
fn local_range(&self) -> Option<Range<TokenIdx>> { fn local_range(&self) -> Option<Range<TokenIdx>> {
@@ -225,7 +231,7 @@ impl Line {
} }
} }
/// Returns amount of tokens of the line. /// Returns the number of tokens on this line.
/// ///
/// Counts both tokens that started on this line and tokens that continued /// Counts both tokens that started on this line and tokens that continued
/// from the previous one. /// from the previous one.
@@ -246,7 +252,7 @@ impl<'src> Tokenizer<'src> {
} }
} }
/// Handles tokens that never span multiple lines. /// Handles simple tokens that *never* span multiple lines, allowing us to
/// skip a lot of work.
fn process_single_line_token(&mut self, token_piece: TokenPiece<'src>) { fn process_single_line_token(&mut self, token_piece: TokenPiece<'src>) {
if token_piece.token.is_newline() { if token_piece.token.is_newline() {
self.line_number += 1; self.line_number += 1;
@@ -257,7 +264,7 @@ impl<'src> Tokenizer<'src> {
} }
} }
/// Handles tokens that may contain one or more newline characters. /// Handles tokens that might contain one or more newline characters.
fn process_multi_line_token(&mut self, token_piece: TokenPiece<'src>) { fn process_multi_line_token(&mut self, token_piece: TokenPiece<'src>) {
let start_line = self.line_number; let start_line = self.line_number;
let newline_count = count_line_breaks(token_piece.lexeme); let newline_count = count_line_breaks(token_piece.lexeme);
@@ -271,12 +278,15 @@ impl<'src> Tokenizer<'src> {
// We only need to commit the line if this token actually ended the line // We only need to commit the line if this token actually ended the line
if newline_count > 0 { if newline_count > 0 {
self.commit_current_line(); self.commit_current_line();
// We only need to insert one `Line::Spanned(base)` per *interior* // We only need to insert one `Line::spanned(start_line)` per
// newline, so `newline_count - 1` such lines // *interior* line:
// (e.g. 2 line breaks in block comment -> it has //
// exactly `1` interior line) // standalone | local int i = /* Now we start long comment
let insert_count = newline_count - 1; // spanned | with three line breaks and *exactly* two
for _ in 0..insert_count { // spanned | inner lines that contain nothing but
// spanned_with_tokens | comment bytes! */ 0;
let inner_lines_count = newline_count - 1;
for _ in 0..inner_lines_count {
self.lines.push(Line::spanned(start_line)); self.lines.push(Line::spanned(start_line));
} }
// This is called *after* `commit_current_line()` cleared previous // This is called *after* `commit_current_line()` cleared previous
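The `newline_count - 1` arithmetic maps line breaks to interior lines, and a tiny self-contained check makes it concrete. This sketch counts only `\n`, whereas the crate's `count_line_breaks` below also appears to handle `\r`, so treat it as an illustration of the arithmetic rather than of the real counter.

```rust
fn main() {
    // A block comment spanning four physical lines contains three line
    // breaks, and only the two middle lines consist purely of comment bytes.
    let lexeme = "/* Now we start long comment\nwith three line breaks and *exactly* two\ninner lines that contain nothing but\ncomment bytes! */";
    let newline_count = lexeme.matches('\n').count();
    let inner_lines_count = newline_count.saturating_sub(1);
    assert_eq!(newline_count, 3);
    assert_eq!(inner_lines_count, 2);
}
```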
@@ -313,7 +323,7 @@ impl<'src> Tokenizer<'src> {
/// Finishes tokenization, converting accumulated data into /// Finishes tokenization, converting accumulated data into
/// [`TokenizedFile`]. /// [`TokenizedFile`].
fn into_tokenized_file(mut self) -> TokenizedFile<'src> { fn into_tokenized_file(mut self) -> TokenizedFile<'src> {
// Commit any trailing tokens // Flush trailing tokens for which `commit` wasn't auto-triggered
self.commit_current_line(); self.commit_current_line();
// If we still have a `multi_line_start` // If we still have a `multi_line_start`
// (i.e. a pure multi-line token with no local tokens on its last line), // (i.e. a pure multi-line token with no local tokens on its last line),
@@ -322,7 +332,6 @@ impl<'src> Tokenizer<'src> {
self.lines.push(Line::spanned(from)); self.lines.push(Line::spanned(from));
} }
// Optimize for size
self.buffer.shrink_to_fit(); self.buffer.shrink_to_fit();
self.lines.shrink_to_fit(); self.lines.shrink_to_fit();
@@ -343,7 +352,7 @@ fn make_token_piece<'src>(token: Token, text: &'src str) -> TokenPiece<'src> {
} }
} }
/// Counts the number of new lines in given text. /// Counts the number of newlines in given text.
fn count_line_breaks(text: &str) -> usize { fn count_line_breaks(text: &str) -> usize {
let mut bytes_iterator = text.as_bytes().iter().peekable(); let mut bytes_iterator = text.as_bytes().iter().peekable();
let mut newline_count = 0; let mut newline_count = 0;
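The rest of `count_line_breaks` is cut off in this view; the peekable byte iterator suggests a `\r\n` pair is folded into a single break. Below is a hedged sketch of a counter with that behavior, which is an assumption since the remainder of the function is not shown here.

```rust
/// Counts line breaks, treating "\r\n", lone '\r', and lone '\n' as one break each.
fn count_line_breaks(text: &str) -> usize {
    let mut bytes_iterator = text.as_bytes().iter().peekable();
    let mut newline_count = 0;
    while let Some(&byte) = bytes_iterator.next() {
        match byte {
            b'\n' => newline_count += 1,
            b'\r' => {
                newline_count += 1;
                // Swallow the '\n' of a "\r\n" pair so it is not counted twice.
                if bytes_iterator.peek() == Some(&&b'\n') {
                    bytes_iterator.next();
                }
            }
            _ => {}
        }
    }
    newline_count
}

fn main() {
    assert_eq!(count_line_breaks("a\r\nb\nc\r"), 3);
}
```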