From 8632ba0a86ecabd93b15707d3ef54f1aaaf0ffaa Mon Sep 17 00:00:00 2001 From: dkanus Date: Sun, 12 Apr 2026 17:52:39 +0700 Subject: [PATCH] Add more diagnostic messages --- dev_tests/src/verify_expr.rs | 10 +- rottlib/src/arena.rs | 17 +- rottlib/src/ast/callables.rs | 7 +- rottlib/src/ast/expressions.rs | 12 +- rottlib/src/ast/mod.rs | 82 +--- rottlib/src/ast/types.rs | 6 +- rottlib/src/diagnostics/expression.rs | 190 --------- .../src/diagnostics/expression_diagnostics.rs | 284 +++++++++++++ rottlib/src/diagnostics/mod.rs | 14 +- rottlib/src/diagnostics/render.rs | 207 ++++++++- rottlib/src/lexer/mod.rs | 60 +++ rottlib/src/lexer/queries.rs | 56 ++- rottlib/src/parser/cursor.rs | 32 +- rottlib/src/parser/errors.rs | 43 +- rottlib/src/parser/grammar/class.rs | 42 +- .../grammar/declarations/enum_definition.rs | 6 +- .../grammar/declarations/struct_definition.rs | 8 +- .../grammar/declarations/type_specifier.rs | 13 +- .../declarations/variable_declarators.rs | 6 +- .../src/parser/grammar/expression/block.rs | 8 +- .../parser/grammar/expression/control_flow.rs | 35 +- .../parser/grammar/expression/identifier.rs | 6 +- .../src/parser/grammar/expression/pratt.rs | 29 +- .../src/parser/grammar/expression/primary.rs | 38 +- .../parser/grammar/expression/selectors.rs | 20 +- .../src/parser/grammar/expression/switch.rs | 16 +- .../src/parser/grammar/function/definition.rs | 10 +- rottlib/src/parser/grammar/function/params.rs | 6 +- rottlib/src/parser/grammar/statement.rs | 10 +- rottlib/src/parser/mod.rs | 7 + rottlib/src/parser/recovery.rs | 127 ++++-- rottlib/tests/diagnostics_expressions.rs | 394 ++++++++++++++++++ 32 files changed, 1255 insertions(+), 546 deletions(-) delete mode 100644 rottlib/src/diagnostics/expression.rs create mode 100644 rottlib/src/diagnostics/expression_diagnostics.rs create mode 100644 rottlib/tests/diagnostics_expressions.rs diff --git a/dev_tests/src/verify_expr.rs b/dev_tests/src/verify_expr.rs index f8b0b78..0f13204 100644 --- 
a/dev_tests/src/verify_expr.rs +++ b/dev_tests/src/verify_expr.rs @@ -12,16 +12,16 @@ use rottlib::parser::Parser; mod pretty; +// Each entry pairs a case label with a deliberately malformed expression. + /// Expressions to test. /// /// Add, remove, or edit entries here. /// Using `(&str, &str)` gives each case a human-readable label. const TEST_CASES: &[(&str, &str)] = &[ - ("simple_add", "1 + 2 * 3"), - ("member_call", "Foo.Bar(1, 2)"), - ("index_member", "arr[5].X"), - ("tagged_name", "Class'MyPackage.MyThing'"), - ("broken_expr", "a + (]\n//AAA\n//BBB\n//CCC\n//DDD\n//EEE\n//FFF"), + ("files/P0003_01.uc", "(a + b && c / d ^ e @ f"), + ("files/P0003_02.uc", "(a]"), + ("files/P0003_03.uc", "(a\n;"), ]; /// If true, print the parsed expression using Debug formatting. diff --git a/rottlib/src/arena.rs b/rottlib/src/arena.rs index 77112a8..f83e0cd 100644 --- a/rottlib/src/arena.rs +++ b/rottlib/src/arena.rs @@ -19,8 +19,7 @@ use core::ops::{Deref, DerefMut}; use bumpalo::{Bump, boxed, collections}; -use crate::ast::AstSpan; -use crate::lexer::TokenPosition; +use crate::lexer::{TokenPosition, TokenSpan}; /// Object that manages a separate memory space, which can be deallocated all /// at once after use. @@ -62,7 +61,7 @@ impl Arena { /// The returned node borrows the arena and cannot outlive it. /// If it is still live when the arena is dropped, its destructor is not run. #[must_use] - pub fn alloc_node(&self, value: T, span: AstSpan) -> ArenaNode<'_, T> { + pub fn alloc_node(&self, value: T, span: TokenSpan) -> ArenaNode<'_, T> { ArenaNode { value: boxed::Box::new_in(value, &self.bump), span, @@ -81,7 +80,7 @@ impl Arena { start: TokenPosition, end: TokenPosition, ) -> ArenaNode<'_, T> { - self.alloc_node(value, AstSpan::range(start, end)) + self.alloc_node(value, TokenSpan::range(start, end)) } /// Allocates `value` in this arena and attaches a span covering `at`. /// /// The returned node borrows the arena and cannot outlive it. /// If it is still live when the arena is dropped, its destructor is not run. 
#[must_use] pub fn alloc_node_at(&self, value: T, at: TokenPosition) -> ArenaNode<'_, T> { - self.alloc_node(value, AstSpan::new(at)) + self.alloc_node(value, TokenSpan::new(at)) } } @@ -107,13 +106,13 @@ impl Default for Arena { #[derive(Hash, PartialEq, Eq)] pub struct ArenaNode<'arena, T> { value: boxed::Box<'arena, T>, - span: AstSpan, + span: TokenSpan, } impl<'arena, T> ArenaNode<'arena, T> { /// Creates a new [`ArenaNode`] by allocating `value` in `arena`. #[must_use] - pub fn new_in(value: T, span: AstSpan, arena: &'arena Arena) -> Self { + pub fn new_in(value: T, span: TokenSpan, arena: &'arena Arena) -> Self { Self { value: boxed::Box::new_in(value, &arena.bump), span, @@ -122,13 +121,13 @@ impl<'arena, T> ArenaNode<'arena, T> { /// Returns a mutable reference to the token span covered by this node. #[must_use] - pub const fn span_mut(&mut self) -> &mut AstSpan { + pub const fn span_mut(&mut self) -> &mut TokenSpan { &mut self.span } /// Returns the token span covered by this node. #[must_use] - pub const fn span(&self) -> &AstSpan { + pub const fn span(&self) -> &TokenSpan { &self.span } } diff --git a/rottlib/src/ast/callables.rs b/rottlib/src/ast/callables.rs index 6251c56..5715791 100644 --- a/rottlib/src/ast/callables.rs +++ b/rottlib/src/ast/callables.rs @@ -8,12 +8,11 @@ //! declarations. This module preserves those forms as AST nodes together with //! source-relevant modifier and parameter information. -use super::{ - AstSpan, BlockBody, ExpressionRef, IdentifierToken, InfixOperatorName, PostfixOperatorName, +use super::{BlockBody, ExpressionRef, IdentifierToken, InfixOperatorName, PostfixOperatorName, PrefixOperatorName, TypeSpecifierRef, }; use crate::arena::ArenaVec; -use crate::lexer::{Keyword, TokenPosition}; +use crate::lexer::{TokenSpan, Keyword, TokenPosition}; use crate::arena::ArenaNode; @@ -219,7 +218,7 @@ pub struct CallableModifier { /// Modifier kind. pub kind: CallableModifierKind, /// Span covering the full modifier syntax. 
- pub span: AstSpan, + pub span: TokenSpan, } impl Keyword { diff --git a/rottlib/src/ast/expressions.rs b/rottlib/src/ast/expressions.rs index 998c77e..59cf27c 100644 --- a/rottlib/src/ast/expressions.rs +++ b/rottlib/src/ast/expressions.rs @@ -2,10 +2,10 @@ //! //! This module defines ordinary expressions together with expression-shaped //! control-flow and block forms parsed by the language. -use super::{ - AstSpan, IdentifierToken, InfixOperator, PostfixOperator, PrefixOperator, +use super::{IdentifierToken, InfixOperator, PostfixOperator, PrefixOperator, QualifiedIdentifierRef, StatementRef, }; +use crate::lexer::TokenSpan; use crate::arena::ArenaVec; use super::super::lexer::TokenPosition; @@ -186,7 +186,7 @@ pub type StatementList<'src, 'arena> = ArenaVec<'arena, StatementRef<'src, 'aren #[derive(Debug, PartialEq)] pub struct BlockBody<'src, 'arena> { pub statements: StatementList<'src, 'arena>, - pub span: AstSpan, + pub span: TokenSpan, } /// Stable arena reference to an expression node. 
@@ -254,7 +254,7 @@ impl<'arena> Expression<'_, 'arena> { op: InfixOperator, right_hand_side: ArenaNode<'arena, Self>, ) -> ArenaNode<'arena, Self> { - let span = AstSpan::merge(left_hand_side.span(), right_hand_side.span()); + let span = TokenSpan::merge(left_hand_side.span(), right_hand_side.span()); ArenaNode::new_in( Self::Binary(left_hand_side, op, right_hand_side), span, @@ -271,7 +271,7 @@ impl<'arena> Expression<'_, 'arena> { operation: PrefixOperator, right_hand_side: ArenaNode<'arena, Self>, ) -> ArenaNode<'arena, Self> { - let span = AstSpan::range(operation_position, right_hand_side.span().token_to); + let span = TokenSpan::range(operation_position, right_hand_side.span().end); ArenaNode::new_in(Self::PrefixUnary(operation, right_hand_side), span, arena) } @@ -284,7 +284,7 @@ impl<'arena> Expression<'_, 'arena> { operation: PostfixOperator, operation_position: TokenPosition, ) -> ArenaNode<'arena, Self> { - let span = AstSpan::range(left_hand_side.span().token_from, operation_position); + let span = TokenSpan::range(left_hand_side.span().start, operation_position); ArenaNode::new_in(Self::PostfixUnary(left_hand_side, operation), span, arena) } } diff --git a/rottlib/src/ast/mod.rs b/rottlib/src/ast/mod.rs index 4fba79d..6d789be 100644 --- a/rottlib/src/ast/mod.rs +++ b/rottlib/src/ast/mod.rs @@ -2,7 +2,7 @@ // Need to do a proper check to figure out what should and shouldn't be a node use crate::arena::ArenaVec; -use super::lexer::TokenPosition; +use super::lexer::{TokenPosition, TokenSpan}; use crate::arena::{Arena, ArenaNode, ArenaString}; @@ -30,66 +30,6 @@ pub struct QualifiedIdentifier<'arena> { } pub type QualifiedIdentifierRef<'arena> = ArenaNode<'arena, QualifiedIdentifier<'arena>>; -// All inclusive! 
-#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)] -pub struct AstSpan { - pub token_from: TokenPosition, - pub token_to: TokenPosition, -} - -impl AstSpan { - // -------- existing coord-based API (unchanged externally) -------- - - #[must_use] - pub const fn merge(left_span: &Self, right_span: &Self) -> Self { - Self { - // assumes both were constructed in the same style; good enough for the refactor - token_from: left_span.token_from, - token_to: right_span.token_to, - } - } - - // -------- NEW: 4 constructors based on TokenIndex -------- - - /// Single-token span from an index (coords are dummy for now). - #[inline] - #[must_use] - pub const fn new(single_index: TokenPosition) -> Self { - Self { - token_from: single_index, - token_to: single_index, - } - } - - /// Span from two indices (coords are dummy for now). - #[inline] - #[must_use] - pub const fn range(from: TokenPosition, to: TokenPosition) -> Self { - Self { - token_from: from, - token_to: to, - } - } - - /// Immutable extension by index (keeps coords as-is). - #[inline] - #[must_use] - pub fn extended(&self, right_most_index: TokenPosition) -> Self { - Self { - token_from: self.token_from, - token_to: std::cmp::max(self.token_to, right_most_index), - } - } - - /// In-place extension by index (coords unchanged). - #[inline] - pub fn extend_to(&mut self, right_most_index: TokenPosition) { - if right_most_index > self.token_to { - self.token_to = right_most_index; - } - } -} - impl<'arena> QualifiedIdentifier<'arena> { #[inline] #[must_use] @@ -117,7 +57,7 @@ impl<'arena> QualifiedIdentifier<'arena> { /// Cheap constructor from a single identifier. No Vec allocated. 
pub fn from_ident(arena: &'arena Arena, id: IdentifierToken) -> QualifiedIdentifierRef<'arena> { - let span = AstSpan::new(id.0); + let span = TokenSpan::new(id.0); ArenaNode::new_in( Self { head: id, @@ -132,7 +72,7 @@ impl<'arena> QualifiedIdentifier<'arena> { arena: &'arena Arena, position: TokenPosition, ) -> QualifiedIdentifierRef<'arena> { - let span = AstSpan::new(position); + let span = TokenSpan::new(position); ArenaNode::new_in( Self { head: IdentifierToken(position), @@ -185,8 +125,8 @@ pub struct DeclarationLiteralRef<'src, 'arena> { impl IdentifierToken { #[must_use] - pub const fn span(self) -> AstSpan { - AstSpan::new(self.0) + pub const fn span(self) -> TokenSpan { + TokenSpan::new(self.0) } } @@ -253,7 +193,7 @@ pub struct ClassVarDecl<'src, 'arena> { pub type_spec: TypeSpecifierRef<'src, 'arena>, // Named/InlineEnum/InlineStruct pub declarators: ArenaVec<'arena, VariableDeclaratorRef<'src, 'arena>>, // a, b=expr - pub span: AstSpan, + pub span: TokenSpan, } pub type ClassVarDeclRef<'src, 'arena> = ArenaNode<'arena, ClassVarDecl<'src, 'arena>>; @@ -261,7 +201,7 @@ pub type ClassVarDeclRef<'src, 'arena> = ArenaNode<'arena, ClassVarDecl<'src, 'a pub struct ClassConstDecl<'src, 'arena> { pub name: IdentifierToken, pub value: DeclarationLiteralRef<'src, 'arena>, - pub span: AstSpan, + pub span: TokenSpan, } pub type ClassConstDeclRef<'src, 'arena> = ArenaNode<'arena, ClassConstDecl<'src, 'arena>>; @@ -293,14 +233,14 @@ pub struct ReplicationRule<'src, 'arena> { pub reliability: Reliability, // reliable|unreliable pub condition: Option>, // if () or None pub members: ArenaVec<'arena, IdentifierToken>, // a, b, Foo() - pub span: AstSpan, + pub span: TokenSpan, } pub type ReplicationRuleRef<'src, 'arena> = ArenaNode<'arena, ReplicationRule<'src, 'arena>>; #[derive(Debug)] pub struct ReplicationBlock<'src, 'arena> { pub rules: ArenaVec<'arena, ReplicationRuleRef<'src, 'arena>>, - pub span: AstSpan, + pub span: TokenSpan, } pub type 
ReplicationBlockRef<'src, 'arena> = ArenaNode<'arena, ReplicationBlock<'src, 'arena>>; @@ -320,7 +260,7 @@ pub struct StateDecl<'src, 'arena> { pub ignores: Option>, // 'ignores Foo, Bar;' /// Body: ordinary statements plus nested function definitions (see `Statement::Function`). pub body: ArenaVec<'arena, StatementRef<'src, 'arena>>, - pub span: AstSpan, + pub span: TokenSpan, } pub type StateDeclRef<'src, 'arena> = ArenaNode<'arena, StateDecl<'src, 'arena>>; @@ -328,7 +268,7 @@ pub type StateDeclRef<'src, 'arena> = ArenaNode<'arena, StateDecl<'src, 'arena>> #[derive(Debug)] pub struct ExecDirective<'arena> { pub text: ArenaString<'arena>, // full line without trailing newline(s) - pub span: AstSpan, + pub span: TokenSpan, } pub type ExecDirectiveRef<'arena> = ArenaNode<'arena, ExecDirective<'arena>>; diff --git a/rottlib/src/ast/types.rs b/rottlib/src/ast/types.rs index 6ec341a..d5088c4 100644 --- a/rottlib/src/ast/types.rs +++ b/rottlib/src/ast/types.rs @@ -2,7 +2,7 @@ //! //! This module defines syntactic forms used to represent type names, inline //! type declarations, variable declarators, and declaration modifiers. -use super::{AstSpan, ExpressionRef, IdentifierToken, QualifiedIdentifierRef}; +use super::{TokenSpan, ExpressionRef, IdentifierToken, QualifiedIdentifierRef}; use crate::arena::{ArenaNode, ArenaString, ArenaVec}; use crate::lexer::{Keyword, Token, TokenPosition}; @@ -87,8 +87,8 @@ pub struct StructModifier { impl StructModifier { /// Span covering just this modifier token. #[must_use] - pub const fn span(self) -> AstSpan { - AstSpan::new(self.position) + pub const fn span(self) -> TokenSpan { + TokenSpan::new(self.position) } /// Construct a struct modifier from kind and token position. 
#[must_use] diff --git a/rottlib/src/diagnostics/expression.rs b/rottlib/src/diagnostics/expression.rs deleted file mode 100644 index 519805d..0000000 --- a/rottlib/src/diagnostics/expression.rs +++ /dev/null @@ -1,190 +0,0 @@ -use super::{Diagnostic, DiagnosticBuilder}; -use crate::ast::AstSpan; -use crate::lexer::TokenPosition; -use crate::parser::{ParseError, ParseErrorKind}; -use std::convert::From; - -fn diagnostic_parenthesized_expression_empty( - error: ParseError, - left_parenthesis_position: TokenPosition, -) -> Diagnostic { - DiagnosticBuilder::error("empty parenthesized expression") - .primary_label(error.blame_span, "expected an expression before this `)`") - .secondary_label( - AstSpan::new(left_parenthesis_position), - "parenthesized expression starts here", - ) - .help("Remove the parentheses or put an expression inside them.") - .build() -} - -fn diagnostic_class_type_missing_type_argument( - error: ParseError, - left_angle_bracket_position: TokenPosition, -) -> Diagnostic { - DiagnosticBuilder::error("missing type argument in `class<...>`") - .primary_label(error.blame_span, "expected a type name here") - .secondary_label( - AstSpan::new(left_angle_bracket_position), - "type argument list starts here", - ) - .help("Write a type name, for example `class`.") - .build() -} - -fn diagnostic_class_type_missing_closing_angle_bracket( - error: ParseError, - left_angle_bracket_position: TokenPosition, -) -> Diagnostic { - DiagnosticBuilder::error("missing closing `>` in `class<...>`") - .primary_label(error.blame_span, "expected `>` here") - .secondary_label( - AstSpan::new(left_angle_bracket_position), - "this `<` starts the type argument", - ) - .help("Add `>` to close the class type expression.") - .build() -} - -fn diagnostic_parenthesized_expression_missing_closing_parenthesis( - error: ParseError, - left_parenthesis_position: TokenPosition, -) -> Diagnostic { - DiagnosticBuilder::error("missing closing `)`") - .primary_label(error.blame_span, 
"expected `)` here") - .secondary_label( - AstSpan::new(left_parenthesis_position), - "this `(` starts the parenthesized expression", - ) - .help("Add `)` to close the expression.") - .build() -} - -fn diagnostic_expression_expected(error: ParseError) -> Diagnostic { - let mut builder = DiagnosticBuilder::error("expected expression") - .primary_label(error.blame_span, "this token cannot start an expression") - .help( - "Expressions can start with literals, identifiers, `(`, `{`, or expression keywords.", - ); - - if let Some(related_span) = error.related_span { - builder = builder.secondary_label(related_span, "expression context starts here"); - } - - builder.build() -} - -fn diagnostic_class_type_invalid_type_argument( - error: ParseError, - left_angle_bracket_position: TokenPosition, -) -> Diagnostic { - DiagnosticBuilder::error("invalid type argument in `class<...>`") - .primary_label(error.blame_span, "expected a qualified type name here") - .secondary_label( - AstSpan::new(left_angle_bracket_position), - "type argument list starts here", - ) - .note("Only a qualified type name is accepted between `<` and `>` here.") - .build() -} - -fn diagnostic_new_too_many_arguments( - error: ParseError, - left_parenthesis_position: TokenPosition, -) -> Diagnostic { - DiagnosticBuilder::error("too many arguments in `new(...)`") - .primary_label(error.blame_span, "unexpected extra argument") - .secondary_label( - AstSpan::new(left_parenthesis_position), - "this argument list accepts at most three arguments", - ) - .note("The three slots are `outer`, `name`, and `flags`.") - .help("Remove the extra argument.") - .build() -} - -fn diagnostic_new_missing_closing_parenthesis( - error: ParseError, - left_parenthesis_position: TokenPosition, -) -> Diagnostic { - DiagnosticBuilder::error("missing closing `)` in `new(...)`") - .primary_label(error.blame_span, "expected `)` here") - .secondary_label( - AstSpan::new(left_parenthesis_position), - "this argument list starts here", - ) 
- .help("Add `)` to close the argument list.") - .build() -} - -fn diagnostic_new_missing_class_specifier( - error: ParseError, - new_keyword_position: TokenPosition, -) -> Diagnostic { - let mut builder = DiagnosticBuilder::error("missing class specifier in `new` expression") - .primary_label( - error.blame_span, - "expected the class or expression to instantiate here", - ) - .secondary_label( - AstSpan::new(new_keyword_position), - "`new` expression starts here", - ) - .help("Add the class or expression to instantiate after `new` or `new(...)`."); - - if let Some(related_span) = error.related_span { - builder = builder.secondary_label(related_span, "optional `new(...)` arguments end here"); - } - - builder.build() -} - -impl From for Diagnostic { - fn from(error: ParseError) -> Self { - match error.kind { - ParseErrorKind::ParenthesizedExpressionEmpty { - left_parenthesis_position, - } => diagnostic_parenthesized_expression_empty(error, left_parenthesis_position), - - ParseErrorKind::ClassTypeMissingTypeArgument { - left_angle_bracket_position, - } => diagnostic_class_type_missing_type_argument(error, left_angle_bracket_position), - - ParseErrorKind::ClassTypeMissingClosingAngleBracket { - left_angle_bracket_position, - } => diagnostic_class_type_missing_closing_angle_bracket( - error, - left_angle_bracket_position, - ), - - ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis { - left_parenthesis_position, - } => diagnostic_parenthesized_expression_missing_closing_parenthesis( - error, - left_parenthesis_position, - ), - - ParseErrorKind::ExpressionExpected => diagnostic_expression_expected(error), - - ParseErrorKind::ClassTypeInvalidTypeArgument { - left_angle_bracket_position, - } => diagnostic_class_type_invalid_type_argument(error, left_angle_bracket_position), - - ParseErrorKind::NewTooManyArguments { - left_parenthesis_position, - } => diagnostic_new_too_many_arguments(error, left_parenthesis_position), - - 
ParseErrorKind::NewMissingClosingParenthesis { - left_parenthesis_position, - } => diagnostic_new_missing_closing_parenthesis(error, left_parenthesis_position), - - ParseErrorKind::NewMissingClassSpecifier { - new_keyword_position, - } => diagnostic_new_missing_class_specifier(error, new_keyword_position), - - _ => DiagnosticBuilder::error(format!("error {:?} while parsing", error.kind)) - .primary_label(error.covered_span, "happened here") - .build(), - } - } -} diff --git a/rottlib/src/diagnostics/expression_diagnostics.rs b/rottlib/src/diagnostics/expression_diagnostics.rs new file mode 100644 index 0000000..4aba278 --- /dev/null +++ b/rottlib/src/diagnostics/expression_diagnostics.rs @@ -0,0 +1,284 @@ +use super::{Diagnostic, DiagnosticBuilder}; +use crate::lexer::{TokenPosition, TokenSpan, TokenizedFile}; +use crate::parser::{ParseError, ParseErrorKind}; + +pub(crate) fn diagnostic_from_parse_error<'src>( + error: ParseError, + file: &TokenizedFile<'src>, +) -> Diagnostic { + match error.kind { + ParseErrorKind::ParenthesizedExpressionInvalidStart => { + diagnostic_parenthesized_expression_invalid_start(error, file) + } + + ParseErrorKind::ExpressionExpected => diagnostic_expression_expected(error, file), + + ParseErrorKind::ClassTypeMissingTypeArgument { + left_angle_bracket_position, + } => diagnostic_class_type_missing_type_argument(error, left_angle_bracket_position), + + ParseErrorKind::ClassTypeMissingClosingAngleBracket { + left_angle_bracket_position, + } => { + diagnostic_class_type_missing_closing_angle_bracket(error, left_angle_bracket_position) + } + + ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis => { + diagnostic_parenthesized_expression_missing_closing_parenthesis(error, file) + } + + ParseErrorKind::ClassTypeInvalidTypeArgument { + left_angle_bracket_position, + } => diagnostic_class_type_invalid_type_argument(error, left_angle_bracket_position), + + ParseErrorKind::NewTooManyArguments { + left_parenthesis_position, + } => 
diagnostic_new_too_many_arguments(error, left_parenthesis_position), + + ParseErrorKind::NewMissingClosingParenthesis { + left_parenthesis_position, + } => diagnostic_new_missing_closing_parenthesis(error, left_parenthesis_position), + + ParseErrorKind::NewMissingClassSpecifier { + new_keyword_position, + } => diagnostic_new_missing_class_specifier(error, new_keyword_position), + + _ => DiagnosticBuilder::error(format!("error {:?} while parsing", error.kind)) + .primary_label(error.covered_span, "happened here") + .build(), + } +} + +fn diagnostic_parenthesized_expression_invalid_start<'src>( + mut error: ParseError, + file: &TokenizedFile<'src>, +) -> Diagnostic { + let (header_text, primary_text) = + if let Some(token_text) = file.token_text(error.blame_span.end) { + ( + format!( + "expected expression inside parentheses, found `{}`", + token_text + ), + format!("unexpected `{}`", token_text), + ) + } else if file.is_eof(&error.blame_span.end) { + ( + "expected expression, found end of file".to_string(), + "reached end of file here".to_string(), + ) + } else { + ( + "expected expression inside parentheses".to_string(), + "expected expression".to_string(), + ) + }; + let mut builder = DiagnosticBuilder::error(header_text); + if let Some(related_span) = error.related_spans.get("left_parenthesis") + && !file.same_line(related_span.start, error.blame_span.end) + { + builder = builder.secondary_label(*related_span, "parenthesized expression starts here"); + }; + // It is more clear to see what happened if just the first token is + // highlighted in case blame span never leaves the line + if file.same_line(error.blame_span.start, error.blame_span.end) { + error.blame_span.start = error.blame_span.end; + } + builder + .primary_label(error.blame_span, primary_text) + .code("P0001") + .build() +} + +fn diagnostic_expression_expected<'src>( + mut error: ParseError, + file: &TokenizedFile<'src>, +) -> Diagnostic { + let prefix_operator_span = 
error.related_spans.get("prefix_operator").copied(); + let infix_operator_span = error.related_spans.get("infix_operator").copied(); + let operator_span = infix_operator_span.or(prefix_operator_span); + + let operator_text = operator_span.and_then(|span| file.token_text(span.end)); + + let (header_text, primary_text) = match (operator_text, file.token_text(error.blame_span.end)) { + (Some(operator_text), Some(token_text)) => ( + format!( + "expected expression after `{}`, found `{}`", + operator_text, token_text + ), + format!("unexpected `{}`", token_text), + ), + (Some(operator_text), None) if file.is_eof(&error.blame_span.end) => ( + format!( + "expected expression after `{}`, found end of file", + operator_text + ), + "reached end of file here".to_string(), + ), + (Some(operator_text), None) => ( + format!("expected expression after `{}`", operator_text), + "expected expression".to_string(), + ), + + (None, Some(token_text)) => ( + format!("expected expression, found `{}`", token_text), + format!("unexpected `{}`", token_text), + ), + (None, None) if file.is_eof(&error.blame_span.end) => ( + "expected expression, found end of file".to_string(), + "reached end of file here".to_string(), + ), + (None, None) => ( + "expected expression".to_string(), + "expected expression".to_string(), + ), + }; + + let mut builder = DiagnosticBuilder::error(header_text); + + // Only need this hint if lines are different + if let Some(span) = operator_span + && !file.same_line(span.start, error.blame_span.end) + { + let secondary_text = if let Some(operator_text) = operator_text { + format!("after this `{}`, an expression was expected", operator_text) + } else { + "an expression was expected after this operator".to_string() + }; + + builder = builder.secondary_label(span, secondary_text); + } + + builder + .primary_label(error.blame_span, primary_text) + .code("P0002") + .build() +} + +fn diagnostic_parenthesized_expression_missing_closing_parenthesis<'src>( + mut error: 
ParseError, + file: &TokenizedFile<'src>, +) -> Diagnostic { + let left_parenthesis_span = error.related_spans.get("left_parenthesis").copied(); + + let primary_text = if let Some(token_text) = file.token_text(error.blame_span.end) { + format!("expected `)` before `{}`", token_text) + } else if file.is_eof(&error.blame_span.end) { + "expected `)` before end of file".to_string() + } else { + "expected `)` here".to_string() + }; + + let mut builder = DiagnosticBuilder::error("missing `)` to close parenthesized expression"); + + if let Some(span) = left_parenthesis_span + && !file.same_line(span.start, error.blame_span.end) + { + builder = builder.secondary_label(span, "parenthesized expression starts here"); + } + + // On a single line, point only at the exact place where `)` was expected. + // On multiple lines, keep the full span so the renderer can connect the + // opening `(` to the failure point. + if file.same_line(error.blame_span.start, error.blame_span.end) { + error.blame_span.start = error.blame_span.end; + } + + builder + .primary_label(error.blame_span, primary_text) + .code("P0003") + .build() +} + +fn diagnostic_class_type_missing_type_argument( + error: ParseError, + left_angle_bracket_position: TokenPosition, +) -> Diagnostic { + DiagnosticBuilder::error("missing type argument in `class<...>`") + .primary_label(error.blame_span, "expected a type name here") + .secondary_label( + TokenSpan::new(left_angle_bracket_position), + "type argument list starts here", + ) + .help("Write a type name, for example `class`.") + .build() +} + +fn diagnostic_class_type_missing_closing_angle_bracket( + error: ParseError, + left_angle_bracket_position: TokenPosition, +) -> Diagnostic { + DiagnosticBuilder::error("missing closing `>` in `class<...>`") + .primary_label(error.blame_span, "expected `>` here") + .secondary_label( + TokenSpan::new(left_angle_bracket_position), + "this `<` starts the type argument", + ) + .help("Add `>` to close the class type expression.") 
+ .build() +} + +fn diagnostic_class_type_invalid_type_argument( + error: ParseError, + left_angle_bracket_position: TokenPosition, +) -> Diagnostic { + DiagnosticBuilder::error("invalid type argument in `class<...>`") + .primary_label(error.blame_span, "expected a qualified type name here") + .secondary_label( + TokenSpan::new(left_angle_bracket_position), + "type argument list starts here", + ) + .note("Only a qualified type name is accepted between `<` and `>` here.") + .build() +} + +fn diagnostic_new_too_many_arguments( + error: ParseError, + left_parenthesis_position: TokenPosition, +) -> Diagnostic { + DiagnosticBuilder::error("too many arguments in `new(...)`") + .primary_label(error.blame_span, "unexpected extra argument") + .secondary_label( + TokenSpan::new(left_parenthesis_position), + "this argument list accepts at most three arguments", + ) + .note("The three slots are `outer`, `name`, and `flags`.") + .help("Remove the extra argument.") + .build() +} + +fn diagnostic_new_missing_closing_parenthesis( + error: ParseError, + left_parenthesis_position: TokenPosition, +) -> Diagnostic { + DiagnosticBuilder::error("missing closing `)` in `new(...)`") + .primary_label(error.blame_span, "expected `)` here") + .secondary_label( + TokenSpan::new(left_parenthesis_position), + "this argument list starts here", + ) + .help("Add `)` to close the argument list.") + .build() +} + +fn diagnostic_new_missing_class_specifier( + error: ParseError, + new_keyword_position: TokenPosition, +) -> Diagnostic { + let mut builder = DiagnosticBuilder::error("missing class specifier in `new` expression") + .primary_label( + error.blame_span, + "expected the class or expression to instantiate here", + ) + .secondary_label( + TokenSpan::new(new_keyword_position), + "`new` expression starts here", + ) + .help("Add the class or expression to instantiate after `new` or `new(...)`."); + + if let Some(related_span) = error.related_spans.get("blablabla") { + builder = 
builder.secondary_label(*related_span, "optional `new(...)` arguments end here"); + } + + builder.build() +} diff --git a/rottlib/src/diagnostics/mod.rs b/rottlib/src/diagnostics/mod.rs index 2ce5107..c0855c4 100644 --- a/rottlib/src/diagnostics/mod.rs +++ b/rottlib/src/diagnostics/mod.rs @@ -4,10 +4,11 @@ //! parsing or doing lightweight frontend checks. They are intentionally small, //! depend only on [`AstSpan`], and are easy to construct and store. -mod expression; +mod expression_diagnostics; mod render; -use crate::ast::AstSpan; +use crate::lexer::TokenSpan; +pub(crate) use expression_diagnostics::diagnostic_from_parse_error; /// Classification of a diagnostic by its impact. /// @@ -39,7 +40,7 @@ pub enum Severity { #[derive(Clone, Debug, Hash, PartialEq, Eq)] pub struct Label { /// Span to highlight in source coordinates. - pub span: AstSpan, + pub span: TokenSpan, /// Short inline text shown next to the caret line. pub message: String, } @@ -57,9 +58,6 @@ pub struct Diagnostic { /// Codes must match `^[LPTSXD][0-9]{4}$` where the prefix is the domain: /// `L` lexer, `P` parser, `T` type check, `S` semantics, `X` lints, /// `D` deprecations. - /// - /// Codes help users search documentation and suppress or elevate specific - /// diagnostics. Keep codes stable across releases once published. code: Option, /// Marks the main location the user should look at first. /// @@ -212,7 +210,7 @@ impl DiagnosticBuilder { /// One sentence, starting with lowercase letter, no period at the end. /// Since only one primary label can be specified, the previous primary is /// replaced. - pub fn primary_label(mut self, span: AstSpan, message: impl Into) -> Self { + pub fn primary_label(mut self, span: TokenSpan, message: impl Into) -> Self { self.diagnostic.primary_label = Some(Label { span, message: message.into(), @@ -223,7 +221,7 @@ impl DiagnosticBuilder { /// Add a secondary label. /// /// One sentence, starting with lowercase letter, no period at the end. 
- pub fn secondary_label(mut self, span: AstSpan, message: impl Into) -> Self { + pub fn secondary_label(mut self, span: TokenSpan, message: impl Into) -> Self { self.diagnostic.secondary_labels.push(Label { span, message: message.into(), diff --git a/rottlib/src/diagnostics/render.rs b/rottlib/src/diagnostics/render.rs index b30f4a0..6ac36f8 100644 --- a/rottlib/src/diagnostics/render.rs +++ b/rottlib/src/diagnostics/render.rs @@ -1,6 +1,5 @@ -use crate::ast::AstSpan; use crate::diagnostics::{self, Diagnostic, Severity}; -use crate::lexer::TokenizedFile; +use crate::lexer::{TokenSpan, TokenizedFile}; use core::convert::Into; use crossterm::style::Stylize; @@ -233,9 +232,9 @@ fn max_line_number_width(ranges: &RangeSet) -> usize { } } -fn span_to_range<'src>(span: AstSpan, file: &TokenizedFile<'src>) -> Option> { - let start_line = file.token_line(span.token_from)?; - let end_line = file.token_line(span.token_to)?; +fn span_to_range<'src>(span: TokenSpan, file: &TokenizedFile<'src>) -> Option> { + let start_line = file.token_line(span.start)?; + let end_line = file.token_line(span.end)?; if start_line <= end_line { Some(start_line..=end_line) @@ -276,7 +275,7 @@ impl Diagnostic { SingleRange { label_type: LabelType, }, */ - fn label_data(&self, label_type: LabelType) -> Option<(AstSpan, String)> { + fn label_data(&self, label_type: LabelType) -> Option<(TokenSpan, String)> { match label_type { LabelType::Primary => self .primary_label() @@ -322,11 +321,18 @@ impl Diagnostic { } // !!!!!!!!!!!!!!!! 
// First - update line drawing stack - for (label_type, column) in start_commands { + // First - update line drawing stack + for &(label_type, column) in &start_commands { vertical_stack[column] = Some(label_type); } // Next - draw the line - self.draw_line(current_line, max_line_number_width, file, &vertical_stack); + self.draw_line_with_starts( + current_line, + max_line_number_width, + file, + &vertical_stack, + &start_commands, + ); for label_type in single_commands { self.render_single_command( label_type, @@ -337,7 +343,7 @@ impl Diagnostic { } // Next - render finish commands (drop for now) for (label_type, column) in finish_commands { - self.render_single_command( + self.render_finish_command( label_type, max_line_number_width, file, @@ -349,7 +355,7 @@ impl Diagnostic { // Render some more lines let mut countdown = 3; current_line += 1; - while current_line < commands[i].0 { + while i < commands.len() && current_line < commands[i].0 { if countdown == 0 { if current_line + 1 == commands[i].0 { self.draw_line(current_line, max_line_number_width, file, &vertical_stack); @@ -395,6 +401,53 @@ impl Diagnostic { builder.push_str(&" ".repeat(visible.columns.start)); + let underline_width = (visible.columns.end - visible.columns.start).max(1); + let mut underline_label = if label_type == LabelType::Primary { + "^".repeat(underline_width) + } else { + "-".repeat(underline_width) + }; + underline_label.push_str(&format!(" {}", message)); + + match label_type { + LabelType::Primary => { + if self.severity == Severity::Error { + builder.push_str(&underline_label.red().bold().to_string()); + } else { + builder.push_str(&underline_label.yellow().bold().to_string()); + } + } + LabelType::Secondary(_) => { + builder.push_str(&underline_label.blue().bold().to_string()); + } + } + + println!("{builder}"); + } + + fn render_finish_command<'src>( + &self, + label_type: LabelType, + max_line_number_width: usize, + file: &TokenizedFile<'src>, + vertical_stack: &[Option], + ) { + 
let Some((span, message)) = self.label_data(label_type) else { + return; + }; + + let Some(visible) = file + .token_visible_spans(span.end) + .and_then(|spans| spans.into_iter().last()) + else { + return; + }; + + let mut builder = + self.make_finish_prefix(max_line_number_width, vertical_stack, label_type); + + builder.push_str(&"─".repeat(visible.columns.start).red().to_string()); + let underline_width = (visible.columns.end - visible.columns.start).max(1); let mut underline_label = "^".repeat(underline_width); underline_label.push_str(&format!(" {}", message)); @@ -433,6 +486,26 @@ impl Diagnostic { ); } + fn draw_line_with_starts<'src>( + &self, + current_line: usize, + max_line_number_width: usize, + file: &TokenizedFile<'src>, + vertical_stack: &[Option], + start_commands: &[(LabelType, usize)], + ) { + println!( + "{}{}", + self.make_start_prefix( + LineIndexType::Normal(current_line), + max_line_number_width, + vertical_stack, + start_commands, + ), + file.line_text(current_line).unwrap_or_default() + ); + } + fn make_line_prefix<'src>( &self, current_line: LineIndexType, @@ -455,12 +528,12 @@ impl Diagnostic { let piece = match label { LabelType::Primary => { if self.severity == Severity::Error { - " |".red() + " │".red() } else { - " |".yellow() + " │".yellow() } } - LabelType::Secondary(_) => " |".blue(), + LabelType::Secondary(_) => " │".blue(), } .to_string(); builder.push_str(&piece); @@ -471,6 +544,114 @@ impl Diagnostic { builder } + fn make_start_prefix( + &self, + current_line: LineIndexType, + max_line_number_width: usize, + vertical_stack: &[Option], + start_commands: &[(LabelType, usize)], + ) -> String { + let line_text = match current_line { + LineIndexType::Normal(current_line) => (current_line + 1).to_string(), + LineIndexType::Missing => "".to_string(), + LineIndexType::Ellipsis => "...".to_string(), + }; + let line_padding = " ".repeat(max_line_number_width - line_text.len()); + let mut builder = format!(" {}{} | ", line_padding, 
line_text) + .blue() + .bold() + .to_string(); + + for (column, vertical_line) in vertical_stack.iter().enumerate() { + let piece = match vertical_line { + Some(label) => { + let starts_here = start_commands.iter().any(|(start_label, start_column)| { + *start_label == *label && *start_column == column + }); + + match label { + LabelType::Primary => { + if self.severity == Severity::Error { + if starts_here { + " ╭".red() + } else { + " │".red() + } + } else { + if starts_here { + " ╭".yellow() + } else { + " │".yellow() + } + } + } + LabelType::Secondary(_) => { + if starts_here { + " ╭".blue() + } else { + " │".blue() + } + } + } + .to_string() + } + None => " ".to_string(), + }; + + builder.push_str(&piece); + } + + builder + } + + fn make_finish_prefix( + &self, + max_line_number_width: usize, + vertical_stack: &[Option], + finishing_label: LabelType, + ) -> String { + let line_text = ""; + let line_padding = " ".repeat(max_line_number_width - line_text.len()); + let mut builder = format!(" {}{} | ", line_padding, line_text) + .blue() + .bold() + .to_string(); + + for vertical_line in vertical_stack { + let piece = match vertical_line { + Some(label) if *label == finishing_label => match label { + LabelType::Primary => { + if self.severity == Severity::Error { + " ╰".red() + } else { + " ╰".yellow() + } + } + LabelType::Secondary(_) => " ╰".blue(), + } + .to_string(), + + Some(label) => match label { + LabelType::Primary => { + if self.severity == Severity::Error { + " │".red() + } else { + " │".yellow() + } + } + LabelType::Secondary(_) => " │".blue(), + } + .to_string(), + + None => " ".to_string(), + }; + + builder.push_str(&piece); + } + + builder + } + fn render_header(&self) { let severity_label = match self.severity { Severity::Error => "error".red(), diff --git a/rottlib/src/lexer/mod.rs b/rottlib/src/lexer/mod.rs index 916a6b1..41b8ac1 100644 --- a/rottlib/src/lexer/mod.rs +++ b/rottlib/src/lexer/mod.rs @@ -70,6 +70,66 @@ pub struct TokenData<'src> { 
 #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Default)]
 pub struct TokenPosition(pub usize);
 
+// A contiguous range of tokens; both `start` and `end` are inclusive.
+#[derive(Debug, Hash, PartialEq, Eq, PartialOrd, Ord, Clone, Copy)]
+pub struct TokenSpan {
+    pub start: TokenPosition,
+    pub end: TokenPosition,
+}
+
+impl TokenSpan {
+    // -------- combining spans --------
+
+    #[must_use]
+    pub const fn merge(left_span: &Self, right_span: &Self) -> Self {
+        Self {
+            // Callers must pass spans in source order: `left_span` must not start after `right_span`.
+            start: left_span.start,
+            end: right_span.end,
+        }
+    }
+
+    // -------- construction and extension from token positions --------
+
+    /// Creates a span covering exactly the single token at `single_index`.
+    #[inline]
+    #[must_use]
+    pub const fn new(single_index: TokenPosition) -> Self {
+        Self {
+            start: single_index,
+            end: single_index,
+        }
+    }
+
+    /// Creates a span from `from` to `to`, inclusive on both ends.
+    #[inline]
+    #[must_use]
+    pub const fn range(from: TokenPosition, to: TokenPosition) -> Self {
+        Self {
+            start: from,
+            end: to,
+        }
+    }
+
+    /// Returns a copy of this span, extended (if needed) to include `right_most_index`.
+    #[inline]
+    #[must_use]
+    pub fn extended(&self, right_most_index: TokenPosition) -> Self {
+        Self {
+            start: self.start,
+            end: std::cmp::max(self.end, right_most_index),
+        }
+    }
+
+    /// Extends this span in place (if needed) so that it includes `right_most_index`.
+    #[inline]
+    pub fn extend_to(&mut self, right_most_index: TokenPosition) {
+        if right_most_index > self.end {
+            self.end = right_most_index;
+        }
+    }
+}
+
 /// A tokenized, lossless representation of an `UnrealScript` source file.
#[derive(Clone, Debug, PartialEq, Eq)] pub struct TokenizedFile<'src> { diff --git a/rottlib/src/lexer/queries.rs b/rottlib/src/lexer/queries.rs index 74f2da7..58e2e68 100644 --- a/rottlib/src/lexer/queries.rs +++ b/rottlib/src/lexer/queries.rs @@ -6,6 +6,14 @@ use crate::lexer::{Line, TokenData, TokenPosition, TokenizedFile, VisibleLineSpan}; impl<'src> TokenizedFile<'src> { + pub const fn eof(&self) -> TokenPosition { + TokenPosition(self.buffer.len()) + } + + pub fn is_eof(&self, position: &TokenPosition) -> bool { + position == &self.eof() + } + /// Returns the number of physical lines stored in this file. /// /// Empty line after the trailing newline sequence isn't counted as a line @@ -111,7 +119,12 @@ impl<'src> TokenizedFile<'src> { /// Returns `None` if `position` is out of bounds. #[must_use] pub fn token_line(&self, position: TokenPosition) -> Option { - // Reject invalid token positions early. + // EOF is a valid virtual position: past the end of the last stored line. + if position == self.eof() { + return self.line_count().checked_sub(1); + } + + // Reject invalid non-EOF positions early. self.buffer.get(position.0)?; let line_index = self @@ -183,6 +196,17 @@ impl<'src> TokenizedFile<'src> { /// Returns `None` if `position` is invalid. #[must_use] pub fn token_visible_spans(&self, position: TokenPosition) -> Option> { + // EOF is a virtual zero-width span at the end of the last stored line. 
+ if position == self.eof() { + let line = self.line_count().checked_sub(1)?; + let column = self.line_text(line)?.chars().count(); + + return Some(vec![VisibleLineSpan { + line, + columns: column..column, + }]); + } + let token_piece = self.buffer.get(position.0).copied()?; let start_line = self.token_line(position)?; let start_column = self.token_start_visible_column(position)?; @@ -191,8 +215,6 @@ impl<'src> TokenizedFile<'src> { return Some(Vec::new()); } - // True multi-line token: reuse already computed visible byte segments, - // then convert them into visible character columns. if let Some(segments) = self.multi_line_map.get(&position.0) { let mut out = Vec::with_capacity(segments.len()); @@ -200,15 +222,12 @@ impl<'src> TokenizedFile<'src> { let visible_text = &token_piece.lexeme[byte_range.clone()]; let width = visible_text.chars().count(); - // Empty visible fragment: skip it. - // This matters for things like a token ending with '\n'. if width == 0 { continue; } let line = start_line + segment_index; - // A trailing newline does not create an extra stored physical line. if line >= self.line_count() { break; } @@ -223,8 +242,6 @@ impl<'src> TokenizedFile<'src> { return Some(out); } - // Single-line token, including "can_span_lines" tokens that happen not - // to contain a line break. let width = token_piece.lexeme.chars().count(); Some(vec![VisibleLineSpan { line: start_line, @@ -269,15 +286,9 @@ impl<'src> TokenizedFile<'src> { } #[must_use] - pub fn span_visible_on_line(&self, span: crate::ast::AstSpan) -> Option { - let start = self - .token_visible_spans(span.token_from)? - .into_iter() - .next()?; - let end = self - .token_visible_spans(span.token_to)? 
- .into_iter() - .last()?; + pub fn span_visible_on_line(&self, span: crate::lexer::TokenSpan) -> Option { + let start = self.token_visible_spans(span.start)?.into_iter().next()?; + let end = self.token_visible_spans(span.end)?.into_iter().last()?; if start.line != end.line { return None; @@ -288,4 +299,15 @@ impl<'src> TokenizedFile<'src> { columns: start.columns.start..end.columns.end, }) } + + pub fn token_text(&self, pos: TokenPosition) -> Option<&'src str> { + self.token_at(pos).map(|t| t.lexeme) + } + + pub fn same_line(&self, a: TokenPosition, b: TokenPosition) -> bool { + match (self.token_line(a), self.token_line(b)) { + (Some(x), Some(y)) => x == y, + _ => false, + } + } } diff --git a/rottlib/src/parser/cursor.rs b/rottlib/src/parser/cursor.rs index 8719452..c989e44 100644 --- a/rottlib/src/parser/cursor.rs +++ b/rottlib/src/parser/cursor.rs @@ -5,10 +5,9 @@ //! see [`parser::TriviaKind`]. use std::collections::VecDeque; - +// TODO: NO RETURNING EOF use crate::{ - ast::AstSpan, - lexer::{self, Keyword, Token, TokenPosition}, + lexer::{self, Keyword, Token, TokenPosition, TokenSpan}, parser::{self, ParseResult, Parser, ResultRecoveryExt, trivia::TriviaIndexBuilder}, }; @@ -115,6 +114,13 @@ impl<'src, 'arena> Parser<'src, 'arena> { .map(|(token_position, _)| *token_position) } + #[must_use] + pub(crate) fn peek_position_or_eof(&mut self) -> TokenPosition { + self.peek_buffered_token() + .map(|(token_position, _)| *token_position) + .unwrap_or_else(|| self.file.eof()) + } + /// Returns the next significant token and its lexeme without consuming it. 
/// /// May buffer additional tokens and record skipped trivia, but does not @@ -217,7 +223,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { error_kind: parser::ParseErrorKind, ) -> ParseResult<'src, 'arena, TokenPosition> { self.peek_position() - .ok_or_else(|| self.make_error_here(error_kind)) + .ok_or_else(|| self.make_error_at(error_kind, self.file.eof())) } /// Returns the next significant token and its position without consuming @@ -229,7 +235,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { error_kind: parser::ParseErrorKind, ) -> ParseResult<'src, 'arena, (Token, TokenPosition)> { self.peek_token_and_position() - .ok_or_else(|| self.make_error_here(error_kind)) + .ok_or_else(|| self.make_error_at(error_kind, self.file.eof())) } /// Returns the next significant token, its lexeme, and its position @@ -241,7 +247,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { error_kind: parser::ParseErrorKind, ) -> ParseResult<'src, 'arena, (Token, &'src str, TokenPosition)> { self.peek_token_lexeme_and_position() - .ok_or_else(|| self.make_error_here(error_kind)) + .ok_or_else(|| self.make_error_at(error_kind, self.file.eof())) } /// Advances by one significant token. @@ -293,20 +299,18 @@ impl<'src, 'arena> Parser<'src, 'arena> { expected: Token, error_kind: parser::ParseErrorKind, ) -> ParseResult<'src, 'arena, TokenPosition> { - // Anchors EOF diagnostics at the last consumed token - // when no current token exists. - let anchor = self - .peek_position() - .unwrap_or_else(|| self.last_consumed_position_or_start()); // `Token` equality is enough here because lexeme and position // are stored separately. 
- if self.peek_token() == Some(expected) { + if let Some((token, token_position)) = self.peek_token_and_position() + && token == expected + { self.advance(); - Ok(anchor) + Ok(token_position) } else { + let anchor = self.peek_position().unwrap_or_else(|| self.file.eof()); Err(self .make_error_at(error_kind, anchor) - .blame(AstSpan::new(anchor))) + .blame(TokenSpan::new(anchor))) } } diff --git a/rottlib/src/parser/errors.rs b/rottlib/src/parser/errors.rs index c33bfaa..a00f601 100644 --- a/rottlib/src/parser/errors.rs +++ b/rottlib/src/parser/errors.rs @@ -1,6 +1,8 @@ //! Submodule with parsing related errors. -use crate::{ast::AstSpan, lexer::TokenPosition}; +use std::collections::HashMap; + +use crate::{lexer::TokenSpan, lexer::TokenPosition}; /// Internal parse error kinds. /// @@ -14,14 +16,12 @@ use crate::{ast::AstSpan, lexer::TokenPosition}; /// `UnexpectedToken`, `MultipleDefaults`, etc.). #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] pub enum ParseErrorKind { - // ================== New errors that are 100% used! ================== - // headline: empty parenthesized expression - // primary label on ): expected an expression before this \)'` - // secondary label on (: parenthesized expression starts here - // Remove the parentheses or put an expression inside them. 
- ParenthesizedExpressionEmpty { - left_parenthesis_position: TokenPosition, - }, + /// P0001 + ParenthesizedExpressionInvalidStart, + /// P0002 + ExpressionExpected, + /// P0003 + ParenthesizedExpressionMissingClosingParenthesis, // headline: missing type argument in \class<...>`` // primary label on > or insertion site: expected a type name here // secondary label on < or on class: type argument list starts here @@ -36,17 +36,6 @@ pub enum ParseErrorKind { ClassTypeMissingClosingAngleBracket { left_angle_bracket_position: TokenPosition, }, - // headline: missing closing \)'` - // primary label on the point where ) was expected: expected \)' here` or, if you have a real token there, expected \)' before this token` - // secondary label on the opening (: this \(` starts the parenthesized expression` - // help: Add \)' to close the expression.` - ParenthesizedExpressionMissingClosingParenthesis { - left_parenthesis_position: TokenPosition, - }, - // headline: expected expression - // primary label: this token cannot start an expression - // optional help: Expressions can start with literals, identifiers, \(`, `{`, or expression keywords.` - ExpressionExpected, // headline: invalid type argument in \class<...>`` // primary label on the bad token inside the angle brackets: expected a qualified type name here // secondary label on class or <: while parsing this class type expression @@ -334,17 +323,17 @@ pub enum ParseErrorKind { } /// Enumerates all specific kinds of parsing errors that the parser can emit. -#[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] +#[derive(Clone, Debug, PartialEq, Eq)] #[must_use] pub struct ParseError { /// The specific kind of parse error that occurred. pub kind: ParseErrorKind, pub anchor: TokenPosition, /// Where the user should look first. - pub blame_span: AstSpan, + pub blame_span: TokenSpan, /// The source span in which the error was detected. 
- pub covered_span: AstSpan, - pub related_span: Option, + pub covered_span: TokenSpan, + pub related_spans: HashMap, } pub type ParseResult<'src, 'arena, T> = Result; @@ -362,9 +351,9 @@ impl crate::parser::Parser<'_, '_> { ParseError { kind: error_kind, anchor: position, - blame_span: AstSpan::new(position), - covered_span: AstSpan::new(position), - related_span: None, + blame_span: TokenSpan::new(position), + covered_span: TokenSpan::new(position), + related_spans: HashMap::new(), } } } diff --git a/rottlib/src/parser/grammar/class.rs b/rottlib/src/parser/grammar/class.rs index 1bc5133..62af34d 100644 --- a/rottlib/src/parser/grammar/class.rs +++ b/rottlib/src/parser/grammar/class.rs @@ -3,16 +3,17 @@ #![allow(clippy::all, clippy::pedantic, clippy::nursery)] use crate::ast::{ - AstSpan, BlockBody, ClassConstDecl, ClassConstDeclRef, ClassDeclaration, ClassDefinition, - ClassMember, ClassModifier, ClassModifierRef, ClassVarDecl, ClassVarDeclRef, - DeclarationLiteral, DeclarationLiteralRef, ExecDirective, ExecDirectiveRef, ExpressionRef, - IdentifierToken, Reliability, ReplicationBlock, ReplicationBlockRef, ReplicationRule, - ReplicationRuleRef, StateDecl, StateDeclRef, StateModifier, VariableDeclarator, - VariableDeclaratorRef, + BlockBody, ClassConstDecl, ClassConstDeclRef, ClassDeclaration, ClassDefinition, ClassMember, + ClassModifier, ClassModifierRef, ClassVarDecl, ClassVarDeclRef, DeclarationLiteral, + DeclarationLiteralRef, ExecDirective, ExecDirectiveRef, ExpressionRef, IdentifierToken, + Reliability, ReplicationBlock, ReplicationBlockRef, ReplicationRule, ReplicationRuleRef, + StateDecl, StateDeclRef, StateModifier, VariableDeclarator, VariableDeclaratorRef, }; -use crate::lexer::{Keyword, Token, TokenPosition}; +use crate::lexer::{Keyword, Token, TokenPosition, TokenSpan}; use crate::parser::{ParseErrorKind, ParseResult, ResultRecoveryExt, SyncLevel}; +use std::collections::HashMap; + impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { #[inline] pub 
fn ensure_progress_or_break(&mut self, before: TokenPosition) -> bool { @@ -30,7 +31,7 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { let trimmed = lexeme.trim_end_matches(['\r', '\n']); self.advance(); - let span = AstSpan::range(start_position, self.last_consumed_position_or_start()); + let span = TokenSpan::range(start_position, self.last_consumed_position_or_start()); Ok(self.arena.alloc_node( ExecDirective { text: self.arena.string(trimmed), @@ -80,9 +81,7 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { self.expect( Token::RightBracket, - ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis { - left_parenthesis_position: self.last_consumed_position_or_start(), - }, + ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis, ) .sync_error_at(self, SyncLevel::CloseBracket)?; @@ -106,7 +105,7 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { }; let mut consumed_inside_match = false; - let mut span = AstSpan::new(modifier_position); + let mut span = TokenSpan::new(modifier_position); let modifier = match token { Token::Keyword(Keyword::Final) => Final, @@ -253,7 +252,7 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { self.expect(Token::Semicolon, ParseErrorKind::DeclMissingSemicolon)?; - let span = AstSpan::range(start_position, self.last_consumed_position_or_start()); + let span = TokenSpan::range(start_position, self.last_consumed_position_or_start()); Ok(self.arena.alloc_node( ClassVarDecl { paren_specs, @@ -323,7 +322,7 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { Token::Semicolon, ParseErrorKind::ReplicationRuleMissingSemicolon, )?; - let span = AstSpan::range(start_position, self.last_consumed_position_or_start()); + let span = TokenSpan::range(start_position, self.last_consumed_position_or_start()); Ok(self.arena.alloc_node( ReplicationRule { reliability, @@ -373,7 +372,7 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { } self.expect(Token::RightBrace, 
ParseErrorKind::ReplicationMissingRBrace)?; - let span = AstSpan::range(start_position, self.last_consumed_position_or_start()); + let span = TokenSpan::range(start_position, self.last_consumed_position_or_start()); Ok(self .arena .alloc_node(ReplicationBlock { rules, span }, span)) @@ -460,7 +459,7 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { span: inner_span, } = self.parse_braced_block_statements_tail(opening_brace_position); - let span = AstSpan::range(start_position, inner_span.token_to); + let span = TokenSpan::range(start_position, inner_span.end); Ok(self.arena.alloc_node( StateDecl { name, @@ -850,7 +849,7 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { let value = self.parse_declaration_literal_class()?; self.expect(Token::Semicolon, ParseErrorKind::DeclMissingSemicolon)?; - let span = AstSpan::range(start_position, self.last_consumed_position_or_start()); + let span = TokenSpan::range(start_position, self.last_consumed_position_or_start()); Ok(self .arena @@ -879,7 +878,8 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { } }; - let span = AstSpan::range(identifier.0, self.last_consumed_position_or_start()); + let span = + TokenSpan::range(identifier.0, self.last_consumed_position_or_start()); declarators.push(self.arena.alloc_node( VariableDeclarator { name: identifier, @@ -948,9 +948,9 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { self.report_error(crate::parser::ParseError { kind: ParseErrorKind::ListEmpty, anchor: list_start, - blame_span: AstSpan::range(list_start, list_end), - covered_span: AstSpan::range(list_start, list_end), - related_span: None, + blame_span: TokenSpan::range(list_start, list_end), + covered_span: TokenSpan::range(list_start, list_end), + related_spans: HashMap::new(), }); } diff --git a/rottlib/src/parser/grammar/declarations/enum_definition.rs b/rottlib/src/parser/grammar/declarations/enum_definition.rs index b7387f6..140afbe 100644 --- 
a/rottlib/src/parser/grammar/declarations/enum_definition.rs +++ b/rottlib/src/parser/grammar/declarations/enum_definition.rs @@ -3,9 +3,9 @@ use std::ops::ControlFlow; use crate::arena::ArenaVec; -use crate::ast::{AstSpan, EnumDefRef, EnumDefinition, IdentifierToken}; +use crate::ast::{EnumDefRef, EnumDefinition, IdentifierToken}; use crate::lexer::Token; -use crate::lexer::TokenPosition; +use crate::lexer::{TokenSpan, TokenPosition}; use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel}; #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] @@ -32,7 +32,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { self.expect(Token::RightBrace, ParseErrorKind::EnumNoClosingBrace) .report_error(self); - let span = AstSpan::range( + let span = TokenSpan::range( enum_keyword_position, self.last_consumed_position_or_start(), ); diff --git a/rottlib/src/parser/grammar/declarations/struct_definition.rs b/rottlib/src/parser/grammar/declarations/struct_definition.rs index 2f4153b..235f06d 100644 --- a/rottlib/src/parser/grammar/declarations/struct_definition.rs +++ b/rottlib/src/parser/grammar/declarations/struct_definition.rs @@ -16,11 +16,11 @@ use crate::arena::ArenaVec; use crate::ast::{ - AstSpan, IdentifierToken, QualifiedIdentifierRef, StructDefRef, StructDefinition, StructField, + IdentifierToken, QualifiedIdentifierRef, StructDefRef, StructDefinition, StructField, StructFieldRef, StructModifier, StructModifierKind, TypeSpecifierRef, VarEditorSpecifierRef, VarModifier, }; -use crate::lexer::{Keyword, Token, TokenPosition}; +use crate::lexer::{Keyword, Token, TokenPosition, TokenSpan}; use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel}; #[derive(Debug)] @@ -61,7 +61,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { self.expect(Token::RightBrace, ParseErrorKind::StructMissingRightBrace) .widen_error_span_from(struct_keyword_position) .report_error(self); - let span = AstSpan::range( + let span = TokenSpan::range( struct_keyword_position, 
self.last_consumed_position_or_start(), ); @@ -129,7 +129,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { if declarators.is_empty() { return StructBodyItemParseOutcome::Skip; } - let span = AstSpan::range(var_keyword_position, self.last_consumed_position_or_start()); + let span = TokenSpan::range(var_keyword_position, self.last_consumed_position_or_start()); StructBodyItemParseOutcome::Field(self.arena.alloc_node( StructField { type_specifier: field_prefix.type_specifier, diff --git a/rottlib/src/parser/grammar/declarations/type_specifier.rs b/rottlib/src/parser/grammar/declarations/type_specifier.rs index 6a86a93..bb649a6 100644 --- a/rottlib/src/parser/grammar/declarations/type_specifier.rs +++ b/rottlib/src/parser/grammar/declarations/type_specifier.rs @@ -1,7 +1,7 @@ //! Parsing of type specifiers for Fermented `UnrealScript`. -use crate::ast::{AstSpan, TypeSpecifier, TypeSpecifierRef}; -use crate::lexer::{Keyword, Token, TokenPosition}; +use crate::ast::{TypeSpecifier, TypeSpecifierRef}; +use crate::lexer::{Keyword, Token, TokenPosition, TokenSpan}; use crate::parser::{ParseErrorKind, ParseResult, Parser}; impl<'src, 'arena> Parser<'src, 'arena> { @@ -51,7 +51,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { starting_token_position: TokenPosition, ) -> TypeSpecifierRef<'src, 'arena> { let enum_definition = self.parse_enum_definition_tail(starting_token_position); - let enum_span = AstSpan::range(starting_token_position, enum_definition.span().token_to); + let enum_span = TokenSpan::range(starting_token_position, enum_definition.span().end); self.arena .alloc_node(TypeSpecifier::InlineEnum(enum_definition), enum_span) } @@ -61,8 +61,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { starting_token_position: TokenPosition, ) -> TypeSpecifierRef<'src, 'arena> { let struct_definition = self.parse_struct_definition_tail(starting_token_position); - let struct_span = - AstSpan::range(starting_token_position, struct_definition.span().token_to); + let struct_span = 
TokenSpan::range(starting_token_position, struct_definition.span().end); self.arena .alloc_node(TypeSpecifier::InlineStruct(struct_definition), struct_span) } @@ -81,7 +80,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { Token::Greater, ParseErrorKind::TypeSpecArrayMissingClosingAngle, )?; - let array_span = AstSpan::range(starting_token_position, closing_angle_bracket_position); + let array_span = TokenSpan::range(starting_token_position, closing_angle_bracket_position); Ok(self.arena.alloc_node( TypeSpecifier::Array { @@ -108,7 +107,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { } else { (None, starting_token_position) }; - let span = AstSpan::range(starting_token_position, class_type_end); + let span = TokenSpan::range(starting_token_position, class_type_end); Ok(self .arena .alloc_node(TypeSpecifier::Class(inner_type_name), span)) diff --git a/rottlib/src/parser/grammar/declarations/variable_declarators.rs b/rottlib/src/parser/grammar/declarations/variable_declarators.rs index 54a7a41..4ba4d9a 100644 --- a/rottlib/src/parser/grammar/declarations/variable_declarators.rs +++ b/rottlib/src/parser/grammar/declarations/variable_declarators.rs @@ -9,8 +9,8 @@ use std::ops::ControlFlow; use crate::arena::ArenaVec; -use crate::ast::{AstSpan, OptionalExpression, VariableDeclarator, VariableDeclaratorRef}; -use crate::lexer::{Token, TokenPosition}; +use crate::ast::{OptionalExpression, VariableDeclarator, VariableDeclaratorRef}; +use crate::lexer::{Token, TokenPosition, TokenSpan}; use crate::parser::{ParseErrorKind, ParseResult, Parser, ResultRecoveryExt, SyncLevel}; #[derive(Clone, Copy, Debug, Hash, PartialEq, Eq)] @@ -141,7 +141,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { let name = self.parse_identifier(ParseErrorKind::DeclBadVariableIdentifier)?; let array_size = self.parse_optional_array_size(); let initializer = self.parse_optional_variable_initializer(); - let span = AstSpan::range(name.0, self.last_consumed_position_or_start()); + let span = 
TokenSpan::range(name.0, self.last_consumed_position_or_start()); Ok(self.arena.alloc_node( VariableDeclarator { name, diff --git a/rottlib/src/parser/grammar/expression/block.rs b/rottlib/src/parser/grammar/expression/block.rs index 192a19c..7b7a5c2 100644 --- a/rottlib/src/parser/grammar/expression/block.rs +++ b/rottlib/src/parser/grammar/expression/block.rs @@ -5,8 +5,8 @@ //! has been consumed. use crate::arena::ArenaVec; -use crate::ast::{AstSpan, BlockBody, Expression, ExpressionRef, Statement, StatementRef}; -use crate::lexer::{Token, TokenPosition}; +use crate::ast::{BlockBody, Expression, ExpressionRef, Statement, StatementRef}; +use crate::lexer::{Token, TokenPosition, TokenSpan}; use crate::parser::{ParseErrorKind, Parser}; impl<'src, 'arena> Parser<'src, 'arena> { @@ -43,7 +43,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { while let Some((token, token_position)) = self.peek_token_and_position() { if token == Token::RightBrace { self.advance(); // '}' - let span = AstSpan::range(opening_brace_position, token_position); + let span = TokenSpan::range(opening_brace_position, token_position); return BlockBody { statements, span }; } self.parse_next_block_item_into(&mut statements); @@ -51,7 +51,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { } // Reached EOF without a closing `}` self.report_error_here(ParseErrorKind::BlockMissingClosingBrace); - let span = AstSpan::range( + let span = TokenSpan::range( opening_brace_position, self.last_consumed_position_or_start(), ); diff --git a/rottlib/src/parser/grammar/expression/control_flow.rs b/rottlib/src/parser/grammar/expression/control_flow.rs index eef4358..023a745 100644 --- a/rottlib/src/parser/grammar/expression/control_flow.rs +++ b/rottlib/src/parser/grammar/expression/control_flow.rs @@ -58,8 +58,8 @@ //! lives in a separate module because the construct itself is more involved //! than the control-flow forms handled here. 
-use crate::ast::{AstSpan, BranchBody, Expression, ExpressionRef}; -use crate::lexer::{Keyword, Token, TokenPosition}; +use crate::ast::{BranchBody, Expression, ExpressionRef}; +use crate::lexer::{Keyword, Token, TokenPosition, TokenSpan}; use crate::parser::{ParseErrorKind, Parser, ResultRecoveryExt, SyncLevel}; impl<'src, 'arena> Parser<'src, 'arena> { @@ -77,9 +77,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { let right_parenthesis_position = self .expect( Token::RightParenthesis, - ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis { - left_parenthesis_position, - }, + ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis, ) .widen_error_span_from(left_parenthesis_position) .sync_error_at(self, SyncLevel::CloseParenthesis) @@ -108,11 +106,12 @@ impl<'src, 'arena> Parser<'src, 'arena> { fn parse_branch_body(&mut self) -> BranchBody<'src, 'arena> { let Some((first_token, first_token_position)) = self.peek_token_and_position() else { let error = self.make_error_here(ParseErrorKind::MissingBranchBody); + let end_anchor_token_position = error.covered_span.end; self.report_error(error); return BranchBody { expression: None, semicolon_position: None, - end_anchor_token_position: error.covered_span.token_to, + end_anchor_token_position, }; }; // `if (is_condition);` @@ -139,7 +138,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { }; } let branch_expression = self.parse_expression(); - let end_anchor_token_position = branch_expression.span().token_to; + let end_anchor_token_position = branch_expression.span().end; // A block body in `if {...}` or `if {...};` owns its own terminator; // a following `;` does not belong to the branch body. 
if let Expression::Block(_) = *branch_expression { @@ -185,7 +184,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { (None, body.end_anchor_token_position) }; - let span = AstSpan::range(if_keyword_position, if_end_position); + let span = TokenSpan::range(if_keyword_position, if_end_position); self.arena.alloc_node( Expression::If { condition, @@ -207,7 +206,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { ) -> ExpressionRef<'src, 'arena> { let condition = self.parse_condition(); let body = self.parse_branch_body(); - let span = AstSpan::range(while_keyword_position, body.end_anchor_token_position); + let span = TokenSpan::range(while_keyword_position, body.end_anchor_token_position); self.arena .alloc_node(Expression::While { condition, body }, span) } @@ -230,13 +229,13 @@ impl<'src, 'arena> Parser<'src, 'arena> { { crate::arena::ArenaNode::new_in( Expression::Error, - AstSpan::new(body.end_anchor_token_position), + TokenSpan::new(body.end_anchor_token_position), self.arena, ) } else { self.parse_condition() }; - let span = AstSpan::range(do_keyword_position, condition.span().token_to); + let span = TokenSpan::range(do_keyword_position, condition.span().end); self.arena .alloc_node(Expression::DoUntil { condition, body }, span) } @@ -259,7 +258,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { let iterated_expression = self.parse_expression(); let body = self.parse_branch_body(); - let span = AstSpan::range(foreach_keyword_position, body.end_anchor_token_position); + let span = TokenSpan::range(foreach_keyword_position, body.end_anchor_token_position); self.arena.alloc_node( Expression::ForEach { iterated_expression, @@ -365,7 +364,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { }; let body = self.parse_branch_body(); - let span = AstSpan::range(for_keyword_position, body.end_anchor_token_position); + let span = TokenSpan::range(for_keyword_position, body.end_anchor_token_position); self.arena.alloc_node( Expression::For { initialization, @@ -387,10 +386,10 @@ impl<'src, 
'arena> Parser<'src, 'arena> { return_keyword_position: TokenPosition, ) -> ExpressionRef<'src, 'arena> { let (value, span) = if self.peek_token() == Some(Token::Semicolon) { - (None, AstSpan::new(return_keyword_position)) + (None, TokenSpan::new(return_keyword_position)) } else { let returned_value = self.parse_expression(); - let span = AstSpan::range(return_keyword_position, returned_value.span().token_to); + let span = TokenSpan::range(return_keyword_position, returned_value.span().end); (Some(returned_value), span) }; self.arena.alloc_node(Expression::Return(value), span) @@ -406,10 +405,10 @@ impl<'src, 'arena> Parser<'src, 'arena> { break_keyword_position: TokenPosition, ) -> ExpressionRef<'src, 'arena> { let (value, span) = if self.peek_token() == Some(Token::Semicolon) { - (None, AstSpan::new(break_keyword_position)) + (None, TokenSpan::new(break_keyword_position)) } else { let returned_value = self.parse_expression(); - let span = AstSpan::range(break_keyword_position, returned_value.span().token_to); + let span = TokenSpan::range(break_keyword_position, returned_value.span().end); (Some(returned_value), span) }; self.arena.alloc_node(Expression::Break(value), span) @@ -439,7 +438,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { .report_error(self); crate::arena::ArenaNode::new_in( Expression::Error, - AstSpan::new(goto_keyword_position), + TokenSpan::new(goto_keyword_position), self.arena, ) } diff --git a/rottlib/src/parser/grammar/expression/identifier.rs b/rottlib/src/parser/grammar/expression/identifier.rs index 2612de5..9e80438 100644 --- a/rottlib/src/parser/grammar/expression/identifier.rs +++ b/rottlib/src/parser/grammar/expression/identifier.rs @@ -4,8 +4,8 @@ //! e.g. `KFChar.ZombieClot`. 
use crate::arena::{self, ArenaVec}; -use crate::ast::{AstSpan, IdentifierToken, QualifiedIdentifier, QualifiedIdentifierRef}; -use crate::lexer::{self, Token}; +use crate::ast::{IdentifierToken, QualifiedIdentifier, QualifiedIdentifierRef}; +use crate::lexer::{self, Token, TokenSpan}; use crate::parser::{ParseErrorKind, ParseResult, Parser, ResultRecoveryExt}; impl<'src, 'arena> Parser<'src, 'arena> { @@ -69,7 +69,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { Ok(arena::ArenaNode::new_in( QualifiedIdentifier { head, tail }, - AstSpan::range(span_start, span_end), + TokenSpan::range(span_start, span_end), self.arena, )) } diff --git a/rottlib/src/parser/grammar/expression/pratt.rs b/rottlib/src/parser/grammar/expression/pratt.rs index d1c022c..4d99603 100644 --- a/rottlib/src/parser/grammar/expression/pratt.rs +++ b/rottlib/src/parser/grammar/expression/pratt.rs @@ -63,6 +63,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { #[must_use] pub fn parse_expression(&mut self) -> ExpressionRef<'src, 'arena> { self.parse_expression_with_min_precedence_rank(PrecedenceRank::LOOSEST) + .unwrap_or_fallback(self) } /// Parses an expression, including only operators with binding power @@ -70,14 +71,9 @@ impl<'src, 'arena> Parser<'src, 'arena> { fn parse_expression_with_min_precedence_rank( &mut self, min_precedence_rank: PrecedenceRank, - ) -> ExpressionRef<'src, 'arena> { - let mut left_hand_side = self - .parse_prefix_or_primary() - .sync_error_until(self, parser::SyncLevel::Expression) - .unwrap_or_fallback(self); - left_hand_side = self - .parse_selectors_into(left_hand_side) - .unwrap_or_fallback(self); + ) -> parser::ParseExpressionResult<'src, 'arena> { + let mut left_hand_side = self.parse_prefix_or_primary()?; + left_hand_side = self.parse_selectors_into(left_hand_side)?; // We disallow only postfix operators after expression forms that // represent control-flow or block constructs. Selectors are still // parsed normally. 
@@ -102,14 +98,15 @@ impl<'src, 'arena> Parser<'src, 'arena> { /// null denotation). fn parse_prefix_or_primary(&mut self) -> parser::ParseExpressionResult<'src, 'arena> { let (token, token_lexeme, token_position) = - self.require_token_lexeme_and_position(parser::ParseErrorKind::MissingExpression)?; + self.require_token_lexeme_and_position(parser::ParseErrorKind::ExpressionExpected)?; self.advance(); if let Ok(operator) = ast::PrefixOperator::try_from(token) { // In UnrealScript, prefix and postfix operators bind tighter than // any infix operators, so we can safely parse the right hand side // at the tightest precedence. - let right_hand_side = - self.parse_expression_with_min_precedence_rank(PrecedenceRank::TIGHTEST); + let right_hand_side = self + .parse_expression_with_min_precedence_rank(PrecedenceRank::TIGHTEST) + .related_token("prefix_operator", token_position)?; Ok(Expression::new_prefix( self.arena, token_position, @@ -146,17 +143,19 @@ impl<'src, 'arena> Parser<'src, 'arena> { &mut self, mut left_hand_side: ExpressionRef<'src, 'arena>, min_precedence_rank: PrecedenceRank, - ) -> ExpressionRef<'src, 'arena> { + ) -> parser::ParseExpressionResult<'src, 'arena> { while let Some((operator, right_precedence_rank)) = self.peek_infix_with_min_precedence_rank(min_precedence_rank) { self.advance(); - let right_hand_side = - self.parse_expression_with_min_precedence_rank(right_precedence_rank); + let infix_operator_position = self.last_consumed_position_or_start(); + let right_hand_side = self + .parse_expression_with_min_precedence_rank(right_precedence_rank) + .related_token("infix_operator", infix_operator_position)?; left_hand_side = Expression::new_binary(self.arena, left_hand_side, operator, right_hand_side); } - left_hand_side + Ok(left_hand_side) } /// Returns the next postfix operator and its position if present. 
diff --git a/rottlib/src/parser/grammar/expression/primary.rs b/rottlib/src/parser/grammar/expression/primary.rs index 01a9b1d..0d1df7f 100644 --- a/rottlib/src/parser/grammar/expression/primary.rs +++ b/rottlib/src/parser/grammar/expression/primary.rs @@ -186,30 +186,14 @@ impl<'src, 'arena> Parser<'src, 'arena> { &mut self, left_parenthesis_position: TokenPosition, ) -> ExpressionRef<'src, 'arena> { - // Special case for an empty expression - if let Some((Token::RightParenthesis, right_parenthesis_position)) = - self.peek_token_and_position() - { - self.make_error_here(ParseErrorKind::ParenthesizedExpressionEmpty { - left_parenthesis_position, - }) - .widen_error_span_from(left_parenthesis_position) - .sync_error_at(self, SyncLevel::CloseParenthesis) - .blame_token(right_parenthesis_position) - .report_error(self); - return self.arena.alloc_node_between( - Expression::Error, - left_parenthesis_position, - right_parenthesis_position, - ); - } // Continue parsing normally let inner_expression = if self.next_token_definitely_cannot_start_expression() { let error = self - .make_error_here(ParseErrorKind::ExpressionExpected) + .make_error_here(ParseErrorKind::ParenthesizedExpressionInvalidStart) .widen_error_span_from(left_parenthesis_position) - .sync_error_at(self, SyncLevel::Expression) - .related_token(left_parenthesis_position); + .sync_error_until(self, SyncLevel::Expression) + .extend_blame_to_next_token(self) + .related_token("left_parenthesis", left_parenthesis_position); let error_span = error.covered_span; self.report_error(error); return crate::arena::ArenaNode::new_in( @@ -223,12 +207,12 @@ impl<'src, 'arena> Parser<'src, 'arena> { let right_parenthesis_position = self .expect( Token::RightParenthesis, - ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis { - left_parenthesis_position, - }, + ParseErrorKind::ParenthesizedExpressionMissingClosingParenthesis, ) .widen_error_span_from(left_parenthesis_position) .sync_error_at(self, 
SyncLevel::CloseParenthesis) + .extend_blame_start_to_covered_start() + .related_token("left_parenthesis", left_parenthesis_position) .unwrap_or_fallback(self); self.arena.alloc_node_between( Expression::Parentheses(inner_expression), @@ -330,7 +314,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { } else { self.parse_expression() }; - let class_specifier_end_position = class_specifier.span().token_to; + let class_specifier_end_position = class_specifier.span().end; self.arena.alloc_node_between( Expression::New { outer_argument, @@ -360,11 +344,13 @@ impl<'src, 'arena> Parser<'src, 'arena> { let mut name_argument = None; let mut flags_argument = None; + let mut first_call = true; for slot in [&mut outer_argument, &mut name_argument, &mut flags_argument] { - match self.parse_call_argument_slot(left_parenthesis_position) { + match self.parse_call_argument_slot(left_parenthesis_position, first_call) { ParsedCallArgumentSlot::Argument(argument) => *slot = argument, ParsedCallArgumentSlot::NoMoreArguments => break, } + first_call = false; } if let Some((next_token, next_token_position)) = self.peek_token_and_position() @@ -376,7 +362,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { .widen_error_span_from(left_parenthesis_position) .sync_error_until(self, SyncLevel::CloseParenthesis) .blame_token(next_token_position) - .extend_blame_to_covered_end() + .extend_blame_end_to_covered_end() .report_error(self); } diff --git a/rottlib/src/parser/grammar/expression/selectors.rs b/rottlib/src/parser/grammar/expression/selectors.rs index 18b94a8..7287bf5 100644 --- a/rottlib/src/parser/grammar/expression/selectors.rs +++ b/rottlib/src/parser/grammar/expression/selectors.rs @@ -7,9 +7,8 @@ //! current token. They always require a left-hand side expression. 
use crate::arena::ArenaVec; -use crate::ast::AstSpan; use crate::ast::{Expression, ExpressionRef, OptionalExpression}; -use crate::lexer::{Token, TokenPosition}; +use crate::lexer::{Token, TokenPosition, TokenSpan}; use crate::parser::{ParseErrorKind, ParseExpressionResult, Parser, ResultRecoveryExt, SyncLevel}; /// Represents the result of parsing one call argument slot. @@ -61,7 +60,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { left_hand_side: ExpressionRef<'src, 'arena>, ) -> ParseExpressionResult<'src, 'arena> { self.advance(); // `.` - let member_access_start = left_hand_side.span().token_from; + let member_access_start = left_hand_side.span().start; let member_identifier = self.parse_identifier(ParseErrorKind::ExpressionUnexpectedToken)?; let member_access_end = member_identifier.0; Ok(self.arena.alloc_node( @@ -69,7 +68,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { target: left_hand_side, name: member_identifier, }, - AstSpan::range(member_access_start, member_access_end), + TokenSpan::range(member_access_start, member_access_end), )) } @@ -92,7 +91,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { .widen_error_span_from(left_bracket_position) .sync_error_at(self, SyncLevel::CloseBracket)?; - let expression_start = left_hand_side.span().token_from; + let expression_start = left_hand_side.span().start; Ok(self.arena.alloc_node_between( Expression::Index { target: left_hand_side, @@ -123,7 +122,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { .sync_error_at(self, SyncLevel::CloseParenthesis) .unwrap_or_fallback(self); - let expression_start = left_hand_side.span().token_from; + let expression_start = left_hand_side.span().start; self.arena.alloc_node_between( Expression::Call { callee: left_hand_side, @@ -144,11 +143,14 @@ impl<'src, 'arena> Parser<'src, 'arena> { pub(crate) fn parse_call_argument_slot( &mut self, left_parenthesis_position: TokenPosition, + first_call: bool, ) -> ParsedCallArgumentSlot<'src, 'arena> { match self.peek_token() { 
Some(Token::RightParenthesis) => return ParsedCallArgumentSlot::NoMoreArguments, Some(Token::Comma) => { - self.advance(); + if !first_call { + self.advance(); + } if self.at_call_argument_boundary() { return ParsedCallArgumentSlot::Argument(None); } @@ -174,10 +176,12 @@ impl<'src, 'arena> Parser<'src, 'arena> { ) -> ArenaVec<'arena, Option>> { let mut argument_list = ArenaVec::new_in(self.arena); + let mut first_call = true; while let ParsedCallArgumentSlot::Argument(argument) = - self.parse_call_argument_slot(left_parenthesis_position) + self.parse_call_argument_slot(left_parenthesis_position, first_call) { argument_list.push(argument); + first_call = false; } argument_list diff --git a/rottlib/src/parser/grammar/expression/switch.rs b/rottlib/src/parser/grammar/expression/switch.rs index 2d5522f..0e7a011 100644 --- a/rottlib/src/parser/grammar/expression/switch.rs +++ b/rottlib/src/parser/grammar/expression/switch.rs @@ -2,8 +2,8 @@ //! //! Provides routines for parsing `switch (...) { ... }` expressions. 
use crate::arena::ArenaVec; -use crate::ast::{AstSpan, ExpressionRef, StatementRef}; -use crate::lexer::{Keyword, Token, TokenPosition}; +use crate::ast::{ExpressionRef, StatementRef}; +use crate::lexer::{Keyword, Token, TokenPosition, TokenSpan}; use crate::parser::{ParseErrorKind, ResultRecoveryExt}; impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { @@ -26,7 +26,7 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { let selector = self.parse_expression(); let mut cases = self.arena.vec(); let mut default_arm = None; - let mut span = AstSpan::new(switch_start_position); + let mut span = TokenSpan::new(switch_start_position); if self .expect(Token::LeftBrace, ParseErrorKind::SwitchMissingBody) .report_error(self) @@ -167,7 +167,7 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { selector: ExpressionRef<'src, 'arena>, cases: ArenaVec<'arena, crate::ast::SwitchCaseRef<'src, 'arena>>, default_arm: Option>>, - span: AstSpan, + span: TokenSpan, ) -> ExpressionRef<'src, 'arena> { self.arena.alloc_node( crate::ast::Expression::Switch { @@ -192,12 +192,12 @@ fn compute_case_span( labels_start_position: TokenPosition, labels: &[ExpressionRef], body: &[StatementRef], -) -> AstSpan { - let mut span = AstSpan::new(labels_start_position); +) -> TokenSpan { + let mut span = TokenSpan::new(labels_start_position); if let Some(last_statement) = body.last() { - span.extend_to(last_statement.span().token_to); + span.extend_to(last_statement.span().end); } else if let Some(last_label) = labels.last() { - span.extend_to(last_label.span().token_to); + span.extend_to(last_label.span().end); } span } diff --git a/rottlib/src/parser/grammar/function/definition.rs b/rottlib/src/parser/grammar/function/definition.rs index 27e0bc8..0ae98af 100644 --- a/rottlib/src/parser/grammar/function/definition.rs +++ b/rottlib/src/parser/grammar/function/definition.rs @@ -4,12 +4,12 @@ use crate::arena::ArenaVec; use crate::ast::{ - AstSpan, CallableDefinition, 
CallableDefinitionRef, CallableKind, CallableModifier, + CallableDefinition, CallableDefinitionRef, CallableKind, CallableModifier, CallableModifierKind, CallableName, IdentifierToken, InfixOperator, InfixOperatorName, ParameterRef, PostfixOperator, PostfixOperatorName, PrefixOperator, PrefixOperatorName, TypeSpecifierRef, }; -use crate::lexer::{Keyword, Token, TokenPosition}; +use crate::lexer::{Keyword, Token, TokenPosition, TokenSpan}; use crate::parser::{ ParseError, ParseErrorKind, ParseResult, Parser, ResultRecoveryExt, SyncLevel, recovery::RecoveryFallback, @@ -28,7 +28,7 @@ pub(super) struct ParsedCallableHeader<'src, 'arena> { impl<'src, 'arena> RecoveryFallback<'src, 'arena> for ParsedCallableHeader<'src, 'arena> { fn fallback_value(parser: &Parser<'src, 'arena>, error: &ParseError) -> Self { - let fallback_position = error.covered_span.token_from; + let fallback_position = error.covered_span.start; ParsedCallableHeader { start_position: fallback_position, modifiers: parser.arena.vec(), @@ -61,7 +61,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { None }; - let span = AstSpan::range( + let span = TokenSpan::range( header.start_position, self.last_consumed_position_or_start(), ); @@ -230,7 +230,7 @@ impl<'src, 'arena> Parser<'src, 'arena> { } }; - let span = AstSpan::range(start, self.last_consumed_position_or_start()); + let span = TokenSpan::range(start, self.last_consumed_position_or_start()); Some(CallableModifier { kind, span }) } diff --git a/rottlib/src/parser/grammar/function/params.rs b/rottlib/src/parser/grammar/function/params.rs index 7e894ad..76603f4 100644 --- a/rottlib/src/parser/grammar/function/params.rs +++ b/rottlib/src/parser/grammar/function/params.rs @@ -1,6 +1,6 @@ use crate::arena::ArenaVec; -use crate::ast::{AstSpan, Parameter, ParameterModifier, ParameterModifierKind, ParameterRef}; -use crate::lexer::{Keyword, Token}; +use crate::ast::{Parameter, ParameterModifier, ParameterModifierKind, ParameterRef}; +use crate::lexer::{Keyword, 
Token, TokenSpan}; use crate::parser::{ParseErrorKind, ResultRecoveryExt, SyncLevel}; impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { @@ -82,7 +82,7 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { None }; - let span = AstSpan::range(start_pos, self.last_consumed_position_or_start()); + let span = TokenSpan::range(start_pos, self.last_consumed_position_or_start()); params.push(self.arena.alloc_node( Parameter { modifiers, diff --git a/rottlib/src/parser/grammar/statement.rs b/rottlib/src/parser/grammar/statement.rs index ad9f5d8..97a4d39 100644 --- a/rottlib/src/parser/grammar/statement.rs +++ b/rottlib/src/parser/grammar/statement.rs @@ -3,8 +3,8 @@ //! Implements a simple recursive-descent parser for //! *Fermented `UnrealScript` statements*. -use crate::ast::{AstSpan, Statement, StatementRef}; -use crate::lexer::{Keyword, Token}; +use crate::ast::{Statement, StatementRef}; +use crate::lexer::{Keyword, Token, TokenSpan}; use crate::parser::{ParseErrorKind, ResultRecoveryExt}; impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { @@ -26,7 +26,7 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { self.advance(); // `;` Some( self.arena - .alloc_node(Statement::Empty, AstSpan::new(position)), + .alloc_node(Statement::Empty, TokenSpan::new(position)), ) } @@ -39,7 +39,7 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { let declarators = self.parse_variable_declarators(); // TODO: parse - let span = AstSpan::range(start, self.last_consumed_position_or_start()); + let span = TokenSpan::range(start, self.last_consumed_position_or_start()); Some(self.arena.alloc_node( Statement::LocalVariableDeclaration { type_spec, @@ -57,7 +57,7 @@ impl<'src, 'arena> crate::parser::Parser<'src, 'arena> { self.advance(); // : Some(self.arena.alloc_node( Statement::Label(self.arena.string(lexeme)), - AstSpan::range(position, self.last_consumed_position_or_start()), + TokenSpan::range(position, self.last_consumed_position_or_start()), )) } diff 
--git a/rottlib/src/parser/mod.rs b/rottlib/src/parser/mod.rs index 0b9ee93..50a8200 100644 --- a/rottlib/src/parser/mod.rs +++ b/rottlib/src/parser/mod.rs @@ -26,6 +26,7 @@ //! low-level plumbing lives in submodules. use super::lexer; +use crate::lexer::TokenSpan; pub use lexer::{TokenData, Tokens}; @@ -45,6 +46,7 @@ pub type ParseExpressionResult<'src, 'arena> = /// A recursive-descent parser over token from [`crate::lexer::TokenizedFile`]. pub struct Parser<'src, 'arena> { + file: &'src lexer::TokenizedFile<'src>, arena: &'arena crate::arena::Arena, pub diagnostics: Vec, cursor: cursor::Cursor<'src, 'src>, @@ -54,10 +56,15 @@ pub struct Parser<'src, 'arena> { impl<'src, 'arena> Parser<'src, 'arena> { pub fn new(file: &'src lexer::TokenizedFile<'src>, arena: &'arena crate::arena::Arena) -> Self { Self { + file, arena, diagnostics: Vec::new(), cursor: cursor::Cursor::new(file), trivia: trivia::TriviaIndexBuilder::default(), } } + + pub fn file(&self) -> &'src lexer::TokenizedFile<'src> { + self.file + } } diff --git a/rottlib/src/parser/recovery.rs b/rottlib/src/parser/recovery.rs index 5d8d147..773272a 100644 --- a/rottlib/src/parser/recovery.rs +++ b/rottlib/src/parser/recovery.rs @@ -8,9 +8,9 @@ //! General idea is that any method that returns something other than an error //! can be assumed to have reported it. -use crate::ast::{AstSpan, CallableKind, IdentifierToken, QualifiedIdentifier}; -use crate::diagnostics::Diagnostic; -use crate::lexer::{Token, TokenPosition}; +use crate::ast::{CallableKind, IdentifierToken, QualifiedIdentifier}; +use crate::diagnostics::diagnostic_from_parse_error; +use crate::lexer::{Token, TokenPosition, TokenSpan}; use crate::parser::{ParseError, ParseResult, Parser}; /// Synchronization groups the parser can stop at during recovery. @@ -180,7 +180,9 @@ impl Parser<'_, '_> { /// /// Placeholder implementation. 
pub fn report_error(&mut self, error: ParseError) { - self.diagnostics.push(Diagnostic::from(error)); + //self.diagnostics.push(Diagnostic::from(error)); + self.diagnostics + .push(diagnostic_from_parse_error(error, self.file())); } /// Reports a parser error with [`crate::parser::ParseErrorKind`] at @@ -200,8 +202,6 @@ impl Parser<'_, '_> { { break; } - // Always advances when `peek_token()` is `Some(...)`, - // so the loop cannot be infinite. self.advance(); } } @@ -223,17 +223,20 @@ pub trait ResultRecoveryExt<'src, 'arena, T>: Sized { #[must_use] fn widen_error_span_from(self, from: TokenPosition) -> Self; - fn blame(self, blame_span: AstSpan) -> Self; - fn related(self, related_span: AstSpan) -> Self; + fn blame(self, blame_span: TokenSpan) -> Self; + fn related(self, tag: impl Into, related_span: TokenSpan) -> Self; fn blame_token(self, blame_position: TokenPosition) -> Self { - self.blame(AstSpan::new(blame_position)) + self.blame(TokenSpan::new(blame_position)) } - fn extend_blame_to_covered_end(self) -> Self; + fn extend_blame_to_next_token(self, parser: &mut Parser<'src, 'arena>) -> Self; - fn related_token(self, related_position: TokenPosition) -> Self { - self.related(AstSpan::new(related_position)) + fn extend_blame_start_to_covered_start(self) -> Self; + fn extend_blame_end_to_covered_end(self) -> Self; + + fn related_token(self, tag: impl Into, related_position: TokenPosition) -> Self { + self.related(tag, TokenSpan::new(related_position)) } /// Extends the right end of the error span up to but not including @@ -266,28 +269,36 @@ pub trait ResultRecoveryExt<'src, 'arena, T>: Sized { impl<'src, 'arena, T> ResultRecoveryExt<'src, 'arena, T> for ParseResult<'src, 'arena, T> { fn widen_error_span_from(mut self, from: TokenPosition) -> Self { if let Err(ref mut error) = self { - error.covered_span.token_from = std::cmp::min(error.covered_span.token_from, from); + error.covered_span.start = std::cmp::min(error.covered_span.start, from); } self } - fn 
blame(self, blame_span: AstSpan) -> Self { + fn blame(self, blame_span: TokenSpan) -> Self { self.map_err(|error| error.blame(blame_span)) } - fn extend_blame_to_covered_end(self) -> Self { - self.map_err(|error| error.extend_blame_to_covered_end()) + fn extend_blame_to_next_token(self, parser: &mut Parser<'src, 'arena>) -> Self { + self.map_err(|error| error.extend_blame_to_next_token(parser)) } - fn related(self, related_span: AstSpan) -> Self { - self.map_err(|error| error.related(related_span)) + fn extend_blame_start_to_covered_start(self) -> Self { + self.map_err(|error| error.extend_blame_start_to_covered_start()) + } + + fn extend_blame_end_to_covered_end(self) -> Self { + self.map_err(|error| error.extend_blame_end_to_covered_end()) + } + + fn related(self, tag: impl Into, related_span: TokenSpan) -> Self { + self.map_err(|error| error.related(tag, related_span)) } fn sync_error_until(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self { if let Err(ref mut error) = self { parser.recover_until(level); - error.covered_span.token_to = std::cmp::max( - error.covered_span.token_to, + error.covered_span.end = std::cmp::max( + error.covered_span.end, parser.last_consumed_position_or_start(), ); } @@ -299,13 +310,16 @@ impl<'src, 'arena, T> ResultRecoveryExt<'src, 'arena, T> for ParseResult<'src, ' parser.recover_until(level); // If we're at end-of-file, this'll simply do nothing. 
if parser - .peek_token() - .and_then(SyncLevel::for_token) - .is_some_and(|next_level| next_level == level) + .peek_token() + .and_then(SyncLevel::for_token) + .is_some_and(|next_level| next_level == level) { parser.advance(); } - error.covered_span.token_to = parser.last_consumed_position_or_start(); // need to be peek + error.covered_span.end = std::cmp::max( + error.covered_span.end, + parser.last_consumed_position_or_start(), + ); } self } @@ -343,36 +357,59 @@ impl<'src, 'arena, T> ResultRecoveryExt<'src, 'arena, T> for ParseResult<'src, ' impl<'src, 'arena> ResultRecoveryExt<'src, 'arena, ()> for ParseError { fn widen_error_span_from(mut self, from: TokenPosition) -> Self { - self.covered_span.token_from = std::cmp::min(self.covered_span.token_from, from); + self.covered_span.start = std::cmp::min(self.covered_span.start, from); self } - fn blame(mut self, blame_span: AstSpan) -> Self { + fn blame(mut self, blame_span: TokenSpan) -> Self { self.blame_span = blame_span; self } - fn extend_blame_to_covered_end(mut self) -> Self { - self.blame_span.token_to = self.covered_span.token_to; + fn extend_blame_to_next_token(mut self, parser: &mut Parser<'src, 'arena>) -> Self { + self.blame_span.end = parser.peek_position_or_eof(); self } - fn related(mut self, related_span: AstSpan) -> Self { - self.related_span = Some(related_span); + fn extend_blame_start_to_covered_start(mut self) -> Self { + self.blame_span.start = self.covered_span.start; + self + } + + fn extend_blame_end_to_covered_end(mut self) -> Self { + self.blame_span.end = self.covered_span.end; + self + } + + fn related(mut self, tag: impl Into, related_span: TokenSpan) -> Self { + self.related_spans.insert(tag.into(), related_span); self } fn sync_error_until(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self { parser.recover_until(level); - self.covered_span.token_to = parser.last_consumed_position_or_start(); + self.covered_span.end = std::cmp::max( + self.covered_span.end, + 
parser.last_consumed_position_or_start(), + ); self } fn sync_error_at(mut self, parser: &mut Parser<'src, 'arena>, level: SyncLevel) -> Self { parser.recover_until(level); - // If we're at end-of-file, this'll simply do nothing. - parser.advance(); - self.covered_span.token_to = parser.last_consumed_position_or_start(); + + if parser + .peek_token() + .and_then(SyncLevel::for_token) + .is_some_and(|next_level| next_level == level) + { + parser.advance(); + } + + self.covered_span.end = std::cmp::max( + self.covered_span.end, + parser.last_consumed_position_or_start(), + ); self } @@ -405,7 +442,7 @@ impl<'src, 'arena> RecoveryFallback<'src, 'arena> for f64 { impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::IdentifierToken { fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self { - Self(error.covered_span.token_from) + Self(error.covered_span.start) } } @@ -414,7 +451,7 @@ impl<'src, 'arena> RecoveryFallback<'src, 'arena> { fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self { // default return type: Named("") at error span - let ret_id = crate::ast::IdentifierToken(err.covered_span.token_from); + let ret_id = crate::ast::IdentifierToken(err.covered_span.start); let return_type = crate::arena::ArenaNode::new_in( crate::ast::TypeSpecifier::Named(QualifiedIdentifier::from_ident(parser.arena, ret_id)), err.covered_span, @@ -422,9 +459,7 @@ impl<'src, 'arena> RecoveryFallback<'src, 'arena> ); let def = crate::ast::CallableDefinition { - name: crate::ast::CallableName::Identifier(IdentifierToken( - err.covered_span.token_from, - )), + name: crate::ast::CallableName::Identifier(IdentifierToken(err.covered_span.start)), kind: CallableKind::Function, return_type_specifier: Some(return_type), modifiers: parser.arena.vec(), @@ -449,7 +484,7 @@ impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::StructDefRef<' impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ClassVarDeclRef<'src, 'arena> 
{ fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self { - let dummy_ident = crate::ast::IdentifierToken(err.covered_span.token_from); + let dummy_ident = crate::ast::IdentifierToken(err.covered_span.start); let type_spec = crate::arena::ArenaNode::new_in( crate::ast::TypeSpecifier::Named(QualifiedIdentifier::from_ident( parser.arena, @@ -484,7 +519,7 @@ impl<'src, 'arena> RecoveryFallback<'src, 'arena> impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::StateDeclRef<'src, 'arena> { fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self { let def = crate::ast::StateDecl { - name: crate::ast::IdentifierToken(err.covered_span.token_from), + name: crate::ast::IdentifierToken(err.covered_span.start), parent: None, modifiers: parser.arena.vec(), ignores: None, @@ -497,13 +532,13 @@ impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::StateDeclRef<' impl<'src, 'arena> RecoveryFallback<'src, 'arena> for TokenPosition { fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self { - error.covered_span.token_to + error.covered_span.end } } impl<'src, 'arena> RecoveryFallback<'src, 'arena> for (Token, TokenPosition) { fn fallback_value(_: &Parser<'src, 'arena>, error: &ParseError) -> Self { - (Token::Error, error.covered_span.token_to) + (Token::Error, error.covered_span.end) } } @@ -535,10 +570,10 @@ impl<'src, 'arena, T> RecoveryFallback<'src, 'arena> for Option { impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ClassConstDeclRef<'src, 'arena> { fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self { - let name = crate::ast::IdentifierToken(err.covered_span.token_from); + let name = crate::ast::IdentifierToken(err.covered_span.start); let value = crate::ast::DeclarationLiteralRef { literal: crate::ast::DeclarationLiteral::None, - position: err.covered_span.token_from, + position: err.covered_span.start, }; let def = crate::ast::ClassConstDecl { name, @@ 
-551,7 +586,7 @@ impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::ClassConstDecl impl<'src, 'arena> RecoveryFallback<'src, 'arena> for crate::ast::TypeSpecifierRef<'src, 'arena> { fn fallback_value(parser: &Parser<'src, 'arena>, err: &ParseError) -> Self { - let dummy = crate::ast::IdentifierToken(err.covered_span.token_from); + let dummy = crate::ast::IdentifierToken(err.covered_span.start); crate::arena::ArenaNode::new_in( crate::ast::TypeSpecifier::Named(QualifiedIdentifier::from_ident(parser.arena, dummy)), err.covered_span, diff --git a/rottlib/tests/diagnostics_expressions.rs b/rottlib/tests/diagnostics_expressions.rs new file mode 100644 index 0000000..9a260cc --- /dev/null +++ b/rottlib/tests/diagnostics_expressions.rs @@ -0,0 +1,394 @@ +use std::collections::HashMap; + +use rottlib::arena::Arena; +use rottlib::diagnostics::Diagnostic; +use rottlib::lexer::{TokenPosition, TokenSpan, TokenizedFile}; +use rottlib::parser::Parser; + +#[derive(Debug, Clone, Copy)] +pub struct Fixture { + pub code: &'static str, + pub label: &'static str, + pub source: &'static str, +} + +pub const FIXTURES: &[Fixture] = &[ + Fixture { + code: "P0001", + label: "files/P0001_01.uc", + source: "c && ( /*lol*/ ** calc_it())", + }, + Fixture { + code: "P0001", + label: "files/P0001_02.uc", + source: "\r\na + (\n//AAA\n//BBB\n//CCC\n//DDD\n//EEE\n//FFF\n ]", + }, + Fixture { + code: "P0001", + label: "files/P0001_03.uc", + source: "(\n// nothing here, bucko", + }, + Fixture { + code: "P0002", + label: "files/P0002_01.uc", + source: "a + [", + }, + Fixture { + code: "P0002", + label: "files/P0002_02.uc", + source: "a * \n//some\n//empty lines\n *", + }, + Fixture { + code: "P0002", + label: "files/P0002_03.uc", + source: "a &&", + }, + Fixture { + code: "P0002", + label: "files/P0002_04.uc", + source: "a * * *", + }, + Fixture { + code: "P0003", + label: "files/P0003_01.uc", + source: "(a + b && c / d ^ e @ f", + }, + Fixture { + code: "P0003", + label: 
"files/P0003_02.uc", + source: "(a]", + }, + Fixture { + code: "P0003", + label: "files/P0003_03.uc", + source: "(a\n;", + }, +]; + +pub struct FixtureRun<'src> { + pub fixture: &'static Fixture, + pub file: TokenizedFile<'src>, + pub diagnostics: Vec<Diagnostic>, +} + +pub struct FixtureRuns<'src> { + runs: HashMap<&'static str, FixtureRun<'src>>, +} + +impl<'src> FixtureRuns<'src> { + pub fn get(&self, label: &str) -> Option<Vec<Diagnostic>> { + self.runs + .get(label) + .map(|fixture_run| fixture_run.diagnostics.clone()) + } + + pub fn get_any(&self, label: &str) -> Diagnostic { + self.runs + .get(label) + .map(|fixture_run| fixture_run.diagnostics[0].clone()) + .unwrap() + } + + pub fn iter(&self) -> impl Iterator<Item = (&'static str, &FixtureRun<'src>)> { + self.runs.iter().map(|(label, run)| (*label, run)) + } +} + +fn run_fixture(fixture: &'static Fixture) -> FixtureRun<'static> { + let arena = Arena::new(); + let file = TokenizedFile::tokenize(fixture.source); + let mut parser = Parser::new(&file, &arena); + + let _ = parser.parse_expression(); + let diagnostics = parser.diagnostics.clone(); + + FixtureRun { + fixture, + file, + diagnostics, + } +} + +pub fn run_fixtures(code: &str) -> FixtureRuns<'static> { + let mut runs = HashMap::new(); + + for fixture in FIXTURES.iter().filter(|fixture| fixture.code == code) { + runs.insert(fixture.label, run_fixture(fixture)); + } + + for (label, run) in runs.iter() { + run.diagnostics.iter().for_each(|diag| { + diag.render(&run.file, *label); + }); + println!(); + } + + FixtureRuns { runs } +} + +#[test] +fn check_p0001_fixtures() { + let runs = run_fixtures("P0001"); + + assert_eq!(runs.get("files/P0001_01.uc").unwrap().len(), 1); + assert_eq!(runs.get("files/P0001_02.uc").unwrap().len(), 1); + assert_eq!(runs.get("files/P0001_03.uc").unwrap().len(), 1); + + assert_eq!( + runs.get_any("files/P0001_01.uc").headline(), + "expected expression inside parentheses, found `**`" + ); + assert_eq!( + runs.get_any("files/P0001_02.uc").headline(), + "expected expression inside parentheses, 
found `]`" + ); + assert_eq!( + runs.get_any("files/P0001_03.uc").headline(), + "expected expression, found end of file" + ); + + assert_eq!(runs.get_any("files/P0001_01.uc").code(), Some("P0001")); + assert_eq!(runs.get_any("files/P0001_02.uc").code(), Some("P0001")); + assert_eq!(runs.get_any("files/P0001_03.uc").code(), Some("P0001")); + + assert_eq!( + runs.get_any("files/P0001_01.uc") + .primary_label() + .unwrap() + .span, + TokenSpan { + start: TokenPosition(8), + end: TokenPosition(8) + } + ); + + assert_eq!( + runs.get_any("files/P0001_02.uc") + .primary_label() + .unwrap() + .span, + TokenSpan { + start: TokenPosition(5), + end: TokenPosition(20) + } + ); + + assert_eq!( + runs.get_any("files/P0001_03.uc") + .primary_label() + .unwrap() + .span, + TokenSpan { + start: TokenPosition(0), + end: TokenPosition(3) + } + ); + + assert_eq!( + runs.get_any("files/P0001_01.uc") + .primary_label() + .unwrap() + .message, + "unexpected `**`" + ); + assert_eq!( + runs.get_any("files/P0001_02.uc") + .primary_label() + .unwrap() + .message, + "unexpected `]`" + ); + assert_eq!( + runs.get_any("files/P0001_03.uc") + .primary_label() + .unwrap() + .message, + "reached end of file here" + ); +} + +#[test] +fn check_p0002_fixtures() { + let runs = run_fixtures("P0002"); + + assert_eq!(runs.get("files/P0002_01.uc").unwrap().len(), 1); + assert_eq!(runs.get("files/P0002_02.uc").unwrap().len(), 1); + assert_eq!(runs.get("files/P0002_03.uc").unwrap().len(), 1); + assert_eq!(runs.get("files/P0002_04.uc").unwrap().len(), 1); + + assert_eq!( + runs.get_any("files/P0002_01.uc").headline(), + "expected expression after `+`, found `[`" + ); + assert_eq!( + runs.get_any("files/P0002_02.uc").headline(), + "expected expression after `*`, found `*`" + ); + assert_eq!( + runs.get_any("files/P0002_03.uc").headline(), + "expected expression after `&&`, found end of file" + ); + assert_eq!( + runs.get_any("files/P0002_04.uc").headline(), + "expected expression after `*`, found `*`" + ); + + 
assert_eq!(runs.get_any("files/P0002_01.uc").code(), Some("P0002")); + assert_eq!(runs.get_any("files/P0002_02.uc").code(), Some("P0002")); + assert_eq!(runs.get_any("files/P0002_03.uc").code(), Some("P0002")); + assert_eq!(runs.get_any("files/P0002_04.uc").code(), Some("P0002")); + + assert_eq!( + runs.get_any("files/P0002_01.uc") + .primary_label() + .unwrap() + .span, + TokenSpan { + start: TokenPosition(4), + end: TokenPosition(4), + } + ); + + assert_eq!( + runs.get_any("files/P0002_02.uc") + .primary_label() + .unwrap() + .span, + TokenSpan { + start: TokenPosition(10), + end: TokenPosition(10), + } + ); + + assert_eq!( + runs.get_any("files/P0002_03.uc") + .primary_label() + .unwrap() + .span, + TokenSpan { + start: TokenPosition(3), + end: TokenPosition(3), + } + ); + + assert_eq!( + runs.get_any("files/P0002_04.uc") + .primary_label() + .unwrap() + .span, + TokenSpan { + start: TokenPosition(4), + end: TokenPosition(4), + } + ); + + assert_eq!( + runs.get_any("files/P0002_01.uc") + .primary_label() + .unwrap() + .message, + "unexpected `[`" + ); + assert_eq!( + runs.get_any("files/P0002_02.uc") + .primary_label() + .unwrap() + .message, + "unexpected `*`" + ); + assert_eq!( + runs.get_any("files/P0002_03.uc") + .primary_label() + .unwrap() + .message, + "reached end of file here" + ); + assert_eq!( + runs.get_any("files/P0002_04.uc") + .primary_label() + .unwrap() + .message, + "unexpected `*`" + ); +} + +#[test] +fn check_p0003_fixtures() { + let runs = run_fixtures("P0003"); + + assert_eq!(runs.get("files/P0003_01.uc").unwrap().len(), 1); + assert_eq!(runs.get("files/P0003_02.uc").unwrap().len(), 1); + assert_eq!(runs.get("files/P0003_03.uc").unwrap().len(), 1); + + assert_eq!( + runs.get_any("files/P0003_01.uc").headline(), + "missing `)` to close parenthesized expression" + ); + assert_eq!( + runs.get_any("files/P0003_02.uc").headline(), + "missing `)` to close parenthesized expression" + ); + assert_eq!( + runs.get_any("files/P0003_03.uc").headline(), 
+ "missing `)` to close parenthesized expression" + ); + + assert_eq!(runs.get_any("files/P0003_01.uc").code(), Some("P0003")); + assert_eq!(runs.get_any("files/P0003_02.uc").code(), Some("P0003")); + assert_eq!(runs.get_any("files/P0003_03.uc").code(), Some("P0003")); + + assert_eq!( + runs.get_any("files/P0003_01.uc") + .primary_label() + .unwrap() + .span, + TokenSpan { + start: TokenPosition(22), + end: TokenPosition(22), + } + ); + + assert_eq!( + runs.get_any("files/P0003_02.uc") + .primary_label() + .unwrap() + .span, + TokenSpan { + start: TokenPosition(2), + end: TokenPosition(2), + } + ); + + assert_eq!( + runs.get_any("files/P0003_03.uc") + .primary_label() + .unwrap() + .span, + TokenSpan { + start: TokenPosition(0), + end: TokenPosition(3), + } + ); + + assert_eq!( + runs.get_any("files/P0003_01.uc") + .primary_label() + .unwrap() + .message, + "expected `)` before end of file" + ); + assert_eq!( + runs.get_any("files/P0003_02.uc") + .primary_label() + .unwrap() + .message, + "expected `)` before `]`" + ); + assert_eq!( + runs.get_any("files/P0003_03.uc") + .primary_label() + .unwrap() + .message, + "expected `)` before `;`" + ); +} \ No newline at end of file