diff --git a/crates/apollo-parser/src/lexer/token.rs b/crates/apollo-parser/src/lexer/token.rs index 48c357ce4..3bcb9f398 100644 --- a/crates/apollo-parser/src/lexer/token.rs +++ b/crates/apollo-parser/src/lexer/token.rs @@ -3,7 +3,7 @@ use std::fmt; use crate::TokenKind; /// A token generated by the lexer. -#[derive(Clone)] +#[derive(Clone, PartialEq, Eq)] pub struct Token<'a> { pub(crate) kind: TokenKind, pub(crate) data: &'a str, diff --git a/crates/apollo-parser/src/parser/grammar/argument.rs b/crates/apollo-parser/src/parser/grammar/argument.rs index a241b7592..f7965901d 100644 --- a/crates/apollo-parser/src/parser/grammar/argument.rs +++ b/crates/apollo-parser/src/parser/grammar/argument.rs @@ -1,6 +1,7 @@ use crate::parser::grammar::value::Constness; use crate::parser::grammar::{input, name, value}; use crate::{Parser, SyntaxKind, TokenKind, S, T}; +use std::ops::ControlFlow; /// See: https://spec.graphql.org/October2021/#Argument /// @@ -27,9 +28,9 @@ pub(crate) fn arguments(p: &mut Parser, constness: Constness) { } else { p.err("expected an Argument"); } - while let Some(TokenKind::Name) = p.peek() { + p.peek_while_kind(TokenKind::Name, |p| { argument(p, constness); - } + }); p.expect(T![')'], S![')']); } @@ -45,9 +46,13 @@ pub(crate) fn arguments_definition(p: &mut Parser) { } else { p.err("expected an Argument Definition"); } - while let Some(TokenKind::Name | TokenKind::StringValue) = p.peek() { - input::input_value_definition(p); - } + p.peek_while(|p, kind| match kind { + TokenKind::Name | TokenKind::StringValue => { + input::input_value_definition(p); + ControlFlow::Continue(()) + } + _ => ControlFlow::Break(()), + }); p.expect(T![')'], S![')']); } diff --git a/crates/apollo-parser/src/parser/grammar/directive.rs b/crates/apollo-parser/src/parser/grammar/directive.rs index 8702b2c2d..4ef72e1aa 100644 --- a/crates/apollo-parser/src/parser/grammar/directive.rs +++ b/crates/apollo-parser/src/parser/grammar/directive.rs @@ -1,6 +1,7 @@ use crate::parser::grammar::value::Constness; use crate::parser::grammar::{argument, description, input, name}; use crate::{Parser, SyntaxKind, TokenKind, S, T}; +use std::ops::ControlFlow; /// See: https://spec.graphql.org/October2021/#DirectiveDefinition /// @@ -31,9 +32,13 @@ pub(crate) fn directive_definition(p: &mut Parser) { } else { p.err("expected an Argument Definition"); } - while let Some(TokenKind::Name | TokenKind::StringValue) = p.peek() { - input::input_value_definition(p); - } + p.peek_while(|p, kind| match kind { + TokenKind::Name | TokenKind::StringValue => { + input::input_value_definition(p); + ControlFlow::Continue(()) + } + _ => ControlFlow::Break(()), + }); p.expect(T![')'], S![')']); } @@ -52,29 +57,27 @@ pub(crate) fn directive_definition(p: &mut Parser) { if let Some(TokenKind::Name | T![|]) = p.peek() { let _g = p.start_node(SyntaxKind::DIRECTIVE_LOCATIONS); - if let Some(T![|]) = p.peek() { - p.bump(S![|]); - } - directive_locations(p, false); + directive_locations(p); } else { p.err("expected valid Directive Location"); } } -/// See: https://spec.graphql.org/October2021/#DirectiveLocations +/// https://spec.graphql.org/October2021/#DirectiveLocation /// -/// *DirectiveLocations*: -/// DirectiveLocations **|** DirectiveLocation -/// **|**? DirectiveLocation -pub(crate) fn directive_locations(p: &mut Parser, is_location: bool) { - if let Some(T![|]) = p.peek() { - p.bump(S![|]); - directive_locations(p, false); - } +/// *DirectiveLocation*: +/// *ExecutableDirectiveLocation* +/// *TypeSystemDirectiveLocation* +/// +/// (This function does not distinguish between the two groups of +/// locations.) +fn directive_location(p: &mut Parser) { + let Some(token) = p.peek_token() else { + return; + }; - if let Some(TokenKind::Name) = p.peek() { - let loc = p.peek_data().unwrap(); - match loc { + if token.kind == TokenKind::Name { + match token.data { "QUERY" => { let _g = p.start_node(SyntaxKind::DIRECTIVE_LOCATION); p.bump(SyntaxKind::QUERY_KW); @@ -152,21 +155,23 @@ pub(crate) fn directive_locations(p: &mut Parser, is_location: bool) { p.bump(SyntaxKind::INPUT_FIELD_DEFINITION_KW); } _ => { - if !is_location { - p.err("expected valid Directive Location"); - } - return; + p.err("expected valid Directive Location"); } } - if p.peek_data().is_some() { - return directive_locations(p, true); - } - } - if !is_location { - p.err("expected Directive Locations"); + } else { + p.err("expected Directive Location"); } } +/// See: https://spec.graphql.org/October2021/#DirectiveLocations +/// +/// *DirectiveLocations*: +/// DirectiveLocations **|** DirectiveLocation +/// **|**? DirectiveLocation +pub(crate) fn directive_locations(p: &mut Parser) { + p.parse_separated_list(T![|], S![|], directive_location); +} + /// See: https://spec.graphql.org/October2021/#Directive /// /// *Directive[Const]*: @@ -188,9 +193,9 @@ pub(crate) fn directive(p: &mut Parser, constness: Constness) { /// Directive[?Const]* pub(crate) fn directives(p: &mut Parser, constness: Constness) { let _g = p.start_node(SyntaxKind::DIRECTIVES); - while let Some(T![@]) = p.peek() { + p.peek_while_kind(T![@], |p| { directive(p, constness); - } + }); } #[cfg(test)] diff --git a/crates/apollo-parser/src/parser/grammar/document.rs b/crates/apollo-parser/src/parser/grammar/document.rs index 3c722e84c..57df307a2 100644 --- a/crates/apollo-parser/src/parser/grammar/document.rs +++ b/crates/apollo-parser/src/parser/grammar/document.rs @@ -5,6 +5,7 @@ use crate::{ }, Parser, SyntaxKind, TokenKind, }; +use std::ops::ControlFlow; /// See: https://spec.graphql.org/October2021/#Document /// @@ -13,16 +14,19 @@ use crate::{ pub(crate) fn document(p: &mut Parser) { let doc = p.start_node(SyntaxKind::DOCUMENT); - while let Some(node) = p.peek() { + p.peek_while(|p, kind| { assert_eq!( p.recursion_limit.current, 0, "unbalanced limit increment / decrement" ); - match node { + match kind { TokenKind::StringValue => { - let def = p.peek_data_n(2).unwrap(); - select_definition(def, p); + if let Some(def) = p.peek_data_n(2) { + select_definition(def, p); + } else { + p.err_and_pop("expected a definition after this StringValue"); + } } TokenKind::Name => { let def = p.peek_data().unwrap(); @@ -32,10 +36,12 @@ pub(crate) fn document(p: &mut Parser) { let def = p.peek_data().unwrap(); select_definition(def, p); } - TokenKind::Eof => break, + TokenKind::Eof => return ControlFlow::Break(()), _ => p.err_and_pop("expected a StringValue, Name or OperationDefinition"), } - } + + ControlFlow::Continue(()) + }); p.push_ignored(); @@ -274,4 +280,21 @@ enum join__Graph { } } } + + #[test] + fn trailing_description_at_limit() { + let parser = crate::Parser::new( + r#" + "All our queries" + type Query { + a: Int + } + + "Imagine another type below!" + "#, + ) + .token_limit(18); + // Must not panic + let _cst = parser.parse(); + } } diff --git a/crates/apollo-parser/src/parser/grammar/enum_.rs b/crates/apollo-parser/src/parser/grammar/enum_.rs index dcf8e272d..114748e6f 100644 --- a/crates/apollo-parser/src/parser/grammar/enum_.rs +++ b/crates/apollo-parser/src/parser/grammar/enum_.rs @@ -3,6 +3,7 @@ use crate::parser::grammar::value::Constness; use crate::parser::grammar::{description, directive, name, value}; use crate::{Parser, SyntaxKind, TokenKind, S, T}; +use std::ops::ControlFlow; /// See: https://spec.graphql.org/October2021/#EnumTypeDefinition /// @@ -78,9 +79,13 @@ pub(crate) fn enum_values_definition(p: &mut Parser) { _ => p.err("expected Enum Value Definition"), } - while let Some(TokenKind::Name | TokenKind::StringValue) = p.peek() { - enum_value_definition(p); - } + p.peek_while(|p, kind| match kind { + TokenKind::Name | TokenKind::StringValue => { + enum_value_definition(p); + ControlFlow::Continue(()) + } + _ => ControlFlow::Break(()), + }); p.expect(T!['}'], S!['}']); } diff --git a/crates/apollo-parser/src/parser/grammar/field.rs b/crates/apollo-parser/src/parser/grammar/field.rs index 54ce0264e..a391c9d38 100644 --- a/crates/apollo-parser/src/parser/grammar/field.rs +++ b/crates/apollo-parser/src/parser/grammar/field.rs @@ -3,6 +3,7 @@ use crate::parser::grammar::value::Constness; use crate::parser::grammar::{argument, description, directive, name, selection, ty}; use crate::{Parser, SyntaxKind, TokenKind, S, T}; +use std::ops::ControlFlow; /// See: https://spec.graphql.org/October2021/#Field /// @@ -40,10 +41,13 @@ pub(crate) fn field(p: &mut Parser) { pub(crate) fn fields_definition(p: &mut Parser) { let _g = p.start_node(SyntaxKind::FIELDS_DEFINITION); p.bump(S!['{']); - while let Some(TokenKind::Name | TokenKind::StringValue) = p.peek() { - // Guaranteed to eat at least one token if the next token is a Name or StringValue - field_definition(p); - } + p.peek_while(|p, kind| match kind { + TokenKind::Name | TokenKind::StringValue => { + field_definition(p); + ControlFlow::Continue(()) + } + _ => ControlFlow::Break(()), + }); p.expect(T!['}'], S!['}']); } diff --git a/crates/apollo-parser/src/parser/grammar/input.rs b/crates/apollo-parser/src/parser/grammar/input.rs index 3d0242d69..63d4531a5 100644 --- a/crates/apollo-parser/src/parser/grammar/input.rs +++ b/crates/apollo-parser/src/parser/grammar/input.rs @@ -1,6 +1,7 @@ use crate::parser::grammar::value::Constness; use crate::parser::grammar::{description, directive, name, ty, value}; use crate::{Parser, SyntaxKind, TokenKind, S, T}; +use std::ops::ControlFlow; /// See: https://spec.graphql.org/October2021/#InputObjectTypeDefinition /// @@ -76,9 +77,14 @@ pub(crate) fn input_fields_definition(p: &mut Parser) { } else { p.err("expected an Input Value Definition"); } - while let Some(TokenKind::Name | TokenKind::StringValue) = p.peek() { - input_value_definition(p); - } + p.peek_while(|p, kind| { + if matches!(kind, TokenKind::Name | TokenKind::StringValue) { + input_value_definition(p); + ControlFlow::Continue(()) + } else { + ControlFlow::Break(()) + } + }); p.expect(T!['}'], S!['}']); } diff --git a/crates/apollo-parser/src/parser/grammar/object.rs b/crates/apollo-parser/src/parser/grammar/object.rs index 00b7e6a8f..afaa22b85 100644 --- a/crates/apollo-parser/src/parser/grammar/object.rs +++ b/crates/apollo-parser/src/parser/grammar/object.rs @@ -90,24 +90,13 @@ pub(crate) fn implements_interfaces(p: &mut Parser) { let _g = p.start_node(SyntaxKind::IMPLEMENTS_INTERFACES); p.bump(SyntaxKind::implements_KW); - if let Some(T![&]) = p.peek() { - p.bump(S![&]); - } - - if let Some(TokenKind::Name) = p.peek() { - ty::named_type(p); - } else { - p.err("expected an Interface name"); - } - - while let Some(T![&]) = p.peek() { - p.bump(S![&]); + p.parse_separated_list(T![&], S![&], |p| { if let Some(TokenKind::Name) = p.peek() { ty::named_type(p); } else { p.err("expected an Interface name"); } - } + }); } #[cfg(test)] diff --git a/crates/apollo-parser/src/parser/grammar/operation.rs b/crates/apollo-parser/src/parser/grammar/operation.rs index 32ee28788..3a40fa373 100644 --- a/crates/apollo-parser/src/parser/grammar/operation.rs +++ b/crates/apollo-parser/src/parser/grammar/operation.rs @@ -1,46 +1,6 @@ use crate::parser::grammar::value::Constness; -use crate::parser::grammar::{directive, name, selection, ty, variable}; -use crate::{Parser, SyntaxKind, TokenKind, S, T}; - -/// RootOperationTypeDefinition is used in a SchemaDefinition. Not to be confused -/// with OperationDefinition. -/// -/// See: https://spec.graphql.org/October2021/#RootOperationTypeDefinition -/// -/// *RootOperationTypeDefinition*: -/// OperationType **:** NamedType -pub(crate) fn root_operation_type_definition(p: &mut Parser, is_operation_type: bool) { - if let Some(T!['{']) = p.peek() { - p.bump(S!['{']); - } - - if let Some(TokenKind::Name) = p.peek() { - let guard = p.start_node(SyntaxKind::ROOT_OPERATION_TYPE_DEFINITION); - operation_type(p); - if let Some(T![:]) = p.peek() { - p.bump(S![:]); - ty::named_type(p); - if p.peek().is_some() { - guard.finish_node(); - - // TODO use a loop instead of recursion - if p.recursion_limit.check_and_increment() { - p.limit_err("parser recursion limit reached"); - return; - } - root_operation_type_definition(p, true); - p.recursion_limit.decrement(); - return; - } - } else { - p.err("expected a Name Type"); - } - } - - if !is_operation_type { - p.err("expected an Operation Type"); - } -} +use crate::parser::grammar::{directive, name, selection, variable}; +use crate::{Parser, SyntaxKind, TokenKind, T}; /// See: https://spec.graphql.org/October2021/#OperationDefinition /// @@ -92,7 +52,7 @@ pub(crate) fn operation_type(p: &mut Parser) { "query" => p.bump(SyntaxKind::query_KW), "subscription" => p.bump(SyntaxKind::subscription_KW), "mutation" => p.bump(SyntaxKind::mutation_KW), - _ => p.err("expected either a 'mutation', a 'query', or a 'subscription'"), + _ => p.err_and_pop("expected either a 'mutation', a 'query', or a 'subscription'"), } } } diff --git a/crates/apollo-parser/src/parser/grammar/schema.rs b/crates/apollo-parser/src/parser/grammar/schema.rs index f570103e5..eff727828 100644 --- a/crates/apollo-parser/src/parser/grammar/schema.rs +++ b/crates/apollo-parser/src/parser/grammar/schema.rs @@ -1,7 +1,25 @@ use crate::parser::grammar::value::Constness; -use crate::parser::grammar::{description, directive, operation}; +use crate::parser::grammar::{description, directive, operation, ty}; use crate::{Parser, SyntaxKind, TokenKind, S, T}; +/// RootOperationTypeDefinition is used in a SchemaDefinition. Not to be confused +/// with OperationDefinition. +/// +/// See: https://spec.graphql.org/October2021/#RootOperationTypeDefinition +/// +/// *RootOperationTypeDefinition*: +/// OperationType **:** NamedType +fn root_operation_type_definition(p: &mut Parser) { + let _guard = p.start_node(SyntaxKind::ROOT_OPERATION_TYPE_DEFINITION); + operation::operation_type(p); + if let Some(T![:]) = p.peek() { + p.bump(S![:]); + ty::named_type(p); + } else { + p.err("expected a Name Type"); + } +} + /// See: https://spec.graphql.org/October2021/#SchemaDefinition /// /// *SchemaDefinition*: @@ -22,10 +40,18 @@ pub(crate) fn schema_definition(p: &mut Parser) { } if let Some(T!['{']) = p.peek() { - operation::root_operation_type_definition(p, false); + p.bump(S!['{']); + + let mut has_root_operation_types = false; + p.peek_while_kind(TokenKind::Name, |p| { + has_root_operation_types = true; + root_operation_type_definition(p); + }); + if !has_root_operation_types { + p.err("expected Root Operation Type Definition"); + } + p.expect(T!['}'], S!['}']); - } else { - p.err("expected Root Operation Type Definition"); } } @@ -47,8 +73,13 @@ pub(crate) fn schema_extension(p: &mut Parser) { } if let Some(T!['{']) = p.peek() { - meets_requirements = true; - operation::root_operation_type_definition(p, false); + p.bump(S!['{']); + + p.peek_while_kind(TokenKind::Name, |p| { + meets_requirements = true; + root_operation_type_definition(p); + }); + p.expect(T!['}'], S!['}']); } diff --git a/crates/apollo-parser/src/parser/grammar/selection.rs b/crates/apollo-parser/src/parser/grammar/selection.rs index 43cb78069..35a30077d 100644 --- a/crates/apollo-parser/src/parser/grammar/selection.rs +++ b/crates/apollo-parser/src/parser/grammar/selection.rs @@ -1,3 +1,5 @@ +use std::ops::ControlFlow; + use crate::{ parser::grammar::{field, fragment}, Parser, SyntaxKind, TokenKind, S, T, @@ -52,42 +54,42 @@ pub(crate) fn field_set(p: &mut Parser) { pub(crate) fn selection(p: &mut Parser) { let mut has_selection = false; - while let Some(node) = p.peek() { - match node { - T![...] => { - let next_token = p.peek_token_n(2); - match next_token { - Some(next_token) => { - if next_token.kind() == TokenKind::Name && next_token.data() != "on" { - fragment::fragment_spread(p); - } else if matches!( - next_token.kind(), - TokenKind::At | TokenKind::Name | TokenKind::LCurly - ) { - fragment::inline_fragment(p); - } else { - p.err("expected an Inline Fragment or a Fragment Spread"); - p.bump(S![...]); - } - has_selection = true; + p.peek_while(|p, kind| match kind { + T![...] => { + let next_token = p.peek_token_n(2); + match next_token { + Some(next_token) => { + if next_token.kind() == TokenKind::Name && next_token.data() != "on" { + fragment::fragment_spread(p); + } else if matches!( + next_token.kind(), + TokenKind::At | TokenKind::Name | TokenKind::LCurly + ) { + fragment::inline_fragment(p); + } else { + p.err("expected an Inline Fragment or a Fragment Spread"); + p.bump(S![...]); } - None => p.err("expected an Inline Fragment or a Fragment Spread"), + has_selection = true; + ControlFlow::Continue(()) } - } - T!['{'] => { - break; - } - TokenKind::Name => { - field::field(p); - has_selection = true; - } - _ => { - if !has_selection { - p.err("expected at least one Selection in Selection Set"); + None => { + p.err_and_pop("expected an Inline Fragment or a Fragment Spread"); + ControlFlow::Break(()) } - break; } } + T!['{'] => ControlFlow::Break(()), + TokenKind::Name => { + field::field(p); + has_selection = true; + ControlFlow::Continue(()) + } + _ => ControlFlow::Break(()), + }); + + if !has_selection { + p.err("expected at least one Selection in Selection Set"); } } diff --git a/crates/apollo-parser/src/parser/grammar/ty.rs b/crates/apollo-parser/src/parser/grammar/ty.rs index 0540f7ffe..883d63d28 100644 --- a/crates/apollo-parser/src/parser/grammar/ty.rs +++ b/crates/apollo-parser/src/parser/grammar/ty.rs @@ -21,7 +21,8 @@ use crate::{parser::grammar::name, Parser, SyntaxKind, Token, TokenKind, S, T}; pub(crate) fn ty(p: &mut Parser) { match parse(p) { Ok(_) => (), - Err(token) => p.err_at_token(&token, "expected a type"), + Err(Some(token)) => p.err_at_token(&token, "expected a type"), + Err(None) => p.err("expected a type"), } } @@ -30,7 +31,7 @@ pub(crate) fn ty(p: &mut Parser) { /// When errors occur deeper inside nested types like lists, this function /// pushes errors *inside* the list to the parser, and returns an Ok() with /// an incomplete type. -fn parse<'a>(p: &mut Parser<'a>) -> Result<(), Token<'a>> { +fn parse<'a>(p: &mut Parser<'a>) -> Result<(), Option>> { let checkpoint = p.checkpoint_node(); match p.peek() { Some(T!['[']) => { @@ -44,7 +45,7 @@ fn parse<'a>(p: &mut Parser<'a>) -> Result<(), Token<'a>> { let result = parse(p); p.recursion_limit.decrement(); - if let Err(token) = result { + if let Err(Some(token)) = result { // TODO(@goto-bus-stop) ideally the span here would point to the entire list // type, so both opening and closing brackets `[]`. p.err_at_token(&token, "expected item type"); @@ -59,7 +60,8 @@ fn parse<'a>(p: &mut Parser<'a>) -> Result<(), Token<'a>> { name::validate_name(token.data(), p); p.push_token(SyntaxKind::IDENT, token); } - _ => return Err(p.pop()), + Some(_) => return Err(Some(p.pop())), + None => return Err(None), }; // There may be whitespace inside a list node or between the type and the non-null `!`. @@ -85,6 +87,7 @@ fn parse<'a>(p: &mut Parser<'a>) -> Result<(), Token<'a>> { /// *NamedType*: /// Name pub(crate) fn named_type(p: &mut Parser) { + // TODO(@goto-bus-stop) can we make this error instead if no name is found? if let Some(TokenKind::Name) = p.peek() { let _g = p.start_node(SyntaxKind::NAMED_TYPE); name::name(p); diff --git a/crates/apollo-parser/src/parser/grammar/union_.rs b/crates/apollo-parser/src/parser/grammar/union_.rs index 10d1855e6..6a777fdc0 100644 --- a/crates/apollo-parser/src/parser/grammar/union_.rs +++ b/crates/apollo-parser/src/parser/grammar/union_.rs @@ -74,24 +74,13 @@ pub(crate) fn union_member_types(p: &mut Parser) { let _g = p.start_node(SyntaxKind::UNION_MEMBER_TYPES); p.bump(S![=]); - if let Some(T![|]) = p.peek() { - p.bump(S![|]); - } - - if let Some(TokenKind::Name) = p.peek() { - ty::named_type(p); - } else { - p.err("expected Union Member Types"); - } - - while let Some(T![|]) = p.peek() { - p.bump(S![|]); + p.parse_separated_list(T![|], S![|], |p| { if let Some(TokenKind::Name) = p.peek() { ty::named_type(p); } else { p.err("expected Union Member Type"); } - } + }); } #[cfg(test)] diff --git a/crates/apollo-parser/src/parser/grammar/value.rs b/crates/apollo-parser/src/parser/grammar/value.rs index 6841cbe5b..012cbaecd 100644 --- a/crates/apollo-parser/src/parser/grammar/value.rs +++ b/crates/apollo-parser/src/parser/grammar/value.rs @@ -2,6 +2,7 @@ use crate::{ parser::grammar::{name, variable}, Parser, SyntaxKind, TokenKind, S, T, }; +use std::ops::ControlFlow; #[derive(Clone, Copy)] pub(crate) enum Constness { @@ -83,13 +84,17 @@ pub(crate) fn value(p: &mut Parser, constness: Constness, pop_on_error: bool) { /// Name *but not* **true** *or* **false** *or* **null** pub(crate) fn enum_value(p: &mut Parser) { let _g = p.start_node(SyntaxKind::ENUM_VALUE); - let name = p.peek_data().unwrap(); + match p.peek_token() { + Some(token) if token.kind == TokenKind::Name => { + let name = token.data; + if matches!(name, "true" | "false" | "null") { + p.err("invalid Enum Value"); + } - if matches!(name, "true" | "false" | "null") { - p.err("unexpected Enum Value"); + name::name(p); + } + _ => p.err("expected Enum Value"), } - - name::name(p); } /// See: https://spec.graphql.org/October2021/#ListValue @@ -101,21 +106,21 @@ pub(crate) fn list_value(p: &mut Parser, constness: Constness) { let _g = p.start_node(SyntaxKind::LIST_VALUE); p.bump(S!['[']); - while let Some(node) = p.peek() { + p.peek_while(|p, node| { if node == T![']'] { p.bump(S![']']); - break; + ControlFlow::Break(()) } else if node == TokenKind::Eof { - break; + ControlFlow::Break(()) + } else if p.recursion_limit.check_and_increment() { + p.limit_err("parser recursion limit reached"); + ControlFlow::Break(()) } else { - if p.recursion_limit.check_and_increment() { - p.limit_err("parser recursion limit reached"); - return; - } value(p, constness, true); - p.recursion_limit.decrement() + p.recursion_limit.decrement(); + ControlFlow::Continue(()) } - } + }); } /// See: https://spec.graphql.org/October2021/#ObjectValue @@ -127,9 +132,9 @@ pub(crate) fn object_value(p: &mut Parser, constness: Constness) { let _g = p.start_node(SyntaxKind::OBJECT_VALUE); p.bump(S!['{']); - while let Some(TokenKind::Name) = p.peek() { + p.peek_while_kind(TokenKind::Name, |p| { object_field(p, constness); - } + }); p.expect(T!['}'], S!['}']); } diff --git a/crates/apollo-parser/src/parser/grammar/variable.rs b/crates/apollo-parser/src/parser/grammar/variable.rs index 7218ed14d..4aca7b919 100644 --- a/crates/apollo-parser/src/parser/grammar/variable.rs +++ b/crates/apollo-parser/src/parser/grammar/variable.rs @@ -15,9 +15,7 @@ pub(crate) fn variable_definitions(p: &mut Parser) { } else { p.err("expected a Variable Definition") } - while let Some(T![$]) = p.peek() { - variable_definition(p); - } + p.peek_while_kind(T![$], variable_definition); p.expect(T![')'], S![')']); } diff --git a/crates/apollo-parser/src/parser/mod.rs b/crates/apollo-parser/src/parser/mod.rs index 93806391b..4a114b813 100644 --- a/crates/apollo-parser/src/parser/mod.rs +++ b/crates/apollo-parser/src/parser/mod.rs @@ -5,13 +5,14 @@ mod token_text; pub(crate) mod grammar; -use std::{cell::RefCell, rc::Rc}; - use crate::{ cst::{Document, SelectionSet, Type}, lexer::Lexer, Error, LimitTracker, Token, TokenKind, }; +use std::cell::RefCell; +use std::ops::ControlFlow; +use std::rc::Rc; pub use generated::syntax_kind::SyntaxKind; pub use language::{SyntaxElement, SyntaxNode, SyntaxNodeChildren, SyntaxNodePtr, SyntaxToken}; @@ -272,7 +273,7 @@ impl<'input> Parser<'input> { } /// Create a parser error at a given location and push it into the error vector. - pub(crate) fn err_at_token(&mut self, current: &Token, message: &str) { + pub(crate) fn err_at_token(&mut self, current: &Token<'_>, message: &str) { let err = if current.kind == TokenKind::Eof { Error::eof(message, current.index()) } else { @@ -428,6 +429,63 @@ impl<'input> Parser<'input> { self.peek_token().map(|token| token.kind()) } + /// Repeatedly peek at the next token and call the parse function. The parse function must + /// advance parsing or break out of the loop. + pub(crate) fn peek_while( + &mut self, + mut run: impl FnMut(&mut Parser, TokenKind) -> ControlFlow<()>, + ) { + while let Some(kind) = self.peek() { + let before = self.current_token.clone(); + match run(self, kind) { + ControlFlow::Break(()) => break, + ControlFlow::Continue(()) => { + debug_assert!( + before != self.current_token, + "peek_while() iteration must advance parsing" + ); + } + } + } + } + + /// Call the parse function while the next token is of the expected kind. The parse function + /// must consume the peeked token. + pub(crate) fn peek_while_kind(&mut self, expect: TokenKind, mut run: impl FnMut(&mut Parser)) { + while let Some(kind) = self.peek() { + if kind != expect { + break; + } + + let before = self.current_token.clone(); + run(self); + debug_assert!( + before != self.current_token, + "peek_while_kind() iteration must advance parsing" + ); + } + } + + /// Call the parse function, separated by a token given in `separator`. This parses at least + /// one item. The first item may optionally be prefixed by an initial separator. + pub(crate) fn parse_separated_list( + &mut self, + separator: TokenKind, + separator_syntax: SyntaxKind, + mut run: impl FnMut(&mut Parser), + ) { + if matches!(self.peek(), Some(kind) if kind == separator) { + self.bump(separator_syntax); + } + + run(self); + + self.peek_while_kind(separator, |p| { + p.bump(separator_syntax); + run(p); + }); + } + /// Peek the next Token and return it. pub(crate) fn peek_token(&mut self) -> Option<&Token<'input>> { if self.current_token.is_none() { @@ -874,4 +932,11 @@ mod tests { } }); } + + #[test] + fn no_infinite_loop() { + let source = r#"{ ..."#; + let parser = Parser::new(source).token_limit(3); + let _cst = parser.parse(); + } } diff --git a/crates/apollo-parser/test_data/parser/err/0021_union_definition_with_missing_union_members.txt b/crates/apollo-parser/test_data/parser/err/0021_union_definition_with_missing_union_members.txt index 4f95105d8..ac28635d7 100644 --- a/crates/apollo-parser/test_data/parser/err/0021_union_definition_with_missing_union_members.txt +++ b/crates/apollo-parser/test_data/parser/err/0021_union_definition_with_missing_union_members.txt @@ -5,5 +5,5 @@ - UNION_MEMBER_TYPES@6..7 - EQ@6..7 "=" - ERROR@6:7 "expected a Name" = -- ERROR@7:7 "expected Union Member Types" EOF +- ERROR@7:7 "expected Union Member Type" EOF recursion limit: 500, high: 0 \ No newline at end of file diff --git a/crates/apollo-parser/test_data/parser/err/0054_root_operation_type_with_extra_brackets.graphql b/crates/apollo-parser/test_data/parser/err/0054_root_operation_type_with_extra_brackets.graphql new file mode 100644 index 000000000..7e337fdc8 --- /dev/null +++ b/crates/apollo-parser/test_data/parser/err/0054_root_operation_type_with_extra_brackets.graphql @@ -0,0 +1,5 @@ +schema { + query: Query + { mutation: Mutation + { subscription: Subscription +} diff --git a/crates/apollo-parser/test_data/parser/err/0054_root_operation_type_with_extra_brackets.txt b/crates/apollo-parser/test_data/parser/err/0054_root_operation_type_with_extra_brackets.txt new file mode 100644 index 000000000..ce2f89653 --- /dev/null +++ b/crates/apollo-parser/test_data/parser/err/0054_root_operation_type_with_extra_brackets.txt @@ -0,0 +1,45 @@ +- DOCUMENT@0..80 + - SCHEMA_DEFINITION@0..23 + - schema_KW@0..6 "schema" + - WHITESPACE@6..7 " " + - L_CURLY@7..8 "{" + - WHITESPACE@8..11 "\n " + - ROOT_OPERATION_TYPE_DEFINITION@11..23 + - OPERATION_TYPE@11..16 + - query_KW@11..16 "query" + - COLON@16..17 ":" + - WHITESPACE@17..18 " " + - NAMED_TYPE@18..23 + - NAME@18..23 + - IDENT@18..23 "Query" + - WHITESPACE@23..26 "\n " + - OPERATION_DEFINITION@26..79 + - SELECTION_SET@26..79 + - L_CURLY@26..27 "{" + - WHITESPACE@27..28 " " + - FIELD@28..79 + - ALIAS@28..37 + - NAME@28..36 + - IDENT@28..36 "mutation" + - COLON@36..37 ":" + - WHITESPACE@37..38 " " + - NAME@38..46 + - IDENT@38..46 "Mutation" + - WHITESPACE@46..49 "\n " + - SELECTION_SET@49..79 + - L_CURLY@49..50 "{" + - WHITESPACE@50..51 " " + - FIELD@51..77 + - ALIAS@51..64 + - NAME@51..63 + - IDENT@51..63 "subscription" + - COLON@63..64 ":" + - WHITESPACE@64..65 " " + - NAME@65..77 + - IDENT@65..77 "Subscription" + - WHITESPACE@77..78 "\n" + - R_CURLY@78..79 "}" + - WHITESPACE@79..80 "\n" +- ERROR@26:27 "expected R_CURLY, got {" { +- ERROR@80:80 "expected R_CURLY, got EOF" EOF +recursion limit: 500, high: 2 \ No newline at end of file diff --git a/crates/apollo-parser/test_data/parser/ok/0025_schema_definition.txt b/crates/apollo-parser/test_data/parser/ok/0025_schema_definition.txt index 077904792..6354acbc5 100644 --- a/crates/apollo-parser/test_data/parser/ok/0025_schema_definition.txt +++ b/crates/apollo-parser/test_data/parser/ok/0025_schema_definition.txt @@ -33,4 +33,4 @@ - IDENT@82..104 "MySubscriptionRootType" - WHITESPACE@104..105 "\n" - R_CURLY@105..106 "}" -recursion limit: 500, high: 3 \ No newline at end of file +recursion limit: 500, high: 0 \ No newline at end of file diff --git a/crates/apollo-parser/test_data/parser/ok/0026_schema_extension.txt b/crates/apollo-parser/test_data/parser/ok/0026_schema_extension.txt index 32fd5b18b..1e53806a8 100644 --- a/crates/apollo-parser/test_data/parser/ok/0026_schema_extension.txt +++ b/crates/apollo-parser/test_data/parser/ok/0026_schema_extension.txt @@ -27,4 +27,4 @@ - IDENT@42..61 "MyExtendedQueryType" - WHITESPACE@61..62 "\n" - R_CURLY@62..63 "}" -recursion limit: 500, high: 1 \ No newline at end of file +recursion limit: 500, high: 0 \ No newline at end of file diff --git a/crates/apollo-parser/test_data/parser/ok/0032_supergraph.txt b/crates/apollo-parser/test_data/parser/ok/0032_supergraph.txt index 9c976f1f5..f2aa065ad 100644 --- a/crates/apollo-parser/test_data/parser/ok/0032_supergraph.txt +++ b/crates/apollo-parser/test_data/parser/ok/0032_supergraph.txt @@ -4184,4 +4184,4 @@ - IDENT@7486..7492 "String" - WHITESPACE@7492..7493 "\n" - R_CURLY@7493..7494 "}" -recursion limit: 500, high: 2 \ No newline at end of file +recursion limit: 500, high: 1 \ No newline at end of file diff --git a/fuzz/Cargo.toml b/fuzz/Cargo.toml index 35042fd63..945004450 100644 --- a/fuzz/Cargo.toml +++ b/fuzz/Cargo.toml @@ -29,6 +29,12 @@ path = "fuzz_targets/parser.rs" test = false doc = false +[[bin]] +name = "parser_limited" +path = "fuzz_targets/parser_limited.rs" +test = false +doc = false + [[bin]] name = "lexer" path = "fuzz_targets/lexer.rs" diff --git a/fuzz/fuzz_targets/parser_limited.rs b/fuzz/fuzz_targets/parser_limited.rs new file mode 100644 index 000000000..76d01f96c --- /dev/null +++ b/fuzz/fuzz_targets/parser_limited.rs @@ -0,0 +1,21 @@ +#![no_main] +use apollo_parser::Parser; +use libfuzzer_sys::fuzz_target; +use std::panic; + +// Use completely arbitrary input and a token limit to find cases where the limit +// being reached causes a loop in the parser. +fuzz_target!(|data: &str| { + let _ = env_logger::try_init(); + + let parser = panic::catch_unwind(|| Parser::new(data)); + let parser = match parser { + Err(err) => { + panic!("error {err:?}"); + } + Ok(p) => p.token_limit(500), + }; + + // We expect to have errors--we just want to make sure it does not crash. + let _tree = parser.parse(); +});