From f4dc55aaaf8c214e2f1b644d5ccaf2d8df445929 Mon Sep 17 00:00:00 2001 From: Alexander Wood Date: Fri, 18 Aug 2023 00:55:33 +0100 Subject: [PATCH] god i hate parsers --- app/Main.hs | 5 ++-- source.elr | 2 +- src/Elara/AST/Generic.hs | 12 ++++++++- src/Elara/AST/Pretty.hs | 5 ++-- src/Elara/Parse/Expression.hs | 6 ++--- src/Elara/Parse/Literal.hs | 9 ++++--- src/Elara/Parse/Pattern.hs | 41 +++++++++++++++++++--------- test/Arbitrary/Names.hs | 1 + test/Parse.hs | 51 ++++++++++++++++++++++++++++++++--- test/Parse/Common.hs | 14 +++++++++- 10 files changed, 115 insertions(+), 31 deletions(-) diff --git a/app/Main.hs b/app/Main.hs index 1651bbe9..702119e4 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -17,6 +17,7 @@ import Elara.AST.Module import Elara.AST.Name (NameLike (..)) import Elara.AST.Region (unlocated) import Elara.AST.Select +import Elara.AST.StripLocation (StripLocation (..)) import Elara.Data.Kind.Infer import Elara.Data.Pretty import Elara.Data.TopologicalGraph (TopologicalGraph, createGraph, traverseGraph, traverseGraphRevTopologically, traverseGraph_) @@ -26,7 +27,7 @@ import Elara.Emit import Elara.Error import Elara.Error.Codes qualified as Codes (fileReadError) import Elara.Lexer.Reader -import Elara.Lexer.Token (Lexeme) +import Elara.Lexer.Token (Lexeme, Token) import Elara.Lexer.Utils import Elara.Parse import Elara.Parse.Stream @@ -150,7 +151,7 @@ lexFile path = do case evalLexMonad path contents readTokens of Left err -> report err *> nothingE Right lexemes -> do - -- debugColored (stripLocation <$> lexemes) + debugColored (stripLocation <$> lexemes :: [Token]) justE (contents, lexemes) parseModule :: (Members MainMembers r) => FilePath -> (String, [Lexeme]) -> Sem r (Module 'Frontend) diff --git a/source.elr b/source.elr index 910f1782..5526ee90 100644 --- a/source.elr +++ b/source.elr @@ -1,3 +1,3 @@ import Prelude -let y = let id = \x -> x in id id \ No newline at end of file +let y (y :: x) = 1 \ No newline at end of file diff --git a/src/Elara/AST/Generic.hs b/src/Elara/AST/Generic.hs index fabbe823..b8524080 100644 --- a/src/Elara/AST/Generic.hs +++ b/src/Elara/AST/Generic.hs @@ -231,6 +231,14 @@ coerceType' (ListType a) = ListType (coerceType a) -- Pretty printing deriving newtype instance Pretty (ASTLocate ast (BinaryOperator' ast)) => Pretty (BinaryOperator ast) +instance + ( Pretty (CleanupLocated (ASTLocate' ast (Select "SymOp" ast))) + , (Pretty (CleanupLocated (ASTLocate' ast (Select "Infixed" ast)))) + ) => + Pretty (BinaryOperator' ast) + where + pretty (SymOp op) = pretty op + pretty (Infixed op) = "`" <> pretty op <> "`" deriving newtype instance Pretty (ASTLocate ast (Type' ast)) => Pretty (Type ast) instance @@ -345,6 +353,7 @@ instance , Pretty a2 , a2 ~ UnwrapMaybe (Select "LetPattern" ast) , (ToMaybe (Select "LetPattern" ast) (Maybe a2)) + , (Pretty (CleanupLocated (ASTLocate' ast (BinaryOperator' ast)))) ) => Pretty (Expr' ast) where @@ -360,11 +369,12 @@ instance pretty (If e1 e2 e3) = prettyIfExpr e1 e2 e3 pretty (List l) = prettyListExpr l pretty (Match e m) = prettyMatchExpr e (prettyMatchBranch <$> m) - pretty (LetIn v p e1 e2) = prettyLetInExpr v (maybeToList $ toMaybe p :: [a2]) e1 (Just e2) + pretty (LetIn v p e1 e2) = prettyLetInExpr v (maybeToList $ toMaybe p :: [a2]) e1 (e2) pretty (Let v p e) = prettyLetExpr v (maybeToList $ toMaybe p :: [a2]) e pretty (Block b) = prettyBlockExpr b pretty (InParens e) = parens (pretty e) pretty (Tuple t) = prettyTupleExpr t + pretty (BinaryOperator op e1 e2) = prettyBinaryOperatorExpr e1 op e2 instance ( Pretty a1 diff --git a/src/Elara/AST/Pretty.hs b/src/Elara/AST/Pretty.hs index fdfcca1a..83044900 100644 --- a/src/Elara/AST/Pretty.hs +++ b/src/Elara/AST/Pretty.hs @@ -58,9 +58,8 @@ prettyMatchExpr e m = parens ("match" <+> pretty e <+> "with" <+> prettyBlockExp prettyMatchBranch :: (Pretty a1, Pretty a2) => (a1, a2) -> Doc AnsiStyle prettyMatchBranch (p, e) = pretty p <+> "->" <+> pretty e -prettyLetInExpr :: (Pretty a1, Pretty a2, Pretty a3, Pretty a4) => a1 -> [a2] -> a3 -> Maybe a4 -> Doc AnsiStyle -prettyLetInExpr v ps e1 Nothing = prettyLetExpr v ps e1 -prettyLetInExpr v ps e1 (Just e2) = parens ("let" <+> pretty v <+> hsep (pretty <$> ps) <+> "=" <+> pretty e1 <+> "in" <+> pretty e2) +prettyLetInExpr :: (Pretty a1, Pretty a2, Pretty a3, Pretty a4) => a1 -> [a2] -> a3 -> a4 -> Doc AnsiStyle +prettyLetInExpr v ps e1 e2 = parens ("let" <+> pretty v <+> hsep (pretty <$> ps) <+> "=" <+> pretty e1 <+> "in" <+> pretty e2) prettyLetExpr :: (Pretty a1, Pretty a2, Pretty a3) => a1 -> [a2] -> a3 -> Doc AnsiStyle prettyLetExpr v ps e = "let" <+> pretty v <+> hsep (pretty <$> ps) <+> "=" <+> pretty e diff --git a/src/Elara/Parse/Expression.hs b/src/Elara/Parse/Expression.hs index 3ae1f402..a15ebf84 100644 --- a/src/Elara/Parse/Expression.hs +++ b/src/Elara/Parse/Expression.hs @@ -11,7 +11,7 @@ import Elara.Lexer.Token (Token (..)) import Elara.Parse.Combinators (liftedBinary, sepEndBy1') import Elara.Parse.Error import Elara.Parse.Indents -import Elara.Parse.Literal (charLiteral, floatLiteral, integerLiteral, stringLiteral) +import Elara.Parse.Literal (charLiteral, floatLiteral, integerLiteral, stringLiteral, unitLiteral) import Elara.Parse.Names (maybeQualified, opName, typeName, unqualifiedVarName, varName) import Elara.Parse.Pattern import Elara.Parse.Primitives (HParser, IsParser (fromParsec), inParens, located, token_, withPredicate, ()) @@ -48,7 +48,7 @@ statement :: HParser FrontendExpr statement = letStatement "statement" unannotatedExpr :: Iso' FrontendExpr (Located FrontendExpr') -unannotatedExpr = iso (\(Expr (e, _)) -> e) (\x -> Expr (x, _)) +unannotatedExpr = iso (\(Expr (e, _)) -> e) (\x -> Expr (x, Nothing)) binOp, functionCall :: HParser (FrontendExpr -> FrontendExpr -> FrontendExpr) binOp = liftedBinary operator BinaryOperator unannotatedExpr @@ -119,7 +119,7 @@ constructor = locatedExpr $ do pure res unit :: HParser FrontendExpr -unit = locatedExpr (Unit <$ token_ TokenLeftParen <* token_ TokenRightParen) "unit" +unit = locatedExpr (Unit <$ unitLiteral) "unit" int :: HParser FrontendExpr int = locatedExpr (Int <$> integerLiteral) "int" diff --git a/src/Elara/Parse/Literal.hs b/src/Elara/Parse/Literal.hs index 80883868..4da6d21f 100644 --- a/src/Elara/Parse/Literal.hs +++ b/src/Elara/Parse/Literal.hs @@ -1,7 +1,7 @@ -module Elara.Parse.Literal (charLiteral, stringLiteral, integerLiteral, floatLiteral) where +module Elara.Parse.Literal (charLiteral, stringLiteral, integerLiteral, floatLiteral, unitLiteral) where -import Elara.Lexer.Token (Token (TokenChar, TokenFloat, TokenInt, TokenString)) -import Elara.Parse.Primitives (HParser, satisfyMap) +import Elara.Lexer.Token (Token (..)) +import Elara.Parse.Primitives (HParser, satisfyMap, token_) charLiteral :: HParser Char charLiteral = satisfyMap $ \case @@ -22,3 +22,6 @@ floatLiteral :: HParser Double floatLiteral = satisfyMap $ \case TokenFloat i -> Just i _ -> Nothing + +unitLiteral :: HParser () +unitLiteral = (token_ TokenLeftParen <* token_ TokenRightParen) diff --git a/src/Elara/Parse/Pattern.hs b/src/Elara/Parse/Pattern.hs index 8f176899..66472e6f 100644 --- a/src/Elara/Parse/Pattern.hs +++ b/src/Elara/Parse/Pattern.hs @@ -5,21 +5,28 @@ import Elara.AST.Generic (Pattern (..), Pattern' (..)) import Elara.Lexer.Token (Token (..)) import Elara.Parse.Literal import Elara.Parse.Names (typeName, unqualifiedNormalVarName) -import Elara.Parse.Primitives (HParser, inParens, inParens', located, token_) +import Elara.Parse.Primitives (HParser, inParens, located, token_) import HeadedMegaparsec (endHead) -import HeadedMegaparsec qualified as H (parse, toParsec) import Text.Megaparsec (choice, sepEndBy) pattern' :: HParser FrontendPattern pattern' = - choice @[] - [ varPattern + choice + [ consPattern + , constructorPattern' + ] + +constructorPattern' :: HParser FrontendPattern +constructorPattern' = choice [zeroArgConstructorPattern, constructorPattern, terminalPattern] + +terminalPattern :: HParser FrontendPattern +terminalPattern = + choice + [ literalPattern , wildcardPattern + , varPattern , listPattern - , consPattern - , constructorPattern - , inParens' pattern' - , literalPattern + , inParens pattern' ] locatedPattern :: HParser FrontendPattern' -> HParser FrontendPattern @@ -42,26 +49,34 @@ listPattern = locatedPattern $ do consPattern :: HParser FrontendPattern consPattern = locatedPattern $ do (head', tail') <- inParens $ do - head' <- pattern' + head' <- constructorPattern token_ TokenDoubleColon endHead - tail' <- pattern' + tail' <- constructorPattern pure (head', tail') pure $ ConsPattern head' tail' +-- To prevent ambiguity between space-separated patterns and constructor patterns +zeroArgConstructorPattern :: HParser FrontendPattern +zeroArgConstructorPattern = locatedPattern $ do + con <- located typeName + pure $ ConstructorPattern con [] + constructorPattern :: HParser FrontendPattern -constructorPattern = locatedPattern $ do +constructorPattern = locatedPattern $ inParens $ do con <- located typeName - args <- many pattern' + endHead + args <- many terminalPattern pure $ ConstructorPattern con args literalPattern :: HParser FrontendPattern literalPattern = locatedPattern $ - choice @[] + choice [ IntegerPattern <$> integerLiteral , FloatPattern <$> floatLiteral , StringPattern <$> stringLiteral , CharPattern <$> charLiteral + , UnitPattern <$ unitLiteral ] diff --git a/test/Arbitrary/Names.hs b/test/Arbitrary/Names.hs index f8e20cec..af02cf5e 100644 --- a/test/Arbitrary/Names.hs +++ b/test/Arbitrary/Names.hs @@ -31,6 +31,7 @@ instance Arbitrary AlphaUpperText where c <- arbitraryUpper cs <- listOf (oneof [arbitraryLower, arbitraryLower]) pure (c : cs) + arbitraryLower :: Gen Char arbitraryLower = elements ['a' .. 'z'] diff --git a/test/Parse.hs b/test/Parse.hs index 2c42795f..2a6ae4a4 100644 --- a/test/Parse.hs +++ b/test/Parse.hs @@ -3,13 +3,17 @@ module Parse where import Arbitrary.AST () +import Arbitrary.Names (AlphaText (getAlphaText)) import Elara.AST.Generic +import Elara.AST.Name (LowerAlphaName (LowerAlphaName), Name (NVarName), VarName (NormalVarName)) import Elara.AST.Select import Elara.Data.Pretty import Elara.Parse.Expression (exprParser) import Elara.Parse.Stream import Lex.Common import Parse.Common +import Polysemy (run) +import Polysemy.Error (runError) import Print (showPretty, showPrettyUnannotated) import Test.Hspec import Test.Hspec.QuickCheck @@ -17,12 +21,52 @@ import Test.QuickCheck spec :: Spec spec = do - quickCheckSpec + patterns + -- quickCheckSpec pass quickCheckSpec :: Spec quickCheckSpec = modifyMaxSize (const 5) $ prop "Arbitrary expressions parse prettyPrinted" ppEq +patterns :: Spec +patterns = describe "Parses patterns correctly" $ do + describe "Parses terminal patterns correctly" $ do + prop + "Parses arbitrary var patterns correctly" + ( let prop_ArbVarPatParses str = str `shouldParsePattern` Pattern (VarPattern (NormalVarName $ LowerAlphaName str), Nothing) in prop_ArbVarPatParses . getAlphaText + ) + + it "Parses wildcard pattern correctly" $ do + "_" `shouldParsePattern` Pattern (WildcardPattern, Nothing) + + it "Parses unit pattern correctly" $ do + "()" `shouldParsePattern` Pattern (UnitPattern, Nothing) + + prop + "Parses arbitrary int literal patterns correctly" + (\i -> show i `shouldParsePattern` Pattern (IntegerPattern i, Nothing)) + + prop + "Parses arbitrary float literal patterns correctly" + (\i -> show i `shouldParsePattern` Pattern (FloatPattern i, Nothing)) + + prop + "Parses arbitrary char literal patterns correctly" + (\i -> show i `shouldParsePattern` Pattern (CharPattern i, Nothing)) + + prop + "Parses arbitrary string literal patterns correctly" + (\i -> show i `shouldParsePattern` Pattern (StringPattern i, Nothing)) + + it "Parses cons patterns correctly" $ do + "(x :: xs)" + `shouldParsePattern` Pattern + ( ConsPattern + (Pattern (VarPattern "x", Nothing)) + (Pattern (VarPattern "xs", Nothing)) + , Nothing + ) + removeInParens :: Expr 'UnlocatedFrontend -> Expr 'UnlocatedFrontend removeInParens (Expr (Lambda p e, t)) = Expr (Lambda p (removeInParens e), t) removeInParens (Expr (FunctionCall e1 e2, t)) = Expr (FunctionCall (removeInParens e1) (removeInParens e2), t) @@ -38,7 +82,6 @@ removeInParens e = e ppEq :: Expr 'UnlocatedFrontend -> Property ppEq (removeInParens -> expr) = let source = showPrettyUnannotated $ pretty expr - lexed = lex' source - parsed = parse exprParser (TokenStream (toString source) lexed 0) + parsed = run $ runError $ lexAndParse exprParser source cleaned = removeInParens . stripExprLocation <$> parsed - in counterexample (toString $ showPretty $ pretty source) (cleaned `shouldParseProp` expr) + in counterexample ("pretty source: " <> toString (showPretty $ pretty source)) (cleaned `shouldParseProp` expr) diff --git a/test/Parse/Common.hs b/test/Parse/Common.hs index 7155a5b0..1d83b1b2 100644 --- a/test/Parse/Common.hs +++ b/test/Parse/Common.hs @@ -1,11 +1,16 @@ module Parse.Common where +import Elara.AST.Generic (Pattern, stripPatternLocation) +import Elara.AST.Select (UnlocatedAST (..)) +import Elara.AST.Unlocated.Frontend () import Elara.Parse.Error +import Elara.Parse.Pattern (pattern') import Elara.Parse.Primitives import Elara.Parse.Stream import Lex.Common (lex') import Polysemy -import Polysemy.Error (Error, fromEither) +import Polysemy.Error (Error, fromEither, runError) +import Test.Hspec (Expectation, expectationFailure, shouldBe) import Test.QuickCheck import Text.Megaparsec (ShowErrorComponent, TraversableStream, VisualStream, eof, errorBundlePretty, runParser) @@ -15,6 +20,13 @@ lexAndParse p t = fromEither (Parse.Common.parse p (TokenStream (toString t) (le parse :: HParser a -> TokenStream -> Either (WParseErrorBundle TokenStream ElaraParseError) a parse p = first WParseErrorBundle . runParser (toParsec p <* eof) "" +shouldParsePattern :: Text -> Pattern 'UnlocatedFrontend -> Expectation +shouldParsePattern source expected = do + let parsed = run $ runError $ lexAndParse pattern' source + case parsed of + Left err -> expectationFailure (errorBundlePretty (unWParseErrorBundle err)) + Right ast -> stripPatternLocation ast `shouldBe` expected + shouldParseProp :: (VisualStream s, TraversableStream s, ShowErrorComponent e, Eq a, Show a) => Either (WParseErrorBundle s e) a -> a -> Property result `shouldParseProp` a = ioProperty $ case result of