From 74f7b0516a5b7afc416f9014213f0ba52783c3c2 Mon Sep 17 00:00:00 2001 From: iphydf Date: Thu, 18 Jan 2024 16:19:53 +0000 Subject: [PATCH] fix: Correct pretty-printing of code in comments. --- BUILD.bazel | 2 +- cimple.cabal | 2 +- src/Language/Cimple/Ast.hs | 1 + src/Language/Cimple/CommentParser.y | 2 +- src/Language/Cimple/Flatten.hs | 1 + src/Language/Cimple/Lexer.x | 14 ++++++----- src/Language/Cimple/MapAst.hs | 2 ++ src/Language/Cimple/Pretty.hs | 38 +++++++++++++++++++++++++---- src/Language/Cimple/TraverseAst.hs | 5 ++++ test/Language/Cimple/PrettySpec.hs | 8 +++--- 10 files changed, 57 insertions(+), 18 deletions(-) diff --git a/BUILD.bazel b/BUILD.bazel index 3962a9e..46087c6 100644 --- a/BUILD.bazel +++ b/BUILD.bazel @@ -172,7 +172,7 @@ haskell_library( ), src_strip_prefix = "src", tags = ["no-cross"], - version = "0.0.19", + version = "0.0.20", visibility = ["//visibility:public"], deps = [ ":ast", diff --git a/cimple.cabal b/cimple.cabal index f2f2012..2e6ba38 100644 --- a/cimple.cabal +++ b/cimple.cabal @@ -1,5 +1,5 @@ name: cimple -version: 0.0.19 +version: 0.0.20 synopsis: Simple C-like programming language homepage: https://toktok.github.io/ license: GPL-3 diff --git a/src/Language/Cimple/Ast.hs b/src/Language/Cimple/Ast.hs index f90d830..e5fb303 100644 --- a/src/Language/Cimple/Ast.hs +++ b/src/Language/Cimple/Ast.hs @@ -147,6 +147,7 @@ data CommentF lexeme a | DocParagraph [a] | DocLine [a] + | DocCode a [a] a | DocList [a] | DocULItem [a] [a] | DocOLItem lexeme [a] diff --git a/src/Language/Cimple/CommentParser.y b/src/Language/Cimple/CommentParser.y index 1206268..199214b 100644 --- a/src/Language/Cimple/CommentParser.y +++ b/src/Language/Cimple/CommentParser.y @@ -146,7 +146,7 @@ Command(x) | '@implements' CMT_WORD { Fix $ DocImplements $2 } | '@extends' CMT_WORD { Fix $ DocExtends $2 } | '@private' { Fix DocPrivate } -| '@code' Code '@endcode' { Fix $ DocLine $ Fix (DocWord $1) : (reverse $2) ++ [Fix (DocWord $3)] } +| '@code' Code '@endcode' { Fix $ DocCode (Fix (DocWord $1)) (reverse $2) (Fix (DocWord $3)) } Code :: { [NonTerm] } Code diff --git a/src/Language/Cimple/Flatten.hs b/src/Language/Cimple/Flatten.hs index 8aa484f..1301971 100644 --- a/src/Language/Cimple/Flatten.hs +++ b/src/Language/Cimple/Flatten.hs @@ -68,6 +68,7 @@ instance GenConcatsFlatten (Fix (CommentF a)) a where gconcatsFlatten (Fix (DocExtends x)) = gconcatsFlatten x gconcatsFlatten (Fix (DocImplements x)) = gconcatsFlatten x gconcatsFlatten (Fix (DocLine x)) = gconcatsFlatten x + gconcatsFlatten (Fix (DocCode b x e)) = concat [gconcatsFlatten b, gconcatsFlatten x, gconcatsFlatten e] gconcatsFlatten (Fix (DocList x)) = gconcatsFlatten x gconcatsFlatten (Fix (DocLParen x)) = gconcatsFlatten x gconcatsFlatten (Fix (DocOLItem i x)) = i : gconcatsFlatten x diff --git a/src/Language/Cimple/Lexer.x b/src/Language/Cimple/Lexer.x index 60959a4..50a8074 100644 --- a/src/Language/Cimple/Lexer.x +++ b/src/Language/Cimple/Lexer.x @@ -254,14 +254,15 @@ tokens :- "SPDX-License-Identifier:" { mkL CmtSpdxLicense } "GPL-3.0-or-later" { mkL CmtWord } "TODO("[^\)]+"):" { mkL CmtWord } - [A-Z][A-Za-z]+"::"[a-z_]+ { mkL CmtWord } - "E.g." { mkL CmtWord } - "e.g." { mkL CmtWord } + [Ee]".g." { mkL CmtWord } "etc." { mkL CmtWord } - "I.e." { mkL CmtWord } - "i.e." { mkL CmtWord } + [Ii]".e." { mkL CmtWord } [0-2][0-9](":"[0-5][0-9]){2}"."[0-9]{3} { mkL CmtWord } - "v"?[0-9]"."[0-9]"."[0-9] { mkL CmtWord } + "v"?[0-9]+("."[0-9]+)+ { mkL CmtWord } + [A-Z][A-Za-z]+"::"[a-z_]+ { mkL CmtWord } + ([a-z]+"/")+[A-Za-z]+("."[a-z_]+)+ { mkL CmtWord } + [a-z]+("."[a-z_]+)+ { mkL CmtWord } + [a-z]+("-"[a-z_]+)+ { mkL CmtWord } "@code" { mkL CmtCode `andBegin` codeSC } "" { mkL CmtCode `andBegin` codeSC } "["[^\]]+"]" { mkL CmtAttr } @@ -276,6 +277,7 @@ tokens :- "-1" { mkL LitInteger } "`"([^`]|"\`")+"`" { mkL CmtCode } "${"([^\}])+"}" { mkL CmtCode } + "-"+ { mkL CmtWord } "–" { mkL CmtWord } "*/" { mkL CmtEnd `andBegin` 0 } \n { mkL PpNewline `andBegin` cmtNewlineSC } diff --git a/src/Language/Cimple/MapAst.hs b/src/Language/Cimple/MapAst.hs index 3f1c563..96885d7 100644 --- a/src/Language/Cimple/MapAst.hs +++ b/src/Language/Cimple/MapAst.hs @@ -142,6 +142,8 @@ instance MapAst itext otext (Comment (Lexeme itext)) where Fix <$> (DocParagraph <$> recurse docs) DocLine docs -> Fix <$> (DocLine <$> recurse docs) + DocCode begin docs end -> + Fix <$> (DocCode <$> recurse begin <*> recurse docs <*> recurse end) DocList docs -> Fix <$> (DocList <$> recurse docs) DocOLItem docs sublist -> diff --git a/src/Language/Cimple/Pretty.hs b/src/Language/Cimple/Pretty.hs index 18f878a..c1f6c55 100644 --- a/src/Language/Cimple/Pretty.hs +++ b/src/Language/Cimple/Pretty.hs @@ -136,7 +136,7 @@ ppCommentStart = dullyellow . \case Ignore -> text "//!TOKSTYLE-" ppCommentBody :: [Lexeme Text] -> Doc -ppCommentBody body = vsep . prefixStars . map (hsep . map ppWord) . groupLines $ body +ppCommentBody body = vsep . prefixStars . map (hcat . map ppWord . spaceWords) . groupLines $ body where -- If the "*/" is on a separate line, don't add an additional "*" before -- it. If "*/" is on the same line, then do add a "*" prefix on the last line. @@ -149,6 +149,21 @@ ppCommentBody body = vsep . prefixStars . map (hsep . map ppWord) . groupLines $ L _ PpNewline _ -> True _ -> False + spaceWords = \case + (L c p s:ws) -> L c p (" "<>s):continue ws + [] -> [] + where + continue [] = [] + continue (w@(L _ CmtEnd _):ws) = w:continue ws + continue (w@(L _ PctComma _):ws) = w:continue ws + continue (w@(L _ PctPeriod _):ws) = w:continue ws + continue (w@(L _ PctEMark _):ws) = w:continue ws + continue (w@(L _ PctQMark _):ws) = w:continue ws + continue (w@(L _ PctRParen _):ws) = w:continue ws + continue [w@(L c p s), end@(L _ CmtEnd _)] | lexemeLine w == lexemeLine end = [L c p (" "<>s<>" "), end] + continue (L c PctLParen s:w:ws) = (L c PctLParen (" "<>s)):w:continue ws + continue (L c p s:ws) = (L c p (" "<>s)):continue ws + ppWord (L _ CmtIndent _) = empty ppWord (L _ CmtCommand t) = dullcyan $ ppText t ppWord (L _ _ t) = dullyellow $ ppText t @@ -157,7 +172,7 @@ ppComment :: CommentStyle -> [Lexeme Text] -> Lexeme Text -> Doc ppComment Ignore cs _ = ppCommentStart Ignore <> hcat (map ppWord cs) <> dullyellow (text "//!TOKSTYLE+" <> line) ppComment style cs (L l c _) = - nest 1 $ ppCommentStart style <+> ppCommentBody (cs ++ [L l c "*/"]) + nest 1 $ ppCommentStart style <> ppCommentBody (cs ++ [L l c "*/"]) ppInitialiserList :: [Doc] -> Doc ppInitialiserList l = lbrace <+> commaSep l <+> rbrace @@ -274,10 +289,22 @@ ppVerbatimComment = . renderS . plain +ppCodeBody :: [Doc] -> Doc +ppCodeBody = + vcat + . zipWith (<>) (empty : commentStart " *" ) + . map text + . List.splitOn "\n" + . renderS + . plain + . hcat + +commentStart :: String -> [Doc] +commentStart = repeat . dullyellow . text + ppCommentInfo :: Comment (Lexeme Text) -> Doc ppCommentInfo = foldFix go where - commentStart t = repeat (dullyellow (text t)) ppBody = vcat . zipWith (<>) ( commentStart " * " ) ppIndented = vcat . zipWith (<>) (empty : commentStart " * ") ppRef = underline . cyan . ppLexeme @@ -310,6 +337,7 @@ ppCommentInfo = foldFix go DocParagraph docs -> ppIndented docs DocLine docs -> fillSep docs + DocCode begin code end -> begin <> ppCodeBody code <> end DocList l -> ppVerbatimComment $ vcat l DocOLItem num docs -> ppLexeme num <> char '.' <+> nest 3 (fillSep docs) DocULItem docs sublist -> char '-' <+> nest 2 (vsep $ fillSep docs : sublist) @@ -332,10 +360,10 @@ ppNode = foldFix go LicenseDecl l cs -> ppLicenseDecl l cs CopyrightDecl from (Just to) owner -> - text " * Copyright © " <> ppLexeme from <> char '-' <> ppLexeme to <+> + text " * Copyright © " <> ppLexeme from <> char '-' <> ppLexeme to <> ppCommentBody owner CopyrightDecl from Nothing owner -> - text " * Copyright © " <> ppLexeme from <+> + text " * Copyright © " <> ppLexeme from <> ppCommentBody owner Comment style _ cs end -> diff --git a/src/Language/Cimple/TraverseAst.hs b/src/Language/Cimple/TraverseAst.hs index de07ad9..79dd74a 100644 --- a/src/Language/Cimple/TraverseAst.hs +++ b/src/Language/Cimple/TraverseAst.hs @@ -134,6 +134,11 @@ instance TraverseAst text (Comment (Lexeme text)) where recurse docs DocLine docs -> recurse docs + DocCode begin docs end -> do + _ <- recurse begin + _ <- recurse docs + _ <- recurse end + pure () DocList docs -> recurse docs DocOLItem docs sublist -> do diff --git a/test/Language/Cimple/PrettySpec.hs b/test/Language/Cimple/PrettySpec.hs index 417a6a2..e8541d6 100644 --- a/test/Language/Cimple/PrettySpec.hs +++ b/test/Language/Cimple/PrettySpec.hs @@ -69,7 +69,7 @@ spec = do it "respects newlines at end of comments" $ do compact "/* foo bar */" `shouldBe` "/* foo bar */\n" - compact "/* foo bar\n */" `shouldBe` "/* foo bar\n*/\n" + compact "/* foo bar\n */" `shouldBe` "/* foo bar\n */\n" it "respects comment styles" $ do compact "/* foo bar */" `shouldBe` "/* foo bar */\n" @@ -79,11 +79,11 @@ spec = do it "supports punctuation in comments" $ do compact "/* foo.bar,baz-blep */" - `shouldBe` "/* foo . bar , baz - blep */\n" - compact "/* foo? */" `shouldBe` "/* foo ? */\n" + `shouldBe` "/* foo.bar, baz-blep */\n" + compact "/* foo? */" `shouldBe` "/* foo?*/\n" compact "/* 123 - 456 */" `shouldBe` "/* 123 - 456 */\n" compact "/* - 3 */" `shouldBe` "/* - 3 */\n" - compact "/* a-b */" `shouldBe` "/* a - b */\n" + compact "/* a-b */" `shouldBe` "/* a-b*/\n" it "formats pointer types with east-const" $ do compact "void foo(const int *a);"