From b422a7dbedadc5dae9ab3438d50874daf4454109 Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Tue, 10 Sep 2024 14:29:37 -0600 Subject: [PATCH 1/5] Improve syntax-tests matching MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously it would trim the first and last `Lexeme` from the actual result, to avoid having to include the extra “file” `Open`/`Close` in the expected value. However, when lexing failed, you’d just get a mismatch against an empty list of tokens. This now adds `Open`/`Close` to expected before comparing, and reports lexing failures differently. --- unison-syntax/package.yaml | 1 - unison-syntax/test/Main.hs | 32 ++++++++++++++----------------- unison-syntax/unison-syntax.cabal | 1 - 3 files changed, 14 insertions(+), 20 deletions(-) diff --git a/unison-syntax/package.yaml b/unison-syntax/package.yaml index e376d72db6..2c49dc4402 100644 --- a/unison-syntax/package.yaml +++ b/unison-syntax/package.yaml @@ -40,7 +40,6 @@ tests: - code-page - easytest - unison-syntax - - unison-core - unison-prelude - text main: Main.hs diff --git a/unison-syntax/test/Main.hs b/unison-syntax/test/Main.hs index 4914c38775..5e2751e288 100644 --- a/unison-syntax/test/Main.hs +++ b/unison-syntax/test/Main.hs @@ -1,14 +1,9 @@ -{-# OPTIONS_GHC -Wno-orphans #-} - module Main (main) where -import Data.Maybe (fromJust) import Data.Text qualified as Text import EasyTest import System.IO.CodePage (withCP65001) import Unison.Prelude -import Unison.ShortHash (ShortHash) -import Unison.ShortHash qualified as ShortHash import Unison.Syntax.HashQualifiedPrime qualified as HQ' (unsafeParseText) import Unison.Syntax.Lexer.Unison @@ -221,16 +216,20 @@ test = ] t :: String -> [Lexeme] -> Test () -t s expected = - let actual0 = payload <$> preParse (lexer "ignored filename" s) - actual = take (length actual0 - 2) . drop 1 $ toList actual0 - in scope s $ - if actual == expected - then ok - else do - note $ "expected: " ++ show expected - note $ "actual : " ++ show actual - crash "actual != expected" +t s expected = case toList . preParse $ lexer filename s of + [token@(Token (Err _) _ _)] -> crash $ show token + tokened -> + let actual = payload <$> tokened + expected' = Open filename : expected <> pure Close + in scope s $ + if actual == expected' + then ok + else do + note $ "expected: " ++ show expected' + note $ "actual : " ++ show actual + crash "actual != expected" + where + filename = "test case" simpleSymbolyId :: Text -> Lexeme simpleSymbolyId = @@ -239,6 +238,3 @@ simpleSymbolyId = simpleWordyId :: Text -> Lexeme simpleWordyId = WordyId . HQ'.unsafeParseText - -instance IsString ShortHash where - fromString = fromJust . ShortHash.fromText . Text.pack diff --git a/unison-syntax/unison-syntax.cabal b/unison-syntax/unison-syntax.cabal index fa00fe8efd..e42ee6e3dc 100644 --- a/unison-syntax/unison-syntax.cabal +++ b/unison-syntax/unison-syntax.cabal @@ -128,7 +128,6 @@ test-suite syntax-tests , code-page , easytest , text - , unison-core , unison-prelude , unison-syntax default-language: Haskell2010 From 1dcc332a0dbfb20702df326acce3472b276fa7c1 Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Thu, 26 Sep 2024 12:58:20 -0600 Subject: [PATCH 2/5] Move recursion schemes to separate package --- codebase2/codebase/U/Codebase/Decl.hs | 7 ++- codebase2/codebase/U/Codebase/Term.hs | 7 ++- codebase2/codebase/package.yaml | 1 + codebase2/codebase/unison-codebase.cabal | 1 + codebase2/core/U/Core/ABT.hs | 26 +++------ codebase2/core/package.yaml | 1 + codebase2/core/unison-core.cabal | 1 + contrib/cabal.project | 3 +- lib/unison-util-recursion/package.yaml | 46 +++++++++++++++ .../src/Unison/Util/Recursion.hs | 57 +++++++++++++++++++ .../unison-util-recursion.cabal | 57 +++++++++++++++++++ parser-typechecker/package.yaml | 1 + .../src/Unison/KindInference/Generate.hs | 7 ++- .../Unison/PatternMatchCoverage/Desugar.hs | 27 +++++---- .../src/Unison/PatternMatchCoverage/Fix.hs | 20 ------- .../Unison/PatternMatchCoverage/GrdTree.hs | 2 +- .../src/Unison/PatternMatchCoverage/Solve.hs | 2 +- .../unison-parser-typechecker.cabal | 2 +- stack.yaml | 1 + unison-cli/package.yaml | 2 + .../Unison/Codebase/Editor/HandleInput/Run.hs | 5 +- .../Unison/LSP/FileAnalysis/UnusedBindings.hs | 12 ++-- unison-cli/tests/Unison/Test/LSP.hs | 7 ++- unison-cli/unison-cli.cabal | 2 + unison-core/src/Unison/ABT.hs | 8 +-- 25 files changed, 223 insertions(+), 82 deletions(-) create mode 100644 lib/unison-util-recursion/package.yaml create mode 100644 lib/unison-util-recursion/src/Unison/Util/Recursion.hs create mode 100644 lib/unison-util-recursion/unison-util-recursion.cabal delete mode 100644 parser-typechecker/src/Unison/PatternMatchCoverage/Fix.hs diff --git a/codebase2/codebase/U/Codebase/Decl.hs b/codebase2/codebase/U/Codebase/Decl.hs index 7a46ea9fc0..cf6ae66902 100644 --- a/codebase2/codebase/U/Codebase/Decl.hs +++ b/codebase2/codebase/U/Codebase/Decl.hs @@ -12,6 +12,7 @@ import U.Core.ABT qualified as ABT import U.Core.ABT.Var qualified as ABT import Unison.Hash (Hash) import Unison.Prelude +import Unison.Util.Recursion type ConstructorId = Word64 @@ -107,7 +108,7 @@ unhashComponent componentHash refToVar m = { declType, modifier, bound, - constructorTypes = ABT.cata alg <$> constructorTypes + constructorTypes = cata alg <$> constructorTypes } where rewriteTypeReference :: Reference.Id' (Maybe Hash) -> Either v Reference.Reference @@ -126,8 +127,8 @@ unhashComponent componentHash refToVar m = case Map.lookup (fromMaybe componentHash <$> rid) withGeneratedVars of Nothing -> error "unhashComponent: self-reference not found in component map" Just (v, _, _) -> Left v - alg :: () -> ABT.ABT (Type.F' TypeRef) v (HashableType v) -> HashableType v - alg () = \case + alg :: ABT.Term' (Type.F' TypeRef) v () (HashableType v) -> HashableType v + alg (ABT.Term' _ () abt) = case abt of ABT.Var v -> ABT.var () v ABT.Cycle body -> ABT.cycle () body ABT.Abs v body -> ABT.abs () v body diff --git a/codebase2/codebase/U/Codebase/Term.hs b/codebase2/codebase/U/Codebase/Term.hs index 57691ba6ec..07b938ae25 100644 --- a/codebase2/codebase/U/Codebase/Term.hs +++ b/codebase2/codebase/U/Codebase/Term.hs @@ -16,6 +16,7 @@ import U.Core.ABT qualified as ABT import U.Core.ABT.Var qualified as ABT import Unison.Hash (Hash) import Unison.Prelude +import Unison.Util.Recursion type ConstructorId = Word64 @@ -281,7 +282,7 @@ unhashComponent componentHash refToVar m = assignVar :: Reference.Id -> (trm, extra) -> StateT (Set v) Identity (v, trm, extra) assignVar r (trm, extra) = (,trm,extra) <$> ABT.freshenS (refToVar r) fillSelfReferences :: Term v -> HashableTerm v - fillSelfReferences = (ABT.cata alg) + fillSelfReferences = cata alg where rewriteTermReference :: Reference.Id' (Maybe Hash) -> Either v Reference.Reference rewriteTermReference rid@(Reference.Id mayH pos) = @@ -299,8 +300,8 @@ unhashComponent componentHash refToVar m = case Map.lookup (fromMaybe componentHash <$> rid) withGeneratedVars of Nothing -> error "unhashComponent: self-reference not found in component map" Just (v, _, _) -> Left v - alg :: () -> ABT.ABT (F v) v (HashableTerm v) -> HashableTerm v - alg () = \case + alg :: ABT.Term' (F v) v () (HashableTerm v) -> HashableTerm v + alg (ABT.Term' _ () abt) = case abt of ABT.Var v -> ABT.var () v ABT.Cycle body -> ABT.cycle () body ABT.Abs v body -> ABT.abs () v body diff --git a/codebase2/codebase/package.yaml b/codebase2/codebase/package.yaml index 3d4bc0cc8d..c9a1a2ab55 100644 --- a/codebase2/codebase/package.yaml +++ b/codebase2/codebase/package.yaml @@ -11,6 +11,7 @@ dependencies: - unison-core - unison-hash - unison-prelude + - unison-util-recursion library: source-dirs: . diff --git a/codebase2/codebase/unison-codebase.cabal b/codebase2/codebase/unison-codebase.cabal index dfcaf461c4..5a7335649f 100644 --- a/codebase2/codebase/unison-codebase.cabal +++ b/codebase2/codebase/unison-codebase.cabal @@ -69,4 +69,5 @@ library , unison-core , unison-hash , unison-prelude + , unison-util-recursion default-language: GHC2021 diff --git a/codebase2/core/U/Core/ABT.hs b/codebase2/core/U/Core/ABT.hs index 690202d366..2e22791fde 100644 --- a/codebase2/core/U/Core/ABT.hs +++ b/codebase2/core/U/Core/ABT.hs @@ -10,6 +10,7 @@ import Debug.RecoverRTTI qualified as RTTI import U.Core.ABT.Var (Var (freshIn)) import Unison.Debug qualified as Debug import Unison.Prelude +import Unison.Util.Recursion import Prelude hiding (abs, cycle) data ABT f v r @@ -24,6 +25,13 @@ data ABT f v r data Term f v a = Term {freeVars :: Set v, annotation :: a, out :: ABT f v (Term f v a)} deriving (Functor, Foldable, Generic, Traversable) +data Term' f v a x = Term' {freeVars' :: Set v, annotation' :: a, out' :: ABT f v x} + deriving (Functor) + +instance (Functor f) => Recursive (Term f v a) (Term' f v a) where + embed (Term' vs a abt) = Term vs a abt + project (Term vs a abt) = Term' vs a abt + instance (Foldable f, Functor f, forall a. (Eq a) => Eq (f a), Var v) => Eq (Term f v a) where -- alpha equivalence, works by renaming any aligned Abs ctors to use a common fresh variable t1 == t2 = go (out t1) (out t2) @@ -97,24 +105,6 @@ vmapM f (Term _ a out) = case out of Cycle r -> cycle a <$> vmapM f r Abs v body -> abs a <$> f v <*> vmapM f body -cata :: - (Functor f) => - (a -> ABT f v x -> x) -> - Term f v a -> - x -cata abtAlg = - let go (Term _fvs a out) = abtAlg a (fmap go out) - in go - -para :: - (Functor f) => - (a -> ABT f v (Term f v a, x) -> x) -> - Term f v a -> - x -para abtAlg = - let go (Term _fvs a out) = abtAlg a (fmap (\x -> (x, go x)) out) - in go - transform :: (Ord v, Foldable g, Functor g) => (forall a. f a -> g a) -> diff --git a/codebase2/core/package.yaml b/codebase2/core/package.yaml index 71458bbf77..a090d9af99 100644 --- a/codebase2/core/package.yaml +++ b/codebase2/core/package.yaml @@ -17,6 +17,7 @@ dependencies: - text - unison-hash - unison-prelude + - unison-util-recursion default-extensions: - ApplicativeDo diff --git a/codebase2/core/unison-core.cabal b/codebase2/core/unison-core.cabal index 2b17e42ac5..2045517a08 100644 --- a/codebase2/core/unison-core.cabal +++ b/codebase2/core/unison-core.cabal @@ -64,4 +64,5 @@ library , text , unison-hash , unison-prelude + , unison-util-recursion default-language: Haskell2010 diff --git a/contrib/cabal.project b/contrib/cabal.project index 8f13162c7f..759ea5add2 100644 --- a/contrib/cabal.project +++ b/contrib/cabal.project @@ -21,9 +21,10 @@ packages: lib/unison-util-base32hex lib/unison-util-bytes lib/unison-util-cache + lib/unison-util-file-embed + lib/unison-util-recursion lib/unison-util-relation lib/unison-util-rope - lib/unison-util-file-embed parser-typechecker unison-core diff --git a/lib/unison-util-recursion/package.yaml b/lib/unison-util-recursion/package.yaml new file mode 100644 index 0000000000..21f83722ea --- /dev/null +++ b/lib/unison-util-recursion/package.yaml @@ -0,0 +1,46 @@ +name: unison-util-recursion +github: unisonweb/unison +copyright: Copyright (C) 2013-2022 Unison Computing, PBC and contributors + +ghc-options: -Wall + +dependencies: + - base + - free + +library: + source-dirs: src + when: + - condition: false + other-modules: Paths_unison_util_recursion + +default-extensions: + - ApplicativeDo + - BangPatterns + - BlockArguments + - DeriveAnyClass + - DeriveFoldable + - DeriveFunctor + - DeriveGeneric + - DeriveTraversable + - DerivingStrategies + - DerivingVia + - DoAndIfThenElse + - DuplicateRecordFields + - FlexibleContexts + - FlexibleInstances + - FunctionalDependencies + - GeneralizedNewtypeDeriving + - ImportQualifiedPost + - LambdaCase + - MultiParamTypeClasses + - NamedFieldPuns + - OverloadedStrings + - PatternSynonyms + - RankNTypes + - ScopedTypeVariables + - StandaloneDeriving + - TupleSections + - TypeApplications + - TypeFamilies + - ViewPatterns diff --git a/lib/unison-util-recursion/src/Unison/Util/Recursion.hs b/lib/unison-util-recursion/src/Unison/Util/Recursion.hs new file mode 100644 index 0000000000..3b0bb82dd8 --- /dev/null +++ b/lib/unison-util-recursion/src/Unison/Util/Recursion.hs @@ -0,0 +1,57 @@ +{-# LANGUAGE DefaultSignatures #-} +{-# LANGUAGE QuantifiedConstraints #-} +{-# LANGUAGE UndecidableInstances #-} + +module Unison.Util.Recursion + ( Algebra, + Recursive (..), + cataM, + para, + Fix (..), + Cofree' (..), + ) +where + +import Control.Arrow ((&&&)) +import Control.Comonad.Cofree (Cofree ((:<))) +import Control.Monad ((<=<)) + +type Algebra f a = f a -> a + +class Recursive t f | t -> f where + cata :: (Algebra f a) -> t -> a + default cata :: (Functor f) => (f a -> a) -> t -> a + cata φ = φ . fmap (cata φ) . project + project :: t -> f t + default project :: (Functor f) => t -> f t + project = cata (fmap embed) + embed :: f t -> t + {-# MINIMAL embed, (cata | project) #-} + +cataM :: (Recursive t f, Traversable f, Monad m) => (f a -> m a) -> t -> m a +cataM φ = cata $ φ <=< sequenceA + +para :: (Recursive t f, Functor f) => (f (t, a) -> a) -> t -> a +para φ = snd . cata (embed . fmap fst &&& φ) + +newtype Fix f = Fix (f (Fix f)) + +deriving instance (forall a. (Show a) => Show (f a)) => Show (Fix f) + +deriving instance (forall a. (Eq a) => Eq (f a)) => Eq (Fix f) + +deriving instance (Eq (Fix f), forall a. (Ord a) => Ord (f a)) => Ord (Fix f) + +instance (Functor f) => Recursive (Fix f) f where + embed = Fix + project (Fix f) = f + +data Cofree' f a x = a :<< f x + deriving (Foldable, Functor, Traversable) + +-- | +-- +-- __NB__: `Cofree` from “free” is lazy, so this instance is technically partial. +instance (Functor f) => Recursive (Cofree f a) (Cofree' f a) where + embed (a :<< fco) = a :< fco + project (a :< fco) = a :<< fco diff --git a/lib/unison-util-recursion/unison-util-recursion.cabal b/lib/unison-util-recursion/unison-util-recursion.cabal new file mode 100644 index 0000000000..035b9f81d4 --- /dev/null +++ b/lib/unison-util-recursion/unison-util-recursion.cabal @@ -0,0 +1,57 @@ +cabal-version: 1.12 + +-- This file has been generated from package.yaml by hpack version 0.36.0. +-- +-- see: https://github.com/sol/hpack + +name: unison-util-recursion +version: 0.0.0 +homepage: https://github.com/unisonweb/unison#readme +bug-reports: https://github.com/unisonweb/unison/issues +copyright: Copyright (C) 2013-2022 Unison Computing, PBC and contributors +build-type: Simple + +source-repository head + type: git + location: https://github.com/unisonweb/unison + +library + exposed-modules: + Unison.Util.Recursion + hs-source-dirs: + src + default-extensions: + ApplicativeDo + BangPatterns + BlockArguments + DeriveAnyClass + DeriveFoldable + DeriveFunctor + DeriveGeneric + DeriveTraversable + DerivingStrategies + DerivingVia + DoAndIfThenElse + DuplicateRecordFields + FlexibleContexts + FlexibleInstances + FunctionalDependencies + GeneralizedNewtypeDeriving + ImportQualifiedPost + LambdaCase + MultiParamTypeClasses + NamedFieldPuns + OverloadedStrings + PatternSynonyms + RankNTypes + ScopedTypeVariables + StandaloneDeriving + TupleSections + TypeApplications + TypeFamilies + ViewPatterns + ghc-options: -Wall + build-depends: + base + , free + default-language: Haskell2010 diff --git a/parser-typechecker/package.yaml b/parser-typechecker/package.yaml index 7150e81120..71a031c8b6 100644 --- a/parser-typechecker/package.yaml +++ b/parser-typechecker/package.yaml @@ -70,6 +70,7 @@ library: - unison-util-base32hex - unison-util-bytes - unison-util-cache + - unison-util-recursion - unison-util-relation - unison-util-rope - unison-util-serialization diff --git a/parser-typechecker/src/Unison/KindInference/Generate.hs b/parser-typechecker/src/Unison/KindInference/Generate.hs index 3ed3361f37..0886cacc4c 100644 --- a/parser-typechecker/src/Unison/KindInference/Generate.hs +++ b/parser-typechecker/src/Unison/KindInference/Generate.hs @@ -27,6 +27,7 @@ import Unison.Prelude import Unison.Reference (Reference) import Unison.Term qualified as Term import Unison.Type qualified as Type +import Unison.Util.Recursion import Unison.Var (Type (User), Var (typed), freshIn) -------------------------------------------------------------------------------- @@ -160,7 +161,7 @@ instantiateType type0 k = -- | Process type annotations depth-first. Allows processing -- annotations with lexical scoping. dfAnns :: (loc -> Type.Type v loc -> b -> b) -> (b -> b -> b) -> b -> Term.Term v loc -> b -dfAnns annAlg cons nil = ABT.cata \ann abt0 -> case abt0 of +dfAnns annAlg cons nil = cata \(ABT.Term' _ ann abt0) -> case abt0 of ABT.Var _ -> nil ABT.Cycle x -> x ABT.Abs _ x -> x @@ -173,7 +174,7 @@ dfAnns annAlg cons nil = ABT.cata \ann abt0 -> case abt0 of -- annotations. hackyStripAnns :: (Ord v) => Term.Term v loc -> Term.Term v loc hackyStripAnns = - snd . ABT.cata \ann abt0 -> case abt0 of + snd . cata \(ABT.Term' _ ann abt0) -> case abt0 of ABT.Var v -> (False, ABT.var ann v) ABT.Cycle (_, x) -> (False, ABT.cycle ann x) ABT.Abs v (_, x) -> (False, ABT.abs ann v x) @@ -188,7 +189,7 @@ hackyStripAnns = in (isHack, Term.constructor ann cref) t -> (False, ABT.tm ann (snd <$> t)) where - stripAnns = ABT.cata \ann abt0 -> case abt0 of + stripAnns = cata \(ABT.Term' _ ann abt0) -> case abt0 of ABT.Var v -> ABT.var ann v ABT.Cycle x -> ABT.cycle ann x ABT.Abs v x -> ABT.abs ann v x diff --git a/parser-typechecker/src/Unison/PatternMatchCoverage/Desugar.hs b/parser-typechecker/src/Unison/PatternMatchCoverage/Desugar.hs index b813145986..273f1298e2 100644 --- a/parser-typechecker/src/Unison/PatternMatchCoverage/Desugar.hs +++ b/parser-typechecker/src/Unison/PatternMatchCoverage/Desugar.hs @@ -7,13 +7,13 @@ import U.Core.ABT qualified as ABT import Unison.Pattern import Unison.Pattern qualified as Pattern import Unison.PatternMatchCoverage.Class -import Unison.PatternMatchCoverage.Fix import Unison.PatternMatchCoverage.GrdTree import Unison.PatternMatchCoverage.PmGrd import Unison.PatternMatchCoverage.PmLit qualified as PmLit import Unison.Term (MatchCase (..), Term', app, var) import Unison.Type (Type) import Unison.Type qualified as Type +import Unison.Util.Recursion -- | Desugar a match into a 'GrdTree' desugarMatch :: @@ -114,32 +114,31 @@ listToGrdTree :: [v] -> m (GrdTree (PmGrd vt v loc) loc) listToGrdTree _listTyp elemTyp listVar nl0 k0 vs0 = - let (minLen, maxLen) = countMinListLen nl0 - in Grd (PmListInterval listVar minLen maxLen) <$> go 0 0 nl0 k0 vs0 + let (minLen, maxLen) = cata countMinListLen nl0 0 + in Grd (PmListInterval listVar minLen maxLen) <$> cata go nl0 0 0 k0 vs0 where - go consCount snocCount (Fix pat) k vs = case pat of + go pat consCount snocCount k vs = case pat of N'ConsF x xs -> do element <- fresh let grd = PmListHead listVar consCount element elemTyp let !consCount' = consCount + 1 - Grd grd <$> desugarPattern elemTyp element x (go consCount' snocCount xs k) vs + Grd grd <$> desugarPattern elemTyp element x (xs consCount' snocCount k) vs N'SnocF xs x -> do element <- fresh let grd = PmListTail listVar snocCount element elemTyp let !snocCount' = snocCount + 1 - Grd grd <$> go consCount snocCount' xs (desugarPattern elemTyp element x k) vs + Grd grd <$> xs consCount snocCount' (desugarPattern elemTyp element x k) vs N'NilF -> k vs N'VarF _ -> k (listVar : vs) N'UnboundF _ -> k vs - countMinListLen :: NormalizedList loc -> (Int, Int) - countMinListLen = - ($ 0) . cata \case - N'ConsF _ b -> \acc -> b $! acc + 1 - N'SnocF b _ -> \acc -> b $! acc + 1 - N'NilF -> \ !n -> (n, n) - N'VarF _ -> \ !n -> (n, maxBound) - N'UnboundF _ -> \ !n -> (n, maxBound) + countMinListLen :: Algebra (NormalizedListF loc) (Int -> (Int, Int)) + countMinListLen = \case + N'ConsF _ b -> \acc -> b $! acc + 1 + N'SnocF b _ -> \acc -> b $! acc + 1 + N'NilF -> \ !n -> (n, n) + N'VarF _ -> \ !n -> (n, maxBound) + N'UnboundF _ -> \ !n -> (n, maxBound) data NormalizedListF loc a = N'ConsF (Pattern loc) a diff --git a/parser-typechecker/src/Unison/PatternMatchCoverage/Fix.hs b/parser-typechecker/src/Unison/PatternMatchCoverage/Fix.hs deleted file mode 100644 index 9accc06fb4..0000000000 --- a/parser-typechecker/src/Unison/PatternMatchCoverage/Fix.hs +++ /dev/null @@ -1,20 +0,0 @@ -{-# LANGUAGE FlexibleInstances #-} -{-# LANGUAGE QuantifiedConstraints #-} -{-# LANGUAGE TypeSynonymInstances #-} -{-# LANGUAGE UndecidableInstances #-} - -module Unison.PatternMatchCoverage.Fix where - -newtype Fix f = Fix {unFix :: f (Fix f)} - -deriving instance (forall a. (Show a) => Show (f a)) => Show (Fix f) - -deriving instance (forall a. (Eq a) => Eq (f a)) => Eq (Fix f) - -deriving instance (Eq (Fix f), forall a. (Ord a) => Ord (f a)) => Ord (Fix f) - -cata :: (Functor f) => (f a -> a) -> Fix f -> a -cata alg = let c = alg . fmap c . unFix in c - -para :: (Functor f) => (f (Fix f, a) -> a) -> Fix f -> a -para alg = let c = alg . fmap (\x -> (x, c x)) . unFix in c diff --git a/parser-typechecker/src/Unison/PatternMatchCoverage/GrdTree.hs b/parser-typechecker/src/Unison/PatternMatchCoverage/GrdTree.hs index bf84bd71c2..3d6e142b9d 100644 --- a/parser-typechecker/src/Unison/PatternMatchCoverage/GrdTree.hs +++ b/parser-typechecker/src/Unison/PatternMatchCoverage/GrdTree.hs @@ -11,9 +11,9 @@ module Unison.PatternMatchCoverage.GrdTree where import Data.ListLike (ListLike) -import Unison.PatternMatchCoverage.Fix import Unison.Prelude import Unison.Util.Pretty +import Unison.Util.Recursion -- | A @GrdTree@ is the simple language to desugar matches into. All -- pattern matching constructs (/e.g./ structural pattern matching, diff --git a/parser-typechecker/src/Unison/PatternMatchCoverage/Solve.hs b/parser-typechecker/src/Unison/PatternMatchCoverage/Solve.hs index 29e93d187f..8986f4c409 100644 --- a/parser-typechecker/src/Unison/PatternMatchCoverage/Solve.hs +++ b/parser-typechecker/src/Unison/PatternMatchCoverage/Solve.hs @@ -28,7 +28,6 @@ import Unison.PatternMatchCoverage.Class import Unison.PatternMatchCoverage.Constraint (Constraint) import Unison.PatternMatchCoverage.Constraint qualified as C import Unison.PatternMatchCoverage.EffectHandler -import Unison.PatternMatchCoverage.Fix import Unison.PatternMatchCoverage.GrdTree import Unison.PatternMatchCoverage.IntervalSet (IntervalSet) import Unison.PatternMatchCoverage.IntervalSet qualified as IntervalSet @@ -42,6 +41,7 @@ import Unison.Prelude import Unison.Type (Type) import Unison.Type qualified as Type import Unison.Util.Pretty qualified as P +import Unison.Util.Recursion import Unison.Var (Var) -- | top-down traversal of the 'GrdTree' that produces: diff --git a/parser-typechecker/unison-parser-typechecker.cabal b/parser-typechecker/unison-parser-typechecker.cabal index af6098f702..f08a2f969e 100644 --- a/parser-typechecker/unison-parser-typechecker.cabal +++ b/parser-typechecker/unison-parser-typechecker.cabal @@ -107,7 +107,6 @@ library Unison.PatternMatchCoverage.Constraint Unison.PatternMatchCoverage.Desugar Unison.PatternMatchCoverage.EffectHandler - Unison.PatternMatchCoverage.Fix Unison.PatternMatchCoverage.GrdTree Unison.PatternMatchCoverage.IntervalSet Unison.PatternMatchCoverage.ListPat @@ -248,6 +247,7 @@ library , unison-util-base32hex , unison-util-bytes , unison-util-cache + , unison-util-recursion , unison-util-relation , unison-util-rope , unison-util-serialization diff --git a/stack.yaml b/stack.yaml index 6a31222d65..e4e4470f68 100644 --- a/stack.yaml +++ b/stack.yaml @@ -34,6 +34,7 @@ packages: - lib/unison-util-bytes - lib/unison-util-cache - lib/unison-util-file-embed + - lib/unison-util-recursion - lib/unison-util-relation - lib/unison-util-rope - parser-typechecker diff --git a/unison-cli/package.yaml b/unison-cli/package.yaml index 8a438a9093..68ecf3431a 100644 --- a/unison-cli/package.yaml +++ b/unison-cli/package.yaml @@ -97,6 +97,7 @@ library: - unison-sqlite - unison-syntax - unison-util-base32hex + - unison-util-recursion - unison-util-relation - uuid - vector @@ -127,6 +128,7 @@ tests: - unison-parser-typechecker - unison-pretty-printer - unison-syntax + - unison-util-recursion main: Main.hs source-dirs: tests diff --git a/unison-cli/src/Unison/Codebase/Editor/HandleInput/Run.hs b/unison-cli/src/Unison/Codebase/Editor/HandleInput/Run.hs index d2d9ef8af4..58c34aaadd 100644 --- a/unison-cli/src/Unison/Codebase/Editor/HandleInput/Run.hs +++ b/unison-cli/src/Unison/Codebase/Editor/HandleInput/Run.hs @@ -42,6 +42,7 @@ import Unison.UnisonFile (TypecheckedUnisonFile) import Unison.UnisonFile qualified as UF import Unison.UnisonFile.Names qualified as UF import Unison.Util.Defns (Defns (..)) +import Unison.Util.Recursion import Unison.Var qualified as Var handleRun :: Bool -> HQ.HashQualified Name -> [String] -> Cli () @@ -200,7 +201,7 @@ stripUnisonFileReferences :: TypecheckedUnisonFile Symbol a -> Term Symbol () -> stripUnisonFileReferences unisonFile term = let refMap :: Map Reference.Id Symbol refMap = Map.fromList . map (\(sym, (_, refId, _, _, _)) -> (refId, sym)) . Map.toList . UF.hashTermsId $ unisonFile - alg () = \case + alg (ABT.Term' _ () abt) = case abt of ABT.Var x -> ABT.var x ABT.Cycle x -> ABT.cycle x ABT.Abs v x -> ABT.abs v x @@ -208,7 +209,7 @@ stripUnisonFileReferences unisonFile term = Term.Ref ref | Just var <- (\k -> Map.lookup k refMap) =<< Reference.toId ref -> ABT.var var x -> ABT.tm x - in ABT.cata alg term + in cata alg term magicMainWatcherString :: String magicMainWatcherString = "main" diff --git a/unison-cli/src/Unison/LSP/FileAnalysis/UnusedBindings.hs b/unison-cli/src/Unison/LSP/FileAnalysis/UnusedBindings.hs index 85a3511cfd..5dd7c14cad 100644 --- a/unison-cli/src/Unison/LSP/FileAnalysis/UnusedBindings.hs +++ b/unison-cli/src/Unison/LSP/FileAnalysis/UnusedBindings.hs @@ -17,10 +17,10 @@ import Unison.Term (Term) import Unison.Term qualified as Term import Unison.Util.List qualified as ListUtils import Unison.Util.Range qualified as Range +import Unison.Util.Recursion import Unison.Var qualified as Var -data VarUsages - = VarUsages +data VarUsages = VarUsages { unusedVars :: Map Symbol (Set Ann), usedVars :: Set Symbol, -- This is generally a copy of usedVars, except that we _don't_ remove variables when they go out of scope. @@ -39,7 +39,7 @@ instance Monoid VarUsages where analyseTerm :: Lsp.Uri -> Term Symbol Ann -> [Diagnostic] analyseTerm fileUri tm = - let (VarUsages {unusedVars}) = ABT.cata alg tm + let (VarUsages {unusedVars}) = cata alg tm vars = Map.toList unusedVars & mapMaybe \(v, ann) -> do (,ann) <$> getRelevantVarName v @@ -63,10 +63,8 @@ analyseTerm fileUri tm = guard (not (Text.isPrefixOf "_" n)) Just n _ -> Nothing - alg :: - Ann -> - (ABT (Term.F Symbol Ann Ann) Symbol VarUsages -> VarUsages) - alg ann abt = case abt of + alg :: Algebra (ABT.Term' (Term.F Symbol Ann Ann) Symbol Ann) VarUsages + alg (ABT.Term' _ ann abt) = case abt of Var v -> VarUsages {unusedVars = mempty, usedVars = Set.singleton v, allUsedVars = Set.singleton v} Cycle x -> x Abs v (VarUsages {unusedVars, usedVars, allUsedVars}) -> diff --git a/unison-cli/tests/Unison/Test/LSP.hs b/unison-cli/tests/Unison/Test/LSP.hs index 4459d93204..02af644740 100644 --- a/unison-cli/tests/Unison/Test/LSP.hs +++ b/unison-cli/tests/Unison/Test/LSP.hs @@ -39,6 +39,7 @@ import Unison.Term qualified as Term import Unison.Type qualified as Type import Unison.UnisonFile qualified as UF import Unison.Util.Monoid (foldMapM) +import Unison.Util.Recursion test :: Test () test = do @@ -344,12 +345,12 @@ annotationNestingTest (name, src) = scope name do -- within the span of the parent node. assertAnnotationsAreNested :: forall f. (Foldable f, Functor f, Show (f (Either String Ann))) => ABT.Term f Symbol Ann -> Test () assertAnnotationsAreNested term = do - case ABT.cata alg term of + case cata alg term of Right _ -> pure () Left err -> crash err where - alg :: Ann -> ABT.ABT f Symbol (Either String Ann) -> Either String Ann - alg ann abt = do + alg :: Algebra (ABT.Term' f Symbol Ann) (Either String Ann) + alg (ABT.Term' _ ann abt) = do childSpan <- abt & foldMapM id case abt of -- Abs nodes are the only nodes whose annotations are allowed to not contain their children, diff --git a/unison-cli/unison-cli.cabal b/unison-cli/unison-cli.cabal index cdd2aea21d..d7952578d9 100644 --- a/unison-cli/unison-cli.cabal +++ b/unison-cli/unison-cli.cabal @@ -264,6 +264,7 @@ library , unison-sqlite , unison-syntax , unison-util-base32hex + , unison-util-recursion , unison-util-relation , unliftio , uuid @@ -403,6 +404,7 @@ test-suite cli-tests , unison-prelude , unison-pretty-printer , unison-syntax + , unison-util-recursion default-language: Haskell2010 if flag(optimized) ghc-options: -O2 -funbox-strict-fields diff --git a/unison-core/src/Unison/ABT.hs b/unison-core/src/Unison/ABT.hs index fe9a8f930e..d838b2a730 100644 --- a/unison-core/src/Unison/ABT.hs +++ b/unison-core/src/Unison/ABT.hs @@ -1,6 +1,5 @@ {-# LANGUAGE DataKinds #-} {-# LANGUAGE DeriveDataTypeable #-} --- Based on: http://semantic-domain.blogspot.com/2015/03/abstract-binding-trees.html {-# LANGUAGE DeriveFoldable #-} {-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE DeriveTraversable #-} @@ -12,10 +11,12 @@ {-# LANGUAGE UndecidableInstances #-} {-# LANGUAGE ViewPatterns #-} +-- | Based on: http://semantic-domain.blogspot.com/2015/03/abstract-binding-trees.html module Unison.ABT ( -- * Types ABT (..), Term (..), + Term' (..), Var (..), V (..), Subst (..), @@ -41,8 +42,6 @@ module Unison.ABT rebuildUp', reannotateUp, rewriteDown, - cata, - para, transform, transformM, foreachSubterm, @@ -111,12 +110,11 @@ import Data.Set qualified as Set import U.Core.ABT ( ABT (..), Term (..), + Term' (..), allVars, - cata, foreachSubterm, freshInBoth, freshenS, - para, rename, subst', substInheritAnnotation, From 30a49f656ca435400e8eadac081b29cef38c4630 Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Thu, 26 Sep 2024 00:51:11 -0600 Subject: [PATCH 3/5] Make annotations on Doc more consistent This does all `Doc` annotations with `Cofree`. As a consequence, some types have been broken apart and constraints that were mentioned in comments before are now encoded in the types. The only remaining direct recursion is now between `Column` and `List`. This is a precursor for unit testing Doc parsing, as this allows us to drop all annotations from the Doc structure, making it much less fragile. --- .../src/Unison/Syntax/TermParser.hs | 135 ++++---- .../src/Unison/Syntax/TermPrinter.hs | 1 + .../src/Unison/Syntax/Lexer/Unison.hs | 24 +- unison-syntax/src/Unison/Syntax/Parser.hs | 3 +- unison-syntax/src/Unison/Syntax/Parser/Doc.hs | 150 ++++----- .../src/Unison/Syntax/Parser/Doc/Data.hs | 291 ++++++++++-------- 6 files changed, 333 insertions(+), 271 deletions(-) diff --git a/parser-typechecker/src/Unison/Syntax/TermParser.hs b/parser-typechecker/src/Unison/Syntax/TermParser.hs index 26ad356868..f7667a63f6 100644 --- a/parser-typechecker/src/Unison/Syntax/TermParser.hs +++ b/parser-typechecker/src/Unison/Syntax/TermParser.hs @@ -1,3 +1,4 @@ +{-# LANGUAGE MultiWayIf #-} {-# LANGUAGE PartialTypeSignatures #-} module Unison.Syntax.TermParser @@ -12,8 +13,8 @@ module Unison.Syntax.TermParser ) where -import Control.Comonad.Cofree (Cofree ((:<))) import Control.Monad.Reader (asks, local) +import Data.Bitraversable (bitraverse) import Data.Char qualified as Char import Data.Foldable (foldrM) import Data.List qualified as List @@ -25,7 +26,6 @@ import Data.Sequence qualified as Sequence import Data.Set qualified as Set import Data.Text qualified as Text import Data.Tuple.Extra qualified as TupleE -import Data.Void (absurd, vacuous) import Text.Megaparsec qualified as P import U.Codebase.Reference (ReferenceType (..)) import U.Core.ABT qualified as ABT @@ -65,6 +65,7 @@ import Unison.Type qualified as Type import Unison.Typechecker.Components qualified as Components import Unison.Util.Bytes qualified as Bytes import Unison.Util.List (intercalateMapWith, quenchRuns) +import Unison.Util.Recursion import Unison.Var (Var) import Unison.Var qualified as Var import Prelude hiding (and, or, seq) @@ -146,10 +147,12 @@ link' :: (Monad m, Var v) => P v m (Either (L.Token TypeReference) (L.Token Refe link' = do id <- hqPrefixId ns <- asks names - case (Names.lookupHQTerm Names.IncludeSuffixes (L.payload id) ns, Names.lookupHQType Names.IncludeSuffixes (L.payload id) ns) of - (s, s2) | Set.size s == 1 && Set.null s2 -> pure . Right $ const (Set.findMin s) <$> id - (s, s2) | Set.size s2 == 1 && Set.null s -> pure . Left $ const (Set.findMin s2) <$> id - (s, s2) -> customFailure $ UnknownId id s s2 + let s = Names.lookupHQTerm Names.IncludeSuffixes (L.payload id) ns + let s2 = Names.lookupHQType Names.IncludeSuffixes (L.payload id) ns + if + | Set.size s == 1 && Set.null s2 -> pure . Right $ Set.findMin s <$ id + | Set.size s2 == 1 && Set.null s -> pure . Left $ Set.findMin s2 <$ id + | True -> customFailure $ UnknownId id s s2 link :: (Monad m, Var v) => TermP v m link = termLink <|> typeLink @@ -602,10 +605,9 @@ doc2Block :: forall m v. (Monad m, Var v) => P v m (Ann {- Annotation for the wh doc2Block = do L.Token docContents startDoc endDoc <- doc let docAnn = Ann startDoc endDoc - (docAnn,) . docUntitledSection (gann docAnn) <$> traverse (cata $ docTop <=< sequenceA) docContents + (docAnn,) . docUntitledSection (gann docAnn) <$> traverse foldTop docContents where - cata :: (Functor f) => (f a -> a) -> Cofree f x -> a - cata fn (_ :< fx) = fn $ cata fn <$> fx + foldTop = cataM \(a :<< top) -> docTop a =<< bitraverse (cataM \(a :<< leaf) -> docLeaf a leaf) pure top gann :: (Annotated a) => a -> Ann gann = Ann.GeneratedFrom . ann @@ -620,9 +622,9 @@ doc2Block = do docUntitledSection ann (Doc.UntitledSection tops) = Term.app ann (f ann "UntitledSection") $ Term.list (gann tops) tops - docTop :: Doc.Top (ReferenceType, HQ'.HashQualified Name) [L.Token L.Lexeme] (Term v Ann) -> TermP v m - docTop d = case d of - Doc.Section title body -> pure $ Term.apps' (f d "Section") [title, Term.list (gann body) body] + docTop :: Ann -> Doc.Top [L.Token L.Lexeme] (Term v Ann) (Term v Ann) -> TermP v m + docTop d = \case + Doc.Section title body -> pure $ Term.apps' (f d "Section") [docParagraph d title, Term.list (gann body) body] Doc.Eval code -> Term.app (gann d) (f d "Eval") . addDelay . snd <$> subParse (block' False False "syntax.docEval" (pure $ pure ()) $ Ann.External <$ P.eof) code @@ -633,25 +635,29 @@ doc2Block = do pure $ Term.apps' (f d "CodeBlock") - [Term.text (ann label) . Text.pack $ L.payload label, Term.text (ann body) . Text.pack $ L.payload body] + [Term.text d $ Text.pack label, Term.text d $ Text.pack body] + Doc.List' list -> pure $ docList d list + Doc.Paragraph' para -> pure $ docParagraph d para + + docParagraph d leaves = Term.app (gann d) (f d "Paragraph") . Term.list d $ toList leaves + + docList :: Ann -> Doc.List (Term v Ann) -> Term v Ann + docList d = \case Doc.BulletedList items -> - pure $ Term.app (gann d) (f d "BulletedList") . Term.list (gann items) . toList $ docColumn <$> items + Term.app (gann d) (f d "BulletedList") . Term.list (gann d) . toList $ docColumn d <$> items Doc.NumberedList items@((n, _) :| _) -> - pure $ - Term.apps' - (f d "NumberedList") - [Term.nat (ann d) $ L.payload n, Term.list (gann $ snd <$> items) . toList $ docColumn . snd <$> items] - Doc.Paragraph leaves -> - Term.app (gann d) (f d "Paragraph") . Term.list (ann leaves) . toList <$> traverse docLeaf leaves - - docColumn :: Doc.Column (Term v Ann) -> Term v Ann - docColumn d@(Doc.Column para sublist) = - Term.app (gann d) (f d "Column") . Term.list (gann d) $ para : toList sublist - - docLeaf :: Doc.Leaf (ReferenceType, HQ'.HashQualified Name) [L.Token L.Lexeme] (Term v Ann) -> TermP v m - docLeaf d = case d of - Doc.Link link -> Term.app (gann d) (f d "Link") <$> docEmbedLink link - Doc.NamedLink para target -> Term.apps' (f d "NamedLink") . (para :) . pure <$> docLeaf (vacuous target) + Term.apps' + (f d "NumberedList") + [Term.nat (ann d) $ n, Term.list (gann d) . toList $ docColumn d . snd <$> items] + + docColumn :: Ann -> Doc.Column (Term v Ann) -> Term v Ann + docColumn d (Doc.Column para sublist) = + Term.app (gann d) (f d "Column") . Term.list (gann d) $ docParagraph d para : toList (docList d <$> sublist) + + docLeaf :: Ann -> Doc.Leaf (L.Token (ReferenceType, HQ'.HashQualified Name)) [L.Token L.Lexeme] (Term v Ann) -> TermP v m + docLeaf d = \case + Doc.Link link -> Term.app (gann d) (f d "Link") <$> docEmbedLink d link + Doc.NamedLink para group -> pure $ Term.apps' (f d "NamedLink") [docParagraph d para, docGroup d group] Doc.Example code -> do trm <- subParse term code pure . Term.apps' (f d "Example") $ case trm of @@ -661,56 +667,56 @@ doc2Block = do lam = addDelay $ Term.lam' (ann tm) ((mempty,) <$> fvs) tm in [n, lam] tm -> [Term.nat (ann tm) 0, addDelay tm] - Doc.Transclude code -> Term.app (gann d) (f d "Transclude") <$> subParse term code - Doc.Bold para -> pure $ Term.app (gann d) (f d "Bold") para - Doc.Italic para -> pure $ Term.app (gann d) (f d "Italic") para - Doc.Strikethrough para -> pure $ Term.app (gann d) (f d "Strikethrough") para - Doc.Verbatim leaf -> Term.app (gann d) (f d "Verbatim") <$> docLeaf (bimap absurd absurd leaf) - Doc.Code leaf -> Term.app (gann d) (f d "Code") <$> docLeaf (bimap absurd absurd leaf) + Doc.Transclude' trans -> docTransclude d trans + Doc.Bold para -> pure . Term.app (gann d) (f d "Bold") $ docParagraph d para + Doc.Italic para -> pure . Term.app (gann d) (f d "Italic") $ docParagraph d para + Doc.Strikethrough para -> pure . Term.app (gann d) (f d "Strikethrough") $ docParagraph d para + Doc.Verbatim leaf -> pure . Term.app (gann d) (f d "Verbatim") $ docWord d leaf + Doc.Code leaf -> pure . Term.app (gann d) (f d "Code") $ docWord d leaf Doc.Source elems -> - Term.app (gann d) (f d "Source") . Term.list (ann elems) . toList <$> traverse docSourceElement elems + Term.app (gann d) (f d "Source") . Term.list d . toList <$> traverse (docSourceElement d) elems Doc.FoldedSource elems -> - Term.app (gann d) (f d "FoldedSource") . Term.list (ann elems) . toList <$> traverse docSourceElement elems + Term.app (gann d) (f d "FoldedSource") . Term.list d . toList <$> traverse (docSourceElement d) elems Doc.EvalInline code -> Term.app (gann d) (f d "EvalInline") . addDelay <$> subParse term code Doc.Signature links -> - Term.app (gann d) (f d "Signature") . Term.list (ann links) . toList <$> traverse docEmbedSignatureLink links - Doc.SignatureInline link -> Term.app (gann d) (f d "SignatureInline") <$> docEmbedSignatureLink link - Doc.Word txt -> pure . Term.app (gann d) (f d "Word") . Term.text (ann txt) . Text.pack $ L.payload txt - Doc.Group (Doc.Join leaves) -> - Term.app (gann d) (f d "Group") . Term.app (gann d) (f d "Join") . Term.list (ann leaves) . toList - <$> traverse docLeaf leaves - - docEmbedLink :: Doc.EmbedLink (ReferenceType, HQ'.HashQualified Name) -> TermP v m - docEmbedLink d@(Doc.EmbedLink (L.Token (level, ident) start end)) = case level of + Term.app (gann d) (f d "Signature") . Term.list d . toList <$> traverse (docEmbedSignatureLink d) links + Doc.SignatureInline link -> Term.app (gann d) (f d "SignatureInline") <$> docEmbedSignatureLink d link + Doc.Word' word -> pure $ docWord d word + Doc.Group' group -> pure $ docGroup d group + + docEmbedLink :: Ann -> Doc.EmbedLink (L.Token (ReferenceType, HQ'.HashQualified Name)) -> TermP v m + docEmbedLink d (Doc.EmbedLink (L.Token (level, ident) start end)) = case level of RtType -> Term.app (gann d) (f d "EmbedTypeLink") . Term.typeLink (ann d) . L.payload <$> findUniqueType (L.Token (HQ'.toHQ ident) start end) RtTerm -> Term.app (gann d) (f d "EmbedTermLink") . addDelay <$> resolveHashQualified (L.Token (HQ'.toHQ ident) start end) + docTransclude :: Ann -> Doc.Transclude [L.Token L.Lexeme] -> TermP v m + docTransclude d (Doc.Transclude code) = Term.app (gann d) (f d "Transclude") <$> subParse term code + docSourceElement :: - Doc.SourceElement - (ReferenceType, HQ'.HashQualified Name) - (Doc.Leaf (ReferenceType, HQ'.HashQualified Name) [L.Token L.Lexeme] Void) -> + Ann -> + Doc.SourceElement (L.Token (ReferenceType, HQ'.HashQualified Name)) (Doc.Transclude [L.Token L.Lexeme]) -> TermP v m - docSourceElement d@(Doc.SourceElement link anns) = do - link' <- docEmbedLink link - anns' <- traverse docEmbedAnnotation anns - pure $ Term.apps' (f d "SourceElement") [link', Term.list (ann anns) anns'] - - docEmbedSignatureLink :: Doc.EmbedSignatureLink (ReferenceType, HQ'.HashQualified Name) -> TermP v m - docEmbedSignatureLink d@(Doc.EmbedSignatureLink (L.Token (level, ident) start end)) = case level of + docSourceElement d (Doc.SourceElement link anns) = do + link' <- docEmbedLink d link + anns' <- traverse (docEmbedAnnotation d) anns + pure $ Term.apps' (f d "SourceElement") [link', Term.list d anns'] + + docEmbedSignatureLink :: + Ann -> Doc.EmbedSignatureLink (L.Token (ReferenceType, HQ'.HashQualified Name)) -> TermP v m + docEmbedSignatureLink d (Doc.EmbedSignatureLink (L.Token (level, ident) start end)) = case level of RtType -> P.customFailure . TypeNotAllowed $ L.Token (HQ'.toHQ ident) start end RtTerm -> Term.app (gann d) (f d "EmbedSignatureLink") . addDelay <$> resolveHashQualified (L.Token (HQ'.toHQ ident) start end) docEmbedAnnotation :: - Doc.EmbedAnnotation - (ReferenceType, HQ'.HashQualified Name) - (Doc.Leaf (ReferenceType, HQ'.HashQualified Name) [L.Token L.Lexeme] Void) -> + Ann -> + Doc.EmbedAnnotation (L.Token (ReferenceType, HQ'.HashQualified Name)) (Doc.Transclude [L.Token L.Lexeme]) -> TermP v m - docEmbedAnnotation d@(Doc.EmbedAnnotation a) = + docEmbedAnnotation d (Doc.EmbedAnnotation a) = -- This is the only place I’m not sure we’re doing the right thing. In the lexer, this can be an identifier or a -- DocLeaf, but here it could be either /text/ or a Doc element. And I don’t think there’s any way the lexemes -- produced for an identifier and the lexemes consumed for text line up. So, I think this is a bugfix I can’t @@ -721,9 +727,16 @@ doc2Block = do RtType -> P.customFailure . TypeNotAllowed $ L.Token (HQ'.toHQ ident) start end RtTerm -> resolveHashQualified $ L.Token (HQ'.toHQ ident) start end ) - (docLeaf . vacuous) + (docTransclude d) a + docWord :: Ann -> Doc.Word -> Term v Ann + docWord d (Doc.Word txt) = Term.app (gann d) (f d "Word") . Term.text d $ Text.pack txt + + docGroup :: Ann -> Doc.Group (Term v Ann) -> Term v Ann + docGroup d (Doc.Group (Doc.Join leaves)) = + Term.app d (f d "Group") . Term.app d (f d "Join") . Term.list (ann leaves) $ toList leaves + docBlock :: (Monad m, Var v) => TermP v m docBlock = do openTok <- openBlockWith "[:" diff --git a/parser-typechecker/src/Unison/Syntax/TermPrinter.hs b/parser-typechecker/src/Unison/Syntax/TermPrinter.hs index cddc64399a..f17129180f 100644 --- a/parser-typechecker/src/Unison/Syntax/TermPrinter.hs +++ b/parser-typechecker/src/Unison/Syntax/TermPrinter.hs @@ -7,6 +7,7 @@ module Unison.Syntax.TermPrinter prettyBinding, prettyBinding', prettyBindingWithoutTypeSignature, + prettyDoc2, pretty0, runPretty, prettyPattern, diff --git a/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs b/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs index 0480fb324c..6eb51da9cb 100644 --- a/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs +++ b/unison-syntax/src/Unison/Syntax/Lexer/Unison.hs @@ -14,6 +14,9 @@ module Unison.Syntax.Lexer.Unison showEscapeChar, touches, + -- * Lexers + typeOrTerm, + -- * Character classifiers wordyIdChar, wordyIdStartChar, @@ -131,7 +134,7 @@ data Lexeme | -- | hash literals Hash ShortHash | Err Err - | Doc (Doc.UntitledSection (Doc.Tree (ReferenceType, HQ'.HashQualified Name) [Token Lexeme])) + | Doc (Doc.UntitledSection (Doc.Tree (Token (ReferenceType, HQ'.HashQualified Name)) [Token Lexeme])) deriving stock (Eq, Show, Ord) type IsVirtual = Bool -- is it a virtual semi or an actual semi? @@ -369,7 +372,7 @@ doc2 = do (docTok, closeTok) <- local (\env -> env {inLayout = False}) do - body <- Doc.doc typeOrTerm lexemes' . P.lookAhead $ lit "}}" + body <- Doc.doc (tokenP typeOrTerm) lexemes' . P.lookAhead $ lit "}}" closeStart <- posP lit "}}" closeEnd <- posP @@ -397,12 +400,6 @@ doc2 = do isTopLevel = length (layout env0) + maybe 0 (const 1) (opening env0) == 1 _ -> docTok : endToks where - wordyKw kw = separated wordySep (lit kw) - typeOrAbility' = typeOrAbilityAlt (wordyKw . Text.unpack) - typeOrTerm = do - mtype <- P.optional $ typeOrAbility' <* CP.space - ident <- identifierP <* CP.space - pure (maybe RtTerm (const RtType) mtype, ident) subsequentTypeName = P.lookAhead . P.optional $ do let lit' s = lit s <* sp let modifier = typeModifiersAlt (lit' . Text.unpack) @@ -423,6 +420,17 @@ doc2 = do where ok s = length [() | '\n' <- s] < 2 +typeOrTerm :: (Monad m) => P.ParsecT (Token Err) String m (ReferenceType, HQ'.HashQualified Name) +typeOrTerm = do + mtype <- P.optional $ typeOrAbility' <* CP.space + ident <- identifierP <* CP.space + pure (maybe RtTerm (const RtType) mtype, ident) + +typeOrAbility' :: (Monad m) => P.ParsecT (Token Err) String m String +typeOrAbility' = typeOrAbilityAlt (wordyKw . Text.unpack) + where + wordyKw kw = separated wordySep (lit kw) + lexemes' :: P () -> P [Token Lexeme] lexemes' eof = -- NB: `postLex` requires the token stream to start with an `Open`, otherwise it can’t create a `BlockTree`, so this diff --git a/unison-syntax/src/Unison/Syntax/Parser.hs b/unison-syntax/src/Unison/Syntax/Parser.hs index 51bdc1e367..30126c7d8b 100644 --- a/unison-syntax/src/Unison/Syntax/Parser.hs +++ b/unison-syntax/src/Unison/Syntax/Parser.hs @@ -435,7 +435,8 @@ string = queryToken getString getString _ = Nothing doc :: - (Ord v) => P v m (L.Token (Doc.UntitledSection (Doc.Tree (ReferenceType, HQ'.HashQualified Name) [L.Token L.Lexeme]))) + (Ord v) => + P v m (L.Token (Doc.UntitledSection (Doc.Tree (L.Token (ReferenceType, HQ'.HashQualified Name)) [L.Token L.Lexeme]))) doc = queryToken \case L.Doc d -> pure d _ -> Nothing diff --git a/unison-syntax/src/Unison/Syntax/Parser/Doc.hs b/unison-syntax/src/Unison/Syntax/Parser/Doc.hs index 1a03665493..d2279ba4c0 100644 --- a/unison-syntax/src/Unison/Syntax/Parser/Doc.hs +++ b/unison-syntax/src/Unison/Syntax/Parser/Doc.hs @@ -10,6 +10,7 @@ -- Each of those parsers is expected to satisfy @(`Ord` e, `P.MonadParsec` e `String` m)@. module Unison.Syntax.Parser.Doc ( Tree, + Leaves, initialEnv, doc, untitledSection, @@ -62,13 +63,16 @@ import Text.Megaparsec qualified as P import Text.Megaparsec.Char (char) import Text.Megaparsec.Char qualified as CP import Text.Megaparsec.Char.Lexer qualified as LP -import Unison.Parser.Ann (Ann, Annotated (..)) -import Unison.Prelude hiding (join) +import Unison.Parser.Ann (Ann (Ann)) +import Unison.Prelude hiding (Word, join) import Unison.Syntax.Lexer (column, line, lit, sepBy1', some', someTill', (<+>)) import Unison.Syntax.Lexer.Token (Token (Token), posP, tokenP) import Unison.Syntax.Parser.Doc.Data +import Prelude hiding (Word) -type Tree ident code = Cofree (Top ident code) Ann +type Leaves ident code = Cofree (Leaf ident code) Ann + +type Tree ident code = Cofree (Top code (Leaves ident code)) Ann data ParsingEnv = ParsingEnv { -- | Use a stack to remember the parent section and allow docSections within docSections. @@ -83,12 +87,12 @@ initialEnv :: ParsingEnv initialEnv = ParsingEnv [0] 0 doc :: - (Ord e, P.MonadParsec e String m, Annotated code) => + (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> m end -> m (UntitledSection (Tree ident code)) -doc ident code = flip R.runReaderT initialEnv . untitledSection . sectionElem ident code . void +doc ident code = flip R.runReaderT initialEnv . untitledSection . wrap . sectionElem ident code . void -- | This is the actual `Doc` lexer. Unlike `doc2`, it doesn’t do any Unison-side lexing (i.e., it doesn’t know that -- Unison wraps `Doc` literals in `}}`). @@ -96,28 +100,27 @@ untitledSection :: (P.MonadParsec e String m) => m a -> m (UntitledSection a) untitledSection a = UntitledSection <$> P.many (a <* CP.space) sectionElem :: - (Ord e, P.MonadParsec e String m, Annotated code) => + (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> m () -> - R.ReaderT ParsingEnv m (Tree ident code) + R.ReaderT ParsingEnv m (Top code (Leaves ident code) (Tree ident code)) sectionElem ident code docClose = - fmap wrap' $ - section ident code docClose - <|> lift (P.label "block eval (syntax: a fenced code block)" (eval code <|> exampleBlock code <|> codeBlock)) - <|> list ident code docClose - <|> lift (paragraph ident code docClose) + section ident code docClose + <|> lift (P.label "block eval (syntax: a fenced code block)" (eval code <|> exampleBlock code <|> codeBlock)) + <|> fmap List' (list ident code docClose) + <|> lift (Paragraph' <$> paragraph ident code docClose) paragraph :: - (Ord e, P.MonadParsec e String m, Annotated code) => + (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> m () -> - m (Top ident code (Tree ident code)) + m (Paragraph (Leaves ident code)) paragraph ident code docClose = fmap Paragraph . spaced docClose $ leafy ident code docClose -word :: (Ord e, P.MonadParsec e String m) => m end -> m (Leaf ident code void) -word closing = fmap Word . tokenP . P.try $ do +word :: (Ord e, P.MonadParsec e String m) => m end -> m Word +word closing = fmap Word . P.try $ do let end = P.lookAhead $ void (P.satisfy isSpace) <|> void closing word <- P.manyTill (P.satisfy (\ch -> not (isSpace ch))) end guard (not $ reserved word || null word) @@ -126,16 +129,16 @@ word closing = fmap Word . tokenP . P.try $ do reserved word = List.isPrefixOf "}}" word || all (== '#') word leaf :: - (Ord e, P.MonadParsec e String m, Annotated code) => + (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> m () -> - m (Leaf ident code (Tree ident code)) + m (Leaf ident code (Leaves ident code)) leaf ident code closing = link ident <|> namedLink ident code closing <|> example code - <|> transclude code + <|> (Transclude' <$> transclude code) <|> bold ident code closing <|> italic ident code closing <|> strikethrough ident code closing @@ -145,20 +148,20 @@ leaf ident code closing = <|> evalInline code <|> signatures ident <|> signatureInline ident - <|> word closing + <|> (Word' <$> word closing) leafy :: - (Ord e, P.MonadParsec e String m, Annotated code) => + (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> m () -> - m (Leaf ident code (Tree ident code)) + m (Leaves ident code) leafy ident code closing = do - p <- leaf ident code closing + p <- wrap $ leaf ident code closing after <- P.optional . P.try $ leafy ident code closing case after of Nothing -> pure p - Just after -> group . pure $ p :| pure after + Just after -> wrap . fmap Group' . group . pure $ p :| pure after comma :: (P.MonadParsec e String m) => m String comma = lit "," <* CP.space @@ -173,7 +176,7 @@ sourceElements :: (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> - m (NonEmpty (SourceElement ident (Leaf ident code Void))) + m (NonEmpty (SourceElement ident (Transclude code))) sourceElements ident code = do _ <- (lit " {" <|> lit "{") *> CP.space s <- sepBy1' srcElem comma @@ -187,7 +190,7 @@ sourceElements ident code = do (lit "@") *> (CP.space *> annotations) ) where - annotation = fmap Left (tokenP ident) <|> fmap Right (transclude code) <* CP.space + annotation = fmap Left ident <|> fmap Right (transclude code) <* CP.space annotations = P.some (EmbedAnnotation <$> annotation) signatures :: (Ord e, P.MonadParsec e String m) => m ident -> m (Leaf ident code a) @@ -213,10 +216,10 @@ evalInline code = fmap EvalInline $ do -- | Not an actual node, but this pattern is referenced in multiple places embedLink :: (Ord e, P.MonadParsec e s m, P.TraversableStream s) => m ident -> m (EmbedLink ident) -embedLink = fmap EmbedLink . tokenP +embedLink = fmap EmbedLink embedSignatureLink :: (Ord e, P.MonadParsec e String m) => m ident -> m (EmbedSignatureLink ident) -embedSignatureLink ident = EmbedSignatureLink <$> tokenP ident <* CP.space +embedSignatureLink ident = EmbedSignatureLink <$> ident <* CP.space verbatim :: (Ord e, P.MonadParsec e String m) => m (Leaf ident code a) verbatim = @@ -235,8 +238,8 @@ verbatim = txt = trimIndentFromVerbatimBlock (column start - 1) trimmed in -- If it's a multi-line verbatim block we trim any whitespace representing -- indentation from the pretty-printer. See 'trimIndentFromVerbatimBlock' - Verbatim . Word $ Token txt start stop - else Code . Word $ Token originalText start stop + Verbatim . Word $ txt + else Code . Word $ originalText example :: (P.MonadParsec e String m) => (m () -> m code) -> m (Leaf ident code void) example code = @@ -251,7 +254,7 @@ example code = link :: (Ord e, P.MonadParsec e String m) => m ident -> m (Leaf ident code a) link ident = P.label "link (examples: {type List}, {Nat.+})" $ Link <$> P.try (lit "{" *> embedLink ident <* lit "}") -transclude :: (P.MonadParsec e String m) => (m () -> m code) -> m (Leaf ident code a) +transclude :: (P.MonadParsec e String m) => (m () -> m code) -> m (Transclude code) transclude code = fmap Transclude . P.label "transclusion (examples: {{ doc2 }}, {{ sepBy s [doc1, doc2] }})" $ lit "{{" *> code (void $ lit "}}") @@ -261,7 +264,8 @@ nonNewlineSpaces = P.takeWhileP Nothing nonNewlineSpace where nonNewlineSpace ch = isSpace ch && ch /= '\n' && ch /= '\r' -eval :: (P.MonadParsec e String m, Annotated code) => (m () -> m code) -> m (Top ident code (Tree ident code)) +eval :: + (P.MonadParsec e String m) => (m () -> m code) -> m (Top code (Leaves ident code) (Tree ident code)) eval code = Eval <$> do -- commit after seeing that ``` is on its own line @@ -271,7 +275,7 @@ eval code = fence <$ guard b CP.space *> code (void $ lit fence) -exampleBlock :: (P.MonadParsec e String m, Annotated code) => (m () -> m code) -> m (Top ident code (Tree ident code)) +exampleBlock :: (P.MonadParsec e String m) => (m () -> m code) -> m (Top code (Leaves ident code) (Tree ident code)) exampleBlock code = ExampleBlock <$> do @@ -279,20 +283,14 @@ exampleBlock code = fence <- lit "```" <+> P.takeWhileP Nothing (== '`') code . void $ lit fence -codeBlock :: (Ord e, P.MonadParsec e String m) => m (Top ident code (Tree ident code)) +codeBlock :: (Ord e, P.MonadParsec e String m) => m (Top code (Leaves ident code) (Tree ident code)) codeBlock = do column <- (\x -> x - 1) . toInteger . P.unPos <$> LP.indentLevel let tabWidth = toInteger . P.unPos $ P.defaultTabWidth fence <- lit "```" <+> P.takeWhileP Nothing (== '`') - name <- - nonNewlineSpaces - *> tokenP (P.takeWhile1P Nothing (not . isSpace)) - <* nonNewlineSpaces + name <- nonNewlineSpaces *> P.takeWhile1P Nothing (not . isSpace) <* nonNewlineSpaces _ <- void CP.eol - verbatim <- - tokenP $ - uncolumn column tabWidth . trimAroundDelimiters - <$> P.someTill P.anySingle ([] <$ lit fence) + verbatim <- uncolumn column tabWidth . trimAroundDelimiters <$> P.someTill P.anySingle ([] <$ lit fence) pure $ CodeBlock name verbatim where uncolumn column tabWidth s = @@ -306,19 +304,19 @@ codeBlock = do in List.intercalate "\n" $ skip column <$> lines s emphasis :: - (Ord e, P.MonadParsec e String m, Annotated code) => + (Ord e, P.MonadParsec e String m) => Char -> m ident -> (m () -> m code) -> m () -> - m (Tree ident code) + m (Paragraph (Leaves ident code)) emphasis delimiter ident code closing = do let start = some (P.satisfy (== delimiter)) end <- P.try $ do end <- start P.lookAhead (P.satisfy (not . isSpace)) pure end - wrap' . Paragraph + Paragraph <$> someTill' (leafy ident code (closing <|> (void $ lit end)) <* void whitespaceWithoutParagraphBreak) (lit end) @@ -331,44 +329,44 @@ emphasis delimiter ident code closing = do Nothing -> pure () bold :: - (Ord e, P.MonadParsec e String m, Annotated code) => + (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> m () -> - m (Leaf ident code (Tree ident code)) + m (Leaf ident code (Leaves ident code)) bold ident code = fmap Bold . emphasis '*' ident code italic :: - (Ord e, P.MonadParsec e String m, Annotated code) => + (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> m () -> - m (Leaf ident code (Tree ident code)) + m (Leaf ident code (Leaves ident code)) italic ident code = fmap Italic . emphasis '_' ident code strikethrough :: - (Ord e, P.MonadParsec e String m, Annotated code) => + (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> m () -> - m (Leaf ident code (Tree ident code)) + m (Leaf ident code (Leaves ident code)) strikethrough ident code = fmap Strikethrough . emphasis '~' ident code namedLink :: - (Ord e, P.MonadParsec e String m, Annotated code) => + (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> m () -> - m (Leaf ident code (Tree ident code)) + m (Leaf ident code (Leaves ident code)) namedLink ident code docClose = P.label "hyperlink (example: [link name](https://destination.com))" do _ <- lit "[" p <- spaced docClose . leafy ident code . void $ char ']' _ <- lit "]" _ <- lit "(" - target <- group $ fmap pure (link ident) <|> some' (transclude code <|> word (docClose <|> void (char ')'))) + target <- group $ fmap pure (wrap $ link ident) <|> some' (wrap (Transclude' <$> transclude code) <|> wrap (Word' <$> word (docClose <|> void (char ')')))) _ <- lit ")" - pure $ NamedLink (wrap' $ Paragraph p) target + pure $ NamedLink (Paragraph p) target sp :: (P.MonadParsec e String m) => m () -> m String sp docClose = P.try $ do @@ -386,11 +384,11 @@ spaced docClose p = some' $ p <* P.optional (sp docClose) -- | Not an actual node, but this pattern is referenced in multiple places list :: - (Ord e, P.MonadParsec e String m, Annotated code) => + (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> m () -> - R.ReaderT ParsingEnv m (Top ident code (Tree ident code)) + R.ReaderT ParsingEnv m (List (Leaves ident code)) list ident code docClose = bulletedList ident code docClose <|> numberedList ident code docClose listSep :: (Ord e, R.MonadReader ParsingEnv m, P.MonadParsec e String m) => m () @@ -412,16 +410,16 @@ listItemStart gutter = P.try do guard (col > parentCol) (col,) <$> gutter -numberedStart :: (Ord e, R.MonadReader ParsingEnv m, P.MonadParsec e String m) => m (Int, Token Word64) -numberedStart = listItemStart $ P.try (tokenP $ LP.decimal <* lit ".") +numberedStart :: (Ord e, R.MonadReader ParsingEnv m, P.MonadParsec e String m) => m (Int, Word64) +numberedStart = listItemStart . P.try $ LP.decimal <* lit "." -- | FIXME: This should take a @`P` a@ numberedList :: - (Ord e, P.MonadParsec e String m, Annotated code) => + (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> m () -> - R.ReaderT ParsingEnv m (Top ident code (Tree ident code)) + R.ReaderT ParsingEnv m (List (Leaves ident code)) numberedList ident code docClose = NumberedList <$> sepBy1' numberedItem listSep where numberedItem = P.label "numbered list (examples: 1. item1, 8. start numbering at '8')" do @@ -430,11 +428,11 @@ numberedList ident code docClose = NumberedList <$> sepBy1' numberedItem listSep -- | FIXME: This should take a @`P` a@ bulletedList :: - (Ord e, P.MonadParsec e String m, Annotated code) => + (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> m () -> - R.ReaderT ParsingEnv m (Top ident code (Tree ident code)) + R.ReaderT ParsingEnv m (List (Leaves ident code)) bulletedList ident code docClose = BulletedList <$> sepBy1' bullet listSep where bullet = P.label "bullet (examples: * item1, - item2)" do @@ -442,16 +440,16 @@ bulletedList ident code docClose = BulletedList <$> sepBy1' bullet listSep column' ident code docClose col column' :: - (Ord e, P.MonadParsec e String m, Annotated code) => + (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> m () -> Int -> - R.ReaderT ParsingEnv m (Column (Tree ident code)) + R.ReaderT ParsingEnv m (Column (Leaves ident code)) column' ident code docClose col = - Column . wrap' + Column <$> (nonNewlineSpaces *> listItemParagraph) - <*> R.local (\e -> e {parentListColumn = col}) (P.optional $ listSep *> fmap wrap' (list ident code docClose)) + <*> R.local (\e -> e {parentListColumn = col}) (P.optional $ listSep *> list ident code docClose) where listItemParagraph = Paragraph <$> do @@ -493,11 +491,11 @@ newline = P.label "newline" $ lit "\n" <|> lit "\r\n" -- > -- > # A section title (not a subsection) section :: - (Ord e, P.MonadParsec e String m, Annotated code) => + (Ord e, P.MonadParsec e String m) => m ident -> (m () -> m code) -> m () -> - R.ReaderT ParsingEnv m (Top ident code (Tree ident code)) + R.ReaderT ParsingEnv m (Top code (Leaves ident code) (Tree ident code)) section ident code docClose = do ns <- R.asks parentSections hashes <- lift $ P.try $ lit (replicate (head ns) '#') *> P.takeWhile1P Nothing (== '#') <* sp docClose @@ -505,11 +503,11 @@ section ident code docClose = do let m = length hashes + head ns body <- R.local (\env -> env {parentSections = m : tail ns}) $ - P.many (sectionElem ident code docClose <* CP.space) - pure $ Section (wrap' title) body + P.many (wrap (sectionElem ident code docClose) <* CP.space) + pure $ Section title body -- | FIXME: This should just take a @`P` code@ and @`P` a@. -group :: (P.MonadParsec e s m) => m (NonEmpty (Leaf ident code a)) -> m (Leaf ident code a) +group :: (P.MonadParsec e s m) => m (NonEmpty (Leaves ident code)) -> m (Group (Leaves ident code)) group = fmap Group . join -- | FIXME: This should just take a @`P` a@ @@ -518,8 +516,12 @@ join = fmap Join -- * utility functions -wrap' :: (Annotated code) => Top ident code (Tree ident code) -> Tree ident code -wrap' doc = ann doc :< doc +wrap :: (Ord e, P.MonadParsec e s m, P.TraversableStream s) => m (f (Cofree f Ann)) -> m (Cofree f Ann) +wrap p = do + start <- posP + val <- p + end <- posP + pure (Ann start end :< val) -- | If it's a multi-line verbatim block we trim any whitespace representing -- indentation from the pretty-printer. diff --git a/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs b/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs index 75bc3a621e..fbc1e042b0 100644 --- a/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs +++ b/unison-syntax/src/Unison/Syntax/Parser/Doc/Data.hs @@ -13,150 +13,140 @@ -- line. module Unison.Syntax.Parser.Doc.Data where +import Data.Bifoldable (Bifoldable, bifoldr) +import Data.Bitraversable (Bitraversable, bitraverse) import Data.Eq.Deriving (deriveEq1, deriveEq2) +import Data.Functor.Classes (Eq1 (..), Ord1 (..), Show1 (..)) import Data.List.NonEmpty (NonEmpty) import Data.Ord.Deriving (deriveOrd1, deriveOrd2) import Text.Show.Deriving (deriveShow1, deriveShow2) -import Unison.Parser.Ann (Annotated (..)) -import Unison.Prelude -import Unison.Syntax.Lexer.Token (Token (..)) +import Unison.Prelude hiding (Word) +import Prelude hiding (Word) newtype UntitledSection a = UntitledSection [a] deriving (Eq, Ord, Show, Foldable, Functor, Traversable) -data Top ident code a - = -- | The first argument is always a `Paragraph` - Section a [a] - | Eval code - | ExampleBlock code - | CodeBlock (Token String) (Token String) - | BulletedList (NonEmpty (Column a)) - | NumberedList (NonEmpty (Token Word64, Column a)) - | Paragraph (NonEmpty (Leaf ident code a)) +newtype Paragraph a = Paragraph (NonEmpty a) + deriving (Eq, Ord, Show, Foldable, Functor, Traversable) + +$(deriveEq1 ''Paragraph) +$(deriveOrd1 ''Paragraph) +$(deriveShow1 ''Paragraph) + +data List a + = BulletedList (NonEmpty (Column a)) + | NumberedList (NonEmpty (Word64, Column a)) deriving (Eq, Ord, Show, Foldable, Functor, Traversable) +instance Eq1 List where + liftEq eqA = curry \case + (BulletedList as, BulletedList as') -> liftEq (liftEq eqA) as as' + (NumberedList as, NumberedList as') -> liftEq (liftEq (liftEq eqA)) as as' + (_, _) -> False + +instance Ord1 List where + liftCompare compareA = curry \case + (BulletedList as, BulletedList as') -> liftCompare (liftCompare compareA) as as' + (NumberedList as, NumberedList as') -> liftCompare (liftCompare (liftCompare compareA)) as as' + (BulletedList _, NumberedList _) -> LT + (NumberedList _, BulletedList _) -> GT + +instance Show1 List where + liftShowsPrec showsPrecA showListA prec = + showParen (prec <= 11) . \case + BulletedList as -> + showString "BulletedList " + . liftShowsPrec (liftShowsPrec showsPrecA showListA) (liftShowList showsPrecA showListA) 11 as + NumberedList as -> + showString "NumberedList " + . liftShowsPrec + (liftShowsPrec (liftShowsPrec showsPrecA showListA) (liftShowList showsPrecA showListA)) + (liftShowList (liftShowsPrec showsPrecA showListA) (liftShowList showsPrecA showListA)) + 11 + as + data Column a - = -- | The first is always a `Paragraph`, and the second a `BulletedList` or `NumberedList` - Column a (Maybe a) + = Column (Paragraph a) (Maybe (List a)) deriving (Eq, Ord, Show, Foldable, Functor, Traversable) -data Leaf ident code a - = Link (EmbedLink ident) - | -- | first is a Paragraph, second is always a Group (which contains either a single Term/Type link or list of - -- `Transclude`s & `Word`s) - NamedLink a (Leaf ident code Void) - | Example code - | Transclude code - | -- | Always a Paragraph - Bold a - | -- | Always a Paragraph - Italic a - | -- | Always a Paragraph - Strikethrough a - | -- | Always a Word - Verbatim (Leaf ident Void Void) - | -- | Always a Word - Code (Leaf ident Void Void) - | -- | Always a Transclude - Source (NonEmpty (SourceElement ident (Leaf ident code Void))) - | -- | Always a Transclude - FoldedSource (NonEmpty (SourceElement ident (Leaf ident code Void))) - | EvalInline code - | Signature (NonEmpty (EmbedSignatureLink ident)) - | SignatureInline (EmbedSignatureLink ident) - | Word (Token String) - | Group (Join (Leaf ident code a)) +instance Eq1 Column where + liftEq eqA (Column para mlist) (Column para' mlist') = + liftEq eqA para para' && liftEq (liftEq eqA) mlist mlist' + +instance Ord1 Column where + liftCompare compareA (Column para mlist) (Column para' mlist') = + liftCompare compareA para para' <> liftCompare (liftCompare compareA) mlist mlist' + +instance Show1 Column where + liftShowsPrec showsPrecA showListA prec (Column para mlist) = + showParen (prec <= 11) $ + showString "Column " + . liftShowsPrec showsPrecA showListA 11 para + . liftShowsPrec (liftShowsPrec showsPrecA showListA) (liftShowList showsPrecA showListA) 11 mlist + +data Top code leaf a + = Section (Paragraph leaf) [a] + | Eval code + | ExampleBlock code + | CodeBlock String String + | List' (List leaf) + | Paragraph' (Paragraph leaf) deriving (Eq, Ord, Show, Foldable, Functor, Traversable) -instance Bifunctor (Leaf ident) where +instance Bifoldable (Top code) where + bifoldr f g z = \case + Section para as -> foldr f (foldr g z as) para + Eval _ -> z + ExampleBlock _ -> z + CodeBlock _ _ -> z + List' list -> foldr f z list + Paragraph' para -> foldr f z para + +instance Bifunctor (Top code) where bimap f g = \case - Link x -> Link x - NamedLink a leaf -> NamedLink (g a) $ first f leaf - Example code -> Example $ f code - Transclude code -> Transclude $ f code - Bold a -> Bold $ g a - Italic a -> Italic $ g a - Strikethrough a -> Strikethrough $ g a - Verbatim leaf -> Verbatim leaf - Code leaf -> Code leaf - Source elems -> Source $ fmap (first f) <$> elems - FoldedSource elems -> FoldedSource $ fmap (first f) <$> elems - EvalInline code -> EvalInline $ f code - Signature x -> Signature x - SignatureInline x -> SignatureInline x - Word x -> Word x - Group join -> Group $ bimap f g <$> join + Section para as -> Section (fmap f para) $ fmap g as + Eval code -> Eval code + ExampleBlock code -> ExampleBlock code + CodeBlock title body -> CodeBlock title body + List' list -> List' $ fmap f list + Paragraph' para -> Paragraph' $ fmap f para + +instance Bitraversable (Top code) where + bitraverse f g = \case + Section para as -> Section <$> traverse f para <*> traverse g as + Eval code -> pure $ Eval code + ExampleBlock code -> pure $ ExampleBlock code + CodeBlock title body -> pure $ CodeBlock title body + List' list -> List' <$> traverse f list + Paragraph' para -> Paragraph' <$> traverse f para + +$(deriveEq1 ''Top) +$(deriveOrd1 ''Top) +$(deriveShow1 ''Top) +$(deriveEq2 ''Top) +$(deriveOrd2 ''Top) +$(deriveShow2 ''Top) -- | This is a deviation from the Unison Doc data model – in Unison, Doc distinguishes between type and term links, but -- here Doc knows nothing about what namespaces may exist. -data EmbedLink ident = EmbedLink (Token ident) - deriving (Eq, Ord, Show) - -data SourceElement ident a = SourceElement (EmbedLink ident) [EmbedAnnotation ident a] +data EmbedLink a = EmbedLink a deriving (Eq, Ord, Show, Foldable, Functor, Traversable) -newtype EmbedSignatureLink ident = EmbedSignatureLink (Token ident) - deriving (Eq, Ord, Show) +$(deriveEq1 ''EmbedLink) +$(deriveOrd1 ''EmbedLink) +$(deriveShow1 ''EmbedLink) -newtype Join a = Join (NonEmpty a) +newtype Transclude a = Transclude a deriving (Eq, Ord, Show, Foldable, Functor, Traversable) +$(deriveEq1 ''Transclude) +$(deriveOrd1 ''Transclude) +$(deriveShow1 ''Transclude) + newtype EmbedAnnotation ident a - = EmbedAnnotation (Either (Token ident) a) + = EmbedAnnotation (Either ident a) deriving (Eq, Ord, Show, Foldable, Functor, Traversable) -instance (Annotated code, Annotated a) => Annotated (Top ident code a) where - ann = \case - Section title body -> ann title <> ann body - Eval code -> ann code - ExampleBlock code -> ann code - CodeBlock label body -> ann label <> ann body - BulletedList items -> ann items - NumberedList items -> ann $ snd <$> items - Paragraph leaves -> ann leaves - -instance (Annotated a) => Annotated (Column a) where - ann (Column para list) = ann para <> ann list - -instance (Annotated code, Annotated a) => Annotated (Leaf ident code a) where - ann = \case - Link link -> ann link - NamedLink label target -> ann label <> ann target - Example code -> ann code - Transclude code -> ann code - Bold para -> ann para - Italic para -> ann para - Strikethrough para -> ann para - Verbatim word -> ann word - Code word -> ann word - Source elems -> ann elems - FoldedSource elems -> ann elems - EvalInline code -> ann code - Signature links -> ann links - SignatureInline link -> ann link - Word text -> ann text - Group (Join leaves) -> ann leaves - -instance Annotated (EmbedLink ident) where - ann (EmbedLink name) = ann name - -instance (Annotated code) => Annotated (SourceElement ident code) where - ann (SourceElement link target) = ann link <> ann target - -instance Annotated (EmbedSignatureLink ident) where - ann (EmbedSignatureLink name) = ann name - -instance (Annotated code) => Annotated (EmbedAnnotation ident code) where - ann (EmbedAnnotation a) = either ann ann a - -$(deriveEq1 ''Column) -$(deriveOrd1 ''Column) -$(deriveShow1 ''Column) - -$(deriveEq1 ''Token) -$(deriveOrd1 ''Token) -$(deriveShow1 ''Token) - $(deriveEq1 ''EmbedAnnotation) $(deriveOrd1 ''EmbedAnnotation) $(deriveShow1 ''EmbedAnnotation) @@ -164,9 +154,8 @@ $(deriveEq2 ''EmbedAnnotation) $(deriveOrd2 ''EmbedAnnotation) $(deriveShow2 ''EmbedAnnotation) -$(deriveEq1 ''EmbedLink) -$(deriveOrd1 ''EmbedLink) -$(deriveShow1 ''EmbedLink) +data SourceElement ident a = SourceElement (EmbedLink ident) [EmbedAnnotation ident a] + deriving (Eq, Ord, Show, Foldable, Functor, Traversable) $(deriveEq1 ''SourceElement) $(deriveOrd1 ''SourceElement) @@ -175,20 +164,68 @@ $(deriveEq2 ''SourceElement) $(deriveOrd2 ''SourceElement) $(deriveShow2 ''SourceElement) +newtype EmbedSignatureLink a = EmbedSignatureLink a + deriving (Eq, Ord, Show, Foldable, Functor, Traversable) + +newtype Word = Word String + deriving (Eq, Ord, Show) + +newtype Join a = Join (NonEmpty a) + deriving (Eq, Ord, Show, Foldable, Functor, Traversable) + $(deriveEq1 ''Join) $(deriveOrd1 ''Join) $(deriveShow1 ''Join) +newtype Group a = Group (Join a) + deriving (Eq, Ord, Show, Foldable, Functor, Traversable) + +$(deriveEq1 ''Group) +$(deriveOrd1 ''Group) +$(deriveShow1 ''Group) + +data Leaf ident code a + = Link (EmbedLink ident) + | -- | the Group always contains either a single Term/Type link or list of `Transclude`s & `Word`s + NamedLink (Paragraph a) (Group a) + | Example code + | Transclude' (Transclude code) + | Bold (Paragraph a) + | Italic (Paragraph a) + | Strikethrough (Paragraph a) + | Verbatim Word + | Code Word + | Source (NonEmpty (SourceElement ident (Transclude code))) + | FoldedSource (NonEmpty (SourceElement ident (Transclude code))) + | EvalInline code + | Signature (NonEmpty (EmbedSignatureLink ident)) + | SignatureInline (EmbedSignatureLink ident) + | Word' Word + | Group' (Group a) + deriving (Eq, Ord, Show, Foldable, Functor, Traversable) + +instance Bifunctor (Leaf ident) where + bimap f g = \case + Link x -> Link x + NamedLink para group -> NamedLink (g <$> para) $ g <$> group + Example code -> Example $ f code + Transclude' trans -> Transclude' $ f <$> trans + Bold para -> Bold $ g <$> para + Italic para -> Italic $ g <$> para + Strikethrough para -> Strikethrough $ g <$> para + Verbatim word -> Verbatim word + Code word -> Code word + Source elems -> Source $ fmap (fmap f) <$> elems + FoldedSource elems -> FoldedSource $ fmap (fmap f) <$> elems + EvalInline code -> EvalInline $ f code + Signature x -> Signature x + SignatureInline x -> SignatureInline x + Word' word -> Word' word + Group' group -> Group' $ g <$> group + $(deriveEq1 ''Leaf) $(deriveOrd1 ''Leaf) $(deriveShow1 ''Leaf) $(deriveEq2 ''Leaf) $(deriveOrd2 ''Leaf) $(deriveShow2 ''Leaf) - -$(deriveEq1 ''Top) -$(deriveOrd1 ''Top) -$(deriveShow1 ''Top) -$(deriveEq2 ''Top) -$(deriveOrd2 ''Top) -$(deriveShow2 ''Top) From b57428f2a02bbfcb28ca33769160d1bbe391b790 Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Thu, 26 Sep 2024 00:50:21 -0600 Subject: [PATCH 4/5] Add unit tests for Doc syntax --- unison-syntax/package.yaml | 5 +- unison-syntax/test/Main.hs | 4 +- unison-syntax/test/Unison/Test/Doc.hs | 168 ++++++++++++++++++++++++++ unison-syntax/unison-syntax.cabal | 5 + 4 files changed, 179 insertions(+), 3 deletions(-) create mode 100644 unison-syntax/test/Unison/Test/Doc.hs diff --git a/unison-syntax/package.yaml b/unison-syntax/package.yaml index 2c49dc4402..77a4c724b3 100644 --- a/unison-syntax/package.yaml +++ b/unison-syntax/package.yaml @@ -39,8 +39,11 @@ tests: - base - code-page - easytest - - unison-syntax + - megaparsec + - unison-core1 - unison-prelude + - unison-syntax + - unison-util-recursion - text main: Main.hs source-dirs: test diff --git a/unison-syntax/test/Main.hs b/unison-syntax/test/Main.hs index 5e2751e288..e08eef4164 100644 --- a/unison-syntax/test/Main.hs +++ b/unison-syntax/test/Main.hs @@ -6,10 +6,10 @@ import System.IO.CodePage (withCP65001) import Unison.Prelude import Unison.Syntax.HashQualifiedPrime qualified as HQ' (unsafeParseText) import Unison.Syntax.Lexer.Unison +import Unison.Test.Doc qualified as Doc main :: IO () -main = - withCP65001 (run test) +main = withCP65001 . run $ tests [test, Doc.test] test :: Test () test = diff --git a/unison-syntax/test/Unison/Test/Doc.hs b/unison-syntax/test/Unison/Test/Doc.hs new file mode 100644 index 0000000000..428b079bd0 --- /dev/null +++ b/unison-syntax/test/Unison/Test/Doc.hs @@ -0,0 +1,168 @@ +module Unison.Test.Doc (test) where + +import Data.Bifunctor (first) +import Data.List.NonEmpty (NonEmpty ((:|))) +import Data.Text (Text) +import EasyTest +import Text.Megaparsec qualified as P +import Unison.HashQualifiedPrime qualified as HQ' +import Unison.Syntax.Lexer.Unison +import Unison.Syntax.Name qualified as Name +import Unison.Syntax.Parser.Doc qualified as DP +import Unison.Syntax.Parser.Doc.Data qualified as Doc +import Unison.Util.Recursion + +test :: Test () +test = + scope "Doc parser" . tests $ + [ t "# Hello" [Doc.Section (Doc.Paragraph $ docWord "Hello" :| []) []], + t + ( unlines + [ "# Hello", + "## Again" + ] + ) + [ Doc.Section + (Doc.Paragraph $ docWord "Hello" :| []) + [Fix $ Doc.Section (Doc.Paragraph $ docWord "Again" :| []) []] + ], + t + ( unlines + [ "## Hello", + "# Again" + ] + ) + [ Doc.Section (Doc.Paragraph $ docWord "Hello" :| []) [], + Doc.Section (Doc.Paragraph $ docWord "Again" :| []) [] + ], + t + "*some bold words*" + [Doc.Paragraph' . Doc.Paragraph $ docBold (docWord "some" :| [docWord "bold", docWord "words"]) :| []], + t + "_some italic words_" + [Doc.Paragraph' . Doc.Paragraph $ docItalic (docWord "some" :| [docWord "italic", docWord "words"]) :| []], + t + "~some struck-through words~" + [ Doc.Paragraph' . Doc.Paragraph $ + docStrikethrough (docWord "some" :| [docWord "struck-through", docWord "words"]) :| [] + ], + -- any number of emphasis delimiters is allowed + t + "__some italic words__" + [Doc.Paragraph' . Doc.Paragraph $ docItalic (docWord "some" :| [docWord "italic", docWord "words"]) :| []], + t + "________some italic words________" + [Doc.Paragraph' . Doc.Paragraph $ docItalic (docWord "some" :| [docWord "italic", docWord "words"]) :| []], + t + "***some bold words***" + [ Doc.Paragraph' . Doc.Paragraph $ docBold (docWord "some" :| [docWord "bold", docWord "words"]) :| [] + ], + t + "***some _nested_ emphasis***" + [ Doc.Paragraph' . Doc.Paragraph $ + docBold (docWord "some" :| [docItalic $ docWord "nested" :| [], docWord "emphasis"]) :| [] + ], + -- mismatched delimiters should be preserved as text + t "*" [Doc.Paragraph' . Doc.Paragraph $ docWord "*" :| []], + t "`" [Doc.Paragraph' . Doc.Paragraph $ docWord "`" :| []], + -- various code blocks (although we’re not testing the Unison code block lexer/parser with these) + t + ( unlines + [ "```", + "You might think this is code, but it’s not", + "```" + ] + ) + [Doc.Eval "You might think this is code, but it’s not\n"], + t + ( unlines + [ "`````````", + "This one has extra delimiters", + "`````````" + ] + ) + [Doc.Eval "This one has extra delimiters\n"], + t + ( unlines + [ "```unison", + "You might think this is code, but it’s not", + "```" + ] + ) + [Doc.CodeBlock "unison" "You might think this is code, but it’s not"], + t + ( unlines + [ "`````````unison", + "This one has extra delimiters", + "`````````" + ] + ) + [Doc.CodeBlock "unison" "This one has extra delimiters"], + t + ( unlines + [ "@typecheck ```", + "You might think this is code, but it’s not", + "```" + ] + ) + [Doc.ExampleBlock "\nYou might think this is code, but it’s not\n"], + t + ( unlines + [ "@typecheck`````````", + "This one has extra delimiters", + "`````````" + ] + ) + [Doc.ExampleBlock "\nThis one has extra delimiters\n"], + t "`some verbatim text`" [Doc.Paragraph' . Doc.Paragraph $ docCode "some verbatim text" :| []], + t "''some verbatim text''" [Doc.Paragraph' . Doc.Paragraph $ docCode "some verbatim text" :| []], + t "'''''some verbatim text'''''" [Doc.Paragraph' . Doc.Paragraph $ docCode "some verbatim text" :| []] + ] + +-- round-trip tests need to be in unison-parser-typechecker +-- +-- -- want to get this to `Text` (or `String`), for round-trip testing +-- showPrettyDoc :: (Var v) => PrettyPrintEnv -> Term v a -> Pretty ColorText +-- showPrettyDoc ppe tm = PP.syntaxToColor . runPretty (avoidShadowing tm ppe) <$> prettyDoc2 emptyAc (printAnnotate ppe tm) + +t :: + String -> + -- | Despite the long type, this is a simplified `Doc` – no annotations, and ident and code are Text & String, + -- respectively. + [Doc.Top String (Fix (Doc.Leaf Text String)) (Fix (Doc.Top String (Fix (Doc.Leaf Text String))))] -> + Test () +t s expected = + scope s + . either + (crash . P.errorBundlePretty) + ( \actual -> + let expected' = Doc.UntitledSection $ embed <$> expected + actual' = cata (\(_ :<< top) -> embed $ first (cata \(_ :<< leaf) -> embed leaf) top) <$> actual + in if actual' == expected' + then ok + else do + note $ "expected: " ++ show expected' + note $ "actual : " ++ show actual' + crash "actual != expected" + ) + $ P.runParser (DP.doc (Name.toText . HQ'.toName . snd <$> typeOrTerm) (P.manyTill P.anySingle) P.eof) "test case" s + +-- * Helper functions to make it easier to read the examples. + +-- Once the parser gets generalized, these should be able to be removed, as they won’t require multiple layers of +-- constructor. + +docBold :: NonEmpty (Fix (Doc.Leaf ident code)) -> Fix (Doc.Leaf ident code) +docBold = embed . Doc.Bold . Doc.Paragraph + +docCode :: String -> Fix (Doc.Leaf ident code) +docCode = embed . Doc.Code . Doc.Word + +docItalic :: NonEmpty (Fix (Doc.Leaf ident code)) -> Fix (Doc.Leaf ident code) +docItalic = embed . Doc.Italic . Doc.Paragraph + +docStrikethrough :: NonEmpty (Fix (Doc.Leaf ident code)) -> Fix (Doc.Leaf ident code) +docStrikethrough = embed . Doc.Strikethrough . Doc.Paragraph + +docWord :: String -> Fix (Doc.Leaf ident code) +docWord = embed . Doc.Word' . Doc.Word diff --git a/unison-syntax/unison-syntax.cabal b/unison-syntax/unison-syntax.cabal index e42ee6e3dc..580cacf1c9 100644 --- a/unison-syntax/unison-syntax.cabal +++ b/unison-syntax/unison-syntax.cabal @@ -90,6 +90,8 @@ library test-suite syntax-tests type: exitcode-stdio-1.0 main-is: Main.hs + other-modules: + Unison.Test.Doc hs-source-dirs: test default-extensions: @@ -127,7 +129,10 @@ test-suite syntax-tests base , code-page , easytest + , megaparsec , text + , unison-core1 , unison-prelude , unison-syntax + , unison-util-recursion default-language: Haskell2010 From 05cc1c21053b62345739575046e4a3ced5077aaa Mon Sep 17 00:00:00 2001 From: Greg Pfeil Date: Wed, 11 Sep 2024 19:09:51 -0600 Subject: [PATCH 5/5] Separate Unison lexer unit tests Have them alongside the Doc parser tests, rather than embedded in `Main`. --- unison-syntax/test/Main.hs | 235 +---------------------- unison-syntax/test/Unison/Test/Unison.hs | 235 +++++++++++++++++++++++ unison-syntax/unison-syntax.cabal | 1 + 3 files changed, 238 insertions(+), 233 deletions(-) create mode 100644 unison-syntax/test/Unison/Test/Unison.hs diff --git a/unison-syntax/test/Main.hs b/unison-syntax/test/Main.hs index e08eef4164..3c84130548 100644 --- a/unison-syntax/test/Main.hs +++ b/unison-syntax/test/Main.hs @@ -1,240 +1,9 @@ module Main (main) where -import Data.Text qualified as Text import EasyTest import System.IO.CodePage (withCP65001) -import Unison.Prelude -import Unison.Syntax.HashQualifiedPrime qualified as HQ' (unsafeParseText) -import Unison.Syntax.Lexer.Unison import Unison.Test.Doc qualified as Doc +import Unison.Test.Unison qualified as Unison main :: IO () -main = withCP65001 . run $ tests [test, Doc.test] - -test :: Test () -test = - scope "lexer" . tests $ - [ t "" [], - t "1" [Numeric "1"], - t "+1" [Numeric "+1"], - t "-1" [Numeric "-1"], - t "-1.0" [Numeric "-1.0"], - t "+1.0" [Numeric "+1.0"], - t "1e3" [Numeric "1e3"], - t "1e+3" [Numeric "1e+3"], - t "1e-3" [Numeric "1e-3"], - t "+1e3" [Numeric "+1e3"], - t "+1e+3" [Numeric "+1e+3"], - t "+1e-3" [Numeric "+1e-3"], - t "-1e3" [Numeric "-1e3"], - t "-1e+3" [Numeric "-1e+3"], - t "-1e-3" [Numeric "-1e-3"], - t "1.2e3" [Numeric "1.2e3"], - t "1.2e+3" [Numeric "1.2e+3"], - t "1.2e-3" [Numeric "1.2e-3"], - t "+1.2e3" [Numeric "+1.2e3"], - t "+1.2e+3" [Numeric "+1.2e+3"], - t "+1.2e-3" [Numeric "+1.2e-3"], - t "-1.2e3" [Numeric "-1.2e3"], - t "-1.2e+3" [Numeric "-1.2e+3"], - t "-1.2e-3" [Numeric "-1.2e-3"], - t "1E3" [Numeric "1e3"], - t "1E+3" [Numeric "1e+3"], - t "1E-3" [Numeric "1e-3"], - t "+1E3" [Numeric "+1e3"], - t "+1E+3" [Numeric "+1e+3"], - t "+1E-3" [Numeric "+1e-3"], - t "-1E3" [Numeric "-1e3"], - t "-1E+3" [Numeric "-1e+3"], - t "-1E-3" [Numeric "-1e-3"], - t "1.2E3" [Numeric "1.2e3"], - t "1.2E+3" [Numeric "1.2e+3"], - t "1.2E-3" [Numeric "1.2e-3"], - t "+1.2E3" [Numeric "+1.2e3"], - t "+1.2E+3" [Numeric "+1.2e+3"], - t "+1.2E-3" [Numeric "+1.2e-3"], - t "-1.2E3" [Numeric "-1.2e3"], - t "-1.2E+3" [Numeric "-1.2e+3"], - t "-1.2E-3" [Numeric "-1.2e-3"], - t "1-1" [Numeric "1", simpleSymbolyId "-", Numeric "1"], - t "1+1" [Numeric "1", simpleSymbolyId "+", Numeric "1"], - t "1 +1" [Numeric "1", Numeric "+1"], - t "1+ 1" [Numeric "1", simpleSymbolyId "+", Numeric "1"], - t "x+y" [simpleWordyId "x", simpleSymbolyId "+", simpleWordyId "y"], - t "++;++" [simpleSymbolyId "++", Semi False, simpleSymbolyId "++"], - t "++; woot" [simpleSymbolyId "++", Semi False, simpleWordyId "woot"], - t "woot;woot" [simpleWordyId "woot", Semi False, simpleWordyId "woot"], - t "woot;(woot)" [simpleWordyId "woot", Semi False, Open "(", simpleWordyId "woot", Close], - t - "[+1,+1]" - [Open "[", Numeric "+1", Reserved ",", Numeric "+1", Close], - t - "[ +1 , +1 ]" - [Open "[", Numeric "+1", Reserved ",", Numeric "+1", Close], - t "-- a comment 1.0" [], - t "\"woot\" -- a comment 1.0" [Textual "woot"], - t "0:Int" [Numeric "0", Reserved ":", simpleWordyId "Int"], - t "0 : Int" [Numeric "0", Reserved ":", simpleWordyId "Int"], - t - ".Foo Foo `.` .foo.bar.baz" - [ simpleWordyId ".Foo", - simpleWordyId "Foo", - simpleSymbolyId "`.`", - simpleWordyId ".foo.bar.baz" - ], - t ".Foo.Bar.+" [simpleSymbolyId ".Foo.Bar.+"], - t ".Foo.++.+" [simpleSymbolyId ".Foo.++.+"], - t ".Foo.`++`.+" [simpleSymbolyId ".Foo.`++`.+"], - t ".Foo.`+.+`.+" [simpleSymbolyId ".Foo.`+.+`.+"], - -- idents with hashes - t "foo#bar" [simpleWordyId "foo#bar"], - t "+#bar" [simpleSymbolyId "+#bar"], - -- note - these are all the same, just with different spacing - let ex1 = "if x then y else z" - ex2 = unlines ["if", " x", "then", " y", "else z"] - ex3 = unlines ["if", " x", " then", " y", "else z"] - ex4 = unlines ["if", " x", " then", " y", "else z"] - expected = - [ Open "if", - simpleWordyId "x", - Close, - Open "then", - simpleWordyId "y", - Close, - Open "else", - simpleWordyId "z", - Close - ] - in -- directly close empty = block - tests $ map (`t` expected) [ex1, ex2, ex3, ex4], - let ex = unlines ["test =", "", "x = 1"] - in -- directly close nested empty blocks - t - ex - [ simpleWordyId "test", - Open "=", - Close, - (Semi True), - simpleWordyId "x", - Open "=", - Numeric "1", - Close - ], - let ex = unlines ["test =", " test2 =", "", "x = 1"] - in t - ex - [ simpleWordyId "test", - Open "=", - simpleWordyId "test2", - Open "=", - Close, - Close, - (Semi True), - simpleWordyId "x", - Open "=", - Numeric "1", - Close - ], - let ex = - unlines - ["if a then b", "else if c then d", "else if e then f", "else g"] -- close of the three `else` blocks - in -- In an empty `then` clause, the `else` is interpreted as a `Reserved` token - t - ex - [ Open "if", - simpleWordyId "a", - Close, - Open "then", - simpleWordyId "b", - Close, - Open "else", - Open "if", - simpleWordyId "c", - Close, - Open "then", - simpleWordyId "d", - Close, - Open "else", - Open "if", - simpleWordyId "e", - Close, - Open "then", - simpleWordyId "f", - Close, - Open "else", - simpleWordyId "g", - Close, - Close, - Close - ], - t - "if x then else" - [ Open "if", - simpleWordyId "x", - Close, - Open "then", - Close, - Open "else", - Close - ], - -- Empty `else` clause - t - "if x then 1 else" - [ Open "if", - simpleWordyId "x", - Close, - Open "then", - Numeric "1", - Close, - Open "else", - Close - ], - -- shouldn't be too eager to find keywords at the front of identifiers, - -- particularly for block-closing keywords (see #2727) - tests $ do - kw <- ["if", "then", "else"] - suffix <- ["0", "x", "!", "'"] -- examples of wordyIdChar - let i = kw ++ suffix - -- a keyword at the front of an identifier should still be an identifier - pure $ t i [simpleWordyId (Text.pack i)], - -- Test string literals - t - "\"simple string without escape characters\"" - [Textual "simple string without escape characters"], - t - "\"test escaped quotes \\\"in quotes\\\"\"" - [Textual "test escaped quotes \"in quotes\""], - t "\"\\n \\t \\b \\a\"" [Textual "\n \t \b \a"], - -- Delayed string - t "'\"\"" [Reserved "'", Textual ""], - -- https://github.com/unisonweb/unison/issues/4683 - -- don't emit virtual semis in ability lists or normal lists - t "{foo\n,bar}" [Open "{", simpleWordyId "foo", Reserved ",", simpleWordyId "bar", Close], - t "{foo\n ,bar}" [Open "{", simpleWordyId "foo", Reserved ",", simpleWordyId "bar", Close], - t "[foo\n,bar]" [Open "[", simpleWordyId "foo", Reserved ",", simpleWordyId "bar", Close], - t "[foo\n ,bar]" [Open "[", simpleWordyId "foo", Reserved ",", simpleWordyId "bar", Close] - ] - -t :: String -> [Lexeme] -> Test () -t s expected = case toList . preParse $ lexer filename s of - [token@(Token (Err _) _ _)] -> crash $ show token - tokened -> - let actual = payload <$> tokened - expected' = Open filename : expected <> pure Close - in scope s $ - if actual == expected' - then ok - else do - note $ "expected: " ++ show expected' - note $ "actual : " ++ show actual - crash "actual != expected" - where - filename = "test case" - -simpleSymbolyId :: Text -> Lexeme -simpleSymbolyId = - SymbolyId . HQ'.unsafeParseText - -simpleWordyId :: Text -> Lexeme -simpleWordyId = - WordyId . HQ'.unsafeParseText +main = withCP65001 . run $ tests [Unison.test, Doc.test] diff --git a/unison-syntax/test/Unison/Test/Unison.hs b/unison-syntax/test/Unison/Test/Unison.hs new file mode 100644 index 0000000000..5468046400 --- /dev/null +++ b/unison-syntax/test/Unison/Test/Unison.hs @@ -0,0 +1,235 @@ +module Unison.Test.Unison (test) where + +import Data.Text qualified as Text +import EasyTest +import Unison.Prelude +import Unison.Syntax.HashQualifiedPrime qualified as HQ' (unsafeParseText) +import Unison.Syntax.Lexer.Unison + +test :: Test () +test = + scope "lexer" . tests $ + [ t "" [], + t "1" [Numeric "1"], + t "+1" [Numeric "+1"], + t "-1" [Numeric "-1"], + t "-1.0" [Numeric "-1.0"], + t "+1.0" [Numeric "+1.0"], + t "1e3" [Numeric "1e3"], + t "1e+3" [Numeric "1e+3"], + t "1e-3" [Numeric "1e-3"], + t "+1e3" [Numeric "+1e3"], + t "+1e+3" [Numeric "+1e+3"], + t "+1e-3" [Numeric "+1e-3"], + t "-1e3" [Numeric "-1e3"], + t "-1e+3" [Numeric "-1e+3"], + t "-1e-3" [Numeric "-1e-3"], + t "1.2e3" [Numeric "1.2e3"], + t "1.2e+3" [Numeric "1.2e+3"], + t "1.2e-3" [Numeric "1.2e-3"], + t "+1.2e3" [Numeric "+1.2e3"], + t "+1.2e+3" [Numeric "+1.2e+3"], + t "+1.2e-3" [Numeric "+1.2e-3"], + t "-1.2e3" [Numeric "-1.2e3"], + t "-1.2e+3" [Numeric "-1.2e+3"], + t "-1.2e-3" [Numeric "-1.2e-3"], + t "1E3" [Numeric "1e3"], + t "1E+3" [Numeric "1e+3"], + t "1E-3" [Numeric "1e-3"], + t "+1E3" [Numeric "+1e3"], + t "+1E+3" [Numeric "+1e+3"], + t "+1E-3" [Numeric "+1e-3"], + t "-1E3" [Numeric "-1e3"], + t "-1E+3" [Numeric "-1e+3"], + t "-1E-3" [Numeric "-1e-3"], + t "1.2E3" [Numeric "1.2e3"], + t "1.2E+3" [Numeric "1.2e+3"], + t "1.2E-3" [Numeric "1.2e-3"], + t "+1.2E3" [Numeric "+1.2e3"], + t "+1.2E+3" [Numeric "+1.2e+3"], + t "+1.2E-3" [Numeric "+1.2e-3"], + t "-1.2E3" [Numeric "-1.2e3"], + t "-1.2E+3" [Numeric "-1.2e+3"], + t "-1.2E-3" [Numeric "-1.2e-3"], + t "1-1" [Numeric "1", simpleSymbolyId "-", Numeric "1"], + t "1+1" [Numeric "1", simpleSymbolyId "+", Numeric "1"], + t "1 +1" [Numeric "1", Numeric "+1"], + t "1+ 1" [Numeric "1", simpleSymbolyId "+", Numeric "1"], + t "x+y" [simpleWordyId "x", simpleSymbolyId "+", simpleWordyId "y"], + t "++;++" [simpleSymbolyId "++", Semi False, simpleSymbolyId "++"], + t "++; woot" [simpleSymbolyId "++", Semi False, simpleWordyId "woot"], + t "woot;woot" [simpleWordyId "woot", Semi False, simpleWordyId "woot"], + t "woot;(woot)" [simpleWordyId "woot", Semi False, Open "(", simpleWordyId "woot", Close], + t + "[+1,+1]" + [Open "[", Numeric "+1", Reserved ",", Numeric "+1", Close], + t + "[ +1 , +1 ]" + [Open "[", Numeric "+1", Reserved ",", Numeric "+1", Close], + t "-- a comment 1.0" [], + t "\"woot\" -- a comment 1.0" [Textual "woot"], + t "0:Int" [Numeric "0", Reserved ":", simpleWordyId "Int"], + t "0 : Int" [Numeric "0", Reserved ":", simpleWordyId "Int"], + t + ".Foo Foo `.` .foo.bar.baz" + [ simpleWordyId ".Foo", + simpleWordyId "Foo", + simpleSymbolyId "`.`", + simpleWordyId ".foo.bar.baz" + ], + t ".Foo.Bar.+" [simpleSymbolyId ".Foo.Bar.+"], + t ".Foo.++.+" [simpleSymbolyId ".Foo.++.+"], + t ".Foo.`++`.+" [simpleSymbolyId ".Foo.`++`.+"], + t ".Foo.`+.+`.+" [simpleSymbolyId ".Foo.`+.+`.+"], + -- idents with hashes + t "foo#bar" [simpleWordyId "foo#bar"], + t "+#bar" [simpleSymbolyId "+#bar"], + -- note - these are all the same, just with different spacing + let ex1 = "if x then y else z" + ex2 = unlines ["if", " x", "then", " y", "else z"] + ex3 = unlines ["if", " x", " then", " y", "else z"] + ex4 = unlines ["if", " x", " then", " y", "else z"] + expected = + [ Open "if", + simpleWordyId "x", + Close, + Open "then", + simpleWordyId "y", + Close, + Open "else", + simpleWordyId "z", + Close + ] + in -- directly close empty = block + tests $ map (`t` expected) [ex1, ex2, ex3, ex4], + let ex = unlines ["test =", "", "x = 1"] + in -- directly close nested empty blocks + t + ex + [ simpleWordyId "test", + Open "=", + Close, + (Semi True), + simpleWordyId "x", + Open "=", + Numeric "1", + Close + ], + let ex = unlines ["test =", " test2 =", "", "x = 1"] + in t + ex + [ simpleWordyId "test", + Open "=", + simpleWordyId "test2", + Open "=", + Close, + Close, + (Semi True), + simpleWordyId "x", + Open "=", + Numeric "1", + Close + ], + let ex = + unlines + ["if a then b", "else if c then d", "else if e then f", "else g"] -- close of the three `else` blocks + in -- In an empty `then` clause, the `else` is interpreted as a `Reserved` token + t + ex + [ Open "if", + simpleWordyId "a", + Close, + Open "then", + simpleWordyId "b", + Close, + Open "else", + Open "if", + simpleWordyId "c", + Close, + Open "then", + simpleWordyId "d", + Close, + Open "else", + Open "if", + simpleWordyId "e", + Close, + Open "then", + simpleWordyId "f", + Close, + Open "else", + simpleWordyId "g", + Close, + Close, + Close + ], + t + "if x then else" + [ Open "if", + simpleWordyId "x", + Close, + Open "then", + Close, + Open "else", + Close + ], + -- Empty `else` clause + t + "if x then 1 else" + [ Open "if", + simpleWordyId "x", + Close, + Open "then", + Numeric "1", + Close, + Open "else", + Close + ], + -- shouldn't be too eager to find keywords at the front of identifiers, + -- particularly for block-closing keywords (see #2727) + tests $ do + kw <- ["if", "then", "else"] + suffix <- ["0", "x", "!", "'"] -- examples of wordyIdChar + let i = kw ++ suffix + -- a keyword at the front of an identifier should still be an identifier + pure $ t i [simpleWordyId (Text.pack i)], + -- Test string literals + t + "\"simple string without escape characters\"" + [Textual "simple string without escape characters"], + t + "\"test escaped quotes \\\"in quotes\\\"\"" + [Textual "test escaped quotes \"in quotes\""], + t "\"\\n \\t \\b \\a\"" [Textual "\n \t \b \a"], + -- Delayed string + t "'\"\"" [Reserved "'", Textual ""], + -- https://github.com/unisonweb/unison/issues/4683 + -- don't emit virtual semis in ability lists or normal lists + t "{foo\n,bar}" [Open "{", simpleWordyId "foo", Reserved ",", simpleWordyId "bar", Close], + t "{foo\n ,bar}" [Open "{", simpleWordyId "foo", Reserved ",", simpleWordyId "bar", Close], + t "[foo\n,bar]" [Open "[", simpleWordyId "foo", Reserved ",", simpleWordyId "bar", Close], + t "[foo\n ,bar]" [Open "[", simpleWordyId "foo", Reserved ",", simpleWordyId "bar", Close] + ] + +t :: String -> [Lexeme] -> Test () +t s expected = case toList . preParse $ lexer filename s of + [token@(Token (Err _) _ _)] -> crash $ show token + tokened -> + let actual = payload <$> tokened + expected' = Open filename : expected <> pure Close + in scope s $ + if actual == expected' + then ok + else do + note $ "expected: " ++ show expected' + note $ "actual : " ++ show actual + crash "actual != expected" + where + filename = "test case" + +simpleSymbolyId :: Text -> Lexeme +simpleSymbolyId = + SymbolyId . HQ'.unsafeParseText + +simpleWordyId :: Text -> Lexeme +simpleWordyId = + WordyId . HQ'.unsafeParseText diff --git a/unison-syntax/unison-syntax.cabal b/unison-syntax/unison-syntax.cabal index 580cacf1c9..389ca06413 100644 --- a/unison-syntax/unison-syntax.cabal +++ b/unison-syntax/unison-syntax.cabal @@ -92,6 +92,7 @@ test-suite syntax-tests main-is: Main.hs other-modules: Unison.Test.Doc + Unison.Test.Unison hs-source-dirs: test default-extensions: