diff --git a/app/Main.hs b/app/Main.hs index db122c6..a978dcd 100644 --- a/app/Main.hs +++ b/app/Main.hs @@ -15,7 +15,8 @@ import System.IO (hSetEncoding, stdin, stdout, utf8) import HhiReducer import Kiselyov import System.TimeIt -import Text.RawString.QQ +import Text.RawString.QQ +import qualified Data.Bifunctor printGraph :: ST s (STRef s (Graph s)) -> ST s String @@ -34,7 +35,7 @@ main = do hSetEncoding stdout utf8 -- this is required to handle UTF-8 characters like λ --let testSource = "main = (\\x y -> + x x) 3 4" - mapM_ showCompilations [sqr] --, factorial, fibonacci, ackermann, tak] + mapM_ showCompilations [sqr, factorial] --, fibonacci, ackermann, tak] --demo type SourceCode = String @@ -51,7 +52,7 @@ tak = [r| main = tak 7 4 2 --18 6 3 |] -ackermann :: SourceCode +ackermann :: SourceCode ackermann = [r| ack = y(λf n m. if (is0 n) (+ m 1) (if (is0 m) (f (sub1 n) 1) (f (sub1 n) (f n (sub1 m))))) main = ack 2 2 @@ -75,6 +76,12 @@ showCompilations source = do putStrLn "The parsed environment of named lambda expressions:" mapM_ print env putStrLn "" + putStrLn "The expressions in de Bruijn notation:" + mapM_ (print . Data.Bifunctor.second deBruijn) env + + putStrLn "applying plain compilation:" + print $ compilePlain env + putStrLn "" let expr = compile env abstractToSKI putStrLn "The main expression compiled to SICKBY combinator expressions:" diff --git a/kiselyov.md b/kiselyov.md index 01f4bf1..43fbb37 100644 --- a/kiselyov.md +++ b/kiselyov.md @@ -37,7 +37,7 @@ My implementation closely follows Ben Lynn's implementation of Kiselyov's algori My parser can parse programs of a very rudimentary language that is basically just pure λ-calculus plus integers. Here is an example: ```haskell -sqr = \x -> * x x +sqr = λx. * x x main = sqr 3 ``` @@ -64,6 +64,90 @@ type Environment = [(String, Expr)] Now we can define a compiler that translates such λ-expressions to combinator terms. +Our journey begins by translating λ-expressions to a data type `DB` which is quite similar to the λ-calculus terms but uses indices instead of variable names. This is done by the function `deBruijn`: + +```haskell +data Peano = Succ Peano | Zero deriving Show +data DB = N Peano | L DB | A DB DB | Free String | IN Integer deriving Show + +deBruijn :: Expr -> DB +deBruijn = go [] where + go binds = \case + Var x -> maybe (Free x) N $ index x binds + Lam x t -> L $ go (x:binds) t + App t u -> A (go binds t) (go binds u) + Int i -> IN i + +index :: Eq a => a -> [a] -> Maybe Peano +index x xs = lookup x $ zip xs $ iterate Succ Zero +``` + +Lets see how this works on our example `sqr` and `main` functions: + +```haskell + let source = [r| + sqr = \x. * x x + main = sqr 3 + |] + let env = parseEnvironment source + putStrLn "The parsed environment of named lambda expressions:" + mapM_ print env + putStrLn "" + putStrLn "The expressions in de Bruijn notation:" + mapM_ (print . Data.Bifunctor.second deBruijn) env +``` +This will produce the following output: + +```haskell +The parsed environment of named lambda expressions: +("sqr", Lam "x" (App (App (Var "*") (Var "x")) (Var "x"))) +("main", App (Var "sqr") (Int 3)) + +The expressions in de Bruijn notation: +("sqr", L (A (A (Free "*") (N Zero)) (N Zero))) +("main", A (Free "sqr") (IN 3)) +``` + +It's easy to see that the de Bruijn notation is just a different representation of the λ-calculus terms. The only difference is that the variable names are replaced by indices. +This is quite helpful as it allows to systematically adress variables by their respective position without having to deal with arbitrary variable names. + +But why are we using Peano numbers for the indices? Why not just use integers? +Well it's definitely possible to [use integers instead of Peano numbers](https://crypto.stanford.edu/~blynn/lambda/cl.html). +But there is a good reason to use Peano numbers in our case: +In the subsequent compilation steps we want to be able to do pattern matching on the indices. This is not possible with integers but it is possible with Peano numbers, because they are defined as an algebraic data type: + +```haskell +data Peano = Succ Peano | Zero +``` + +Now we'll take a look at the next step in the compilation process. The function `convert` translates the de Bruijn notation to a data type `CL` which represents the combinator terms. + + + +```haskell +convert :: ((Int, CL) -> (Int, CL) -> CL) -> [(String, Expr)] -> DB -> (Int, CL) +convert (#) env = \case + N Zero -> (1, Com I) + N (Succ e) -> (n + 1, (0, Com K) # t) where t@(n, _) = rec $ N e + L e -> case rec e of + (0, d) -> (0, Com K :@ d) + (n, d) -> (n - 1, d) + A e1 e2 -> (max n1 n2, t1 # t2) where + t1@(n1, _) = rec e1 + t2@(n2, _) = rec e2 + IN i -> (0, INT i) + Free s -> convertVar (#) env s + where rec = convert (#) env +``` + +xxx + + + + + + + Combinator terms are defined as follows: ```haskell @@ -75,6 +159,8 @@ data Combinator = I | K | S | B | C | Y | R | B' | C' | S' | T | ``` + + ## to be continued... ## performance comparison diff --git a/src/CLTerm.hs b/src/CLTerm.hs index 2262831..c7648ee 100644 --- a/src/CLTerm.hs +++ b/src/CLTerm.hs @@ -14,6 +14,7 @@ module CLTerm import Parser (Expr(..)) data CL = Com Combinator | INT Integer | CL :@ CL + instance Show CL where showsPrec :: Int -> CL -> ShowS showsPrec p = \case diff --git a/src/Kiselyov.hs b/src/Kiselyov.hs index 287e0df..0a7b4da 100644 --- a/src/Kiselyov.hs +++ b/src/Kiselyov.hs @@ -3,6 +3,7 @@ module Kiselyov ( deBruijn, bulkOpt, + compilePlain, compileBulk, compileEta, compileBulkLinear, @@ -22,20 +23,54 @@ I've only added minor changes to fit it into my codebase. E.g. I've added access to the environment of named lambda expressions for free variables. --} -data Peano = Su Peano | Z deriving Show +data Peano = Succ Peano | Zero deriving Show data DB = N Peano | L DB | A DB DB | Free String | IN Integer deriving Show -index :: Eq a => a -> [a] -> Maybe Peano -index x xs = lookup x $ zip xs $ iterate Su Z - deBruijn :: Expr -> DB deBruijn = go [] where go binds = \case Var x -> maybe (Free x) N $ index x binds Lam x t -> L $ go (x:binds) t - t `App` u -> A (go binds t) (go binds u) + App t u -> A (go binds t) (go binds u) Int i -> IN i +index :: Eq a => a -> [a] -> Maybe Peano +index x xs = lookup x $ zip xs $ iterate Succ Zero + +convert :: ((Int, CL) -> (Int, CL) -> CL) -> [(String, Expr)] -> DB -> (Int, CL) +convert (#) env = \case + N Zero -> (1, Com I) + N (Succ e) -> (n + 1, (0, Com K) # t) where t@(n, _) = rec $ N e + L e -> case rec e of + (0, d) -> (0, Com K :@ d) + (n, d) -> (n - 1, d) + A e1 e2 -> (max n1 n2, t1 # t2) where + t1@(n1, _) = rec e1 + t2@(n2, _) = rec e2 + IN i -> (0, INT i) + Free s -> convertVar (#) env s + where rec = convert (#) env + +-- | convert a free variable to a combinator. +-- first we try to find a definition in the environment. +-- if that fails, we assume it is a SICKBY combinator. +convertVar :: ((Int, CL) -> (Int, CL) -> CL) -> [(String, Expr)] -> String -> (Int, CL) +convertVar (#) env s + | Just t <- lookup s env = convert (#) env (deBruijn t) + | otherwise = (0, Com (fromString s)) + +plain :: Environment -> DB -> (Int, CL) +plain = convert (#) where + (0 , d1) # (0 , d2) = d1 :@ d2 + (0 , d1) # (n , d2) = (0, Com B :@ d1) # (n - 1, d2) + (n , d1) # (0 , d2) = (0, Com R :@ d2) # (n - 1, d1) + (n1, d1) # (n2, d2) = (n1 - 1, (0, Com S) # (n1 - 1, d1)) # (n2 - 1, d2) + +compilePlain :: Environment -> CL +compilePlain env = case lookup "main" env of + Nothing -> error "main function missing" + Just main -> snd $ plain env (deBruijn main) + bulk :: Combinator -> Int -> CL bulk c 1 = Com c bulk c n = Com $ BulkCom (show c) n @@ -53,17 +88,17 @@ compileBulk env = case lookup "main" env of compileBulkLinear :: Environment -> CL compileBulkLinear env = case lookup "main" env of Nothing -> error "main function missing" - Just main -> snd $ bulkOpt breakBulkLinear env (deBruijn main) + Just main -> snd $ bulkOpt breakBulkLinear env (deBruijn main) compileBulkLog :: Environment -> CL compileBulkLog env = case lookup "main" env of Nothing -> error "main function missing" - Just main -> snd $ bulkOpt breakBulkLog env (deBruijn main) + Just main -> snd $ bulkOpt breakBulkLog env (deBruijn main) convertBool :: (([Bool], CL) -> ([Bool], CL) -> CL) -> Environment -> DB -> ([Bool], CL) convertBool (#) env = \case - N Z -> ([True], Com I) - N (Su e) -> (False:g, d) where (g, d) = rec env (N e) + N Zero -> ([True], Com I) + N (Succ e) -> (False:g, d) where (g, d) = rec env (N e) L e -> case rec env e of ([], d) -> ([], Com K :@ d) (False:g, d) -> (g, ([], Com K) # (g, d)) @@ -75,6 +110,9 @@ convertBool (#) env = \case IN i -> ([False], INT i) where rec = convertBool (#) +-- | convert a free variable to a combinator. +-- first we try to find a definition in the environment. +-- if that fails, we assume it is a SICKBY combinator. convertFree :: (([Bool], CL) -> ([Bool], CL) -> CL) -> [(String, Expr)] -> String -> ([Bool], CL) convertFree (#) env s | Just t <- lookup s env = convertBool (#) env (deBruijn t) @@ -120,8 +158,8 @@ bulkLookup s env bulkFun = case lookup s env of bulkOpt :: (Combinator -> Int -> CL) -> Environment -> DB -> ([Bool], CL) bulkOpt bulkFun env = \case - N Z -> ([True], Com I) - N (Su e) -> first (False:) $ rec env $ N e + N Zero -> ([True], Com I) + N (Succ e) -> first (False:) $ rec env $ N e L e -> case rec env e of ([], d) -> ([], Com K :@ d) (False:g, d) -> ([], Com K) ## (g, d) diff --git a/src/Parser.hs b/src/Parser.hs index 63690a7..d4e9e55 100644 --- a/src/Parser.hs +++ b/src/Parser.hs @@ -19,10 +19,10 @@ data Expr | Var String | Int Integer | Lam String Expr - deriving (Eq) --, Show) + deriving (Eq, Show) -instance Show Expr where - show = toString +-- instance Show Expr where +-- show = toString toString :: Expr -> String