diff --git a/.github/workflows/linux.yml b/.github/workflows/linux.yml new file mode 100644 index 0000000..ab53183 --- /dev/null +++ b/.github/workflows/linux.yml @@ -0,0 +1,26 @@ +name: Linux + +on: + push: + branches: + - '*' + tags-ignore: + - '*' + pull_request: + +jobs: + raku: + strategy: + matrix: + os: + - ubuntu-latest + raku-version: + - 'latest' + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + - uses: Raku/setup-raku@v1 + with: + raku-version: ${{ matrix.raku-version }} + - name: Run Special Tests + run: raku run-tests -i diff --git a/.github/workflows/macos.yml b/.github/workflows/macos.yml new file mode 100644 index 0000000..de79738 --- /dev/null +++ b/.github/workflows/macos.yml @@ -0,0 +1,26 @@ +name: MacOS + +on: + push: + branches: + - '*' + tags-ignore: + - '*' + pull_request: + +jobs: + raku: + strategy: + matrix: + os: + - macos-latest + raku-version: + - 'latest' + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + - uses: Raku/setup-raku@v1 + with: + raku-version: ${{ matrix.raku-version }} + - name: Run Special Tests + run: raku run-tests -i diff --git a/.github/workflows/windows.yml b/.github/workflows/windows.yml new file mode 100644 index 0000000..557c8d5 --- /dev/null +++ b/.github/workflows/windows.yml @@ -0,0 +1,26 @@ +name: Windows + +on: + push: + branches: + - '*' + tags-ignore: + - '*' + pull_request: + +jobs: + raku: + strategy: + matrix: + os: + - windows-latest + raku-version: + - 'latest' + runs-on: ${{ matrix.os }} + steps: + - uses: actions/checkout@v3 + - uses: Raku/setup-raku@v1 + with: + raku-version: ${{ matrix.raku-version }} + - name: Run Special Tests + run: raku run-tests -i diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a3c0381 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.precomp/ +/Algorithm-LCS-* diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 34aca29..0000000 --- a/.travis.yml +++ /dev/null @@ -1,9 +0,0 @@ -language: perl6 -install: - - rakudobrew build-panda -before_script: - - panda installdeps . - - panda-build - -script: - - panda-test diff --git a/Changes b/Changes new file mode 100644 index 0000000..df72e44 --- /dev/null +++ b/Changes @@ -0,0 +1,4 @@ +Revision history for Algorithm::LCS + +{{$NEXT}} + - Initial version as a Raku Community Module diff --git a/META6.json b/META6.json index f647ec0..0cb2caf 100644 --- a/META6.json +++ b/META6.json @@ -1,13 +1,28 @@ { - "perl": "6.*", - "name": "Algorithm::LCS", - "license" : "MIT", - "version": "0.1.0", - "description": "Implementation of the longest common subsequence algorithm", - "author": "Rob Hoelz", - "depends": [], - "provides" : { - "Algorithm::LCS" : "lib/Algorithm/LCS.pm" - }, - "source-url": "git://github.com/hoelzro/p6-algorithm-lcs.git" + "auth": "zef:raku-community-modules", + "authors": [ + "Rob Hoelz", + "Raku Community" + ], + "build-depends": [ + ], + "depends": [ + ], + "description": "Implementation of the longest common subsequence algorithm", + "license": "MIT", + "name": "Algorithm::LCS", + "perl": "6.*", + "provides": { + "Algorithm::LCS": "lib/Algorithm/LCS.rakumod" + }, + "resources": [ + ], + "source-url": "https://github.com/hoelzro/p6-algorithm-lcs.git", + "tags": [ + "ALGORITHM", + "LCS" + ], + "test-depends": [ + ], + "version": "0.1.0" } diff --git a/README.md b/README.md index fc7a0ba..853b13c 100644 --- a/README.md +++ b/README.md @@ -1,30 +1,91 @@ -# TITLE +[![Actions Status](https://github.com/hoelzro/p6-algorithm-lcs/actions/workflows/linux.yml/badge.svg)](https://github.com/hoelzro/p6-algorithm-lcs/actions) [![Actions Status](https://github.com/hoelzro/p6-algorithm-lcs/actions/workflows/macos.yml/badge.svg)](https://github.com/hoelzro/p6-algorithm-lcs/actions) [![Actions Status](https://github.com/hoelzro/p6-algorithm-lcs/actions/workflows/windows.yml/badge.svg)](https://github.com/hoelzro/p6-algorithm-lcs/actions) -Algorithm::LCS +NAME +==== -# SYNOPSIS +Algorithm::LCS - Implementation of the longest common subsequence algorithm -```perl6 - use Algorithm::LCS; +SYNOPSIS +======== - # regular usage - say lcs(, ); # prints T +```raku +use Algorithm::LCS; - # custom comparator via :compare - say lcs(, , :compare(&infix:)); +# regular usage +say lcs(, ); # prints T - # extra special custom comparison via :compare-i - my @a = slurp('one.txt'); - my @b = slurp('two.txt'); - my @a-hashed = @a.map({ hash-algorithm($_) }); - my @b-hashed = @b.map({ hash-algorithm($_) }); - say lcs(@a, @b, :compare-i({ @a-hashed[$^i] eqv @b-hashed[$^j] })); +# custom comparator via :compare +say lcs(, , :compare(&infix:)); + +# extra special custom comparison via :compare-i +my @a = slurp('one.txt'); +my @b = slurp('two.txt'); +my @a-hashed = @a.map({ hash-algorithm($_) }); +my @b-hashed = @b.map({ hash-algorithm($_) }); +say lcs(@a, @b, :compare-i({ @a-hashed[$^i] eqv @b-hashed[$^j] })); +``` + +DESCRIPTION +=========== + +This module contains a single subroutine, `lcs`, that calculates the longest common subsequence between two sequences of data. `lcs` takes two lists as required parameters; you may also specify the comparison function (which defaults to `eqv`) via the `&compare` named parameter). + +Sometimes you may want to maintain a parallel array of information to consult during calculation (for example, if you're comparing long lines of a file, and you'd like a speedup by comparing their hashes rather than their contents); for that, you may use the `&compare-i` named argumeny + +SUBROUTINES +=========== + +### sub lcs + +```raku +sub lcs( + @a, + @b, + :&compare = Code.new, + :&compare-i is copy +) returns Mu ``` -# DESCRIPTION +Returns the longest common subsequence of two sequences of data. + +class Mu $ +---------- + +The first sequence + +class Mu $ +---------- + +The second sequence + +class Mu $ +---------- + +The comparison function (defaults to C) + +class Mu $ +---------- + +The compare-by-index function (defaults to using &compare) + +SEE ALSO +======== + +[Wikipedia article](http://en.wikipedia.org/wiki/Longest_common_subsequence_problem) + +AUTHORS +======= + + * Rob Hoelz + + * Raku Community + +COPYRIGHT AND LICENSE +===================== + +Copyright (c) 2014 - 2017 Rob Hoelz -This module contains a single subroutine, lcs, that calculates the longest common subsequence between two sequences of data. lcs takes two lists as required parameters; you may also specify the comparison function (which defaults to eqv) via the &compare named parameter). Sometimes you may want to maintain a parallel array of information to consult during calculation (for example, if you're comparing long lines of a file, and you'd like a speedup by comparing their hashes rather than their contents); for that, you may use the &compare-i named parameter. +Copyright (c) 2024 Raku Community -# SEE ALSO +This library is free software; you can redistribute it and/or modify it under the MIT license. -http://en.wikipedia.org/wiki/Longest_common_subsequence_problem diff --git a/dist.ini b/dist.ini new file mode 100644 index 0000000..4975faa --- /dev/null +++ b/dist.ini @@ -0,0 +1,11 @@ +name = Algorithm::LCS + +[ReadmeFromPod] +filename = lib/Algorithm/LCS.rakumod + +[UploadToZef] + +[Badges] +provider = github-actions/linux.yml +provider = github-actions/macos.yml +provider = github-actions/windows.yml diff --git a/lib/Algorithm/LCS.pm b/lib/Algorithm/LCS.pm deleted file mode 100644 index f3ce0cf..0000000 --- a/lib/Algorithm/LCS.pm +++ /dev/null @@ -1,128 +0,0 @@ -use v6; - -=head1 TITLE -Algorithm::LCS -=head1 SYNOPSIS -=begin code - use Algorithm::LCS; - - # regular usage - say lcs(, ); # prints T - - # custom comparator via :compare - say lcs(, , :compare(&infix:)); - - # extra special custom comparison via :compare-i - my @a = slurp('one.txt'); - my @b = slurp('two.txt'); - my @a-hashed = @a.map({ hash-algorithm($_) }); - my @b-hashed = @b.map({ hash-algorithm($_) }); - say lcs(@a, @b, :compare-i({ @a-hashed[$^i] eqv @b-hashed[$^j] })); -=end code -=begin head1 -DESCRIPTION - -This module contains a single subroutine, C, that calculates -the longest common subsequence between two sequences of data. C -takes two lists as required parameters; you may also specify the comparison -function (which defaults to C) via the C<&compare> named parameter). -Sometimes you may want to maintain a parallel array of information to -consult during calculation (for example, if you're comparing long lines -of a file, and you'd like a speedup by comparing their hashes rather than -their contents); for that, you may use the C<&compare-i> named parameter. - -=end head1 - -=begin head1 -SEE ALSO - -http://en.wikipedia.org/wiki/Longest_common_subsequence_problem -=end head1 - -module Algorithm::LCS:ver<0.0.1>:auth { - my sub strip-prefix(@a, @b, &compare-i) { - my $i = 0; - my @prefix; - - while $i < (@a&@b) && &compare-i($i, $i) { - @prefix.push: @a[$i++]; - } - - @prefix - } - - my sub strip-suffix(@a, @b, &compare-i) { - # XXX could be optimized, but this is easy for now - strip-prefix(@a.reverse, @b.reverse, -> $i, $j { - &compare-i(@a.end - $i, @b.end - $j) - }).reverse - } - - my sub build-lcs-matrix(@a, @b, &compare-i) { - my @matrix = 0 xx ((@a + 1) * (@b + 1)); - my $row-len = @a + 1; - - for 1 .. @b X 1 .. @a -> ($row, $offset) { - my $index = $row * $row-len + $offset; - - if &compare-i($offset - 1, $row - 1) { - @matrix[$index] = @matrix[$index - $row-len - 1] + 1; - } else { - @matrix[$index] = [max] @matrix[ $index - $row-len, $index - 1 ]; - } - } - - @matrix - } - - #| Returns the longest common subsequence of two sequences of data. - our sub lcs( - @a, #= The first sequence - @b, #= The second sequence - :&compare=&infix:, #= The comparison function (defaults to C) - :&compare-i is copy #= The compare-by-index function (defaults to using &compare) - ) is export { - unless &compare-i.defined { - &compare-i = -> $i, $j { - &compare(@a[$i], @b[$j]) - }; - } - - my @prefix = strip-prefix(@a, @b, &compare-i); - my @suffix = strip-suffix(@a[+@prefix .. *], @b[+@prefix .. *], -> $i, $j { - &compare-i($i + @prefix, $j + @prefix) - }); - my @a-middle = @a[+@prefix .. @a.end - @suffix]; - my @b-middle = @b[+@prefix .. @b.end - @suffix]; - - if @a-middle && @b-middle { - my @matrix = build-lcs-matrix(@a-middle, @b-middle, -> $i, $j { - &compare-i($i + @prefix, $j + @prefix) - }); - - my $matrix-row-len = @a-middle + 1; - my $i = @matrix.end; - - my @result := gather while $i > 0 && @matrix[$i] > 0 { - my $current-length = @matrix[$i]; - my $next-row-length = @matrix[$i - $matrix-row-len]; - my $next-col-length = @matrix[$i - 1]; - - if $current-length > $next-row-length && $next-row-length == $next-col-length { - take @b-middle[$i div $matrix-row-len - 1]; - $i -= $matrix-row-len + 1; - } elsif $next-row-length < $next-col-length { - $i--; - } elsif $next-col-length <= $next-row-length { - $i -= $matrix-row-len; - } else { - die "this should never be reached!"; - } - }.list; - - ( @prefix, @result.reverse, @suffix ).flat - } else { - ( @prefix, @suffix ).flat - } - } -} diff --git a/lib/Algorithm/LCS.rakumod b/lib/Algorithm/LCS.rakumod new file mode 100644 index 0000000..f8ad516 --- /dev/null +++ b/lib/Algorithm/LCS.rakumod @@ -0,0 +1,155 @@ +=begin pod + +=head1 NAME + +Algorithm::LCS - Implementation of the longest common subsequence algorithm + +=head1 SYNOPSIS + +=begin code :lang + +use Algorithm::LCS; + +# regular usage +say lcs(, ); # prints T + +# custom comparator via :compare +say lcs(, , :compare(&infix:)); + +# extra special custom comparison via :compare-i +my @a = slurp('one.txt'); +my @b = slurp('two.txt'); +my @a-hashed = @a.map({ hash-algorithm($_) }); +my @b-hashed = @b.map({ hash-algorithm($_) }); +say lcs(@a, @b, :compare-i({ @a-hashed[$^i] eqv @b-hashed[$^j] })); + +=end code + +=head1 DESCRIPTION + +This module contains a single subroutine, C, that calculates +the longest common subsequence between two sequences of data. C +takes two lists as required parameters; you may also specify the comparison +function (which defaults to C) via the C<&compare> named parameter). + +Sometimes you may want to maintain a parallel array of information to +consult during calculation (for example, if you're comparing long lines +of a file, and you'd like a speedup by comparing their hashes rather than +their contents); for that, you may use the C<&compare-i> named argumeny + +=head1 SUBROUTINES + +=end pod + +my sub strip-prefix(@a, @b, &compare-i) { + my $i = 0; + my @prefix; + + while $i < (@a & @b) && &compare-i($i, $i) { + @prefix.push: @a[$i++]; + } + + @prefix +} + +my sub strip-suffix(@a, @b, &compare-i) { + # XXX could be optimized, but this is easy for now + strip-prefix(@a.reverse, @b.reverse, -> $i, $j { + &compare-i(@a.end - $i, @b.end - $j) + }).reverse +} + +my sub build-lcs-matrix(@a, @b, &compare-i) { + my @matrix = 0 xx ((@a + 1) * (@b + 1)); + my $row-len = @a + 1; + + for 1 .. @b X 1 .. @a -> ($row, $offset) { + my $index = $row * $row-len + $offset; + + if &compare-i($offset - 1, $row - 1) { + @matrix[$index] = @matrix[$index - $row-len - 1] + 1; + } else { + @matrix[$index] = [max] @matrix[ $index - $row-len, $index - 1 ]; + } + } + + @matrix +} + +#| Returns the longest common subsequence of two sequences of data. +my sub lcs( + @a, #= The first sequence + @b, #= The second sequence + :&compare=&infix:, #= The comparison function (defaults to C) + :&compare-i is copy #= The compare-by-index function (defaults to using &compare) +) is export { + unless &compare-i.defined { + &compare-i = -> $i, $j { + &compare(@a[$i], @b[$j]) + }; + } + + my @prefix = strip-prefix(@a, @b, &compare-i); + my @suffix = strip-suffix(@a[+@prefix .. *], @b[+@prefix .. *], -> $i, $j { + &compare-i($i + @prefix, $j + @prefix) + }); + my @a-middle = @a[+@prefix .. @a.end - @suffix]; + my @b-middle = @b[+@prefix .. @b.end - @suffix]; + + if @a-middle && @b-middle { + my @matrix = build-lcs-matrix(@a-middle, @b-middle, -> $i, $j { + &compare-i($i + @prefix, $j + @prefix) + }); + + my $matrix-row-len = @a-middle + 1; + my $i = @matrix.end; + + my @result := gather while $i > 0 && @matrix[$i] > 0 { + my $current-length = @matrix[$i]; + my $next-row-length = @matrix[$i - $matrix-row-len]; + my $next-col-length = @matrix[$i - 1]; + + if $current-length > $next-row-length && $next-row-length == $next-col-length { + take @b-middle[$i div $matrix-row-len - 1]; + $i -= $matrix-row-len + 1; + } + elsif $next-row-length < $next-col-length { + $i--; + } + elsif $next-col-length <= $next-row-length { + $i -= $matrix-row-len; + } + else { + die "this should never be reached!"; + } + }.list; + + ( @prefix, @result.reverse, @suffix ).flat + } + else { + ( @prefix, @suffix ).flat + } +} + +=begin pod + +=head1 SEE ALSO + +L + +=head1 AUTHORS + +=item Rob Hoelz +=item Raku Community + +=head1 COPYRIGHT AND LICENSE + +Copyright (c) 2014 - 2017 Rob Hoelz + +Copyright (c) 2024 Raku Community + +This library is free software; you can redistribute it and/or modify it under the MIT license. + +=end pod + +# vim: expandtab shiftwidth=4 diff --git a/run-tests b/run-tests new file mode 100644 index 0000000..a833103 --- /dev/null +++ b/run-tests @@ -0,0 +1,65 @@ +unit sub MAIN(:a($author), :i($install)); + +say run(, :out).out.slurp.chomp; +say "Running on $*DISTRO.gist().\n"; + +say "Testing { + "dist.ini".IO.lines.head.substr(7) +}{ + " including author tests" if $author +}"; + +my @failed; +my $done = 0; + +sub process($proc, $filename) { + if $proc { + $proc.out.slurp; + } + else { + @failed.push($filename); + if $proc.out.slurp -> $stdout { + my @lines = $stdout.lines; + with @lines.first( + *.starts-with(" from gen/moar/stage2"),:k) + -> $index { + say @lines[^$index].join("\n"); + } + else { + say $stdout; + } + } + else { + say "No output received, exit-code $proc.exitcode() ($proc.signal()):\n$proc.os-error()"; + } + } +} + +sub install() { + my $zef := $*DISTRO.is-win ?? 'zef.bat' !! 'zef'; + my $proc := run $zef, "install", ".", "--verbose", "--/test", :out,:err,:merge; + process($proc, "*installation*"); +} + +sub test-dir($dir) { + for $dir.IO.dir(:test(*.ends-with: '.t' | '.rakutest')).map(*.Str).sort { + say "=== $_"; + my $proc := run "raku", "--ll-exception", "-I.", $_, :out,:err,:merge; + process($proc, $_); + $done++; + } +} + +test-dir("t"); +test-dir("xt") if $author && "xt".IO.e; +install if $install; + +if @failed { + say "\nFAILED: {+@failed} of $done:"; + say " $_" for @failed; + exit +@failed; +} + +say "\nALL {"$done " if $done > 1}OK"; + +# vim: expandtab shiftwidth=4 diff --git a/t/01-basic.t b/t/01-basic.rakutest similarity index 97% rename from t/01-basic.t rename to t/01-basic.rakutest index 7537036..7148a6e 100644 --- a/t/01-basic.t +++ b/t/01-basic.rakutest @@ -1,8 +1,7 @@ -use v6; use Test; use Algorithm::LCS; -plan *; +plan 17; is-deeply([lcs(, )], [], 'the lcs of two sequences with nothing in common should be empty'); is-deeply([lcs(, )], [], 'the lcs of two identical sequences should be the that sequence'); @@ -22,4 +21,4 @@ is-deeply([lcs(, )], []); is-deeply([lcs(, )], []); is-deeply([lcs(, )], []); -done-testing; +# vim: expandtab shiftwidth=4