From abd5acbacc4c56c1a7e7e5f96b334aede6fdb742 Mon Sep 17 00:00:00 2001 From: David Belicza <87.bdavid@gmail.com> Date: Sun, 20 Sep 2020 15:56:07 +0200 Subject: [PATCH] Smaller fixes, added new tests --- README.md | 4 +- composer.json | 5 +- src/Service/Normalizer.php | 40 ----- src/Service/PageRankAlgorithm.php | 1 + src/Service/PageRankAlgorithm/Normalizer.php | 60 +++++++ .../NormalizerInterface.php | 2 +- .../Service/PageRankAlgorithmTest.php | 1 + tests/phpunit.xml | 3 + tests/unit/Builder/NodeBuilderTest.php | 31 ++++ .../PageRankAlgorithm/NormalizerTest.php | 100 +++++++++++ .../PageRankAlgorithm/RankComparatorTest.php | 58 +++++++ .../Service/PageRankAlgorithm/RankingTest.php | 162 ++++++++++++++++++ 12 files changed, 423 insertions(+), 44 deletions(-) delete mode 100644 src/Service/Normalizer.php create mode 100644 src/Service/PageRankAlgorithm/Normalizer.php rename src/Service/{ => PageRankAlgorithm}/NormalizerInterface.php (86%) create mode 100644 tests/unit/Builder/NodeBuilderTest.php create mode 100644 tests/unit/Service/PageRankAlgorithm/NormalizerTest.php create mode 100644 tests/unit/Service/PageRankAlgorithm/RankComparatorTest.php create mode 100644 tests/unit/Service/PageRankAlgorithm/RankingTest.php diff --git a/README.md b/README.md index b554e62..000218a 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ PageRank

-This source code is an OOP implementation of the PageRank algorithm, under MIT licence. +This source code is an OOP implementation of the PageRank algorithm.
The minimum required PHP version is 7.4.

@@ -35,7 +35,7 @@ long-running calculation can be scheduled in batches using the Strategy OOP patt iteration number. * However, the iteration stops when the ranks are accurate enough even if the max iteration didn't reach its limit. * The accuracy measured by the float epsilon constant. -* At the end the algorithm normalizes the ranks between 0 and 1 and then scale them between 1 and 10. The scaling range +* At the end, the algorithm normalizes the ranks between 0 and 1 and then scales them between 1 and 10. The scaling range is configurable. * Getting, setting, updating the nodes from the resource is a responsibility of the NodeDataSourceStrategyInterface. * The package provides a simple implementation of the NodeDataSourceStrategyInterface that only keeps the nodes in the diff --git a/composer.json b/composer.json index 4ab377a..cd7e25a 100644 --- a/composer.json +++ b/composer.json @@ -9,11 +9,14 @@ "email": "87.bdavid@gmail.com" } ], - "require": {}, + "require": { + "php": "7.4.*" + }, "autoload": { "psr-4": { "PhpScience\\PageRank\\": [ "src", + "tests/unit", "tests/functional" ] } diff --git a/src/Service/Normalizer.php b/src/Service/Normalizer.php deleted file mode 100644 index bd1ba42..0000000 --- a/src/Service/Normalizer.php +++ /dev/null @@ -1,40 +0,0 @@ -min = $min; - $this->max = $max; - } - - public function normalize( - NodeCollectionInterface $nodeCollection, - float $lowestRank, - float $highestRank - ): void { - foreach ($nodeCollection->getNodes() as $node) { - $rank = $this->getRank($node->getRank(), $lowestRank, $highestRank); - $node->setRank($rank); - } - } - - private function getRank(float $value, float $min, float $max): float - { - $normalized = ($value - $min) / ($max - $min); - $scaled = ($normalized * ($this->max - $this->min)) + $this->min; - - return $scaled; - } -} diff --git a/src/Service/PageRankAlgorithm.php b/src/Service/PageRankAlgorithm.php index 09ec242..7371867 100644 --- a/src/Service/PageRankAlgorithm.php +++ 
b/src/Service/PageRankAlgorithm.php @@ -5,6 +5,7 @@ namespace PhpScience\PageRank\Service; use PhpScience\PageRank\Data\NodeCollectionInterface; +use PhpScience\PageRank\Service\PageRankAlgorithm\NormalizerInterface; use PhpScience\PageRank\Service\PageRankAlgorithm\RankingInterface; use PhpScience\PageRank\Strategy\NodeDataSourceStrategyInterface; diff --git a/src/Service/PageRankAlgorithm/Normalizer.php b/src/Service/PageRankAlgorithm/Normalizer.php new file mode 100644 index 0000000..8f5ebf1 --- /dev/null +++ b/src/Service/PageRankAlgorithm/Normalizer.php @@ -0,0 +1,60 @@ +scaleBottom = $scaleBottom; + $this->scaleTop = $scaleTop; + } + + public function normalize( + NodeCollectionInterface $nodeCollection, + float $lowestRank, + float $highestRank + ): void { + $divider = $this->getDivider($lowestRank, $highestRank); + + foreach ($nodeCollection->getNodes() as $node) { + $rank = $this->getScaledRank( + $node->getRank(), + $lowestRank, + $divider + ); + $node->setRank($rank); + } + } + + private function getDivider(float $lowestRank, float $highestRank): float + { + $divider = $highestRank - $lowestRank; + + if (.0 === $divider) { + $divider = 1; + } + + return $divider; + } + + private function getScaledRank( + float $value, + float $lowestRank, + float $divider + ): float { + $normalized = ($value - $lowestRank) / $divider; + $multiplier = $this->scaleTop - $this->scaleBottom; + + return ($normalized * $multiplier) + $this->scaleBottom; + } +} diff --git a/src/Service/NormalizerInterface.php b/src/Service/PageRankAlgorithm/NormalizerInterface.php similarity index 86% rename from src/Service/NormalizerInterface.php rename to src/Service/PageRankAlgorithm/NormalizerInterface.php index f6c084b..3697c79 100644 --- a/src/Service/NormalizerInterface.php +++ b/src/Service/PageRankAlgorithm/NormalizerInterface.php @@ -2,7 +2,7 @@ declare(strict_types=1); -namespace PhpScience\PageRank\Service; +namespace PhpScience\PageRank\Service\PageRankAlgorithm; use 
PhpScience\PageRank\Data\NodeCollectionInterface; diff --git a/tests/functional/Service/PageRankAlgorithmTest.php b/tests/functional/Service/PageRankAlgorithmTest.php index 86dde53..f6d1ac5 100644 --- a/tests/functional/Service/PageRankAlgorithmTest.php +++ b/tests/functional/Service/PageRankAlgorithmTest.php @@ -6,6 +6,7 @@ use PhpScience\PageRank\Builder\NodeBuilder; use PhpScience\PageRank\Builder\NodeCollectionBuilder; +use PhpScience\PageRank\Service\PageRankAlgorithm\Normalizer; use PhpScience\PageRank\Service\PageRankAlgorithm\RankComparator; use PhpScience\PageRank\Service\PageRankAlgorithm\Ranking; use PhpScience\PageRank\Strategy\MemorySourceStrategy; diff --git a/tests/phpunit.xml b/tests/phpunit.xml index e16a1bc..0236b3d 100644 --- a/tests/phpunit.xml +++ b/tests/phpunit.xml @@ -5,6 +5,9 @@ colors="true"> + + ../tests/unit + ../tests/functional diff --git a/tests/unit/Builder/NodeBuilderTest.php b/tests/unit/Builder/NodeBuilderTest.php new file mode 100644 index 0000000..94579b5 --- /dev/null +++ b/tests/unit/Builder/NodeBuilderTest.php @@ -0,0 +1,31 @@ +nodeBuilder = new NodeBuilder(); + } + + public function testBuild(): void + { + $expected = 0.25; + + $data = [ + 'id' => 1, + 'rank' => $expected + ]; + + $actual = $this->nodeBuilder->build($data); + + static::assertSame($expected, $actual->getRank()); + } +} diff --git a/tests/unit/Service/PageRankAlgorithm/NormalizerTest.php b/tests/unit/Service/PageRankAlgorithm/NormalizerTest.php new file mode 100644 index 0000000..e23cc77 --- /dev/null +++ b/tests/unit/Service/PageRankAlgorithm/NormalizerTest.php @@ -0,0 +1,100 @@ +createMock(NodeCollectionInterface::class); + $node = $this + ->createMock(NodeInterface::class); + + $nodeCollection + ->expects($this->once()) + ->method('getNodes') + ->willReturn([$node]); + + $node + ->expects($this->once()) + ->method('getRank') + ->willReturn($originalRank); + + $node + ->expects($this->once()) + ->method('setRank') + ->with($expectedRank); + + $normalizer = 
new Normalizer( + $scaleBottom, + $scaleTop + ); + + $normalizer->normalize( + $nodeCollection, + $lowestRank, + $highestRank + ); + } + + public function dataProviderNormalize(): array + { + return [ + 'realistic' => [ + 1.234, + 1.0, + 10.0, + -5.0, + 5.0, + 6.6106 + ], + 'division_by_zero' => [ + 5, + 1.0, + 10.0, + 5.0, + 5.0, + 1.0 + ], + 'division_by_float_epsilon' => [ + 5, + 1.0, + 10.0, + 5.0, + 5.0 + PHP_FLOAT_EPSILON, + 1.0 + ], + 'scale_from_minus' => [ + 0.12577, + -5, + 5, + 100, + 1000, + -6.109713666666667 + ] + ]; + } +} diff --git a/tests/unit/Service/PageRankAlgorithm/RankComparatorTest.php b/tests/unit/Service/PageRankAlgorithm/RankComparatorTest.php new file mode 100644 index 0000000..ae6d42d --- /dev/null +++ b/tests/unit/Service/PageRankAlgorithm/RankComparatorTest.php @@ -0,0 +1,58 @@ +rankComparator = new RankComparator(); + } + + /** + * @dataProvider dataProviderIsEqual + * + * @param float $rank1 + * @param float $rank2 + * @param bool $expected + */ + public function testIsEqual( + float $rank1, + float $rank2, + bool $expected + ): void { + $actual = $this->rankComparator->isEqual($rank1, $rank2); + + static::assertSame($expected, $actual); + } + + public function dataProviderIsEqual(): array + { + return [ + 'not_equal' => [ + .1, .2, false + ], + 'equal' => [ + .1, .1, true + ], + 'absolute_value_of_minus' => [ + .1, .2, false + ], + 'absolute_of_minus_one_is_bigger_than_float_epsilon' => [ + 1, 2, false + ], + 'smallest_representable_difference' => [ + 1, 1 + PHP_FLOAT_EPSILON, false + ], + 'non_representable_difference' => [ + 1, 1 + 2.2204460492503e-17, true + ] + ]; + } +} diff --git a/tests/unit/Service/PageRankAlgorithm/RankingTest.php b/tests/unit/Service/PageRankAlgorithm/RankingTest.php new file mode 100644 index 0000000..a65eadd --- /dev/null +++ b/tests/unit/Service/PageRankAlgorithm/RankingTest.php @@ -0,0 +1,162 @@ +nodeDataStrategy = $this + ->getMockBuilder(NodeDataSourceStrategyInterface::class) + 
->disableOriginalConstructor() + ->getMock(); + + $this->rankComparator = $this + ->getMockBuilder(RankComparatorInterface::class) + ->disableOriginalConstructor() + ->getMock(); + + $this->ranking = new Ranking( + $this->rankComparator, + $this->nodeDataStrategy + ); + } + + public function testCalculateInitialRank(): void + { + $nodeCollection = $this + ->createMock(NodeCollectionInterface::class); + + $node = $this + ->createMock(NodeInterface::class); + + $nodeCollection + ->expects($this->once()) + ->method('getNodes') + ->willReturn([$node]); + + $nodeCollection + ->expects($this->once()) + ->method('getAllNodeCount') + ->willReturn(10); + + $node + ->expects($this->once()) + ->method('setRank') + ->with(0.1); + + $this->ranking->calculateInitialRank($nodeCollection); + } + + /** + * @dataProvider dataProviderCalculateRankPerIteration + * + * @param int $subjectId + * @param int $incomingNodeId + * @param int $outgoingNodesOfIncomingNode + * @param float $calculatedNewRank + * @param bool $ranksAreEqual + * @param int $expected + */ + public function testCalculateRankPerIteration( + int $subjectId, + int $incomingNodeId, + int $outgoingNodesOfIncomingNode, + float $calculatedNewRank, + bool $ranksAreEqual, + int $expected + ): void { + $nodeCollection = $this + ->createMock(NodeCollectionInterface::class); + + $node = $this + ->createMock(NodeInterface::class); + + $nodeCollection + ->expects($this->once()) + ->method('getNodes') + ->willReturn([$node]); + + $node + ->expects($this->any()) + ->method('getId') + ->willReturn($subjectId); + + $this + ->nodeDataStrategy + ->expects($this->once()) + ->method('getIncomingNodeIds') + ->with($subjectId) + ->willReturn([$incomingNodeId]); + + $this + ->nodeDataStrategy + ->expects($this->exactly($incomingNodeId)) + ->method('getPreviousRank') + ->withConsecutive([$incomingNodeId], [$subjectId]) + ->willReturn(0.2, 0.1); + + $this + ->nodeDataStrategy + ->expects($this->once()) + ->method('countOutgoingNodes') + 
->with($incomingNodeId) + ->willReturn($outgoingNodesOfIncomingNode); + + $node + ->expects($this->once()) + ->method('setRank') + ->with($calculatedNewRank); + + $node + ->expects($this->once()) + ->method('getRank') + ->willReturn(99.123); + + $this + ->rankComparator + ->expects($this->once()) + ->method('isEqual') + ->with(0.1, 99.123) + ->willReturn($ranksAreEqual); + + $actual = $this->ranking->calculateRankPerIteration($nodeCollection); + + static::assertSame($expected, $actual); + } + + public function dataProviderCalculateRankPerIteration(): array + { + return [ + [ + 1, 2, 100, 0.002, true, 1 + ], + [ + 1, 2, 0, 0, true, 1 + ], + [ + 1, 2, 0, 0, false, 0 + ] + ]; + } +}