diff --git a/modules/harvest/src/Commands/HarvestCommands.php b/modules/harvest/src/Commands/HarvestCommands.php index d7ad118495..07968cf8a5 100644 --- a/modules/harvest/src/Commands/HarvestCommands.php +++ b/modules/harvest/src/Commands/HarvestCommands.php @@ -44,8 +44,6 @@ public function __construct(Service $service, LoggerChannelInterface $logger) { * List available harvests. * * @command dkan:harvest:list - * @aliases dkan-harvest:list - * @deprecated dkan-harvest:list is deprecated and will be removed in a future Dkan release. Use dkan:harvest:list instead. * * @usage dkan:harvest:list * List available harvests. @@ -70,8 +68,6 @@ function ($id) { * * @command dkan:harvest:register * @usage dkan-harvest:register '{"identifier":"example","extract":{"type":"\\Harvest\\ETL\\Extract\\DataJson","uri":"https://source/data.json"},"transforms":[],"load":{"type":"\\Drupal\\harvest\\Load\\Dataset"}}' - * @aliases dkan-harvest:register - * @deprecated dkan-harvest:register is deprecated and will be removed in a future Dkan release. Use dkan:harvest:register instead. */ public function register($harvest_plan) { try { @@ -90,8 +86,6 @@ public function register($harvest_plan) { * Deregister a harvest. * * @command dkan:harvest:deregister - * @aliases dkan-harvest:deregister - * @deprecated dkan-harvest:deregister is deprecated and will be removed in a future Dkan release. Use dkan:harvest:deregister instead. */ public function deregister($id) { try { @@ -113,8 +107,6 @@ public function deregister($id) { * The harvest id. * * @command dkan:harvest:run - * @aliases dkan-harvest:run - * @deprecated dkan-harvest:run is deprecated and will be removed in a future Dkan release. Use dkan:harvest:run instead. * * @usage dkan:harvest:run * Runs a harvest. @@ -136,8 +128,6 @@ public function run($id) { * Run all pending harvests. 
* * @command dkan:harvest:run-all - * @aliases dkan-harvest:run-all - * @deprecated dkan-harvest:run-all is deprecated and will be removed in a future Dkan release. Use dkan:harvest:run-all instead. * * @usage dkan:harvest:run-all * Runs all pending harvests. @@ -155,65 +145,94 @@ public function runAll() { /** * Give information about a previous harvest run. * - * @param string $id + * @param string $harvestId * The harvest id. - * @param string $run_id + * @param string $runId * The run's id. * * @command dkan:harvest:info - * @aliases dkan-harvest:info - * @deprecated dkan-harvest:info is deprecated and will be removed in a future Dkan release. Use dkan:harvest:info instead. */ - public function info($id, $run_id = NULL) { - $run_ids = []; - - if (!isset($run_id)) { - $run_ids = $this->harvestService - ->getAllHarvestRunInfo($id); - } - else { - $run_ids = [$run_id]; - } + public function info($harvestId, $runId = NULL) { + $this->validateHarvestId($harvestId); + $runIds = $runId ? [$runId] : $this->harvestService->getAllHarvestRunInfo($harvestId); - $run_infos = []; - foreach ($run_ids as $run_id) { - $run = $this->harvestService - ->getHarvestRunInfo($id, $run_id); + foreach ($runIds as $id) { + $run = $this->harvestService->getHarvestRunInfo($harvestId, $id); $result = json_decode($run, TRUE); - $run_infos[] = [$run_id, $result]; + $runs[] = [$id, $result]; } - $this->renderHarvestRunsInfo($run_infos); + $this->renderHarvestRunsInfo($runs ?? []); } /** * Revert a harvest, i.e. remove all of its harvested entities. * - * @param string $id + * @param string $harvestId * The source to revert. * * @command dkan:harvest:revert - * @aliases dkan-harvest:revert - * @deprecated dkan-harvest:revert is deprecated and will be removed in a future Dkan release. Use dkan:harvest:revert instead. * * @usage dkan:harvest:revert * Removes harvested entities. 
*/ - public function revert($id) { + public function revert($harvestId) { + $this->validateHarvestId($harvestId); + $result = $this->harvestService->revertHarvest($harvestId); + (new ConsoleOutput())->write("{$result} items reverted for the '{$harvestId}' harvest plan." . PHP_EOL); + } - $result = $this->harvestService - ->revertHarvest($id); + /** + * Archive all harvested datasets for a single harvest. + * + * @param string $harvestId + * The source to archive harvests for. + * + * @command dkan:harvest:archive + * + * @usage dkan:harvest:archive + * Archives harvested entities. + */ + public function archive($harvestId) { + $this->validateHarvestId($harvestId); + $result = $this->harvestService->archive($harvestId); + if (empty($result)) { + (new ConsoleOutput())->write("No items available to archive for the '{$harvestId}' harvest plan." . PHP_EOL); + } + foreach ($result as $id) { + (new ConsoleOutput())->write("Archived dataset {$id} from harvest '{$harvestId}'." . PHP_EOL); + } + } - (new ConsoleOutput())->write("{$result} items reverted for the '{$id}' harvest plan." . PHP_EOL); + /** + * Publish all harvested datasets for a single harvest. + * + * @param string $harvestId + * The source to publish harvests for. + * + * @command dkan:harvest:publish + * + * @usage dkan:harvest:publish + * Publishes harvested entities. + */ + public function publish($harvestId) { + $this->validateHarvestId($harvestId); + $result = $this->harvestService->publish($harvestId); + if (empty($result)) { + (new ConsoleOutput())->write("No items available to publish for the '{$harvestId}' harvest plan." . PHP_EOL); + } + foreach ($result as $id) { + (new ConsoleOutput())->write("Published dataset {$id} from harvest '{$harvestId}'." . PHP_EOL); + } } /** * Show status of of a particular harvest run. * - * @param string $harvest_id + * @param string $harvestId * The id of the harvest source. - * @param string $run_id + * @param string $runId * The run's id. Optional. 
Show the status for the latest run if not * provided. * @@ -222,44 +241,37 @@ public function revert($id) { * @usage dkan:harvest:status * test 1599157120 */ - public function status($harvest_id, $run_id = NULL) { - // Validate the harvest id. - $harvest_id_all = $this->harvestService->getAllHarvestIds(); - - if (array_search($harvest_id, $harvest_id_all) === FALSE) { - (new ConsoleOutput())->writeln("harvest id $harvest_id not found."); - return DrushCommands::EXIT_FAILURE; - } + public function status($harvestId, $runId = NULL) { + $this->validateHarvestId($harvestId); // No run_id provided, get the latest run_id. // Validate run_id. - $run_id_all = $this->harvestService->getAllHarvestRunInfo($harvest_id); + $allRunIds = $this->harvestService->getAllHarvestRunInfo($harvestId); - if (empty($run_id_all)) { - (new ConsoleOutput())->writeln("No Run IDs found for harvest id $harvest_id"); + if (empty($allRunIds)) { + (new ConsoleOutput())->writeln("No Run IDs found for harvest id $harvestId"); return DrushCommands::EXIT_FAILURE; } - if (empty($run_id)) { + if (empty($runId)) { // Get the last run_id from the array. 
- $run_id = end($run_id_all); - reset($run_id_all); + $runId = end($allRunIds); + reset($allRunIds); } - if (array_search($run_id, $run_id_all) === FALSE) { - (new ConsoleOutput())->writeln("Run ID $run_id not found for harvest id $harvest_id"); + if (array_search($runId, $allRunIds) === FALSE) { + (new ConsoleOutput())->writeln("Run ID $runId not found for harvest id $harvestId"); return DrushCommands::EXIT_FAILURE; } - $run = $this->harvestService - ->getHarvestRunInfo($harvest_id, $run_id); + $run = $this->harvestService->getHarvestRunInfo($harvestId, $runId); if (empty($run)) { - (new ConsoleOutput())->writeln("No status found for harvest id $harvest_id and run id $run_id"); + (new ConsoleOutput())->writeln("No status found for harvest id $harvestId and run id $runId"); return DrushCommands::EXIT_FAILURE; } - $this->renderStatusTable($harvest_id, $run_id, json_decode($run, TRUE)); + $this->renderStatusTable($harvestId, $runId, json_decode($run, TRUE)); } /** @@ -275,11 +287,7 @@ public function status($harvest_id, $run_id = NULL) { * @alias dkan:harvest:orphan */ public function orphanDatasets(string $harvestId) : int { - - if (!in_array($harvestId, $this->harvestService->getAllHarvestIds())) { - $this->logger()->error("Harvest id {$harvestId} not found."); - return DrushCommands::EXIT_FAILURE; - } + $this->validateHarvestId($harvestId); try { $orphans = $this->harvestService->getOrphanIdsFromCompleteHarvest($harvestId); @@ -296,4 +304,17 @@ public function orphanDatasets(string $harvestId) : int { } } + /** + * Log an error and return a failure code if Harvest ID does not exist. + * + * @param string $harvestId + * The Harvest ID. 
+ */ + private function validateHarvestId($harvestId) { + if (!in_array($harvestId, $this->harvestService->getAllHarvestIds())) { + $this->logger()->error("Harvest id {$harvestId} not found."); + return DrushCommands::EXIT_FAILURE; + } + } + } diff --git a/modules/harvest/src/Service.php b/modules/harvest/src/Service.php index c8a9305f5f..fe49dfa49e 100644 --- a/modules/harvest/src/Service.php +++ b/modules/harvest/src/Service.php @@ -14,13 +14,21 @@ use Symfony\Component\DependencyInjection\ContainerInterface; /** - * Service. + * Main DKAN Harvester service. + * + * Import groups of datasets from an external source, and manage existing + * harvest plans and their dependent datasets. */ class Service implements ContainerInjectionInterface { use LoggerTrait; use OrphanDatasetsProcessor; + /** + * Service to instantiate storage objects for Harvest plan storage. + * + * @var \Contracts\FactoryInterface + */ private $storeFactory; /** @@ -216,59 +224,87 @@ private function getLastHarvestRunId(string $id) { /** * Publish a harvest. * - * @param string $id + * @param string $harvestId * Harvest identifier. * * @return array - * The uuids of the published datasets. + * The uuids of the datasets that were published. */ - public function publish(string $id): array { + public function publish(string $harvestId): array { + return $this->bulkUpdateStatus($harvestId, 'publish'); - $lastRunInfoObj = $this->getLastRunInfoObj($id); - if (!isset($lastRunInfoObj->status->extracted_items_ids)) { - return []; - } - - return $this->publishHelper($id, $lastRunInfoObj->status); } /** - * Private. + * Archive a harvest. + * + * @param string $harvestId + * Harvest identifier. + * + * @return array + * The uuids of the datasets that were archived. 
+ */ - private function getLastRunInfoObj(string $harvestId) { - $lastRunId = $this->getLastHarvestRunId($harvestId); - $lastRunInfoJsonString = $this->getHarvestRunInfo($harvestId, $lastRunId); - return json_decode($lastRunInfoJsonString); + public function archive(string $harvestId): array { + return $this->bulkUpdateStatus($harvestId, 'archive'); } /** - * Private. + * Update the status of every dataset from a harvest's last run. + * + * @param string $harvestId + * Harvest identifier. + * @param string $method + * Metastore update status method - "archive" or "publish" available. + * + * @return array + * The uuids of the datasets whose status was updated. */ - private function publishHelper(string $harvestId, $lastRunStatus): array { - $publishedIdentifiers = []; - - foreach ($lastRunStatus->extracted_items_ids as $uuid) { - try { - if ($this->metastorePublishHelper($lastRunStatus, $uuid)) { - $publishedIdentifiers[] = $uuid; - } - } - catch (\Exception $e) { - $this->error("Error publishing dataset {$uuid} in harvest {$harvestId}: {$e->getMessage()}"); - } + protected function bulkUpdateStatus(string $harvestId, string $method): array { + if (!in_array($method, ['archive', 'publish'])) { + throw new \OutOfRangeException("Method {$method} does not exist"); + } + + $lastRunId = $this->getLastHarvestRunId($harvestId); + $lastRunInfo = json_decode($this->getHarvestRunInfo($harvestId, $lastRunId)); + $status = $lastRunInfo->status ?? NULL; + if (!isset($status->extracted_items_ids)) { + return []; } - return $publishedIdentifiers; + $updated = []; + foreach ($status->extracted_items_ids as $datasetId) { + // $this->setDatasetStatus() will return true if $datasetId + // could be successfully updated. + $updated[] = $this->setDatasetStatus($status, $datasetId, $method) ? $datasetId : NULL; + } + + return array_values(array_filter($updated)); } /** - * Private. + * Use metastore service to publish or archive a harvested item. + * + * @param object $runInfoStatus + * Status object with run information. 
+ * @param string $datasetId + * ID of the DKAN dataset. + * @param string $method + * Metastore update status method - "archive" or "publish" available. + * + * @return bool + * Whether or not the requested action was successful. */ - private function metastorePublishHelper($runInfoStatus, string $uuid): bool { - return isset($runInfoStatus->load) && - $runInfoStatus->load->{$uuid} && - $runInfoStatus->load->{$uuid} != 'FAILURE' && - $this->metastore->publish('dataset', $uuid); + protected function setDatasetStatus($runInfoStatus, string $datasetId, string $method): bool { + try { + return isset($runInfoStatus->load) && + $runInfoStatus->load->{$datasetId} && + $runInfoStatus->load->{$datasetId} != 'FAILURE' && + $this->metastore->$method('dataset', $datasetId); + } + catch (\Exception $e) { + $this->error("Error applying method {$method} to dataset {$datasetId}: {$e->getMessage()}"); + return FALSE; + } } /** @@ -285,9 +321,15 @@ public function validateHarvestPlan($plan) { } /** - * Private. + * Get a DKAN harvester instance. + * + * @param string $id + * Harvester ID. + * + * @return \Harvest\Harvester + * Harvester object. 
*/ - private function getHarvester($id) { + private function getHarvester(string $id) { $plan_store = $this->storeFactory->getInstance("harvest_plans"); $harvestPlan = json_decode($plan_store->retrieve($id)); $item_store = $this->storeFactory->getInstance("harvest_{$id}_items"); diff --git a/modules/harvest/tests/src/Unit/ServiceTest.php b/modules/harvest/tests/src/Unit/ServiceTest.php index 5c20204107..8ccfb868b5 100644 --- a/modules/harvest/tests/src/Unit/ServiceTest.php +++ b/modules/harvest/tests/src/Unit/ServiceTest.php @@ -224,17 +224,21 @@ private function getEntityTypeManagerMockChain() { public function testPublish() { $datasetUuids = ['abcd-1001', 'abcd-1002', 'abcd-1003', 'abcd-1004']; - $lastRunInfo = (object) [ - 'status' => [ - 'extracted_items_ids' => $datasetUuids, - 'load' => [ - 'abcd-1001' => "SUCCESS", - 'abcd-1002' => "SUCCESS", - 'abcd-1003' => "SUCCESS", - 'abcd-1004' => "FAILURE", + $lastRunInfo = (new Sequence()) + ->add(json_encode((object) [ + 'status' => [ + 'extracted_items_ids' => $datasetUuids, + 'load' => [ + 'abcd-1001' => "SUCCESS", + 'abcd-1002' => "SUCCESS", + 'abcd-1003' => "SUCCESS", + 'abcd-1004' => "FAILURE", + ], ], - ], - ]; + ])) + ->add(json_encode((object) [ + 'status' => [], + ])); $metastorePublicationResults = (new Sequence()) // abcd-1001 will be skipped since already published. 
@@ -249,7 +253,7 @@ public function testPublish() { ->add(LoggerChannelInterface::class, 'error', NULL, 'error'); $container = $this->getCommonMockChain() - ->add(DatabaseTable::class, "retrieve", json_encode($lastRunInfo)) + ->add(DatabaseTable::class, "retrieve", $lastRunInfo) ->add(Metastore::class, 'publish', $metastorePublicationResults); $service = HarvestService::create($container->getMock()); @@ -259,8 +263,59 @@ public function testPublish() { $this->assertEquals(['abcd-1003'], $result); $loggerResult = $logger->getStoredInput('error')[0]; - $error = 'Error publishing dataset abcd-1002 in harvest 1: FooBar'; + $error = 'Error applying method publish to dataset abcd-1002: FooBar'; $this->assertEquals($error, $loggerResult); + + $result = $service->publish('2'); + $this->assertEmpty($result); + } + + public function testArchive() { + $datasetUuids = ['abcd-1001', 'abcd-1002', 'abcd-1003', 'abcd-1004']; + $lastRunInfo = (new Sequence()) + ->add(json_encode((object) [ + 'status' => [ + 'extracted_items_ids' => $datasetUuids, + 'load' => [ + 'abcd-1001' => "SUCCESS", + 'abcd-1002' => "SUCCESS", + 'abcd-1003' => "SUCCESS", + 'abcd-1004' => "FAILURE", + ], + ], + ])) + ->add(json_encode((object) [ + 'status' => [], + ])); + + $metastoreArchiveResults = (new Sequence()) + // abcd-1001 will be skipped since already archived. + ->add(FALSE) + // abcd-1002 will be skipped due to exception. + ->add(new \Exception('FooBar')) + // abcd-1003 should be archived without issue. 
+ ->add(TRUE); + + $logger = (new Chain($this)) + ->add(LoggerChannelFactory::class, 'get', LoggerChannelInterface::class) + ->add(LoggerChannelInterface::class, 'error', NULL, 'error'); + + $container = $this->getCommonMockChain() + ->add(DatabaseTable::class, "retrieve", $lastRunInfo) + ->add(Metastore::class, 'archive', $metastoreArchiveResults); + + $service = HarvestService::create($container->getMock()); + $service->setLoggerFactory($logger->getMock()); + $result = $service->archive('1'); + + $this->assertEquals(['abcd-1003'], $result); + + $loggerResult = $logger->getStoredInput('error')[0]; + $error = 'Error applying method archive to dataset abcd-1002: FooBar'; + $this->assertEquals($error, $loggerResult); + + $result = $service->archive('2'); + $this->assertEmpty($result); } /** diff --git a/modules/metastore/src/Service.php b/modules/metastore/src/Service.php index e55c01debf..4236c019ec 100644 --- a/modules/metastore/src/Service.php +++ b/modules/metastore/src/Service.php @@ -307,6 +307,25 @@ public function publish(string $schema_id, string $identifier): bool { throw new MissingObjectException("No data with the identifier {$identifier} was found."); } + /** + * Archive an item by moving its latest revision to the archived state. + * + * @param string $schema_id + * The {schema_id} slug from the HTTP request. + * @param string $identifier + * Identifier. + * + * @return bool + * True if the dataset is successfully archived, false otherwise. + */ + public function archive(string $schema_id, string $identifier): bool { + if ($this->objectExists($schema_id, $identifier)) { + return $this->getStorage($schema_id)->archive($identifier); + } + + throw new MissingObjectException("No data with the identifier {$identifier} was found."); + } + /** * Implements PUT method. 
* diff --git a/modules/metastore/src/Storage/Data.php b/modules/metastore/src/Storage/Data.php index e63209226c..32c53fa865 100644 --- a/modules/metastore/src/Storage/Data.php +++ b/modules/metastore/src/Storage/Data.php @@ -13,6 +13,8 @@ /** * Abstract metastore storage class, for using Drupal entities. + * + * @todo Separate workflow management and storage into separate classes. */ abstract class Data implements MetastoreEntityStorageInterface { @@ -200,19 +202,38 @@ public function retrieve(string $uuid) : ?string { } /** - * Inherited. - * - * {@inheritdoc}. + * {@inheritdoc} */ public function publish(string $uuid): bool { + return $this->setWorkflowState($uuid, 'published'); + } + /** + * {@inheritdoc} + */ + public function archive(string $uuid): bool { + return $this->setWorkflowState($uuid, 'archived'); + } + + /** + * Change the state of a metastore item. + * + * @param string $uuid + * Metastore identifier. + * @param string $state + * Any workflow state that can be applied to a metastore entity. + * + * @return bool + * Whether or not an item was transitioned. 
+ */ + protected function setWorkflowState(string $uuid, string $state): bool { $entity = $this->getEntityLatestRevision($uuid); if (!$entity) { - throw new MissingObjectException("Error publishing dataset: {$uuid} not found."); + throw new MissingObjectException("Error: {$uuid} not found."); } - elseif ('published' !== $entity->get('moderation_state')->getString()) { - $entity->set('moderation_state', 'published'); + elseif ($state !== $entity->get('moderation_state')->getString()) { + $entity->set('moderation_state', $state); $entity->save(); return TRUE; } diff --git a/modules/metastore/tests/src/Unit/ServiceTest.php b/modules/metastore/tests/src/Unit/ServiceTest.php index 3fe7ff6a7b..ace3674da5 100644 --- a/modules/metastore/tests/src/Unit/ServiceTest.php +++ b/modules/metastore/tests/src/Unit/ServiceTest.php @@ -304,6 +304,19 @@ public function testPublish() { $result = $service->publish('dataset', 1); $this->assertTrue($result); } + + /** + * + */ + public function testArchive() { + $container = self::getCommonMockChain($this) + ->add(NodeData::class, "retrieve", "1") + ->add(NodeData::class, "archive", TRUE); + + $service = Service::create($container->getMock()); + $result = $service->archive('dataset', 1); + $this->assertTrue($result); + } /** * diff --git a/modules/metastore/tests/src/Unit/Storage/DataTest.php b/modules/metastore/tests/src/Unit/Storage/DataTest.php index d3cc67cef7..b37e8d2675 100644 --- a/modules/metastore/tests/src/Unit/Storage/DataTest.php +++ b/modules/metastore/tests/src/Unit/Storage/DataTest.php @@ -30,7 +30,7 @@ public function testPublishDatasetNotFound() { ->add(QueryInterface::class, 'execute', []) ->getMock(); - $this->expectExceptionMessage('Error publishing dataset: 1 not found.'); + $this->expectExceptionMessage('Error: 1 not found.'); $nodeData = new NodeData('dataset', $etmMock); $nodeData->publish('1'); } diff --git a/tests/src/Functional/DatasetTest.php b/tests/src/Functional/DatasetTest.php index 4035f157c2..8cb4fb1cd2 
100644 --- a/tests/src/Functional/DatasetTest.php +++ b/tests/src/Functional/DatasetTest.php @@ -117,6 +117,32 @@ public function testResourcePurgeDraft() { $this->assertArrayNotHasKey('dkan_dataset/333', $searchResults->results); } + + /** + * Test archiving of datasets after a harvest + */ + public function testHarvestArchive() { + + $plan = $this->getPlan('testHarvestArchive', 'catalog-step-1.json'); + $harvester = $this->getHarvester(); + $harvester->registerHarvest($plan); + + // First harvest. + $harvester->runHarvest('testHarvestArchive'); + + // Ensure different harvest run identifiers, since based on timestamp. + sleep(1); + + // Confirm we have some published datasets. + $this->assertEquals('published', $this->getModerationState('1')); + $this->assertEquals('published', $this->getModerationState('2')); + + // Run archive command, confirm datasets are archived. + $harvester->archive('testHarvestArchive'); + $this->assertEquals('archived', $this->getModerationState('1')); + $this->assertEquals('archived', $this->getModerationState('2')); + } + /** * Test removal of datasets by a subsequent harvest. */