From 7aa2a807c2058cabb980c9566e376fb80a4d341c Mon Sep 17 00:00:00 2001 From: Jeff Burke Date: Tue, 26 Mar 2024 08:59:33 -0700 Subject: [PATCH 1/6] CADC-13234 update ringhold README for new config properties --- ringhold/README.md | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/ringhold/README.md b/ringhold/README.md index 9f23c1b8f..ea36b55cd 100644 --- a/ringhold/README.md +++ b/ringhold/README.md @@ -1,6 +1,6 @@ -# Storage Inventory local artifact removal process (ringhold) +# Storage Inventory local artifact deletion process (ringhold) -Process to remove local artifacts that are no longer being synchronised by fenwick. This tool is used +Process to delete local artifacts that are no longer being synchronised by fenwick. This tool is used to perform quick cleanup at a storage site after changing the fenwick artifact-filter policy. ## configuration @@ -18,17 +18,25 @@ org.opencadc.ringhold.inventory.schema={schema for inventory database objects} org.opencadc.ringhold.inventory.username={username for inventory admin} org.opencadc.ringhold.inventory.password={password for inventory admin} org.opencadc.ringhold.inventory.url=jdbc:postgresql://{server}/{database} + +# artifact namespace +org.opencadc.ringhold.namespace={storage site namespace} + +# artifact uri bucket filter (optional) +org.opencadc.ringhold.buckets={uriBucket prefix or range of prefixes} ``` The `inventory` account owns and manages all the content (insert, update, delete) in the inventory schema. Unlike other components that modify inventory content, this component **does not initialise** the database objects because it never makes sense to run this in a new/empty database. The database is specified in the JDBC URL. Failure to connect to a pre-initialised database will show up in logs. -### artifact-deselector.sql -Contains a SQL clause used as a WHERE constraint. The clause returns Artifact's that match the URI pattern. 
-``` -WHERE uri LIKE 'cadc:CFHT/%' -``` +The `namespace` is the prefix of the Artifact URI's to be deleted. The `namespace` must end with a colon (:) +or slash (/) so one namespace cannot accidentally match (be a prefix of) another namespace. Multiple values +of `namespace` may be specified, one per line. + +The `buckets` value indicates a subset of artifacts to delete. The range of uri bucket prefixes is specified +with two values separated by a single - (dash) character; whitespace is ignored. Multiple instances of `ringhold` +can be run (in parallel) to subdivide the work as long as the range of buckets do not overlap. ## building it ``` From f6bb3790d1d8e0d103587c360958cacd27993feb Mon Sep 17 00:00:00 2001 From: Jeff Burke Date: Tue, 26 Mar 2024 11:14:13 -0700 Subject: [PATCH 2/6] CADC-13234 update README after review --- ringhold/README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/ringhold/README.md b/ringhold/README.md index ea36b55cd..6c919fde2 100644 --- a/ringhold/README.md +++ b/ringhold/README.md @@ -1,7 +1,8 @@ # Storage Inventory local artifact deletion process (ringhold) -Process to delete local artifacts that are no longer being synchronised by fenwick. This tool is used -to perform quick cleanup at a storage site after changing the fenwick artifact-filter policy. +Process to remove the local copy of artifacts from a storage site inventory database and +generate DeletedStorageLocationEvent(s) so the removal will propagate correctly to a global inventory. +This does not remove the files from storage (see `tantar`). ## configuration See the [cadc-java](https://github.com/opencadc/docker-base/tree/master/cadc-java) image docs for general config requirements. 
@@ -19,7 +20,7 @@ org.opencadc.ringhold.inventory.username={username for inventory admin} org.opencadc.ringhold.inventory.password={password for inventory admin} org.opencadc.ringhold.inventory.url=jdbc:postgresql://{server}/{database} -# artifact namespace +# artifact namespace(s) to remove org.opencadc.ringhold.namespace={storage site namespace} # artifact uri bucket filter (optional) From b53037b22bc3bf1009e44ec4ad5db8c0e17ea767 Mon Sep 17 00:00:00 2001 From: Jeff Burke Date: Thu, 28 Mar 2024 13:56:10 -0700 Subject: [PATCH 3/6] CADC-13234 update ringhold to query with configured namespace and uri buckets instead of select clause. --- ringhold/VERSION | 2 +- .../ringhold/InventoryValidatorTest.java | 325 ++++++++++-------- .../opencadc/ringhold/ArtifactDeselector.java | 3 + .../opencadc/ringhold/InventoryValidator.java | 88 +++-- .../main/java/org/opencadc/ringhold/Main.java | 25 +- 5 files changed, 275 insertions(+), 168 deletions(-) diff --git a/ringhold/VERSION b/ringhold/VERSION index 51807fa89..9229240a7 100644 --- a/ringhold/VERSION +++ b/ringhold/VERSION @@ -1,4 +1,4 @@ ## deployable containers have a semantic and build tag # semantic version tag: major.minor[.patch] # build version tag: timestamp -TAGS="0.2-$(date --utc +"%Y%m%dT%H%M%S")" +TAGS="0.3-$(date --utc +"%Y%m%dT%H%M%S")" diff --git a/ringhold/src/intTest/java/org/opencadc/ringhold/InventoryValidatorTest.java b/ringhold/src/intTest/java/org/opencadc/ringhold/InventoryValidatorTest.java index 0a809c6fb..47e963b1d 100644 --- a/ringhold/src/intTest/java/org/opencadc/ringhold/InventoryValidatorTest.java +++ b/ringhold/src/intTest/java/org/opencadc/ringhold/InventoryValidatorTest.java @@ -73,6 +73,7 @@ import ca.nrc.cadc.db.DBConfig; import ca.nrc.cadc.db.DBUtil; import ca.nrc.cadc.db.version.InitDatabase; +import ca.nrc.cadc.util.BucketSelector; import ca.nrc.cadc.util.FileUtil; import ca.nrc.cadc.util.HexUtil; import ca.nrc.cadc.util.Log4jInit; @@ -84,20 +85,17 @@ import java.net.URI; import 
java.nio.file.Files; import java.nio.file.Path; -import java.util.Date; -import java.util.Map; -import java.util.MissingResourceException; -import java.util.Properties; -import java.util.TreeMap; -import java.util.UUID; +import java.util.*; import javax.sql.DataSource; import org.apache.log4j.Level; import org.apache.log4j.Logger; import org.junit.Assert; import org.junit.Before; +import org.junit.Ignore; import org.junit.Test; import org.opencadc.inventory.Artifact; import org.opencadc.inventory.DeletedStorageLocationEvent; +import org.opencadc.inventory.Namespace; import org.opencadc.inventory.StorageLocation; import org.opencadc.inventory.db.ArtifactDAO; import org.opencadc.inventory.db.DeletedStorageLocationEventDAO; @@ -108,7 +106,7 @@ /** * Various versions of: * Insert artifacts more than uri pattern - * Run tool with one uri deselector + * Run tool with different Namespaces specified * Confirm delete storage location event creation and absence of artifacts in inventory */ public class InventoryValidatorTest { @@ -119,7 +117,7 @@ public class InventoryValidatorTest { Log4jInit.setLevel("org.opencadc.inventory", Level.INFO); Log4jInit.setLevel("org.opencadc.inventory.db", Level.INFO); Log4jInit.setLevel("ca.nrc.cadc.db", Level.INFO); - Log4jInit.setLevel("org.opencadc.ringhold", Level.DEBUG); + Log4jInit.setLevel("org.opencadc.ringhold", Level.INFO); } static String INVENTORY_SERVER = "RINGHOLD_TEST"; @@ -188,80 +186,12 @@ public InventoryValidatorTest() throws Exception { @Before public void setup() throws Exception { - writeConfig(); truncateTables(); } @Test - public void missingConfigTest() throws Exception { - final Path includePath = new File(TMP_DIR + "/config").toPath(); - Files.createDirectories(includePath); - final File includeFile = new File(includePath.toFile(), "artifact-deselector.sql"); - boolean deleted = includeFile.delete(); - Assert.assertTrue("include file not deleted", deleted); - - configTest(); - } - - @Test - public void 
emptyConfigTest() throws Exception { - final Path includePath = new File(TMP_DIR + "/config").toPath(); - Files.createDirectories(includePath); - final File includeFile = new File(includePath.toFile(), "artifact-deselector.sql"); - - final FileWriter fileWriter = new FileWriter(includeFile); - fileWriter.write(""); - fileWriter.flush(); - fileWriter.close(); - - configTest(); - } - - @Test - public void onlyCommentsConfigTest() throws Exception { - final Path includePath = new File(TMP_DIR + "/config").toPath(); - Files.createDirectories(includePath); - final File includeFile = new File(includePath.toFile(), "artifact-deselector.sql"); - - final FileWriter fileWriter = new FileWriter(includeFile); - fileWriter.write("# WHERE uri LIKE 'cadc:INTTEST/%'"); - fileWriter.flush(); - fileWriter.close(); - - configTest(); - } - - @Test - public void doesNotStartWithWhereConfigTest() throws Exception { - final Path includePath = new File(TMP_DIR + "/config").toPath(); - Files.createDirectories(includePath); - final File includeFile = new File(includePath.toFile(), "artifact-deselector.sql"); - - final FileWriter fileWriter = new FileWriter(includeFile); - fileWriter.write("uri LIKE 'cadc:INTTEST/%'\r\n"); - fileWriter.flush(); - fileWriter.close(); - - configTest(); - } - - @Test - public void multipleWhereConfigTest() throws Exception { - final Path includePath = new File(TMP_DIR + "/config").toPath(); - Files.createDirectories(includePath); - final File includeFile = new File(includePath.toFile(), "artifact-deselector.sql"); - - final FileWriter fileWriter = new FileWriter(includeFile); - fileWriter.write("WHERE uri LIKE 'cadc:INTTEST/%'\r\n"); - fileWriter.write("WHERE uri LIKE 'cadc:TEST/%'"); - fileWriter.flush(); - fileWriter.close(); - - configTest(); - } - - public void configTest() { - StorageLocation storageLocation = new StorageLocation(URI.create("ivo://cadc.nrc.ca/foo")); + public void noArtifactsMatchNamespace() throws Exception { + StorageLocation 
storageLocation = new StorageLocation(URI.create("cadc:foo")); Artifact a1 = getTestArtifact("cadc:TEST/one.txt"); a1.storageLocation = storageLocation; @@ -273,48 +203,11 @@ public void configTest() { a3.storageLocation = storageLocation; this.artifactDAO.put(a3); - try { - System.setProperty("user.home", TMP_DIR); - InventoryValidator testSubject = new InventoryValidator(this.daoConfig, this.daoConfig); - testSubject.run(); - Assert.fail("should throw an exception for invalid config"); - } catch (Exception expected) { - // exception expected - } finally { - System.setProperty("user.home", USER_HOME); - } - - a1 = this.artifactDAO.get(a1.getID()); - Assert.assertNotNull(a1); - a2 = this.artifactDAO.get(a2.getID()); - Assert.assertNotNull(a2); - a3 = this.artifactDAO.get(a3.getID()); - Assert.assertNotNull(a3); - - DeletedStorageLocationEvent dsle1 = this.deletedStorageLocationEventDAO.get(a1.getID()); - Assert.assertNull(dsle1); - DeletedStorageLocationEvent dsle2 = this.deletedStorageLocationEventDAO.get(a2.getID()); - Assert.assertNull(dsle2); - DeletedStorageLocationEvent dsle3 = this.deletedStorageLocationEventDAO.get(a3.getID()); - } - - @Test - public void noArtifactsMatchFilter() throws Exception { - StorageLocation storageLocation = new StorageLocation(URI.create("ivo://cadc.nrc.ca/foo")); - - Artifact a1 = getTestArtifact("cadc:TEST/one.txt"); - a1.storageLocation = storageLocation; - this.artifactDAO.put(a1); - Artifact a2 = getTestArtifact("cadc:INT/two.txt"); - a2.storageLocation = storageLocation; - this.artifactDAO.put(a2); - Artifact a3 = getTestArtifact("cadc:CADC/three.txt"); - a3.storageLocation = storageLocation; - this.artifactDAO.put(a3); + List namespaces = Collections.singletonList(new Namespace("cadc:NOMATCH/")); try { System.setProperty("user.home", TMP_DIR); - InventoryValidator testSubject = new InventoryValidator(this.daoConfig, this.daoConfig); + InventoryValidator testSubject = new InventoryValidator(daoConfig, daoConfig, namespaces, 
null); testSubject.run(); } finally { System.setProperty("user.home", USER_HOME); @@ -336,9 +229,9 @@ public void noArtifactsMatchFilter() throws Exception { } @Test - public void someArtifactsMatchFilter() throws Exception { - StorageLocation a_storageLocation = new StorageLocation(URI.create("ivo://cadc.nrc.ca/foo")); - StorageLocation b_storageLocation = new StorageLocation(URI.create("ivo://cadc.nrc.ca/bar")); + public void someArtifactsMatchNamespace() throws Exception { + StorageLocation a_storageLocation = new StorageLocation(URI.create("cadc:foo")); + StorageLocation b_storageLocation = new StorageLocation(URI.create("cadc:bar")); Artifact b1 = getTestArtifact("cadc:INT/one.txt"); b1.storageLocation = b_storageLocation; @@ -359,9 +252,12 @@ public void someArtifactsMatchFilter() throws Exception { b3.storageLocation = b_storageLocation; this.artifactDAO.put(b3); + List namespaces = Collections.singletonList(new Namespace("cadc:INTTEST/")); + BucketSelector buckets = new BucketSelector("0-f"); + try { System.setProperty("user.home", TMP_DIR); - InventoryValidator testSubject = new InventoryValidator(this.daoConfig, this.daoConfig); + InventoryValidator testSubject = new InventoryValidator(daoConfig, daoConfig, namespaces, buckets); testSubject.run(); } finally { System.setProperty("user.home", USER_HOME); @@ -397,51 +293,67 @@ public void someArtifactsMatchFilter() throws Exception { } @Test - public void allArtifactsMatchFilter() throws Exception { - StorageLocation storageLocation = new StorageLocation(URI.create("ivo://cadc.nrc.ca/foo")); + public void allArtifactsMatchNamespace() throws Exception { + StorageLocation a_storageLocation = new StorageLocation(URI.create("cadc:foo")); + StorageLocation b_storageLocation = new StorageLocation(URI.create("cadc:bar")); - Artifact a1 = getTestArtifact("cadc:INTTEST/one.txt"); - a1.storageLocation = storageLocation; + Artifact b1 = getTestArtifact("cadc:INT/one.txt"); + b1.storageLocation = b_storageLocation; + 
this.artifactDAO.put(b1); + Artifact b2 = getTestArtifact("cadc:INT_TEST/two.txt"); + b2.storageLocation = b_storageLocation; + this.artifactDAO.put(b2); + Artifact a1 = getTestArtifact("cadc:INTTEST/three.txt"); + a1.storageLocation = a_storageLocation; this.artifactDAO.put(a1); - Artifact a2 = getTestArtifact("cadc:INTTEST/two.txt"); - a2.storageLocation = storageLocation; + Artifact a2 = getTestArtifact("cadc:INTTEST/four.txt"); + a2.storageLocation = a_storageLocation; this.artifactDAO.put(a2); - Artifact a3 = getTestArtifact("cadc:INTTEST/three.txt"); - a3.storageLocation = storageLocation; + Artifact a3 = getTestArtifact("cadc:INTTEST/five.txt"); + a3.storageLocation = a_storageLocation; this.artifactDAO.put(a3); + Artifact b3 = getTestArtifact("cadc:TEST/six.txt"); + b3.storageLocation = b_storageLocation; + this.artifactDAO.put(b3); + + List namespaces = Arrays.asList(new Namespace("cadc:INT/"), + new Namespace("cadc:INT_TEST/"), new Namespace("cadc:INTTEST/"), + new Namespace("cadc:TEST/")); + BucketSelector buckets = new BucketSelector("0-f"); try { System.setProperty("user.home", TMP_DIR); - InventoryValidator testSubject = new InventoryValidator(this.daoConfig, this.daoConfig); + InventoryValidator testSubject = new InventoryValidator(daoConfig, daoConfig, namespaces, buckets); testSubject.run(); } finally { System.setProperty("user.home", USER_HOME); } + DeletedStorageLocationEvent b_dsle1 = this.deletedStorageLocationEventDAO.get(b1.getID()); + Assert.assertNotNull(b_dsle1); + DeletedStorageLocationEvent b_dsle2 = this.deletedStorageLocationEventDAO.get(b2.getID()); + Assert.assertNotNull(b_dsle2); DeletedStorageLocationEvent a_dsle1 = this.deletedStorageLocationEventDAO.get(a1.getID()); Assert.assertNotNull(a_dsle1); DeletedStorageLocationEvent a_dsle2 = this.deletedStorageLocationEventDAO.get(a2.getID()); Assert.assertNotNull(a_dsle2); DeletedStorageLocationEvent a_dsle3 = this.deletedStorageLocationEventDAO.get(a3.getID()); 
Assert.assertNotNull(a_dsle3); + DeletedStorageLocationEvent b_dsle3 = this.deletedStorageLocationEventDAO.get(b3.getID()); + Assert.assertNotNull(b_dsle3); + b1 = this.artifactDAO.get(b1.getID()); + Assert.assertNull(b1); + b2 = this.artifactDAO.get(b2.getID()); + Assert.assertNull(b2); a1 = this.artifactDAO.get(a1.getID()); Assert.assertNull(a1); a2 = this.artifactDAO.get(a2.getID()); Assert.assertNull(a2); a3 = this.artifactDAO.get(a3.getID()); Assert.assertNull(a3); - } - - private void writeConfig() throws IOException { - final Path includePath = new File(TMP_DIR + "/config").toPath(); - Files.createDirectories(includePath); - final File includeFile = new File(includePath.toFile(), "artifact-deselector.sql"); - - final FileWriter fileWriter = new FileWriter(includeFile); - fileWriter.write("WHERE uri LIKE 'cadc:INTTEST/%'"); - fileWriter.flush(); - fileWriter.close(); + b3 = this.artifactDAO.get(b3.getID()); + Assert.assertNull(b3); } private Artifact getTestArtifact(final String uri) { @@ -450,7 +362,6 @@ private Artifact getTestArtifact(final String uri) { return new Artifact(URI.create(uri), checkSum, new Date(), 512L); } - private void truncateTables() throws Exception { final JdbcTemplate jdbcTemplate = new JdbcTemplate(DBUtil.findJNDIDataSource(jndiPath)); jdbcTemplate.execute("TRUNCATE TABLE " + INVENTORY_SCHEMA + ".deletedArtifactEvent"); @@ -461,4 +372,134 @@ private void truncateTables() throws Exception { jdbcTemplate.execute("TRUNCATE TABLE " + INVENTORY_SCHEMA + ".Artifact"); } + + // below are tests for the ArtifactDeselector, which is not currently used, + // but preserved in case one day it is again. 
+ + @Ignore + @Test + public void missingConfigTest() throws Exception { + final Path includePath = new File(TMP_DIR + "/config").toPath(); + Files.createDirectories(includePath); + final File includeFile = new File(includePath.toFile(), "artifact-deselector.sql"); + boolean deleted = includeFile.delete(); + Assert.assertTrue("include file not deleted", deleted); + + configTest(); + } + + @Ignore + @Test + public void emptyConfigTest() throws Exception { + final Path includePath = new File(TMP_DIR + "/config").toPath(); + Files.createDirectories(includePath); + final File includeFile = new File(includePath.toFile(), "artifact-deselector.sql"); + + final FileWriter fileWriter = new FileWriter(includeFile); + fileWriter.write(""); + fileWriter.flush(); + fileWriter.close(); + + configTest(); + } + + @Ignore + @Test + public void onlyCommentsConfigTest() throws Exception { + final Path includePath = new File(TMP_DIR + "/config").toPath(); + Files.createDirectories(includePath); + final File includeFile = new File(includePath.toFile(), "artifact-deselector.sql"); + + final FileWriter fileWriter = new FileWriter(includeFile); + fileWriter.write("# WHERE uri LIKE 'cadc:INTTEST/%'"); + fileWriter.flush(); + fileWriter.close(); + + configTest(); + } + + @Ignore + @Test + public void doesNotStartWithWhereConfigTest() throws Exception { + final Path includePath = new File(TMP_DIR + "/config").toPath(); + Files.createDirectories(includePath); + final File includeFile = new File(includePath.toFile(), "artifact-deselector.sql"); + + final FileWriter fileWriter = new FileWriter(includeFile); + fileWriter.write("uri LIKE 'cadc:INTTEST/%'\r\n"); + fileWriter.flush(); + fileWriter.close(); + + configTest(); + } + + @Ignore + @Test + public void multipleWhereConfigTest() throws Exception { + final Path includePath = new File(TMP_DIR + "/config").toPath(); + Files.createDirectories(includePath); + final File includeFile = new File(includePath.toFile(), "artifact-deselector.sql"); + + 
final FileWriter fileWriter = new FileWriter(includeFile); + fileWriter.write("WHERE uri LIKE 'cadc:INTTEST/%'\r\n"); + fileWriter.write("WHERE uri LIKE 'cadc:TEST/%'"); + fileWriter.flush(); + fileWriter.close(); + + configTest(); + } + + @Ignore + @Test + public void configTest() { + StorageLocation storageLocation = new StorageLocation(URI.create("ivo://cadc.nrc.ca/foo")); + + Artifact a1 = getTestArtifact("cadc:TEST/one.txt"); + a1.storageLocation = storageLocation; + this.artifactDAO.put(a1); + Artifact a2 = getTestArtifact("cadc:INT/two.txt"); + a2.storageLocation = storageLocation; + this.artifactDAO.put(a2); + Artifact a3 = getTestArtifact("cadc:CADC/three.txt"); + a3.storageLocation = storageLocation; + this.artifactDAO.put(a3); + + try { + System.setProperty("user.home", TMP_DIR); + List namespaces = Collections.singletonList(new Namespace("cadc:FOO/")); + BucketSelector buckets = new BucketSelector("0-f"); + InventoryValidator testSubject = new InventoryValidator(this.daoConfig, this.daoConfig, namespaces, buckets); + testSubject.run(); + Assert.fail("should throw an exception for invalid config"); + } catch (Exception expected) { + // exception expected + } finally { + System.setProperty("user.home", USER_HOME); + } + + a1 = this.artifactDAO.get(a1.getID()); + Assert.assertNotNull(a1); + a2 = this.artifactDAO.get(a2.getID()); + Assert.assertNotNull(a2); + a3 = this.artifactDAO.get(a3.getID()); + Assert.assertNotNull(a3); + + DeletedStorageLocationEvent dsle1 = this.deletedStorageLocationEventDAO.get(a1.getID()); + Assert.assertNull(dsle1); + DeletedStorageLocationEvent dsle2 = this.deletedStorageLocationEventDAO.get(a2.getID()); + Assert.assertNull(dsle2); + DeletedStorageLocationEvent dsle3 = this.deletedStorageLocationEventDAO.get(a3.getID()); + } + + private void writeConfig() throws IOException { + final Path includePath = new File(TMP_DIR + "/config").toPath(); + Files.createDirectories(includePath); + final File includeFile = new 
File(includePath.toFile(), "artifact-deselector.sql"); + + final FileWriter fileWriter = new FileWriter(includeFile); + fileWriter.write("WHERE uri LIKE 'cadc:INTTEST/%'"); + fileWriter.flush(); + fileWriter.close(); + } + } diff --git a/ringhold/src/main/java/org/opencadc/ringhold/ArtifactDeselector.java b/ringhold/src/main/java/org/opencadc/ringhold/ArtifactDeselector.java index 893331669..e2ad6d33c 100644 --- a/ringhold/src/main/java/org/opencadc/ringhold/ArtifactDeselector.java +++ b/ringhold/src/main/java/org/opencadc/ringhold/ArtifactDeselector.java @@ -81,6 +81,9 @@ import org.apache.log4j.Logger; /** + * Class is no longer used, switched to using configured namespace and bucketUri to query + * for artifacts to remove. But left in this package if a use case is found at a future date. + * * Implementation of ArtifactSelector that includes artifacts via selective queries. * This class requires one or more fragments of SQL (a WHERE clause), each in a separate * file located in {user.home}/config/include and named {something}.sql -- see the diff --git a/ringhold/src/main/java/org/opencadc/ringhold/InventoryValidator.java b/ringhold/src/main/java/org/opencadc/ringhold/InventoryValidator.java index 5956e368d..cda660098 100644 --- a/ringhold/src/main/java/org/opencadc/ringhold/InventoryValidator.java +++ b/ringhold/src/main/java/org/opencadc/ringhold/InventoryValidator.java @@ -69,12 +69,17 @@ import ca.nrc.cadc.db.TransactionManager; import ca.nrc.cadc.io.ResourceIterator; -import ca.nrc.cadc.net.ResourceNotFoundException; +import ca.nrc.cadc.net.TransientException; +import ca.nrc.cadc.util.BucketSelector; import java.io.IOException; +import java.util.Iterator; +import java.util.List; import java.util.Map; import org.apache.log4j.Logger; import org.opencadc.inventory.Artifact; import org.opencadc.inventory.DeletedStorageLocationEvent; +import org.opencadc.inventory.InventoryUtil; +import org.opencadc.inventory.Namespace; import 
org.opencadc.inventory.db.ArtifactDAO; import org.opencadc.inventory.db.DeletedStorageLocationEventDAO; @@ -89,28 +94,27 @@ public class InventoryValidator implements Runnable { private final ArtifactDAO artifactIteratorDAO; private final ArtifactDAO artifactDAO; - private final String deselector; + private final List namespaces; + private final BucketSelector bucketSelector; - public InventoryValidator(Map txnConfig, Map iterConfig) { + public InventoryValidator(Map txnConfig, Map iterConfig, + List namespaces, BucketSelector bucketSelector) { + InventoryUtil.assertNotNull(InventoryValidator.class, "txnConfig", txnConfig); + InventoryUtil.assertNotNull(InventoryValidator.class, "iterConfig", iterConfig); + InventoryUtil.assertNotNull(InventoryValidator.class, "namespaces", namespaces); + this.artifactDAO = new ArtifactDAO(); artifactDAO.setConfig(txnConfig); this.artifactIteratorDAO = new ArtifactDAO(); artifactIteratorDAO.setConfig(iterConfig); - ArtifactDeselector artifactDeselector = new ArtifactDeselector(); - try { - this.deselector = artifactDeselector.getConstraint(); - } catch (ResourceNotFoundException ex) { - throw new IllegalArgumentException("missing required configuration: " - + ArtifactDeselector.SQL_FILTER_FILE_NAME, ex); - } catch (IOException ex) { - throw new IllegalArgumentException("unable to read config: " + ArtifactDeselector.SQL_FILTER_FILE_NAME, ex); - } + this.namespaces = namespaces; + this.bucketSelector = bucketSelector; } /** - * Find an artifact with a uri pattern in the deselector, + * Find an artifact for the given namespace(s) and optional bucketUri, * delete the artifact and generate a deleted storage location event. 
*/ @Override @@ -119,31 +123,68 @@ public void run() { final DeletedStorageLocationEventDAO deletedStorageLocationEventDAO = new DeletedStorageLocationEventDAO(this.artifactDAO); - try (final ResourceIterator artifactIterator = - this.artifactIteratorDAO.iterator(this.deselector, null, false)) { + for (Namespace namespace : namespaces) { + if (bucketSelector == null) { + iterateBucket(transactionManager, deletedStorageLocationEventDAO, namespace,null); + } else { + Iterator bucketIter = bucketSelector.getBucketIterator(); + while (bucketIter.hasNext()) { + String bucket = bucketIter.next(); + log.info(InventoryValidator.class.getSimpleName() + ".START bucket=" + bucket); + int retries = 0; + boolean done = false; + while (!done && retries < 3) { + try { + iterateBucket(transactionManager, deletedStorageLocationEventDAO, namespace, bucket); + log.info(InventoryValidator.class.getSimpleName() + ".END bucket=" + bucket); + done = true; + } catch (TransientException ex) { + log.error(InventoryValidator.class.getSimpleName() + ".FAIL bucket=" + bucket, ex); + retries++; + } catch (IllegalArgumentException ex) { + log.error(InventoryValidator.class.getSimpleName() + ".FAIL bucket=" + bucket, ex); + throw ex; + } catch (RuntimeException ex) { + // TODO: probably not a great idea to retry on these... 
+ log.error(InventoryValidator.class.getSimpleName() + ".FAIL bucket=" + bucket, ex); + retries++; + } catch (Exception ex) { + log.error(InventoryValidator.class.getSimpleName() + ".FAIL bucket=" + bucket, ex); + throw ex; + } + } + } + } + } + } + + private void iterateBucket(TransactionManager transactionManager, + DeletedStorageLocationEventDAO deletedStorageLocationEventDAO, + Namespace namespace, String bucket) { + try (final ResourceIterator artifactIterator = this.artifactIteratorDAO.iterator(namespace, bucket, false)) { while (artifactIterator.hasNext()) { - Artifact deselectorArtifact = artifactIterator.next(); - log.debug("START: Process Artifact " + deselectorArtifact.getID() + " " + deselectorArtifact.getURI()); + Artifact artifact = artifactIterator.next(); + log.debug("START: Process Artifact " + artifact.getID() + " " + artifact.getURI()); try { transactionManager.startTransaction(); - Artifact cur = this.artifactDAO.lock(deselectorArtifact); + Artifact cur = this.artifactDAO.lock(artifact); if (cur != null) { DeletedStorageLocationEvent deletedStorageLocationEvent = new DeletedStorageLocationEvent(cur.getID()); deletedStorageLocationEventDAO.put(deletedStorageLocationEvent); - + this.artifactDAO.delete(cur.getID()); - + transactionManager.commitTransaction(); log.info("DELETE: Artifact " + cur.getID() + " " + cur.getURI()); } else { transactionManager.rollbackTransaction(); log.debug("Artifact not found"); } - - log.debug("END: Process Artifact " + deselectorArtifact.getID() + " " - + deselectorArtifact.getURI()); + + log.debug("END: Process Artifact " + artifact.getID() + " " + + artifact.getURI()); } catch (Exception exception) { if (transactionManager.isOpen()) { log.error("Exception in transaction. 
Rolling back..."); @@ -164,4 +205,5 @@ public void run() { log.error("Error closing iterator: " + e.getMessage()); } } + } diff --git a/ringhold/src/main/java/org/opencadc/ringhold/Main.java b/ringhold/src/main/java/org/opencadc/ringhold/Main.java index b774ba256..f4d64954d 100644 --- a/ringhold/src/main/java/org/opencadc/ringhold/Main.java +++ b/ringhold/src/main/java/org/opencadc/ringhold/Main.java @@ -69,17 +69,21 @@ import ca.nrc.cadc.db.ConnectionConfig; import ca.nrc.cadc.db.DBUtil; +import ca.nrc.cadc.util.BucketSelector; import ca.nrc.cadc.util.Log4jInit; import ca.nrc.cadc.util.MultiValuedProperties; import ca.nrc.cadc.util.PropertiesReader; import ca.nrc.cadc.util.StringUtil; +import java.util.ArrayList; import java.util.Arrays; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; import javax.naming.NamingException; import org.apache.log4j.Level; import org.apache.log4j.Logger; +import org.opencadc.inventory.Namespace; import org.opencadc.inventory.db.SQLGenerator; /** @@ -99,6 +103,8 @@ public class Main { private static final String DB_USERNAME_CONFIG_KEY = CONFIG_PREFIX + ".inventory.username"; private static final String DB_PASSWORD_CONFIG_KEY = CONFIG_PREFIX + ".inventory.password"; private static final String DB_URL_CONFIG_KEY = CONFIG_PREFIX + ".inventory.url"; + private static final String NAMESPACE_CONFIG_KEY = CONFIG_PREFIX + ".namespace"; + private static final String BUCKETS_CONFIG_KEY = CONFIG_PREFIX + ".buckets"; // Used to verify configuration items. See the README for descriptions. 
private static final String[] MANDATORY_PROPERTY_KEYS = { @@ -107,7 +113,8 @@ public class Main { DB_URL_CONFIG_KEY, DB_USERNAME_CONFIG_KEY, LOGGING_CONFIG_KEY, - SQLGENERATOR_CONFIG_KEY + SQLGENERATOR_CONFIG_KEY, + NAMESPACE_CONFIG_KEY }; public static void main(final String[] args) { @@ -163,7 +170,21 @@ public static void main(final String[] args) { iterConfig.put("jndiDataSourceName", "jdbc/inventory-iter"); iterConfig.put(SQLGENERATOR_CONFIG_KEY, Class.forName(configuredSQLGenerator)); - final InventoryValidator doit = new InventoryValidator(daoConfig, iterConfig); + // check namespaces are valid + final List configuredNamespaces = props.getProperty(NAMESPACE_CONFIG_KEY); + final List namespaces = new ArrayList<>(); + for (String namespace : configuredNamespaces) { + namespaces.add(new Namespace(namespace)); + } + + // uri buckets + BucketSelector bucketSelector = null; + final String buckets = props.getFirstPropertyValue(BUCKETS_CONFIG_KEY); + if (buckets != null) { + bucketSelector = new BucketSelector(buckets); + } + + final InventoryValidator doit = new InventoryValidator(daoConfig, iterConfig, namespaces, bucketSelector); doit.run(); } catch (Throwable unexpected) { log.fatal("Unexpected failure", unexpected); From adbadf203e9d0cd44685d33ec4d767457c4513c6 Mon Sep 17 00:00:00 2001 From: Jeff Burke Date: Tue, 2 Apr 2024 13:23:35 -0700 Subject: [PATCH 4/6] CADC-13234 checkstyle fix --- .../main/java/org/opencadc/ringhold/ArtifactDeselector.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ringhold/src/main/java/org/opencadc/ringhold/ArtifactDeselector.java b/ringhold/src/main/java/org/opencadc/ringhold/ArtifactDeselector.java index e2ad6d33c..2570e29b3 100644 --- a/ringhold/src/main/java/org/opencadc/ringhold/ArtifactDeselector.java +++ b/ringhold/src/main/java/org/opencadc/ringhold/ArtifactDeselector.java @@ -81,9 +81,9 @@ import org.apache.log4j.Logger; /** - * Class is no longer used, switched to using configured namespace and 
bucketUri to query + *

<p>Class is no longer used, switched to using configured namespace and bucketUri to query * for artifacts to remove. But left in this package if a use case is found at a future date. - * +
<p>
* Implementation of ArtifactSelector that includes artifacts via selective queries. * This class requires one or more fragments of SQL (a WHERE clause), each in a separate * file located in {user.home}/config/include and named {something}.sql -- see the From 593bb1e9154fabab1df1fc5e211b26011794fa5e Mon Sep 17 00:00:00 2001 From: Jeff Burke Date: Wed, 3 Apr 2024 08:05:15 -0700 Subject: [PATCH 5/6] CADC-13234 restore ringhold in github build --- .github/workflows/gradle.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index 64748cf5b..e3771b65b 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -70,7 +70,6 @@ jobs: - name: java build -- ratik run: cd ratik && ../gradlew --info clean build javadoc checkstyleMain -## disabled until updated for cadc-inventory-db API changes -# - name: java build -- ringhold -# run: cd ringhold && ../gradlew --info clean build javadoc checkstyleMain + - name: java build -- ringhold + run: cd ringhold && ../gradlew --info clean build javadoc checkstyleMain From a0f5f27dd2a8b0d1b8f6e45ed43474f93312cf66 Mon Sep 17 00:00:00 2001 From: Jeff Burke Date: Wed, 3 Apr 2024 08:20:13 -0700 Subject: [PATCH 6/6] CADC-13234 fix github build formatting --- .github/workflows/gradle.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/gradle.yml b/.github/workflows/gradle.yml index e3771b65b..86f24decc 100644 --- a/.github/workflows/gradle.yml +++ b/.github/workflows/gradle.yml @@ -70,6 +70,6 @@ jobs: - name: java build -- ratik run: cd ratik && ../gradlew --info clean build javadoc checkstyleMain - - name: java build -- ringhold - run: cd ringhold && ../gradlew --info clean build javadoc checkstyleMain + - name: java build -- ringhold + run: cd ringhold && ../gradlew --info clean build javadoc checkstyleMain