diff --git a/cadc-inventory-db/build.gradle b/cadc-inventory-db/build.gradle index 18b76754..8822b4de 100644 --- a/cadc-inventory-db/build.gradle +++ b/cadc-inventory-db/build.gradle @@ -17,7 +17,7 @@ sourceCompatibility = 1.8 group = 'org.opencadc' -version = '1.0.1' +version = '1.0.2' description = 'OpenCADC Storage Inventory database library' def git_url = 'https://github.com/opencadc/storage-inventory' diff --git a/cadc-inventory-db/src/intTest/java/org/opencadc/inventory/db/ArtifactDAOTest.java b/cadc-inventory-db/src/intTest/java/org/opencadc/inventory/db/ArtifactDAOTest.java index f3a22597..4045f213 100644 --- a/cadc-inventory-db/src/intTest/java/org/opencadc/inventory/db/ArtifactDAOTest.java +++ b/cadc-inventory-db/src/intTest/java/org/opencadc/inventory/db/ArtifactDAOTest.java @@ -1106,7 +1106,7 @@ public void testArtifactIterator() { log.info("count vs Namespace incremental from start..."); DateFormat df = DateUtil.getDateFormat(DateUtil.IVOA_DATE_FORMAT, DateUtil.UTC); count = 0; - try (ResourceIterator iter = originDAO.iterator(ns, null, startDate, true)) { + try (ResourceIterator iter = originDAO.iterator(ns, null, startDate, true, null)) { while (iter.hasNext()) { Artifact actual = iter.next(); count++; @@ -1118,7 +1118,7 @@ public void testArtifactIterator() { log.info("count vs Namespace incremental from mid..."); count = 0; - try (ResourceIterator iter = originDAO.iterator(ns, null, midDate, true)) { + try (ResourceIterator iter = originDAO.iterator(ns, null, midDate, true, null)) { while (iter.hasNext()) { Artifact actual = iter.next(); count++; @@ -1130,7 +1130,7 @@ public void testArtifactIterator() { log.info("count vs Namespace incremental from end..."); count = 0; - try (ResourceIterator iter = originDAO.iterator(ns, null, endDate, true)) { + try (ResourceIterator iter = originDAO.iterator(ns, null, endDate, true, null)) { while (iter.hasNext()) { Artifact actual = iter.next(); count++; diff --git a/cadc-inventory-db/src/intTest/java/org/opencadc/vospace/db/DataNodeSizeWorkerTest.java b/cadc-inventory-db/src/intTest/java/org/opencadc/vospace/db/DataNodeSizeWorkerTest.java index 649800b3..2cb847b5 100644 --- a/cadc-inventory-db/src/intTest/java/org/opencadc/vospace/db/DataNodeSizeWorkerTest.java +++ b/cadc-inventory-db/src/intTest/java/org/opencadc/vospace/db/DataNodeSizeWorkerTest.java @@ -86,6 +86,7 @@ import org.opencadc.inventory.Artifact; import org.opencadc.vospace.db.DataNodeSizeWorker; import org.opencadc.inventory.Namespace; +import org.opencadc.inventory.StorageLocation; import org.opencadc.inventory.db.ArtifactDAO; import org.opencadc.inventory.db.HarvestState; import org.opencadc.inventory.db.HarvestStateDAO; @@ -106,7 +107,7 @@ public class DataNodeSizeWorkerTest { static { Log4jInit.setLevel("org.opencadc.inventory", Level.INFO); - Log4jInit.setLevel("org.opencadc.inventory.db", Level.INFO); + Log4jInit.setLevel("org.opencadc.inventory.db", Level.DEBUG); Log4jInit.setLevel("ca.nrc.cadc.db", Level.INFO); Log4jInit.setLevel("org.opencadc.vospace", Level.INFO); Log4jInit.setLevel("org.opencadc.vospace.db", Level.INFO); @@ -180,7 +181,16 @@ public void init_cleanup() throws Exception { } @Test - public void testSyncArtifact() throws Exception { + public void testSyncArtifactSite() throws Exception { + testSyncArtifact(true); + } + + @Test + public void testSyncArtifactGlobal() throws Exception { + testSyncArtifact(false); + } + + private void testSyncArtifact(boolean isStorageSite) throws Exception { UUID rootID = new UUID(0L, 0L); ContainerNode root = new ContainerNode(rootID, "root"); @@ -206,11 +216,15 @@ public void testSyncArtifact() throws Exception { URI.create("md5:d41d8cd98f00b204e9800998ecf8427e"), new Date(), 666L); + if (isStorageSite) { + expected.storageLocation = new StorageLocation(URI.create("id:" + UUID.randomUUID().toString())); + expected.storageLocation.storageBucket = "X"; + } log.info("expected: " + expected); artifactDAO.put(expected); Artifact actualArtifact = artifactDAO.get(expected.getID()); - Assert.assertNotNull(actual); + Assert.assertNotNull(actualArtifact); Assert.assertEquals(expected.getContentLength(), actualArtifact.getContentLength()); String hsName = "ArtifactSize"; @@ -219,25 +233,36 @@ public void testSyncArtifact() throws Exception { harvestStateDAO.put(hs); hs = harvestStateDAO.get(hsName, resourceID); - DataNodeSizeWorker asWorker = new DataNodeSizeWorker(harvestStateDAO, hs, artifactDAO, siNamespace); + DataNodeSizeWorker asWorker = new DataNodeSizeWorker(harvestStateDAO, hs, artifactDAO, siNamespace, isStorageSite); + log.info("*** DataNodeSizeWorker START"); asWorker.run(); + log.info("*** DataNodeSizeWorker DONE"); actual = (DataNode)nodeDAO.get(orig.getID()); Assert.assertNotNull(actual); log.info("found: " + actual.getID() + " aka " + actual); Assert.assertEquals(expected.getContentLength(), actual.bytesUsed); + Thread.sleep(100L); + // update the artifact only artifactDAO.delete(actualArtifact.getID()); - expected = new Artifact(expected.getURI(), expected.getMetaChecksum(), new Date(), 333L); - artifactDAO.put(expected); + Artifact modified = new Artifact(expected.getURI(), expected.getMetaChecksum(), new Date(), 333L); + if (isStorageSite) { + modified.storageLocation = new StorageLocation(URI.create("id:" + UUID.randomUUID().toString())); + modified.storageLocation.storageBucket = "X"; + } + artifactDAO.put(modified); actual = (DataNode)nodeDAO.get(orig.getID()); - Assert.assertNotEquals(expected.getContentLength(), actual.bytesUsed); + Assert.assertNotEquals(modified.getContentLength(), actual.bytesUsed); // run the update + log.info("*** DataNodeSizeWorker START"); asWorker.run(); + log.info("*** DataNodeSizeWorker DONE"); + actual = (DataNode)nodeDAO.get(orig.getID()); - Assert.assertEquals(expected.getContentLength(), actual.bytesUsed); + Assert.assertEquals(modified.getContentLength(), actual.bytesUsed); } diff --git a/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/ArtifactDAO.java b/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/ArtifactDAO.java index cb7964ce..519f831c 100644 --- a/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/ArtifactDAO.java +++ b/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/ArtifactDAO.java @@ -193,7 +193,7 @@ public ResourceIterator storedIterator(String storageBucketPrefix) { SQLGenerator.ArtifactIteratorQuery iter = (SQLGenerator.ArtifactIteratorQuery) gen.getEntityIteratorQuery(Artifact.class); iter.setStorageLocationRequired(true); iter.setStorageBucket(storageBucketPrefix); - iter.setOrderedOutput(true); + iter.setOrderByStorageLocation(true); return iter.query(dataSource); } catch (BadSqlGrammarException ex) { handleInternalFail(ex); @@ -250,7 +250,7 @@ public ResourceIterator iterator(String uriBucketPrefix, boolean order * conditions on fields of the Artifact using the field names for column references. * Example: uri like 'ad:bar/%'. The result is currently not ordered. * - *

Use case: local cleanup by arbitrary criteria + *

Use case: local cleanup by ringhold * * @param ns namespace for selecting artifacts * @param uriBucketPrefix null, prefix, or complete Artifact.uriBucket string @@ -258,23 +258,25 @@ public ResourceIterator iterator(String uriBucketPrefix, boolean order * @return iterator over artifacts matching criteria */ public ResourceIterator iterator(Namespace ns, String uriBucketPrefix, boolean ordered) { - return iterator(ns, uriBucketPrefix, null, ordered); + return iterator(ns, uriBucketPrefix, null, ordered, null); } /** - * Iterate over artifacts that match criteria. This method adds an optional Date argument to + * Iterate over artifacts in a specific namespace.This method adds an optional Date argument to * support incremental processing. In this case, ordered will be in timestamp order rather than * uri order. * - *

Use case: process artifact events directly in the database + *

Use case: process artifact events directly in the database (DataNodeSizeWorker) * * @param ns namespace for selecting artifacts * @param uriBucketPrefix null, prefix, or complete Artifact.uriBucket string - * @param minLastModified minimum Artifact.lastModified to consider (incremental mode) - * @param ordered order by Artifact.uri (true) or not ordered (false) - * @return iterator over artifacts matching criteria + * @param minLastModified minimum Artifact.lastModified to consider (incremental mode), null for all + * @param ordered order by Artifact.lastModified (true) or not ordered (false) + * @param isStored true to select stored artifacts, false for unstored, null for all + * @return iterator over artifacts */ - public ResourceIterator iterator(Namespace ns, String uriBucketPrefix, Date minLastModified, boolean ordered) { + public ResourceIterator iterator(Namespace ns, String uriBucketPrefix, + Date minLastModified, boolean ordered, Boolean isStored) { checkInit(); long t = System.currentTimeMillis(); @@ -284,6 +286,7 @@ public ResourceIterator iterator(Namespace ns, String uriBucketPrefix, iter.setNamespace(ns); iter.setMinLastModified(minLastModified); iter.setOrderedOutput(ordered); + iter.setStorageLocationRequired(isStored); return iter.query(dataSource); } catch (BadSqlGrammarException ex) { handleInternalFail(ex); diff --git a/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/SQLGenerator.java b/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/SQLGenerator.java index 0652a918..38e055c3 100644 --- a/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/SQLGenerator.java +++ b/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/SQLGenerator.java @@ -685,6 +685,7 @@ class ArtifactIteratorQuery implements EntityIteratorQuery { private Namespace namespace; private Date minLastModified; private boolean ordered; + private boolean orderByStorageLocation; private final Calendar utc = Calendar.getInstance(DateUtil.UTC); @@ -730,6 +731,10 @@ public void setOrderedOutput(boolean ordered) { this.ordered = ordered; } + public void setOrderByStorageLocation(boolean obsl) { + this.orderByStorageLocation = obsl; + } + public void setSiteID(UUID siteID) { this.siteID = siteID; } @@ -752,7 +757,7 @@ public ResourceIterator query(DataSource ds) { // null storageBucket to come after non-null // postgresql: the default order is equivalent to explicitly specifying ASC NULLS LAST // default behaviour may not be db-agnostic - if (ordered) { + if (orderByStorageLocation) { sb.append(" ORDER BY storageLocation_storageBucket, storageLocation_storageID"); } } else if (storageLocationRequired != null && !storageLocationRequired) { diff --git a/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/DataNodeSizeWorker.java b/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/DataNodeSizeWorker.java index 523fec02..0cc4b663 100644 --- a/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/DataNodeSizeWorker.java +++ b/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/DataNodeSizeWorker.java @@ -99,6 +99,7 @@ public class DataNodeSizeWorker implements Runnable { private final ArtifactDAO artifactDAO; private final HarvestStateDAO harvestStateDAO; private final Namespace storageNamespace; + private boolean isStorageSite; private long numArtifactsProcessed; @@ -111,12 +112,13 @@ public class DataNodeSizeWorker implements Runnable { * @param namespace artifact namespace */ public DataNodeSizeWorker(HarvestStateDAO harvestStateDAO, HarvestState harvestState, - ArtifactDAO artifactDAO, Namespace namespace) { + ArtifactDAO artifactDAO, Namespace namespace, boolean isStorageSite) { this.harvestState = harvestState; this.harvestStateDAO = harvestStateDAO; this.nodeDAO = new NodeDAO(harvestStateDAO); this.artifactDAO = artifactDAO; this.storageNamespace = namespace; + this.isStorageSite = isStorageSite; } public long getNumArtifactsProcessed() { @@ -147,7 +149,7 @@ public void run() { } String uriBucket = null; // process all artifacts in a single thread - try (final ResourceIterator iter = artifactDAO.iterator(storageNamespace, uriBucket, startTime, true)) { + try (final ResourceIterator iter = artifactDAO.iterator(storageNamespace, uriBucket, startTime, true, isStorageSite)) { TransactionManager tm = nodeDAO.getTransactionManager(); while (iter.hasNext()) { Artifact artifact = iter.next(); diff --git a/vault/README.md b/vault/README.md index f2ef1086..8745ef91 100644 --- a/vault/README.md +++ b/vault/README.md @@ -5,10 +5,11 @@ specification designed to co-exist with other storage-inventory components. It p organization layer on top of the storage management of storage-inventory. The simplest configuration would be to deploy `vault` with `minoc` with a single metadata database and single -back end storage system. Details: TBD. +back end storage system. This is _isStoragerSite = true_ below. The other option would be to deploy `vault` with `raven` and `luskan` in a global inventory database and make -use of one or more of the network of known storage sites to store files. Details: TBD. +use of one or more of the network of known storage sites to store files. This is _isStoragerSite = false_ below. +Details: TBD. ## deployment @@ -78,6 +79,9 @@ A vault.properties file in /config is required to run this service. The followi # service identity org.opencadc.vault.resourceID = ivo://{authority}/{name} +# associated inventory type +org.opencadc.vault.inventory.isStorageSite = true|false + # consistency settings org.opencadc.vault.consistency.preventNotFound=true|false @@ -97,11 +101,16 @@ org.opencadc.vault.storage.namespace = {a storage inventory namespace to use} ``` The vault _resourceID_ is the resourceID of _this_ vault service. +the _isStorageSite_ key tells `vault` whether the associated inventory database is a storage +site or a global database. This effects the behaviour and performance of the background thread that +syncs Artifact.contentLength values to the DataNode.bytesUsed field. + The _preventNotFound_ key can be used to configure `vault` to prevent artifact-not-found errors that might result due to the eventual consistency nature of the storage system by directly checking for the artifact at -_all known_ sites. It only makes sense to enable this when `vault` is running in a global inventory (along with -`raven` and/or `fenwick` instances syncing artifact metadata. This feature introduces an overhead for the -genuine not-found cases: transfer negotiation to GET the file that was never PUT. +_all known_ sites. It makes sense to enable this when `vault` is running in a global inventory (along with +`raven` and/or `fenwick` instances syncing artifact metadata, but even in a single _storage site_ deployment +this also helps hide some temporary inconsistencies. This feature introduces an overhead for the +genuine not-found cases: trying to GET the file that was never successfuly PUT but the DataNode was created. The _allocationParent_ is a path to a container node (directory) which contains space allocations. An allocation is owned by a user (usually different from the _rootOwner_ admin user) who is responsible for the allocation diff --git a/vault/src/main/java/org/opencadc/vault/VaultInitAction.java b/vault/src/main/java/org/opencadc/vault/VaultInitAction.java index a80f0452..b65b29d8 100644 --- a/vault/src/main/java/org/opencadc/vault/VaultInitAction.java +++ b/vault/src/main/java/org/opencadc/vault/VaultInitAction.java @@ -120,6 +120,7 @@ public class VaultInitAction extends InitAction { private static final String VAULT_KEY = "org.opencadc.vault"; static final String RESOURCE_ID_KEY = VAULT_KEY + ".resourceID"; static final String PREVENT_NOT_FOUND_KEY = VAULT_KEY + ".consistency.preventNotFound"; + static final String IS_STORAGE_SITE_KEY = VAULT_KEY + ".inventory.isStorageSite"; static final String INVENTORY_SCHEMA_KEY = VAULT_KEY + ".inventory.schema"; static final String VOSPACE_SCHEMA_KEY = VAULT_KEY + ".vospace.schema"; static final String SINGLE_POOL_KEY = VAULT_KEY + ".singlePool"; @@ -201,6 +202,15 @@ static MultiValuedProperties getConfig() { sb.append("OK"); } + String iss = mvp.getFirstPropertyValue(IS_STORAGE_SITE_KEY); + sb.append("\n\t" + IS_STORAGE_SITE_KEY + ": "); + if (iss == null) { + sb.append("MISSING"); + ok = false; + } else { + sb.append("OK"); + } + String pnf = mvp.getFirstPropertyValue(PREVENT_NOT_FOUND_KEY); sb.append("\n\t" + PREVENT_NOT_FOUND_KEY + ": "); if (pnf == null) { @@ -479,8 +489,10 @@ private void initBackgroundWorkers() { offline = true; } + boolean isStorageSite = Boolean.parseBoolean(props.getFirstPropertyValue(IS_STORAGE_SITE_KEY)); + terminateBackgroundWorkers(); - DataNodeSizeSync async = new DataNodeSizeSync(hsDAO, artifactDAO, storageNamespace); + DataNodeSizeSync async = new DataNodeSizeSync(hsDAO, artifactDAO, storageNamespace, isStorageSite); async.setOffline(offline); this.dataNodeSizeSyncThread = new Thread(async); dataNodeSizeSyncThread.setDaemon(true); diff --git a/vault/src/main/java/org/opencadc/vault/metadata/DataNodeSizeSync.java b/vault/src/main/java/org/opencadc/vault/metadata/DataNodeSizeSync.java index 3bce32e9..a5be2d02 100644 --- a/vault/src/main/java/org/opencadc/vault/metadata/DataNodeSizeSync.java +++ b/vault/src/main/java/org/opencadc/vault/metadata/DataNodeSizeSync.java @@ -98,13 +98,15 @@ public class DataNodeSizeSync implements Runnable { private final HarvestStateDAO stateDAO; private final ArtifactDAO artifactDAO; private final Namespace artifactNamespace; + private final boolean isStorageSite; private boolean offline = false; - public DataNodeSizeSync(HarvestStateDAO stateDAO, ArtifactDAO artifactDAO, Namespace artifactNamespace) { + public DataNodeSizeSync(HarvestStateDAO stateDAO, ArtifactDAO artifactDAO, Namespace artifactNamespace, boolean isStorageSite) { this.stateDAO = stateDAO; this.artifactDAO = artifactDAO; this.artifactNamespace = artifactNamespace; + this.isStorageSite = isStorageSite; // we need continuous timestamp updates to retain leader status, so only schedule maintenance stateDAO.setUpdateBufferCount(0); @@ -153,7 +155,7 @@ public void run() { boolean fail = false; log.info(logInfo.start()); try { - DataNodeSizeWorker worker = new DataNodeSizeWorker(stateDAO, state, artifactDAO, artifactNamespace); + DataNodeSizeWorker worker = new DataNodeSizeWorker(stateDAO, state, artifactDAO, artifactNamespace, isStorageSite); worker.run(); logInfo.setLastModified(state.curLastModified); logInfo.processed = worker.getNumArtifactsProcessed();