From 6757747479005d4c4731608f2b8749d6eb2e58bc Mon Sep 17 00:00:00 2001 From: Patrick Dowler Date: Fri, 20 Sep 2024 13:38:17 -0700 Subject: [PATCH] fix skipped update of data node timstamp update update DataNodeSizeWorker to force update Node.lastModified when it is older than artifact update vault NodePersistence make #date prop consistent in node and listing --- cadc-inventory-db/build.gradle | 2 +- .../vospace/db/DataNodeSizeWorkerTest.java | 51 ++++++++++++++----- .../vospace/db/DataNodeSizeWorker.java | 6 ++- .../java/org/opencadc/vospace/db/NodeDAO.java | 5 ++ vault/VERSION | 2 +- vault/build.gradle | 2 +- .../opencadc/vault/NodePersistenceImpl.java | 39 +++++++------- 7 files changed, 68 insertions(+), 39 deletions(-) diff --git a/cadc-inventory-db/build.gradle b/cadc-inventory-db/build.gradle index e563f680..4a3cb47f 100644 --- a/cadc-inventory-db/build.gradle +++ b/cadc-inventory-db/build.gradle @@ -17,7 +17,7 @@ sourceCompatibility = 11 group = 'org.opencadc' -version = '1.0.3' +version = '1.0.4' description = 'OpenCADC Storage Inventory database library' def git_url = 'https://github.com/opencadc/storage-inventory' diff --git a/cadc-inventory-db/src/intTest/java/org/opencadc/vospace/db/DataNodeSizeWorkerTest.java b/cadc-inventory-db/src/intTest/java/org/opencadc/vospace/db/DataNodeSizeWorkerTest.java index 2cb847b5..81c28449 100644 --- a/cadc-inventory-db/src/intTest/java/org/opencadc/vospace/db/DataNodeSizeWorkerTest.java +++ b/cadc-inventory-db/src/intTest/java/org/opencadc/vospace/db/DataNodeSizeWorkerTest.java @@ -107,7 +107,7 @@ public class DataNodeSizeWorkerTest { static { Log4jInit.setLevel("org.opencadc.inventory", Level.INFO); - Log4jInit.setLevel("org.opencadc.inventory.db", Level.DEBUG); + Log4jInit.setLevel("org.opencadc.inventory.db", Level.INFO); Log4jInit.setLevel("ca.nrc.cadc.db", Level.INFO); Log4jInit.setLevel("org.opencadc.vospace", Level.INFO); Log4jInit.setLevel("org.opencadc.vospace.db", Level.INFO); @@ -221,11 +221,7 @@ private void testSyncArtifact(boolean isStorageSite) throws Exception { expected.storageLocation.storageBucket = "X"; } log.info("expected: " + expected); - artifactDAO.put(expected); - Artifact actualArtifact = artifactDAO.get(expected.getID()); - Assert.assertNotNull(actualArtifact); - Assert.assertEquals(expected.getContentLength(), actualArtifact.getContentLength()); String hsName = "ArtifactSize"; URI resourceID = URI.create("ivo://myorg.org/vospace"); @@ -237,6 +233,8 @@ private void testSyncArtifact(boolean isStorageSite) throws Exception { log.info("*** DataNodeSizeWorker START"); asWorker.run(); log.info("*** DataNodeSizeWorker DONE"); + hs = harvestStateDAO.get(hsName, resourceID); + log.info("HarvestState: " + hs.curLastModified.getTime()); actual = (DataNode)nodeDAO.get(orig.getID()); Assert.assertNotNull(actual); @@ -246,24 +244,51 @@ private void testSyncArtifact(boolean isStorageSite) throws Exception { Thread.sleep(100L); // update the artifact only - artifactDAO.delete(actualArtifact.getID()); - Artifact modified = new Artifact(expected.getURI(), expected.getMetaChecksum(), new Date(), 333L); + artifactDAO.delete(expected.getID()); + Artifact m1 = new Artifact(expected.getURI(), expected.getContentChecksum(), new Date(), 333L); if (isStorageSite) { - modified.storageLocation = new StorageLocation(URI.create("id:" + UUID.randomUUID().toString())); - modified.storageLocation.storageBucket = "X"; + m1.storageLocation = new StorageLocation(URI.create("id:" + UUID.randomUUID().toString())); + m1.storageLocation.storageBucket = "X"; } - artifactDAO.put(modified); + artifactDAO.put(m1); actual = (DataNode)nodeDAO.get(orig.getID()); - Assert.assertNotEquals(modified.getContentLength(), actual.bytesUsed); + Assert.assertNotEquals(m1.getContentLength(), actual.bytesUsed); // run the update log.info("*** DataNodeSizeWorker START"); asWorker.run(); log.info("*** DataNodeSizeWorker DONE"); + hs = harvestStateDAO.get(hsName, resourceID); + log.info("HarvestState: " + hs.curLastModified.getTime()); + + actual = (DataNode)nodeDAO.get(orig.getID()); + Assert.assertEquals(m1.getContentLength(), actual.bytesUsed); + final Date nodeLastMod = actual.getLastModified(); + Thread.sleep(100L); + + // replace the artifact but with the same contentLength to ensure timestamp change + artifactDAO.delete(m1.getID()); + Artifact m2 = new Artifact(expected.getURI(), expected.getContentChecksum(), new Date(), 333L); // same size + if (isStorageSite) { + m2.storageLocation = new StorageLocation(URI.create("id:" + UUID.randomUUID().toString())); + m2.storageLocation.storageBucket = "X"; + } + artifactDAO.put(m2); + log.info("replaced artifact: " + m2.getLastModified().getTime()); actual = (DataNode)nodeDAO.get(orig.getID()); - Assert.assertEquals(modified.getContentLength(), actual.bytesUsed); + log.info(" node: " + actual.getLastModified().getTime()); + Assert.assertTrue("no timestamp change", actual.getLastModified().equals(nodeLastMod)); - } + // run the update + log.info("*** DataNodeSizeWorker START"); + asWorker.run(); + log.info("*** DataNodeSizeWorker DONE"); + hs = harvestStateDAO.get(hsName, resourceID); + log.info("HarvestState: " + hs.curLastModified.getTime()); + actual = (DataNode)nodeDAO.get(orig.getID()); + Assert.assertEquals(m2.getContentLength(), actual.bytesUsed); + Assert.assertTrue("timestamp change", actual.getLastModified().after(nodeLastMod)); + } } diff --git a/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/DataNodeSizeWorker.java b/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/DataNodeSizeWorker.java index 0cc4b663..45e2060c 100644 --- a/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/DataNodeSizeWorker.java +++ b/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/DataNodeSizeWorker.java @@ -154,7 +154,9 @@ public void run() { while (iter.hasNext()) { Artifact artifact = iter.next(); DataNode node = nodeDAO.getDataNode(artifact.getURI()); - if (node != null && !artifact.getContentLength().equals(node.bytesUsed)) { + if (node != null) { + boolean delta = !artifact.getContentLength().equals(node.bytesUsed); // size change + delta = delta || artifact.getLastModified().after(node.getLastModified()); log.debug(artifact.getURI() + " len=" + artifact.getContentLength() + " -> " + node.getName()); tm.startTransaction(); try { @@ -163,7 +165,7 @@ public void run() { continue; // node gone - race condition } node.bytesUsed = artifact.getContentLength(); - nodeDAO.put(node); + nodeDAO.put(node, delta); // delta forces lastModified update tm.commitTransaction(); log.debug("ArtifactSyncWorker.updateDataNode id=" + node.getID() + " bytesUsed=" + node.bytesUsed + " artifact.lastModified=" + df.format(artifact.getLastModified())); diff --git a/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/NodeDAO.java b/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/NodeDAO.java index 8088c66b..4c4b6d07 100644 --- a/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/NodeDAO.java +++ b/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/NodeDAO.java @@ -104,6 +104,11 @@ public void put(Node val) { super.put(val); } + // used by DataNodeWorker + public void put(Node val, boolean forceTimestampUdpate) { + super.put(val, false, forceTimestampUdpate); + } + @Override public Node lock(Node n) { if (n == null) { diff --git a/vault/VERSION b/vault/VERSION index aad86efa..29d2d9db 100644 --- a/vault/VERSION +++ b/vault/VERSION @@ -4,6 +4,6 @@ # tags with and without build number so operators use the versioned # tag but we always keep a timestamped tag in case a semantic tag gets # replaced accidentally -VER=1.0.10 +VER=1.0.11 TAGS="${VER} ${VER}-$(date -u +"%Y%m%dT%H%M%S")" unset VER diff --git a/vault/build.gradle b/vault/build.gradle index 3db6cad7..120f8135 100644 --- a/vault/build.gradle +++ b/vault/build.gradle @@ -44,7 +44,7 @@ dependencies { compile 'org.opencadc:cadc-cdp:[1.2.3,)' compile 'org.opencadc:cadc-registry:[1.7.6,)' compile 'org.opencadc:cadc-inventory:[1.0.1,2.0)' - compile 'org.opencadc:cadc-inventory-db:[1.0.1,2.0)' + compile 'org.opencadc:cadc-inventory-db:[1.0.4,2.0)' compile 'org.opencadc:cadc-inventory-server:[0.3.0,1.0)' compile 'org.opencadc:cadc-permissions:[0.3.5,1.0)' diff --git a/vault/src/main/java/org/opencadc/vault/NodePersistenceImpl.java b/vault/src/main/java/org/opencadc/vault/NodePersistenceImpl.java index 48b71f2c..b2c3e9b9 100644 --- a/vault/src/main/java/org/opencadc/vault/NodePersistenceImpl.java +++ b/vault/src/main/java/org/opencadc/vault/NodePersistenceImpl.java @@ -109,13 +109,10 @@ import org.opencadc.vospace.NodeNotSupportedException; import org.opencadc.vospace.NodeProperty; import org.opencadc.vospace.VOS; -import org.opencadc.vospace.VOSURI; import org.opencadc.vospace.db.NodeDAO; import org.opencadc.vospace.io.NodeWriter; -import org.opencadc.vospace.server.LocalServiceURI; import org.opencadc.vospace.server.NodePersistence; import org.opencadc.vospace.server.Views; -import org.opencadc.vospace.server.transfers.TransferGenerator; /** * @@ -401,7 +398,11 @@ public Node get(ContainerNode parent, String name) throws TransientException { // if DataNode.bytesUsed != Artifact.contentLength we update the cache // this retains put+get consistency in a single-site deployed (with minoc) // and may help hide some inconsistencies in child listing sizes - if (!a.getContentLength().equals(dn.bytesUsed)) { + + // be consistent with DataNodeSizeWorker + boolean delta = !a.getContentLength().equals(dn.bytesUsed); // size change + delta = delta || a.getLastModified().after(dn.getLastModified()); + if (delta) { TransactionManager txn = dao.getTransactionManager(); try { log.debug("starting node transaction"); @@ -412,7 +413,7 @@ public Node get(ContainerNode parent, String name) throws TransientException { if (locked != null) { dn = locked; // safer than accidentally using the wrong variable dn.bytesUsed = a.getContentLength(); - dao.put(dn); + dao.put(dn, delta); ret = dn; } @@ -437,25 +438,21 @@ public Node get(ContainerNode parent, String name) throws TransientException { } } - Date d = ret.getLastModified(); - Date cd = null; - if (ret.getLastModified().before(a.getLastModified())) { - d = a.getLastModified(); - } - if (d.before(a.getContentLastModified())) { - // probably not possible - d = a.getContentLastModified(); - } else { - cd = a.getContentLastModified(); + // #date is always Node.lastModified for consistency with container listing + // TODO: some props like contentType are set on the artifact so those changes do not + // cause a visible #date change; probably OK + ret.getProperties().add(new NodeProperty(VOS.PROPERTY_URI_DATE, df.format(ret.getLastModified()))); + + // #content-date is only output if different from #date + if (!ret.getLastModified().equals(a.getContentLastModified())) { + ret.getProperties().add(new NodeProperty(VOS.PROPERTY_URI_CONTENTDATE, df.format(a.getContentLastModified()))); } - ret.getProperties().add(new NodeProperty(VOS.PROPERTY_URI_DATE, df.format(d))); - if (cd != null) { - ret.getProperties().add(new NodeProperty(VOS.PROPERTY_URI_CONTENTDATE, df.format(cd))); + + // only #md5 in VOSpace spec + if (a.getContentChecksum().getScheme().equalsIgnoreCase("md5")) { + ret.getProperties().add(new NodeProperty(VOS.PROPERTY_URI_CONTENTMD5, a.getContentChecksum().getSchemeSpecificPart())); } - // assume MD5 - ret.getProperties().add(new NodeProperty(VOS.PROPERTY_URI_CONTENTMD5, a.getContentChecksum().getSchemeSpecificPart())); - if (a.contentEncoding != null) { ret.getProperties().add(new NodeProperty(VOS.PROPERTY_URI_CONTENTENCODING, a.contentEncoding)); }