diff --git a/cadc-inventory-db/build.gradle b/cadc-inventory-db/build.gradle index f6f235f8..1b5a7ca8 100644 --- a/cadc-inventory-db/build.gradle +++ b/cadc-inventory-db/build.gradle @@ -25,10 +25,10 @@ def git_url = 'https://github.com/opencadc/storage-inventory' mainClassName = 'org.opencadc.inventory.db.version.Main' dependencies { - compile 'org.opencadc:cadc-util:[1.10.3,2.0)' + compile 'org.opencadc:cadc-util:[1.11.0,2.0)' compile 'org.opencadc:cadc-gms:[1.0.0,)' compile 'org.opencadc:cadc-inventory:[0.9.4,)' - compile 'org.opencadc:cadc-vos:[2.0,3.0)' + compile 'org.opencadc:cadc-vos:[2.0.5,3.0)' testCompile 'junit:junit:[4.0,)' diff --git a/cadc-inventory-db/src/intTest/java/org/opencadc/inventory/db/ArtifactDAOTest.java b/cadc-inventory-db/src/intTest/java/org/opencadc/inventory/db/ArtifactDAOTest.java index bad7928c..f3a22597 100644 --- a/cadc-inventory-db/src/intTest/java/org/opencadc/inventory/db/ArtifactDAOTest.java +++ b/cadc-inventory-db/src/intTest/java/org/opencadc/inventory/db/ArtifactDAOTest.java @@ -94,6 +94,7 @@ import org.junit.Test; import org.opencadc.inventory.Artifact; import org.opencadc.inventory.InventoryUtil; +import org.opencadc.inventory.Namespace; import org.opencadc.inventory.SiteLocation; import org.opencadc.inventory.StorageLocation; import org.opencadc.inventory.StoredArtifactComparator; @@ -955,6 +956,9 @@ public void testIteratorClose() { public void testArtifactIterator() { int num = 10; try { + final Date startDate = new Date(); + Thread.sleep(10L); + int numArtifacts = 0; int numStuffExpected = 0; // artifacts with storageLocation @@ -977,6 +981,7 @@ public void testArtifactIterator() { numStuffExpected++; } } + // some artifacts with no storageLocation collection = "STUFF"; for (int i = num; i < 2 * num; i++) { @@ -996,6 +1001,12 @@ public void testArtifactIterator() { numStuffExpected++; } } + + Thread.sleep(10L); + final Date midDate = new Date(); + Thread.sleep(10L); + final int midNumStuff = numStuffExpected; + // some 
artifacts with siteLocations UUID siteID = UUID.randomUUID(); int numSiteExpected = 0; @@ -1018,6 +1029,8 @@ public void testArtifactIterator() { numStuffExpected++; } } + Thread.sleep(10L); + final Date endDate = new Date(); log.info("added: " + numArtifacts); log.info("count all..."); @@ -1032,13 +1045,14 @@ public void testArtifactIterator() { Assert.assertEquals("count", numArtifacts, count); log.info("count with criteria..."); + final Namespace ns = new Namespace("cadc:STUFF/"); count = 0; - try (ResourceIterator iter = originDAO.iterator("uri like 'cadc:STUFF/%'", null, false)) { + try (ResourceIterator iter = originDAO.iterator(ns, null, false)) { while (iter.hasNext()) { Artifact actual = iter.next(); count++; log.info("found: " + actual.getURI()); - Assert.assertTrue("STUFF", actual.getURI().toASCIIString().startsWith("cadc:STUFF/")); + Assert.assertTrue("STUFF", actual.getURI().toASCIIString().startsWith(ns.getNamespace())); } } Assert.assertEquals("count", numStuffExpected, count); @@ -1065,7 +1079,8 @@ public void testArtifactIterator() { while (iter.hasNext()) { Artifact actual = iter.next(); count++; - log.info("found: " + actual.getURI()); + log.info("found: " + actual.getBucket() + " " + actual.getURI()); + Assert.assertTrue(actual.getBucket().startsWith(bpre)); } } } @@ -1075,18 +1090,56 @@ public void testArtifactIterator() { count = 0; for (byte b = 0; b < 16; b++) { String bpre = HexUtil.toHex(b).substring(1); - log.debug("bucket prefix: " + bpre); - try (ResourceIterator iter = originDAO.iterator("uri like 'cadc:STUFF/%'", bpre, false)) { + log.info("bucket prefix: " + bpre); + try (ResourceIterator iter = originDAO.iterator(ns, bpre, false)) { while (iter.hasNext()) { Artifact actual = iter.next(); count++; - log.info("found: " + actual.getURI()); - Assert.assertTrue("STUFF", actual.getURI().toASCIIString().startsWith("cadc:STUFF/")); + log.info("found: " + actual.getBucket() + " " + actual.getURI()); + 
Assert.assertTrue(actual.getBucket().startsWith(bpre)); + Assert.assertTrue("STUFF", actual.getURI().toASCIIString().startsWith(ns.getNamespace())); } } } Assert.assertEquals("count", numStuffExpected, count); + log.info("count vs Namespace incremental from start..."); + DateFormat df = DateUtil.getDateFormat(DateUtil.IVOA_DATE_FORMAT, DateUtil.UTC); + count = 0; + try (ResourceIterator iter = originDAO.iterator(ns, null, startDate, true)) { + while (iter.hasNext()) { + Artifact actual = iter.next(); + count++; + log.info("found: " + actual.getBucket() + " " + actual.getURI() + " " + df.format(actual.getContentLastModified())); + Assert.assertTrue("STUFF", actual.getURI().toASCIIString().startsWith(ns.getNamespace())); + } + } + Assert.assertEquals("count", numStuffExpected, count); + + log.info("count vs Namespace incremental from mid..."); + count = 0; + try (ResourceIterator iter = originDAO.iterator(ns, null, midDate, true)) { + while (iter.hasNext()) { + Artifact actual = iter.next(); + count++; + log.info("found: " + actual.getBucket() + " " + actual.getURI() + " " + df.format(actual.getContentLastModified())); + Assert.assertTrue("STUFF", actual.getURI().toASCIIString().startsWith(ns.getNamespace())); + } + } + Assert.assertEquals("count", midNumStuff, count); + + log.info("count vs Namespace incremental from end..."); + count = 0; + try (ResourceIterator iter = originDAO.iterator(ns, null, endDate, true)) { + while (iter.hasNext()) { + Artifact actual = iter.next(); + count++; + log.info("found: " + actual.getBucket() + " " + actual.getURI() + " " + df.format(actual.getContentLastModified())); + Assert.assertTrue("STUFF", actual.getURI().toASCIIString().startsWith(ns.getNamespace())); + } + } + Assert.assertEquals("count", 0, count); + } catch (Exception unexpected) { log.error("unexpected exception", unexpected); Assert.fail("unexpected exception: " + unexpected); diff --git a/cadc-inventory-db/src/intTest/java/org/opencadc/vospace/db/NodeDAOTest.java 
b/cadc-inventory-db/src/intTest/java/org/opencadc/vospace/db/NodeDAOTest.java index 4a776c11..4f4e52e9 100644 --- a/cadc-inventory-db/src/intTest/java/org/opencadc/vospace/db/NodeDAOTest.java +++ b/cadc-inventory-db/src/intTest/java/org/opencadc/vospace/db/NodeDAOTest.java @@ -3,7 +3,7 @@ ******************* CANADIAN ASTRONOMY DATA CENTRE ******************* ************** CENTRE CANADIEN DE DONNÉES ASTRONOMIQUES ************** * -* (c) 2023. (c) 2023. +* (c) 2024. (c) 2024. * Government of Canada Gouvernement du Canada * National Research Council Conseil national de recherches * Ottawa, Canada, K1A 0R6 Ottawa, Canada, K1A 0R6 @@ -182,7 +182,7 @@ public void testPutGetUpdateDeleteContainerNode() throws InterruptedException, // put ContainerNode orig = new ContainerNode("container-test"); - orig.parent = root; + orig.parentID = root.getID(); orig.ownerID = "the-owner"; nodeDAO.put(orig); @@ -216,6 +216,7 @@ public void testPutGetUpdateDeleteContainerNode() throws InterruptedException, Assert.assertTrue(a instanceof ContainerNode); ContainerNode c = (ContainerNode) a; Assert.assertEquals(orig.inheritPermissions, c.inheritPermissions); + Assert.assertEquals(orig.bytesUsed, c.bytesUsed); // these are set in put Assert.assertEquals(orig.getMetaChecksum(), a.getMetaChecksum()); @@ -228,7 +229,6 @@ public void testPutGetUpdateDeleteContainerNode() throws InterruptedException, Thread.sleep(10L); orig.getReadOnlyGroup().add(new GroupURI(URI.create("ivo://opencadc.org/gms?g1"))); orig.getReadWriteGroup().add(new GroupURI(URI.create("ivo://opencadc.org/gms?g3"))); - orig.getProperties().add(new NodeProperty(VOS.PROPERTY_URI_CONTENTLENGTH, "123")); orig.isPublic = true; orig.inheritPermissions = true; nodeDAO.put(orig); @@ -251,7 +251,7 @@ public void testPutGetUpdateDeleteContainerNode() throws InterruptedException, Assert.assertTrue(updated instanceof ContainerNode); ContainerNode uc = (ContainerNode) updated; Assert.assertEquals(orig.inheritPermissions, 
uc.inheritPermissions); - + Assert.assertEquals(orig.bytesUsed, uc.bytesUsed); nodeDAO.delete(orig.getID()); Node gone = nodeDAO.get(orig.getID()); @@ -267,7 +267,7 @@ public void testPutGetUpdateDeleteContainerNodeMax() throws InterruptedException // TODO: use get-by-path to find and remove the test node ContainerNode orig = new ContainerNode("container-test"); - orig.parent = root; + orig.parentID = root.getID(); orig.ownerID = "the-owner"; orig.isPublic = true; orig.isLocked = false; @@ -279,8 +279,6 @@ public void testPutGetUpdateDeleteContainerNodeMax() throws InterruptedException orig.getReadWriteGroup().add(new GroupURI(URI.create("ivo://opencadc.org/gms?g6.g7"))); orig.getReadWriteGroup().add(new GroupURI(URI.create("ivo://opencadc.org/gms?g6_g7"))); orig.getReadWriteGroup().add(new GroupURI(URI.create("ivo://opencadc.org/gms?g6~g7"))); - - orig.getProperties().add(new NodeProperty(VOS.PROPERTY_URI_CONTENTLENGTH, "123")); orig.getProperties().add(new NodeProperty(URI.create("custom:prop"), "spaces in value")); orig.getProperties().add(new NodeProperty(URI.create("sketchy:a,b"), "comma in uri")); orig.getProperties().add(new NodeProperty(URI.create("sketchy:funny"), "value-with-{delims}")); @@ -316,6 +314,7 @@ public void testPutGetUpdateDeleteContainerNodeMax() throws InterruptedException Assert.assertTrue(a instanceof ContainerNode); ContainerNode c = (ContainerNode) a; Assert.assertEquals(orig.inheritPermissions, c.inheritPermissions); + Assert.assertEquals(orig.bytesUsed, c.bytesUsed); // these are set in put Assert.assertEquals(orig.getMetaChecksum(), a.getMetaChecksum()); @@ -333,7 +332,6 @@ public void testPutGetUpdateDeleteContainerNodeMax() throws InterruptedException orig.getReadWriteGroup().clear(); orig.getReadWriteGroup().add(new GroupURI(URI.create("ivo://opencadc.org/gms?g3"))); orig.getProperties().clear(); - orig.getProperties().add(new NodeProperty(VOS.PROPERTY_URI_CONTENTLENGTH, "123")); orig.inheritPermissions = true; nodeDAO.put(orig); 
Node updated = nodeDAO.get(orig.getID()); @@ -355,6 +353,7 @@ public void testPutGetUpdateDeleteContainerNodeMax() throws InterruptedException Assert.assertTrue(updated instanceof ContainerNode); ContainerNode uc = (ContainerNode) updated; Assert.assertEquals(orig.inheritPermissions, uc.inheritPermissions); + Assert.assertEquals(orig.bytesUsed, uc.bytesUsed); nodeDAO.delete(orig.getID()); Node gone = nodeDAO.get(orig.getID()); @@ -367,9 +366,10 @@ public void testPutGetUpdateDeleteDataNode() throws InterruptedException, UUID rootID = new UUID(0L, 0L); ContainerNode root = new ContainerNode(rootID, "root"); - DataNode orig = new DataNode(UUID.randomUUID(), "data-test", URI.create("cadc:vault/" + UUID.randomUUID())); - orig.parent = root; + DataNode orig = new DataNode("data-test"); + orig.parentID = root.getID(); orig.ownerID = "the-owner"; + orig.storageID = URI.create("vault:" + UUID.randomUUID().toString()); orig.isPublic = true; orig.isLocked = false; orig.getProperties().add(new NodeProperty(VOS.PROPERTY_URI_TYPE, "text/plain")); @@ -423,7 +423,6 @@ public void testPutGetUpdateDeleteDataNode() throws InterruptedException, orig.getReadWriteGroup().clear(); orig.getReadWriteGroup().add(new GroupURI(URI.create("ivo://opencadc.org/gms?g3"))); orig.getProperties().clear(); - orig.getProperties().add(new NodeProperty(VOS.PROPERTY_URI_CONTENTLENGTH, "123")); // don't change storageID nodeDAO.put(orig); Node updated = nodeDAO.get(orig.getID()); @@ -445,6 +444,7 @@ public void testPutGetUpdateDeleteDataNode() throws InterruptedException, Assert.assertTrue(a instanceof DataNode); DataNode udn = (DataNode) updated; + Assert.assertEquals(orig.bytesUsed, udn.bytesUsed); Assert.assertEquals(orig.storageID, udn.storageID); nodeDAO.delete(orig.getID()); @@ -461,7 +461,7 @@ public void testPutGetUpdateDeleteLinkNode() throws InterruptedException, // TODO: use get-by-path to find and remove the test node LinkNode orig = new LinkNode("data-test", 
URI.create("vos://opencadc.org~srv/path/to/something")); - orig.parent = root; + orig.parentID = root.getID(); orig.ownerID = "the-owner"; orig.isPublic = true; orig.isLocked = false; @@ -514,7 +514,6 @@ public void testPutGetUpdateDeleteLinkNode() throws InterruptedException, orig.getReadWriteGroup().clear(); orig.getReadWriteGroup().add(new GroupURI(URI.create("ivo://opencadc.org/gms?g3"))); orig.getProperties().clear(); - orig.getProperties().add(new NodeProperty(VOS.PROPERTY_URI_CONTENTLENGTH, "123")); // don't change target nodeDAO.put(orig); Node updated = nodeDAO.get(orig.getID()); @@ -542,6 +541,29 @@ public void testPutGetUpdateDeleteLinkNode() throws InterruptedException, Assert.assertNull(gone); } + @Test + public void testGetByStorageID() { + UUID rootID = new UUID(0L, 0L); + ContainerNode root = new ContainerNode(rootID, "root"); + + DataNode notFound = nodeDAO.getDataNode(URI.create("vault:not-found")); + Assert.assertNull(notFound); + + DataNode orig = new DataNode("testGetByStorageID"); + orig.parentID = root.getID(); + orig.ownerID = "the-owner"; + orig.storageID = URI.create("vault:" + UUID.randomUUID().toString()); + nodeDAO.put(orig); + + // get-by-storageID + DataNode gbs = nodeDAO.getDataNode(orig.storageID); + Assert.assertNotNull(gbs); + log.info("found: " + gbs.getID() + " aka " + gbs); + Assert.assertEquals(orig.getID(), gbs.getID()); + + nodeDAO.delete(orig.getID()); + } + @Test public void testGetWithLock() { UUID rootID = new UUID(0L, 0L); @@ -549,7 +571,7 @@ public void testGetWithLock() { // put ContainerNode orig = new ContainerNode("container-test"); - orig.parent = root; + orig.parentID = root.getID(); orig.ownerID = "the-owner"; nodeDAO.put(orig); @@ -571,13 +593,69 @@ public void testGetWithLock() { Assert.assertNull(gone); } + @Test + public void testUpdateNodeSize() throws InterruptedException, + NoSuchAlgorithmException { + UUID rootID = new UUID(0L, 0L); + ContainerNode root = new ContainerNode(rootID, "root"); + + final 
ContainerNode cnode = new ContainerNode("testUpdateNodeSize-container"); + cnode.parentID = root.getID(); + cnode.ownerID = "the-owner"; + nodeDAO.put(cnode); + + final DataNode dnode = new DataNode("testUpdateNodeSize-data"); + dnode.ownerID = "the-owner"; + dnode.storageID = URI.create("cadc:vault/" + UUID.randomUUID()); + dnode.parentID = cnode.getID(); + nodeDAO.put(dnode); + + final ContainerNode c1 = (ContainerNode) nodeDAO.get(cnode.getID()); + Assert.assertNotNull(c1); + log.info("found: " + c1.getID() + " aka " + c1); + Assert.assertEquals(cnode.getID(), c1.getID()); + Assert.assertEquals(cnode.getName(), c1.getName()); + Assert.assertEquals(root.getID(), c1.parentID); + Assert.assertNull(c1.bytesUsed); + + final DataNode d1 = (DataNode) nodeDAO.get(dnode.getID()); + Assert.assertNotNull(d1); + log.info("found: " + d1.getID() + " aka " + d1); + Assert.assertEquals(dnode.getID(), d1.getID()); + Assert.assertEquals(dnode.getName(), d1.getName()); + Assert.assertEquals(cnode.getID(), d1.parentID); + Assert.assertNull(d1.bytesUsed); + + final URI ccs = c1.getMetaChecksum(); + final URI dcs = d1.getMetaChecksum(); + + log.info("update DataNode"); + d1.bytesUsed = 123L; + nodeDAO.put(d1); + final DataNode d2 = (DataNode) nodeDAO.get(dnode.getID()); + Assert.assertNotNull(d2); + Assert.assertNotNull(d2.bytesUsed); + Assert.assertEquals(d1.bytesUsed, d2.bytesUsed); + + log.info("update ContainerNode.bytesUsed"); + c1.bytesUsed = 123L; + nodeDAO.put(c1); + final ContainerNode c2 = (ContainerNode) nodeDAO.get(cnode.getID()); + Assert.assertNotNull(c2); + Assert.assertNotNull(c2.bytesUsed); + Assert.assertEquals(123L, c2.bytesUsed.longValue()); + + nodeDAO.delete(dnode.getID()); + nodeDAO.delete(cnode.getID()); + } + @Test public void testContainerNodeIterator() throws IOException { UUID rootID = new UUID(0L, 0L); ContainerNode root = new ContainerNode(rootID, "root"); ContainerNode orig = new ContainerNode("container-test"); - orig.parent = root; + orig.parentID = 
root.getID(); orig.ownerID = "the-owner"; nodeDAO.put(orig); @@ -610,13 +688,13 @@ public void testContainerNodeIterator() throws IOException { // add children ContainerNode cont = new ContainerNode("container1"); - cont.parent = orig; + cont.parentID = orig.getID(); cont.ownerID = orig.ownerID; DataNode data = new DataNode(UUID.randomUUID(), "data1", URI.create("cadc:vault/" + UUID.randomUUID())); - data.parent = orig; + data.parentID = orig.getID(); data.ownerID = orig.ownerID; LinkNode link = new LinkNode("link1", URI.create("cadc:ARCHIVE/data")); - link.parent = orig; + link.parentID = orig.getID(); link.ownerID = orig.ownerID; log.info("put child: " + cont + " of " + cont.parent); nodeDAO.put(cont); diff --git a/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/ArtifactDAO.java b/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/ArtifactDAO.java index 200b4e63..cb7964ce 100644 --- a/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/ArtifactDAO.java +++ b/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/ArtifactDAO.java @@ -69,9 +69,11 @@ import ca.nrc.cadc.io.ResourceIterator; import java.net.URI; +import java.util.Date; import java.util.UUID; import org.apache.log4j.Logger; import org.opencadc.inventory.Artifact; +import org.opencadc.inventory.Namespace; import org.opencadc.inventory.SiteLocation; import org.opencadc.inventory.StorageLocation; import org.springframework.jdbc.BadSqlGrammarException; @@ -190,8 +192,8 @@ public ResourceIterator storedIterator(String storageBucketPrefix) { try { SQLGenerator.ArtifactIteratorQuery iter = (SQLGenerator.ArtifactIteratorQuery) gen.getEntityIteratorQuery(Artifact.class); iter.setStorageLocationRequired(true); + iter.setStorageBucket(storageBucketPrefix); iter.setOrderedOutput(true); - iter.setPrefix(storageBucketPrefix); return iter.query(dataSource); } catch (BadSqlGrammarException ex) { handleInternalFail(ex); @@ -218,8 +220,8 @@ public ResourceIterator unstoredIterator(String 
uriBucketPrefix) { try { SQLGenerator.ArtifactIteratorQuery iter = (SQLGenerator.ArtifactIteratorQuery) gen.getEntityIteratorQuery(Artifact.class); iter.setStorageLocationRequired(false); + iter.setUriBucket(uriBucketPrefix); iter.setOrderedOutput(true); - iter.setPrefix(uriBucketPrefix); return iter.query(dataSource); } catch (BadSqlGrammarException ex) { handleInternalFail(ex); @@ -240,7 +242,7 @@ public ResourceIterator unstoredIterator(String uriBucketPrefix) { * @return iterator over artifacts */ public ResourceIterator iterator(String uriBucketPrefix, boolean ordered) { - return iterator((String) null, uriBucketPrefix, ordered); + return iterator((Namespace) null, uriBucketPrefix, ordered); } /** @@ -250,19 +252,37 @@ public ResourceIterator iterator(String uriBucketPrefix, boolean order * *

Use case: local cleanup by arbitrary criteria * - * @param criteria conditions for selecting artifacts + * @param ns namespace for selecting artifacts * @param uriBucketPrefix null, prefix, or complete Artifact.uriBucket string * @param ordered order by Artifact.uri (true) or not ordered (false) * @return iterator over artifacts matching criteria */ - public ResourceIterator iterator(String criteria, String uriBucketPrefix, boolean ordered) { + public ResourceIterator iterator(Namespace ns, String uriBucketPrefix, boolean ordered) { + return iterator(ns, uriBucketPrefix, null, ordered); + } + + /** + * Iterate over artifacts that match criteria. This method adds an optional Date argument to + * support incremental processing. In this case, ordered will be in timestamp order rather than + * uri order. + * + *

Use case: process artifact events directly in the database + * + * @param ns namespace for selecting artifacts + * @param uriBucketPrefix null, prefix, or complete Artifact.uriBucket string + * @param minLastModified minimum Artifact.lastModified to consider (incremental mode) + * @param ordered order by Artifact.uri (true) or not ordered (false) + * @return iterator over artifacts matching criteria + */ + public ResourceIterator iterator(Namespace ns, String uriBucketPrefix, Date minLastModified, boolean ordered) { checkInit(); long t = System.currentTimeMillis(); try { SQLGenerator.ArtifactIteratorQuery iter = (SQLGenerator.ArtifactIteratorQuery) gen.getEntityIteratorQuery(Artifact.class); - iter.setPrefix(uriBucketPrefix); - iter.setCriteria(criteria); + iter.setUriBucket(uriBucketPrefix); + iter.setNamespace(ns); + iter.setMinLastModified(minLastModified); iter.setOrderedOutput(ordered); return iter.query(dataSource); } catch (BadSqlGrammarException ex) { @@ -273,7 +293,7 @@ public ResourceIterator iterator(String criteria, String uriBucketPref } throw new RuntimeException("BUG: should be unreachable"); } - + /** * Iterate over Artifacts from a specific site. 
If a siteID is specified, only artifacts where * artifact.siteLocations includes that siteID are returned; this is only applicable in a global @@ -292,7 +312,7 @@ public ResourceIterator iterator(UUID siteID, String uriBucketPrefix, try { SQLGenerator.ArtifactIteratorQuery iter = (SQLGenerator.ArtifactIteratorQuery) gen.getEntityIteratorQuery(Artifact.class); - iter.setPrefix(uriBucketPrefix); + iter.setUriBucket(uriBucketPrefix); iter.setSiteID(siteID); iter.setOrderedOutput(ordered); return iter.query(dataSource); diff --git a/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/HarvestState.java b/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/HarvestState.java index 04a0b724..4d012cc2 100644 --- a/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/HarvestState.java +++ b/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/HarvestState.java @@ -137,6 +137,9 @@ public void setResourceID(URI resourceID) { @Override public String toString() { + if (instanceID != null) { + return HarvestState.class.getSimpleName() + "[" + instanceID + "," + name + "," + resourceID + "]"; + } return HarvestState.class.getSimpleName() + "[" + name + "," + resourceID + "]"; } } diff --git a/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/SQLGenerator.java b/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/SQLGenerator.java index 16d07a4c..0652a918 100644 --- a/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/SQLGenerator.java +++ b/cadc-inventory-db/src/main/java/org/opencadc/inventory/db/SQLGenerator.java @@ -94,6 +94,7 @@ import org.opencadc.inventory.DeletedArtifactEvent; import org.opencadc.inventory.DeletedStorageLocationEvent; import org.opencadc.inventory.InventoryUtil; +import org.opencadc.inventory.Namespace; import org.opencadc.inventory.ObsoleteStorageLocation; import org.opencadc.inventory.PreauthKeyPair; import org.opencadc.inventory.SiteLocation; @@ -260,8 +261,10 @@ protected void init() { "readWriteGroups", 
"properties", "inheritPermissions", + "bytesUsed", "busy", "storageID", + "storageBucket", "target", "lastModified", "metaChecksum", @@ -675,10 +678,15 @@ public PreparedStatement createPreparedStatement(Connection conn) throws SQLExce class ArtifactIteratorQuery implements EntityIteratorQuery { private Boolean storageLocationRequired; - private String prefix; + private String storageBucket; + private UUID siteID; - private String whereClause; + private String uriBucket; + private Namespace namespace; + private Date minLastModified; private boolean ordered; + + private final Calendar utc = Calendar.getInstance(DateUtil.UTC); public ArtifactIteratorQuery() { } @@ -694,22 +702,30 @@ public void setStorageLocationRequired(Boolean slr) { this.storageLocationRequired = slr; } - public void setPrefix(String prefix) { + public void setStorageBucket(String prefix) { if (StringUtil.hasText(prefix)) { - this.prefix = prefix.trim(); + this.storageBucket = prefix.trim(); } else { - this.prefix = null; + this.storageBucket = null; } } - public void setCriteria(String whereClause) { - if (StringUtil.hasText(whereClause)) { - this.whereClause = whereClause.trim(); + public void setUriBucket(String uriBucket) { + if (StringUtil.hasText(uriBucket)) { + this.uriBucket = uriBucket.trim(); } else { - this.whereClause = null; + this.uriBucket = null; } } + public void setNamespace(Namespace namespace) { + this.namespace = namespace; + } + + public void setMinLastModified(Date minLastModified) { + this.minLastModified = minLastModified; + } + public void setOrderedOutput(boolean ordered) { this.ordered = ordered; } @@ -720,13 +736,14 @@ public void setSiteID(UUID siteID) { @Override public ResourceIterator query(DataSource ds) { - - StringBuilder sb = getSelectFromSQL(Artifact.class, false); - sb.append(" WHERE"); - + StringBuilder sb = new StringBuilder(); + boolean where = false; if (storageLocationRequired != null && storageLocationRequired) { // ArtifactDAO.storedIterator - if 
(StringUtil.hasText(prefix)) { + sb.append(" WHERE"); + where = true; + if (storageBucket != null) { + sb.append(" storageLocation_storageBucket LIKE ? AND"); } sb.append(" storageLocation_storageID IS NOT NULL"); @@ -740,42 +757,64 @@ public ResourceIterator query(DataSource ds) { } } else if (storageLocationRequired != null && !storageLocationRequired) { // ArtifactDAO.unstoredIterator - if (StringUtil.hasText(prefix)) { - sb.append(" uriBucket LIKE ? AND"); - } + sb.append(" WHERE"); + where = true; sb.append(" storageLocation_storageID IS NULL"); - if (ordered) { - sb.append(" ORDER BY uri"); - } - } else if (siteID != null) { - if (prefix != null && siteID != null) { - sb.append(" uriBucket LIKE ? AND ").append("siteLocations @> ARRAY[?]"); + } + + // optional params: + // uriBucket + // siteID + // namespace + // minLastModified + if (uriBucket != null) { + if (where) { + sb.append(" AND"); } else { - sb.append(" siteLocations @> ARRAY[?]"); + sb.append(" WHERE"); + where = true; } - if (ordered) { - sb.append(" ORDER BY uri"); + sb.append(" uriBucket LIKE ?"); + } + if (siteID != null) { + // ArtifactDAO.iterator(UUID, ...) + if (where) { + sb.append(" AND"); + } else { + sb.append(" WHERE"); + where = true; } - } else if (whereClause != null) { - if (prefix != null && whereClause != null) { - sb.append(" uriBucket LIKE ? 
AND ( ").append(whereClause).append(" )"); + sb.append(" siteLocations @> ARRAY[?]"); + } + if (namespace != null) { + if (where) { + sb.append(" AND"); } else { - sb.append(" (").append(whereClause).append(" )"); + sb.append(" WHERE"); + where = true; } - if (ordered) { - sb.append(" ORDER BY uri"); + sb.append(" uri LIKE ?"); + } + if (minLastModified != null) { + if (where) { + sb.append(" AND"); + } else { + sb.append(" WHERE"); + where = true; } - } else if (prefix != null) { - sb.append(" uriBucket LIKE ?"); - if (ordered) { + sb.append(" lastModified >= ?"); + } + if (ordered && !(storageLocationRequired != null && storageLocationRequired)) { + if (minLastModified != null) { + sb.append(" ORDER BY lastModified ASC"); + } else { sb.append(" ORDER BY uri"); } - } else { - // trim off " WHERE" - sb.delete(sb.length() - 6, sb.length()); } - String sql = sb.toString(); + StringBuilder select = getSelectFromSQL(Artifact.class, false); + select.append(sb.toString()); + String sql = select.toString(); log.debug("sql: " + sql); try { @@ -787,15 +826,29 @@ public ResourceIterator query(DataSource ds) { ps.setFetchSize(1000); ps.setFetchDirection(ResultSet.FETCH_FORWARD); int col = 1; - if (prefix != null) { - String val = prefix + "%"; + if (storageBucket != null) { + String val = storageBucket + "%"; + log.debug("bucket prefix: " + val); + ps.setString(col++, val); + } else if (uriBucket != null) { + String val = uriBucket + "%"; log.debug("bucket prefix: " + val); ps.setString(col++, val); } if (siteID != null) { log.debug("siteID: " + siteID); ps.setObject(col++, siteID); + } + if (namespace != null) { + String val = namespace.getNamespace() + "%"; + log.debug("namespace prefix: " + val); + ps.setString(col++, val); + } + if (minLastModified != null) { + log.debug("min lastModified: " + minLastModified); + ps.setTimestamp(col++, new Timestamp(minLastModified.getTime()), utc); } + ResultSet rs = ps.executeQuery(); return new ArtifactResultSetIterator(con, rs); @@ 
-939,6 +992,7 @@ public class NodeGet implements EntityGet { private UUID id; private ContainerNode parent; private String name; + private URI storageID; private final boolean forUpdate; public NodeGet(boolean forUpdate) { @@ -954,6 +1008,10 @@ public void setPath(ContainerNode parent, String name) { this.parent = parent; this.name = name; } + + public void setStorageID(URI storageID) { + this.storageID = storageID; + } @Override public Node execute(JdbcTemplate jdbc) { @@ -967,10 +1025,12 @@ public PreparedStatement createPreparedStatement(Connection conn) throws SQLExce if (id != null) { String col = getKeyColumn(Node.class, true); sb.append(col).append(" = ?"); + } else if (storageID != null) { + String col = "storageID"; + sb.append(col).append(" = ?"); } else if (parent != null && name != null) { String pidCol = "parentID"; String nameCol = "name"; - // TODO: better way to get column names? sb.append(pidCol).append(" = ? and ").append(nameCol).append(" = ?"); } else { throw new IllegalStateException("primary key is null"); @@ -983,6 +1043,8 @@ public PreparedStatement createPreparedStatement(Connection conn) throws SQLExce PreparedStatement prep = conn.prepareStatement(sql); if (id != null) { prep.setObject(1, id); + } else if (storageID != null) { + prep.setObject(1, storageID.toASCIIString()); } else { prep.setObject(1, parent.getID()); prep.setObject(2, name); @@ -1466,12 +1528,27 @@ public PreparedStatement createPreparedStatement(Connection conn) throws SQLExce safeSetArray(prep, col++, value.getReadOnlyGroup()); safeSetArray(prep, col++, value.getReadWriteGroup()); safeSetProps(prep, col++, value.getProperties()); + + // ContainerNode-specific fields if (value instanceof ContainerNode) { ContainerNode cn = (ContainerNode) value; safeSetBoolean(prep, col++, cn.inheritPermissions); } else { safeSetBoolean(prep, col++, null); + } + + // bytesUsed is in between CN and DN specific columns + if (value instanceof ContainerNode) { + ContainerNode cn = 
(ContainerNode) value; + safeSetLong(prep, col++, cn.bytesUsed); + } else if (value instanceof DataNode) { + DataNode dn = (DataNode) value; + safeSetLong(prep, col++, dn.bytesUsed); + } else { + safeSetLong(prep, col++, null); } + + // DataNode specific fields if (value instanceof DataNode) { DataNode dn = (DataNode) value; if (dn.storageID == null) { @@ -1479,10 +1556,14 @@ public PreparedStatement createPreparedStatement(Connection conn) throws SQLExce } safeSetBoolean(prep, col++, dn.busy); safeSetString(prep, col++, dn.storageID); + safeSetString(prep, col++, InventoryUtil.computeBucket(dn.storageID, 5)); // same as Artifact } else { safeSetBoolean(prep, col++, null); safeSetString(prep, col++, (URI) null); + safeSetString(prep, col++, (URI) null); } + + // LinkNode-specific fields if (value instanceof LinkNode) { LinkNode ln = (LinkNode) value; prep.setString(col++, ln.getTarget().toASCIIString()); @@ -1490,6 +1571,7 @@ public PreparedStatement createPreparedStatement(Connection conn) throws SQLExce safeSetString(prep, col++, (URI) null); } + // Entity fields prep.setTimestamp(col++, new Timestamp(value.getLastModified().getTime()), utc); prep.setString(col++, value.getMetaChecksum().toASCIIString()); prep.setObject(col++, value.getID()); @@ -1706,7 +1788,7 @@ private class ArtifactResultSetIterator implements ResourceIterator { private final Connection con; private final ResultSet rs; boolean hasRow; - boolean closeWhenDone = false; // not a pooled connection + boolean closeWhenDone = true; // return to pool | assume close suppressed for static connections ArtifactResultSetIterator(Connection con, ResultSet rs) throws SQLException { this.con = con; @@ -1904,8 +1986,11 @@ private Node mapRowToNode(ResultSet rs, Calendar utc, ContainerNode parent) thro final String rawRWG = rs.getString(col++); final String rawProps = rs.getString(col++); final Boolean inheritPermissions = Util.getBoolean(rs, col++); + final Long bytesUsed = Util.getLong(rs, col++); final 
Boolean busy = Util.getBoolean(rs, col++); final URI storageID = Util.getURI(rs, col++); + final String storageBucket = rs.getString(col++); + // TODO: return this somehow or just use in DataNode iterator? final URI linkTarget = Util.getURI(rs, col++); final Date lastModified = Util.getDate(rs, col++, utc); final URI metaChecksum = Util.getURI(rs, col++); @@ -1915,9 +2000,12 @@ private Node mapRowToNode(ResultSet rs, Calendar utc, ContainerNode parent) thro if (nodeType.equals("C")) { ContainerNode cn = new ContainerNode(id, name); cn.inheritPermissions = inheritPermissions; + cn.bytesUsed = bytesUsed; ret = cn; } else if (nodeType.equals("D")) { - ret = new DataNode(id, name, storageID); + DataNode dn = new DataNode(id, name, storageID); + dn.bytesUsed = bytesUsed; + ret = dn; } else if (nodeType.equals("L")) { ret = new LinkNode(id, name, linkTarget); } else { @@ -2148,7 +2236,5 @@ public Node extractData(ResultSet rs) throws SQLException, DataAccessException { return mapRowToNode(rs, utc, parent); } - - } } diff --git a/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/InitDatabaseVOS.java b/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/InitDatabaseVOS.java index 5ac9910e..05d8c45b 100644 --- a/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/InitDatabaseVOS.java +++ b/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/InitDatabaseVOS.java @@ -80,8 +80,8 @@ public class InitDatabaseVOS extends ca.nrc.cadc.db.version.InitDatabase { private static final Logger log = Logger.getLogger(InitDatabaseVOS.class); public static final String MODEL_NAME = "vospace-inventory"; - public static final String MODEL_VERSION = "0.3"; - public static final String PREV_MODEL_VERSION = "0.2"; + public static final String MODEL_VERSION = "0.15"; + public static final String PREV_MODEL_VERSION = "0.3"; static String[] CREATE_SQL = new String[] { "generic.ModelVersion.sql", @@ -93,7 +93,7 @@ public class InitDatabaseVOS extends 
ca.nrc.cadc.db.version.InitDatabase { }; static String[] UPGRADE_SQL = new String[] { - "generic.HarvestState.sql", + "vospace.upgrade-0.15.sql", "generic.permissions.sql" }; diff --git a/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/NodeDAO.java b/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/NodeDAO.java index 58033b79..bee20948 100644 --- a/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/NodeDAO.java +++ b/cadc-inventory-db/src/main/java/org/opencadc/vospace/db/NodeDAO.java @@ -68,11 +68,13 @@ package org.opencadc.vospace.db; import ca.nrc.cadc.io.ResourceIterator; +import java.net.URI; import java.util.UUID; import org.apache.log4j.Logger; import org.opencadc.inventory.db.AbstractDAO; import org.opencadc.inventory.db.SQLGenerator; import org.opencadc.vospace.ContainerNode; +import org.opencadc.vospace.DataNode; import org.opencadc.vospace.Node; import org.springframework.jdbc.BadSqlGrammarException; import org.springframework.jdbc.core.JdbcTemplate; @@ -88,21 +90,15 @@ public NodeDAO() { super(true); } - // needed by vault migration tool: untested public NodeDAO(boolean origin) { super(origin); } @Override public void put(Node val) { - // TBD: caller can assign parent or parentID before put, but here we need - // parentID and it must be assigned before metaChecksum compute in super.put() - if (val.parentID == null && val.parent != null) { - val.parentID = val.parent.getID(); - } super.put(val); } - + @Override public Node lock(Node n) { if (n == null) { @@ -113,6 +109,7 @@ public Node lock(Node n) { } public Node get(UUID id) { + checkInit(); return super.get(Node.class, id); } @@ -135,6 +132,25 @@ public Node get(ContainerNode parent, String name) { throw new RuntimeException("BUG: handleInternalFail did not throw"); } + public DataNode getDataNode(URI storageID) { + checkInit(); + log.debug("GET: " + storageID); + long t = System.currentTimeMillis(); + + try { + JdbcTemplate jdbc = new JdbcTemplate(dataSource); + 
SQLGenerator.NodeGet get = (SQLGenerator.NodeGet) gen.getEntityGet(Node.class); + get.setStorageID(storageID); + return (DataNode) get.execute(jdbc); + } catch (BadSqlGrammarException ex) { + handleInternalFail(ex); + } finally { + long dt = System.currentTimeMillis() - t; + log.debug("GET: " + storageID + " " + dt + "ms"); + } + throw new RuntimeException("BUG: handleInternalFail did not throw"); + } + public boolean isEmpty(ContainerNode parent) { checkInit(); log.debug("isEmpty: " + parent.getID()); @@ -159,6 +175,14 @@ public void delete(UUID id) { super.delete(Node.class, id); } + /** + * Get iterator of child nodes. + * + * @param parent the container node to list + * @param limit max number of nodes to return, or null + * @param start list starting point, or null + * @return iterator of child nodes matching the arguments + */ public ResourceIterator iterator(ContainerNode parent, Integer limit, String start) { if (parent == null) { throw new IllegalArgumentException("childIterator: parent cannot be null"); diff --git a/cadc-inventory-db/src/main/resources/vospace.Node.sql b/cadc-inventory-db/src/main/resources/vospace.Node.sql index e52a5ed8..3e23b5b2 100644 --- a/cadc-inventory-db/src/main/resources/vospace.Node.sql +++ b/cadc-inventory-db/src/main/resources/vospace.Node.sql @@ -10,7 +10,7 @@ create table .Node ( isLocked boolean, readOnlyGroups text, readWriteGroups text, - + -- store all props in a 2D array properties text[][], @@ -19,7 +19,10 @@ create table .Node ( -- DataNode busy boolean, + bytesUsed bigint, + -- Artifact.uri and Artifact.uriBucket storageID varchar(512), + storageBucket varchar(5), -- LinkNode target text, @@ -29,6 +32,12 @@ create table .Node ( id uuid not null primary key ); +-- usage: vault path navigation create unique index node_parent_child on .Node(parentID,name); +-- usage: Node metadata-sync create index node_lastmodified on .Node(lastModified); + +-- usage: vault incremental Artifact to Node for bytesUsed +-- usage: vault 
Node vs Artifact validation +create unique index node_storageID on .Node(storageID); \ No newline at end of file diff --git a/cadc-inventory-db/src/main/resources/vospace.upgrade-0.15.sql b/cadc-inventory-db/src/main/resources/vospace.upgrade-0.15.sql new file mode 100644 index 00000000..bfb33166 --- /dev/null +++ b/cadc-inventory-db/src/main/resources/vospace.upgrade-0.15.sql @@ -0,0 +1,7 @@ + +alter table .Node + add column bytesUsed bigint, + add column storageBucket varchar(5) +; + +create unique index node_storageID on .Node(storageID); \ No newline at end of file diff --git a/vault-quota/Design.md b/vault-quota/Design.md index d6b167a9..769f8503 100644 --- a/vault-quota/Design.md +++ b/vault-quota/Design.md @@ -1,18 +1,45 @@ # vault quota design/algorithms The definitive source of content-length (file size) of a DataNode comes from the -`inventory.Artifact` table and it not known until a PUT to storage is completed. +`inventory.Artifact` table and is not known until a PUT to storage is completed. In the case of a `vault` service co-located with a single storage site (`minoc`), the new Artifact is visible in the database as soon as the PUT to `minoc` is completed. In the case of a `vault` service co-located with a global SI, the new Artifact is visible in the database once it is synced from the site of the PUT to the global database by `fenwick` (or worst case: `ratik`). +## NOTE +This design was for supporting propagation of space used up the tree so that +allocation space used was dynamically updated as content was modified. While the +algorithm for doing that is nominally viable, the algorithm to validate and repair +incorrect container node sizes in a live system is excessively complex and probably +impossible to implement in practice (deadlocks, excessive database load and query +processing, etc). + +**This design will not be completed and implemented** and is retained here for future +reference. 
+ ## TODO The design below only takes into account incremental propagation of space used by stored files. It is not complete/verified until we also come up with a validation algorithm that can detect and fix discrepancies in a live `vault`. +## operations that affect node size +The following operations affect node size: +* delete node removes the node and applies a negative delta to parent +* recursive delete will need to update twice as many nodes in small transactions +* move node applies a negative delta to previous parent and positive delta to new parent +* copy applies a positive delta to new parent +* transfer negotiation needs to check allocationNode quota vs size to allow a put to proceed + +This has to be done entirely inside the NodePersistence implementation; that should be feasible +since the argument of NodePersistence methods is the previously retrieved node with the full parent +tree intact. It's not clear which of these will require changes in cadc-vos-server, but if they do +it will need to be possible for them to be optional and/or gracefully not do anything. + +In any case, any solution with container node delta(s) is inherently multi-threaded because +user requests can modify them. + ## DataNode size algorithm: This is an event watcher that gets Artifact events (after a PUT) and intiates the propagation of sizes (space used). @@ -51,7 +78,7 @@ for each ContainerNode: The above sequence finds candidate propagations, locks (order: parent-child), and applies the propagation. This moves the outstanding delta up the tree one level. If the sequence acts on multiple child containers before the parent, the delta(s) naturally -_merge_ and there are fewer larger delta propagations in the upper part of the tree. +_merge_ and fewer delta propagations occur in the upper part of the tree. 
The most generic implementation is to iterate over container nodes: ``` @@ -105,27 +132,24 @@ Iterator niter = nodeDAO.iterator(bucket); // storageBucket,stora ### ContainerNode vs child nodes discrepancies These can be validated in ``` -discrepancy 1: container size > sum(child sizes) -explanation: un-propagated delete -evidence: sum(child delta) < 0 -action: none - -discrepancy 1: container size > sum(child sizes) -explanation: bug -evidence: sum(child delta) == 0 -action: fix container size, set container.delta - -discrepancy 1: container size < sum(child sizes) -explanation: un-propagated delta -evidence: sum(child delta) > 0 +discrepancy 1: container size != sum(child size) +explanation: un-propagated delta from put or delete +evidence: sum(child delta) != 0 action: none -discrepancy 1: container size < sum(child sizes) +discrepancy 2: container size != sum(child size) explanation: bug evidence: sum(child delta) == 0 -action: fix container size, set container.delta +action: fix?? container size, set container.delta ``` -Required lock order: locks the parent of a parent-children relationship. +Required lock order: locks the parent of a parent-children relationship so propagations are blocked, +then do the aggregate query (select sum(child size), sum(child delta) where parentID=?) +but child state is still not stable (delete child node, move child out, copy/move node in, +sync child nodes from remote) so all of these would have to lock in the same order to avoid deadlock. +I don't see any way to avoid deadlocks when user requests can lock multiple nodes. + +Recursive delete, container size propagation, datanode validation, and container validation will +all potentially modify child delta(s). 
The most generic implementation is to iterate over container nodes: ``` diff --git a/vault-quota/NodeSize.md b/vault-quota/NodeSize.md new file mode 100644 index 00000000..fd48db42 --- /dev/null +++ b/vault-quota/NodeSize.md @@ -0,0 +1,85 @@ +# vault quota design/algorithms + +The definitive source of content-length (file size) of a DataNode comes from the +`inventory.Artifact` table and is not known until a PUT to storage is completed. +In the case of a `vault` service co-located with a single storage site (`minoc`), +the new Artifact is visible in the database as soon as the PUT to `minoc` is +completed. In the case of a `vault` service co-located with a global SI, the new +Artifact is visible in the database once it is synced from the site of the PUT to +the global database by `fenwick` (or worst case: `ratik`). + +## incremental DataNode size algorithm +DataNode(s) require the `bytesUsed` be set so that sizes can be output from listing +container nodes without a join or query to the artifact table. + +This is an event watcher that gets Artifact events (after a PUT) and initiates the +propagation of sizes (space used). +``` +track progress using HarvestState (source, name: TBD) +incremental query for new artifacts in lastModified order +for each new Artifact: + query for DataNode (storageID = artifact.uri) + if Artifact.contentLength != Node.size: + start txn + lock datanode + recheck size diff + set dataNode.size + update HarvestState + commit txn +``` + +## validate DataNode vs Artifact discrepancies +These can be validated in parallel by multiple threads, subdivide work by bucket if we add +DataNode.storageBucket (== Artifact.uriBucket). 
+ +``` +discrepancy: Artifact exists but DataNode does not +explanation: DataNode created, transfer negotiated, DataNode removed, transfer executed +evidence: DeletedNodeEvent exists +action: remove artifact, create DeletedArtifactEvent + +discrepancy: Artifact exists but DataNode does not +explanation: DataNode created, Artifact put, DataNode deleted, Artifact delete failed +evidence: only possible with singlePool==false +action: remove artifact, create DeletedArtifactEvent + +discrepancy: DataNode exists but Artifact does not +explanation: DataNode created, Artifact never (successfully) put (normal) +evidence: DataNode.nodeSize == 0 or null +action: none + +discrepancy: DataNode exists but Artifact does not +explanation: deleted or lost Artifact +evidence: DataNode.nodeSize != 0 (deleted vs lost: DeletedArtifactEvent exists) +action: lock nodes, fix dataNode and propagate delta to parent + +discrepancy: DataNode.nodeSize != Artifact.contentLength +explanation: artifact written (if DataNode.size > 0: replaced) +action: lock nodes, fix DataNode and propagate delta to parent +``` +Required lock order: child-parent or parent-child OK. + +The most generic implementation is a merge join of two iterators (see ratik, tantar): +``` +Iterator aiter = artifactDAO.iterator(vaultNamespace, bucket); // uriBucket,uri order +Iterator niter = nodeDAO.iterator(bucket); // storageBucket,storageID order +``` + +## database changes required +note: all field and column names TBD +* add `transient Long bytesUsed` to ContainerNode and DataNode +* add `bytesUsed` to the `vospace.Node` table +* add `storageBucket` to DataNode?? 
TBD +* add `storageBucket` to `vospace.Node` table + +## cadc-inventory-db API required immediately +* incremental sync query/iterator: ArtifactDAO.iterator(Namespace ns, String uriBucketPrefix, Date minLastModified, boolean ordered) + order by lastModified if set +* lookup DataNode by storageID: NodeDAO.getDataNode(URI storageID) +* indices to support new queries + +## cadc-inventory-db API required later (tentative) +* validate-by-bucket: use ArtifactDAO.iterator(String uriBucketPrefix, boolean ordered, Namespace ns) +* validate-by-bucket: NodeDAO.dataNodeIterator(String storageBucketPrefix, boolean ordered) +* indices to support new queries + diff --git a/vault/README.md b/vault/README.md index 02e701ea..108357da 100644 --- a/vault/README.md +++ b/vault/README.md @@ -83,6 +83,9 @@ org.opencadc.vault.resourceID = ivo://{authority}/{name} # consistency settings org.opencadc.vault.consistency.preventNotFound=true|false +# (optional) identify which container nodes are allocations +org.opencadc.vault.allocationParent = {top level node} + # vault database settings org.opencadc.vault.inventory.schema = {inventory schema name} org.opencadc.vault.vospace.schema = {vospace schema name} @@ -102,6 +105,15 @@ _all known_ sites. It only makes sense to enable this when `vault` is running in `raven` and/or `fenwick` instances syncing artifact metadata. This feature introduces an overhead for the genuine not-found cases: transfer negotiation to GET the file that was never PUT. +The _allocationParent_ is a path to a container node (directory) which contains space allocations. An allocation +is owned by a user (usually different from the _rootOwner_ admin user) who is responsible for the allocation +and all content therein. The owner of an allocation is granted additional permissions within their +allocation (they can read/write/delete anything) so the owner cannot be blocked from access to any content +within their allocation. This probably only matters for multi-user projects. 
Multiple _allocationParent_(s) may +be configured to organise the top level of the content (e.g. /home and /projects). Paths configured to be +_allocationParent_(s) will be automatically created (if necessary), owned by the _rootOwner_, and will be +anonymously readable (public). Limitation: only a single level of top-level _allocationParent_(s) are supported. + The _inventory.schema_ name is the name of the database schema used for all inventory database objects. This currently must be "inventory" due to configuration limitations in luskan. diff --git a/vault/src/intTest/java/org/opencadc/vault/Constants.java b/vault/src/intTest/java/org/opencadc/vault/Constants.java new file mode 100644 index 00000000..c07bdad0 --- /dev/null +++ b/vault/src/intTest/java/org/opencadc/vault/Constants.java @@ -0,0 +1,92 @@ +/* +************************************************************************ +******************* CANADIAN ASTRONOMY DATA CENTRE ******************* +************** CENTRE CANADIEN DE DONNÉES ASTRONOMIQUES ************** +* +* (c) 2024. (c) 2024. +* Government of Canada Gouvernement du Canada +* National Research Council Conseil national de recherches +* Ottawa, Canada, K1A 0R6 Ottawa, Canada, K1A 0R6 +* All rights reserved Tous droits réservés +* +* NRC disclaims any warranties, Le CNRC dénie toute garantie +* expressed, implied, or énoncée, implicite ou légale, +* statutory, of any kind with de quelque nature que ce +* respect to the software, soit, concernant le logiciel, +* including without limitation y compris sans restriction +* any warranty of merchantability toute garantie de valeur +* or fitness for a particular marchande ou de pertinence +* purpose. NRC shall not be pour un usage particulier. 
+* liable in any event for any Le CNRC ne pourra en aucun cas +* damages, whether direct or être tenu responsable de tout +* indirect, special or general, dommage, direct ou indirect, +* consequential or incidental, particulier ou général, +* arising from the use of the accessoire ou fortuit, résultant +* software. Neither the name de l'utilisation du logiciel. Ni +* of the National Research le nom du Conseil National de +* Council of Canada nor the Recherches du Canada ni les noms +* names of its contributors may de ses participants ne peuvent +* be used to endorse or promote être utilisés pour approuver ou +* products derived from this promouvoir les produits dérivés +* software without specific prior de ce logiciel sans autorisation +* written permission. préalable et particulière +* par écrit. +* +* This file is part of the Ce fichier fait partie du projet +* OpenCADC project. OpenCADC. +* +* OpenCADC is free software: OpenCADC est un logiciel libre ; +* you can redistribute it and/or vous pouvez le redistribuer ou le +* modify it under the terms of modifier suivant les termes de +* the GNU Affero General Public la “GNU Affero General Public +* License as published by the License” telle que publiée +* Free Software Foundation, par la Free Software Foundation +* either version 3 of the : soit la version 3 de cette +* License, or (at your option) licence, soit (à votre gré) +* any later version. toute version ultérieure. +* +* OpenCADC is distributed in the OpenCADC est distribué +* hope that it will be useful, dans l’espoir qu’il vous +* but WITHOUT ANY WARRANTY; sera utile, mais SANS AUCUNE +* without even the implied GARANTIE : sans même la garantie +* warranty of MERCHANTABILITY implicite de COMMERCIALISABILITÉ +* or FITNESS FOR A PARTICULAR ni d’ADÉQUATION À UN OBJECTIF +* PURPOSE. See the GNU Affero PARTICULIER. Consultez la Licence +* General Public License for Générale Publique GNU Affero +* more details. pour plus de détails. 
+* +* You should have received Vous devriez avoir reçu une +* a copy of the GNU Affero copie de la Licence Générale +* General Public License along Publique GNU Affero avec +* with OpenCADC. If not, see OpenCADC ; si ce n’est +* . pas le cas, consultez : +* . +* +************************************************************************ +*/ + +package org.opencadc.vault; + +import ca.nrc.cadc.util.FileUtil; +import java.io.File; +import java.net.URI; +import org.apache.log4j.Logger; +import org.opencadc.gms.GroupURI; + +/** + * + * @author pdowler + */ +public class Constants { + private static final Logger log = Logger.getLogger(Constants.class); + + static URI RESOURCE_ID = URI.create("ivo://opencadc.org/vault"); + + static File ADMIN_CERT = FileUtil.getFileFromResource("vault-test.pem", Constants.class); + static File ALT_CERT = FileUtil.getFileFromResource("vault-auth-test.pem", Constants.class); + + static GroupURI ALT_GROUP = new GroupURI(URI.create("ivo://cadc.nrc.ca/gms?opencadc-vospace-test")); + + private Constants() { + } +} diff --git a/vault/src/intTest/java/org/opencadc/vault/FilesTest.java b/vault/src/intTest/java/org/opencadc/vault/FilesTest.java index c0a64bcd..5002c5fe 100644 --- a/vault/src/intTest/java/org/opencadc/vault/FilesTest.java +++ b/vault/src/intTest/java/org/opencadc/vault/FilesTest.java @@ -87,9 +87,9 @@ public class FilesTest extends org.opencadc.conformance.vos.FilesTest { Log4jInit.setLevel("org.opencadc.vospace", Level.DEBUG); } - private static File ADMIN_CERT = FileUtil.getFileFromResource("vault-test.pem", FilesTest.class); - public FilesTest() { - super(URI.create("ivo://opencadc.org/vault"), ADMIN_CERT); + super(Constants.RESOURCE_ID, Constants.ADMIN_CERT); + + enableTestDataNodePermission(Constants.ALT_CERT); } } diff --git a/vault/src/intTest/java/org/opencadc/vault/NodesTest.java b/vault/src/intTest/java/org/opencadc/vault/NodesTest.java index 869ca002..3d8ec5ca 100644 --- 
a/vault/src/intTest/java/org/opencadc/vault/NodesTest.java +++ b/vault/src/intTest/java/org/opencadc/vault/NodesTest.java @@ -88,13 +88,10 @@ public class NodesTest extends org.opencadc.conformance.vos.NodesTest { Log4jInit.setLevel("org.opencadc.vospace", Level.DEBUG); } - private static File ADMIN_CERT = FileUtil.getFileFromResource("vault-test.pem", NodesTest.class); - public NodesTest() { - super(URI.create("ivo://opencadc.org/vault"), ADMIN_CERT); + super(Constants.RESOURCE_ID, Constants.ADMIN_CERT); - File altCert = FileUtil.getFileFromResource("vault-auth-test.pem", NodesTest.class); - enablePermissionTests(new GroupURI(URI.create("ivo://cadc.nrc.ca/gms?opencadc-vospace-test")), altCert); + enablePermissionTests(Constants.ALT_GROUP, Constants.ALT_CERT); // vault does not check the actual groups in the permission props tests, hence they can be made up. enablePermissionPropsTest(new GroupURI(URI.create("ivo://myauth/gms?gr1")), new GroupURI(URI.create("ivo://myauth/gms?gr2"))); diff --git a/vault/src/intTest/java/org/opencadc/vault/TransferTest.java b/vault/src/intTest/java/org/opencadc/vault/TransferTest.java index f1392e1e..0fe0a6b8 100644 --- a/vault/src/intTest/java/org/opencadc/vault/TransferTest.java +++ b/vault/src/intTest/java/org/opencadc/vault/TransferTest.java @@ -67,23 +67,9 @@ package org.opencadc.vault; -import ca.nrc.cadc.db.ConnectionConfig; -import ca.nrc.cadc.db.DBConfig; -import ca.nrc.cadc.db.DBUtil; -import ca.nrc.cadc.util.FileUtil; import ca.nrc.cadc.util.Log4jInit; -import java.io.File; -import java.net.URI; -import java.util.Map; -import java.util.Set; -import java.util.TreeMap; import org.apache.log4j.Level; import org.apache.log4j.Logger; -import org.junit.Before; -import org.junit.Ignore; -import org.opencadc.inventory.StorageSite; -import org.opencadc.inventory.db.SQLGenerator; -import org.opencadc.inventory.db.StorageSiteDAO; /** * @@ -104,55 +90,8 @@ public class TransferTest extends org.opencadc.conformance.vos.TransferTest { 
static String DATABASE = "cadctest"; static String SCHEMA = "inventory"; - private static File ADMIN_CERT = FileUtil.getFileFromResource("vault-test.pem", NodesTest.class); - public TransferTest() { - super(URI.create("ivo://opencadc.org/vault"), ADMIN_CERT); - } - - @Before - public void checkGlobal() throws Exception { - // make sure inventory.StorageSite has a readable/writable ivo://opencadc.org/minoc - - try { - DBConfig dbrc = new DBConfig(); - ConnectionConfig cc = dbrc.getConnectionConfig(SERVER, DATABASE); - DBUtil.createJNDIDataSource("jdbc/inventory", cc); - - Map config = new TreeMap(); - config.put(SQLGenerator.class.getName(), SQLGenerator.class); - config.put("jndiDataSourceName", "jdbc/inventory"); - config.put("invSchema", SCHEMA); - config.put("genSchema", SCHEMA); - - StorageSiteDAO dao = new StorageSiteDAO(false); - dao.setConfig(config); - Set sites = dao.list(); - if (sites.isEmpty()) { - StorageSite ss = new StorageSite(URI.create("ivo://opencadc.org/minoc"), "vault-test-minoc", true, true); - dao.put(ss); - log.info("created record in local db: " + ss); - } - sites = dao.list(); - for (StorageSite ss : sites) { - log.info("storage sites in local db: " + ss); - } - } catch (Exception ex) { - log.error("setup failed", ex); - throw ex; - } - } - - @Override - public void asyncMoveTest() { - super.asyncMoveTest(); - } - - @Ignore - @Override - public void syncPushPullTest() { - super.syncPushPullTest(); + super(Constants.RESOURCE_ID, Constants.ADMIN_CERT); + enableTestDataNodePermission(Constants.ALT_GROUP, Constants.ALT_CERT); } - - } diff --git a/vault/src/main/java/org/opencadc/vault/NodePersistenceImpl.java b/vault/src/main/java/org/opencadc/vault/NodePersistenceImpl.java index 0afa377a..e3e98cee 100644 --- a/vault/src/main/java/org/opencadc/vault/NodePersistenceImpl.java +++ b/vault/src/main/java/org/opencadc/vault/NodePersistenceImpl.java @@ -80,10 +80,12 @@ import java.io.IOException; import java.net.URI; import java.text.DateFormat; 
+import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; import java.util.Date; import java.util.Iterator; +import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.Set; @@ -160,6 +162,7 @@ public class NodePersistenceImpl implements NodePersistence { private final boolean singlePool; private final ContainerNode root; + private final List allocationParents = new ArrayList<>(); private final Namespace storageNamespace; private final boolean localGroupsOnly; @@ -191,6 +194,35 @@ public NodePersistenceImpl(URI resourceID) { root.ownerID = identityManager.toOwner(root.owner); root.isPublic = true; root.inheritPermissions = false; + + // allocations + for (String ap : VaultInitAction.getAllocationParents(config)) { + if (ap.isEmpty()) { + // allocations are in root + allocationParents.add(root); + log.info("allocationParent: /"); + } else { + try { + + // simple top-level names only + ContainerNode cn = (ContainerNode) get(root, ap); + String str = ""; + if (cn == null) { + cn = new ContainerNode(ap); + cn.parent = root; + str = "created/"; + } + cn.isPublic = true; + cn.owner = root.owner; + cn.inheritPermissions = false; + put(cn); + allocationParents.add(cn); + log.info(str + "loaded allocationParent: /" + cn.getName()); + } catch (NodeNotSupportedException bug) { + throw new RuntimeException("BUG: failed to update isPublic=true on allocationParent " + ap, bug); + } + } + } String ns = config.getFirstPropertyValue(VaultInitAction.STORAGE_NAMESPACE_KEY); this.storageNamespace = new Namespace(ns); @@ -265,6 +297,37 @@ public ContainerNode getRootNode() { return root; } + @Override + public boolean isAllocation(ContainerNode cn) { + if (cn.parent == null) { + return false; // root is never an allocation + } + ContainerNode p = cn.parent; + for (ContainerNode ap : allocationParents) { + if (p.getID().equals(ap.getID())) { + return true; + } + } + return false; + } + + private boolean 
absoluteEquals(ContainerNode c1, ContainerNode c2) { + // note: cavern does not use/preserve Node.id except for root + if (!c1.getName().equals(c2.getName())) { + return false; + } + // same name, check parents + if (c1.parent == null && c2.parent == null) { + // both root + return true; + } + if (c1.parent == null || c2.parent == null) { + // one is root + return false; + } + return absoluteEquals(c1.parent, c2.parent); + } + @Override public Set getAdminProps() { return Collections.unmodifiableSet(ADMIN_PROPS); @@ -324,7 +387,6 @@ public Node get(ContainerNode parent, String name) throws TransientException { ret.getProperties().add(new NodeProperty(VOS.PROPERTY_URI_CONTENTDATE, df.format(cd))); } - ret.getProperties().add(new NodeProperty(VOS.PROPERTY_URI_CONTENTLENGTH, a.getContentLength().toString())); // assume MD5 ret.getProperties().add(new NodeProperty(VOS.PROPERTY_URI_CONTENTMD5, a.getContentChecksum().getSchemeSpecificPart())); @@ -334,9 +396,17 @@ public Node get(ContainerNode parent, String name) throws TransientException { if (a.contentType != null) { ret.getProperties().add(new NodeProperty(VOS.PROPERTY_URI_TYPE, a.contentType)); } - } else { - // default size to 0 - ret.getProperties().add(new NodeProperty(VOS.PROPERTY_URI_CONTENTLENGTH, "0")); + // TODO: a.getContentLength() is correct, but might differ from dn.bytesUsed due to eventual consistency + // child listing iterator can only report dn.bytesUsed + // correct or consistency with listing?? 
+ + // currently needed for consistency-requiring FilesTest + log.warn("DataNode.bytesUsed: " + dn.bytesUsed + " -> " + a.getContentLength()); + dn.bytesUsed = a.getContentLength(); + } + if (dn.bytesUsed == null) { + log.warn("DataNode.bytesUsed: 0 (no artifact)"); + dn.bytesUsed = 0L; // no data stored } } return ret; @@ -360,18 +430,19 @@ public ResourceIterator iterator(ContainerNode parent, Integer limit, Stri } NodeDAO dao = getDAO(); ResourceIterator ret = dao.iterator(parent, limit, start); - return new IdentWrapper(parent, ret); + return new ChildNodeWrapper(parent, ret); } - private class IdentWrapper implements ResourceIterator { + // wrapper to add parent, owner, and props to child nodes + private class ChildNodeWrapper implements ResourceIterator { private final ContainerNode parent; private final ResourceIterator childIter; - private IdentityManager identityManager = AuthenticationUtil.getIdentityManager(); - private Map identCache = new TreeMap<>(); + private final IdentityManager identityManager = AuthenticationUtil.getIdentityManager(); + private final Map identCache = new TreeMap<>(); - IdentWrapper(ContainerNode parent, ResourceIterator childIter) { + ChildNodeWrapper(ContainerNode parent, ResourceIterator childIter) { this.parent = parent; this.childIter = childIter; // prime cache with caller @@ -395,6 +466,8 @@ public boolean hasNext() { public Node next() { Node ret = childIter.next(); ret.parent = parent; + + // owner Subject s = identCache.get(ret.ownerID); if (s == null) { s = identityManager.toSubject(ret.ownerID); @@ -402,6 +475,13 @@ public Node next() { } ret.owner = s; ret.ownerDisplay = identityManager.toDisplayString(ret.owner); + + if (ret instanceof DataNode) { + DataNode dn = (DataNode) ret; + if (dn.bytesUsed == null) { + dn.bytesUsed = 0L; + } + } return ret; } @@ -708,4 +788,9 @@ public void delete(Node node) throws TransientException { } } } + + // needed by vault-migrate to configure a HarvestStateDAO for delete processing + 
public Map getNodeDaoConfig() { + return nodeDaoConfig; + } } diff --git a/vault/src/main/java/org/opencadc/vault/VaultInitAction.java b/vault/src/main/java/org/opencadc/vault/VaultInitAction.java index 6e62a021..e4e51b41 100644 --- a/vault/src/main/java/org/opencadc/vault/VaultInitAction.java +++ b/vault/src/main/java/org/opencadc/vault/VaultInitAction.java @@ -69,6 +69,7 @@ import ca.nrc.cadc.db.DBUtil; import ca.nrc.cadc.rest.InitAction; +import ca.nrc.cadc.util.InvalidConfigException; import ca.nrc.cadc.util.MultiValuedProperties; import ca.nrc.cadc.util.PropertiesReader; import ca.nrc.cadc.util.RsaSignatureGenerator; @@ -76,6 +77,8 @@ import java.net.URI; import java.net.URISyntaxException; import java.security.KeyPair; +import java.util.ArrayList; +import java.util.List; import java.util.Map; import java.util.TreeMap; import javax.naming.Context; @@ -85,11 +88,13 @@ import org.apache.log4j.Logger; import org.opencadc.inventory.Namespace; import org.opencadc.inventory.PreauthKeyPair; +import org.opencadc.inventory.db.HarvestStateDAO; import org.opencadc.inventory.db.PreauthKeyPairDAO; import org.opencadc.inventory.db.SQLGenerator; import org.opencadc.inventory.db.StorageSiteDAO; import org.opencadc.inventory.db.version.InitDatabaseSI; import org.opencadc.inventory.transfer.StorageSiteAvailabilityCheck; +import org.opencadc.vault.metadata.ArtifactSync; import org.opencadc.vospace.db.InitDatabaseVOS; import org.opencadc.vospace.server.NodePersistence; import org.springframework.dao.DataIntegrityViolationException; @@ -115,9 +120,8 @@ public class VaultInitAction extends InitAction { static final String INVENTORY_SCHEMA_KEY = VAULT_KEY + ".inventory.schema"; static final String VOSPACE_SCHEMA_KEY = VAULT_KEY + ".vospace.schema"; static final String SINGLE_POOL_KEY = VAULT_KEY + ".singlePool"; - - static final String ROOT_OWNER = VAULT_KEY + ".root.owner"; // numeric? 
- + static final String ALLOCATION_PARENT = VAULT_KEY + ".allocationParent"; + static final String ROOT_OWNER = VAULT_KEY + ".root.owner"; static final String STORAGE_NAMESPACE_KEY = VAULT_KEY + ".storage.namespace"; MultiValuedProperties props; @@ -125,11 +129,13 @@ public class VaultInitAction extends InitAction { private Namespace storageNamespace; private Map vosDaoConfig; private Map invDaoConfig; + private List allocationParents = new ArrayList<>(); private String jndiNodePersistence; private String jndiPreauthKeys; private String jndiSiteAvailabilities; private Thread availabilityCheck; + private Thread artifactSync; public VaultInitAction() { super(); @@ -143,6 +149,7 @@ public void doInit() { initNodePersistence(); initKeyPair(); initAvailabilityCheck(); + initBackgroundWorkers(); } @Override @@ -162,6 +169,7 @@ public void doShutdown() { } terminateAvailabilityCheck(); + terminateBackgroundWorkers(); } /** @@ -239,6 +247,26 @@ static MultiValuedProperties getConfig() { return mvp; } + static List getAllocationParents(MultiValuedProperties props) { + List ret = new ArrayList<>(); + for (String sap : props.getProperty(ALLOCATION_PARENT)) { + String ap = sap; + if (ap.charAt(0) == '/') { + ap = ap.substring(1); + } + if (ap.length() > 0 && ap.charAt(ap.length() - 1) == '/') { + ap = ap.substring(0, ap.length() - 1); + } + if (ap.indexOf('/') >= 0) { + throw new InvalidConfigException("invalid " + ALLOCATION_PARENT + ": " + sap + + " reason: must be a top-level container node name"); + } + // empty string means root, otherwise child of root + ret.add(ap); + } + return ret; + } + static Map getDaoConfig(MultiValuedProperties props) { Map ret = new TreeMap<>(); ret.put(SQLGenerator.class.getName(), SQLGenerator.class); // not configurable right now @@ -389,9 +417,9 @@ private void initKeyPair() { } } - void initAvailabilityCheck() { + private void initAvailabilityCheck() { StorageSiteDAO storageSiteDAO = new StorageSiteDAO(); - 
storageSiteDAO.setConfig(getDaoConfig(props)); + storageSiteDAO.setConfig(getInvConfig(props)); this.jndiSiteAvailabilities = appName + "-" + StorageSiteAvailabilityCheck.class.getName(); terminateAvailabilityCheck(); @@ -400,7 +428,7 @@ void initAvailabilityCheck() { this.availabilityCheck.start(); } - private final void terminateAvailabilityCheck() { + private void terminateAvailabilityCheck() { if (this.availabilityCheck != null) { try { log.info("terminating AvailabilityCheck Thread..."); @@ -413,6 +441,8 @@ private final void terminateAvailabilityCheck() { this.availabilityCheck = null; } } + + // ugh: bind() is inside StorageSiteAvailabilityCheck but unbind() is here try { InitialContext initialContext = new InitialContext(); initialContext.unbind(this.jndiSiteAvailabilities); @@ -421,4 +451,28 @@ private final void terminateAvailabilityCheck() { } } + private void initBackgroundWorkers() { + HarvestStateDAO hsDAO = new HarvestStateDAO(); + hsDAO.setConfig(getDaoConfig(props)); + + terminateBackgroundWorkers(); + this.artifactSync = new Thread(new ArtifactSync(hsDAO)); + artifactSync.setDaemon(true); + artifactSync.start(); + } + + private void terminateBackgroundWorkers() { + if (this.artifactSync != null) { + try { + log.info("terminating ArtifactSync Thread..."); + this.artifactSync.interrupt(); + this.artifactSync.join(); + log.info("terminating ArtifactSync Thread... 
[OK]"); + } catch (Throwable t) { + log.info("failed to terminate ArtifactSync thread", t); + } finally { + this.artifactSync = null; + } + } + } } diff --git a/vault/src/main/java/org/opencadc/vault/VaultTransferGenerator.java b/vault/src/main/java/org/opencadc/vault/VaultTransferGenerator.java index 97064afb..51fec42a 100644 --- a/vault/src/main/java/org/opencadc/vault/VaultTransferGenerator.java +++ b/vault/src/main/java/org/opencadc/vault/VaultTransferGenerator.java @@ -70,7 +70,6 @@ import ca.nrc.cadc.auth.AuthenticationUtil; import ca.nrc.cadc.auth.IdentityManager; import ca.nrc.cadc.net.ResourceNotFoundException; -import ca.nrc.cadc.reg.Standards; import ca.nrc.cadc.uws.Parameter; import ca.nrc.cadc.vosi.Availability; import java.io.IOException; @@ -193,8 +192,7 @@ private List handleDataNode(DataNode node, String filename, Transfer t log.debug("requested protocol: " + p); if (!protoURIs.contains(p.getUri())) { Protocol anonProto = new Protocol(p.getUri()); - anonProto.setSecurityMethod(Standards.SECURITY_METHOD_ANON); - artifactTrans.getProtocols().add(p); + artifactTrans.getProtocols().add(anonProto); protoURIs.add(p.getUri()); log.debug("Added anon protocol for " + p.getUri()); } diff --git a/vault/src/main/java/org/opencadc/vault/files/GetAction.java b/vault/src/main/java/org/opencadc/vault/files/GetAction.java index b8363eea..d3967e5c 100644 --- a/vault/src/main/java/org/opencadc/vault/files/GetAction.java +++ b/vault/src/main/java/org/opencadc/vault/files/GetAction.java @@ -67,14 +67,18 @@ package org.opencadc.vault.files; +import ca.nrc.cadc.auth.AuthenticationUtil; import ca.nrc.cadc.net.TransientException; import java.net.HttpURLConnection; +import java.net.URI; import java.util.List; +import javax.security.auth.Subject; import org.apache.log4j.Logger; import org.opencadc.vospace.DataNode; import org.opencadc.vospace.NodeProperty; import org.opencadc.vospace.VOS; import org.opencadc.vospace.VOSURI; +import org.opencadc.vospace.server.NodeFault; 
import org.opencadc.vospace.server.Utils; import org.opencadc.vospace.server.transfers.TransferGenerator; import org.opencadc.vospace.transfer.Direction; @@ -96,12 +100,16 @@ public GetAction() { public void doAction() throws Exception { DataNode node = resolveAndSetMetadata(); - for (NodeProperty prop : node.getProperties()) { - if (prop.getKey().equals(VOS.PROPERTY_URI_CONTENTLENGTH) && prop.getValue().equals("0")) { - // empty file - syncOutput.setCode(HttpURLConnection.HTTP_NO_CONTENT); - return; - } + Subject caller = AuthenticationUtil.getCurrentSubject(); + if (!voSpaceAuthorizer.hasSingleNodeReadPermission(node, caller)) { + // TODO: should output requested vos URI here + throw NodeFault.PermissionDenied.getStatus(syncInput.getPath()); + } + + if (node.bytesUsed == null || node.bytesUsed == 0L) { + // empty file + syncOutput.setCode(HttpURLConnection.HTTP_NO_CONTENT); + return; } VOSURI targetURI = localServiceURI.getURI(node); diff --git a/vault/src/main/java/org/opencadc/vault/files/HeadAction.java b/vault/src/main/java/org/opencadc/vault/files/HeadAction.java index 6344ef72..69b68f00 100644 --- a/vault/src/main/java/org/opencadc/vault/files/HeadAction.java +++ b/vault/src/main/java/org/opencadc/vault/files/HeadAction.java @@ -139,15 +139,14 @@ DataNode resolveAndSetMetadata() throws Exception { if (!(node instanceof DataNode)) { throw new IllegalArgumentException("Resolved target is not a data node: " + Utils.getPath(node)); } + + log.debug("node path resolved: " + node.getName() + " type: " + node.getClass().getName()); - VOSURI nodeURI = localServiceURI.getURI(node); - - log.debug("node path resolved: " + node.getName()); - log.debug("node type: " + node.getClass().getCanonicalName()); - syncOutput.setHeader("Content-Disposition", "inline; filename=\"" + nodeURI.getName() + "\""); + DataNode dn = (DataNode) node; + syncOutput.setHeader("Content-Length", dn.bytesUsed); + syncOutput.setHeader("Content-Disposition", "inline; filename=\"" + node.getName() 
+ "\""); syncOutput.setHeader("Content-Type", node.getPropertyValue(VOS.PROPERTY_URI_TYPE)); syncOutput.setHeader("Content-Encoding", node.getPropertyValue(VOS.PROPERTY_URI_CONTENTENCODING)); - syncOutput.setHeader("Content-Length", node.getPropertyValue(VOS.PROPERTY_URI_CONTENTLENGTH)); if (node.getPropertyValue(VOS.PROPERTY_URI_DATE) != null) { Date lastMod = NodeWriter.getDateFormat().parse(node.getPropertyValue(VOS.PROPERTY_URI_DATE)); syncOutput.setLastModified(lastMod); @@ -163,7 +162,7 @@ DataNode resolveAndSetMetadata() throws Exception { } } syncOutput.setCode(200); - return (DataNode)node; + return dn; } } diff --git a/vault/src/main/java/org/opencadc/vault/metadata/ArtifactSync.java b/vault/src/main/java/org/opencadc/vault/metadata/ArtifactSync.java new file mode 100644 index 00000000..ffd1879e --- /dev/null +++ b/vault/src/main/java/org/opencadc/vault/metadata/ArtifactSync.java @@ -0,0 +1,157 @@ +/* +************************************************************************ +******************* CANADIAN ASTRONOMY DATA CENTRE ******************* +************** CENTRE CANADIEN DE DONNÉES ASTRONOMIQUES ************** +* +* (c) 2024. (c) 2024. +* Government of Canada Gouvernement du Canada +* National Research Council Conseil national de recherches +* Ottawa, Canada, K1A 0R6 Ottawa, Canada, K1A 0R6 +* All rights reserved Tous droits réservés +* +* NRC disclaims any warranties, Le CNRC dénie toute garantie +* expressed, implied, or énoncée, implicite ou légale, +* statutory, of any kind with de quelque nature que ce +* respect to the software, soit, concernant le logiciel, +* including without limitation y compris sans restriction +* any warranty of merchantability toute garantie de valeur +* or fitness for a particular marchande ou de pertinence +* purpose. NRC shall not be pour un usage particulier. 
+* liable in any event for any Le CNRC ne pourra en aucun cas +* damages, whether direct or être tenu responsable de tout +* indirect, special or general, dommage, direct ou indirect, +* consequential or incidental, particulier ou général, +* arising from the use of the accessoire ou fortuit, résultant +* software. Neither the name de l'utilisation du logiciel. Ni +* of the National Research le nom du Conseil National de +* Council of Canada nor the Recherches du Canada ni les noms +* names of its contributors may de ses participants ne peuvent +* be used to endorse or promote être utilisés pour approuver ou +* products derived from this promouvoir les produits dérivés +* software without specific prior de ce logiciel sans autorisation +* written permission. préalable et particulière +* par écrit. +* +* This file is part of the Ce fichier fait partie du projet +* OpenCADC project. OpenCADC. +* +* OpenCADC is free software: OpenCADC est un logiciel libre ; +* you can redistribute it and/or vous pouvez le redistribuer ou le +* modify it under the terms of modifier suivant les termes de +* the GNU Affero General Public la “GNU Affero General Public +* License as published by the License” telle que publiée +* Free Software Foundation, par la Free Software Foundation +* either version 3 of the : soit la version 3 de cette +* License, or (at your option) licence, soit (à votre gré) +* any later version. toute version ultérieure. +* +* OpenCADC is distributed in the OpenCADC est distribué +* hope that it will be useful, dans l’espoir qu’il vous +* but WITHOUT ANY WARRANTY; sera utile, mais SANS AUCUNE +* without even the implied GARANTIE : sans même la garantie +* warranty of MERCHANTABILITY implicite de COMMERCIALISABILITÉ +* or FITNESS FOR A PARTICULAR ni d’ADÉQUATION À UN OBJECTIF +* PURPOSE. See the GNU Affero PARTICULIER. Consultez la Licence +* General Public License for Générale Publique GNU Affero +* more details. pour plus de détails. 
+* +* You should have received Vous devriez avoir reçu une +* a copy of the GNU Affero copie de la Licence Générale +* General Public License along Publique GNU Affero avec +* with OpenCADC. If not, see OpenCADC ; si ce n’est +* . pas le cas, consultez : +* . +* +************************************************************************ +*/ + +package org.opencadc.vault.metadata; + +import java.net.URI; +import java.util.Date; +import java.util.UUID; +import org.apache.log4j.Logger; +import org.opencadc.inventory.Artifact; +import org.opencadc.inventory.db.HarvestState; +import org.opencadc.inventory.db.HarvestStateDAO; + +/** + * Main artifact-sync agent that enables incremental sync of Artifact + * metadata to Node. + * + * @author pdowler + */ +public class ArtifactSync implements Runnable { + private static final Logger log = Logger.getLogger(ArtifactSync.class); + + private static final long SHORT_SLEEP = 12000L; + private static final long LONG_SLEEP = 2 * SHORT_SLEEP; + private static final long EVICT_AGE = 3 * LONG_SLEEP; + + private final UUID instanceID = UUID.randomUUID(); + private final HarvestStateDAO dao; + private String name = Artifact.class.getSimpleName(); + private URI resourceID = URI.create("jdbc/inventory"); + + public ArtifactSync(HarvestStateDAO dao) { + this.dao = dao; + + // fenwick setup for production workload: + //dao.setUpdateBufferCount(99); // buffer 99 updates, do every 100 + //dao.setMaintCount(999); // buffer 999 so every 1000 real updates aka every 1e5 events + + // here, we need timestamp updates to retain leader status, so + // dao.setMaintCount(9999); // every 1e4 + } + + @Override + public void run() { + try { + Thread.sleep(SHORT_SLEEP); + + while (true) { + boolean leader = false; + log.debug("check leader " + instanceID); + HarvestState state = dao.get(name, resourceID); + log.debug("check leader " + instanceID + " found: " + state); + if (state.instanceID == null) { + state.instanceID = instanceID; + dao.put(state); + 
state = dao.get(state.getID()); + log.debug("created: " + state); + } + if (instanceID.equals(state.instanceID)) { + log.debug("still the leader..."); + dao.put(state, true); + leader = true; + } else { + // see if we should perform a coup... + Date now = new Date(); + long age = now.getTime() - state.getLastModified().getTime(); + if (age > EVICT_AGE) { + + state.instanceID = instanceID; + dao.put(state); + state = dao.get(state.getID()); + leader = true; + log.debug("EVICTED " + state.instanceID + " because age " + age + " > " + EVICT_AGE); + } + } + + if (leader) { + log.debug("leader " + state.instanceID + " starting worker..."); + // TODO + dao.flushBufferedState(); + Thread.sleep(SHORT_SLEEP / 2L); // for testing + log.debug("idle leader " + state.instanceID + " sleep=" + SHORT_SLEEP); + Thread.sleep(SHORT_SLEEP); + } else { + log.debug("not leader: sleep=" + LONG_SLEEP); + Thread.sleep(LONG_SLEEP); + } + } + } catch (InterruptedException ex) { + log.debug("interrupted - assuming shutdown", ex); + } + } +} diff --git a/vault/src/main/webapp/META-INF/context.xml b/vault/src/main/webapp/META-INF/context.xml index ebd23f61..94ec7b42 100644 --- a/vault/src/main/webapp/META-INF/context.xml +++ b/vault/src/main/webapp/META-INF/context.xml @@ -27,6 +27,19 @@ removeAbandoned="false" testOnBorrow="true" validationQuery="select 123" /> + + +