From 07cbcd72d6075fe721b03bb63eec445597469514 Mon Sep 17 00:00:00 2001 From: Karen Hanson Date: Mon, 12 Jun 2023 17:50:09 -0400 Subject: [PATCH 1/3] Update EPUB module information EPUB now managed by W3C. Latest release is still in conjunction with IDPF, so for now they continue to be included in the agent name. Next iteration might switch to W3C only. --- .../portico/jhove/module/EpubModule.java | 22 +++++++++---------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/jhove-ext-modules/src/main/java/org/ithaka/portico/jhove/module/EpubModule.java b/jhove-ext-modules/src/main/java/org/ithaka/portico/jhove/module/EpubModule.java index a73f0b92d..b98d8bf56 100644 --- a/jhove-ext-modules/src/main/java/org/ithaka/portico/jhove/module/EpubModule.java +++ b/jhove-ext-modules/src/main/java/org/ithaka/portico/jhove/module/EpubModule.java @@ -82,9 +82,9 @@ public class EpubModule extends ModuleBase { private static final String FORMATNAME = "EPUB"; private static final String NAME = "EPUB-ptc"; - private static final String RELEASE = "1.2"; - private static final int[] DATE = { 2023, 03, 16 }; - private static final String RIGHTS_YEAR = "2019"; + private static final String RELEASE = "1.3"; + private static final int[] DATE = { 2023, 06, 12 }; + private static final String RIGHTS_YEAR = "2023"; private static final String[] FORMAT = { FORMATNAME }; private static final String COVERAGE = FORMATNAME; private static final String[] MIMETYPE = { EPUB_MEDIATYPE }; @@ -94,18 +94,18 @@ public class EpubModule extends ModuleBase { private static final String NOTE = "This module uses EPUBCheck for testing of EPUB files."; // EPUB agent information - private static final String EPUB_AGENTNAME = "International Digital Publishing Forum"; + private static final String EPUB_AGENTNAME = "International Digital Publishing Forum and World Wide Web Consortium"; private static final AgentType EPUB_AGENTTYPE = AgentType.STANDARD; - private static final String EPUB_AGENTADDRESS = 
"International Digital Publishing Forum (IDPF), " - + "113 Cherry Street, Suite 70-719, Seattle, WA 98104"; - private static final String EPUB_AGENTWEBSITE = "http://idpf.org"; - private static final String EPUB_AGENTEMAIL = "membership@idpf.org"; - private static final String EPUB_AGENTPHONE = "+1-206-451-7250"; + private static final String EPUB_AGENTADDRESS = "W3C Inc, " + + "401 Edgewater Place, Suite 600, Wakefield, MA 01880, USA"; + private static final String EPUB_AGENTWEBSITE = "https://www.w3.org/"; + private static final String EPUB_AGENTEMAIL = "membership@w3.org"; + private static final String EPUB_AGENTPHONE = "+1-339-273-2711"; // EPUB format doc information private static final String EPUB_FORMATDOCTITLE = FORMATNAME; - private static final String EPUB_FORMATDOCDATE = "2019-05-15"; - private static final String EPUB_FORMATDOCURL = "http://www.idpf.org/epub/dir/"; + private static final String EPUB_FORMATDOCDATE = "2023-05-25"; + private static final String EPUB_FORMATDOCURL = "https://www.w3.org/TR/epub/"; // Signatures private static final String EPUB_EXTENSION = ".epub"; From 5989de2dd04b60c02e76e853c0c945154fcf0be5 Mon Sep 17 00:00:00 2001 From: Karen Hanson Date: Fri, 7 Jul 2023 12:58:26 -0400 Subject: [PATCH 2/3] Upgrade to 5.1.0 with corresponding code changes The following changes were made to support EPUBCheck 5.1.0: 1. New version lists resources with fragments making resource list much longer without reflecting new files in the package. Added logic to remove resources with fragments and only list base URL. 2. EPUBLocation changed slightly, updated code to manage support that change. 3. Some redundant messages were removed, which changed some message counts in tests - fixed message counts to reflect correct output. 4. Update validation version to 3.3 - there is no way to determine the minor version number in an EPUB, so validation is always against whatever the latest is. 5. 
CREATION_DATE is missing from the report, I've logged an issue and commented out the relevant lines in tests. It will be fixed in the next maintenance release and I will move to 5.1.1 when available to add creation date back in. 6. A file that was being used to test title has flipped from Well Formed and Valid to Well Formed and Not Valid according to current criteria - updated the test to reflect this. --- jhove-ext-modules/pom.xml | 2 +- .../jhove/module/epub/JhoveRepInfoReport.java | 5 ++- .../portico/jhove/module/EpubModuleTest.java | 38 +++++++++---------- .../module/epub/JhoveRepInfoReportTest.java | 8 ++-- 4 files changed, 28 insertions(+), 25 deletions(-) diff --git a/jhove-ext-modules/pom.xml b/jhove-ext-modules/pom.xml index bee08f9a0..2df8aa220 100644 --- a/jhove-ext-modules/pom.xml +++ b/jhove-ext-modules/pom.xml @@ -13,7 +13,7 @@ 1.0.3 - 4.2.6 + 5.1.0 diff --git a/jhove-ext-modules/src/main/java/org/ithaka/portico/jhove/module/epub/JhoveRepInfoReport.java b/jhove-ext-modules/src/main/java/org/ithaka/portico/jhove/module/epub/JhoveRepInfoReport.java index 054f833dc..5fa0e80de 100644 --- a/jhove-ext-modules/src/main/java/org/ithaka/portico/jhove/module/epub/JhoveRepInfoReport.java +++ b/jhove-ext-modules/src/main/java/org/ithaka/portico/jhove/module/epub/JhoveRepInfoReport.java @@ -72,6 +72,8 @@ public class JhoveRepInfoReport extends MasterReport { protected static final String ISO_DATE_PATTERN = "yyyy-MM-dd'T'HH:mm:ss'Z'"; protected static final String FALLBACK_FORMAT = "application/octet-stream"; + protected static final String FRAGMENT_START = "#"; + public JhoveRepInfoReport(String ePubName) { this.setEpubFileName(PathUtil.removeWorkingDirectory(ePubName)); @@ -146,7 +148,8 @@ public void info(String resource, FeatureEnum feature, String value) { this.references.add(value); break; case RESOURCE: - this.resources.add(value); + String no_fragment = value.split(FRAGMENT_START)[0]; + this.resources.add(no_fragment); break; case DC_LANGUAGE: 
this.language = value; diff --git a/jhove-ext-modules/src/test/java/org/ithaka/portico/jhove/module/EpubModuleTest.java b/jhove-ext-modules/src/test/java/org/ithaka/portico/jhove/module/EpubModuleTest.java index 59b1b7f9b..f8e77ef8f 100644 --- a/jhove-ext-modules/src/test/java/org/ithaka/portico/jhove/module/EpubModuleTest.java +++ b/jhove-ext-modules/src/test/java/org/ithaka/portico/jhove/module/EpubModuleTest.java @@ -74,7 +74,7 @@ public class EpubModuleTest { private static final String EPUB3_TITLE_ENCODING = "src/test/resources/epub/epub3-multiple-renditions.epub"; private static final String EXPECTED_MEDIATYPE = "application/epub+zip"; - private static final String EXPECTED_VERSION_3_2 = "3.2"; + private static final String EXPECTED_VERSION_3_3 = "3.3"; private static final String PNG_MIMETYPE = "image/png"; private static final String XHTML_MIMETYPE = "application/xhtml+xml"; private static final String NCX_MIMETYPE = "application/x-dtbncx+xml"; @@ -99,9 +99,11 @@ public void parseValidEpub3PropertiesTest() throws Exception { assertEquals(0, info.getMessage().size()); // no errors assertEquals("EPUB", info.getFormat()); assertEquals(EXPECTED_MEDIATYPE, info.getMimeType()); - assertEquals(EXPECTED_VERSION_3_2, info.getVersion()); + assertEquals(EXPECTED_VERSION_3_3, info.getVersion()); // these may change, so just check they aren't null - assertNotNull(info.getCreated()); + // note: this property is missing in 5.1.0 of epubcheck but + // due for fix in next maintenance release, uncomment when fixed. 
+ //assertNotNull(info.getCreated()); assertNotNull(info.getLastModified()); Property metadata = info.getProperty(EPUBMETADATA_KEY); @@ -248,7 +250,9 @@ public void parseValidEpub2PropertiesTest() throws Exception { assertEquals(EXPECTED_MEDIATYPE, info.getMimeType()); assertEquals("2.0.1", info.getVersion()); // may change, so just check it isn't null - assertNotNull(info.getCreated()); + // note: this property is missing in 5.1.0 of epubcheck but + // due for fix in next maintenance release, uncomment when fixed. + //assertNotNull(info.getCreated()); Property metadata = info.getProperty(EPUBMETADATA_KEY); Map props = toMap(metadata); @@ -397,7 +401,7 @@ public void parseImproperlyCompressedEpubTest() throws Exception { File epubFile = new File(ZIPPED_EPUB_FILEPATH); RepInfo info = parseAndCheckValidity(epubFile, RepInfo.FALSE, RepInfo.FALSE); assertEquals(EXPECTED_MEDIATYPE, info.getMimeType()); - assertEquals(EXPECTED_VERSION_3_2, info.getVersion()); + assertEquals(EXPECTED_VERSION_3_3, info.getVersion()); assertEquals(1, info.getMessage().size()); assertEquals("PKG-006", info.getMessage().get(0).getId()); } @@ -425,7 +429,7 @@ public void parseNonEpubTest() throws Exception { File epubFile = new File(WRONG_EXT_NOT_AN_EPUB_FILEPATH); RepInfo info = parseAndCheckValidity(epubFile, RepInfo.FALSE, RepInfo.FALSE); List msgs = info.getMessage(); - final int expectedNumMessages = 3; + final int expectedNumMessages = 2; assertEquals(expectedNumMessages, msgs.size()); } @@ -451,7 +455,7 @@ public void parseNonEpubWithEpubExtensionTest() throws Exception { RepInfo info = parseAndCheckValidity(epubFile, RepInfo.FALSE, RepInfo.FALSE); assertEquals(OCTET_MIMETYPE, info.getMimeType()); List msgs = info.getMessage(); - final int expectedNumMessages = 3; + final int expectedNumMessages = 2; assertEquals(expectedNumMessages, msgs.size()); } @@ -508,7 +512,7 @@ public void parseEpubWithMissingFontsTest() throws Exception { Map fontinfo = new HashMap(); font.forEach(f -> 
fontinfo.put(f.getName(), f.getValue())); - // only one font in this file, listed but missing. + // a single font file is listed - it is supposed to be embedded but is missing. assertEquals("Courier", fontinfo.get(PROPNAME_FONTNAME)); assertEquals(true, fontinfo.get(PROPNAME_FONTFILE)); @@ -579,18 +583,12 @@ public void parseEpubMissingOpfTest() throws Exception { File epubFile = new File(EPUB2_MISSING_OPF_FILEPATH); RepInfo info = parseAndCheckValidity(epubFile, RepInfo.FALSE, RepInfo.FALSE); - assertEquals(OCTET_MIMETYPE, info.getMimeType()); - Set msgCodes = new HashSet(); - assertEquals(2, info.getMessage().size()); - Message msg1 = info.getMessage().get(0); - Message msg2 = info.getMessage().get(1); - assertTrue(msg1 instanceof ErrorMessage); - msgCodes.add(msg1.getId()); - assertTrue(msg2 instanceof ErrorMessage); - msgCodes.add(msg2.getId()); + assertEquals(1, info.getMessage().size()); + Message msg = info.getMessage().get(0); + assertTrue(msg instanceof ErrorMessage); + msgCodes.add(msg.getId()); assertTrue(msgCodes.contains("OPF-002")); - assertTrue(msgCodes.contains("RSC-001")); } /** @@ -665,8 +663,8 @@ public void checkSignaturesEpub2WithEncryptionTest() throws Exception { public void parseEpub3TitleEncodingTest() throws Exception { File epubFile = new File(EPUB3_TITLE_ENCODING); String expectedTitle = "महाभारत"; - // well formed and valid - RepInfo info = parseAndCheckValidity(epubFile, RepInfo.TRUE, RepInfo.TRUE); + // well formed but not valid (this is inconsequential to the test, we're just checking title) + RepInfo info = parseAndCheckValidity(epubFile, RepInfo.TRUE, RepInfo.FALSE); Property metadata = info.getProperty(PROPNAME_EPUB_METADATA); Map props = toMap(metadata); diff --git a/jhove-ext-modules/src/test/java/org/ithaka/portico/jhove/module/epub/JhoveRepInfoReportTest.java b/jhove-ext-modules/src/test/java/org/ithaka/portico/jhove/module/epub/JhoveRepInfoReportTest.java index c2c70dd30..312e4d809 100644 --- 
a/jhove-ext-modules/src/test/java/org/ithaka/portico/jhove/module/epub/JhoveRepInfoReportTest.java +++ b/jhove-ext-modules/src/test/java/org/ithaka/portico/jhove/module/epub/JhoveRepInfoReportTest.java @@ -4,6 +4,7 @@ import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; +import java.io.File; import java.text.SimpleDateFormat; import java.util.Arrays; import java.util.Date; @@ -45,9 +46,10 @@ public class JhoveRepInfoReportTest { private static final MessageId WARN_MSG_ID = MessageId.CHK_001; private static final String WARN_MSG = "Consider yourself warned"; private static final String WARN_MSG_SUGGEST = "Don't do it again!"; - - private EPUBLocation messageLoc = EPUBLocation.create("epub.opf"); - private EPUBLocation messageLoc2 = EPUBLocation.create("content.xhtml"); + + private EPUBLocation messageLoc = EPUBLocation.of(new File("epub.opf")); + private EPUBLocation messageLoc2 = EPUBLocation.of(new File("content.xhtml")); + private String messageArg = "fakearg"; From f4d4f139e1d7c1e0cf613772c8d94aff607d9720 Mon Sep 17 00:00:00 2001 From: Carl Wilson Date: Mon, 4 Mar 2024 16:55:23 +0000 Subject: [PATCH 3/3] FIX: Bump ePub -> 1.3 in test audit. --- jhove-bbt/scripts/create-1.29-target.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/jhove-bbt/scripts/create-1.29-target.sh b/jhove-bbt/scripts/create-1.29-target.sh index 2939733f7..d7d4dc2f7 100755 --- a/jhove-bbt/scripts/create-1.29-target.sh +++ b/jhove-bbt/scripts/create-1.29-target.sh @@ -55,3 +55,6 @@ echo "TEST BASELINE: Creating baseline" # Simply copy baseline for now we're not making any changes echo " - copying ${baselineRoot} baseline to ${targetRoot}" cp -R "${baselineRoot}" "${targetRoot}" + +# Update release details for ePub module +find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i 's/^ <module release="1.2">EPUB-ptc<\/module>$/ <module release="1.3">EPUB-ptc<\/module>/' {} \;