Skip to content

Commit

Permalink
FIX: Module reporting XML schema errors
Browse files Browse the repository at this point in the history
Fixed minor issues in the XmlHandler, JPEG2000 and HTML modules that meant reporting output was not valid against the schema.

- replaced the `XmlHandler:cleanURIString()` implementation with something less radical that doesn't produce invalid XML in testing;
- fixed badly formatted dates in the HTML module document details;
- fixed bug in the JPEG2000 module where empty `CompositeLayerHeader` lists were created, which fail report schema validation;
- bumped the JPEG2000 and HTML module versions and release dates;
- added fixes to test scripts for the above; and
- removed some commented out code from XML reporting module.
  • Loading branch information
carlwilson committed Oct 28, 2024
1 parent 08b35f9 commit 84bcfff
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 123 deletions.
31 changes: 31 additions & 0 deletions jhove-bbt/scripts/create-1.33-target.sh
Original file line number Diff line number Diff line change
Expand Up @@ -56,3 +56,34 @@ echo "TEST BASELINE: Creating baseline"
echo " - copying ${baselineRoot} baseline to ${targetRoot}"
cp -R "${baselineRoot}" "${targetRoot}"

# HTML module: move the baseline from release 1.4.4 (2024-08-22) to 1.4.5 (2024-11-27).
# Per-file HTML reports carry the release and date on the reportingModule element.
find "${targetRoot}" -type f -name "*.html.jhove.xml" -exec sed -i \
  -e 's/<reportingModule release="1.4.4" date="2024-08-22">HTML-hul<\/reportingModule>/<reportingModule release="1.4.5" date="2024-11-27">HTML-hul<\/reportingModule>/' \
  {} \;
# The application audit lists each module with its release number.
find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i \
  -e 's/<module release="1.4.4">HTML-hul<\/module>/<module release="1.4.5">HTML-hul<\/module>/' \
  {} \;
# The module audit needs the release, the release date and the two specification
# dates that were rewritten from DD-MM-YYYY to YYYY-MM-DD. The expressions are
# applied in the same order as before, in a single pass over each file.
find "${targetRoot}" -type f -name "audit-HTML-hul.jhove.xml" -exec sed -i \
  -e 's/<release>1.4.4<\/release>/<release>1.4.5<\/release>/' \
  -e 's/2024-08-22/2024-11-27/' \
  -e 's/01-08-2002/2002-08-01/' \
  -e 's/31-05-2001/2001-05-31/' \
  {} \;

# JPEG 2000 module: move the baseline from release 1.4.4 (2023-03-16) to 1.4.5 (2024-11-27).
# The same reportingModule rewrite applies to every report type the module produces,
# so the three file-name patterns are matched by a single find.
find "${targetRoot}" -type f \
  \( -name "*.jp2.jhove.xml" -o -name "*.jpx.jhove.xml" -o -name "*.md.jhove.xml" \) \
  -exec sed -i \
  -e 's/<reportingModule release="1.4.4" date="2023-03-16">JPEG2000-hul<\/reportingModule>/<reportingModule release="1.4.5" date="2024-11-27">JPEG2000-hul<\/reportingModule>/' \
  {} \;
# The application audit lists each module with its release number.
find "${targetRoot}" -type f -name "audit.jhove.xml" -exec sed -i \
  -e 's/<module release="1.4.4">JPEG2000-hul<\/module>/<module release="1.4.5">JPEG2000-hul<\/module>/' \
  {} \;
# The module audit carries both the release number and the release date.
find "${targetRoot}" -type f -name "audit-JPEG2000-hul.jhove.xml" -exec sed -i \
  -e 's/<release>1.4.4<\/release>/<release>1.4.5<\/release>/' \
  -e 's/2023-03-16/2024-11-27/' \
  {} \;

# Reports whose output changed with the XML reporting module's relative URL handling
# are taken straight from the candidate output, when the candidate produced them.
for uriReport in \
  "errors/modules/JPEG2000-hul/ランダム日本語テキスト.jp2.jhove.xml" \
  "errors/modules/JPEG2000-hul/隨機中國文字.jp2.jhove.xml"
do
  if [[ -f "${candidateRoot}/${uriReport}" ]]; then
    cp "${candidateRoot}/${uriReport}" "${targetRoot}/${uriReport}"
  fi
done

# Reports affected by the JPEG 2000 module change that prevents empty
# CompositeLayerHeader lists from being created are taken straight from the
# candidate output, when the candidate produced them.
for layerReport in \
  "errors/modules/JPEG2000-hul/is_jpx.jp2.jhove.xml" \
  "examples/modules/JPEG2000-hul/ROITest.jpx.jhove.xml"
do
  if [[ -f "${candidateRoot}/${layerReport}" ]]; then
    cp "${candidateRoot}/${layerReport}" "${targetRoot}/${layerReport}"
  fi
done
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@

package edu.harvard.hul.ois.jhove.handler;

import java.io.File;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.NumberFormat;
import java.util.Date;
import java.util.Iterator;
Expand Down Expand Up @@ -352,11 +355,6 @@ public void show(RepInfo info) {
_writer.println(margn2
+ element("reportingModule", attr2, module.getName()));
}
/*
* else { String [][] attr2 = { {"severity", "error"} }; _writer.println
* (margn2 + element ("message", attr2,
* "file not found or not readable")); }
*/
Date date = info.getCreated();
if (date != null) {
_writer.println(margn2 + element("created", toDateTime(date)));
Expand Down Expand Up @@ -4455,51 +4453,41 @@ private void writeAESTimeRangePart(String indent, String elementName, AESAudioMe
element(elementName, attributes, String.valueOf(timeDesc.getSamples())));
}

/*
* Clean up a URI string by escaping forbidden characters. We assume
* (perhaps dangerously) that a % is the start of an already escaped
* hexadecimal sequence.
/**
* Returns a path normalised URI from the presented string path.
* Solution based upon the following post from Eugene Yokota:
* https://eed3si9n.com/encoding-file-path-as-URI-reference/
*/
private String cleanURIString(String uri) {
StringBuffer sb = new StringBuffer(uri.length() * 2);
boolean change = false;
for (int i = 0; i < uri.length(); i++) {
char c = uri.charAt(i);
if ((c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z')
|| (c >= '0' && c <= '9') || (c == '%') || // assume it's an
// escape
("-_.!~*'();/?:@=+$,".indexOf(c) >= 0)) {
sb.append(c);
} else {
int cval = c;

// More significant hex digit
int mshd = (cval >> 4);
if (mshd >= 10) {
mshd += 'A' - 10;
} else {
mshd += '0';
}
sb.append('%');
sb.append((char) mshd);

// Less significant hex digit
int lshd = (cval & 0X0F);
if (lshd >= 10) {
lshd += 'A' - 10;
private static final String cleanURIString(final String path) {
File input = new File(path);
final boolean isWindows = System.getProperty("os.name").toLowerCase(Locale.ENGLISH).contains("windows");
final String fileScheme = "file";
try {
if (isWindows && !path.isEmpty() && path.startsWith(Character.toString(File.separatorChar))) {
if (path.startsWith("\\")) {
return new URI(fileScheme, normaliseToSlash(path), null).toString();
} else {
lshd += '0';
return new URI(fileScheme, "", normaliseToSlash(path), null).toString();
}
sb.append((char) lshd);
change = true;
} else if (input.isAbsolute()) {
return new URI(fileScheme, "", normaliseToSlash(ensureHeadSlash(input.getAbsolutePath())), null)
.toString();
}
return new URI(null, normaliseToSlash(path), null).toString();
} catch (URISyntaxException e) {
// If this fails simply return the original path
return path;
}
// For efficiency, return the original string
// if nothing changed.
if (change) {
return sb.toString();
}
return uri;
}

/**
 * Ensures that a non-empty path begins with the platform file separator.
 *
 * A separator is prepended only when one is missing, so a Windows drive path
 * such as {@code C:\dir} becomes {@code \C:\dir} while a path that already
 * starts with the separator is returned untouched. The previous condition was
 * inverted: it doubled an existing leading separator and left paths without
 * one unchanged.
 *
 * @param name
 *            the path to check, must not be {@code null}
 * @return {@code name} itself when it is empty or already starts with
 *         {@link File#separatorChar}, otherwise {@code name} prefixed with
 *         the separator
 */
private static final String ensureHeadSlash(final String name) {
    final String separator = Character.toString(File.separatorChar);
    if (name.isEmpty() || name.startsWith(separator)) {
        // Nothing to add: empty input stays empty, and an existing head
        // separator must not be duplicated.
        return name;
    }
    return separator + name;
}

/**
 * Converts platform file separators in a path to forward slashes.
 *
 * @param name
 *            the path to convert, must not be {@code null}
 * @return {@code name} unchanged when the platform separator is already
 *         {@code '/'}, otherwise a copy with every
 *         {@link File#separatorChar} replaced by {@code '/'}
 */
private static final String normaliseToSlash(final String name) {
    if (File.separatorChar == '/') {
        return name;
    }
    return name.replace(File.separatorChar, '/');
}

/** Appends a Rational value to a StringBuffer */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ public class HtmlModule extends ModuleBase {
private static final String XHTML_1_1_STR = "XHTML 1.1";

private static final String NAME = "HTML-hul";
private static final String RELEASE = "1.4.4";
private static final int[] DATE = { 2024, 8, 22 };
private static final String RELEASE = "1.4.5";
private static final int[] DATE = { 2024, 11, 27 };
private static final String[] FORMAT = { "HTML" };
private static final String COVERAGE = "HTML 3.2, HTML 4.0 Strict,"
+ "HTML 4.0 Transitional, HTML 4.0 Frameset, "
Expand Down Expand Up @@ -231,7 +231,7 @@ public HtmlModule() {
+ "(Second Edition)",
DocumentType.REPORT);
doc.setPublisher(w3cAgent);
doc.setDate("01-08-2002");
doc.setDate("2002-08-01");
doc.setIdentifier(new Identifier("http://www.w3.org/TR/xhtml1/",
IdentifierType.URL));
_specification.add(doc);
Expand All @@ -240,7 +240,7 @@ public HtmlModule() {
doc = new Document(" XHTML(TM) 1.1 - Module-based XHTML",
DocumentType.REPORT);
doc.setPublisher(w3cAgent);
doc.setDate("31-05-2001");
doc.setDate("2001-05-31");
doc.setIdentifier(new Identifier(
"http://www.w3.org/TR/2001/REC-xhtml11-20010531/",
IdentifierType.URL));
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ public class Jpeg2000Module extends ModuleBase {
******************************************************************/

private static final String NAME = "JPEG2000-hul";
private static final String RELEASE = "1.4.4";
private static final int[] DATE = { 2023, 03, 16 };
private static final String RELEASE = "1.4.5";
private static final int[] DATE = { 2024, 11, 27 };
private static final String[] FORMAT = { "JPEG 2000", "JP2", "JPX" };
private static final String COVERAGE = "JP2 (ISO/IEC 15444-1:2000/"
+ "ITU-T Rec. T.800 (200)), JPX (ISO/IEC 15444-2:2004)";
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,138 +24,129 @@ public class ComposLayerHdrBox extends JP2Box {
private Property channelDefProp;
private Property codestreamRegProp;
private List<Property> colorSpecs;



/**
* Constructor with superbox.
* Constructor with superbox.
*
* @param parent parent superbox of this box
* @param parent parent superbox of this box
*/
public ComposLayerHdrBox(RandomAccessFile raf, BoxHolder parent) {
super(raf, parent);
}

/** Reads the box, putting appropriate information in
* the RepInfo object. setModule, setBoxHeader,
* setRepInfo and setDataInputStream must be called
* before <code>readBox</code> is called.
* <code>readBox</code> must completely consume the
* box, so that the next byte to be read by the
* DataInputStream is the <code>FF</code> byte of the next Box.
/**
* Reads the box, putting appropriate information in
* the RepInfo object. setModule, setBoxHeader,
* setRepInfo and setDataInputStream must be called
* before <code>readBox</code> is called.
* <code>readBox</code> must completely consume the
* box, so that the next byte to be read by the
* DataInputStream is the <code>FF</code> byte of the next Box.
*/
@Override
public boolean readBox() throws IOException {
public boolean readBox() throws IOException {
if (_parentBox != null) {
// Box must be at top level.
wrongBoxContext();
return false;
}
initBytesRead ();
initBytesRead();
hasBoxes = true;
colorSpecs = new LinkedList<> ();
colorSpecs = new LinkedList<>();

// Unlike some other boxes, compositing layer boxes
// are numbered by their order in the file, starting
// with 0. A definite case of design by committee.
// with 0. A definite case of design by committee.
JP2Box box = null;
boolean hasOpacity = false;
boolean hasChannelDef = false;
while (hasNext ()) {
box = (JP2Box) next ();
while (hasNext()) {
box = (JP2Box) next();
if (box == null) {
break;
}
if (box instanceof ColorGroupBox ||
box instanceof OpacityBox ||
box instanceof ChannelDefBox ||
box instanceof CodestreamRegBox ||
box instanceof IPRBox ||
box instanceof ResolutionBox ||
box instanceof LabelBox) {
if (!box.readBox ()) {
return false;
}
if (box instanceof OpacityBox) {
hasOpacity = true;
}
else if (box instanceof ChannelDefBox) {
hasChannelDef = true;
}
if (box instanceof LabelBox) {
label = new Property ("Label",
PropertyType.STRING,
((LabelBox) box).getLabel ());
}
}
else {
box.skipBox ();
box instanceof OpacityBox ||
box instanceof ChannelDefBox ||
box instanceof CodestreamRegBox ||
box instanceof IPRBox ||
box instanceof ResolutionBox ||
box instanceof LabelBox) {
if (!box.readBox()) {
return false;
}
if (box instanceof OpacityBox) {
hasOpacity = true;
} else if (box instanceof ChannelDefBox) {
hasChannelDef = true;
}
if (box instanceof LabelBox) {
label = new Property("Label",
PropertyType.STRING,
((LabelBox) box).getLabel());
}
} else {
box.skipBox();
}
}
if (hasOpacity && hasChannelDef) {
_repInfo.setMessage (new ErrorMessage
(MessageConstants.JPEG2000_HUL_12,
_module.getFilePos ()));
_repInfo.setValid (false);
_repInfo.setMessage(new ErrorMessage(MessageConstants.JPEG2000_HUL_12,
_module.getFilePos()));
_repInfo.setValid(false);
}
finalizeBytesRead ();
List<Property> propList = new ArrayList (4);
finalizeBytesRead();

List<Property> propList = new ArrayList(4);
if (label != null) {
propList.add (label);
propList.add(label);
}
if (!colorSpecs.isEmpty ()) {
propList.add (new Property ("ColorSpecs",
if (!colorSpecs.isEmpty()) {
propList.add(new Property("ColorSpecs",
PropertyType.PROPERTY,
PropertyArity.LIST,
colorSpecs));
}
if (opacityProp != null) {
propList.add (opacityProp);
propList.add(opacityProp);
}
if (channelDefProp != null) {
propList.add (channelDefProp);
propList.add(channelDefProp);
}
if (codestreamRegProp != null) {
propList.add (codestreamRegProp);
propList.add(codestreamRegProp);
}
_module.addComposLayer(new Property
("CompositeLayerHeader",
if (!propList.isEmpty()) {
_module.addComposLayer(new Property("CompositeLayerHeader",
PropertyType.PROPERTY,
PropertyArity.LIST,
propList));
}
return true;
}


/** Add a color specification property. */
protected void addColorSpec (Property p)
{
colorSpecs.add (p);
protected void addColorSpec(Property p) {
colorSpecs.add(p);
}

/** Add an opacity property. */
protected void addOpacity (Property p)
{
protected void addOpacity(Property p) {
opacityProp = p;
}

/** Add channel definition property. */
protected void addChannelDef (Property p)
{
protected void addChannelDef(Property p) {
channelDefProp = p;
}

/** Add codestream registration property. */
protected void addCodestreamReg (Property p)
{
protected void addCodestreamReg(Property p) {
codestreamRegProp = p;
}

/** Returns the name of the Box. */
/** Returns the name of the Box. */
@Override
protected String getSelfPropName ()
{
protected String getSelfPropName() {
return "Compositing Layer Header Box";
}
}

0 comments on commit 84bcfff

Please sign in to comment.