Skip to content

Commit

Permalink
FIX: XHTML DTD detection
Browse files Browse the repository at this point in the history
- fixed issue affecting DTD detection for XHTML documents;
  - this required adding an extra case for each of the XHTML DTD definitions;
- added string constant for XHTML 1.1; and
- added test cases for DTD detection with and without XML declaration.

Closes #904
  • Loading branch information
carlwilson committed Aug 14, 2024
1 parent e7a456d commit eb932c7
Show file tree
Hide file tree
Showing 2 changed files with 23 additions and 4 deletions.
14 changes: 12 additions & 2 deletions jhove-bbt/scripts/create-1.31-target.sh
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,21 @@ fi

declare -a tiff_affected=("examples/modules/TIFF-hul/cramps.tif.jhove.xml"
"examples/modules/TIFF-hul/text.tif.jhove.xml"
"examples/modules/TIFF-hul/testpage-small.tif.jhove.xml"
)
"examples/modules/TIFF-hul/testpage-small.tif.jhove.xml")
for filename in "${tiff_affected[@]}"
do
if [[ -f "${candidateRoot}/${filename}" ]]; then
cp "${candidateRoot}/${filename}" "${targetRoot}/${filename}"
fi
done

declare -a xhtml_affected=("errors/modules/HTML-hul/xhtml-trans-no-xml-dec.html.jhove.xml"
"errors/modules/HTML-hul/xhtml-strict-no-xml-dec.html.jhove.xml"
"errors/modules/HTML-hul/xhtml-frames-no-xml-dec.html.jhove.xml"
"errors/modules/HTML-hul/xhtml-1-1-no-xml-dec.html.jhove.xml")
for filename in "${xhtml_affected[@]}"
do
if [[ -f "${candidateRoot}/${filename}" ]]; then
cp "${candidateRoot}/${filename}" "${targetRoot}/${filename}"
fi
done
Original file line number Diff line number Diff line change
Expand Up @@ -102,6 +102,7 @@ public class HtmlModule extends ModuleBase {
private static final String HTML_4_0 = "HTML 4.0";
private static final String HTML_4_01 = "HTML 4.01";
private static final String XHTML_1_0 = "XHTML 1.0";
private static final String XHTML_1_1_STR = "XHTML 1.1";

private static final String NAME = "HTML-hul";
private static final String RELEASE = "1.4.3";
Expand Down Expand Up @@ -162,7 +163,7 @@ public class HtmlModule extends ModuleBase {
/* Version names, matching the above indices */
private static final String[] VERSIONNAMES = { null, "HTML 3.2", HTML_4_0,
HTML_4_0, HTML_4_0, HTML_4_01, HTML_4_01, HTML_4_01, XHTML_1_0,
XHTML_1_0, XHTML_1_0, "XHTML 1.1" };
XHTML_1_0, XHTML_1_0, XHTML_1_1_STR };

/* Flag to know if the property TextMDMetadata is to be added */
protected boolean _withTextMD = false;
Expand Down Expand Up @@ -675,7 +676,15 @@ protected int checkDoctype(List elements) {
return HTML_4_01_TRANSITIONAL;
case "-//W3C//DTD HTML 4.01 FRAMESET//EN":
return HTML_4_01_FRAMESET;
default:
case "-//W3C//DTD XHTML 1.0 STRICT//EN":
return XHTML_1_0_STRICT;
case "-//W3C//DTD XHTML 1.0 TRANSITIONAL//EN":
return XHTML_1_0_TRANSITIONAL;
case "-//W3C//DTD XHTML 1.0 FRAMESET//EN":
return XHTML_1_0_FRAMESET;
case "-//W3C//DTD XHTML 1.1//EN":
return XHTML_1_1;
default:
break;
}
} catch (Exception e) {
Expand Down

0 comments on commit eb932c7

Please sign in to comment.