Skip to content

Commit

Permalink
Schematron tests (#286)
Browse files Browse the repository at this point in the history
* use ietf regardless of script encoding choice

* check dates filter added

* a few more date refinements

* update schematron slightly

* update intro

* update build before generating new beta release

* Revert "update build before generating new beta release"

This reverts commit 7212b79.

* update schematron tests in correct file

* update build before new test release
  • Loading branch information
fordmadox committed Mar 20, 2022
1 parent b8670d9 commit a5391a0
Show file tree
Hide file tree
Showing 6 changed files with 88 additions and 34 deletions.
51 changes: 36 additions & 15 deletions source/modules/schematron/shared-rules.sch
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,23 @@
<language value="iso639-1" filename="iso639-1.rdf"/>
<language value="iso639-2b" filename="iso639-2.rdf"/>
<language value="iso639-3" filename="iso639-3.xml"/>
<!-- ietf-bcp-47 -->
</xsl:variable>

<!-- VARIABLE $language-code-key: the EAD3 document's /ead:ead/ead:control/@langencoding, with iso639-2b as a default value. -->
<sch:let name="active-language-code-key" value="(*/*:control/@languageEncoding[.=$supported-language-codes/@value], 'iso639-2b')[1]"/>
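<!-- VARIABLE $active-language-code-key: the document's */control/@languageEncoding value, kept only when it matches one of the supported language code lists above (no default value is applied) -->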
<sch:let name="active-language-code-key" value="(*/*:control/@languageEncoding[.=$supported-language-codes/@value])"/>

<!-- still need to add something here to distinguish between active and deprecated codes -->
<!-- and will also need a functional way, or ability to hit an API endpoint, to test for ietf-bcp-47 codes, most likely -->
<sch:let name="valid-language-codes" value="document($supported-language-codes[@value = $active-language-code-key]/@filename)//(madsrdf:code | iso_639_3_entries/iso_639_3_entry/@id)"/>

<!-- until we have a better way of dealing with "other" as a value, etc. -->
<sch:let name="check-language-codes" value="if (*/*:control/@languageEncoding = ('otherLanguageEncoding', 'ietf-bcp-47')) then false() else true()"/>
<sch:let name="check-ietf-codes" value="if (*/*:control[@languageEncoding eq 'ietf-bcp-47'][not(@scriptEncoding)]) then true() else false()"/>
<sch:let name="check-language-codes" value="if (*/*:control/@languageEncoding = ('iso639-1', 'iso639-2b', 'iso639-3')) then true() else false()"/>
<sch:let name="check-ietf-codes" value="if (*/*:control[@languageEncoding eq 'ietf-bcp-47']) then true() else false()"/>
<sch:let name="check-country-codes" value="if (*/*:control/@countryEncoding eq 'otherCountryEncoding') then false() else true()"/>
<sch:let name="check-script-codes" value="if (*/*:control/@scriptEncoding eq 'otherScriptEncoding') then false() else true()"/>
<sch:let name="check-repository-codes" value="if (*/*:control/@repositoryEncoding eq 'otherRepositoryEncoding') then false() else true()"/>
<sch:let name="check-date-attributes" value="if (*/*:control/@dateEncoding eq 'otherDateEncoding') then false() else true()"/>


<!-- VARIABLE iso15511Pattern -->
<sch:let name="iso15511Pattern" value="'(^([A-Z]{2})|([a-zA-Z]{1})|([a-zA-Z]{3,4}))(-[a-zA-Z0-9:/\-]{1,11})$'"/>
Expand All @@ -47,7 +48,7 @@
<sch:assert test="every $l in (@languageCode | @languageOfElement) satisfies matches(normalize-space($l), $ietfPattern)">The <sch:name/> element's lang or langcode attribute should contain a value from the 'ietf-bcp-47' codelist.</sch:assert>
</sch:rule>
</sch:pattern>

<!-- COUNTRY CODES (in process) -->
<sch:pattern>
<sch:let name="countryCodes" value="document('iso_3166.xml')"/>
Expand Down Expand Up @@ -121,7 +122,7 @@
</sch:assert>
</sch:rule>
</sch:pattern>

<!-- CO-OCCURRENCE CONSTRAINTS -->
<sch:pattern id="maintenanceAgency-constraints">
<sch:rule context="*:maintenanceAgency[*:agencyCode[not(normalize-space())]] | *:maintenanceAgency[not(*:agencyCode)]">
Expand All @@ -131,7 +132,7 @@
<sch:assert test="*:agencyCode[normalize-space()]">The maintenanceAgency element requires either an agencyCode or agencyName element that cannot be empty.</sch:assert>
</sch:rule>
</sch:pattern>

<sch:pattern id="eventDateTime">
<sch:rule context="/*/*:control/*:maintenanceHistory/*:maintenanceEvent/*:eventDateTime[not(@standardDateTime)]">
<sch:assert test="normalize-space()">The eventDateTime element requires either a standardDateTime attribute or text.</sch:assert>
Expand All @@ -146,16 +147,36 @@
</sch:pattern>

<!-- DATE NORMALIZATION -->
<!-- will need to update considerably, still. iso 8601 2019 possiblities are quite different...
<!-- will need to update considerably. iso 8601 2019 possibilities are quite different...
also, we will need to be clear that we don't support all values of iso 8601.
for instance, would need to add support for decade, week, dayo, dayk, etc.
as well as additional qualifiers, such as
~, %, X, ?
but likely should never try to add full support, unless we change the attribute names?
also...
Ranges (which negate our notAfter / notBefore attributes):
1978..1984
etc.:
2052Y1MX*D
-->
<!-- might still want to add optional Y indicator, plus support for Years > and < 4 digits...
also need to state whether we support both basic and extended formats. currently we try to, but it does introduce invalid options right now.
-->
<sch:pattern id="dates">
<!-- without using the EDTF parts of ISO 8601 2019, years are capped as such, it looks like...-->
<sch:let name="isoYYYY" value="'\-?(0|1|2)([0-9]{3})'"/>
<sch:let name="isoMM" value="'\-?(01|02|03|04|05|06|07|08|09|10|11|12)'"/>
<sch:let name="isoDD" value="'\-?((0[1-9])|((1|2)[0-9])|(3[0-1]))'"/>
<sch:let name="isoPattern" value="concat('^', $isoYYYY, '$','|', '^', $isoYYYY, $isoMM,'$', '|', '^', $isoYYYY, $isoMM, $isoDD,'$')"/>
<sch:rule context="*:date[exists(@notBefore | @notAfter | @standardDate)] | *:toDate[exists(@notBefore | @notAfter | @standardDate)] | *:fromDate[exists(@notBefore | @notAfter | @standardDate)]">
<sch:let name="Months" value="1 to 12"/>
<sch:let name="Seasons" value="21 to 41"/>
<sch:let name="Y" value="'[+-]?([0-9u]{1}[0-9ux]{3})'"/>
<sch:let name="M" value="'-?(' || string-join(for $x in ($Months) return format-number($x, '00'), '|') || ')'"/>
<sch:let name="M_S" value="'-?(' || string-join(for $x in ($Months, $Seasons) return format-number($x, '00'), '|') || ')'"/>
<sch:let name="D" value="'-?((0[1-9])|((1|2)[0-9])|(3[0-1]))'"/>
<sch:let name="isoPattern" value="concat(
'^', $Y, '$','|'
, '^', $Y, $M_S,'$', '|'
, '^', $Y, $M, $D,'$'
)"/>
<sch:rule context="*:date[$check-date-attributes][exists(@notBefore | @notAfter | @standardDate)] | *:toDate[$check-date-attributes][exists(@notBefore | @notAfter | @standardDate)] | *:fromDate[$check-date-attributes][exists(@notBefore | @notAfter | @standardDate)]">
<sch:assert test="every $d in (@notBefore, @notAfter, @standardDate) satisfies matches($d, $isoPattern)">The <sch:emph>notBefore</sch:emph>, <sch:emph>notAfter</sch:emph>, and <sch:emph>standardDate</sch:emph> attributes of <sch:name/> must be a iso8601 date.</sch:assert>
</sch:rule>
</sch:pattern>
Expand Down
6 changes: 5 additions & 1 deletion source/release-info/intro.xml
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
* *
* EAC-CPF 2.0 *
* *
* 2021 November 2 *
* 2022 March 20 *
* *
*****************************************************************
Expand All @@ -37,6 +37,10 @@
Comments, questions, and suggestions may be addressed to:
ts-eas@archivists.org
EAC-CPF Team Lead: Silke Jagodzinski
Schema Team Lead: Mark Custer
*****************************************************************
*****************************************************************
Expand Down
2 changes: 1 addition & 1 deletion source/release-info/metadata.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
[
{
"name": "EAC-CPF",
"date": "2021-11-02",
"date": "2022-03-20",
"lead": "Silke Jagodzinski",
"team": [],
"schema-team": [],
Expand Down
6 changes: 5 additions & 1 deletion xml-schemas/eac-cpf/eac.rng
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
* *
* EAC-CPF 2.0 *
* *
* 2021 November 2 *
* 2022 March 20 *
* *
*****************************************************************
Expand All @@ -35,6 +35,10 @@
Comments, questions, and suggestions may be addressed to:
ts-eas@archivists.org
EAC-CPF Team Lead: Silke Jagodzinski
Schema Team Lead: Mark Custer
*****************************************************************
*****************************************************************
Expand Down
6 changes: 5 additions & 1 deletion xml-schemas/eac-cpf/eac.xsd
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
* *
* EAC-CPF 2.0 *
* *
* 2021 November 2 *
* 2022 March 20 *
* *
*****************************************************************
Expand All @@ -36,6 +36,10 @@
Comments, questions, and suggestions may be addressed to:
ts-eas@archivists.org
EAC-CPF Team Lead: Silke Jagodzinski
Schema Team Lead: Mark Custer
*****************************************************************
*****************************************************************
Expand Down
51 changes: 36 additions & 15 deletions xml-schemas/eac-cpf/schematron/eac.sch
Original file line number Diff line number Diff line change
Expand Up @@ -12,22 +12,23 @@
<language value="iso639-1" filename="iso639-1.rdf"/>
<language value="iso639-2b" filename="iso639-2.rdf"/>
<language value="iso639-3" filename="iso639-3.xml"/>
<!-- ietf-bcp-47 -->
</xsl:variable>

<!-- VARIABLE $language-code-key: the EAD3 document's /ead:ead/ead:control/@langencoding, with iso639-2b as a default value. -->
<sch:let name="active-language-code-key" value="(*/*:control/@languageEncoding[.=$supported-language-codes/@value], 'iso639-2b')[1]"/>
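<!-- VARIABLE $active-language-code-key: the document's */control/@languageEncoding value, kept only when it matches one of the supported language code lists above (no default value is applied) -->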
<sch:let name="active-language-code-key" value="(*/*:control/@languageEncoding[.=$supported-language-codes/@value])"/>

<!-- still need to add something here to distinguish between active and deprecated codes -->
<!-- and will also need a functional way, or ability to hit an API endpoint, to test for ietf-bcp-47 codes, most likely -->
<sch:let name="valid-language-codes" value="document($supported-language-codes[@value = $active-language-code-key]/@filename)//(madsrdf:code | iso_639_3_entries/iso_639_3_entry/@id)"/>

<!-- until we have a better way of dealing with "other" as a value, etc. -->
<sch:let name="check-language-codes" value="if (*/*:control/@languageEncoding = ('otherLanguageEncoding', 'ietf-bcp-47')) then false() else true()"/>
<sch:let name="check-ietf-codes" value="if (*/*:control[@languageEncoding eq 'ietf-bcp-47'][not(@scriptEncoding)]) then true() else false()"/>
<sch:let name="check-language-codes" value="if (*/*:control/@languageEncoding = ('iso639-1', 'iso639-2b', 'iso639-3')) then true() else false()"/>
<sch:let name="check-ietf-codes" value="if (*/*:control[@languageEncoding eq 'ietf-bcp-47']) then true() else false()"/>
<sch:let name="check-country-codes" value="if (*/*:control/@countryEncoding eq 'otherCountryEncoding') then false() else true()"/>
<sch:let name="check-script-codes" value="if (*/*:control/@scriptEncoding eq 'otherScriptEncoding') then false() else true()"/>
<sch:let name="check-repository-codes" value="if (*/*:control/@repositoryEncoding eq 'otherRepositoryEncoding') then false() else true()"/>
<sch:let name="check-date-attributes" value="if (*/*:control/@dateEncoding eq 'otherDateEncoding') then false() else true()"/>


<!-- VARIABLE iso15511Pattern -->
<sch:let name="iso15511Pattern" value="'(^([A-Z]{2})|([a-zA-Z]{1})|([a-zA-Z]{3,4}))(-[a-zA-Z0-9:/\-]{1,11})$'"/>
Expand All @@ -47,7 +48,7 @@
<sch:assert test="every $l in (@languageCode | @languageOfElement) satisfies matches(normalize-space($l), $ietfPattern)">The <sch:name/> element's lang or langcode attribute should contain a value from the 'ietf-bcp-47' codelist.</sch:assert>
</sch:rule>
</sch:pattern>

<!-- COUNTRY CODES (in process) -->
<sch:pattern>
<sch:let name="countryCodes" value="document('iso_3166.xml')"/>
Expand Down Expand Up @@ -121,7 +122,7 @@
</sch:assert>
</sch:rule>
</sch:pattern>

<!-- CO-OCCURRENCE CONSTRAINTS -->
<sch:pattern id="maintenanceAgency-constraints">
<sch:rule context="*:maintenanceAgency[*:agencyCode[not(normalize-space())]] | *:maintenanceAgency[not(*:agencyCode)]">
Expand All @@ -131,7 +132,7 @@
<sch:assert test="*:agencyCode[normalize-space()]">The maintenanceAgency element requires either an agencyCode or agencyName element that cannot be empty.</sch:assert>
</sch:rule>
</sch:pattern>

<sch:pattern id="eventDateTime">
<sch:rule context="/*/*:control/*:maintenanceHistory/*:maintenanceEvent/*:eventDateTime[not(@standardDateTime)]">
<sch:assert test="normalize-space()">The eventDateTime element requires either a standardDateTime attribute or text.</sch:assert>
Expand All @@ -146,16 +147,36 @@
</sch:pattern>

<!-- DATE NORMALIZATION -->
<!-- will need to update considerably, still. iso 8601 2019 possiblities are quite different...
<!-- will need to update considerably. iso 8601 2019 possibilities are quite different...
also, we will need to be clear that we don't support all values of iso 8601.
for instance, would need to add support for decade, week, dayo, dayk, etc.
as well as additional qualifiers, such as
~, %, X, ?
but likely should never try to add full support, unless we change the attribute names?
also...
Ranges (which negate our notAfter / notBefore attributes):
1978..1984
etc.:
2052Y1MX*D
-->
<!-- might still want to add optional Y indicator, plus support for Years > and < 4 digits...
also need to state whether we support both basic and extended formats. currently we try to, but it does introduce invalid options right now.
-->
<sch:pattern id="dates">
<!-- without using the EDTF parts of ISO 8601 2019, years are capped as such, it looks like...-->
<sch:let name="isoYYYY" value="'\-?(0|1|2)([0-9]{3})'"/>
<sch:let name="isoMM" value="'\-?(01|02|03|04|05|06|07|08|09|10|11|12)'"/>
<sch:let name="isoDD" value="'\-?((0[1-9])|((1|2)[0-9])|(3[0-1]))'"/>
<sch:let name="isoPattern" value="concat('^', $isoYYYY, '$','|', '^', $isoYYYY, $isoMM,'$', '|', '^', $isoYYYY, $isoMM, $isoDD,'$')"/>
<sch:rule context="*:date[exists(@notBefore | @notAfter | @standardDate)] | *:toDate[exists(@notBefore | @notAfter | @standardDate)] | *:fromDate[exists(@notBefore | @notAfter | @standardDate)]">
<sch:let name="Months" value="1 to 12"/>
<sch:let name="Seasons" value="21 to 41"/>
<sch:let name="Y" value="'[+-]?([0-9u]{1}[0-9ux]{3})'"/>
<sch:let name="M" value="'-?(' || string-join(for $x in ($Months) return format-number($x, '00'), '|') || ')'"/>
<sch:let name="M_S" value="'-?(' || string-join(for $x in ($Months, $Seasons) return format-number($x, '00'), '|') || ')'"/>
<sch:let name="D" value="'-?((0[1-9])|((1|2)[0-9])|(3[0-1]))'"/>
<sch:let name="isoPattern" value="concat(
'^', $Y, '$','|'
, '^', $Y, $M_S,'$', '|'
, '^', $Y, $M, $D,'$'
)"/>
<sch:rule context="*:date[$check-date-attributes][exists(@notBefore | @notAfter | @standardDate)] | *:toDate[$check-date-attributes][exists(@notBefore | @notAfter | @standardDate)] | *:fromDate[$check-date-attributes][exists(@notBefore | @notAfter | @standardDate)]">
<sch:assert test="every $d in (@notBefore, @notAfter, @standardDate) satisfies matches($d, $isoPattern)">The <sch:emph>notBefore</sch:emph>, <sch:emph>notAfter</sch:emph>, and <sch:emph>standardDate</sch:emph> attributes of <sch:name/> must be a iso8601 date.</sch:assert>
</sch:rule>
</sch:pattern>
Expand Down

0 comments on commit a5391a0

Please sign in to comment.