Skip to content

Commit

Permalink
Bugs and <pb> handling
Browse files Browse the repository at this point in the history
  • Loading branch information
glorieux-f committed Sep 26, 2024
1 parent a312672 commit 64cd43d
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 6 deletions.
5 changes: 5 additions & 0 deletions docx/divs.xsl
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,11 @@

<!-- Suppress the level attribute of tei:item: this empty template produces no output for it. -->
<xsl:template match="tei:item/@level"/>

<!-- Page break: for each tei:pb, output a <pb> element whose content is the string value of the matched element with whitespace normalized. -->
<xsl:template match="tei:pb">
<pb>
<!-- normalize-space() strips leading and trailing whitespace and collapses internal runs to a single space -->
<xsl:value-of select="normalize-space(.)"/>
</pb>
</xsl:template>

<xsl:template match="tei:head[@level]">
<xsl:choose>
Expand Down
4 changes: 2 additions & 2 deletions docx/docx_teilike.xsl
Original file line number Diff line number Diff line change
Expand Up @@ -445,13 +445,13 @@ Seen
<xsl:variable name="val" select="w:rPr/w:u/@w:val"/>
<xsl:choose>
<xsl:when test="not(w:rPr/w:u)"/>
<xsl:when test="not($w:style) and not($w:style/w:rPr/w:u)"/>
<xsl:when test="$val = '0' or $val='false' or $val = 'off' or $val = 'none'"/>
<xsl:when test="$val != ''">
<xsl:value-of select="$val"/>
</xsl:when>
</xsl:choose>
</xsl:variable>

<!-- small caps -->
<xsl:variable name="sc">
<xsl:variable name="val" select="w:rPr/w:smallCaps/@w:val"/>
Expand Down Expand Up @@ -479,7 +479,7 @@ Seen
<xsl:when test="w:rPr/w:i">i</xsl:when>
</xsl:choose>
</xsl:variable>
<!-- bold, dangerous in titles, to think -->
<!-- bold -->
<xsl:variable name="bval" select="string(w:rPr/w:b/@w:val)"/>
<xsl:variable name="bCsval" select="string(w:rPr/w:bCs/@w:val)"/>
<xsl:variable name="b">
Expand Down
19 changes: 15 additions & 4 deletions docx/teilike_pcre.tsv
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,27 @@ search replace ?comment

# variables ends with the first replacement rule

# Page breaks
\[([pf]\.[  ][^<>\]]+)\] <pb>\1</pb> Page break with no style
</pb><pb> broken pb
<pb>([  \n]*)</pb>   normalize space
<([a-z]+)>(<pb>[^<>]+</pb>)</\1> \2 false para, doubles <pb> upper, inlines
<([a-z]+)>(<pb>[^<>]+</pb>)</\1> \2
<([a-z]+)>(<pb>[^<>]+</pb>)</\1> \2

# SECTIONS

(<pb>[^<]+</pb>)\s*((<\?div( /)?\?>\s*)+) \2\n\1 first page break of a section
<\?(div|list)\?> <\1> generated hierarchy
<\?(div|list) /\?> </\1> generated hierarchy
\n* *</($parent)>\s*<($parent)[^>]*> *\n* \n\n
\n* *</($parent)>\s*<\1[^>]*> *\n* \n\n

(<head($atts)>)\s+ \1 trim spaces
(<head($atts))>@ \1 type="chapter"> <head>@chapter

-[\*]\n -\n odd chars from OCR

<pb>(\s*)</pb> \1
</pb><pb> broken pb


<($inline)($atts)>(\s*)(<lb($atts)/>)(\s*)</\1> \3\4\6 inline around line break
</($inline)>( *)<\1($atts)> \2 linked inline, block tags separated by \n
Expand All @@ -30,7 +41,7 @@ search replace ?comment
(\s+)</($inline)> </\2>\1 spaces at end of inline, put after
<($inline)($atts)>($pun)</\1> \3 inline without word char
# ([,: \s])(</($inline)>) \2\1 soft pun at end,</inline>
<($inline)($atts)></($inline)> empty inlines
<($inline)($atts)></\1> empty inlines
<($inline)($atts)/> empty inlines


Expand Down

0 comments on commit 64cd43d

Please sign in to comment.