Skip to content

Commit

Permalink
GdsCollector: proof-of-concept to catch validation errors including filename
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed Aug 24, 2020
1 parent b85227a commit 8639b16
Show file tree
Hide file tree
Showing 5 changed files with 27 additions and 10 deletions.
4 changes: 2 additions & 2 deletions ocrd_models/ocrd_models/generatedscollector.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
class OcrdGdsCollector(object):

def __init__(self, messages=None):
print('GdsCollector.__init__', self)
def __init__(self, filename=None, messages=None):
self.filename = filename
if messages is None:
self.messages = []
else:
Expand Down
10 changes: 6 additions & 4 deletions ocrd_models/ocrd_models/ocrd_page_generateds.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# -*- coding: utf-8 -*-

#
# Generated Thu Jul 30 13:44:35 2020 by generateDS.py version 2.35.24.
# Generated Mon Aug 24 15:23:10 2020 by generateDS.py version 2.35.26.
# Python 3.6.9 (default, Jul 17 2020, 12:50:27) [GCC 8.4.0]
#
# Command line options:
Expand Down Expand Up @@ -14950,7 +14950,9 @@ def main():
]
def parse(inFileName, silence=False, print_warnings=True):
    """Parse ``inFileName`` with ``parsexml_`` and fetch the document root.

    The validation-message collector is created with ``inFileName`` as its
    ``filename`` so that collected messages can be related to the input.
    """
    global CapturedNsmap_
    # ``parse`` has no ``filename`` parameter; the input file name itself is
    # what identifies the document to the collector.
    gds_collector = GdsCollector_(filename=inFileName)
    parser = None
    doc = parsexml_(inFileName, parser)
    rootNode = doc.getroot()
Expand Down Expand Up @@ -14979,7 +14981,7 @@ def parse(inFileName, silence=False, print_warnings=True):
sys.stderr.write(separator)
return rootObj

def parseString(inString, silence=False, print_warnings=True):
def parseString(inString, filename=None, silence=False, print_warnings=True):
'''Parse a string, create the object tree, and export it.

Arguments:
Expand All @@ -14990,7 +14992,7 @@ def parseString(inString, silence=False, print_warnings=True):
'''
parser = None
rootNode= parsexmlstring_(inString, parser)
gds_collector = GdsCollector_()
gds_collector = GdsCollector_(filename=filename)
rootTag, rootClass = get_root_tag(rootNode)
if rootClass is None:
rootTag = 'PcGts'
Expand Down
8 changes: 5 additions & 3 deletions ocrd_models/ocrd_page_parse.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
def parse(inFileName, silence=False, print_warnings=True):
    """Parse ``inFileName`` with ``parsexml_`` and fetch the document root.

    The validation-message collector is created with ``inFileName`` as its
    ``filename`` so that collected messages can be related to the input.
    """
    global CapturedNsmap_
    # ``parse`` has no ``filename`` parameter; the input file name itself is
    # what identifies the document to the collector.
    gds_collector = GdsCollector_(filename=inFileName)
    parser = None
    doc = parsexml_(inFileName, parser)
    rootNode = doc.getroot()
Expand Down Expand Up @@ -29,7 +31,7 @@ def parse(inFileName, silence=False, print_warnings=True):
sys.stderr.write(separator)
return rootObj

def parseString(inString, silence=False, print_warnings=True):
def parseString(inString, filename=None, silence=False, print_warnings=True):
'''Parse a string, create the object tree, and export it.
Arguments:
Expand All @@ -40,7 +42,7 @@ def parseString(inString, silence=False, print_warnings=True):
'''
parser = None
rootNode= parsexmlstring_(inString, parser)
gds_collector = GdsCollector_()
gds_collector = GdsCollector_(filename=filename)
rootTag, rootClass = get_root_tag(rootNode)
if rootClass is None:
rootTag = 'PcGts'
Expand Down
2 changes: 1 addition & 1 deletion requirements_test.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
pytest >= 4.0.0
generateDS == 2.35.24
generateDS == 2.35.26
coverage >= 4.5.2
sphinx
codecov >= 2.0.15
Expand Down
13 changes: 13 additions & 0 deletions tests/model/test_ocrd_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -297,5 +297,18 @@ def test_gdscollector_override(self):
gdc = pcgts.gds_collector_
self.assertTrue(isinstance(gdc, OcrdGdsCollector))

def test_gdscollector_info(self):
    """Check that the collector keeps the filename given to ``parseString``
    and records a message once the modified document is validated."""
    filename = assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml')
    # Decode explicitly as UTF-8: the text is re-encoded as UTF-8 below, so
    # the round trip must not depend on the platform's default encoding.
    with open(filename, 'r', encoding='utf-8') as f:
        s = f.read()
    # Rename the Page element so that validation no longer finds one.
    s = s.replace('pc:Page', 'pc:Foo')
    s = s.encode('utf-8')
    pcgts = parseString(s, silence=True, filename=filename)
    gdsc = pcgts.gds_collector_
    # Nothing is reported until validation is run explicitly.
    self.assertEqual(gdsc.messages, [])
    self.assertEqual(gdsc.filename, filename)
    pcgts.validate_(gdsc, True)
    self.assertEqual(gdsc.messages, ['Number of values for Page near line 2 is below the minimum allowed, expected at least 1, found 0'])

# When this module is executed as a script, hand its own path to ``main``.
if __name__ == '__main__':
main(__file__)

0 comments on commit 8639b16

Please sign in to comment.