Skip to content

Commit

Permalink
test_processor: add test for force (overwrite) w/ METS Server
Browse files Browse the repository at this point in the history
  • Loading branch information
bertsky committed Sep 2, 2024
1 parent dfa715d commit 9a8c41d
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 5 deletions.
2 changes: 1 addition & 1 deletion tests/data/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,7 @@ def process(self):
force=config.OCRD_EXISTING_OUTPUT == 'OVERWRITE',
)

class DummyProcessorWithOutputPagewise(Processor):
class DummyProcessorWithOutputSleep(Processor):
@property
def ocrd_tool(self):
# make deep copy
Expand Down
35 changes: 31 additions & 4 deletions tests/processor/test_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
DummyProcessorWithRequiredParameters,
DummyProcessorWithOutput,
DummyProcessorWithOutputLegacy,
DummyProcessorWithOutputPagewise,
DummyProcessorWithOutputSleep,
DummyProcessorWithOutputFailures,
IncompleteProcessor
)
Expand Down Expand Up @@ -266,7 +266,7 @@ def test_run_output_timeout(self):
config.OCRD_MAX_MISSING_OUTPUTS = -1
config.OCRD_MISSING_OUTPUT = 'ABORT'
config.OCRD_PROCESSING_PAGE_TIMEOUT = 3
run_processor(DummyProcessorWithOutputPagewise, workspace=ws,
run_processor(DummyProcessorWithOutputSleep, workspace=ws,
input_file_grp="OCR-D-IMG",
output_file_grp="OCR-D-OUT",
parameter={"sleep": 1})
Expand All @@ -275,7 +275,7 @@ def test_run_output_timeout(self):
config.OCRD_PROCESSING_PAGE_TIMEOUT = 1
from concurrent.futures import TimeoutError
with pytest.raises(TimeoutError) as exc:
run_processor(DummyProcessorWithOutputPagewise, workspace=ws,
run_processor(DummyProcessorWithOutputSleep, workspace=ws,
input_file_grp="OCR-D-IMG",
output_file_grp="OCR-D-OUT",
parameter={"sleep": 3})
Expand Down Expand Up @@ -419,6 +419,33 @@ def ocrd_tool(self):
r = self.capture_out_err()
assert 'ERROR ocrd.processor.base - Found no file for page phys_0001 in file group GRP1' in r.err

def test_run_output_metsserver(start_mets_server):
    """Exercise existing-output handling when processing through a METS Server.

    The same processor invocation is issued three times against the workspace
    provided by the ``start_mets_server`` fixture:

    1. without touching ``OCRD_EXISTING_OUTPUT`` -- afterwards the output
       file group must hold as many files as the input file group;
    2. with ``OCRD_EXISTING_OUTPUT = 'OVERWRITE'`` -- the file counts must
       still match;
    3. with ``OCRD_EXISTING_OUTPUT = 'ABORT'`` -- the run must raise, and the
       exception text must contain ``"already exists"``.
    """
    mets_server_url, ws = start_mets_server
    from ocrd_utils import config

    def process_all_pages():
        # Identical invocation for every phase; only the config differs.
        run_processor(
            DummyProcessorWithOutputSleep,
            workspace=ws,
            input_file_grp="OCR-D-IMG",
            output_file_grp="OCR-D-OUT",
            parameter={"sleep": 0},
            mets_server_url=mets_server_url,
        )

    def count_files(file_grp):
        # Number of files currently registered in the given file group.
        return len(ws.mets.find_all_files(fileGrp=file_grp))

    # do not raise for number of failures:
    config.OCRD_MAX_MISSING_OUTPUTS = -1
    process_all_pages()
    assert count_files("OCR-D-OUT") == count_files("OCR-D-IMG")

    config.OCRD_EXISTING_OUTPUT = 'OVERWRITE'
    process_all_pages()
    assert count_files("OCR-D-OUT") == count_files("OCR-D-IMG")

    config.OCRD_EXISTING_OUTPUT = 'ABORT'
    with pytest.raises(Exception) as exc:
        process_all_pages()
    # Checked after the context manager exits; inside it, the line would be
    # unreachable once run_processor raises.
    assert "already exists" in str(exc.value)

# 2s (+ 2s tolerance) instead of 3*3s (+ 2s tolerance)
@pytest.mark.timeout(4)
def test_run_output_parallel(start_mets_server):
Expand All @@ -429,7 +456,7 @@ def test_run_output_parallel(start_mets_server):
# do not raise for number of failures:
config.OCRD_MAX_MISSING_OUTPUTS = -1
config.OCRD_MAX_PARALLEL_PAGES = 3
run_processor(DummyProcessorWithOutputPagewise, workspace=ws,
run_processor(DummyProcessorWithOutputSleep, workspace=ws,
input_file_grp="OCR-D-IMG",
output_file_grp="OCR-D-OUT",
parameter={"sleep": 2},
Expand Down

0 comments on commit 9a8c41d

Please sign in to comment.