Skip to content

Commit

Permalink
Merge branch 'profile-mem'
Browse files Browse the repository at this point in the history
# Conflicts:
#	README.md
  • Loading branch information
kba committed Nov 23, 2022
2 parents 0edf232 + 97a14b0 commit 1b5a362
Show file tree
Hide file tree
Showing 5 changed files with 38 additions and 3 deletions.
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,11 @@ Almost all behaviour of the OCR-D/core software is configured via CLI options an
Some parts of the software are configured via environement variables:

* `OCRD_METS_CACHING`: If set to `true`, access to the METS file is cached, speeding in-memory search and modification.
* `OCRD_PROFILE`: This variable configures the built-in CPU and memory profiling. If empty, no profiling is done. Otherwise expected to contain any of the following tokens:
* `CPU`: Enable CPU profiling of processor runs
* `RSS`: Enable RSS memory profiling
* `PSS`: Enable proportionate memory profiling
* `OCRD_PROFILE_FILE`: If set, then the CPU profile is written to this file for later peruse with a analysis tools like [snakeviz](https://jiffyclub.github.io/snakeviz/)

## Packages

Expand Down
7 changes: 7 additions & 0 deletions ocrd/ocrd/decorators/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from os.path import isfile
from os import environ
import sys

import click
Expand Down Expand Up @@ -89,6 +90,12 @@ def ocrd_cli_wrap_processor(
report = WorkspaceValidator.check_file_grp(workspace, kwargs['input_file_grp'], '' if overwrite else kwargs['output_file_grp'], page_id)
if not report.is_valid:
raise Exception("Invalid input/output file grps:\n\t%s" % '\n\t'.join(report.errors))
# Set up profiling behavior from environment variables/flags
if not profile and 'OCRD_PROFILE' in environ:
if 'CPU' in environ['OCRD_PROFILE']:
profile = True
if not profile_file and 'OCRD_PROFILE_FILE' in environ:
profile_file = environ['OCRD_PROFILE_FILE']
if profile or profile_file:
import cProfile
import pstats
Expand Down
21 changes: 20 additions & 1 deletion ocrd/ocrd/processor/helpers.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
"""
Helper methods for running and documenting processors
"""
from os import environ
from time import perf_counter, process_time
import json
import inspect
from subprocess import run, PIPE
from memory_profiler import memory_usage
from sparklines import sparklines

from click import wrap_text
from ocrd_utils import getLogger
Expand Down Expand Up @@ -85,7 +88,23 @@ def run_processor(
log.debug("Processor instance %s (%s doing %s)", processor, name, otherrole)
t0_wall = perf_counter()
t0_cpu = process_time()
processor.process()
if any(x in environ.get('OCRD_PROFILE', '') for x in ['RSS', 'PSS']):
backend = 'psutil_pss' if 'PSS' in environ['OCRD_PROFILE'] else 'psutil'
mem_usage = memory_usage(proc=processor.process,
# only run process once
max_iterations=1,
interval=.1, timeout=None, timestamps=True,
# include sub-processes
multiprocess=True, include_children=True,
# get proportional set size instead of RSS
backend=backend)
mem_usage_values = [mem for mem, _ in mem_usage]
mem_output = 'memory consumption: '
mem_output += ''.join(sparklines(mem_usage_values))
mem_output += ' max: %.2f MiB min: %.2f MiB' % (max(mem_usage_values), min(mem_usage_values))
logProfile.info(mem_output)
else:
processor.process()
t1_wall = perf_counter() - t0_wall
t1_cpu = process_time() - t0_cpu
logProfile.info("Executing processor '%s' took %fs (wall) %fs (CPU)( [--input-file-grp='%s' --output-file-grp='%s' --parameter='%s' --page-id='%s']" % (
Expand Down
4 changes: 3 additions & 1 deletion ocrd/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,6 @@ opencv-python-headless
Flask
jsonschema
pyyaml
Deprecated == 1.2.0
Deprecated == 1.2.0
memory-profiler >= 0.58.0
sparklines >= 0.4.2
4 changes: 3 additions & 1 deletion tests/test_logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,9 @@ def testProcessorProfiling(self):
# with open('/tmp/debug.log', 'w') as f:
# f.write(log_contents)
# Check whether profile information has been logged. Dummy should finish in under 0.1s
self.assertTrue(match(r'.*Executing processor \'ocrd-test\' took 0.\d+s.*', log_contents))
# print(log_contents)
assert "Executing processor 'ocrd-test' took 0" in log_contents
# self.assertTrue(match(r'.*Executing processor \'ocrd-test\' took 0.\d+s.*', log_contents))

def test_tmpConfigfile(self):
self.assertNotEqual(logging.getLogger('').getEffectiveLevel(), logging.NOTSET)
Expand Down

0 comments on commit 1b5a362

Please sign in to comment.