From 269670528c40aec694a5704bce1be189d1a3102a Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Sun, 20 Aug 2023 19:52:05 +0200 Subject: [PATCH 1/5] Refactor logging --- ocrd_utils/ocrd_utils/__init__.py | 1 - ocrd_utils/ocrd_utils/logging.py | 180 ++++++++++++++++-------------- repo/assets | 2 +- repo/spec | 2 +- tests/test_logging.py | 89 ++++++--------- 5 files changed, 131 insertions(+), 143 deletions(-) diff --git a/ocrd_utils/ocrd_utils/__init__.py b/ocrd_utils/ocrd_utils/__init__.py index 319bc852b3..eff6f4be71 100644 --- a/ocrd_utils/ocrd_utils/__init__.py +++ b/ocrd_utils/ocrd_utils/__init__.py @@ -161,7 +161,6 @@ getLevelName, getLogger, initLogging, - logging, setOverrideLogLevel, ) diff --git a/ocrd_utils/ocrd_utils/logging.py b/ocrd_utils/ocrd_utils/logging.py index 421c05a781..1e94ee1eba 100644 --- a/ocrd_utils/ocrd_utils/logging.py +++ b/ocrd_utils/ocrd_utils/logging.py @@ -10,15 +10,29 @@ - $PWD/ocrd_logging.py These files will be executed in the context of ocrd/ocrd_logging.py, with `logging` global set. + +Changes as of 2023-08-20: + + - Try to be less intrusive with OCR-D specific logging conventions to + make it easier and less surprising to define logging behavior when + using OCR-D/core as a library + - Change setOverrideLogLevel to only override the log level of the ``ocrd`` + logger and its descendants + - initLogging will set exactly one handler, for the root logger or for the + ``ocrd`` logger. + - Child loggers should propagate to the ancestor logging (default + behavior of the logging library - no more PropagationShyLogger) + - disableLogging only removes any handlers from the ``ocrd`` logger """ # pylint: disable=no-member from __future__ import absolute_import + from traceback import format_stack import logging import logging.config -import os +from pathlib import Path import sys from .constants import LOG_FORMAT, LOG_TIMEFMT @@ -33,7 +47,6 @@ ] _initialized_flag = False -_overrideLogLevel = None _ocrdLevel2pythonLevel = { 'TRACE': 'DEBUG', @@ -41,19 +54,6 @@ 'FATAL': 'ERROR', } -class PropagationShyLogger(logging.Logger): - - def addHandler(self, hdlr): - super().addHandler(hdlr) - self.propagate = not self.handlers - - def removeHandler(self, hdlr): - super().removeHandler(hdlr) - self.propagate = not self.handlers - -logging.setLoggerClass(PropagationShyLogger) -logging.getLogger().propagate = False - def getLevelName(lvl): """ Get (string) python logging level for (string) spec-defined log level name. @@ -61,78 +61,94 @@ def getLevelName(lvl): lvl = _ocrdLevel2pythonLevel.get(lvl, lvl) return logging.getLevelName(lvl) -def setOverrideLogLevel(lvl, silent=False): +def getLogger(*args, **kwargs): """ - Override all logger filter levels to include lvl and above. - + Wrapper around ``logging.getLogger`` that alls :py:func:`initLogging` if + that wasn't explicitly called before. + """ + if not _initialized_flag: + initLogging() + logger = logging.getLogger(*args, **kwargs) + return logger - - Set root logger level - - iterates all existing loggers and sets their log level to ``NOTSET``. +def setOverrideLogLevel(lvl, silent=False): + """ + Override the output log level of the handlers attached to the ``ocrd`` logger. Args: lvl (string): Log level name. silent (boolean): Whether to log the override call """ - if lvl is None: - return - root_logger = logging.getLogger('') - if not silent: - root_logger.info('Overriding log level globally to %s', lvl) - lvl = getLevelName(lvl) - global _overrideLogLevel # pylint: disable=global-statement - _overrideLogLevel = lvl - for loggerName in logging.Logger.manager.loggerDict: - logger = logging.Logger.manager.loggerDict[loggerName] - if isinstance(logger, logging.PlaceHolder): - continue - logger.setLevel(logging.NOTSET) - root_logger.setLevel(lvl) - -def getLogger(*args, **kwargs): - """ - Wrapper around ``logging.getLogger`` that respects `overrideLogLevel <#setOverrideLogLevel>`_. - """ if not _initialized_flag: initLogging() - logging.getLogger('').critical('getLogger was called before initLogging. Source of the call:') - for line in [x for x in format_stack(limit=2)[0].split('\n') if x]: - logging.getLogger('').critical(line) - name = args[0] - logger = logging.getLogger(*args, **kwargs) - if _overrideLogLevel and name: - logger.setLevel(logging.NOTSET) - return logger + ocrd_logger = logging.getLogger('ocrd') + + if lvl is None: + if not silent: + ocrd_logger.info('Reset log level override') + ocrd_logger.setLevel(logging.NOTSET) + else: + if not silent: + ocrd_logger.info('Overriding log level globally to %s', lvl) + ocrd_logger.setLevel(lvl) -def initLogging(): +def initLogging(builtin_only=False, basic_config=True, force_reinit=False): """ - Reset root logger, read logging configuration if exists, otherwise use basicConfig + Reset ``ocrd`` logger, read logging configuration if exists, otherwise use basicConfig + + initLogging is to be called by OCR-D/core once, i.e. + - for the ``ocrd`` CLI + - for the processor wrapper methods + + Other processes that use OCR-D/core as a library can, but do not have to, use this functionality. + + Keyword Args: + - basic_config (bool, False): Whether to attach the handler to the + root logger instead of just the ``ocrd`` logger + like ``logging.basicConfig`` does. + - builtin_only (bool, False): Whether to search for logging configuration + on-disk (``False``) or only use the + hard-coded config (``True``). For testing + - force_reinit (bool, False): Whether to ignore the module-level + ``_initialized_flag``. For testing only. """ - global _initialized_flag # pylint: disable=global-statement - if _initialized_flag: - logging.getLogger('').critical('initLogging was called multiple times. Source of latest call:') - for line in [x for x in format_stack(limit=2)[0].split('\n') if x]: - logging.getLogger('').critical(line) + global _initialized_flag + if _initialized_flag and not force_reinit: + return + # https://docs.python.org/3/library/logging.html#logging.disable + # If logging.disable(logging.NOTSET) is called, it effectively removes this + # overriding level, so that logging output again depends on the effective + # levels of individual loggers. logging.disable(logging.NOTSET) - for handler in logging.root.handlers[:]: - logging.root.removeHandler(handler) - - CONFIG_PATHS = [ - os.path.curdir, - os.path.join(os.path.expanduser('~')), - '/etc', - ] - config_file = next((f for f \ - in [os.path.join(p, 'ocrd_logging.conf') for p in CONFIG_PATHS] \ - if os.path.exists(f)), - None) + + # remove all handlers for the ocrd logger + for handler in logging.getLogger('ocrd').handlers[:]: + logging.getLogger('ocrd').removeHandler(handler) + + config_file = None + if not builtin_only: + CONFIG_PATHS = [ + Path.cwd(), + Path.home(), + Path('/etc'), + ] + config_file = next((f for f \ + in [p / 'ocrd_logging.conf' for p in CONFIG_PATHS] \ + if f.exists()), + None) if config_file: logging.config.fileConfig(config_file) - logging.getLogger('ocrd.logging').debug("Picked up logging config at %s" % config_file) + logging.getLogger('ocrd.logging').debug("Picked up logging config at %s", config_file) else: # Default logging config - logging.basicConfig(level=logging.INFO, format=LOG_FORMAT, datefmt=LOG_TIMEFMT, stream=sys.stderr) - logging.getLogger('').setLevel(logging.INFO) + ocrd_handler = logging.StreamHandler(stream=sys.stderr) + ocrd_handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT, datefmt=LOG_TIMEFMT)) + if basic_config: + logging.getLogger('').addHandler(ocrd_handler) + else: + logging.getLogger('ocrd').addHandler(ocrd_handler) + logging.getLogger('ocrd').setLevel('INFO') # logging.getLogger('ocrd.resolver').setLevel(logging.INFO) # logging.getLogger('ocrd.resolver.download_to_directory').setLevel(logging.INFO) # logging.getLogger('ocrd.resolver.add_files_to_mets').setLevel(logging.INFO) @@ -141,27 +157,21 @@ def initLogging(): logging.getLogger('shapely.geos').setLevel(logging.ERROR) logging.getLogger('tensorflow').setLevel(logging.ERROR) - if _overrideLogLevel: - # for existing loggers that won't have getLogger do this - # (because they were instantiated through logging.getLogger - # instead of ours, or come from logging.config.fileConfig), - # unset log levels so the global override can apply: - for loggerName in logging.Logger.manager.loggerDict: - logger = logging.Logger.manager.loggerDict[loggerName] - if isinstance(logger, logging.PlaceHolder): - continue - logger.setLevel(logging.NOTSET) - logging.getLogger('').setLevel(_overrideLogLevel) - _initialized_flag = True def disableLogging(): + """ + Disables all logging of the ``ocrd`` logger and descendants + """ global _initialized_flag # pylint: disable=global-statement + if _initialized_flag: + logging.getLogger('ocrd.logging').debug("Disabling logging") _initialized_flag = False - global _overrideLogLevel # pylint: disable=global-statement - _overrideLogLevel = None - logging.basicConfig(level=logging.CRITICAL) - logging.disable(logging.ERROR) + # logging.basicConfig(level=logging.CRITICAL) + # logging.disable(logging.ERROR) + # remove all handlers for the ocrd logger + for handler in logging.getLogger('ocrd').handlers[:]: + logging.getLogger('ocrd').removeHandler(handler) # Initializing stream handlers at module level # would cause message output in all runtime contexts, diff --git a/repo/assets b/repo/assets index bcfa982e81..4c7396455e 160000 --- a/repo/assets +++ b/repo/assets @@ -1 +1 @@ -Subproject commit bcfa982e81319513a13ae58ab2c216e014e52bd7 +Subproject commit 4c7396455ebc0e7734d2408ca736ef642c4dd32a diff --git a/repo/spec b/repo/spec index 506b33936d..1194310c18 160000 --- a/repo/spec +++ b/repo/spec @@ -1 +1 @@ -Subproject commit 506b33936d89080a683fa8a26837f2a23b23e5e2 +Subproject commit 1194310c18d90b280c380bdc3cb04adb6a41120f diff --git a/tests/test_logging.py b/tests/test_logging.py index a051c81a45..f091b3dbbd 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -1,3 +1,4 @@ +import pytest import logging from re import match from tempfile import TemporaryDirectory @@ -26,81 +27,59 @@ def setUp(self): super().setUp() disableLogging() - def test_setOverrideLogLevel(self): - initLogging() - rootLogger = logging.getLogger('') - somelogger = getLogger('foo.bar') - somelogger.setLevel(getLevelName('ERROR')) + def test_loglevel_inheritance(self): + initLogging(builtin_only=True) + ocrd_logger = logging.getLogger('ocrd') + assert ocrd_logger.getEffectiveLevel() == logging.INFO + some_logger = getLogger('ocrd.foo') + assert some_logger.getEffectiveLevel() == logging.INFO setOverrideLogLevel('ERROR') - self.assertEqual(rootLogger.getEffectiveLevel(), logging.ERROR) - self.assertEqual(somelogger.getEffectiveLevel(), logging.ERROR) - notherlogger = getLogger('bar.foo') - self.assertEqual(notherlogger.getEffectiveLevel(), logging.ERROR) - setOverrideLogLevel('INFO') - somelogger = getLogger('foo.bar') - - def test_multiple_initLogging(self): - disableLogging() - initLogging() - self.capture_out_err() - initLogging() - out = '\n'.join(self.capture_out_err()) - assert 'initLogging was called multiple times' in out - assert __file__ in out - - def test_getLogger_before_initLogging(self): - disableLogging() - self.capture_out_err() - getLogger('foo') - out = '\n'.join(self.capture_out_err()) - print(out) - assert 'getLogger was called before initLogging' in out - assert __file__ in out + assert ocrd_logger.getEffectiveLevel() == logging.ERROR + assert some_logger.getEffectiveLevel() == logging.ERROR + another_logger = getLogger('ocrd.bar') + assert another_logger.getEffectiveLevel() == logging.ERROR def test_getLevelName(self): self.assertEqual(getLevelName('ERROR'), logging.ERROR) self.assertEqual(getLevelName('FATAL'), logging.ERROR) self.assertEqual(getLevelName('OFF'), logging.CRITICAL) + @pytest.mark.skip("""\ + This tests whether the message sent to one handler in the + hierarchy will not also be sent to a handler of the parent logger. + This is what Handler.propagate does and we do not mess with it any + more, so the test is obsolete""") def test_logging_really_non_duplicate(self): - initLogging() - child_logger = getLogger('a.b') - print(child_logger) - parent_logger = getLogger('a') - root_logger = getLogger('') - self.assertFalse(root_logger.propagate, 'root logger should not propagate') - self.assertTrue(parent_logger.propagate, 'parent has no handler => do propagate') - self.assertTrue(child_logger.propagate, 'child no handler => do propagate') - - root_capture = FIFOIO(256) - root_handler = logging.StreamHandler(root_capture) - root_handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT, datefmt=LOG_TIMEFMT)) - root_logger.addHandler(root_handler) + initLogging(builtin_only=True) + ocrd_logger = getLogger('ocrd') + parent_logger = getLogger('ocrd.a') + child_logger = getLogger('ocrd.a.b') + + ocrd_capture = FIFOIO(256) + ocrd_handler = logging.StreamHandler(ocrd_capture) + ocrd_handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT, datefmt=LOG_TIMEFMT)) + ocrd_logger.addHandler(ocrd_handler) parent_capture = FIFOIO(256) parent_handler = logging.StreamHandler(parent_capture) parent_handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT, datefmt=LOG_TIMEFMT)) parent_logger.addHandler(parent_handler) - # parent_logger = getLogger('a') - # self.assertFalse(parent_logger.propagate, 'parent has handler now => do not propagate') - - self.assertTrue(child_logger.propagate, 'child has still no handler => do propagate') - child_logger.error('test') - root_str = root_capture.getvalue() + ocrd_str = ocrd_capture.getvalue() parent_str = parent_capture.getvalue() - print('root_str=%s' % root_str) + print('ocrd_str=%s' % ocrd_str) print('parent_str=%s' % parent_str) - self.assertEqual(root_str.count('\n'), 0) + self.assertEqual(ocrd_str.count('\n'), 0) self.assertEqual(parent_str.count('\n'), 1) - # root_logger.removeHandler(root_handler) # remove stream handler so we actually see the output - # root_logger.info('root_str=%s', root_str) - # root_logger.info('parent_str=%s', parent_str) + # ocrd_logger.removeHandler(ocrd_handler) # remove stream handler so we actually see the output + # ocrd_logger.info('ocrd_str=%s', ocrd_str) + # ocrd_logger.info('parent_str=%s', parent_str) + @pytest.mark.skip("Skip for same reason as test_logging_non_duplicate") def test_logging_non_duplicate(self): """ Verify that child loggers don't propagate a log message they handle @@ -154,7 +133,7 @@ def test_logging_non_duplicate(self): 'child received second error and debug but not first error and debug') def testProcessorProfiling(self): - initLogging() + initLogging(builtin_only=True) log_capture_string = FIFOIO(256) ch = logging.StreamHandler(log_capture_string) ch.setFormatter(logging.Formatter(LOG_FORMAT)) @@ -173,7 +152,7 @@ def testProcessorProfiling(self): # self.assertTrue(match(r'.*Executing processor \'ocrd-test\' took 0.\d+s.*', log_contents)) def test_tmpConfigfile(self): - self.assertNotEqual(logging.getLogger('').getEffectiveLevel(), logging.NOTSET) + self.assertNotEqual(logging.getLogger('ocrd').getEffectiveLevel(), logging.NOTSET) with pushd_popd(tempdir=True) as tempdir: with open('ocrd_logging.conf', 'w') as f: # write logging configuration file (MWE) From fab5903faee46f47c64f0ea800e6894da42c5233 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 12 Sep 2023 14:45:18 +0200 Subject: [PATCH 2/5] logging: remove remnants of modifying root logger --- ocrd/ocrd/cli/log.py | 2 +- ocrd/ocrd/decorators/loglevel_option.py | 2 + ocrd/ocrd/mets_server.py | 5 --- ocrd_utils/ocrd_logging.conf | 29 +++++++++++++- ocrd_utils/ocrd_utils/logging.py | 50 +++++++++++++++---------- tests/cli/test_log.py | 19 +++++----- 6 files changed, 71 insertions(+), 36 deletions(-) diff --git a/ocrd/ocrd/cli/log.py b/ocrd/ocrd/cli/log.py index a5bc5cd2f8..76e5cd7888 100644 --- a/ocrd/ocrd/cli/log.py +++ b/ocrd/ocrd/cli/log.py @@ -21,7 +21,7 @@ def log(self, lvl, *args, **kwargs): pass_log = click.make_pass_decorator(LogCtx) @click.group("log") -@click.option('-n', '--name', envvar='OCRD_TOOL_NAME', default='', metavar='LOGGER_NAME', help='Name of the logger', show_default=True) +@click.option('-n', '--name', envvar='OCRD_TOOL_NAME', default='ocrd', metavar='LOGGER_NAME', help='Name of the logger', show_default=True) @click.pass_context def log_cli(ctx, name, *args, **kwargs): """ diff --git a/ocrd/ocrd/decorators/loglevel_option.py b/ocrd/ocrd/decorators/loglevel_option.py index 920d3e3060..9edd013454 100644 --- a/ocrd/ocrd/decorators/loglevel_option.py +++ b/ocrd/ocrd/decorators/loglevel_option.py @@ -4,6 +4,8 @@ __all__ = ['ocrd_loglevel'] def _setOverrideLogLevel(ctx, param, value): # pylint: disable=unused-argument + if value is None: # Explicitly test for None because logging.DEBUG == 0 + return setOverrideLogLevel(value) return value diff --git a/ocrd/ocrd/mets_server.py b/ocrd/ocrd/mets_server.py index 44dec51c68..54a3cd618b 100644 --- a/ocrd/ocrd/mets_server.py +++ b/ocrd/ocrd/mets_server.py @@ -19,11 +19,6 @@ from ocrd_models import OcrdMets, OcrdFile, ClientSideOcrdFile, OcrdAgent, ClientSideOcrdAgent from ocrd_utils import initLogging, getLogger, deprecated_alias -# -# XXX HACKS TODO -# -initLogging() - # # Models # diff --git a/ocrd_utils/ocrd_logging.conf b/ocrd_utils/ocrd_logging.conf index 3383b4f54d..879b5e14e9 100644 --- a/ocrd_utils/ocrd_logging.conf +++ b/ocrd_utils/ocrd_logging.conf @@ -53,6 +53,34 @@ handlers=consoleHandler #handlers=fileHandler #qualname=ocrd.workspace +# ocrd loggers +[logger_ocrd_ocrd] +level=ERROR +handlers=consoleHandler +qualname=ocrd + +# ocrd loggers +[logger_ocrd_ocrd] +level=INFO +handlers=consoleHandler +qualname=ocrd +[logger_ocrd_ocrd_models] +level=INFO +handlers=consoleHandler +qualname=ocrd_models +[logger_ocrd_ocrd_utils] +level=INFO +handlers=consoleHandler +qualname=ocrd_utils +[logger_ocrd_ocrd_network] +level=INFO +handlers=consoleHandler +qualname=ocrd_network +[logger_ocrd_ocrd_exif] +level=INFO +handlers=consoleHandler +qualname=ocrd_exif + # # logger tensorflow @@ -62,7 +90,6 @@ level=ERROR handlers=consoleHandler qualname=tensorflow - # # logger shapely.geos # diff --git a/ocrd_utils/ocrd_utils/logging.py b/ocrd_utils/ocrd_utils/logging.py index 3b5cfa6d82..683031852f 100644 --- a/ocrd_utils/ocrd_utils/logging.py +++ b/ocrd_utils/ocrd_utils/logging.py @@ -46,6 +46,21 @@ 'setOverrideLogLevel', ] +LOGGING_DEFAULTS = { + 'ocrd': logging.INFO, + 'ocrd_models': logging.INFO, + 'ocrd_utils': logging.INFO, + 'ocrd_network': logging.INFO, + 'ocrd_exif': logging.INFO, + # 'ocrd.resolver': logging.INFO, + # 'ocrd.resolver.download_to_directory': logging.INFO, + # 'ocrd.resolver.add_files_to_mets': logging.INFO, + # To cut back on the `Self-intersection at or near point` INFO messages + 'shapely.geos': logging.ERROR, + 'tensorflow': logging.ERROR, + 'PIL': logging.INFO, +} + _initialized_flag = False _ocrdLevel2pythonLevel = { @@ -84,7 +99,7 @@ def getLogger(*args, **kwargs): logger = logging.getLogger(*args, **kwargs) return logger -def setOverrideLogLevel(lvl, silent=False): +def setOverrideLogLevel(lvl, silent=True): """ Override the output log level of the handlers attached to the ``ocrd`` logger. @@ -102,10 +117,10 @@ def setOverrideLogLevel(lvl, silent=False): ocrd_logger.setLevel(logging.NOTSET) else: if not silent: - ocrd_logger.info('Overriding log level globally to %s', lvl) + ocrd_logger.info('Overriding ocrd log level to %s', lvl) ocrd_logger.setLevel(lvl) -def initLogging(builtin_only=False, basic_config=True, force_reinit=False): +def initLogging(builtin_only=False, force_reinit=False): """ Reset ``ocrd`` logger, read logging configuration if exists, otherwise use basicConfig @@ -116,9 +131,6 @@ def initLogging(builtin_only=False, basic_config=True, force_reinit=False): Other processes that use OCR-D/core as a library can, but do not have to, use this functionality. Keyword Args: - - basic_config (bool, False): Whether to attach the handler to the - root logger instead of just the ``ocrd`` logger - like ``logging.basicConfig`` does. - builtin_only (bool, False): Whether to search for logging configuration on-disk (``False``) or only use the hard-coded config (``True``). For testing @@ -128,6 +140,7 @@ def initLogging(builtin_only=False, basic_config=True, force_reinit=False): global _initialized_flag if _initialized_flag and not force_reinit: return + # disableLogging() # https://docs.python.org/3/library/logging.html#logging.disable # If logging.disable(logging.NOTSET) is called, it effectively removes this @@ -157,27 +170,22 @@ def initLogging(builtin_only=False, basic_config=True, force_reinit=False): # Default logging config ocrd_handler = logging.StreamHandler(stream=sys.stderr) ocrd_handler.setFormatter(logging.Formatter(fmt=LOG_FORMAT, datefmt=LOG_TIMEFMT)) - if basic_config: - logging.getLogger('').addHandler(ocrd_handler) - else: - logging.getLogger('ocrd').addHandler(ocrd_handler) - logging.getLogger('ocrd').setLevel('INFO') - # logging.getLogger('ocrd.resolver').setLevel(logging.INFO) - # logging.getLogger('ocrd.resolver.download_to_directory').setLevel(logging.INFO) - # logging.getLogger('ocrd.resolver.add_files_to_mets').setLevel(logging.INFO) - logging.getLogger('PIL').setLevel(logging.INFO) - # To cut back on the `Self-intersection at or near point` INFO messages - logging.getLogger('shapely.geos').setLevel(logging.ERROR) - logging.getLogger('tensorflow').setLevel(logging.ERROR) + ocrd_handler.setLevel(logging.DEBUG) + logging.getLogger('ocrd').addHandler(ocrd_handler) + for logger_name, logger_level in LOGGING_DEFAULTS.items(): + logging.getLogger(logger_name).setLevel(logger_level) _initialized_flag = True -def disableLogging(): +def disableLogging(silent=True): """ Disables all logging of the ``ocrd`` logger and descendants + + Keyword Args: + silent (bool, True): Whether to log the call to disableLogging """ global _initialized_flag # pylint: disable=global-statement - if _initialized_flag: + if _initialized_flag and not silent: logging.getLogger('ocrd.logging').debug("Disabling logging") _initialized_flag = False # logging.basicConfig(level=logging.CRITICAL) @@ -185,6 +193,8 @@ def disableLogging(): # remove all handlers for the ocrd logger for handler in logging.getLogger('ocrd').handlers[:]: logging.getLogger('ocrd').removeHandler(handler) + for logger_name in LOGGING_DEFAULTS: + logging.getLogger(logger_name).setLevel(logging.NOTSET) # Initializing stream handlers at module level # would cause message output in all runtime contexts, diff --git a/tests/cli/test_log.py b/tests/cli/test_log.py index cfab900c89..86193352e5 100644 --- a/tests/cli/test_log.py +++ b/tests/cli/test_log.py @@ -6,7 +6,8 @@ from tests.base import CapturingTestCase as TestCase, main, assets, copy_of_directory from ocrd.decorators import ocrd_loglevel -from ocrd_utils import initLogging, setOverrideLogLevel, logging, disableLogging +from ocrd_utils import setOverrideLogLevel, logging, disableLogging +import logging as python_logging @click.group() @ocrd_loglevel @@ -19,26 +20,26 @@ class TestLogCli(TestCase): def _get_log_output(self, *args): disableLogging() code, out, err = self.invoke_cli(mock_ocrd_cli, args) - print(code, out, err) - return out + err + print({'code': code, 'out': out, 'err': err}) + return err def tearDown(self): if 'OCRD_TOOL_NAME' in ENV: del(ENV['OCRD_TOOL_NAME']) def test_loglevel(self): - assert 'DEBUG root - foo' not in self._get_log_output('log', 'debug', 'foo') - assert 'DEBUG root - foo' in self._get_log_output('-l', 'DEBUG', 'log', 'debug', 'foo') + assert 'DEBUG ocrd - foo' not in self._get_log_output('log', 'debug', 'foo') + assert 'DEBUG ocrd - foo' in self._get_log_output('-l', 'DEBUG', 'log', 'debug', 'foo') def test_log_basic(self): - assert 'INFO root - foo bar' in self._get_log_output('log', 'info', 'foo bar') + assert 'INFO ocrd - foo bar' in self._get_log_output('log', 'info', 'foo bar') def test_log_name_param(self): - assert 'INFO boo.far - foo bar' in self._get_log_output('log', '--name', 'boo.far', 'info', 'foo bar') + assert 'INFO ocrd.boo.far - foo bar' in self._get_log_output('log', '--name', 'ocrd.boo.far', 'info', 'foo bar') def test_log_name_envvar(self): - ENV['OCRD_TOOL_NAME'] = 'boo.far' - assert 'INFO boo.far - foo bar' in self._get_log_output('log', 'info', 'foo bar') + ENV['OCRD_TOOL_NAME'] = 'ocrd.boo.far' + assert 'INFO ocrd.boo.far - foo bar' in self._get_log_output('log', 'info', 'foo bar') def test_log_name_levels(self): ENV['OCRD_TOOL_NAME'] = 'ocrd.foo' From c44123d97f698284114dfcef36706605af87d437 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 12 Sep 2023 15:02:33 +0200 Subject: [PATCH 3/5] adapt bashlib to logging changes --- ocrd/ocrd/cli/log.py | 5 ++- tests/cli/test_bashlib.py | 60 ++---------------------------- tests/cli/test_workspace.py | 4 +- tests/data/bashlib_cp_processor.sh | 58 +++++++++++++++++++++++++++++ tests/test_decorators.py | 10 +++-- 5 files changed, 73 insertions(+), 64 deletions(-) create mode 100644 tests/data/bashlib_cp_processor.sh diff --git a/ocrd/ocrd/cli/log.py b/ocrd/ocrd/cli/log.py index 76e5cd7888..308c947c63 100644 --- a/ocrd/ocrd/cli/log.py +++ b/ocrd/ocrd/cli/log.py @@ -26,9 +26,12 @@ def log(self, lvl, *args, **kwargs): def log_cli(ctx, name, *args, **kwargs): """ Logging + + Logger name will be ocrd.OCRD_TOOL_NAME where OCRD_TOOL_NAME is normally + (when using bashlib) the name of the processor. """ initLogging() - ctx.obj = LogCtx(name) + ctx.obj = LogCtx('ocrd.' + name) def _bind_log_command(lvl): @click.argument('msgs', nargs=-1) diff --git a/tests/cli/test_bashlib.py b/tests/cli/test_bashlib.py index 0e61e093e8..fc043b5fd7 100644 --- a/tests/cli/test_bashlib.py +++ b/tests/cli/test_bashlib.py @@ -8,6 +8,7 @@ import pathlib import yaml import json +from pathlib import Path from ocrd.cli.bashlib import bashlib_cli @@ -128,63 +129,7 @@ def test_bashlib_cp_processor(self): } } } - script = """#!/usr/bin/env bash - set -eu - set -o pipefail - MIMETYPE_PAGE=$(ocrd bashlib constants MIMETYPE_PAGE) - source $(ocrd bashlib filename) - ocrd__wrap ocrd-tool.json ocrd-cp "$@" - - IFS=',' read -ra in_file_grps <<< ${ocrd__argv[input_file_grp]} - if ((${#in_file_grps[*]}>1)); then - ocrd__log info "running on multiple input fileGrps ${in_file_grps[*]}" - else - ocrd__log info "running on single input fileGrp ${in_file_grps}" - fi - out_file_grp=${ocrd__argv[output_file_grp]} - message="${params[message]}" - - cd "${ocrd__argv[working_dir]}" - mets=$(basename ${ocrd__argv[mets_file]}) - for ((n=0; n<${#ocrd__files[*]}; n++)); do - in_fpath=($(ocrd__input_file $n local_filename)) - in_id=($(ocrd__input_file $n ID)) - in_mimetype=($(ocrd__input_file $n mimetype)) - in_pageId=($(ocrd__input_file $n pageId)) - if ! test -f "${in_fpath#file://}"; then - ocrd__log error "input file '${in_fpath#file://}' (ID=${in_id} pageId=${in_pageId} MIME=${in_mimetype}) is not on disk" - continue - fi - if [ "x${in_mimetype}" = x${MIMETYPE_PAGE} ]; then - ocrd__log info "processing PAGE-XML input file $in_fpath ($in_id / $in_pageId)" - out_suf=.xml - else - ocrd__log info "processing ${in_mimetype} input file $in_fpath ($in_id / $in_pageId)" - out_suf=.${in_fpath##*.} - fi - for ((i=1; i<${#in_fpath[*]}; i++)); do - ocrd__log warning "ignoring ${in_mimetype[$i]} input file ${in_fpath[$i]} (${in_id[$i]} ${in_pageId[$i]})" - done - out_id=$(ocrd__input_file $n outputFileId) - out_fpath="${ocrd__argv[output_file_grp]}/${out_id}${out_suf}" - declare -a options - if [[ "${ocrd__argv[overwrite]}" == true ]]; then - options=( --force ) - else - options=() - fi - mkdir -p $out_file_grp - cp ${options[*]} "$in_fpath" "$out_fpath" - if [ -n "$message" ]; then - echo "$message" - fi - if [ -n "$in_pageId" ]; then - options+=( -g $in_pageId ) - fi - options+=( -G $out_file_grp -m $in_mimetype -i "$out_id" "$out_fpath" ) - ocrd -l ${ocrd__argv[log_level]} workspace add "${options[@]}" - done - """ + script = (Path(__file__).parent.parent / 'data/bashlib_cp_processor.sh').read_text() with copy_of_directory(assets.path_to('kant_aufklaerung_1784/data')) as wsdir: with pushd_popd(wsdir): with open('ocrd-tool.json', 'w') as toolfile: @@ -193,6 +138,7 @@ def test_bashlib_cp_processor(self): exit_code, out, err = self.invoke_bash( script, '-I', 'OCR-D-GT-PAGE', '-O', 'OCR-D-GT-PAGE2', '-P', 'message', 'hello world', executable='ocrd-cp') + print({'exit_code': exit_code, 'out': out, 'err': err}) assert 'single input fileGrp' in err assert 'processing PAGE-XML' in err assert exit_code == 0 diff --git a/tests/cli/test_workspace.py b/tests/cli/test_workspace.py index 088432a029..1dad3280c0 100644 --- a/tests/cli/test_workspace.py +++ b/tests/cli/test_workspace.py @@ -362,12 +362,12 @@ def test_copy_vs_clone(self): result = self.runner.invoke(workspace_cli, ['clone', join(src_dir, 'mets.xml'), shallowcloneddir]) self.assertEqual(result.exit_code, 0) - result = self.runner.invoke(workspace_cli, ['clone', '-a', join(src_dir, 'mets.xml'), fullcloneddir]) + result = self.runner.invoke(workspace_cli, ['clone', '--download', join(src_dir, 'mets.xml'), fullcloneddir]) self.assertEqual(result.exit_code, 0) with copy_of_directory(src_dir, copieddir): shallow_vs_copied = dircmp(shallowcloneddir, copieddir) - self.assertEqual(set(shallow_vs_copied.right_only), set(['OCR-D-GT-ALTO', 'OCR-D-GT-PAGE', 'OCR-D-IMG'])) + assert set(shallow_vs_copied.right_only) == set(['OCR-D-GT-ALTO', 'OCR-D-GT-PAGE', 'OCR-D-IMG']) full_vs_copied = dircmp(fullcloneddir, copieddir) # print(full_vs_copied) diff --git a/tests/data/bashlib_cp_processor.sh b/tests/data/bashlib_cp_processor.sh new file mode 100644 index 0000000000..7febaa7692 --- /dev/null +++ b/tests/data/bashlib_cp_processor.sh @@ -0,0 +1,58 @@ +#!/usr/bin/env bash +# vim: ft=zsh +set -eu +set -o pipefail +MIMETYPE_PAGE=$(ocrd bashlib constants MIMETYPE_PAGE) +source $(ocrd bashlib filename) +ocrd__wrap ocrd-tool.json ocrd-cp "$@" + +IFS=',' read -ra in_file_grps <<< ${ocrd__argv[input_file_grp]} +if ((${#in_file_grps[*]}>1)); then + ocrd__log info "running on multiple input fileGrps ${in_file_grps[*]}" +else + ocrd__log info "running on single input fileGrp ${in_file_grps}" +fi +out_file_grp=${ocrd__argv[output_file_grp]} +message="${params[message]}" + +cd "${ocrd__argv[working_dir]}" +mets=$(basename ${ocrd__argv[mets_file]}) +for ((n=0; n<${#ocrd__files[*]}; n++)); do + in_fpath=($(ocrd__input_file $n local_filename)) + in_id=($(ocrd__input_file $n ID)) + in_mimetype=($(ocrd__input_file $n mimetype)) + in_pageId=($(ocrd__input_file $n pageId)) + if ! test -f "${in_fpath#file://}"; then + ocrd__log error "input file '${in_fpath#file://}' (ID=${in_id} pageId=${in_pageId} MIME=${in_mimetype}) is not on disk" + continue + fi + if [ "x${in_mimetype}" = x${MIMETYPE_PAGE} ]; then + ocrd__log info "processing PAGE-XML input file $in_fpath ($in_id / $in_pageId)" + out_suf=.xml + else + ocrd__log info "processing ${in_mimetype} input file $in_fpath ($in_id / $in_pageId)" + out_suf=.${in_fpath##*.} + fi + for ((i=1; i<${#in_fpath[*]}; i++)); do + ocrd__log warning "ignoring ${in_mimetype[$i]} input file ${in_fpath[$i]} (${in_id[$i]} ${in_pageId[$i]})" + done + out_id=$(ocrd__input_file $n outputFileId) + out_fpath="${ocrd__argv[output_file_grp]}/${out_id}${out_suf}" + declare -a options + if [[ "${ocrd__argv[overwrite]}" == true ]]; then + options=( --force ) + else + options=() + fi + mkdir -p $out_file_grp + cp ${options[*]} "$in_fpath" "$out_fpath" + if [ -n "$message" ]; then + echo "$message" + fi + if [ -n "$in_pageId" ]; then + options+=( -g $in_pageId ) + fi + options+=( -G $out_file_grp -m $in_mimetype -i "$out_id" "$out_fpath" ) + ocrd -l ${ocrd__argv[log_level]} workspace add "${options[@]}" +done + diff --git a/tests/test_decorators.py b/tests/test_decorators.py index 4f4f3ecc00..992e173dc1 100644 --- a/tests/test_decorators.py +++ b/tests/test_decorators.py @@ -14,7 +14,7 @@ ocrd_loglevel, ocrd_cli_wrap_processor, ) # pylint: disable=protected-access -from ocrd_utils import pushd_popd, VERSION as OCRD_VERSION, disableLogging +from ocrd_utils import pushd_popd, VERSION as OCRD_VERSION, disableLogging, initLogging @click.command() @ocrd_cli_options @@ -60,11 +60,13 @@ def test_loglevel_invalid(self): def test_loglevel_override(self): import logging - self.assertEqual(logging.getLogger('').getEffectiveLevel(), logging.INFO) - self.assertEqual(logging.getLogger('PIL').getEffectiveLevel(), logging.INFO) + disableLogging() + assert logging.getLogger('ocrd').getEffectiveLevel() == logging.WARNING + initLogging() + assert logging.getLogger('ocrd').getEffectiveLevel() == logging.INFO code, _, _ = self.invoke_cli(cli_with_ocrd_loglevel, ['--log-level', 'DEBUG']) assert not code - self.assertEqual(logging.getLogger('PIL').getEffectiveLevel(), logging.DEBUG) + assert logging.getLogger('ocrd').getEffectiveLevel() == logging.DEBUG def test_processor_no_mets(self): """ From edd3becd85705a42252136a2c9f83614d0a6dd99 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 12 Sep 2023 15:26:57 +0200 Subject: [PATCH 4/5] drop make test-logging, fix duplication error in logging config --- Makefile | 5 ----- ocrd_utils/ocrd_logging.conf | 6 ------ 2 files changed, 11 deletions(-) diff --git a/Makefile b/Makefile index e5c9ae202b..372b375272 100644 --- a/Makefile +++ b/Makefile @@ -198,15 +198,10 @@ assets: repo/assets test: assets $(PYTHON) \ -m pytest $(PYTEST_ARGS) --durations=10\ - --ignore=$(TESTDIR)/test_logging.py \ --ignore=$(TESTDIR)/test_logging_conf.py \ --ignore-glob="$(TESTDIR)/**/*bench*.py" \ $(TESTDIR) - #$(MAKE) test-logging - -test-logging: HOME=$(CURDIR)/ocrd_utils $(PYTHON) -m pytest --continue-on-collection-errors -k TestLogging $(TESTDIR) - HOME=$(CURDIR) $(PYTHON) -m pytest --continue-on-collection-errors -k TestLogging $(TESTDIR) benchmark: $(PYTHON) -m pytest $(TESTDIR)/model/test_ocrd_mets_bench.py diff --git a/ocrd_utils/ocrd_logging.conf b/ocrd_utils/ocrd_logging.conf index 879b5e14e9..2eb78237cf 100644 --- a/ocrd_utils/ocrd_logging.conf +++ b/ocrd_utils/ocrd_logging.conf @@ -53,12 +53,6 @@ handlers=consoleHandler #handlers=fileHandler #qualname=ocrd.workspace -# ocrd loggers -[logger_ocrd_ocrd] -level=ERROR -handlers=consoleHandler -qualname=ocrd - # ocrd loggers [logger_ocrd_ocrd] level=INFO From c6d3997baac2e07eb7f05ed7549ccc87f129fd01 Mon Sep 17 00:00:00 2001 From: Konstantin Baierer Date: Tue, 12 Sep 2023 15:50:13 +0200 Subject: [PATCH 5/5] make test_logging.py actually pick up configuration --- Makefile | 5 ++--- ocrd_utils/ocrd_logging.conf | 3 ++- tests/test_logging.py | 5 ++--- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/Makefile b/Makefile index 372b375272..30771ef847 100644 --- a/Makefile +++ b/Makefile @@ -5,7 +5,7 @@ PYTHON ?= python PIP ?= pip LOG_LEVEL = INFO PYTHONIOENCODING=utf8 -TESTDIR = tests +TESTDIR = $(CURDIR)/tests PYTEST_ARGS = --continue-on-collection-errors SPHINX_APIDOC = @@ -198,10 +198,9 @@ assets: repo/assets test: assets $(PYTHON) \ -m pytest $(PYTEST_ARGS) --durations=10\ - --ignore=$(TESTDIR)/test_logging_conf.py \ --ignore-glob="$(TESTDIR)/**/*bench*.py" \ $(TESTDIR) - HOME=$(CURDIR)/ocrd_utils $(PYTHON) -m pytest --continue-on-collection-errors -k TestLogging $(TESTDIR) + cd ocrd_utils ; $(PYTHON) -m pytest --continue-on-collection-errors -k TestLogging $(TESTDIR) benchmark: $(PYTHON) -m pytest $(TESTDIR)/model/test_ocrd_mets_bench.py diff --git a/ocrd_utils/ocrd_logging.conf b/ocrd_utils/ocrd_logging.conf index 2eb78237cf..4117a2a494 100644 --- a/ocrd_utils/ocrd_logging.conf +++ b/ocrd_utils/ocrd_logging.conf @@ -55,9 +55,10 @@ handlers=consoleHandler # ocrd loggers [logger_ocrd_ocrd] -level=INFO +level=ERROR handlers=consoleHandler qualname=ocrd + [logger_ocrd_ocrd_models] level=INFO handlers=consoleHandler diff --git a/tests/test_logging.py b/tests/test_logging.py index f091b3dbbd..2e4e0861b5 100644 --- a/tests/test_logging.py +++ b/tests/test_logging.py @@ -24,8 +24,7 @@ class TestLogging(TestCase): def setUp(self): - super().setUp() - disableLogging() + pass # do not chdir def test_loglevel_inheritance(self): initLogging(builtin_only=True) @@ -179,7 +178,7 @@ def test_tmpConfigfile(self): # so the defaults from the import-time initLogging should be invalided initLogging() # ensure log level is set from temporary config file - self.assertEqual(logging.getLogger('').getEffectiveLevel(), logging.ERROR) + self.assertEqual(logging.getLogger('ocrd').getEffectiveLevel(), logging.ERROR) if __name__ == '__main__': main(__file__)