Skip to content

Commit

Permalink
feat: Automatically configure logging (#271)
Browse files Browse the repository at this point in the history
- blocked by crawlee 0.3.5 release
  • Loading branch information
janbuchar authored Sep 10, 2024
1 parent 6e7d19b commit 1906bb2
Show file tree
Hide file tree
Showing 8 changed files with 66 additions and 24 deletions.
2 changes: 2 additions & 0 deletions docs/04-upgrading/upgrading_to_v20.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,8 @@ Attributes suffixed with `_millis` were renamed to remove said suffix and have t
- The `Actor.main` method has been removed as it brings no benefits compared to using `async with Actor`.
- The `Actor.add_webhook`, `Actor.start`, `Actor.call` and `Actor.start_task` methods now accept instances of the `apify.Webhook` model instead of an untyped `dict`.
- `Actor.start`, `Actor.call`, `Actor.start_task`, `Actor.set_status_message` and `Actor.abort` return instances of the `ActorRun` model instead of an untyped `dict`.
- Upon entering the context manager (`async with Actor`), the `Actor` now puts the default logging configuration in place. This can be disabled by passing `configure_logging=False`, e.g. `async with Actor(configure_logging=False)`.
- The `config` parameter of `Actor` has been renamed to `configuration`.

## Scrapy integration

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ keywords = [
python = "^3.9"
apify-client = ">=1.7.1"
apify-shared = ">=1.1.2"
crawlee = ">=0.3.0"
crawlee = ">=0.3.5"
cryptography = ">=42.0.0"
httpx = ">=0.27.0"
lazy-object-proxy = ">=1.10.0"
Expand Down
28 changes: 22 additions & 6 deletions src/apify/_actor.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
from apify._proxy_configuration import ProxyConfiguration
from apify._utils import get_system_info, is_running_in_ipython
from apify.apify_storage_client import ApifyStorageClient
from apify.log import logger
from apify.log import _configure_logging, logger
from apify.storages import Dataset, KeyValueStore, RequestQueue

if TYPE_CHECKING:
Expand All @@ -46,16 +46,24 @@ class _ActorType:
_configuration: Configuration
_is_exiting = False

def __init__(self, config: Configuration | None = None) -> None:
def __init__(
self,
configuration: Configuration | None = None,
*,
configure_logging: bool = True,
) -> None:
"""Create an Actor instance.
Note that you don't have to do this, all the functionality is accessible using the default instance
(e.g. `Actor.open_dataset()`).
Args:
config: The Actor configuration to be used. If not passed, a new Configuration instance will be created.
configuration: The Actor configuration to be used. If not passed, a new Configuration instance will
be created.
configure_logging: Should the default logging configuration be configured?
"""
self._configuration = config or Configuration.get_global_configuration()
self._configuration = configuration or Configuration.get_global_configuration()
self._configure_logging = configure_logging
self._apify_client = self.new_client()

self._event_manager: EventManager
Expand All @@ -81,6 +89,9 @@ async def __aenter__(self) -> Self:
When you exit the `async with` block, the `Actor.exit()` method is called, and if any exception happens while
executing the block code, the `Actor.fail` method is called.
"""
if self._configure_logging:
_configure_logging(self._configuration)

await self.init()
return self

Expand Down Expand Up @@ -111,15 +122,20 @@ def __repr__(self) -> str:

return super().__repr__()

def __call__(self, config: Configuration) -> Self:
def __call__(self, configuration: Configuration | None = None, *, configure_logging: bool = True) -> Self:
"""Make a new Actor instance with a non-default configuration."""
return self.__class__(config=config)
return self.__class__(configuration=configuration, configure_logging=configure_logging)

@property
def apify_client(self) -> ApifyClientAsync:
"""The ApifyClientAsync instance the Actor instance uses."""
return self._apify_client

@property
def configuration(self) -> Configuration:
"""The Configuration instance the Actor instance uses."""
return self._configuration

@property
def config(self) -> Configuration:
"""The Configuration instance the Actor instance uses."""
Expand Down
30 changes: 29 additions & 1 deletion src/apify/log.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,12 @@
from __future__ import annotations

import logging
from typing import TYPE_CHECKING

from crawlee._log_config import CrawleeLogFormatter
from crawlee._log_config import CrawleeLogFormatter, configure_logger, get_configured_log_level

if TYPE_CHECKING:
from apify import Configuration

# Name of the logger used throughout the library (resolves to 'apify')
logger_name = __name__.split('.')[0]
Expand All @@ -13,3 +17,27 @@

class ActorLogFormatter(CrawleeLogFormatter): # noqa: D101 Inherited from parent class
pass


def _configure_logging(configuration: Configuration) -> None:
    """Put the default logging configuration in place for the `apify_client`, `httpx` and `apify` loggers.

    The `apify_client` and `apify` loggers are passed to `configure_logger` with `remove_old_handlers=True`.
    The `apify_client` and `httpx` loggers are additionally kept quieter than the rest unless debug logging
    is requested, without ever becoming more verbose than the configured level.

    Args:
        configuration: The configuration from which the log level is resolved.
    """
    apify_client_logger = logging.getLogger('apify_client')
    configure_logger(apify_client_logger, configuration, remove_old_handlers=True)

    level = get_configured_log_level(configuration)

    # Keep apify_client logger quiet unless debug logging is requested. INFO is only a floor here - a stricter
    # configured level (e.g. WARNING) must be respected instead of being loosened back to INFO.
    if level > logging.DEBUG:
        apify_client_logger.setLevel(max(level, logging.INFO))
    else:
        apify_client_logger.setLevel(level)

    # Silence HTTPX logger unless debug logging is requested. WARNING is only a floor - a stricter configured
    # level (e.g. ERROR) is kept as is.
    httpx_logger = logging.getLogger('httpx')
    if level > logging.DEBUG:
        httpx_logger.setLevel(max(level, logging.WARNING))
    else:
        httpx_logger.setLevel(level)

    # Use configured log level for apify logger
    apify_logger = logging.getLogger('apify')
    configure_logger(apify_logger, configuration, remove_old_handlers=True)
20 changes: 5 additions & 15 deletions tests/integration/test_actor_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,21 +13,11 @@ async def test_actor_log(self: TestActorLog, make_actor: ActorFactory) -> None:
async def main() -> None:
import logging

from apify.log import ActorLogFormatter, logger

# Clear any other log handlers, so they don't mess with this test
client_logger = logging.getLogger('apify_client')
apify_logger = logging.getLogger('apify')
client_logger.handlers.clear()
apify_logger.handlers.clear()

# Set handler only on the 'apify' logger
apify_logger.setLevel(logging.DEBUG)
handler = logging.StreamHandler()
handler.setFormatter(ActorLogFormatter())
apify_logger.addHandler(handler)
from apify.log import logger

async with Actor:
logger.setLevel(logging.DEBUG)

# Test Actor.log
Actor.log.debug('Debug message')
Actor.log.info('Info message')
Expand Down Expand Up @@ -82,7 +72,7 @@ async def main() -> None:
assert run_log_lines.pop(0) == '[apify] ERROR Error message'
assert run_log_lines.pop(0) == '[apify] ERROR Exception message'
assert run_log_lines.pop(0) == ' Traceback (most recent call last):'
assert run_log_lines.pop(0) == ' File "/usr/src/app/src/main.py", line 35, in main'
assert run_log_lines.pop(0) == ' File "/usr/src/app/src/main.py", line 25, in main'
assert run_log_lines.pop(0) == " raise ValueError('Dummy ValueError')"
assert run_log_lines.pop(0) == ' ValueError: Dummy ValueError'
assert run_log_lines.pop(0) == '[apify] INFO Multi'
Expand All @@ -91,7 +81,7 @@ async def main() -> None:
assert run_log_lines.pop(0) == 'message'
assert run_log_lines.pop(0) == '[apify] ERROR Actor failed with an exception'
assert run_log_lines.pop(0) == ' Traceback (most recent call last):'
assert run_log_lines.pop(0) == ' File "/usr/src/app/src/main.py", line 43, in main'
assert run_log_lines.pop(0) == ' File "/usr/src/app/src/main.py", line 33, in main'
assert run_log_lines.pop(0) == " raise RuntimeError('Dummy RuntimeError')"
assert run_log_lines.pop(0) == ' RuntimeError: Dummy RuntimeError'
assert run_log_lines.pop(0) == '[apify] INFO Exiting Actor ({"exit_code": 91})'
3 changes: 3 additions & 0 deletions tests/unit/actor/test_actor_env_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ async def test_get_env_use_env_vars(self, monkeypatch: pytest.MonkeyPatch) -> No
ApifyEnvVars.DEFAULT_REQUEST_QUEUE_ID,
ApifyEnvVars.SDK_LATEST_VERSION,
ApifyEnvVars.LOG_FORMAT,
ApifyEnvVars.LOG_LEVEL,
}

legacy_env_vars = {
Expand All @@ -65,6 +66,8 @@ async def test_get_env_use_env_vars(self, monkeypatch: pytest.MonkeyPatch) -> No

# Set up random env vars
expected_get_env: dict[str, Any] = {}
expected_get_env[ApifyEnvVars.LOG_LEVEL.name.lower()] = 'INFO'

for int_env_var in INTEGER_ENV_VARS:
if int_env_var in ignored_env_vars:
continue
Expand Down
3 changes: 3 additions & 0 deletions tests/unit/actor/test_actor_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,7 @@ async def test_actor_metamorpth_not_work_locally(
self: TestActorMethodsWorksOnlyOnPlatform,
caplog: pytest.LogCaptureFixture,
) -> None:
caplog.set_level('WARNING')
async with Actor:
await Actor.metamorph('random-id')

Expand All @@ -145,6 +146,7 @@ async def test_actor_reboot_not_work_locally(
self: TestActorMethodsWorksOnlyOnPlatform,
caplog: pytest.LogCaptureFixture,
) -> None:
caplog.set_level('WARNING')
async with Actor:
await Actor.reboot()

Expand All @@ -156,6 +158,7 @@ async def test_actor_add_webhook_not_work_locally(
self: TestActorMethodsWorksOnlyOnPlatform,
caplog: pytest.LogCaptureFixture,
) -> None:
caplog.set_level('WARNING')
async with Actor:
await Actor.add_webhook(
Webhook(event_types=[WebhookEventType.ACTOR_BUILD_ABORTED], request_url='https://example.com')
Expand Down
2 changes: 1 addition & 1 deletion tests/unit/actor/test_actor_log.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ async def test_actor_log(
monkeypatch.setenv('APIFY_IS_AT_HOME', '1')

with contextlib.suppress(RuntimeError):
async with Actor:
async with Actor(configure_logging=False):
# Test Actor.log
Actor.log.debug('Debug message')
Actor.log.info('Info message')
Expand Down

0 comments on commit 1906bb2

Please sign in to comment.