From 4e8a8b62de6b74dc7a1417ad714df79aaf52e4ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Edgar=20Ram=C3=ADrez=20Mondrag=C3=B3n?= Date: Tue, 26 Sep 2023 15:06:47 -0600 Subject: [PATCH] refactor: Use `functools.lru_cache` instead of the stale `memoization` library (#1981) --- docs/code_samples.md | 5 ++--- poetry.lock | 12 +----------- pyproject.toml | 1 - singer_sdk/helpers/_catalog.py | 10 +++++----- singer_sdk/helpers/jsonpath.py | 4 ++-- tests/core/rest/conftest.py | 5 ++--- tests/core/test_mapper.py | 9 --------- 7 files changed, 12 insertions(+), 34 deletions(-) diff --git a/docs/code_samples.md b/docs/code_samples.md index 0cd79f5637..6d9efefd11 100644 --- a/docs/code_samples.md +++ b/docs/code_samples.md @@ -240,7 +240,7 @@ class SingletonAuthStream(RESTStream): ### Make a stream reuse the same authenticator instance for all requests ```python -from memoization import cached +from functools import cached_property from singer_sdk.authenticators import APIAuthenticatorBase from singer_sdk.streams import RESTStream @@ -248,8 +248,7 @@ from singer_sdk.streams import RESTStream class CachedAuthStream(RESTStream): """A stream with singleton authenticator.""" - @property - @cached + @cached_property def authenticator(self) -> APIAuthenticatorBase: """Stream authenticator.""" return APIAuthenticatorBase(stream=self) diff --git a/poetry.lock b/poetry.lock index 2ff04bccf6..6435a5c434 100644 --- a/poetry.lock +++ b/poetry.lock @@ -1052,16 +1052,6 @@ files = [ {file = "mdurl-0.1.2.tar.gz", hash = "sha256:bb413d29f5eea38f31dd4754dd7377d4465116fb207585f97bf925588687c1ba"}, ] -[[package]] -name = "memoization" -version = "0.4.0" -description = "A powerful caching library for Python, with TTL support and multiple algorithm options. (https://github.com/lonelyenvoy/python-memoization)" -optional = false -python-versions = ">=3, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, <4" -files = [ - {file = "memoization-0.4.0.tar.gz", hash = "sha256:fde5e7cd060ef45b135e0310cfec17b2029dc472ccb5bbbbb42a503d4538a135"}, -] - [[package]] name = "mypy" version = "1.6.1" @@ -2501,4 +2491,4 @@ testing = ["pytest", "pytest-durations"] [metadata] lock-version = "2.0" python-versions = ">=3.8,<4" -content-hash = "e2f9b26b07122d960c57bfd56006d5342d8678c42c26011b90a7fdfbe2d15144" +content-hash = "29f7e2b5cfc0781701a9d486705dbfc3312cb786abe3ddea7c7095c9ab1b7eeb" diff --git a/pyproject.toml b/pyproject.toml index 4f7d282f47..e181a4ed23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,6 @@ inflection = ">=0.5.1" joblib = ">=1.0.1" jsonpath-ng = ">=1.5.3" jsonschema = ">=4.16.0" -memoization = ">=0.3.2,<0.5.0" packaging = ">=23.1" pendulum = ">=2.1.0" PyJWT = "~=2.4" diff --git a/singer_sdk/helpers/_catalog.py b/singer_sdk/helpers/_catalog.py index 49ea2f1cc6..0a5a852d03 100644 --- a/singer_sdk/helpers/_catalog.py +++ b/singer_sdk/helpers/_catalog.py @@ -5,8 +5,6 @@ import typing as t from copy import deepcopy -from memoization import cached - from singer_sdk.helpers._typing import is_object_type if t.TYPE_CHECKING: @@ -14,10 +12,12 @@ from singer_sdk._singerlib import Catalog, SelectionMask -_MAX_LRU_CACHE = 500 - -@cached(max_size=_MAX_LRU_CACHE) +# TODO: this was previously cached using the `memoization` library. However, the +# `functools.lru_cache` decorator does not support non-hashable arguments. +# It is possible that this function is not a bottleneck, but if it is, we should +# consider implementing a custom LRU cache decorator that supports non-hashable +# arguments. def get_selected_schema( stream_name: str, schema: dict, diff --git a/singer_sdk/helpers/jsonpath.py b/singer_sdk/helpers/jsonpath.py index 82c514b41d..6b438c263d 100644 --- a/singer_sdk/helpers/jsonpath.py +++ b/singer_sdk/helpers/jsonpath.py @@ -4,8 +4,8 @@ import logging import typing as t +from functools import lru_cache -import memoization from jsonpath_ng.ext import parse if t.TYPE_CHECKING: @@ -39,7 +39,7 @@ def extract_jsonpath( yield match.value -@memoization.cached +@lru_cache def _compile_jsonpath(expression: str) -> jsonpath_ng.JSONPath: """Parse a JSONPath expression and cache the result. diff --git a/tests/core/rest/conftest.py b/tests/core/rest/conftest.py index 13d7eb9ebb..daeb8d2dce 100644 --- a/tests/core/rest/conftest.py +++ b/tests/core/rest/conftest.py @@ -3,9 +3,9 @@ from __future__ import annotations import typing as t +from functools import cached_property import pytest -from memoization.memoization import cached from requests.auth import HTTPProxyAuth from singer_sdk.authenticators import APIAuthenticatorBase, SingletonMeta @@ -49,8 +49,7 @@ class NaiveAuthenticator(APIAuthenticatorBase): class CachedAuthStream(SimpleRESTStream): """A stream with Naive authentication.""" - @property - @cached + @cached_property def authenticator(self) -> NaiveAuthenticator: """Stream authenticator.""" return NaiveAuthenticator(stream=self) diff --git a/tests/core/test_mapper.py b/tests/core/test_mapper.py index 10f65cf8ef..ab8c5f1fca 100644 --- a/tests/core/test_mapper.py +++ b/tests/core/test_mapper.py @@ -16,7 +16,6 @@ from singer_sdk._singerlib import Catalog from singer_sdk.exceptions import MapExpressionError -from singer_sdk.helpers._catalog import get_selected_schema from singer_sdk.mapper import PluginMapper, RemoveRecordTransform, md5 from singer_sdk.streams.core import Stream from singer_sdk.tap_base import Tap @@ -563,19 +562,11 @@ def discover_streams(self): return [MappedStream(self)] -@pytest.fixture -def _clear_schema_cache() -> None: - """Schemas are cached, so the cache needs to be cleared between test invocations.""" - yield - get_selected_schema.cache_clear() - - @time_machine.travel( datetime.datetime(2022, 1, 1, tzinfo=datetime.timezone.utc), tick=False, ) @pytest.mark.snapshot() -@pytest.mark.usefixtures("_clear_schema_cache") @pytest.mark.parametrize( "stream_maps,flatten,flatten_max_depth,snapshot_name", [