Skip to content

Commit

Permalink
Avoid eviction on entry replacement
Browse files Browse the repository at this point in the history
`*Result` objects are immutable, thus if a `PartialResult` gets filled
further it has to be re-set into the cache.

This does not change the cache size, but because the current S3 and
SIEVE implementations unconditionally check the cache size on
`__setitem__` they may evict an entry unnecessarily.

Fix that: if there is already a valid cache entry for the key, just
update it in place instead of trying to evict then creating a brand
new entry.

Also update the LRU to pre-check for size (and presence as well), this
may make setting a bit more expensive than post-check but it avoids
"wronging" the user by bypassing the limit they set.

Fixes #201
  • Loading branch information
masklinn committed Mar 26, 2024
1 parent a270fe2 commit 854c12f
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 24 deletions.
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ select = ["F", "E", "W", "I", "RET", "RUF", "PT"]
ignore = [
"RET505", # elif after return
"E501", # line too long, formatter should take care of the fixable ones
"E721", # I'll compare types with `is` if I want
]

[tool.ruff.lint.isort]
Expand Down
43 changes: 19 additions & 24 deletions src/ua_parser/caching.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,11 +26,11 @@


class Cache(Protocol):
"""Cache abstract protocol. The :class:`CachingResolver` will look
"""Cache()
Cache abstract protocol. The :class:`CachingResolver` will look
values up, merge what was returned (possibly nothing) with what it
got from its actual parser, and *re-set the result*.
A :class:`Cache` is responsible for its own replacement policy.
"""

@abc.abstractmethod
Expand All @@ -55,12 +55,6 @@ class Lru:
full of popular items then all of them being replaced at once:
:class:`S3Fifo` and :class:`Sieve` are FIFO-based caches and have
worst-case O(n) eviction.
.. note:: The cache size is adjusted after inserting the new
entry, so the cache will temporarily contain ``maxsize +
1`` items.
"""

def __init__(self, maxsize: int):
Expand All @@ -77,10 +71,9 @@ def __getitem__(self, key: str) -> Optional[PartialResult]:

def __setitem__(self, key: str, value: PartialResult) -> None:
with self.lock:
self.cache[key] = value
self.cache.move_to_end(key)
while len(self.cache) > self.maxsize:
if len(self.cache) >= self.maxsize and key not in self.cache:
self.cache.popitem(last=False)
self.cache[key] = value


@dataclasses.dataclass
Expand All @@ -92,14 +85,12 @@ class CacheEntry:


class S3Fifo:
"""FIFO-based quick-demotion lazy-promotion cache by Yang, Zhang,
Qiu, Yue, Vinayak.
"""FIFO-based quick-demotion lazy-promotion cache [S3-FIFO]_.
Experimentally provides excellent hit rate at lower cache sizes,
for a relatively simple and efficient implementation. Notably
excellent at handling "one hit wonders", aka entries seen only
once during a work-set (or reasonable work window).
"""

def __init__(self, maxsize: int):
Expand All @@ -113,16 +104,19 @@ def __init__(self, maxsize: int):
self.lock = threading.Lock()

def __getitem__(self, key: str) -> Optional[PartialResult]:
e = self.index.get(key)
if e and isinstance(e, CacheEntry):
# small race here, we could bump the freq above the limit
if (e := self.index.get(key)) and type(e) is CacheEntry:
# small race here, we could miss an increment
e.freq = min(e.freq + 1, 3)
return e.value

return None

def __setitem__(self, key: str, r: PartialResult) -> None:
with self.lock:
if (e := self.index.get(key)) and type(e) is CacheEntry:
e.value = r
return

if len(self.small) + len(self.main) >= self.maxsize:
# if main is not overcapacity, resize small
if len(self.main) < self.main_target:
Expand All @@ -133,7 +127,7 @@ def __setitem__(self, key: str, r: PartialResult) -> None:
self._evict_main()

entry = CacheEntry(key, r, 0)
if isinstance(self.index.get(key), str):
if type(self.index.get(key)) is str:
self.main.appendleft(entry)
else:
self.small.appendleft(entry)
Expand Down Expand Up @@ -175,8 +169,7 @@ class SieveNode:


class Sieve:
"""FIFO-based quick-demotion cache by Zhang, Yang, Yue, Vigfusson,
Rashmi.
"""FIFO-based quick-demotion cache [SIEVE]_.
Simpler FIFO-based cache, cousin of :class:`S3Fifo`.
Experimentally slightly lower hit rates than :class:`S3Fifo` (if
Expand All @@ -187,7 +180,6 @@ class Sieve:
Can be an interesting candidate when trying to save on memory,
although the contained entries will generally be much larger than
the cache itself.
"""

def __init__(self, maxsize: int) -> None:
Expand All @@ -200,15 +192,18 @@ def __init__(self, maxsize: int) -> None:
self.lock = threading.Lock()

def __getitem__(self, key: str) -> Optional[PartialResult]:
entry = self.cache.get(key)
if entry:
if entry := self.cache.get(key):
entry.visited = True
return entry.value

return None

def __setitem__(self, key: str, value: PartialResult) -> None:
with self.lock:
if e := self.cache.get(key):
e.value = value
return

if len(self.cache) >= self.maxsize:
self._evict()

Expand Down

0 comments on commit 854c12f

Please sign in to comment.