Skip to content

Commit

Permalink
Merge branch 'develop' into issue205
Browse files Browse the repository at this point in the history
  • Loading branch information
cary-rowen committed Dec 28, 2024
2 parents a3a2d9d + 1bc3ac8 commit 66abdd3
Show file tree
Hide file tree
Showing 8 changed files with 2,225 additions and 1,356 deletions.
23 changes: 23 additions & 0 deletions bookworm/document/cache_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
"""Caching utilities"""
from pathlib import Path

from diskcache import Cache


def is_document_modified(key: str, path: Path, cache: Cache) -> bool:
    """
    Check whether a particular document was modified since its mtime was cached.

    We can currently afford to naively just stat() the file path in order to
    determine it: the st_mtime stored under ``f"{key}_meta"`` is compared with
    the file's current st_mtime.

    Args:
        key: Cache key of the document; metadata lives under ``f"{key}_meta"``.
        path: Filesystem path of the document to stat().
        cache: diskcache.Cache instance holding the cached metadata.

    Returns:
        True when the file's mtime differs from the cached one, or when no
        cached mtime exists at all (safe default: treat as modified).
    """
    cached_mtime = cache.get(f"{key}_meta")
    # Cache.get returns None for a missing key; compare with `is None` rather
    # than truthiness so a legitimate mtime of 0.0 (epoch) is not mistaken
    # for "no information" (which would force a permanent cache miss).
    if cached_mtime is None:
        # No information for the book was found, so return True just to be safe
        # TODO: Is this acceptable?
        return True
    return path.stat().st_mtime != cached_mtime

def set_document_modified_time(key: str, path: Path, cache: Cache) -> None:
    """
    Record the current st_mtime of *path* in *cache* under ``f"{key}_meta"``.

    Companion to ``is_document_modified``: stores the modification time that a
    later freshness check compares against.

    Args:
        key: Cache key of the document; metadata is stored under ``f"{key}_meta"``.
        path: Filesystem path of the document to stat().
        cache: diskcache.Cache instance holding the cached metadata.
    """
    # Note: the original signature claimed `-> bool` but nothing was returned;
    # this is a pure side-effecting setter, so the honest annotation is None.
    cache.set(f"{key}_meta", path.stat().st_mtime)

7 changes: 5 additions & 2 deletions bookworm/document/formats/epub.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
from lxml import html as lxml_html
from selectolax.parser import HTMLParser

from bookworm.document import cache_utils
from bookworm.i18n import LocaleInfo
from bookworm.image_io import ImageIO
from bookworm.logger import logger
Expand Down Expand Up @@ -190,7 +191,7 @@ def epub_html_items(self) -> tuple[str]:
# As reported in issue 243
# We will now sort the items obtained earlier based on the position that the chapter itself occupies in the TOC
spine = [x[0].split('/')[-1] for x in self.epub.spine]
log.info(spine)
log.debug(spine)
try:
items = sorted(items, key=lambda x: spine.index(x.id))
except ValueError:
Expand Down Expand Up @@ -310,7 +311,8 @@ def html_content(self):
self._get_cache_directory(), eviction_policy="least-frequently-used"
)
cache_key = self.uri.to_uri_string()
if cached_html_content := cache.get(cache_key):
document_path = self.get_file_system_path()
if (cached_html_content := cache.get(cache_key)) and not cache_utils.is_document_modified(cache_key, document_path, cache):
return cached_html_content.decode("utf-8")
html_content_gen = (
(item.file_name, item.content) for item in self.epub_html_items
Expand All @@ -323,6 +325,7 @@ def html_content(self):
title=self.epub.title, body_content=buf.getvalue()
)
cache.set(cache_key, html_content.encode("utf-8"))
cache_utils.set_document_modified_time(cache_key, document_path, cache)
return html_content

def prefix_html_ids(self, filename, html):
Expand Down
5 changes: 4 additions & 1 deletion bookworm/document/formats/word.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@

from bookworm import app
from bookworm.concurrency import process_worker, threaded_worker
from bookworm.document import cache_utils
from bookworm.document.uri import DocumentUri
from bookworm.logger import logger
from bookworm.paths import app_path, home_data_path
Expand Down Expand Up @@ -85,17 +86,19 @@ def try_decrypt(self, data_buf, decryption_key):

def _get_html_content_from_docx(self, data_buf, is_encrypted_document):
    """
    Convert the docx content in *data_buf* to HTML, with on-disk caching.

    The cache is keyed by the document URI; a cached entry is reused only if
    the underlying file has not been modified since the entry was stored
    (checked via cache_utils). Encrypted documents are never cached.

    Args:
        data_buf: Seekable binary buffer holding the (decrypted) docx bytes.
        is_encrypted_document: When True, skip writing to the cache.

    Returns:
        The document's HTML content as a str.
    """
    data_buf.seek(0)
    # BUG FIX: the original assigned the bound method itself
    # (`self.get_file_system_path` without parentheses), so `doc_path` was a
    # method object, not a path, and `is_document_modified` raised
    # AttributeError on `path.stat()` at every cache hit. The epub.py
    # counterpart calls `self.get_file_system_path()` correctly.
    doc_path = self.get_file_system_path()
    cache = Cache(
        self._get_cache_directory(), eviction_policy="least-frequently-used"
    )
    cache_key = self.uri.to_uri_string()
    # Serve from cache only when an entry exists AND the source file on disk
    # has not changed since the entry was recorded.
    if (cached_html_content := cache.get(cache_key)) and not cache_utils.is_document_modified(cache_key, doc_path, cache):
        return cached_html_content.decode("utf-8")
    result = mammoth.convert_to_html(data_buf, include_embedded_style_map=False)
    data_buf.seek(0)
    html_content = self.make_proper_html(result.value, data_buf)
    if not is_encrypted_document:
        cache.set(cache_key, html_content.encode("utf-8"))
        # Remember the file's mtime so the next freshness check can compare.
        cache_utils.set_document_modified_time(cache_key, doc_path, cache)
    return html_content

def make_proper_html(self, html_string, data_buf):
Expand Down
Loading

0 comments on commit 66abdd3

Please sign in to comment.