From 64a34d3e6486070aa8f1338278ddeaf38e4cdbaa Mon Sep 17 00:00:00 2001 From: s045pd Date: Wed, 3 Jul 2024 07:53:43 +0800 Subject: [PATCH] feat: Improve episode image downloading in admin panel --- apps/admin.py | 13 +++---------- apps/management/commands/random_book_get.py | 9 ++++++++- apps/services.py | 21 +++++++++++++++++++-- apps/tasks.py | 8 ++++---- apps/tools.py | 12 ++++++++++++ 5 files changed, 46 insertions(+), 17 deletions(-) diff --git a/apps/admin.py b/apps/admin.py index bb6a412..b9c2d2e 100644 --- a/apps/admin.py +++ b/apps/admin.py @@ -2,13 +2,7 @@ from django.utils.html import format_html from apps.models import Book, Episode, Image, Tag -from apps.tasks import ( - convert_to_pdf, - download_image, - download_images, - find_episodes, - find_images, -) +from apps.tasks import convert_to_pdf, download_images, find_episodes, find_images @admin.register(Tag) @@ -132,9 +126,8 @@ def convert_to_pdf_force(self, request, queryset): def get_images(self, request, queryset): """Download images for selected episodes""" - download_images.apply_async( - args=[list(queryset.only("id").values_list("id", flat=True))] - ) + for episode_id in queryset.only("id").values_list("id", flat=True).iterator(): + find_images.apply_async(args=[episode_id, True]) get_images.short_description = "Download Images (Force)" diff --git a/apps/management/commands/random_book_get.py b/apps/management/commands/random_book_get.py index ce8a102..61871d7 100644 --- a/apps/management/commands/random_book_get.py +++ b/apps/management/commands/random_book_get.py @@ -36,9 +36,16 @@ async def handle_async(self): book_dir = Path("books") book_dir.mkdir(exist_ok=True) worker = ImageExtractor() + await worker.get_max_page() + + random_page = choice(range(1, worker.max_page + 1)) print("Start fetching books") - if not (books := await self.collect_async_generator(worker.get_books())): + if not ( + books := await self.collect_async_generator( + worker.get_books(target_page=random_page) + ) + ): print("No books found") return diff --git a/apps/services.py b/apps/services.py index 27ec0f9..2f663d6 100644 --- a/apps/services.py +++ b/apps/services.py @@ -31,6 +31,7 @@ def __init__(self): "Priority": "u=0, i", } ) + self.max_page = 2000 async def _send_request(self, url: str, use_curl: bool = True) -> object: """Send a GET request to the given URL""" @@ -42,9 +43,25 @@ async def _send_request(self, url: str, use_curl: bool = True) -> object: return await self.cli.get(url=url, headers={"referer": "https://se8.us/"}) - async def get_books(self) -> AsyncGenerator[str, None]: + async def get_max_page(self) -> int: + """Fetch the maximum page number""" + try: + resp = await self._send_request(f"{self.origin}/index.php/category/page/1") + self.max_page = int(resp.xpath('//a[@class="end"]/@href')[0].split("/")[-1]) + print(f"Max page: {self.max_page}") + except Exception as e: + print(e) + + async def get_books(self, target_page: int = None) -> AsyncGenerator[str, None]: """Fetch books from the website""" - for page in range(1, 2000): + + page_range = ( + range(1, self.max_page + 1) + if not target_page + else range(target_page, target_page + 1) + ) + + for page in page_range: print(f"Fetching page {page}") resp = await self._send_request( f"{self.origin}/index.php/category/page/{page}" diff --git a/apps/tasks.py b/apps/tasks.py index 83bf8e3..3c3c60e 100644 --- a/apps/tasks.py +++ b/apps/tasks.py @@ -89,7 +89,7 @@ def find_episodes(book_id: str, start_index: int | None = None): loop.run_until_complete(process_episodes(book_id)) -async def process_images(episode_id: str): +async def process_images(episode_id: str, force: bool = False): episode = await sync_to_async(Episode.objects.get)(pk=episode_id) images_task = [] async for data in ImageExtractor().get_images(episode.raw_url): @@ -98,7 +98,7 @@ async def process_images(episode_id: str): episode=episode, defaults=data, ) - if created: + if created or force: images_task.append([image.id, image.raw_url]) logger.info(f"Find image: {episode.title} - {image.index}") @@ -112,13 +112,13 @@ async def process_images(episode_id: str): @shared_task -def find_images(episode_id: str): +def find_images(episode_id: str, force: bool = False): """ Find images for a specific episode and create or update Image objects Usage: from apps.models import Episode;from apps.tasks import find_images as t;t( Episode.objects.first().id ); """ with async_event_loop() as loop: - loop.run_until_complete(process_images(episode_id)) + loop.run_until_complete(process_images(episode_id, force=force)) @shared_task diff --git a/apps/tools.py b/apps/tools.py index 3ea2f88..5ec9a00 100644 --- a/apps/tools.py +++ b/apps/tools.py @@ -47,16 +47,19 @@ def combine_images(images): def create_pdf(img): + # Calculate dimensions and scaling img_width, img_height = img.size pdf_width, pdf_height = A4 scale = pdf_width / img_width scaled_height = int(img_height * scale) pages = (scaled_height + int(pdf_height) - 1) // int(pdf_height) + # Create PDF buffer = BytesIO() pdf_canvas = canvas.Canvas(buffer, pagesize=A4) for page in range(pages): + # Calculate crop box for each page top = int(page * pdf_height / scale) bottom = int((page + 1) * pdf_height / scale) bottom = min(bottom, img_height) @@ -65,8 +68,10 @@ def create_pdf(img): logger.error(f"Invalid crop box coordinates: top={top}, bottom={bottom}") continue + # Crop and resize image for the current page crop_box = (0, top, img_width, bottom) cropped_img = img.crop(crop_box) + new_width = int(pdf_width) new_height = int(cropped_img.height * scale) @@ -79,6 +84,8 @@ def create_pdf(img): continue cropped_img = cropped_img.resize((new_width, new_height)) + + # Save cropped image to buffer and draw on PDF img_buffer = BytesIO() cropped_img.save(img_buffer, format="PNG") img_buffer.seek(0) @@ -91,8 +98,13 @@ def create_pdf(img): height=cropped_img.height, ) + pdf_canvas.showPage() + pdf_canvas.save() + + # Save PDF to model buffer.seek(0) + return buffer