Skip to content

Commit

Permalink
feat: Improve episode image downloading in admin panel
Browse files Browse the repository at this point in the history
  • Loading branch information
s045pd committed Jul 2, 2024
1 parent e430b3f commit 64a34d3
Show file tree
Hide file tree
Showing 5 changed files with 46 additions and 17 deletions.
13 changes: 3 additions & 10 deletions apps/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,7 @@
from django.utils.html import format_html

from apps.models import Book, Episode, Image, Tag
from apps.tasks import (
convert_to_pdf,
download_image,
download_images,
find_episodes,
find_images,
)
from apps.tasks import convert_to_pdf, download_images, find_episodes, find_images


@admin.register(Tag)
Expand Down Expand Up @@ -132,9 +126,8 @@ def convert_to_pdf_force(self, request, queryset):

def get_images(self, request, queryset):
"""Download images for selected episodes"""
download_images.apply_async(
args=[list(queryset.only("id").values_list("id", flat=True))]
)
for episode_id in queryset.only("id").values_list("id", flat=True).iterator():
find_images.apply_async(args=[episode_id, True])

get_images.short_description = "Download Images (Force)"

Expand Down
9 changes: 8 additions & 1 deletion apps/management/commands/random_book_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,16 @@ async def handle_async(self):
book_dir = Path("books")
book_dir.mkdir(exist_ok=True)
worker = ImageExtractor()
await worker.get_max_page()

random_page = choice(range(1, worker.max_page + 1))

print("Start fetching books")
if not (books := await self.collect_async_generator(worker.get_books())):
if not (
books := await self.collect_async_generator(
worker.get_books(target_page=random_page)
)
):
print("No books found")
return

Expand Down
21 changes: 19 additions & 2 deletions apps/services.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def __init__(self):
"Priority": "u=0, i",
}
)
self.max_page = 2000

async def _send_request(self, url: str, use_curl: bool = True) -> object:
"""Send a GET request to the given URL"""
Expand All @@ -42,9 +43,25 @@ async def _send_request(self, url: str, use_curl: bool = True) -> object:

return await self.cli.get(url=url, headers={"referer": "https://se8.us/"})

async def get_books(self) -> AsyncGenerator[str, None]:
async def get_max_page(self) -> int:
"""Fetch the maximum page number"""
try:
resp = await self._send_request(f"{self.origin}/index.php/category/page/1")
self.max_page = int(resp.xpath('//a[@class="end"]/@href')[0].split("/")[-1])
print(f"Max page: {self.max_page}")
except Exception as e:
print(e)

async def get_books(self, target_page: int = None) -> AsyncGenerator[str, None]:
"""Fetch books from the website"""
for page in range(1, 2000):

page_range = (
range(1, self.max_page + 1)
if not target_page
else range(target_page, target_page + 1)
)

for page in page_range:
print(f"Fetching page {page}")
resp = await self._send_request(
f"{self.origin}/index.php/category/page/{page}"
Expand Down
8 changes: 4 additions & 4 deletions apps/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ def find_episodes(book_id: str, start_index: int | None = None):
loop.run_until_complete(process_episodes(book_id))


async def process_images(episode_id: str):
async def process_images(episode_id: str, force: bool = False):
episode = await sync_to_async(Episode.objects.get)(pk=episode_id)
images_task = []
async for data in ImageExtractor().get_images(episode.raw_url):
Expand All @@ -98,7 +98,7 @@ async def process_images(episode_id: str):
episode=episode,
defaults=data,
)
if created:
if created or force:
images_task.append([image.id, image.raw_url])
logger.info(f"Find image: {episode.title} - {image.index}")

Expand All @@ -112,13 +112,13 @@ async def process_images(episode_id: str):


@shared_task
def find_images(episode_id: str, force: bool = False):
    """
    Find images for a specific episode and create or update Image objects
    Usage: from apps.models import Episode;from apps.tasks import find_images as t;t( Episode.objects.first().id );
    """
    # force=True re-queues downloads even for images that already exist;
    # the flag is threaded through to process_images, which checks it per image.
    with async_event_loop() as loop:
        loop.run_until_complete(process_images(episode_id, force=force))


@shared_task
Expand Down
12 changes: 12 additions & 0 deletions apps/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,16 +47,19 @@ def combine_images(images):


def create_pdf(img):
# Calculate dimensions and scaling
img_width, img_height = img.size
pdf_width, pdf_height = A4
scale = pdf_width / img_width
scaled_height = int(img_height * scale)
pages = (scaled_height + int(pdf_height) - 1) // int(pdf_height)

# Create PDF
buffer = BytesIO()
pdf_canvas = canvas.Canvas(buffer, pagesize=A4)

for page in range(pages):
# Calculate crop box for each page
top = int(page * pdf_height / scale)
bottom = int((page + 1) * pdf_height / scale)
bottom = min(bottom, img_height)
Expand All @@ -65,8 +68,10 @@ def create_pdf(img):
logger.error(f"Invalid crop box coordinates: top={top}, bottom={bottom}")
continue

# Crop and resize image for the current page
crop_box = (0, top, img_width, bottom)
cropped_img = img.crop(crop_box)

new_width = int(pdf_width)
new_height = int(cropped_img.height * scale)

Expand All @@ -79,6 +84,8 @@ def create_pdf(img):
continue

cropped_img = cropped_img.resize((new_width, new_height))

# Save cropped image to buffer and draw on PDF
img_buffer = BytesIO()
cropped_img.save(img_buffer, format="PNG")
img_buffer.seek(0)
Expand All @@ -91,8 +98,13 @@ def create_pdf(img):
height=cropped_img.height,
)

pdf_canvas.showPage()

pdf_canvas.save()

# Save PDF to model
buffer.seek(0)

return buffer


Expand Down

0 comments on commit 64a34d3

Please sign in to comment.