diff --git a/CHANGELOG.md b/CHANGELOG.md index ee001f4..00bd9ec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Added - BYNDER_MAX_DOCUMENT_FILE_SIZE and BYNDER_MAX_IMAGE_FILE_SIZE settings to guard against memory spikes when downloading asset files ([#31]https://github.com/torchbox/wagtail-bynder/pull/31) @ababic +- Automatic reformatting and downsizing of source images ([#32](https://github.com/torchbox/wagtail-bynder/pull/32)) @ababic ## [0.5.1] - 2024-07-29 diff --git a/README.md b/README.md index b9a805c..7153e5c 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,16 @@ To sync images updated within the last three days: $ python manage.py update_stale_images --days=3 ``` +### Automatic conversion and downsizing of images + +When the `BYNDER_IMAGE_SOURCE_THUMBNAIL_NAME` derivative for an image is successfully downloaded by Wagtail, it is passed to the `convert_downloaded_image()` method of your custom image model in order to convert it into something more suitable for Wagtail. + +Firstly, downloaded images are converted to the most appropriate type, according to your project's `WAGTAILIMAGES_FORMAT_CONVERSIONS` setting and Wagtail's default preferences. For example, by default, `BMP` and `WebP` images are converted to `PNG`. + +Secondly, images are downsized according to the `BYNDER_MAX_SOURCE_IMAGE_WIDTH` and `BYNDER_MAX_SOURCE_IMAGE_HEIGHT` setting values, in a way that preserves their original aspect ratio. Whilst Bynder is expected to address this on their side by generating appropriately-sized derivatives - this isn't always possible with their basic offering. + +Ensuring source images only have enough pixels to meet the rendition-generation requirements of your project has an enormous long-term benefit for a Wagtail project (especially one with image-heavy content pages), as it provides efficiency gains **every single time** a new rendition is generated. 
+ ## What to ask of Bynder When communicating with Bynder about configuring a new instance for compatibility with Wagtail, there are a few things you'll want to be clear about: @@ -241,17 +251,6 @@ An API token for Bynder's JavaScript 'compact view' to use. The value is injecte for the JavaScript to pick up, exposing it to Wagtail users. Because of this, it should be different to `BYNDER_API_TOKEN` and only needs to have basic read permissions. -### `BYNDER_IMAGE_SOURCE_THUMBNAIL_NAME` - -Default: `"WagtailSource"` - -The name of the automatically generated derivative that should be downloaded and used as the `file` value for the -representative Wagtail image (as it appears in `thumbnails` in the API representation). - -WARNING: It's important to get this right, because if the specified derivative is NOT present in the response for an -image for any reason, the ORIGINAL will be downloaded - which will lead to slow chooser response times and higher memory -usage when generating renditions. - ### `BYNDER_MAX_DOCUMENT_FILE_SIZE` Example: `10485760` @@ -264,6 +263,17 @@ The maximum acceptable file size (in Bytes) when downloading a 'Document' asset - How large the documents are that editors want to feature in content - Whether you are doing anything particularly memory intensive with document files in your project (e.g. text/content analysis) +### `BYNDER_IMAGE_SOURCE_THUMBNAIL_NAME` + +Default: `"WagtailSource"` + +The name of the automatically generated derivative that should be downloaded and used as the `file` value for the +representative Wagtail image (as it appears in `thumbnails` in the API representation). + +WARNING: It's important to get this right, because if the specified derivative is NOT present in the response for an +image for any reason, the ORIGINAL will be downloaded - which will lead to slow chooser response times and higher memory +usage when generating renditions. 
+ ### `BYNDER_MAX_IMAGE_FILE_SIZE` Example: `10485760` @@ -276,6 +286,22 @@ This setting is provided separately to `BYNDER_MAX_DOCUMENT_FILE_SIZE`, because As with `BYNDER_MAX_DOCUMENT_FILE_SIZE`, this can be tweaked for individual projects/environments to reflect how much RAM is available in the host infrastructure. +### `BYNDER_MAX_SOURCE_IMAGE_WIDTH` + +Example: `5000` + +Default: `3500` + +Used to restrict the **width** of images downloaded from Bynder before they are used as source images for objects in Wagtail's image library. + +### `BYNDER_MAX_SOURCE_IMAGE_HEIGHT` + +Example: `5000` + +Default: `3500` + +Used to restrict the **height** of images downloaded from Bynder before they are used as source images for objects in Wagtail's image library. + ### `BYNDER_VIDEO_MODEL` Example: `"video.Video"` diff --git a/src/wagtail_bynder/models.py b/src/wagtail_bynder/models.py index 864512b..7b9bfb2 100644 --- a/src/wagtail_bynder/models.py +++ b/src/wagtail_bynder/models.py @@ -1,18 +1,27 @@ +import io import logging import math +import os +from dataclasses import dataclass from datetime import datetime from mimetypes import guess_type +from tempfile import NamedTemporaryFile from typing import Any from django.conf import settings -from django.core.files.uploadedfile import UploadedFile +from django.core.files.uploadedfile import InMemoryUploadedFile, UploadedFile from django.db import models from django.utils.functional import cached_property from django.utils.translation import gettext_lazy as _ from wagtail.admin.panels import FieldPanel, MultiFieldPanel from wagtail.documents.models import AbstractDocument, Document -from wagtail.images.models import AbstractImage, Image +from wagtail.images.models import ( + IMAGE_FORMAT_EXTENSIONS, + AbstractImage, + Filter, + Image, +) from wagtail.models import Collection, CollectionMember from wagtail.search import index @@ -24,6 +33,15 @@ logger = logging.getLogger("wagtail.images") +@dataclass(frozen=True) +class 
ConvertedImageDetails: + width: int + height: int + file_size: int + image_format: str + mime_type: str + + class BynderAssetMixin(models.Model): # Fields relevant to the Bynder integration only bynder_id = models.CharField( @@ -266,6 +284,116 @@ def update_file(self, asset_data: dict[str, Any]) -> None: def download_file(self, source_url: str) -> UploadedFile: return utils.download_image(source_url) + def process_downloaded_file( + self, + file: UploadedFile, + asset_data: dict[str, Any] | None = None, + ) -> UploadedFile: + """ + Overrides ``BynderAssetWithFileMixin.process_downloaded_file()`` to + pass the downloaded image to ``convert_downloaded_image()`` before using it as + a value for this object's ``file`` field. + """ + + # Write to filesystem to avoid using memory for the same image + tmp = NamedTemporaryFile(mode="w+b", dir=settings.FILE_UPLOAD_TEMP_DIR) + details = self.convert_downloaded_image(file, tmp) + + # The original file is now redundant and can be deleted, making + # more memory available + del file.file + + # Load the converted image into memory to speed up the additional + # reads and writes performed by Wagtail. The handle we already hold is + # reused, because reopening a NamedTemporaryFile by name fails on Windows + tmp.seek(0) + new_file = io.BytesIO(tmp.read()) + # Closing the temporary file also deletes it from disk + tmp.close() + + name_minus_extension = os.path.splitext(file.name)[0] + new_extension = IMAGE_FORMAT_EXTENSIONS[details.image_format] + + # Return replacement InMemoryUploadedFile + return InMemoryUploadedFile( + new_file, + field_name="file", + name=f"{name_minus_extension}{new_extension}", + content_type=details.mime_type, + size=details.file_size, + charset=None, + ) + + def get_source_image_filter_string( + self, original_format: str, *, is_animated: bool + ) -> str: + """ + Return a string for ``convert_downloaded_image()`` to use to create a + ``wagtail.images.models.Filter`` object that can be used for source image + conversion. 
+ """ + + # Retrieve maximum height and width from settings + max_width = int(getattr(settings, "BYNDER_MAX_SOURCE_IMAGE_WIDTH", 3500)) + max_height = int(getattr(settings, "BYNDER_MAX_SOURCE_IMAGE_HEIGHT", 3500)) + + filter_str = f"max-{max_width}x{max_height}" + if ( + utils.get_output_image_format(original_format, is_animated=is_animated) + == "jpeg" + ): + # Since this will be a source image, use a higher JPEG quality than normal + filter_str += "|format-jpeg|jpegquality-90" + + return filter_str + + def convert_downloaded_image( + self, source_file, target_file + ) -> ConvertedImageDetails: + """ + Handles the conversion of the supplied ``source_file`` into something + ``process_downloaded_file()`` can use to successfully assemble a + new ``InMemoryUploadedFile``. + + ``target_file`` must be a writable file-like object, and is where the + new file contents are written. + + The return value is a ``ConvertedImageDetails`` object, which allows + ``process_downloaded_file()`` to determine the height, width, + format, mime-type and file size of the newly generated image without + having to perform any more file operations. 
+ """ + + width, height, original_format, is_animated = utils.get_image_info(source_file) + filter_str = self.get_source_image_filter_string( + original_format, is_animated=is_animated + ) + + # Filter.run() expects the object's width and height to reflect + # the image we're formatting, so we update them temporarily + original_width, original_height = self.width, self.height + self.width, self.height = width, height + try: + # Use wagtail built-ins to resize/reformat the image + willow_image = Filter(filter_str).run( + self, + target_file, + source_file, + ) + finally: + # Always restore original field values + self.width, self.height = original_width, original_height + + # Gather up all of the useful data about the new image + final_width, final_height = willow_image.get_size() + return ConvertedImageDetails( + final_width, + final_height, + target_file.tell(), + willow_image.format_name, + willow_image.mime_type, + ) + def set_focal_area_from_focus_point( self, x: int, y: int, original_height: int, original_width: int ) -> None: diff --git a/src/wagtail_bynder/utils.py b/src/wagtail_bynder/utils.py index 18e58d3..b971f49 100644 --- a/src/wagtail_bynder/utils.py +++ b/src/wagtail_bynder/utils.py @@ -8,9 +8,11 @@ from asgiref.local import Local from bynder_sdk import BynderClient from django.conf import settings +from django.core.files import File from django.core.files.uploadedfile import InMemoryUploadedFile from django.template.defaultfilters import filesizeformat from wagtail.models import Collection +from willow import Image from .exceptions import BynderAssetFileTooLarge @@ -59,6 +61,29 @@ def download_image(url: str) -> InMemoryUploadedFile: return download_file(url, max_filesize, max_filesize_setting_name) +def get_image_info(file: File) -> tuple[int, int, str, bool]: + willow_image = Image.open(file) + width, height = willow_image.get_size() + return (width, height, willow_image.format_name, willow_image.has_animation()) + + +def 
get_output_image_format(original_format: str, *, is_animated: bool = False) -> str: + conversions = { + "avif": "png", + "bmp": "png", + "webp": "png", + } + if not is_animated: + # Convert non-animated GIFs to PNG as well + conversions["gif"] = "png" + + # Allow the user to override the conversions + custom_conversions = getattr(settings, "WAGTAILIMAGES_FORMAT_CONVERSIONS", {}) + conversions.update(custom_conversions) + + return conversions.get(original_format, original_format) + + def filename_from_url(url: str) -> str: return os.path.basename(url) diff --git a/tests/test_models.py b/tests/test_models.py index 75eff12..f5276a3 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -1,7 +1,9 @@ +import io + from unittest import mock from django.conf import settings -from django.test import SimpleTestCase +from django.test import SimpleTestCase, override_settings from wagtail.documents import get_document_model from wagtail.images import get_image_model @@ -293,6 +295,74 @@ def test_update_from_asset_data_without_focal_point_change(self): self.assertEqual(new_focal_point, current_focal_point) self.assertFalse(self.obj._focal_point_changed) + def test_process_downloaded_file(self): + fake_image = get_fake_downloaded_image("example.jpg", 500, 200) + state_before = self.obj.__dict__.copy() + + # The original image data should be available via the `file` attribute + self.assertTrue(fake_image.file) + + result = self.obj.process_downloaded_file(fake_image, self.asset_data) + + # Wagtail doesn't convert JPEGs to a different format by default, so the + # resulting name and content type should be the same as what was provided + self.assertEqual(result.name, fake_image.name) + self.assertEqual(result.content_type, fake_image.content_type) + + # The original image data should have been deleted to create headroom + # for the converted image + self.assertFalse(hasattr(fake_image, "file")) + + # No attribute values on the object itself should have changed + 
self.assertEqual(state_before, self.obj.__dict__) + + @override_settings( + BYNDER_MAX_SOURCE_IMAGE_WIDTH=100, + BYNDER_MAX_SOURCE_IMAGE_HEIGHT=100, + WAGTAILIMAGES_FORMAT_CONVERSIONS={"gif": "png", "bmp": "png", "tiff": "jpeg"}, + ) + def test_convert_downloaded_image(self): + for original_details, expected_details in ( + ( + ("tall.gif", "gif", "image/gif", 240, 400), + ("tall.png", "png", "image/png", 60, 100), + ), + ( + ("wide.bmp", "bmp", "image/bmp", 400, 100), + ("wide.png", "png", "image/png", 100, 25), + ), + ( + ("big-square.tif", "tiff", "image/tiff", 400, 400), + ("big-square.jpg", "jpeg", "image/jpeg", 100, 100), + ), + ( + ("small-square.tiff", "tiff", "image/tiff", 80, 80), + ("small-square.jpg", "jpeg", "image/jpeg", 80, 80), + ), + ): + with self.subTest(f"{original_details[0]} becomes {expected_details[0]}"): + original = get_fake_downloaded_image( + name=original_details[0], + width=original_details[3], + height=original_details[4], + ) + self.assertEqual(original.content_type, original_details[2]) + result = self.obj.convert_downloaded_image(original, io.BytesIO()) + self.assertEqual( + ( + result.image_format, + result.mime_type, + result.width, + result.height, + ), + ( + expected_details[1], + expected_details[2], + expected_details[3], + expected_details[4], + ), + ) + class BynderSyncedVideoTests(SimpleTestCase): def setUp(self):