Merge pull request #133 from eea/develop

Refs #272346 - Find unused images script.
eea · Nov 21, 2024 · 19833f0 · 19833f0
2 parents b1a1178 + 40ddf7d
commit 19833f0
Show file tree

Hide file tree

Showing 4 changed files with 199 additions and 1 deletion.
diff --git a/clms/addon/browser/cleanup.py b/clms/addon/browser/cleanup.py
@@ -0,0 +1,187 @@
+"""
+Cleanup: find unused images
+"""
+
+from Products.Five.browser import BrowserView
+from zope.schema import getFieldsInOrder
+from plone.dexterity.utils import iterSchemata
+from plone.app.uuid.utils import uuidToObject
+import re
+from logging import getLogger
+
+
+def convert_uid_to_path(uid):
+    """Resolve a content UID to the absolute URL of the object it names.
+
+    Returns the value of ``absolute_url()`` of the object found by
+    ``uuidToObject(uid)``, or None when the lookup or the URL computation
+    fails for any reason.
+    """
+    try:
+        # Best effort: presumably uuidToObject yields None for an unknown
+        # uid (TODO confirm); the AttributeError that follows is caught
+        # below together with any other lookup failure.
+        url = uuidToObject(uid).absolute_url()
+    except Exception:
+        url = None
+    return url
+
+
+# Matches "resolveuid/" followed by 32 lowercase hexadecimal characters; the
+# 32-character identifier is the single capture group.
+resolveuid_pattern = re.compile(r"resolveuid/([a-f0-9]{32})")
+
+
+# A quoted absolute http(s) URL: an opening quote, the URL made of
+# non-whitespace characters other than that same quote, then the matching
+# closing quote.
+_QUOTED_URL_PATTERN = re.compile(r'(["\'])(https?://(?:(?!\1)\S)+)\1')
+
+
+def extract_urls(text):
+    """Extract quoted absolute http(s) URLs from text.
+
+    A URL is recognised when it is wrapped in a pair of identical quotes
+    (single or double) and contains no whitespace. The URL stops at the
+    first occurrence of its own quote character, so adjacent quoted URLs
+    that are not separated by whitespace (e.g. ``"http://a","http://b"``)
+    are returned as separate items instead of one merged string.
+
+    Returns a list of URL strings in order of appearance; the list is
+    empty when nothing matches.
+    """
+    return [match.group(2) for match in _QUOTED_URL_PATTERN.finditer(text)]
+
+
+def extract_resolveuids(text):
+    """Return every uid found in ``resolveuid/<uid>`` references in text.
+
+    The result is a list of the captured uid strings, in order of
+    appearance; it is empty when text holds no such reference.
+    """
+    return [match.group(1) for match in resolveuid_pattern.finditer(text)]
+
+
+def get_all_images_urls(catalog):
+    """Index every Image returned by the catalog, by URL and by last segment.
+
+    The catalog is queried with ``portal_type="Image"``. The result is a
+    dict with two entries:
+
+    images_dict: usage flags, all initialised to False, keyed both by the
+    full URL and by the last path segment of that URL, e.g.
+        {'http....image.png': False, 'image.png': False, ...}
+
+    original: maps the last path segment back to the full URL, e.g.
+        {'image.png': 'http....image.png', ...}
+    When several images share the same last segment, the last one seen
+    is the one kept in this mapping.
+    """
+    usage_flags = {}
+    segment_to_url = {}
+    for brain in catalog(portal_type="Image"):
+        full_url = brain.getURL()
+        segment = full_url.rsplit("/", 1)[-1]
+        usage_flags[full_url] = False
+        usage_flags[segment] = False
+        segment_to_url[segment] = full_url
+    return {"images_dict": usage_flags, "original": segment_to_url}
+
+
+def is_img_used(images_dict, original, path, context):
+    """Look path up among the known image keys.
+
+    images_dict holds the known keys (full URLs and bare last segments);
+    original maps a bare segment to its full URL. context is accepted but
+    not used.
+
+    Returns ``(True, full_url)`` when path is a known key, translating a
+    bare segment to its full URL through original, and ``(False, None)``
+    otherwise.
+    """
+    if path not in images_dict:
+        return (False, None)
+
+    # Keys containing "//" are already full URLs; anything else is a bare
+    # segment that has to be mapped back to its URL.
+    full_url = path if "//" in path else original[path]
+    return (True, full_url)
+
+
+class FindUnusedImages(BrowserView):
+    """Browser view that reports the Image URLs no scanned content refers to."""
+
+    def __call__(self):
+        """Scan catalogued content and return the images never referenced.
+
+        Every non-Image object returned by the catalog is inspected: file- or
+        image-like schema fields are matched by their ``filename``, and the
+        ``blocks`` field is searched for image block urls,
+        ``resolveuid/<uid>`` links and quoted absolute URLs.
+
+        Returns a list of full image URLs that were never matched, excluding
+        URLs containing "/assets/". The same list is logged at INFO level.
+        """
+        catalog = self.context.portal_catalog
+        images = get_all_images_urls(catalog)
+        # Usage flags keyed by full URL and by last URL segment.
+        images_dict = images["images_dict"]
+        # Last URL segment -> full URL.
+        original = images["original"]
+
+        # NOTE(review): empty query; presumably yields every indexed object
+        # visible to the current user. Confirm against the catalog in use.
+        all_brains = catalog()
+        for brain in all_brains:
+            # Skip catalog entries whose object cannot be loaded.
+            try:
+                obj = brain.getObject()
+            except Exception:
+                continue
+
+            # Images themselves are not scanned for references.
+            try:
+                if obj.portal_type == "Image":
+                    continue
+            except Exception:
+                continue
+
+            for schema in iterSchemata(obj):
+                for k, v in getFieldsInOrder(schema):
+                    if k != "blocks":
+                        # Heuristic: treat the field as file/image-like when
+                        # its string representation mentions "file" or "image".
+                        if "file" in str(v) or "image" in str(v):
+                            value = getattr(obj, k, None)
+                            if value is not None:
+                                # Best effort: values without ``filename`` (or
+                                # any other failure) are silently ignored.
+                                try:
+                                    res = is_img_used(
+                                        images_dict, original,
+                                        value.filename, obj
+                                    )
+                                    if res[0]:
+                                        images_dict[res[1]] = True
+                                except Exception:
+                                    pass
+                        # Non-blocks fields are fully handled above.
+                        continue
+                    value = getattr(obj, k, None)
+
+                    # Only the "blocks" field reaches this point.
+                    if k == "blocks" and isinstance(value, dict):
+                        for block in value.values():
+                            # Image blocks: resolve their "url" either through
+                            # a resolveuid link or by its last path segment.
+                            if block.get("@type", None) is not None:
+                                if block["@type"] == "image":
+                                    try_url = block.get("url", None)
+                                    if try_url is not None:
+                                        if "resolveuid" in try_url:
+                                            uids = extract_resolveuids(try_url)
+                                            if uids:
+                                                for uid in uids:
+                                                    path = convert_uid_to_path(
+                                                        uid)
+                                                    if path is None:
+                                                        continue
+                                                    res = is_img_used(
+                                                        images_dict, original,
+                                                        path, obj
+                                                    )
+                                                    if res[0]:
+                                                        images_dict[res[1]
+                                                                    ] = True
+                                        else:
+                                            try_path = try_url.split("/")[-1]
+                                            res = is_img_used(
+                                                images_dict, original,
+                                                try_path, obj
+                                            )
+                                            if res[0]:
+                                                images_dict[res[1]] = True
+
+                            # Whatever the block type, scan its string form
+                            # for resolveuid links and quoted absolute URLs.
+                            block_str = str(block)
+                            uids = extract_resolveuids(block_str)
+                            if uids:
+                                for uid in uids:
+                                    path = convert_uid_to_path(uid)
+                                    if path is None:
+                                        continue
+                                    res = is_img_used(
+                                        images_dict, original, path, obj)
+                                    if res[0]:
+                                        images_dict[res[1]] = True
+
+                            urls = extract_urls(block_str)
+                            if urls:
+                                for url in urls:
+                                    # Try the URL as found ...
+                                    res = is_img_used(
+                                        images_dict, original, url, obj)
+                                    if res[0]:
+                                        images_dict[res[1]] = True
+
+                                    # ... and then only its last path segment.
+                                    try_path = url.split("/")[-1]
+                                    res = is_img_used(
+                                        images_dict, original, try_path, obj
+                                    )
+                                    if res[0]:
+                                        images_dict[res[1]] = True
+
+        # Keep only full-URL keys; keys without "//" are the last-segment
+        # lookup aliases added by get_all_images_urls.
+        final_dict = {key: value for key,
+                      value in images_dict.items() if "//" in key}
+        # used_images = [
+        # path for path, is_used in final_dict.items() if is_used]
+        # Unused: never flagged above, and not under an "/assets/" path.
+        unused_images = [
+            path
+            for path, is_used in final_dict.items()
+            if not is_used and "/assets/" not in path
+        ]
+        # print("NOT USED", unused_images)
+        # print("NOT USED", len(unused_images))
+        log = getLogger(__name__)
+        log.info(unused_images)
+        # print("USED", len(used_images))
+
+        return unused_images
diff --git a/clms/addon/browser/configure.zcml b/clms/addon/browser/configure.zcml
@@ -11,5 +11,11 @@
       layer="pas.plugins.oidc.interfaces.IPasPluginsOidcLayer"
       />
 
+  <!-- Registers cleanup.FindUnusedImages as the "find-unused-images" view
+       for any context; access requires cmf.ModifyPortalContent.
+       NOTE(review): for="*" makes the view reachable on every object;
+       confirm that is intended rather than a narrower interface. -->
+  <browser:page
+      for="*"
+      name="find-unused-images"
+      class=".cleanup.FindUnusedImages"
+      permission="cmf.ModifyPortalContent"
+      />
 
 </configure>
diff --git a/clms/addon/version.txt b/clms/addon/version.txt
@@ -1 +1 @@
-12.0
+12.1
diff --git a/docs/HISTORY.txt b/docs/HISTORY.txt
@@ -1,6 +1,11 @@
 Changelog
 =========
 
+12.1 - (2024-11-21)
+---------------------------
+* Change: Refs #272346 - Add the find-unused-images browser view, which lists images not referenced by other content.
+  [GhitaB]
+
 12.0 - (2024-11-05)
 ---------------------------
 * Change: Develop