From 078f2b8b2578af07e7cdb81a19c8c71bfa64bcfc Mon Sep 17 00:00:00 2001
From: Sergey Grigoriev <sergey.grigoriev@sbb.ch>
Date: Thu, 19 Sep 2024 14:38:43 +0200
Subject: [PATCH 1/2] fix: refactoring to reduce cognitive complexity

Refs: #69
---
 app/SvgUtils.py | 165 ++++++++++++++++++++++++++----------------------
 1 file changed, 89 insertions(+), 76 deletions(-)
diff --git a/app/SvgUtils.py b/app/SvgUtils.py
index baa709b..6ff78f0 100644
--- a/app/SvgUtils.py
+++ b/app/SvgUtils.py
@@ -11,29 +11,35 @@
 
 # Process img tags, replacing base64 SVG images with PNGs
 def process_svg(html):
-    pattern = re.compile(r'<img(?P<intermediate>[^>]+?src="data:)(?P<type>[^;>]*?);base64, (?P<base64>[^">]*?)"')
+    pattern = re.compile(r'<img(?P<intermediate>[^>]+?src="data:)(?P<type>[^;>]*?);base64,(?P<base64>[^">]*?)"')
     return re.sub(pattern, replace_img_base64, html)
 
 
+# Decode the base64 payload and return it as SVG text; return False when it is not SVG or cannot be decoded.
 def get_svg_content(content_type, content_base64):
     # We do not require to have 'image/svg+xml' content type coz not all systems will properly set it
 
     if content_type in NON_SVG_CONTENT_TYPES:
         return False  # Skip processing if content type set explicitly as not svg
 
-    decoded_content = base64.b64decode(content_base64)
-    if b'\0' in decoded_content:
-        return False  # Skip processing if decoded content is binary (not text)
+    try:
+        decoded_content = base64.b64decode(content_base64)
+        if b'\0' in decoded_content:
+            return False  # Skip processing if decoded content is binary (not text)
 
-    svg_content = decoded_content.decode('utf-8')
+        svg_content = decoded_content.decode('utf-8')
 
-    # Fast check that this is a svg
-    if '</svg>' not in svg_content:
-        return False
+        # Fast check that this is a svg
+        if '</svg>' not in svg_content:
+            return False
 
-    return svg_content
+        return svg_content
+    except Exception as e:
+        logging.error(f"Failed to decode base64 content: {e}")
+        return False
 
 
+# re.sub callback: return the matched img tag with its base64 SVG payload swapped for the converted content, or the tag unchanged when the payload is not SVG.
 def replace_img_base64(match):
     entry = match.group(0)
     content_type = match.group('type')
@@ -42,89 +48,107 @@ def replace_img_base64(match):
     svg_content = get_svg_content(content_type, content_base64)
     if svg_content is False:
         return entry
-    else:
-        replaced_content_base64 = replace_svg_with_png(svg_content)
-        if replaced_content_base64 == content_base64:
-            return entry  # For some reason content wasn't replaced
-        else:
-            return f'<img{match.group("intermediate")}image/svg+xml;base64, {replaced_content_base64}"'
+
+    replaced_content_base64 = replace_svg_with_png(svg_content)
+    if replaced_content_base64 == content_base64:
+        return entry  # For some reason content wasn't replaced
+
+    return f'<img{match.group("intermediate")}image/svg+xml;base64,{replaced_content_base64}"'
 
 
 # Checks that base64 encoded content is a svg image and replaces it with the png screenshot made by chromium
 def replace_svg_with_png(svg_content):
-    chromium_executable = os.environ.get('CHROMIUM_EXECUTABLE_PATH')
-    if not chromium_executable:
-        logging.error('CHROMIUM_EXECUTABLE_PATH not set')
+    width, height = extract_svg_dimensions(svg_content)
+    if not width or not height:
         return svg_content
 
-    # Fetch width & height from root svg tag
-    match = re.search(r'<svg[^>]+?width="(?P<width>[\d.]+)', svg_content)
-    if match:
-        width = match.group('width')
-    else:
-        logging.error('Cannot find svg width in ' + svg_content)
+    svg_filepath, png_filepath = prepare_temp_files(svg_content)
+    if not svg_filepath or not png_filepath:
         return svg_content
 
-    match = re.search(r'<svg[^>]+?height="(?P<height>[\d.]+)', svg_content)
-    if match:
-        height = match.group('height')
-    else:
-        logging.error('Cannot find svg height in ' + svg_content)
+    if not convert_svg_to_png(width, height, png_filepath, svg_filepath):
         return svg_content
 
-    # Log large svg content size
-    svg_content_length = len(svg_content)
-    if svg_content_length > 100_000:
-        logging.warning(f"SVG content length: {svg_content_length}")
+    png_base64 = read_and_cleanup_png(png_filepath)
+    return png_base64 if png_base64 else svg_content
 
-    # Will be used as a name for tmp files
-    uuid = str(uuid4())
 
-    temp_folder = tempfile.gettempdir()
+# Extract the width and height attribute values (as strings) from the <svg> tag; a missing value is returned as None.
+def extract_svg_dimensions(svg_content):
+    width_match = re.search(r'<svg[^>]+?width="(?P<width>[\d.]+)', svg_content)
+    height_match = re.search(r'<svg[^>]+?height="(?P<height>[\d.]+)', svg_content)
 
-    # Put svg into tmp file
-    svg_filepath = os.path.join(temp_folder, uuid + '.svg')
-    f = open(svg_filepath, 'w', encoding='utf-8')
-    f.write(svg_content)
-    f.close()
+    width = width_match.group('width') if width_match else None
+    height = height_match.group('height') if height_match else None
 
-    # Feed svg file to chromium
-    png_filepath = os.path.join(temp_folder, uuid + '.png')
+    if not width or not height:
+        logging.error(f"Cannot find SVG dimensions. Width: {width}, Height: {height}")
+    return width, height
 
-    chromium_command = create_chromium_command(
-        chromium_executable,
-        height,
-        width,
-        png_filepath,
-        svg_filepath,
-    )
 
-    result = subprocess.run(chromium_command)
+# Save the SVG content to a temporary file and return the SVG path and the PNG output path (not created yet), or (None, None) on failure.
+def prepare_temp_files(svg_content):
+    try:
+        temp_folder = tempfile.gettempdir()
+        uuid = str(uuid4())
 
-    # Remove tmp svg file
-    os.remove(svg_filepath)
+        svg_filepath = os.path.join(temp_folder, f'{uuid}.svg')
+        png_filepath = os.path.join(temp_folder, f'{uuid}.png')
+
+        with open(svg_filepath, 'w', encoding='utf-8') as f:
+            f.write(svg_content)
+
+        return svg_filepath, png_filepath
+    except Exception as e:
+        logging.error(f"Failed to save SVG to temp file: {e}")
+        return None, None
+
+
+# Run Chromium to convert the SVG file to PNG; return True on exit code 0, else False. NOTE(review): command may be None if CHROMIUM_EXECUTABLE_PATH is unset.
+def convert_svg_to_png(width, height, png_filepath, svg_filepath):
+    command = create_chromium_command(width, height, png_filepath, svg_filepath)
+    result = subprocess.run(command)
 
     if result.returncode != 0:
-        logging.error(f'Error converting to png, returncode = {result.returncode}')
-        return svg_content
+        logging.error(f"Error converting SVG to PNG, return code = {result.returncode}")
+        return False
 
-    # Get resulting screenshot content
-    with open(png_filepath, 'rb') as img_file:
-        img_data = img_file.read()
-        png_base64 = base64.b64encode(img_data).decode('utf-8')
+    return True
+
+
+# Read the PNG file, return it base64-encoded and remove the temporary PNG (the temporary SVG is not removed here); return None on failure.
+def read_and_cleanup_png(png_filepath):
+    try:
+        with open(png_filepath, 'rb') as img_file:
+            img_data = img_file.read()
 
-    # Remove tmp png file
-    os.remove(png_filepath)
+        png_base64 = base64.b64encode(img_data).decode('utf-8')
+        os.remove(png_filepath)
+        return png_base64
+    except Exception as e:
+        logging.error(f"Failed to read or clean up PNG file: {e}")
+        return None
 
-    return png_base64
 
+# Build the Chromium argument list for converting SVG to PNG; returns None when CHROMIUM_EXECUTABLE_PATH is not set.
+def create_chromium_command(width, height, png_filepath, svg_filepath):
+    chromium_executable = os.environ.get('CHROMIUM_EXECUTABLE_PATH')
+    if not chromium_executable:
+        logging.error('CHROMIUM_EXECUTABLE_PATH is not set.')
+        return None
 
-def create_chromium_command(chromium_executable, height, width, png_filepath, svg_filepath):
-    # Check if the ENABLE_HARDWARE_ACCELERATION environment variable is set to true
     enable_hardware_acceleration = os.getenv('ENABLE_HARDWARE_ACCELERATION', 'false').lower() == 'true'
 
     command = [
-        f'{chromium_executable}',
+        chromium_executable,
+        '--headless=old',
+        '--no-sandbox',
+        '--default-background-color=00000000',
+        '--hide-scrollbars',
+        '--enable-features=ConversionMeasurement,AttributionReportingCrossAppWeb',
+        f'--screenshot={png_filepath}',
+        f'--window-size={width},{height}',
+        svg_filepath,
     ]
 
     if not enable_hardware_acceleration:
@@ -134,15 +158,4 @@ def create_chromium_command(chromium_executable, height, width, png_filepath, sv
             '--disable-dev-shm-usage',
         ])
 
-    command.extend([
-        '--headless=old',  # because of issue in new with SVG conversion
-        '--no-sandbox',
-        '--default-background-color=00000000',
-        '--hide-scrollbars',
-        '--enable-features=ConversionMeasurement,AttributionReportingCrossAppWeb',
-        f'--screenshot={png_filepath}',
-        f'--window-size={width},{height}',
-        f'{svg_filepath}',
-    ])
-
     return command

From 1a33aee5653c970168713f4ac231421a64fcf090 Mon Sep 17 00:00:00 2001
From: Sergey Grigoriev <sergey.grigoriev@sbb.ch>
Date: Thu, 19 Sep 2024 14:41:10 +0200
Subject: [PATCH 2/2] fix: allow optional whitespace after 'base64,' in img data URI pattern

Refs: #69
---
 app/SvgUtils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/app/SvgUtils.py b/app/SvgUtils.py
index 6ff78f0..5bab0b9 100644
--- a/app/SvgUtils.py
+++ b/app/SvgUtils.py
@@ -11,7 +11,7 @@
 
 # Process img tags, replacing base64 SVG images with PNGs
 def process_svg(html):
-    pattern = re.compile(r'<img(?P<intermediate>[^>]+?src="data:)(?P<type>[^;>]*?);base64,(?P<base64>[^">]*?)"')
+    pattern = re.compile(r'<img(?P<intermediate>[^>]+?src="data:)(?P<type>[^;>]*?);base64,\s?(?P<base64>[^">]*?)"')
     return re.sub(pattern, replace_img_base64, html)