From 078f2b8b2578af07e7cdb81a19c8c71bfa64bcfc Mon Sep 17 00:00:00 2001 From: Sergey Grigoriev Date: Thu, 19 Sep 2024 14:38:43 +0200 Subject: [PATCH 1/2] fix: refactoring to reduce cognitive complexity Refs: #69 --- app/SvgUtils.py | 165 ++++++++++++++++++++++++++---------------------- 1 file changed, 89 insertions(+), 76 deletions(-) diff --git a/app/SvgUtils.py b/app/SvgUtils.py index baa709b..6ff78f0 100644 --- a/app/SvgUtils.py +++ b/app/SvgUtils.py @@ -11,29 +11,35 @@ # Process img tags, replacing base64 SVG images with PNGs def process_svg(html): - pattern = re.compile(r'[^>]+?src="data:)(?P[^;>]*?);base64, (?P[^">]*?)"') + pattern = re.compile(r'[^>]+?src="data:)(?P[^;>]*?);base64,(?P[^">]*?)"') return re.sub(pattern, replace_img_base64, html) +# Decode and validate if the provided content is SVG. def get_svg_content(content_type, content_base64): # We do not require to have 'image/svg+xml' content type coz not all systems will properly set it if content_type in NON_SVG_CONTENT_TYPES: return False # Skip processing if content type set explicitly as not svg - decoded_content = base64.b64decode(content_base64) - if b'\0' in decoded_content: - return False # Skip processing if decoded content is binary (not text) + try: + decoded_content = base64.b64decode(content_base64) + if b'\0' in decoded_content: + return False # Skip processing if decoded content is binary (not text) - svg_content = decoded_content.decode('utf-8') + svg_content = decoded_content.decode('utf-8') - # Fast check that this is a svg - if '' not in svg_content: - return False + # Fast check that this is a svg + if '' not in svg_content: + return False - return svg_content + return svg_content + except Exception as e: + logging.error(f"Failed to decode base64 content: {e}") + return False +# Replace base64 SVG images with PNG equivalents in the HTML img tag. def replace_img_base64(match): entry = match.group(0) content_type = match.group('type') @@ -42,89 +48,107 @@ def replace_img_base64(match): svg_content = get_svg_content(content_type, content_base64) if svg_content is False: return entry - else: - replaced_content_base64 = replace_svg_with_png(svg_content) - if replaced_content_base64 == content_base64: - return entry # For some reason content wasn't replaced - else: - return f']+?width="(?P[\d.]+)', svg_content) - if match: - width = match.group('width') - else: - logging.error('Cannot find svg width in ' + svg_content) + svg_filepath, png_filepath = prepare_temp_files(svg_content) + if not svg_filepath or not png_filepath: return svg_content - match = re.search(r']+?height="(?P[\d.]+)', svg_content) - if match: - height = match.group('height') - else: - logging.error('Cannot find svg height in ' + svg_content) + if not convert_svg_to_png(width, height, png_filepath, svg_filepath): return svg_content - # Log large svg content size - svg_content_length = len(svg_content) - if svg_content_length > 100_000: - logging.warning(f"SVG content length: {svg_content_length}") + png_base64 = read_and_cleanup_png(png_filepath) + return png_base64 if png_base64 else svg_content - # Will be used as a name for tmp files - uuid = str(uuid4()) - temp_folder = tempfile.gettempdir() +# Extract the width and height from the SVG tag +def extract_svg_dimensions(svg_content): + width_match = re.search(r']+?width="(?P[\d.]+)', svg_content) + height_match = re.search(r']+?height="(?P[\d.]+)', svg_content) - # Put svg into tmp file - svg_filepath = os.path.join(temp_folder, uuid + '.svg') - f = open(svg_filepath, 'w', encoding='utf-8') - f.write(svg_content) - f.close() + width = width_match.group('width') if width_match else None + height = height_match.group('height') if height_match else None - # Feed svg file to chromium - png_filepath = os.path.join(temp_folder, uuid + '.png') + if not width or not height: + logging.error(f"Cannot find SVG dimensions. Width: {width}, Height: {height}") + return width, height - chromium_command = create_chromium_command( - chromium_executable, - height, - width, - png_filepath, - svg_filepath, - ) - result = subprocess.run(chromium_command) +# Save the SVG content to a temporary file and return the file paths for the SVG and PNG. +def prepare_temp_files(svg_content): + try: + temp_folder = tempfile.gettempdir() + uuid = str(uuid4()) - # Remove tmp svg file - os.remove(svg_filepath) + svg_filepath = os.path.join(temp_folder, f'{uuid}.svg') + png_filepath = os.path.join(temp_folder, f'{uuid}.png') + + with open(svg_filepath, 'w', encoding='utf-8') as f: + f.write(svg_content) + + return svg_filepath, png_filepath + except Exception as e: + logging.error(f"Failed to save SVG to temp file: {e}") + return None, None + + +# Convert the SVG file to PNG using Chromium and return success status +def convert_svg_to_png(width, height, png_filepath, svg_filepath): + command = create_chromium_command(width, height, png_filepath, svg_filepath) + result = subprocess.run(command) if result.returncode != 0: - logging.error(f'Error converting to png, returncode = {result.returncode}') - return svg_content + logging.error(f"Error converting SVG to PNG, return code = {result.returncode}") + return False - # Get resulting screenshot content - with open(png_filepath, 'rb') as img_file: - img_data = img_file.read() - png_base64 = base64.b64encode(img_data).decode('utf-8') + return True + + +# Read the PNG file, encode it in base64, and clean up the temporary file. +def read_and_cleanup_png(png_filepath): + try: + with open(png_filepath, 'rb') as img_file: + img_data = img_file.read() - # Remove tmp png file - os.remove(png_filepath) + png_base64 = base64.b64encode(img_data).decode('utf-8') + os.remove(png_filepath) + return png_base64 + except Exception as e: + logging.error(f"Failed to read or clean up PNG file: {e}") + return None - return png_base64 +# Create the Chromium command for converting SVG to PNG +def create_chromium_command(width, height, png_filepath, svg_filepath): + chromium_executable = os.environ.get('CHROMIUM_EXECUTABLE_PATH') + if not chromium_executable: + logging.error('CHROMIUM_EXECUTABLE_PATH is not set.') + return None -def create_chromium_command(chromium_executable, height, width, png_filepath, svg_filepath): - # Check if the ENABLE_HARDWARE_ACCELERATION environment variable is set to true enable_hardware_acceleration = os.getenv('ENABLE_HARDWARE_ACCELERATION', 'false').lower() == 'true' command = [ - f'{chromium_executable}', + chromium_executable, + '--headless=old', + '--no-sandbox', + '--default-background-color=00000000', + '--hide-scrollbars', + '--enable-features=ConversionMeasurement,AttributionReportingCrossAppWeb', + f'--screenshot={png_filepath}', + f'--window-size={width},{height}', + svg_filepath, ] if not enable_hardware_acceleration: @@ -134,15 +158,4 @@ def create_chromium_command(chromium_executable, height, width, png_filepath, sv '--disable-dev-shm-usage', ]) - command.extend([ - '--headless=old', # because of issue in new with SVG conversion - '--no-sandbox', - '--default-background-color=00000000', - '--hide-scrollbars', - '--enable-features=ConversionMeasurement,AttributionReportingCrossAppWeb', - f'--screenshot={png_filepath}', - f'--window-size={width},{height}', - f'{svg_filepath}', - ]) - return command From 1a33aee5653c970168713f4ac231421a64fcf090 Mon Sep 17 00:00:00 2001 From: Sergey Grigoriev Date: Thu, 19 Sep 2024 14:41:10 +0200 Subject: [PATCH 2/2] fix: refactoring to reduce cognitive complexity Refs: #69 --- app/SvgUtils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/app/SvgUtils.py b/app/SvgUtils.py index 6ff78f0..5bab0b9 100644 --- a/app/SvgUtils.py +++ b/app/SvgUtils.py @@ -11,7 +11,7 @@ # Process img tags, replacing base64 SVG images with PNGs def process_svg(html): - pattern = re.compile(r'[^>]+?src="data:)(?P[^;>]*?);base64,(?P[^">]*?)"') + pattern = re.compile(r'[^>]+?src="data:)(?P[^;>]*?);base64,\s?(?P[^">]*?)"') return re.sub(pattern, replace_img_base64, html)