From 90e457a671f161c7a56896b9be0d5ae996e7f0df Mon Sep 17 00:00:00 2001
From: Chocobo1
Date: Fri, 27 Dec 2024 01:56:33 +0800
Subject: [PATCH 1/2] Use built-in method for decoding HTML entities

---
 src/searchengine/nova3/helpers.py | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/src/searchengine/nova3/helpers.py b/src/searchengine/nova3/helpers.py
index ef8376a28d53..dfdfe234cbbc 100644
--- a/src/searchengine/nova3/helpers.py
+++ b/src/searchengine/nova3/helpers.py
@@ -1,4 +1,4 @@
-#VERSION: 1.49
+#VERSION: 1.50
 
 # Author:
 #  Christophe DUMEZ (chris@qbittorrent.org)
@@ -29,7 +29,7 @@
 
 import datetime
 import gzip
-import html.entities
+import html
 import io
 import os
 import re
@@ -72,21 +72,8 @@ def getBrowserUserAgent() -> str:
         socket.socket = socks.socksocket  # type: ignore[misc]
 
 
-def htmlentitydecode(s: str) -> str:
-    # First convert alpha entities (such as &eacute;)
-    # (Inspired from http://mail.python.org/pipermail/python-list/2007-June/443813.html)
-    def entity2char(m: re.Match[str]) -> str:
-        entity = m.group(1)
-        if entity in html.entities.name2codepoint:
-            return chr(html.entities.name2codepoint[entity])
-        return " "  # Unknown entity: We replace with a space.
-    t = re.sub('&(%s);' % '|'.join(html.entities.name2codepoint), entity2char, s)
-
-    # Then convert numerical entities (such as &#233;)
-    t = re.sub(r'&#(\d+);', lambda x: chr(int(x.group(1))), t)
-
-    # Then convert hexa entities (such as &#x00E9;)
-    return re.sub(r'&#x(\w+);', lambda x: chr(int(x.group(1), 16)), t)
+# This is only provided for backward compatibility, new code should not use it
+htmlentitydecode = html.unescape
 
 
 def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data: Optional[Any] = None) -> str:

From cc31a909316cce598027a2764a39a595bbf9d077 Mon Sep 17 00:00:00 2001
From: Chocobo1
Date: Fri, 27 Dec 2024 02:15:35 +0800
Subject: [PATCH 2/2] Provide SSL context field

This allows the caller to provide proper SSL parameters and avoid dirty
monkey patching to suppress SSL errors.
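For example, a search plugin could build its own ssl.SSLContext and pass it
through instead of patching the ssl module globally. A minimal usage sketch
(the plugin-side call below is hypothetical and only illustrates the new
parameter; disabling certificate verification is shown purely as the opt-out
case, not as a recommendation):

    import ssl

    from helpers import retrieve_url

    # Default, verified HTTPS behavior using the system trust store
    ctx = ssl.create_default_context()

    # Opt-out for a tracker with a self-signed certificate (illustrative only)
    # ctx.check_hostname = False
    # ctx.verify_mode = ssl.CERT_NONE

    page = retrieve_url("https://example.org/search?q=ubuntu", ssl_context=ctx)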
---
 src/searchengine/nova3/helpers.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/src/searchengine/nova3/helpers.py b/src/searchengine/nova3/helpers.py
index dfdfe234cbbc..abf201439ab6 100644
--- a/src/searchengine/nova3/helpers.py
+++ b/src/searchengine/nova3/helpers.py
@@ -35,6 +35,7 @@
 import re
 import socket
 import socks
+import ssl
 import sys
 import tempfile
 import urllib.error
@@ -76,12 +77,12 @@ def getBrowserUserAgent() -> str:
 htmlentitydecode = html.unescape
 
 
-def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data: Optional[Any] = None) -> str:
+def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data: Optional[Any] = None, ssl_context: Optional[ssl.SSLContext] = None) -> str:
     """ Return the content of the url page as a string """
 
     request = urllib.request.Request(url, request_data, {**headers, **custom_headers})
     try:
-        response = urllib.request.urlopen(request)
+        response = urllib.request.urlopen(request, context=ssl_context)
     except urllib.error.URLError as errno:
         print(f"Connection error: {errno.reason}", file=sys.stderr)
         return ""
@@ -104,14 +105,14 @@ def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data:
     return dataStr
 
 
-def download_file(url: str, referer: Optional[str] = None) -> str:
+def download_file(url: str, referer: Optional[str] = None, ssl_context: Optional[ssl.SSLContext] = None) -> str:
     """ Download file at url and write it to a file, return the path to the file and the url """
 
     # Download url
     request = urllib.request.Request(url, headers=headers)
     if referer is not None:
         request.add_header('referer', referer)
-    response = urllib.request.urlopen(request)
+    response = urllib.request.urlopen(request, context=ssl_context)
     data = response.read()
 
     # Check if it is gzipped