From cc31a909316cce598027a2764a39a595bbf9d077 Mon Sep 17 00:00:00 2001 From: Chocobo1 Date: Fri, 27 Dec 2024 02:15:35 +0800 Subject: [PATCH] Provide SSL context field This allows the caller to provide proper SSL parameters and avoid dirty monkey patching to suppress SSL errors. --- src/searchengine/nova3/helpers.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/searchengine/nova3/helpers.py b/src/searchengine/nova3/helpers.py index dfdfe234cbb..abf201439ab 100644 --- a/src/searchengine/nova3/helpers.py +++ b/src/searchengine/nova3/helpers.py @@ -35,6 +35,7 @@ import re import socket import socks +import ssl import sys import tempfile import urllib.error @@ -76,12 +77,12 @@ def getBrowserUserAgent() -> str: htmlentitydecode = html.unescape -def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data: Optional[Any] = None) -> str: +def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data: Optional[Any] = None, ssl_context: Optional[ssl.SSLContext] = None) -> str: """ Return the content of the url page as a string """ request = urllib.request.Request(url, request_data, {**headers, **custom_headers}) try: - response = urllib.request.urlopen(request) + response = urllib.request.urlopen(request, context=ssl_context) except urllib.error.URLError as errno: print(f"Connection error: {errno.reason}", file=sys.stderr) return "" @@ -104,14 +105,14 @@ def retrieve_url(url: str, custom_headers: Mapping[str, Any] = {}, request_data: return dataStr -def download_file(url: str, referer: Optional[str] = None) -> str: +def download_file(url: str, referer: Optional[str] = None, ssl_context: Optional[ssl.SSLContext] = None) -> str: """ Download file at url and write it to a file, return the path to the file and the url """ # Download url request = urllib.request.Request(url, headers=headers) if referer is not None: request.add_header('referer', referer) - response = urllib.request.urlopen(request) + response = 
urllib.request.urlopen(request, context=ssl_context) data = response.read() # Check if it is gzipped