From eac14953a03d3f4adb91ca3adde6564e405cc59f Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Tue, 19 Mar 2024 11:53:33 +0100 Subject: [PATCH] [3.10] gh-115197: Stop resolving host in urllib.request proxy bypass (GH-115210) (GH-116070) Use of a proxy is intended to defer DNS for the hosts to the proxy itself, rather than a potential for information leak of the host doing DNS resolution itself for any reason. Proxy bypass lists are strictly name based. Most implementations of proxy support agree. (cherry picked from commit c43b26d02eaa103756c250e8d36829d388c5f3be) Co-authored-by: Weii Wang --- Lib/test/test_urllib2.py | 29 ++++++- Lib/urllib/request.py | 77 +++++++++---------- ...-02-09-19-41-48.gh-issue-115197.20wkWH.rst | 2 + 3 files changed, 64 insertions(+), 44 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-02-09-19-41-48.gh-issue-115197.20wkWH.rst diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 55f45db7288b09..fe17aac6c1fec4 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -14,10 +14,11 @@ import subprocess import urllib.request -# The proxy bypass method imported below has logic specific to the OSX -# proxy config data structure but is testable on all platforms. +# The proxy bypass method imported below has logic specific to the +# corresponding system but is testable on all platforms. from urllib.request import (Request, OpenerDirector, HTTPBasicAuthHandler, HTTPPasswordMgrWithPriorAuth, _parse_proxy, + _proxy_bypass_winreg_override, _proxy_bypass_macosx_sysconf, AbstractDigestAuthHandler) from urllib.parse import urlparse @@ -1443,6 +1444,30 @@ def test_proxy_https_proxy_authorization(self): self.assertEqual(req.host, "proxy.example.com:3128") self.assertEqual(req.get_header("Proxy-authorization"), "FooBar") + @unittest.skipUnless(os.name == "nt", "only relevant for Windows") + def test_winreg_proxy_bypass(self): + proxy_override = "www.example.com;*.example.net; 192.168.0.1" + proxy_bypass = _proxy_bypass_winreg_override + for host in ("www.example.com", "www.example.net", "192.168.0.1"): + self.assertTrue(proxy_bypass(host, proxy_override), + "expected bypass of %s to be true" % host) + + for host in ("example.com", "www.example.org", "example.net", + "192.168.0.2"): + self.assertFalse(proxy_bypass(host, proxy_override), + "expected bypass of %s to be False" % host) + + # check intranet address bypass + proxy_override = "example.com; " + self.assertTrue(proxy_bypass("example.com", proxy_override), + "expected bypass of %s to be true" % host) + self.assertFalse(proxy_bypass("example.net", proxy_override), + "expected bypass of %s to be False" % host) + for host in ("test", "localhost"): + self.assertTrue(proxy_bypass(host, proxy_override), + "expect to bypass intranet address '%s'" + % host) + @unittest.skipUnless(sys.platform == 'darwin', "only relevant for OSX") def test_osx_proxy_bypass(self): bypass = { diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 76fdd719cdaa22..6edde1f73189b1 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2571,6 +2571,7 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings): } """ from fnmatch import fnmatch + from ipaddress import AddressValueError, IPv4Address hostonly, port = _splitport(host) @@ -2587,20 +2588,17 @@ def ip2num(ipAddr): return True hostIP = None + try: + hostIP = int(IPv4Address(hostonly)) + except AddressValueError: + pass for value in proxy_settings.get('exceptions', ()): # Items in the list are strings like these: *.local, 169.254/16 if not value: continue m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value) - if m is not None: - if hostIP is None: - try: - hostIP = socket.gethostbyname(hostonly) - hostIP = ip2num(hostIP) - except OSError: - continue - + if m is not None and hostIP is not None: base = ip2num(m.group(1)) mask = m.group(2) if mask is None: @@ -2623,6 +2621,31 @@ def ip2num(ipAddr): return False +# Same as _proxy_bypass_macosx_sysconf, testable on all platforms +def _proxy_bypass_winreg_override(host, override): + """Return True if the host should bypass the proxy server. + + The proxy override list is obtained from the Windows + Internet settings proxy override registry value. + + An example of a proxy override value is: + "www.example.com;*.example.net; 192.168.0.1" + """ + from fnmatch import fnmatch + + host, _ = _splitport(host) + proxy_override = override.split(';') + for test in proxy_override: + test = test.strip() + # "" should bypass the proxy server for all intranet addresses + if test == '': + if '.' not in host: + return True + elif fnmatch(host, test): + return True + return False + + if sys.platform == 'darwin': from _scproxy import _get_proxy_settings, _get_proxies @@ -2721,7 +2744,7 @@ def proxy_bypass_registry(host): import winreg except ImportError: # Std modules, so should be around - but you never know! - return 0 + return False try: internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER, r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') @@ -2731,40 +2754,10 @@ def proxy_bypass_registry(host): 'ProxyOverride')[0]) # ^^^^ Returned as Unicode but problems if not converted to ASCII except OSError: - return 0 + return False if not proxyEnable or not proxyOverride: - return 0 - # try to make a host list from name and IP address. - rawHost, port = _splitport(host) - host = [rawHost] - try: - addr = socket.gethostbyname(rawHost) - if addr != rawHost: - host.append(addr) - except OSError: - pass - try: - fqdn = socket.getfqdn(rawHost) - if fqdn != rawHost: - host.append(fqdn) - except OSError: - pass - # make a check value list from the registry entry: replace the - # '' string by the localhost entry and the corresponding - # canonical entry. - proxyOverride = proxyOverride.split(';') - # now check if we match one of the registry values. - for test in proxyOverride: - if test == '': - if '.' not in rawHost: - return 1 - test = test.replace(".", r"\.") # mask dots - test = test.replace("*", r".*") # change glob sequence - test = test.replace("?", r".") # change glob char - for val in host: - if re.match(test, val, re.I): - return 1 - return 0 + return False + return _proxy_bypass_winreg_override(host, proxyOverride) def proxy_bypass(host): """Return True, if host should be bypassed. diff --git a/Misc/NEWS.d/next/Library/2024-02-09-19-41-48.gh-issue-115197.20wkWH.rst b/Misc/NEWS.d/next/Library/2024-02-09-19-41-48.gh-issue-115197.20wkWH.rst new file mode 100644 index 00000000000000..e6ca3cc525d74a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-09-19-41-48.gh-issue-115197.20wkWH.rst @@ -0,0 +1,2 @@ +``urllib.request`` no longer resolves the hostname before checking it +against the system's proxy bypass list on macOS and Windows.