From 74a4fa0a56563caa97fc7c8e0b7035d37e5d8b6c Mon Sep 17 00:00:00 2001 From: Weii Wang Date: Thu, 8 Feb 2024 19:37:16 +0800 Subject: [PATCH 1/5] urllib.request: stop resolving host in proxy bypass --- Lib/urllib/request.py | 36 ++++++++++-------------------------- 1 file changed, 10 insertions(+), 26 deletions(-) diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index bca594420f6d9d..5265b1b8d8f5ed 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2563,6 +2563,7 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings): } """ from fnmatch import fnmatch + from ipaddress import AddressValueError, IPv4Address hostonly, port = _splitport(host) @@ -2579,20 +2580,17 @@ def ip2num(ipAddr): return True hostIP = None + try: + hostIP = int(IPv4Address(hostonly)) + except AddressValueError: + pass for value in proxy_settings.get('exceptions', ()): # Items in the list are strings like these: *.local, 169.254/16 if not value: continue m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value) - if m is not None: - if hostIP is None: - try: - hostIP = socket.gethostbyname(hostonly) - hostIP = ip2num(hostIP) - except OSError: - continue - + if m is not None and hostIP is not None: base = ip2num(m.group(1)) mask = m.group(2) if mask is None: @@ -2727,20 +2725,7 @@ def proxy_bypass_registry(host): if not proxyEnable or not proxyOverride: return 0 # try to make a host list from name and IP address. - rawHost, port = _splitport(host) - host = [rawHost] - try: - addr = socket.gethostbyname(rawHost) - if addr != rawHost: - host.append(addr) - except OSError: - pass - try: - fqdn = socket.getfqdn(rawHost) - if fqdn != rawHost: - host.append(fqdn) - except OSError: - pass + host, _ = _splitport(host) # make a check value list from the registry entry: replace the # '<local>' string by the localhost entry and the corresponding # canonical entry. @@ -2748,14 +2733,13 @@ def proxy_bypass_registry(host): # now check if we match one of the registry values. 
for test in proxyOverride: if test == '<local>': - if '.' not in rawHost: + if '.' not in host: return 1 test = test.replace(".", r"\.") # mask dots test = test.replace("*", r".*") # change glob sequence test = test.replace("?", r".") # change glob char - for val in host: - if re.match(test, val, re.I): - return 1 + if re.match(test, host, re.I): + return 1 return 0 def proxy_bypass(host): From 287ddb4cdfa31e3d0a8c659feee27ae4c14a5110 Mon Sep 17 00:00:00 2001 From: Weii Wang Date: Fri, 9 Feb 2024 19:43:14 +0800 Subject: [PATCH 2/5] Add a NEWS entry --- .../next/Library/2024-02-09-19-41-48.gh-issue-115197.20wkWH.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2024-02-09-19-41-48.gh-issue-115197.20wkWH.rst diff --git a/Misc/NEWS.d/next/Library/2024-02-09-19-41-48.gh-issue-115197.20wkWH.rst b/Misc/NEWS.d/next/Library/2024-02-09-19-41-48.gh-issue-115197.20wkWH.rst new file mode 100644 index 00000000000000..e6ca3cc525d74a --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-02-09-19-41-48.gh-issue-115197.20wkWH.rst @@ -0,0 +1,2 @@ +``urllib.request`` no longer resolves the hostname before checking it +against the system's proxy bypass list on macOS and Windows. From 96a8eb8ececca6cfdcc992fe46998912680b87de Mon Sep 17 00:00:00 2001 From: Weii Wang Date: Wed, 14 Feb 2024 15:07:58 +0800 Subject: [PATCH 3/5] Put Windows proxy bypass into a separate function --- Lib/test/test_urllib2.py | 29 ++++++++++++++++++++++-- Lib/urllib/request.py | 49 ++++++++++++++++++++++++---------------- 2 files changed, 56 insertions(+), 22 deletions(-) diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index fa528a675892b5..739c15df13de21 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -15,10 +15,11 @@ import subprocess import urllib.request -# The proxy bypass method imported below has logic specific to the OSX -# proxy config data structure but is testable on all platforms. 
+# The proxy bypass method imported below has logic specific to the +# corresponding system but is testable on all platforms. from urllib.request import (Request, OpenerDirector, HTTPBasicAuthHandler, HTTPPasswordMgrWithPriorAuth, _parse_proxy, + _proxy_bypass_winreg_override, _proxy_bypass_macosx_sysconf, AbstractDigestAuthHandler) from urllib.parse import urlparse @@ -1485,6 +1486,30 @@ def test_proxy_https_proxy_authorization(self): self.assertEqual(req.host, "proxy.example.com:3128") self.assertEqual(req.get_header("Proxy-authorization"), "FooBar") + @unittest.skipUnless(os.name == "nt", "only relevant for Windows") + def test_winreg_proxy_bypass(self): + proxy_override = "www.example.com;*.example.net; 192.168.0.1" + proxy_bypass = _proxy_bypass_winreg_override + for host in ("www.example.com", "www.example.net", "192.168.0.1"): + self.assertTrue(proxy_bypass(host, proxy_override), + "expected bypass of %s to be true" % host) + + for host in ("example.com", "www.example.org", "example.net", + "192.168.0.2"): + self.assertFalse(proxy_bypass(host, proxy_override), + "expected bypass of %s to be False" % host) + + # check intranet address bypass + proxy_override = "example.com; <local>" + self.assertTrue(proxy_bypass("example.com", proxy_override), + "expected bypass of %s to be true" % host) + self.assertFalse(proxy_bypass("example.net", proxy_override), + "expected bypass of %s to be False" % host) + for host in ("test", "localhost"): + self.assertTrue(proxy_bypass(host, proxy_override), + "expect to bypass intranet address '%s'" + % host) + @unittest.skipUnless(sys.platform == 'darwin', "only relevant for OSX") def test_osx_proxy_bypass(self): bypass = { diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 5265b1b8d8f5ed..d22af6618d80f1 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2613,6 +2613,31 @@ def ip2num(ipAddr): return False +# Same as _proxy_bypass_macosx_sysconf, testable on all platforms +def 
_proxy_bypass_winreg_override(host, override): + """Return True if the host should bypass the proxy server. + + The proxy override list is obtained from the Windows + Internet settings proxy override registry value. + + An example of a proxy override value is: + "www.example.com;*.example.net; 192.168.0.1" + """ + from fnmatch import fnmatch + + host, _ = _splitport(host) + proxy_override = override.split(';') + for test in proxy_override: + test = test.strip() + # "<local>" should bypass the proxy server for all intranet addresses + if test == '<local>': + if '.' not in host: + return True + elif fnmatch(host, test): + return True + return False + + if sys.platform == 'darwin': from _scproxy import _get_proxy_settings, _get_proxies @@ -2711,7 +2736,7 @@ def proxy_bypass_registry(host): import winreg except ImportError: # Std modules, so should be around - but you never know! - return 0 + return False try: internetSettings = winreg.OpenKey(winreg.HKEY_CURRENT_USER, r'Software\Microsoft\Windows\CurrentVersion\Internet Settings') @@ -2721,26 +2746,10 @@ def proxy_bypass_registry(host): 'ProxyOverride')[0]) # ^^^^ Returned as Unicode but problems if not converted to ASCII except OSError: - return 0 + return False if not proxyEnable or not proxyOverride: - return 0 - # try to make a host list from name and IP address. - host, _ = _splitport(host) - # make a check value list from the registry entry: replace the - # '<local>' string by the localhost entry and the corresponding - # canonical entry. - proxyOverride = proxyOverride.split(';') - # now check if we match one of the registry values. - for test in proxyOverride: - if test == '<local>': - if '.' 
not in host: - return 1 - test = test.replace(".", r"\.") # mask dots - test = test.replace("*", r".*") # change glob sequence - test = test.replace("?", r".") # change glob char - if re.match(test, host, re.I): - return 1 - return 0 + return False + return _proxy_bypass_winreg_override(host, proxyOverride) def proxy_bypass(host): """Return True, if host should be bypassed. From 95e393f6f55503b55308bd146db10b9a756fba54 Mon Sep 17 00:00:00 2001 From: Weii Wang Date: Wed, 14 Feb 2024 17:28:55 +0800 Subject: [PATCH 4/5] Update macOS proxy bypass rules --- Lib/test/test_urllib2.py | 26 ++++++++++---------- Lib/urllib/request.py | 51 +++++++++++++++++----------------------- 2 files changed, 35 insertions(+), 42 deletions(-) diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 739c15df13de21..2c3452cbe0eae6 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -1514,17 +1514,20 @@ def test_winreg_proxy_bypass(self): def test_osx_proxy_bypass(self): bypass = { 'exclude_simple': False, - 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.10', - '10.0/16'] + 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.0/16', + '11.0./16', '12.13.14.0/24', '19.*', + '2001:0db8:0123:4567:89ab:cdef:1234:5678'] } # Check hosts that should trigger the proxy bypass - for host in ('foo.bar', 'www.bar.com', '127.0.0.1', '10.10.0.1', - '10.0.0.1'): + for host in ('foo.bar', 'www.bar.com', '127.0.0.1', '10.0.0.1', + '11.0.0.1', '12.13.14.15', '19.0.0.1', + '[2001:0db8:0123:4567:89ab:cdef:1234:5678]'): self.assertTrue(_proxy_bypass_macosx_sysconf(host, bypass), 'expected bypass of %s to be True' % host) # Check hosts that should not trigger the proxy bypass for host in ('abc.foo.bar', 'bar.com', '127.0.0.2', '10.11.0.1', - 'notinbypass'): + '11.1.0.1', '12.13.15.16', 'notinbypass', + '[2001:0db8:0123:4567:89ab:cdef:1234:0001]'): self.assertFalse(_proxy_bypass_macosx_sysconf(host, bypass), 'expected bypass of %s to be False' % host) @@ -1532,17 
+1535,14 @@ def test_osx_proxy_bypass(self): bypass = {'exclude_simple': True, 'exceptions': []} self.assertTrue(_proxy_bypass_macosx_sysconf('test', bypass)) - # Check that invalid prefix lengths are ignored + # Check that invalid IPs are ignored bypass = { 'exclude_simple': False, - 'exceptions': [ '10.0.0.0/40', '172.19.10.0/24' ] + 'exceptions': ['10.0.0.0/40', '1.256/16', '192.168'] } - host = '172.19.10.5' - self.assertTrue(_proxy_bypass_macosx_sysconf(host, bypass), - 'expected bypass of %s to be True' % host) - host = '10.0.1.5' - self.assertFalse(_proxy_bypass_macosx_sysconf(host, bypass), - 'expected bypass of %s to be False' % host) + for host in ('10.0.1.5', '1.256.0.1', '192.168.0.1'): + self.assertFalse(_proxy_bypass_macosx_sysconf(host, bypass), + 'expected bypass of %s to be False' % host) def check_basic_auth(self, headers, realm): with self.subTest(realm=realm, headers=headers): diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index d22af6618d80f1..1705f4e114e078 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2563,51 +2563,44 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings): } """ from fnmatch import fnmatch - from ipaddress import AddressValueError, IPv4Address + from ipaddress import IPv4Address, IPv4Network - hostonly, port = _splitport(host) - - def ip2num(ipAddr): - parts = ipAddr.split('.') - parts = list(map(int, parts)) - if len(parts) != 4: - parts = (parts + [0, 0, 0, 0])[:4] - return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3] + host, _ = _splitport(host) + # Strip brackets for IPv6 addresses + if host and host[0] == '[' and host[-1] == ']': + host = host[1:-1] # Check for simple host names: if '.' 
not in host: if proxy_settings['exclude_simple']: return True - hostIP = None + host_ip = None try: - hostIP = int(IPv4Address(hostonly)) - except AddressValueError: + host_ip = IPv4Address(host) + except ValueError: pass for value in proxy_settings.get('exceptions', ()): # Items in the list are strings like these: *.local, 169.254/16 + value = value.strip() if not value: continue - m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value) - if m is not None and hostIP is not None: - base = ip2num(m.group(1)) + m = re.match(r"(\d+(?:\.\d+)*)\.?/(\d+)", value) + if m is not None and host_ip is not None: + base = m.group(1) mask = m.group(2) - if mask is None: - mask = 8 * (m.group(1).count('.') + 1) - else: - mask = int(mask[1:]) - - if mask < 0 or mask > 32: - # System libraries ignore invalid prefix lengths - continue - - mask = 32 - mask - - if (hostIP >> mask) == (base >> mask): - return True + parts = base.split(".") + if len(parts) < 4: + base = ".".join((parts + ["0", "0", "0"])[:4]) + try: + network = IPv4Network("{}/{}".format(base, mask)) + if host_ip in network: + return True + except ValueError: + pass - elif fnmatch(host, value): + if fnmatch(host, value): return True return False From 4e4a4ee84b3c8e4be45cbe3031cfdca2ae4e1156 Mon Sep 17 00:00:00 2001 From: Weii Wang Date: Thu, 29 Feb 2024 01:12:51 +0800 Subject: [PATCH 5/5] Revert "Update macOS proxy bypass rules" This reverts commit 95e393f6f55503b55308bd146db10b9a756fba54. 
--- Lib/test/test_urllib2.py | 26 ++++++++++---------- Lib/urllib/request.py | 51 +++++++++++++++++++++++----------------- 2 files changed, 42 insertions(+), 35 deletions(-) diff --git a/Lib/test/test_urllib2.py b/Lib/test/test_urllib2.py index 2c3452cbe0eae6..739c15df13de21 100644 --- a/Lib/test/test_urllib2.py +++ b/Lib/test/test_urllib2.py @@ -1514,20 +1514,17 @@ def test_winreg_proxy_bypass(self): def test_osx_proxy_bypass(self): bypass = { 'exclude_simple': False, - 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.0/16', - '11.0./16', '12.13.14.0/24', '19.*', - '2001:0db8:0123:4567:89ab:cdef:1234:5678'] + 'exceptions': ['foo.bar', '*.bar.com', '127.0.0.1', '10.10', + '10.0/16'] } # Check hosts that should trigger the proxy bypass - for host in ('foo.bar', 'www.bar.com', '127.0.0.1', '10.0.0.1', - '11.0.0.1', '12.13.14.15', '19.0.0.1', - '[2001:0db8:0123:4567:89ab:cdef:1234:5678]'): + for host in ('foo.bar', 'www.bar.com', '127.0.0.1', '10.10.0.1', + '10.0.0.1'): self.assertTrue(_proxy_bypass_macosx_sysconf(host, bypass), 'expected bypass of %s to be True' % host) # Check hosts that should not trigger the proxy bypass for host in ('abc.foo.bar', 'bar.com', '127.0.0.2', '10.11.0.1', - '11.1.0.1', '12.13.15.16', 'notinbypass', - '[2001:0db8:0123:4567:89ab:cdef:1234:0001]'): + 'notinbypass'): self.assertFalse(_proxy_bypass_macosx_sysconf(host, bypass), 'expected bypass of %s to be False' % host) @@ -1535,14 +1532,17 @@ def test_osx_proxy_bypass(self): bypass = {'exclude_simple': True, 'exceptions': []} self.assertTrue(_proxy_bypass_macosx_sysconf('test', bypass)) - # Check that invalid IPs are ignored + # Check that invalid prefix lengths are ignored bypass = { 'exclude_simple': False, - 'exceptions': ['10.0.0.0/40', '1.256/16', '192.168'] + 'exceptions': [ '10.0.0.0/40', '172.19.10.0/24' ] } - for host in ('10.0.1.5', '1.256.0.1', '192.168.0.1'): - self.assertFalse(_proxy_bypass_macosx_sysconf(host, bypass), - 'expected bypass of %s to be False' % host) + 
host = '172.19.10.5' + self.assertTrue(_proxy_bypass_macosx_sysconf(host, bypass), + 'expected bypass of %s to be True' % host) + host = '10.0.1.5' + self.assertFalse(_proxy_bypass_macosx_sysconf(host, bypass), + 'expected bypass of %s to be False' % host) def check_basic_auth(self, headers, realm): with self.subTest(realm=realm, headers=headers): diff --git a/Lib/urllib/request.py b/Lib/urllib/request.py index 1705f4e114e078..d22af6618d80f1 100644 --- a/Lib/urllib/request.py +++ b/Lib/urllib/request.py @@ -2563,44 +2563,51 @@ def _proxy_bypass_macosx_sysconf(host, proxy_settings): } """ from fnmatch import fnmatch - from ipaddress import IPv4Address, IPv4Network + from ipaddress import AddressValueError, IPv4Address - host, _ = _splitport(host) - # Strip brackets for IPv6 addresses - if host and host[0] == '[' and host[-1] == ']': - host = host[1:-1] + hostonly, port = _splitport(host) + + def ip2num(ipAddr): + parts = ipAddr.split('.') + parts = list(map(int, parts)) + if len(parts) != 4: + parts = (parts + [0, 0, 0, 0])[:4] + return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3] # Check for simple host names: if '.' 
not in host: if proxy_settings['exclude_simple']: return True - host_ip = None + hostIP = None try: - host_ip = IPv4Address(host) - except ValueError: + hostIP = int(IPv4Address(hostonly)) + except AddressValueError: pass for value in proxy_settings.get('exceptions', ()): # Items in the list are strings like these: *.local, 169.254/16 - value = value.strip() if not value: continue - m = re.match(r"(\d+(?:\.\d+)*)\.?/(\d+)", value) - if m is not None and host_ip is not None: - base = m.group(1) + m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value) + if m is not None and hostIP is not None: + base = ip2num(m.group(1)) mask = m.group(2) - parts = base.split(".") - if len(parts) < 4: - base = ".".join((parts + ["0", "0", "0"])[:4]) - try: - network = IPv4Network("{}/{}".format(base, mask)) - if host_ip in network: - return True - except ValueError: - pass + if mask is None: + mask = 8 * (m.group(1).count('.') + 1) + else: + mask = int(mask[1:]) + + if mask < 0 or mask > 32: + # System libraries ignore invalid prefix lengths + continue + + mask = 32 - mask + + if (hostIP >> mask) == (base >> mask): + return True - if fnmatch(host, value): + elif fnmatch(host, value): return True return False