From 5ca0fbae5c8f1ae112ddd0d4647d3399fe610501 Mon Sep 17 00:00:00 2001 From: jcole-crowdstrike Date: Fri, 1 Mar 2024 11:55:32 -0800 Subject: [PATCH 1/5] Updating regex pattern to handle unicode whitespaces. Replacing the \s whitespace characters with normal spaces (" ") to prevent breaking on unicode whitespace characters (e.g., NBSP). Without this, if a branch name contains a unicode whitespace character that falls under \s, then the branch name will be truncated. --- git/remote.py | 2 +- test/test_remote.py | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/git/remote.py b/git/remote.py index 6ce720ee3..6e841949f 100644 --- a/git/remote.py +++ b/git/remote.py @@ -325,7 +325,7 @@ class FetchInfo(IterableObj): ERROR, ) = [1 << x for x in range(8)] - _re_fetch_result = re.compile(r"^\s*(.) (\[[\w\s\.$@]+\]|[\w\.$@]+)\s+(.+) -> ([^\s]+)( \(.*\)?$)?") + _re_fetch_result = re.compile(r"^ *(.) (\[[\w \.$@]+\]|[\w\.$@]+) +(.+) -> ([^ ]+)( \(.*\)?$)?") _flag_map: Dict[flagKeyLiteral, int] = { "!": ERROR, diff --git a/test/test_remote.py b/test/test_remote.py index c0bd11f80..63eababac 100644 --- a/test/test_remote.py +++ b/test/test_remote.py @@ -1002,6 +1002,21 @@ def test_push_unsafe_options_allowed(self, rw_repo): assert tmp_file.exists() tmp_file.unlink() + @with_rw_and_rw_remote_repo("0.1.6") + def test_fetch_unsafe_branch_name(self, rw_repo, remote_repo): + # Create branch with a name containing a NBSP + bad_branch_name = f"branch_with_{chr(160)}_nbsp" + Head.create(remote_repo, bad_branch_name) + + # Fetch and get branches + remote = rw_repo.remote("origin") + branches = remote.fetch() + + # Test for truncated branch name in branches + assert f"origin/{bad_branch_name}" in [b.name for b in branches] + + # Cleanup branch + Head.delete(remote_repo, bad_branch_name) class TestTimeouts(TestBase): @with_rw_repo("HEAD", bare=False) From 827e986ec686b168e65f3bbad526d6a863191031 Mon Sep 17 00:00:00 2001 From: jcole-crowdstrike Date: Fri, 1 Mar 2024 13:51:33 -0800 Subject: [PATCH 2/5] Updating spacing per linting test. Adding a second blank line after the newly added test case. --- test/test_remote.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test/test_remote.py b/test/test_remote.py index 63eababac..35af8172d 100644 --- a/test/test_remote.py +++ b/test/test_remote.py @@ -1018,6 +1018,7 @@ def test_fetch_unsafe_branch_name(self, rw_repo, remote_repo): # Cleanup branch Head.delete(remote_repo, bad_branch_name) + class TestTimeouts(TestBase): @with_rw_repo("HEAD", bare=False) def test_timeout_funcs(self, repo): From 4810491cb9d30d05e3ed73e55fe803fcf7ae0b53 Mon Sep 17 00:00:00 2001 From: jcole-crowdstrike Date: Mon, 4 Mar 2024 20:33:24 -0800 Subject: [PATCH 3/5] Fixing Windows encoding issue. Stdout is being encoded as Windows-1251 instead of UTF-8 due to passing universal_newlines as True to subprocess. --- git/remote.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/git/remote.py b/git/remote.py index 6e841949f..6cb33bede 100644 --- a/git/remote.py +++ b/git/remote.py @@ -891,7 +891,7 @@ def _get_fetch_info_from_stderr( None, progress_handler, finalizer=None, - decode_streams=False, + decode_streams=True, kill_after_timeout=kill_after_timeout, ) @@ -1068,7 +1068,7 @@ def fetch( Git.check_unsafe_options(options=list(kwargs.keys()), unsafe_options=self.unsafe_git_fetch_options) proc = self.repo.git.fetch( - "--", self, *args, as_process=True, with_stdout=False, universal_newlines=True, v=verbose, **kwargs + "--", self, *args, as_process=True, with_stdout=False, v=verbose, **kwargs ) res = self._get_fetch_info_from_stderr(proc, progress, kill_after_timeout=kill_after_timeout) if hasattr(self.repo.odb, "update_cache"): From 938f2725eb137492a459b4210ae58565e26e4fd5 Mon Sep 17 00:00:00 2001 From: jcole-crowdstrike Date: Tue, 5 Mar 2024 07:37:36 -0800 Subject: [PATCH 4/5] Setting universal_newlines=False explicitly in fetch() and pull(). This fix avoids encoding problems on Windows, as was done for fetch() in the previous commit. However, here it is being made more explicit and adds the same fix for the pull function. --- git/remote.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/git/remote.py b/git/remote.py index 6cb33bede..bbe1aea97 100644 --- a/git/remote.py +++ b/git/remote.py @@ -1068,7 +1068,7 @@ def fetch( Git.check_unsafe_options(options=list(kwargs.keys()), unsafe_options=self.unsafe_git_fetch_options) proc = self.repo.git.fetch( - "--", self, *args, as_process=True, with_stdout=False, v=verbose, **kwargs + "--", self, *args, as_process=True, with_stdout=False, universal_newlines=False, v=verbose, **kwargs ) res = self._get_fetch_info_from_stderr(proc, progress, kill_after_timeout=kill_after_timeout) if hasattr(self.repo.odb, "update_cache"): @@ -1122,7 +1122,7 @@ def pull( Git.check_unsafe_options(options=list(kwargs.keys()), unsafe_options=self.unsafe_git_pull_options) proc = self.repo.git.pull( - "--", self, refspec, with_stdout=False, as_process=True, universal_newlines=True, v=True, **kwargs + "--", self, refspec, with_stdout=False, as_process=True, universal_newlines=False, v=True, **kwargs ) res = self._get_fetch_info_from_stderr(proc, progress, kill_after_timeout=kill_after_timeout) if hasattr(self.repo.odb, "update_cache"): From 8b8c76a707155e32e7538136053ff00c3231cc92 Mon Sep 17 00:00:00 2001 From: jcole-crowdstrike Date: Fri, 8 Mar 2024 12:45:33 -0800 Subject: [PATCH 5/5] Fixing stripping issue causing passing tests to be interpreted as failures Removing this block of code that strips away nonprintable ASCII characters, as it is causing some tests to fail inappropriately. Successful tests are identified by the suffix ", done", which is being removed from some messages due to this aforementioned block of code. The codeblock probably intends to just strip away traililng non-ASCII characters from strings, but does not break, and so will instead continue to iterate through the string and then strip away both printable/nonprintable ASCII; this is causing the loss of the ", done" because anytime a nonprintable character is anywhere but the end of the string then the entire section of string after the character is truncated. Looking back through commits, this codeblock was added in 882ebb153e14488b275e374ccebcdda1dea22dd7 as a workaround after the addition of universal_newlines. Seeing as we have removed universal_newlines in the previous commit, we can also remove this codeblock. In future, if something likw this is needed, then maybe a join that concatenates all printable ASCII characters would serve better (e.g., "".join(b for b in line if b >= 32)), instead of trying to cut out the nonprintable chars. --- git/util.py | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/git/util.py b/git/util.py index 52f9dbdad..63c9b1e99 100644 --- a/git/util.py +++ b/git/util.py @@ -611,20 +611,6 @@ def _parse_progress_line(self, line: AnyStr) -> None: self.error_lines.append(self._cur_line) return - # Find escape characters and cut them away - regex will not work with - # them as they are non-ASCII. As git might expect a tty, it will send them. - last_valid_index = None - for i, c in enumerate(reversed(line_str)): - if ord(c) < 32: - # its a slice index - last_valid_index = -i - 1 - # END character was non-ASCII - # END for each character in line - if last_valid_index is not None: - line_str = line_str[:last_valid_index] - # END cut away invalid part - line_str = line_str.rstrip() - cur_count, max_count = None, None match = self.re_op_relative.match(line_str) if match is None: