Skip to content

Commit

Permalink
Merge pull request #1688 from EliahKagan/execute-args
Browse files Browse the repository at this point in the history
Clarify Git.execute and Popen arguments
  • Loading branch information
Byron authored Oct 3, 2023
2 parents 881b464 + ab95886 commit 91f63cd
Show file tree
Hide file tree
Showing 2 changed files with 91 additions and 74 deletions.
127 changes: 63 additions & 64 deletions git/cmd.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,10 +66,10 @@
"with_extended_output",
"with_exceptions",
"as_process",
"stdout_as_string",
"output_stream",
"with_stdout",
"stdout_as_string",
"kill_after_timeout",
"with_stdout",
"universal_newlines",
"shell",
"env",
Expand Down Expand Up @@ -105,7 +105,7 @@ def handle_process_output(
) -> None:
"""Registers for notifications to learn that process output is ready to read, and dispatches lines to
the respective line handlers.
This function returns once the finalizer returns
This function returns once the finalizer returns.
:return: result of finalizer
:param process: subprocess.Popen instance
Expand Down Expand Up @@ -294,9 +294,7 @@ def __setstate__(self, d: Dict[str, Any]) -> None:

@classmethod
def refresh(cls, path: Union[None, PathLike] = None) -> bool:
"""This gets called by the refresh function (see the top level
__init__).
"""
"""This gets called by the refresh function (see the top level __init__)."""
# discern which path to refresh with
if path is not None:
new_git = os.path.expanduser(path)
Expand Down Expand Up @@ -446,9 +444,9 @@ def polish_url(cls, url: str, is_cygwin: Union[None, bool] = None) -> PathLike:
if is_cygwin:
url = cygpath(url)
else:
"""Remove any backslahes from urls to be written in config files.
"""Remove any backslashes from urls to be written in config files.
Windows might create config-files containing paths with backslashed,
Windows might create config files containing paths with backslashes,
but git stops liking them as it will escape the backslashes.
Hence we undo the escaping just to be sure.
"""
Expand All @@ -464,8 +462,8 @@ def check_unsafe_protocols(cls, url: str) -> None:
Check for unsafe protocols.
Apart from the usual protocols (http, git, ssh),
Git allows "remote helpers" that have the form `<transport>::<address>`,
one of these helpers (`ext::`) can be used to invoke any arbitrary command.
Git allows "remote helpers" that have the form ``<transport>::<address>``,
one of these helpers (``ext::``) can be used to invoke any arbitrary command.
See:
Expand Down Expand Up @@ -517,7 +515,7 @@ def __init__(self, proc: Union[None, subprocess.Popen], args: Any) -> None:
self.status: Union[int, None] = None

def _terminate(self) -> None:
"""Terminate the underlying process"""
"""Terminate the underlying process."""
if self.proc is None:
return

Expand Down Expand Up @@ -572,7 +570,7 @@ def wait(self, stderr: Union[None, str, bytes] = b"") -> int:
"""Wait for the process and return its status code.
:param stderr: Previously read value of stderr, in case stderr is already closed.
:warn: may deadlock if output or error pipes are used and not handled separately.
:warn: May deadlock if output or error pipes are used and not handled separately.
:raise GitCommandError: if the return status is not 0"""
if stderr is None:
stderr_b = b""
Expand Down Expand Up @@ -605,13 +603,12 @@ def read_all_from_possibly_closed_stream(stream: Union[IO[bytes], None]) -> byte
# END auto interrupt

class CatFileContentStream(object):

"""Object representing a sized read-only stream returning the contents of
an object.
It behaves like a stream, but counts the data read and simulates an empty
stream once our sized content region is empty.
If not all data is read to the end of the objects's lifetime, we read the
rest to assure the underlying stream continues to work"""
If not all data is read to the end of the object's lifetime, we read the
rest to assure the underlying stream continues to work."""

__slots__: Tuple[str, ...] = ("_stream", "_nbr", "_size")

Expand Down Expand Up @@ -740,11 +737,11 @@ def __getattr__(self, name: str) -> Any:

def set_persistent_git_options(self, **kwargs: Any) -> None:
"""Specify command line options to the git executable
for subsequent subcommand calls
for subsequent subcommand calls.
:param kwargs:
is a dict of keyword arguments.
these arguments are passed as in _call_process
These arguments are passed as in _call_process
but will be passed to the git command rather than
the subcommand.
"""
Expand Down Expand Up @@ -775,7 +772,7 @@ def version_info(self) -> Tuple[int, int, int, int]:
"""
:return: tuple(int, int, int, int) tuple with integers representing the major, minor
and additional version numbers as parsed from git version.
This value is generated on demand and is cached"""
This value is generated on demand and is cached."""
return self._version_info

@overload
Expand Down Expand Up @@ -842,16 +839,16 @@ def execute(
strip_newline_in_stdout: bool = True,
**subprocess_kwargs: Any,
) -> Union[str, bytes, Tuple[int, Union[str, bytes], str], AutoInterrupt]:
"""Handles executing the command on the shell and consumes and returns
the returned information (stdout)
"""Handles executing the command and consumes and returns the returned
information (stdout).
:param command:
The command argument list to execute.
It should be a string, or a sequence of program arguments. The
It should be a sequence of program arguments, or a string. The
program to execute is the first item in the args sequence or string.
:param istream:
Standard input filehandle passed to subprocess.Popen.
Standard input filehandle passed to `subprocess.Popen`.
:param with_extended_output:
Whether to return a (status, stdout, stderr) tuple.
Expand All @@ -862,8 +859,7 @@ def execute(
:param as_process:
Whether to return the created process instance directly from which
streams can be read on demand. This will render with_extended_output and
with_exceptions ineffective - the caller will have
to deal with the details himself.
with_exceptions ineffective - the caller will have to deal with the details.
It is important to note that the process will be placed into an AutoInterrupt
wrapper that will interrupt the process once it goes out of scope. If you
use the command in iterators, you should pass the whole process instance
Expand All @@ -876,13 +872,34 @@ def execute(
always be created with a pipe due to issues with subprocess.
This merely is a workaround as data will be copied from the
output pipe to the given output stream directly.
Judging from the implementation, you shouldn't use this flag !
Judging from the implementation, you shouldn't use this parameter!
:param stdout_as_string:
if False, the commands standard output will be bytes. Otherwise, it will be
decoded into a string using the default encoding (usually utf-8).
If False, the command's standard output will be bytes. Otherwise, it will be
decoded into a string using the default encoding (usually UTF-8).
The latter can fail, if the output contains binary data.
:param kill_after_timeout:
Specifies a timeout in seconds for the git command, after which the process
should be killed. This will have no effect if as_process is set to True. It is
set to None by default and will let the process run until the timeout is
explicitly specified. This feature is not supported on Windows. It's also worth
noting that kill_after_timeout uses SIGKILL, which can have negative side
effects on a repository. For example, stale locks in case of ``git gc`` could
render the repository incapable of accepting changes until the lock is manually
removed.
:param with_stdout:
If True, default True, we open stdout on the created process.
:param universal_newlines:
if True, pipes will be opened as text, and lines are split at
all known line endings.
:param shell:
Whether to invoke commands through a shell (see `Popen(..., shell=True)`).
It overrides :attr:`USE_SHELL` if it is not `None`.
:param env:
A dictionary of environment variables to be passed to `subprocess.Popen`.
Expand All @@ -891,38 +908,23 @@ def execute(
one invocation of write() method. If the given number is not positive then
the default value is used.
:param strip_newline_in_stdout:
Whether to strip the trailing ``\\n`` of the command stdout.
:param subprocess_kwargs:
Keyword arguments to be passed to subprocess.Popen. Please note that
some of the valid kwargs are already set by this method, the ones you
Keyword arguments to be passed to `subprocess.Popen`. Please note that
some of the valid kwargs are already set by this method; the ones you
specify may not be the same ones.
:param with_stdout: If True, default True, we open stdout on the created process
:param universal_newlines:
if True, pipes will be opened as text, and lines are split at
all known line endings.
:param shell:
Whether to invoke commands through a shell (see `Popen(..., shell=True)`).
It overrides :attr:`USE_SHELL` if it is not `None`.
:param kill_after_timeout:
To specify a timeout in seconds for the git command, after which the process
should be killed. This will have no effect if as_process is set to True. It is
set to None by default and will let the process run until the timeout is
explicitly specified. This feature is not supported on Windows. It's also worth
noting that kill_after_timeout uses SIGKILL, which can have negative side
effects on a repository. For example, stale locks in case of git gc could
render the repository incapable of accepting changes until the lock is manually
removed.
:param strip_newline_in_stdout:
Whether to strip the trailing ``\\n`` of the command stdout.
:return:
* str(output) if extended_output = False (Default)
* tuple(int(status), str(stdout), str(stderr)) if extended_output = True
if output_stream is True, the stdout value will be your output stream:
If output_stream is True, the stdout value will be your output stream:
* output_stream if extended_output = False
* tuple(int(status), output_stream, str(stderr)) if extended_output = True
Note git is executed with LC_MESSAGES="C" to ensure consistent
Note that git is executed with ``LC_MESSAGES="C"`` to ensure consistent
output regardless of system language.
:raise GitCommandError:
Expand Down Expand Up @@ -971,18 +973,15 @@ def execute(
# end handle

stdout_sink = PIPE if with_stdout else getattr(subprocess, "DEVNULL", None) or open(os.devnull, "wb")
istream_ok = "None"
if istream:
istream_ok = "<valid stream>"
if shell is None:
shell = self.USE_SHELL
log.debug(
"Popen(%s, cwd=%s, universal_newlines=%s, shell=%s, istream=%s)",
"Popen(%s, cwd=%s, stdin=%s, shell=%s, universal_newlines=%s)",
redacted_command,
cwd,
universal_newlines,
"<valid stream>" if istream else "None",
shell,
istream_ok,
universal_newlines,
)
try:
with maybe_patch_caller_env:
Expand Down Expand Up @@ -1010,8 +1009,8 @@ def execute(
if as_process:
return self.AutoInterrupt(proc, command)

def _kill_process(pid: int) -> None:
"""Callback method to kill a process."""
def kill_process(pid: int) -> None:
"""Callback to kill a process."""
p = Popen(
["ps", "--ppid", str(pid)],
stdout=PIPE,
Expand Down Expand Up @@ -1044,7 +1043,7 @@ def _kill_process(pid: int) -> None:

if kill_after_timeout is not None:
kill_check = threading.Event()
watchdog = threading.Timer(kill_after_timeout, _kill_process, args=(proc.pid,))
watchdog = threading.Timer(kill_after_timeout, kill_process, args=(proc.pid,))

# Wait for the process to return
status = 0
Expand Down Expand Up @@ -1208,7 +1207,7 @@ def _unpack_args(cls, arg_list: Sequence[str]) -> List[str]:

def __call__(self, **kwargs: Any) -> "Git":
"""Specify command line options to the git executable
for a subcommand call
for a subcommand call.
:param kwargs:
is a dict of keyword arguments.
Expand Down Expand Up @@ -1246,7 +1245,7 @@ def _call_process(
self, method: str, *args: Any, **kwargs: Any
) -> Union[str, bytes, Tuple[int, Union[str, bytes], str], "Git.AutoInterrupt"]:
"""Run the given git command with the specified arguments and return
the result as a String
the result as a string.
:param method:
is the command. Contained "_" characters will be converted to dashes,
Expand All @@ -1255,7 +1254,7 @@ def _call_process(
:param args:
is the list of arguments. If None is included, it will be pruned.
This allows your commands to call git more conveniently as None
is realized as non-existent
is realized as non-existent.
:param kwargs:
It contains key-values for the following:
Expand Down Expand Up @@ -1385,7 +1384,7 @@ def get_object_header(self, ref: str) -> Tuple[str, str, int]:
return self.__get_object_header(cmd, ref)

def get_object_data(self, ref: str) -> Tuple[str, str, int, bytes]:
"""As get_object_header, but returns object data as well
"""As get_object_header, but returns object data as well.
:return: (hexsha, type_string, size_as_int, data_string)
:note: not threadsafe"""
Expand All @@ -1395,10 +1394,10 @@ def get_object_data(self, ref: str) -> Tuple[str, str, int, bytes]:
return (hexsha, typename, size, data)

def stream_object_data(self, ref: str) -> Tuple[str, str, int, "Git.CatFileContentStream"]:
"""As get_object_header, but returns the data as a stream
"""As get_object_header, but returns the data as a stream.
:return: (hexsha, type_string, size_as_int, stream)
:note: This method is not threadsafe, you need one independent Command instance per thread to be safe !"""
:note: This method is not threadsafe, you need one independent Command instance per thread to be safe!"""
cmd = self._get_persistent_cmd("cat_file_all", "cat_file", batch=True)
hexsha, typename, size = self.__get_object_header(cmd, ref)
cmd_stdout = cmd.stdout if cmd.stdout is not None else io.BytesIO()
Expand Down
38 changes: 28 additions & 10 deletions test/test_git.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
#
# This module is part of GitPython and is released under
# the BSD License: https://opensource.org/license/bsd-3-clause/
import contextlib
import inspect
import logging
import os
import os.path as osp
Expand Down Expand Up @@ -40,6 +40,13 @@ def tearDown(self):

gc.collect()

def _assert_logged_for_popen(self, log_watcher, name, value):
re_name = re.escape(name)
re_value = re.escape(str(value))
re_line = re.compile(fr"DEBUG:git.cmd:Popen\(.*\b{re_name}={re_value}[,)]")
match_attempts = [re_line.match(message) for message in log_watcher.output]
self.assertTrue(any(match_attempts), repr(log_watcher.output))

@mock.patch.object(Git, "execute")
def test_call_process_calls_execute(self, git):
git.return_value = ""
Expand Down Expand Up @@ -96,10 +103,8 @@ def _do_shell_combo(self, value_in_call, value_from_class):
# git.cmd gets Popen via a "from" import, so patch it there.
with mock.patch.object(cmd, "Popen", wraps=cmd.Popen) as mock_popen:
# Use a command with no arguments (besides the program name), so it runs
# with or without a shell, on all OSes, with the same effect. Since git
# errors out when run with no arguments, we swallow that error.
with contextlib.suppress(GitCommandError):
self.git.execute(["git"], shell=value_in_call)
# with or without a shell, on all OSes, with the same effect.
self.git.execute(["git"], with_exceptions=False, shell=value_in_call)

return mock_popen

Expand All @@ -115,14 +120,19 @@ def test_it_uses_shell_or_not_as_specified(self, case):
def test_it_logs_if_it_uses_a_shell(self, case):
"""``shell=`` in the log message agrees with what is passed to `Popen`."""
value_in_call, value_from_class = case

with self.assertLogs(cmd.log, level=logging.DEBUG) as log_watcher:
mock_popen = self._do_shell_combo(value_in_call, value_from_class)
self._assert_logged_for_popen(log_watcher, "shell", mock_popen.call_args.kwargs["shell"])

popen_shell_arg = mock_popen.call_args.kwargs["shell"]
expected_message = re.compile(rf"DEBUG:git.cmd:Popen\(.*\bshell={popen_shell_arg}\b.*\)")
match_attempts = [expected_message.fullmatch(message) for message in log_watcher.output]
self.assertTrue(any(match_attempts), repr(log_watcher.output))
@ddt.data(
("None", None),
("<valid stream>", subprocess.PIPE),
)
def test_it_logs_istream_summary_for_stdin(self, case):
expected_summary, istream_argument = case
with self.assertLogs(cmd.log, level=logging.DEBUG) as log_watcher:
self.git.execute(["git", "version"], istream=istream_argument)
self._assert_logged_for_popen(log_watcher, "stdin", expected_summary)

def test_it_executes_git_and_returns_result(self):
self.assertRegex(self.git.execute(["git", "version"]), r"^git version [\d\.]{2}.*$")
Expand Down Expand Up @@ -364,3 +374,11 @@ def counter_stderr(line):

self.assertEqual(count[1], line_count)
self.assertEqual(count[2], line_count)

def test_execute_kwargs_set_agrees_with_method(self):
parameter_names = inspect.signature(cmd.Git.execute).parameters.keys()
self_param, command_param, *most_params, extra_kwargs_param = parameter_names
self.assertEqual(self_param, "self")
self.assertEqual(command_param, "command")
self.assertEqual(set(most_params), cmd.execute_kwargs) # Most important.
self.assertEqual(extra_kwargs_param, "subprocess_kwargs")

0 comments on commit 91f63cd

Please sign in to comment.