From c84e643c6aa177f364ebe28e4c7bab1e37fb0242 Mon Sep 17 00:00:00 2001 From: David Lakin Date: Sun, 28 Apr 2024 22:41:11 -0400 Subject: [PATCH] Replace the suboptimal fuzz_tree harness with a better alternative As discussed in the initial fuzzing integration PR[^1], `fuzz_tree.py`'s implementation was not ideal in terms of coverage and its reading/writing to hard-coded paths inside `/tmp` was problematic as (among other concerns), it causes intermittent crashes on ClusterFuzz[^2] when multiple workers execute the test at the same time on the same machine. The changes here replace `fuzz_tree.py` completely with a completely new `fuzz_repo.py` fuzz target which: - Uses `tempfile.TemporaryDirectory()` to safely manage tmpdir creation and tear down, including during multi-worker execution runs. - Retains the same feature coverage as `fuzz_tree.py`, but it also adds considerably more from much smaller data inputs and with less memory consumed (and it doesn't even have a seed corpus or target specific dictionary yet.) - Can likely be improved further in the future by exercising additional features of `Repo` to the harness. Because `fuzz_tree.py` was removed and `fuzz_repo.py` was not derived from it, the Apache License call outs in the docs were also updated as they only apply to the singe `fuzz_config.py` file now. [^1]: https://github.com/gitpython-developers/GitPython/pull/1901#discussion_r1565001609 [^2]: https://bugs.chromium.org/p/oss-fuzz/issues/detail?id=68355 --- README.md | 4 +- fuzzing/README.md | 16 +++---- fuzzing/dictionaries/fuzz_tree.dict | 13 ------ fuzzing/fuzz-targets/fuzz_repo.py | 47 ++++++++++++++++++++ fuzzing/fuzz-targets/fuzz_tree.py | 67 ----------------------------- 5 files changed, 57 insertions(+), 90 deletions(-) delete mode 100644 fuzzing/dictionaries/fuzz_tree.dict create mode 100644 fuzzing/fuzz-targets/fuzz_repo.py delete mode 100644 fuzzing/fuzz-targets/fuzz_tree.py diff --git a/README.md b/README.md index 987e40e6c..d365a6584 100644 --- a/README.md +++ b/README.md @@ -240,8 +240,8 @@ Please have a look at the [contributions file][contributing]. [3-Clause BSD License](https://opensource.org/license/bsd-3-clause/), also known as the New BSD License. See the [LICENSE file][license]. -Two files exclusively used for fuzz testing are subject to [a separate license, detailed here](./fuzzing/README.md#license). -These files are not included in the wheel or sdist packages published by the maintainers of GitPython. +One file exclusively used for fuzz testing is subject to [a separate license, detailed here](./fuzzing/README.md#license). +This file is not included in the wheel or sdist packages published by the maintainers of GitPython. [contributing]: https://github.com/gitpython-developers/GitPython/blob/main/CONTRIBUTING.md [license]: https://github.com/gitpython-developers/GitPython/blob/main/LICENSE diff --git a/fuzzing/README.md b/fuzzing/README.md index 0a62b4c85..9d02bf72f 100644 --- a/fuzzing/README.md +++ b/fuzzing/README.md @@ -225,14 +225,14 @@ to [the official OSS-Fuzz documentation][oss-fuzz-docs]. ## LICENSE All files located within the `fuzzing/` directory are subject to [the same license](../LICENSE) -as [the other files in this repository](../README.md#license) with two exceptions: - -Two files located in this directory, [`fuzz_config.py`](./fuzz-targets/fuzz_config.py) -and [`fuzz_tree.py`](./fuzz-targets/fuzz_tree.py), have been migrated here from the OSS-Fuzz project repository where -they were originally created. As such, these two files retain their original license and copyright notice (Apache -License, Version 2.0 and Copyright 2023 Google LLC respectively.) Each file includes a notice in their respective header -comments stating that they have been modified. [LICENSE-APACHE](./LICENSE-APACHE) contains the original license used by -the OSS-Fuzz project repository at the time they were migrated. +as [the other files in this repository](../README.md#license) with one exception: + +[`fuzz_config.py`](./fuzz-targets/fuzz_config.py) was migrated to this repository from the OSS-Fuzz project's repository +where it was originally created. As such, [`fuzz_config.py`](./fuzz-targets/fuzz_config.py) retains its original license +and copyright notice (Apache License, Version 2.0 and Copyright 2023 Google LLC respectively) as in a header +comment, followed by a notice stating that it has have been modified contributors to GitPython. +[LICENSE-APACHE](./LICENSE-APACHE) contains the original license used by the OSS-Fuzz project repository at the time the +file was migrated. [oss-fuzz-repo]: https://github.com/google/oss-fuzz diff --git a/fuzzing/dictionaries/fuzz_tree.dict b/fuzzing/dictionaries/fuzz_tree.dict deleted file mode 100644 index 3ebe52b7f..000000000 --- a/fuzzing/dictionaries/fuzz_tree.dict +++ /dev/null @@ -1,13 +0,0 @@ -"\\001\\000\\000\\000" -"_join_multiline_va" -"setdef" -"1\\000\\000\\000\\000\\000\\000\\000" -"\\000\\000\\000\\000\\000\\000\\000\\020" -"\\377\\377\\377\\377\\377\\377\\377r" -"\\001\\000\\000\\000\\000\\000\\000\\001" -"\\000\\000\\000\\000\\000\\000\\000\\014" -"\\000\\000\\000\\000\\000\\000\\000\\003" -"\\001\\000" -"\\032\\000\\000\\000\\000\\000\\000\\000" -"-\\000\\000\\000\\000\\000\\000\\000" -"__format" diff --git a/fuzzing/fuzz-targets/fuzz_repo.py b/fuzzing/fuzz-targets/fuzz_repo.py new file mode 100644 index 000000000..7bd82c120 --- /dev/null +++ b/fuzzing/fuzz-targets/fuzz_repo.py @@ -0,0 +1,47 @@ +import atheris +import io +import sys +import os +import tempfile + +if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): + path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git")) + os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary + +with atheris.instrument_imports(): + import git + + +def TestOneInput(data): + fdp = atheris.FuzzedDataProvider(data) + + with tempfile.TemporaryDirectory() as temp_dir: + repo = git.Repo.init(path=temp_dir) + + # Generate a minimal set of files based on fuzz data to minimize I/O operations. + file_paths = [os.path.join(temp_dir, f"File{i}") for i in range(min(3, fdp.ConsumeIntInRange(1, 3)))] + for file_path in file_paths: + with open(file_path, "wb") as f: + # The chosen upperbound for count of bytes we consume by writing to these + # files is somewhat arbitrary and may be worth experimenting with if the + # fuzzer coverage plateaus. + f.write(fdp.ConsumeBytes(fdp.ConsumeIntInRange(1, 512))) + + repo.index.add(file_paths) + repo.index.commit(fdp.ConsumeUnicodeNoSurrogates(fdp.ConsumeIntInRange(1, 80))) + + fuzz_tree = git.Tree(repo, git.Tree.NULL_BIN_SHA, 0, "") + + try: + fuzz_tree._deserialize(io.BytesIO(data)) + except IndexError: + return -1 + + +def main(): + atheris.Setup(sys.argv, TestOneInput) + atheris.Fuzz() + + +if __name__ == "__main__": + main() diff --git a/fuzzing/fuzz-targets/fuzz_tree.py b/fuzzing/fuzz-targets/fuzz_tree.py deleted file mode 100644 index 4e2038add..000000000 --- a/fuzzing/fuzz-targets/fuzz_tree.py +++ /dev/null @@ -1,67 +0,0 @@ -# Copyright 2023 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -############################################################################### -# Note: This file has been modified by contributors to GitPython. -# The original state of this file may be referenced here: -# https://github.com/google/oss-fuzz/commit/f26f254558fc48f3c9bc130b10507386b94522da -############################################################################### -import atheris -import io -import sys -import os -import shutil - -if getattr(sys, "frozen", False) and hasattr(sys, "_MEIPASS"): - path_to_bundled_git_binary = os.path.abspath(os.path.join(os.path.dirname(__file__), "git")) - os.environ["GIT_PYTHON_GIT_EXECUTABLE"] = path_to_bundled_git_binary - -with atheris.instrument_imports(): - import git - - -def TestOneInput(data): - fdp = atheris.FuzzedDataProvider(data) - git_dir = "/tmp/.git" - head_file = os.path.join(git_dir, "HEAD") - refs_dir = os.path.join(git_dir, "refs") - common_dir = os.path.join(git_dir, "commondir") - objects_dir = os.path.join(git_dir, "objects") - - if os.path.isdir(git_dir): - shutil.rmtree(git_dir) - - os.mkdir(git_dir) - with open(head_file, "w") as f: - f.write(fdp.ConsumeUnicodeNoSurrogates(1024)) - os.mkdir(refs_dir) - os.mkdir(common_dir) - os.mkdir(objects_dir) - - _repo = git.Repo("/tmp/") - - fuzz_tree = git.Tree(_repo, git.Tree.NULL_BIN_SHA, 0, "") - try: - fuzz_tree._deserialize(io.BytesIO(data)) - except IndexError: - return -1 - - -def main(): - atheris.Setup(sys.argv, TestOneInput) - atheris.Fuzz() - - -if __name__ == "__main__": - main()