From a1622f0d223e93166a3edfdba9afd978a6531162 Mon Sep 17 00:00:00 2001 From: Johan Andersson Date: Thu, 24 Aug 2023 16:04:06 +0200 Subject: [PATCH] Implement sparse-checkout feature in the tool Extended config file format with new key "sparse.paths = []" that defines what paths should be filtered out --- README.md | 30 +++++++++++++++++++++++++++++- subgit/core.py | 44 ++++++++++++++++++++++++++++++++++++-------- 2 files changed, 65 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 46a0fbe..0ab3887 100644 --- a/README.md +++ b/README.md @@ -167,7 +167,35 @@ Requirement for a merge request - Any gitlab actions or PR specific tests/validation shold be green - No merge conflicts with master branch, if you have then you either merge master into your branch and resolve conflicts, or you rebase your branch ontop of master branch - Always do basic useability tests with most common commands as tests do not always show errors with everything - + + +## Sparse checkout + +This feature requires you to have git version `2.25.0` or later. + +An in-depth blog post about `sparse-checkout` in general can be read [here on github.blog](https://github.blog/2020-01-17-bring-your-monorepo-down-to-size-with-sparse-checkout/) + +The main idea with the `sparse-checkout` feature is to reduce the size of mono repos into more manageable chunks of output files in your file tree, but also to enable fewer git refs to be cloned. + +By adding the key `sparse.paths` to your repo config, you can define which folders should be visible in the pulled file tree. 
+ +``` +# Example config file +repos: + - name: phabfive + url: git@github.com:dynamist/phabfive.git + revision: + branch: "master" + sparse: + paths: + - "phabfive/" + - "tests/" +``` + +This example would clone the entire repo and all git refs within it, enable `sparse-checkout` on the cloned repo at the given revision (which could be a branch, commit or tag), and then configure the git clone to have the two folders `phabfive/` and `tests/` as what is visible in the tree. + +The paths you define work similarly to how `.gitignore` works. What you define in reality is a filter that is matched against all files and folders in the checked out git repo. This means that you can add paths like `*.py` or `*.md` or any other pattern that the `.gitignore` syntax supports and it will be used as a filter for which files are visible. Remember that a subfolder that has a matching file within it will be created even if that folder name itself does not match any provided path. The same applies the other way around: if you specify `tests/` as a path, it will include all of its subfolders & files even if they don't match any other filter. 
+ ### Run unitest suite & Tox diff --git a/subgit/core.py b/subgit/core.py index 01316a1..a4334e5 100644 --- a/subgit/core.py +++ b/subgit/core.py @@ -26,6 +26,18 @@ log = logging.getLogger(__name__) +def run_cmd(cli_command): + process = Popen( + cli_command, + stdout=PIPE, + stderr=None, + shell=True, + ) + output, stderr = process.communicate() + + return output, stderr + + class SubGit(): def __init__(self, config_file_path=None, answer_yes=False): self.answer_yes = answer_yes @@ -158,15 +170,9 @@ def repo_status(self): file_cwd = Path().cwd() / repo_name / ".git/FETCH_HEAD" if file_cwd.exists(): - command = f"stat -c %y {file_cwd}" - process = Popen( - command, - stdout=PIPE, - stderr=None, - shell=True, - ) - output, stderr = process.communicate() + output, stderr = run_cmd(f"stat -c %y {file_cwd}") parsed_output = str(output).replace('\\n', '') + print(f" Last pull/fetch: {parsed_output}") else: print(" Last pull/fetch: Repo has not been pulled or fetch since initial clone") @@ -586,6 +592,28 @@ def pull(self, names): log.info(f"Current commit summary on HEAD in git repo '{name}': ") log.info(f" {str(repo.head.commit.summary)}") + # Handle sparse checkout by configuring the repo + sparse_repo_config = repo_data.get("sparse", None) + + if sparse_repo_config: + log.info(f"Enable sparse checkout on repo {name}") + + # Always ensure that sparse is enabled + g.sparse_checkout("init") + + repos = [ + str(path) + for path in sparse_repo_config["paths"] + ] + + # Set what paths we defined to be checked out + g.sparse_checkout("set", *repos) + else: + # By always setting disable as a default, this will automatically revert any repo + # that used to have sparse enabled but no longer has it enabled + log.debug(f"Disabling sparse checkout on repo {name}") + g.sparse_checkout("disable") + def delete(self, repo_names=None): """ Helper method that recieves a list of repos. Deletes them as long as not one or