From 083d8dda85b417c505670976d6229ca254a4e154 Mon Sep 17 00:00:00 2001 From: Tim Hockin Date: Sat, 18 Nov 2023 13:17:25 -0800 Subject: [PATCH] Simplify inner loop: just fetch $ref Old way: - ls-remote $ref $ref^{} and parse - compare to current - if changed, fetch - update worktree New way: - fetch $ref - compare to current - if change, update worktree --- main.go | 93 +++++++++++------------------------------------------ test_e2e.sh | 4 +-- v3-to-v4.md | 8 +++++ 3 files changed, 29 insertions(+), 76 deletions(-) diff --git a/main.go b/main.go index e653d4969..eee5ece12 100644 --- a/main.go +++ b/main.go @@ -836,10 +836,7 @@ func main() { os.Exit(exitCode) } - if isHash, err := git.IsKnownHash(ctx, git.ref); err != nil { - log.Error(err, "can't tell if ref is a git hash, exiting", "ref", git.ref) - os.Exit(1) - } else if isHash { + if hash == git.ref { log.V(0).Info("ref appears to be a git hash, no further sync needed", "ref", git.ref) log.DeleteErrorFile() sleepForever() @@ -1493,49 +1490,6 @@ func (m multiError) Error() string { return strings.Join(strs, "; ") } -// remoteHashForRef returns the upstream hash for a given ref. -func (git *repoSync) remoteHashForRef(ctx context.Context, ref string) (string, error) { - // Fetch both the bare and dereferenced ref. git sorts the results and - // prints the dereferenced result, if present, after the bare result, so we - // always want the last result it produces. - output, _, err := git.Run(ctx, git.root, "ls-remote", "-q", git.repo, ref, ref+"^{}") - if err != nil { - return "", err - } - line := lastNonEmptyLine(output) - parts := strings.Split(line, "\t") // guaranteed to have at least 1 element - return parts[0], nil -} - -func lastNonEmptyLine(text string) string { - lines := strings.Split(text, "\n") // guaranteed to have at least 1 element - for i := len(lines) - 1; i >= 0; i-- { - line := strings.TrimSpace(lines[i]) - if line != "" { - return line - } - } - return "" -} - -// IsKnownHash returns true if ref is the hash of a commit which is known to this -// repo. In the event that ref is an abbreviated hash (e.g. "abcd" which -// resolves to "abcdef1234567890"), this will return true by prefix-matching. -// If ref is ambiguous, it will consider whatever result git returns. If ref -// is not a hash or is not known to this repo, even if it appears to be a hash, -// this will return false. -func (git *repoSync) IsKnownHash(ctx context.Context, ref string) (bool, error) { - stdout, stderr, err := git.Run(ctx, git.root, "rev-parse", ref+"^{commit}") - if err != nil { - if strings.Contains(stderr, "unknown revision") { - return false, nil - } - return false, err - } - line := lastNonEmptyLine(stdout) - return strings.HasPrefix(line, ref), nil -} - // worktree represents a git worktree (which may or may not exist on disk). type worktree absPath @@ -1592,26 +1546,6 @@ func (git *repoSync) SyncRepo(ctx context.Context, refreshCreds func(context.Con return false, "", err } - // Figure out what hash the remote resolves to. - remoteHash, err := git.remoteHashForRef(ctx, git.ref) - if err != nil { - return false, "", err - } - - // If we couldn't find a remote commit, it might have been a hash literal. - if remoteHash == "" { - // If git thinks it tastes like a hash, we just use that and if it - // is wrong, we will fail later. - output, _, err := git.Run(ctx, git.root, "rev-parse", git.ref) - if err != nil { - return false, "", err - } - result := strings.Trim(output, "\n") - if result == git.ref { - remoteHash = git.ref - } - } - // Find out what we currently have synced, if anything. var currentWorktree worktree if wt, err := git.currentWorktree(); err != nil { @@ -1622,6 +1556,22 @@ func (git *repoSync) SyncRepo(ctx context.Context, refreshCreds func(context.Con currentHash := currentWorktree.Hash() git.log.V(3).Info("current state", "hash", currentHash, "worktree", currentWorktree) + // This should be very fast if we already have the hash we need. Parameters + // like depth are set at fetch time. + if err := git.fetch(ctx, git.ref); err != nil { + return false, "", err + } + + // Figure out what we got. The ^{} syntax "peels" annotated tags to + // their underlying commit hashes, but has no effect if we fetched a + // branch, plain tag, or hash. + remoteHash := "" + if output, _, err := git.Run(ctx, git.root, "rev-parse", "FETCH_HEAD^{}"); err != nil { + return false, "", err + } else { + remoteHash = strings.Trim(output, "\n") + } + if currentHash == remoteHash { // We seem to have the right hash already. Let's be sure it's good. git.log.V(3).Info("current hash is same as remote", "hash", currentHash) @@ -1643,17 +1593,12 @@ func (git *repoSync) SyncRepo(ctx context.Context, refreshCreds func(context.Con // are set properly. This is cheap when we already have the target hash. if changed || git.syncCount == 0 { git.log.V(0).Info("update required", "ref", git.ref, "local", currentHash, "remote", remoteHash, "syncCount", git.syncCount) - - // Parameters like depth are set at fetch time. - if err := git.fetch(ctx, remoteHash); err != nil { - return false, "", err - } metricFetchCount.Inc() // Reset the repo (note: not the worktree - that happens later) to the new // ref. This makes subsequent fetches much less expensive. It uses --soft // so no files are checked out. - if _, _, err := git.Run(ctx, git.root, "reset", "--soft", "FETCH_HEAD"); err != nil { + if _, _, err := git.Run(ctx, git.root, "reset", "--soft", remoteHash); err != nil { return false, "", err } @@ -1715,7 +1660,7 @@ func (git *repoSync) SyncRepo(ctx context.Context, refreshCreds func(context.Con // fetch retrieves the specified ref from the upstream repo. func (git *repoSync) fetch(ctx context.Context, ref string) error { - git.log.V(1).Info("fetching", "ref", ref, "repo", git.repo) + git.log.V(2).Info("fetching", "ref", ref, "repo", git.repo) // Fetch the ref and do some cleanup, setting or un-setting the repo's // shallow flag as appropriate. diff --git a/test_e2e.sh b/test_e2e.sh index 884f36dd4..5134b3881 100755 --- a/test_e2e.sh +++ b/test_e2e.sh @@ -3021,7 +3021,7 @@ function e2e::export_error() { --error-file="error.json" assert_file_absent "$ROOT/link" assert_file_absent "$ROOT/link/file" - assert_file_contains "$ROOT/error.json" "unknown revision" + assert_file_contains "$ROOT/error.json" "couldn't find remote ref" # the error.json file should be removed if sync succeeds. GIT_SYNC \ @@ -3049,7 +3049,7 @@ function e2e::export_error_abs_path() { --error-file="$ROOT/dir/error.json" assert_file_absent "$ROOT/link" assert_file_absent "$ROOT/link/file" - assert_file_contains "$ROOT/dir/error.json" "unknown revision" + assert_file_contains "$ROOT/dir/error.json" "couldn't find remote ref" } ############################################## diff --git a/v3-to-v4.md b/v3-to-v4.md index 0a931f2f9..fe4e52ddd 100644 --- a/v3-to-v4.md +++ b/v3-to-v4.md @@ -31,6 +31,14 @@ commit (by SHA) it needs. This transfers less data and closes the race condition where a symbolic name can change after `git ls-remote` but before `git fetch`. +### The v4.2+ loop + +The v4.2 loop refines the v4 loop even further. Instead of using ls-remote to +see what the upstream has and then fetching it, git-sync sill just fetch it by +ref. If the local sync already has the corresponding hash, nothing more will +be synced. If it did not have that hash before, then it does now and can +update the worktree. + ## Flags The flag syntax parsing has changed in v4. git-sync v3 accept flags in Go's