From a431fabb1932a141d94de7258be36db7a357dc4d Mon Sep 17 00:00:00 2001 From: Pierre Chevalier Date: Fri, 15 Mar 2024 02:57:27 -0700 Subject: [PATCH] Fix import of unroundtripable git commit Summary: By unroundtripable, we mean: given a git commit, the bonsai generated from it doesn't contain enough information to produce the exact same commit back when deriving the git commit from the bonsai. Here is the faulty sequence: * Update bonsai git mappings with the imported commits * `derive_bulk`, by contract attempts to derive these commits again * When `derive_bulk` is done, it attempts to update the bonsai git mappings (as they are the mapping for the "git_commits" derived data type) * There is a conflict between the imported and the derived commits, so this update fails To fix the issue, we must never call `derive_bulk` for git commits. Instead, * First derive all types but git commits and delta manifests (which depend on git commits) * Then update the mappings. This replaces deriving the git commits * Finally, derive delta manifests, which depend on git commits, but see them as "already derived" since they were imported and the mappings exist There is still an open issue after this: If the git_delta_manifests derivation fails and someone reruns the import, we will see that the mappings exists and proceed without deriving the delta manifests. This will be patched separately. Reviewed By: gustavoavena Differential Revision: D54905694 fbshipit-source-id: f5b68c25a08fd1ee0df6b222ad61cd34c9f8de37 --- .../git/import_direct/src/uploader.rs | 64 +++++++++++++++---- .../test-gitimport-unroundtripable.t | 23 ++----- 2 files changed, 58 insertions(+), 29 deletions(-) diff --git a/eden/mononoke/git/import_direct/src/uploader.rs b/eden/mononoke/git/import_direct/src/uploader.rs index 9e64f53850ad8..d1086a1540545 100644 --- a/eden/mononoke/git/import_direct/src/uploader.rs +++ b/eden/mononoke/git/import_direct/src/uploader.rs @@ -42,6 +42,7 @@ use mononoke_types::hash; use mononoke_types::BonsaiChangeset; use mononoke_types::BonsaiChangesetMut; use mononoke_types::ChangesetId; +use mononoke_types::DerivableType; use mononoke_types::FileChange; use mononoke_types::FileType; use mononoke_types::NonRootMPath; @@ -238,19 +239,58 @@ where stats.completion_time ); - if !dry_run { - self.inner - .bonsai_git_mapping() - .bulk_add(ctx, &oid_to_bcsid) - .await?; - let csids = oid_to_bcsid.iter().map(|entry| entry.bcs_id).collect(); - let config = self.inner.repo_derived_data().active_config(); - self.inner - .repo_derived_data() - .manager() - .derive_bulk(ctx, csids, None, &backfill_derivation.types(&config.types)) - .await?; + if dry_run { + // Short circuit the steps that write + return Ok(()); } + + let csids = oid_to_bcsid + .iter() + .map(|entry| entry.bcs_id) + .collect::>(); + let config = self.inner.repo_derived_data().active_config(); + + // Derive all types that don't depend on GitCommit + let non_git_types = backfill_derivation + .types(&config.types) + .into_iter() + .filter(|dt| match dt { + DerivableType::GitCommit | DerivableType::GitDeltaManifest => false, + _ => true, + }) + .collect::>(); + self.inner + .repo_derived_data() + .manager() + .derive_bulk(ctx, csids.clone(), None, &non_git_types) + .await?; + // Upload all bonsai git mappings. + // This is done instead of deriving git commits. It is not equivalent as roundtrip from + // git to bonsai and back is not guaranteed. + // We want to do this as late as possible. + // Ideally, we would want to do it last as this is what is used to determine whether + // it is safe to proceed from there. + // We can't actually do it last as it must be done before deriving `GitDeltaManifest` + // since that depends on git commits. + self.inner + .bonsai_git_mapping() + .bulk_add(ctx, &oid_to_bcsid) + .await?; + // derive git delta manifests: note: GitCommit don't need to be explicitly + // derived as they were already imported + let delta_manifest = backfill_derivation + .types(&config.types) + .into_iter() + .filter(|dt| match dt { + DerivableType::GitDeltaManifest => true, + _ => false, + }) + .collect::>(); + self.inner + .repo_derived_data() + .manager() + .derive_bulk(ctx, csids, None, &delta_manifest) + .await?; Ok(()) } diff --git a/eden/mononoke/tests/integration/test-gitimport-unroundtripable.t b/eden/mononoke/tests/integration/test-gitimport-unroundtripable.t index 28ff39945899e..fe3d21be371ad 100644 --- a/eden/mononoke/tests/integration/test-gitimport-unroundtripable.t +++ b/eden/mononoke/tests/integration/test-gitimport-unroundtripable.t @@ -25,21 +25,12 @@ $ git commit -qa -m "Unroundtripable commit: we don't store the encoding" # Import it into Mononoke - $ gitimport "$GIT_REPO" --concurrency 1 full-repo 2> import_output - [1] -# Dancing around displaying the output because the error can be spelled out in different ways based on a race condition - $ head -5 import_output + $ gitimport "$GIT_REPO" --concurrency 1 full-repo * using repo "repo" repoid RepositoryId(0) (glob) * GitRepo:$TESTTMP/repo-git commit 1 of 1 - Oid:a57065d8 => Bid:f1c2afeb (glob) - * Execution error: gitimport failed (glob) - - Caused by: - $ cat import_output | grep Conflicting - * Conflicting mapping Some(BonsaiGitMappingEntry { git_sha1: GitSha1(a57065d80c86fdef0f01cc4c822278257107ccad), bcs_id: ChangesetId(Blake2(f1c2afeb1a400c6b7d45af203fd2de012f5c55a08616cdd2a8499278ab1ddf3d)) }) detected while inserting git mappings (tried inserting: [BonsaiGitMappingEntry { git_sha1: GitSha1(b206826a406290c524ea0e4fb3bce838b4c853dc), bcs_id: ChangesetId(Blake2(f1c2afeb1a400c6b7d45af203fd2de012f5c55a08616cdd2a8499278ab1ddf3d)) }]) (glob) + * Ref: "refs/heads/master": Some(ChangesetId(Blake2(f1c2afeb1a400c6b7d45af203fd2de012f5c55a08616cdd2a8499278ab1ddf3d))) (glob) - $ mononoke_newadmin git-objects -R repo fetch --id a57065d80c86fdef0f01cc4c822278257107ccad > imported_commit - $ mononoke_newadmin git-objects -R repo fetch --id b206826a406290c524ea0e4fb3bce838b4c853dc > derived_commit - $ diff --old-line-format="- %L" --new-line-format="+ %L" imported_commit derived_commit + $ mononoke_newadmin git-objects -R repo fetch --id a57065d80c86fdef0f01cc4c822278257107ccad The object is a Git Commit Commit { @@ -63,11 +54,9 @@ sign: Plus, }, }, - - encoding: Some( - - "ISO-8859-1", - - ), - + encoding: None, + encoding: Some( + "ISO-8859-1", + ), message: "Unroundtripable commit: we don\'t store the encoding\n", extra_headers: [], } - [1]