diff --git a/Cargo.lock b/Cargo.lock index d6398414be2..bc8cb1be423 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2466,10 +2466,8 @@ version = "0.24.0" dependencies = [ "bstr", "document-features", - "filetime", "gix-attributes 0.16.0", "gix-features 0.32.1", - "gix-filter", "gix-fs 0.4.1", "gix-glob 0.10.2", "gix-hash 0.11.4", @@ -2479,13 +2477,8 @@ dependencies = [ "gix-odb", "gix-path 0.8.4", "gix-testtools", - "io-close", - "once_cell", "serde", "symlink", - "tempfile", - "thiserror", - "walkdir", ] [[package]] diff --git a/crate-status.md b/crate-status.md index a0ca1892fc7..3079e063507 100644 --- a/crate-status.md +++ b/crate-status.md @@ -482,25 +482,7 @@ Make it the best-performing implementation and the most convenient one. * [x] attributes ### gix-worktree -* handle the working **tree/checkout** - - [x] checkout an index of files, executables and symlinks just as fast as git - - [x] forbid symlinks in directories - - [ ] handle submodules - - [ ] handle sparse directories - - [ ] handle sparse index - - [x] linear scaling with multi-threading up to IO saturation - - supported attributes to affect working tree and index contents - - [x] eol - - [x] working-tree-encoding - - …more - - **filtering** - - [x] `text` - - [x] `ident` - - [x] filter processes - - [x] single-invocation clean/smudge filters -* access to per-path information, like `.gitignore` and `.gitattributes` in a manner well suited for efficient lookups - * [x] _exclude_ information - * [x] attributes +* [x] A stack to efficiently generate attribute lists for matching paths against. ### gix-revision * [x] `describe()` (similar to `git name-rev`) diff --git a/gix-worktree/Cargo.toml b/gix-worktree/Cargo.toml index fbcb08fa1c5..2796485c024 100644 --- a/gix-worktree/Cargo.toml +++ b/gix-worktree/Cargo.toml @@ -3,7 +3,7 @@ name = "gix-worktree" version = "0.24.0" repository = "https://github.com/Byron/gitoxide" license = "MIT OR Apache-2.0" -description = "A crate of the gitoxide project dedicated implementing everything around working trees and git excludes" +description = "A crate of the gitoxide project for shared worktree related types and utilities." 
authors = ["Sebastian Thiel "] edition = "2021" include = ["src/**/*", "LICENSE-*", "CHANGELOG.md"] @@ -33,25 +33,17 @@ gix-path = { version = "^0.8.4", path = "../gix-path" } gix-attributes = { version = "^0.16.0", path = "../gix-attributes" } gix-ignore = { version = "^0.5.1", path = "../gix-ignore" } gix-features = { version = "^0.32.1", path = "../gix-features" } -gix-filter = { version = "^0.3.0", path = "../gix-filter" } serde = { version = "1.0.114", optional = true, default-features = false, features = ["derive"]} -thiserror = "1.0.26" -filetime = "0.2.15" bstr = { version = "1.3.0", default-features = false } document-features = { version = "0.2.0", optional = true } -io-close = "0.3.7" [dev-dependencies] gix-testtools = { path = "../tests/tools" } gix-odb = { path = "../gix-odb" } symlink = "0.1.0" -once_cell = "1.18.0" - -walkdir = "2.3.2" -tempfile = "3.2.0" [package.metadata.docs.rs] features = ["document-features", "serde"] diff --git a/gix-worktree/src/checkout/chunk.rs b/gix-worktree/src/checkout/chunk.rs deleted file mode 100644 index 13110e90004..00000000000 --- a/gix-worktree/src/checkout/chunk.rs +++ /dev/null @@ -1,402 +0,0 @@ -use std::{ - collections::BTreeSet, - sync::atomic::{AtomicUsize, Ordering}, -}; - -use bstr::{BStr, BString}; -use gix_hash::oid; - -use crate::{checkout, checkout::entry, Cache}; - -mod reduce { - use std::marker::PhantomData; - - use gix_features::progress::Progress; - - use crate::checkout; - - pub struct Reduce<'a, 'entry, P1, P2, E> { - pub files: Option<&'a mut P1>, - pub bytes: Option<&'a mut P2>, - pub aggregate: super::Outcome<'entry>, - pub marker: PhantomData, - } - - impl<'a, 'entry, P1, P2, E> gix_features::parallel::Reduce for Reduce<'a, 'entry, P1, P2, E> - where - P1: Progress, - P2: Progress, - E: std::error::Error + Send + Sync + 'static, - { - type Input = Result, checkout::Error>; - type FeedProduce = (); - type Output = super::Outcome<'entry>; - type Error = checkout::Error; - - fn feed(&mut self, item: Self::Input) -> Result { - let item = item?; - let super::Outcome { - bytes_written, - files, - delayed_symlinks, - errors, - collisions, - delayed_paths_unknown, - delayed_paths_unprocessed, - } = item; - self.aggregate.bytes_written += bytes_written; - self.aggregate.files += files; - self.aggregate.delayed_symlinks.extend(delayed_symlinks); - self.aggregate.errors.extend(errors); - self.aggregate.collisions.extend(collisions); - self.aggregate.delayed_paths_unknown.extend(delayed_paths_unknown); - self.aggregate - .delayed_paths_unprocessed - .extend(delayed_paths_unprocessed); - - if let Some(progress) = self.bytes.as_deref_mut() { - progress.set(self.aggregate.bytes_written as gix_features::progress::Step); - } - if let Some(progress) = self.files.as_deref_mut() { - progress.set(self.aggregate.files); - } - - Ok(()) - } - - fn finalize(self) -> Result { - Ok(self.aggregate) - } - } -} -pub use reduce::Reduce; - -use crate::checkout::entry::DelayedFilteredStream; - -#[derive(Default)] -pub struct Outcome<'a> { - pub collisions: Vec, - pub errors: Vec, - pub delayed_symlinks: Vec<(&'a mut gix_index::Entry, &'a BStr)>, - // all (immediately) written bytes - pub bytes_written: u64, - // the amount of files we processed - pub files: usize, - /// Relative paths that the process listed as 'delayed' even though we never passed them. - pub delayed_paths_unknown: Vec, - /// All paths that were left unprocessed, because they were never listed by the process even though we passed them. 
- pub delayed_paths_unprocessed: Vec, -} - -#[derive(Clone)] -pub struct Context { - pub find: Find, - pub path_cache: Cache, - pub filters: gix_filter::Pipeline, - pub buf: Vec, - pub options: Options, -} - -#[derive(Clone, Copy)] -pub struct Options { - pub fs: gix_fs::Capabilities, - pub destination_is_initially_empty: bool, - pub overwrite_existing: bool, - pub keep_going: bool, - pub filter_process_delay: gix_filter::driver::apply::Delay, -} - -impl From<&checkout::Options> for Options { - fn from(opts: &checkout::Options) -> Self { - Options { - fs: opts.fs, - destination_is_initially_empty: opts.destination_is_initially_empty, - overwrite_existing: opts.overwrite_existing, - keep_going: opts.keep_going, - filter_process_delay: opts.filter_process_delay, - } - } -} - -pub fn process<'entry, Find, E>( - entries_with_paths: impl Iterator, - files: Option<&AtomicUsize>, - bytes: Option<&AtomicUsize>, - delayed_filter_results: &mut Vec>, - ctx: &mut Context, -) -> Result, checkout::Error> -where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E> + Clone, - E: std::error::Error + Send + Sync + 'static, -{ - let mut delayed_symlinks = Vec::new(); - let mut collisions = Vec::new(); - let mut errors = Vec::new(); - let mut bytes_written = 0; - let mut files_in_chunk = 0; - - for (entry, entry_path) in entries_with_paths { - // TODO: write test for that - if entry.flags.contains(gix_index::entry::Flags::SKIP_WORKTREE) { - if let Some(files) = files { - files.fetch_add(1, Ordering::SeqCst); - } - files_in_chunk += 1; - continue; - } - - // Symlinks always have to be delayed on windows as they have to point to something that exists on creation. - // And even if not, there is a distinction between file and directory symlinks, hence we have to check what the target is - // before creating it. - // And to keep things sane, we just do the same on non-windows as well which is similar to what git does and adds some safety - // around writing through symlinks (even though we handle this). - // This also means that we prefer content in files over symlinks in case of collisions, which probably is for the better, too. - if entry.mode == gix_index::entry::Mode::SYMLINK { - delayed_symlinks.push((entry, entry_path)); - continue; - } - - match checkout_entry_handle_result(entry, entry_path, &mut errors, &mut collisions, files, bytes, ctx)? { - entry::Outcome::Written { bytes } => { - bytes_written += bytes as u64; - files_in_chunk += 1 - } - entry::Outcome::Delayed(delayed) => delayed_filter_results.push(delayed), - } - } - - Ok(Outcome { - bytes_written, - files: files_in_chunk, - errors, - collisions, - delayed_symlinks, - delayed_paths_unknown: Vec::new(), - delayed_paths_unprocessed: Vec::new(), - }) -} - -pub fn process_delayed_filter_results( - mut delayed_filter_results: Vec>, - files: Option<&AtomicUsize>, - bytes: Option<&AtomicUsize>, - out: &mut Outcome<'_>, - ctx: &mut Context, -) -> Result<(), checkout::Error> -where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E> + Clone, - E: std::error::Error + Send + Sync + 'static, -{ - let Options { - destination_is_initially_empty, - overwrite_existing, - keep_going, - .. - } = ctx.options; - let mut bytes_written = 0; - let mut delayed_files = 0; - // Sort by path for fast lookups - delayed_filter_results.sort_by(|a, b| a.entry_path.cmp(b.entry_path)); - // We process each key and do as the filter process tells us, while collecting data about the overall progress. 
- let keys: BTreeSet<_> = delayed_filter_results.iter().map(|d| d.key.clone()).collect(); - let mut unknown_paths = Vec::new(); - let mut rela_path_as_path = Default::default(); - for key in keys { - loop { - let rela_paths = ctx.filters.driver_state_mut().list_delayed_paths(&key)?; - if rela_paths.is_empty() { - break; - } - - for rela_path in rela_paths { - let delayed = match delayed_filter_results.binary_search_by(|d| d.entry_path.cmp(rela_path.as_ref())) { - Ok(idx) => &mut delayed_filter_results[idx], - Err(_) => { - if keep_going { - unknown_paths.push(rela_path); - continue; - } else { - return Err(checkout::Error::FilterPathUnknown { rela_path }); - } - } - }; - let mut read = std::io::BufReader::with_capacity( - 512 * 1024, - ctx.filters.driver_state_mut().fetch_delayed( - &key, - rela_path.as_ref(), - gix_filter::driver::Operation::Smudge, - )?, - ); - let (file, set_executable_after_creation) = match entry::open_file( - &std::mem::take(&mut delayed.validated_file_path), // mark it as seen, relevant for `unprocessed_paths` - destination_is_initially_empty, - overwrite_existing, - delayed.needs_executable_bit, - delayed.entry.mode, - ) { - Ok(res) => res, - Err(err) => { - if !is_collision(&err, delayed.entry_path, &mut out.collisions, files) { - handle_error(err, delayed.entry_path, files, &mut out.errors, ctx.options.keep_going)?; - } - std::io::copy(&mut read, &mut std::io::sink())?; - continue; - } - }; - let mut write = WriteWithProgress { - inner: std::io::BufWriter::with_capacity(512 * 1024, file), - progress: bytes, - }; - bytes_written += std::io::copy(&mut read, &mut write)?; - entry::finalize_entry( - delayed.entry, - write.inner.into_inner().map_err(std::io::IntoInnerError::into_error)?, - set_executable_after_creation.then(|| { - rela_path_as_path = gix_path::from_bstr(delayed.entry_path); - rela_path_as_path.as_ref() - }), - )?; - delayed_files += 1; - if let Some(files) = files { - files.fetch_add(1, Ordering::SeqCst); - } - } - } - } - - let unprocessed_paths = delayed_filter_results - .into_iter() - .filter_map(|d| (!d.validated_file_path.as_os_str().is_empty()).then(|| d.entry_path.to_owned())) - .collect(); - - if !keep_going && !unknown_paths.is_empty() { - return Err(checkout::Error::FilterPathsUnprocessed { - rela_paths: unprocessed_paths, - }); - } - - out.delayed_paths_unknown = unknown_paths; - out.delayed_paths_unprocessed = unprocessed_paths; - out.bytes_written += bytes_written; - out.files += delayed_files; - Ok(()) -} - -pub struct WriteWithProgress<'a, T> { - pub inner: T, - pub progress: Option<&'a AtomicUsize>, -} - -impl<'a, T> std::io::Write for WriteWithProgress<'a, T> -where - T: std::io::Write, -{ - fn write(&mut self, buf: &[u8]) -> std::io::Result { - let written = self.inner.write(buf)?; - if let Some(progress) = self.progress { - progress.fetch_add(written as gix_features::progress::Step, Ordering::SeqCst); - } - Ok(written) - } - - fn flush(&mut self) -> std::io::Result<()> { - self.inner.flush() - } -} - -pub fn checkout_entry_handle_result<'entry, Find, E>( - entry: &'entry mut gix_index::Entry, - entry_path: &'entry BStr, - errors: &mut Vec, - collisions: &mut Vec, - files: Option<&AtomicUsize>, - bytes: Option<&AtomicUsize>, - Context { - find, - path_cache, - filters, - buf, - options, - }: &mut Context, -) -> Result, checkout::Error> -where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E> + Clone, - E: std::error::Error + Send + Sync + 'static, -{ - let res = entry::checkout( - entry, - entry_path, - entry::Context { - find, 
- path_cache, - filters, - buf, - }, - *options, - ); - match res { - Ok(out) => { - if let Some(num) = out.as_bytes() { - if let Some(bytes) = bytes { - bytes.fetch_add(num, Ordering::SeqCst); - } - if let Some(files) = files { - files.fetch_add(1, Ordering::SeqCst); - } - } - Ok(out) - } - Err(checkout::Error::Io(err)) if is_collision(&err, entry_path, collisions, files) => { - Ok(entry::Outcome::Written { bytes: 0 }) - } - Err(err) => handle_error(err, entry_path, files, errors, options.keep_going) - .map(|()| entry::Outcome::Written { bytes: 0 }), - } -} - -fn handle_error( - err: E, - entry_path: &BStr, - files: Option<&AtomicUsize>, - errors: &mut Vec, - keep_going: bool, -) -> Result<(), E> -where - E: std::error::Error + Send + Sync + 'static, -{ - if keep_going { - errors.push(checkout::ErrorRecord { - path: entry_path.into(), - error: Box::new(err), - }); - if let Some(files) = files { - files.fetch_add(1, Ordering::SeqCst); - } - Ok(()) - } else { - Err(err) - } -} - -fn is_collision( - err: &std::io::Error, - entry_path: &BStr, - collisions: &mut Vec, - files: Option<&AtomicUsize>, -) -> bool { - if !gix_fs::symlink::is_collision_error(err) { - return false; - } - // We are here because a file existed or was blocked by a directory which shouldn't be possible unless - // we are on a file insensitive file system. - gix_features::trace::error!("{entry_path}: collided ({:?})", err.kind()); - collisions.push(checkout::Collision { - path: entry_path.into(), - error_kind: err.kind(), - }); - if let Some(files) = files { - files.fetch_add(1, Ordering::SeqCst); - } - true -} diff --git a/gix-worktree/src/checkout/entry.rs b/gix-worktree/src/checkout/entry.rs deleted file mode 100644 index 4744f5e16ce..00000000000 --- a/gix-worktree/src/checkout/entry.rs +++ /dev/null @@ -1,294 +0,0 @@ -use std::{ - fs::OpenOptions, - io::Write, - path::{Path, PathBuf}, -}; - -use bstr::BStr; -use gix_filter::{driver::apply::MaybeDelayed, pipeline::convert::ToWorktreeOutcome}; -use gix_hash::oid; -use gix_index::{entry::Stat, Entry}; -use io_close::Close; - -use crate::Cache; - -pub struct Context<'a, Find> { - pub find: &'a mut Find, - pub path_cache: &'a mut Cache, - pub filters: &'a mut gix_filter::Pipeline, - pub buf: &'a mut Vec, -} - -/// A delayed result of a long-running filter process, which is made available as stream. -pub struct DelayedFilteredStream<'a> { - /// The key identifying the driver program - pub key: gix_filter::driver::Key, - /// If the file is going to be an executable. - pub needs_executable_bit: bool, - /// The validated path on disk at which the file should be placed. - pub validated_file_path: PathBuf, - /// The entry to adjust with the file we will write. - pub entry: &'a mut gix_index::Entry, - /// The relative path at which the entry resides (for use when querying the delayed entry). - pub entry_path: &'a BStr, -} - -pub enum Outcome<'a> { - /// The file was written. - Written { - /// The amount of written bytes. - bytes: usize, - }, - /// The will be ready later. - Delayed(DelayedFilteredStream<'a>), -} - -impl Outcome<'_> { - /// Return ourselves as (in-memory) bytes if possible. - pub fn as_bytes(&self) -> Option { - match self { - Outcome::Written { bytes } => Some(*bytes), - Outcome::Delayed { .. 
} => None, - } - } -} - -#[cfg_attr(not(unix), allow(unused_variables))] -pub fn checkout<'entry, Find, E>( - entry: &'entry mut Entry, - entry_path: &'entry BStr, - Context { - find, - filters, - path_cache, - buf, - }: Context<'_, Find>, - crate::checkout::chunk::Options { - fs: gix_fs::Capabilities { - symlink, - executable_bit, - .. - }, - destination_is_initially_empty, - overwrite_existing, - filter_process_delay, - .. - }: crate::checkout::chunk::Options, -) -> Result, crate::checkout::Error> -where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E>, - E: std::error::Error + Send + Sync + 'static, -{ - let dest_relative = gix_path::try_from_bstr(entry_path).map_err(|_| crate::checkout::Error::IllformedUtf8 { - path: entry_path.to_owned(), - })?; - let is_dir = Some(entry.mode == gix_index::entry::Mode::COMMIT || entry.mode == gix_index::entry::Mode::DIR); - let path_cache = path_cache.at_path(dest_relative, is_dir, &mut *find)?; - let dest = path_cache.path(); - - let object_size = match entry.mode { - gix_index::entry::Mode::FILE | gix_index::entry::Mode::FILE_EXECUTABLE => { - let obj = find(&entry.id, buf).map_err(|err| crate::checkout::Error::Find { - err, - oid: entry.id, - path: dest.to_path_buf(), - })?; - - let filtered = filters.convert_to_worktree( - obj.data, - entry_path, - |_, attrs| { - path_cache.matching_attributes(attrs); - }, - filter_process_delay, - )?; - let (num_bytes, file, set_executable_after_creation) = match filtered { - ToWorktreeOutcome::Unchanged(buf) | ToWorktreeOutcome::Buffer(buf) => { - let (mut file, flag) = open_file( - dest, - destination_is_initially_empty, - overwrite_existing, - executable_bit, - entry.mode, - )?; - file.write_all(buf)?; - (buf.len(), file, flag) - } - ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut filtered)) => { - let (mut file, flag) = open_file( - dest, - destination_is_initially_empty, - overwrite_existing, - executable_bit, - entry.mode, - )?; - let num_bytes = std::io::copy(&mut filtered, &mut file)? as usize; - (num_bytes, file, flag) - } - ToWorktreeOutcome::Process(MaybeDelayed::Delayed(key)) => { - return Ok(Outcome::Delayed(DelayedFilteredStream { - key, - needs_executable_bit: false, - validated_file_path: dest.to_owned(), - entry, - entry_path, - })) - } - }; - - // For possibly existing, overwritten files, we must change the file mode explicitly. 
- finalize_entry(entry, file, set_executable_after_creation.then_some(dest))?; - num_bytes - } - gix_index::entry::Mode::SYMLINK => { - let obj = find(&entry.id, buf).map_err(|err| crate::checkout::Error::Find { - err, - oid: entry.id, - path: dest.to_path_buf(), - })?; - let symlink_destination = gix_path::try_from_byte_slice(obj.data) - .map_err(|_| crate::checkout::Error::IllformedUtf8 { path: obj.data.into() })?; - - if symlink { - try_op_or_unlink(dest, overwrite_existing, |p| { - gix_fs::symlink::create(symlink_destination, p) - })?; - } else { - let mut file = try_op_or_unlink(dest, overwrite_existing, |p| { - open_options(p, destination_is_initially_empty, overwrite_existing).open(dest) - })?; - file.write_all(obj.data)?; - file.close()?; - } - - entry.stat = Stat::from_fs(&std::fs::symlink_metadata(dest)?)?; - obj.data.len() - } - gix_index::entry::Mode::DIR => { - gix_features::trace::warn!( - "Skipped sparse directory at '{entry_path}' ({id}) as it cannot yet be handled", - id = entry.id - ); - 0 - } - gix_index::entry::Mode::COMMIT => { - gix_features::trace::warn!( - "Skipped submodule at '{entry_path}' ({id}) as it cannot yet be handled", - id = entry.id - ); - 0 - } - _ => unreachable!(), - }; - Ok(Outcome::Written { bytes: object_size }) -} - -/// Note that this works only because we assume to not race ourselves when symlinks are involved, and we do this by -/// delaying symlink creation to the end and will always do that sequentially. -/// It's still possible to fall for a race if other actors create symlinks in our path, but that's nothing to defend against. -fn try_op_or_unlink( - path: &Path, - overwrite_existing: bool, - op: impl Fn(&Path) -> std::io::Result, -) -> std::io::Result { - if overwrite_existing { - match op(path) { - Ok(res) => Ok(res), - Err(err) if gix_fs::symlink::is_collision_error(&err) => { - try_unlink_path_recursively(path, &std::fs::symlink_metadata(path)?)?; - op(path) - } - Err(err) => Err(err), - } - } else { - op(path) - } -} - -fn try_unlink_path_recursively(path: &Path, path_meta: &std::fs::Metadata) -> std::io::Result<()> { - if path_meta.is_dir() { - std::fs::remove_dir_all(path) - } else if path_meta.file_type().is_symlink() { - gix_fs::symlink::remove(path) - } else { - std::fs::remove_file(path) - } -} - -#[cfg(not(debug_assertions))] -fn debug_assert_dest_is_no_symlink(_path: &Path) {} - -/// This is a debug assertion as we expect the machinery calling this to prevent this possibility in the first place -#[cfg(debug_assertions)] -fn debug_assert_dest_is_no_symlink(path: &Path) { - if let Ok(meta) = path.metadata() { - debug_assert!( - !meta.file_type().is_symlink(), - "BUG: should not ever allow to overwrite/write-into the target of a symbolic link: {}", - path.display() - ); - } -} - -fn open_options(path: &Path, destination_is_initially_empty: bool, overwrite_existing: bool) -> OpenOptions { - if overwrite_existing || !destination_is_initially_empty { - debug_assert_dest_is_no_symlink(path); - } - let mut options = gix_features::fs::open_options_no_follow(); - options - .create_new(destination_is_initially_empty && !overwrite_existing) - .create(!destination_is_initially_empty || overwrite_existing) - .write(true); - options -} - -pub(crate) fn open_file( - path: &Path, - destination_is_initially_empty: bool, - overwrite_existing: bool, - fs_supports_executable_bit: bool, - entry_mode: gix_index::entry::Mode, -) -> std::io::Result<(std::fs::File, bool)> { - #[cfg_attr(windows, allow(unused_mut))] - let mut options = open_options(path, 
destination_is_initially_empty, overwrite_existing); - let needs_executable_bit = fs_supports_executable_bit && entry_mode == gix_index::entry::Mode::FILE_EXECUTABLE; - #[cfg(unix)] - let set_executable_after_creation = if needs_executable_bit && destination_is_initially_empty { - use std::os::unix::fs::OpenOptionsExt; - // Note that these only work if the file was newly created, but won't if it's already - // existing, possibly without the executable bit set. Thus we do this only if the file is new. - options.mode(0o777); - false - } else { - needs_executable_bit - }; - // not supported on windows - #[cfg(windows)] - let set_executable_after_creation = needs_executable_bit; - try_op_or_unlink(path, overwrite_existing, |p| options.open(p)).map(|f| (f, set_executable_after_creation)) -} - -/// Close `file` and store its stats in `entry`, possibly setting `file` executable depending on `set_executable_after_creation`. -#[cfg_attr(windows, allow(unused_variables))] -pub(crate) fn finalize_entry( - entry: &mut gix_index::Entry, - file: std::fs::File, - set_executable_after_creation: Option<&Path>, -) -> Result<(), crate::checkout::Error> -where - E: std::error::Error + Send + Sync + 'static, -{ - // For possibly existing, overwritten files, we must change the file mode explicitly. - #[cfg(unix)] - if let Some(path) = set_executable_after_creation { - use std::os::unix::fs::PermissionsExt; - let mut perm = std::fs::symlink_metadata(path)?.permissions(); - perm.set_mode(0o777); - std::fs::set_permissions(path, perm)?; - } - // NOTE: we don't call `file.sync_all()` here knowing that some filesystems don't handle this well. - // revisit this once there is a bug to fix. - entry.stat = Stat::from_fs(&file.metadata()?)?; - file.close()?; - Ok(()) -} diff --git a/gix-worktree/src/checkout/function.rs b/gix-worktree/src/checkout/function.rs deleted file mode 100644 index e52299e3972..00000000000 --- a/gix-worktree/src/checkout/function.rs +++ /dev/null @@ -1,168 +0,0 @@ -use std::sync::atomic::AtomicBool; - -use gix_features::{interrupt, parallel::in_parallel_with_finalize, progress::Progress}; -use gix_hash::oid; - -use crate::{cache, checkout::chunk, Cache}; - -/// Checkout the entire `index` into `dir`, and resolve objects found in index entries with `find` to write their content to their -/// respective path in `dir`. -/// Use `files` to count each fully checked out file, and count the amount written `bytes`. If `should_interrupt` is `true`, the -/// operation will abort. -/// `options` provide a lot of context on how to perform the operation. -/// -/// ### Handling the return value -/// -/// Note that interruption still produce an `Ok(…)` value, so the caller should look at `should_interrupt` to communicate the outcome. 
-/// -#[allow(clippy::too_many_arguments)] -pub fn checkout( - index: &mut gix_index::State, - dir: impl Into, - find: Find, - files: &mut impl Progress, - bytes: &mut impl Progress, - should_interrupt: &AtomicBool, - options: crate::checkout::Options, -) -> Result> -where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E> + Send + Clone, - E: std::error::Error + Send + Sync + 'static, -{ - let paths = index.take_path_backing(); - let res = checkout_inner(index, &paths, dir, find, files, bytes, should_interrupt, options); - index.return_path_backing(paths); - res -} - -#[allow(clippy::too_many_arguments)] -fn checkout_inner( - index: &mut gix_index::State, - paths: &gix_index::PathStorage, - dir: impl Into, - find: Find, - files: &mut impl Progress, - bytes: &mut impl Progress, - should_interrupt: &AtomicBool, - mut options: crate::checkout::Options, -) -> Result> -where - Find: for<'a> FnMut(&oid, &'a mut Vec) -> Result, E> + Send + Clone, - E: std::error::Error + Send + Sync + 'static, -{ - let num_files = files.counter(); - let num_bytes = bytes.counter(); - let dir = dir.into(); - let case = if options.fs.ignore_case { - gix_glob::pattern::Case::Fold - } else { - gix_glob::pattern::Case::Sensitive - }; - let (chunk_size, thread_limit, num_threads) = gix_features::parallel::optimize_chunk_size_and_thread_limit( - 100, - index.entries().len().into(), - options.thread_limit, - None, - ); - - let state = cache::State::for_checkout(options.overwrite_existing, std::mem::take(&mut options.attributes)); - let attribute_files = state.id_mappings_from_index(index, paths, case); - let mut ctx = chunk::Context { - buf: Vec::new(), - options: (&options).into(), - path_cache: Cache::new(dir, state, case, Vec::with_capacity(512), attribute_files), - filters: options.filters, - find, - }; - - let chunk::Outcome { - mut collisions, - mut errors, - mut bytes_written, - files: files_updated, - delayed_symlinks, - delayed_paths_unknown, - delayed_paths_unprocessed, - } = if num_threads == 1 { - let entries_with_paths = interrupt::Iter::new(index.entries_mut_with_paths_in(paths), should_interrupt); - let mut delayed_filter_results = Vec::new(); - let mut out = chunk::process( - entries_with_paths, - num_files.as_deref(), - num_bytes.as_deref(), - &mut delayed_filter_results, - &mut ctx, - )?; - chunk::process_delayed_filter_results( - delayed_filter_results, - num_files.as_deref(), - num_bytes.as_deref(), - &mut out, - &mut ctx, - )?; - out - } else { - let entries_with_paths = interrupt::Iter::new(index.entries_mut_with_paths_in(paths), should_interrupt); - in_parallel_with_finalize( - gix_features::iter::Chunks { - inner: entries_with_paths, - size: chunk_size, - }, - thread_limit, - { - let ctx = ctx.clone(); - move |_| (Vec::new(), ctx) - }, - |chunk, (delayed_filter_results, ctx)| { - chunk::process( - chunk.into_iter(), - num_files.as_deref(), - num_bytes.as_deref(), - delayed_filter_results, - ctx, - ) - }, - |(delayed_filter_results, mut ctx)| { - let mut out = chunk::Outcome::default(); - chunk::process_delayed_filter_results( - delayed_filter_results, - num_files.as_deref(), - num_bytes.as_deref(), - &mut out, - &mut ctx, - )?; - Ok(out) - }, - chunk::Reduce { - files: num_files.is_none().then_some(files), - bytes: num_bytes.is_none().then_some(bytes), - aggregate: Default::default(), - marker: Default::default(), - }, - )? 
- }; - - for (entry, entry_path) in delayed_symlinks { - bytes_written += chunk::checkout_entry_handle_result( - entry, - entry_path, - &mut errors, - &mut collisions, - num_files.as_deref(), - num_bytes.as_deref(), - &mut ctx, - )? - .as_bytes() - .expect("only symlinks are delayed here, they are never filtered (or delayed again)") - as u64; - } - - Ok(crate::checkout::Outcome { - files_updated, - collisions, - errors, - bytes_written, - delayed_paths_unknown, - delayed_paths_unprocessed, - }) -} diff --git a/gix-worktree/src/checkout/mod.rs b/gix-worktree/src/checkout/mod.rs deleted file mode 100644 index 4590b1f0373..00000000000 --- a/gix-worktree/src/checkout/mod.rs +++ /dev/null @@ -1,101 +0,0 @@ -use bstr::BString; -use gix_index::entry::stat; - -/// Information about a path that failed to checkout as something else was already present. -#[derive(Clone, Debug, Eq, Hash, Ord, PartialEq, PartialOrd)] -pub struct Collision { - /// the path that collided with something already present on disk. - pub path: BString, - /// The io error we encountered when checking out `path`. - pub error_kind: std::io::ErrorKind, -} - -/// A path that encountered an IO error. -pub struct ErrorRecord { - /// the path that encountered the error. - pub path: BString, - /// The error - pub error: Box, -} - -/// The outcome of checking out an entire index. -#[derive(Default)] -pub struct Outcome { - /// The amount of files updated, or created. - pub files_updated: usize, - /// The amount of bytes written to disk, - pub bytes_written: u64, - /// The encountered collisions, which can happen on a case-insensitive filesystem. - pub collisions: Vec, - /// Other errors that happened during checkout. - pub errors: Vec, - /// Relative paths that the process listed as 'delayed' even though we never passed them. - pub delayed_paths_unknown: Vec, - /// All paths that were left unprocessed, because they were never listed by the process even though we passed them. - pub delayed_paths_unprocessed: Vec, -} - -/// Options to further configure the checkout operation. -#[derive(Clone, Default)] -pub struct Options { - /// capabilities of the file system - pub fs: gix_fs::Capabilities, - /// If set, don't use more than this amount of threads. - /// Otherwise, usually use as many threads as there are logical cores. - /// A value of 0 is interpreted as no-limit - pub thread_limit: Option, - /// If true, we assume no file to exist in the target directory, and want exclusive access to it. - /// This should be enabled when cloning to avoid checks for freshness of files. This also enables - /// detection of collisions based on whether or not exclusive file creation succeeds or fails. - pub destination_is_initially_empty: bool, - /// If true, default false, worktree entries on disk will be overwritten with content from the index - /// even if they appear to be changed. When creating directories that clash with existing worktree entries, - /// these will try to delete the existing entry. - /// This is similar in behaviour as `git checkout --force`. - pub overwrite_existing: bool, - /// If true, default false, try to checkout as much as possible and don't abort on first error which isn't - /// due to a conflict. - /// The checkout operation will never fail, but count the encountered errors instead along with their paths. - pub keep_going: bool, - /// Control how stat comparisons are made when checking if a file is fresh. 
- pub stat_options: stat::Options, - /// A stack of attributes to use with the filesystem cache to use as driver for filters. - pub attributes: crate::cache::state::Attributes, - /// The filter pipeline to use for applying mandatory filters before writing to the worktree. - pub filters: gix_filter::Pipeline, - /// Control how long-running processes may use the 'delay' capability. - pub filter_process_delay: gix_filter::driver::apply::Delay, -} - -/// The error returned by the [checkout()][crate::checkout()] function. -#[derive(Debug, thiserror::Error)] -#[allow(missing_docs)] -pub enum Error { - #[error("Could not convert path to UTF8: {}", .path)] - IllformedUtf8 { path: BString }, - #[error("The clock was off when reading file related metadata after updating a file on disk")] - Time(#[from] std::time::SystemTimeError), - #[error("IO error while writing blob or reading file metadata or changing filetype")] - Io(#[from] std::io::Error), - #[error("object {} for checkout at {} could not be retrieved from object database", .oid.to_hex(), .path.display())] - Find { - #[source] - err: E, - oid: gix_hash::ObjectId, - path: std::path::PathBuf, - }, - #[error(transparent)] - Filter(#[from] gix_filter::pipeline::convert::to_worktree::Error), - #[error(transparent)] - FilterListDelayed(#[from] gix_filter::driver::delayed::list::Error), - #[error(transparent)] - FilterFetchDelayed(#[from] gix_filter::driver::delayed::fetch::Error), - #[error("The entry at path '{rela_path}' was listed as delayed by the filter process, but we never passed it")] - FilterPathUnknown { rela_path: BString }, - #[error("The following paths were delayed and apparently forgotten to be processed by the filter driver: ")] - FilterPathsUnprocessed { rela_paths: Vec }, -} - -mod chunk; -mod entry; -pub(crate) mod function; diff --git a/gix-worktree/src/lib.rs b/gix-worktree/src/lib.rs index 20ae186877b..9a0c32641fc 100644 --- a/gix-worktree/src/lib.rs +++ b/gix-worktree/src/lib.rs @@ -1,4 +1,4 @@ -//! A crate with all index-centric functionality that is interacting with a worktree. +//! A crate with utility types for use by other crates that implement specifics. //! //! Unless specified differently, all operations need an index file (e.g. `.git/index`) as driver. //! @@ -11,9 +11,6 @@ #![deny(missing_docs, rust_2018_idioms, unsafe_code)] use bstr::BString; -/// -pub mod read; - /// A cache for efficiently executing operations on directories and files which are encountered in sorted order. /// That way, these operations can be re-used for subsequent invocations in the same directory. /// @@ -52,9 +49,3 @@ pub(crate) type PathIdMapping = (BString, gix_hash::ObjectId); /// pub mod cache; -/// -pub mod checkout; -pub use checkout::function::checkout; - -pub mod status; -pub use status::function::status; diff --git a/gix-worktree/src/read.rs b/gix-worktree/src/read.rs deleted file mode 100644 index a54fc2c7611..00000000000 --- a/gix-worktree/src/read.rs +++ /dev/null @@ -1,64 +0,0 @@ -//! This module allows creating git blobs from worktree files. -//! -//! For the most part a blob just contains the raw on-disk data. However symlinks need to be considered properly -//! and attributes/config options need to be considered. - -use std::{ - fs::{read_link, File}, - io::{self, Read}, - path::Path, -}; - -use gix_object::Blob; -use gix_path as path; - -// TODO: tests - -// TODO: what to do about precompose unicode and ignore_case for symlinks - -/// Create a blob from a file or symlink. 
-pub fn blob(path: &Path, capabilities: &gix_fs::Capabilities) -> io::Result { - let mut data = Vec::new(); - data_to_buf(path, &mut data, capabilities)?; - Ok(Blob { data }) -} - -/// Create a blob from a file or symlink. -pub fn blob_with_meta(path: &Path, is_symlink: bool, capabilities: &gix_fs::Capabilities) -> io::Result { - let mut data = Vec::new(); - data_to_buf_with_meta(path, &mut data, is_symlink, capabilities)?; - Ok(Blob { data }) -} - -/// Create blob data from a file or symlink. -pub fn data_to_buf<'a>(path: &Path, buf: &'a mut Vec, capabilities: &gix_fs::Capabilities) -> io::Result<&'a [u8]> { - data_to_buf_with_meta(path, buf, path.symlink_metadata()?.is_symlink(), capabilities) -} - -/// Create a blob from a file or symlink. -pub fn data_to_buf_with_meta<'a>( - path: &Path, - buf: &'a mut Vec, - is_symlink: bool, - capabilities: &gix_fs::Capabilities, -) -> io::Result<&'a [u8]> { - buf.clear(); - // symlinks are only stored as actual symlinks if the FS supports it otherwise they are just - // normal files with their content equal to the linked path (so can be read normally) - // - if is_symlink && capabilities.symlink { - // conversion to bstr can never fail because symlinks are only used - // on unix (by git) so no reason to use the try version here - let symlink_path = path::into_bstr(read_link(path)?); - buf.extend_from_slice(&symlink_path); - // TODO: there is no reason this should be a clone - // std isn't great about allowing users to avoid allocations but we could - // simply write our own wrapper around libc::readlink which reuses the - // buffer. This would require unsafe code tough (obviously) - } else { - buf.clear(); - File::open(path)?.read_to_end(buf)?; - // TODO apply filters - } - Ok(buf.as_slice()) -} diff --git a/gix-worktree/src/status/content.rs b/gix-worktree/src/status/content.rs deleted file mode 100644 index aa775821a7a..00000000000 --- a/gix-worktree/src/status/content.rs +++ /dev/null @@ -1,80 +0,0 @@ -use gix_hash::ObjectId; -use gix_index as index; -use index::Entry; - -/// Compares the content of two blobs in some way. -pub trait CompareBlobs { - /// Output data produced by [`compare_blobs()`][CompareBlobs::compare_blobs()]. - type Output; - - /// Providing the underlying index `entry`, allow comparing a file in the worktree of size `worktree_blob_size` - /// and allow reading its bytes using `worktree_blob`. - /// If this function returns `None` the `entry` and the `worktree_blob` are assumed to be identical. - /// Use `entry_blob` to obtain the data for the blob referred to by `entry`, allowing comparisons of the data itself. - fn compare_blobs<'a, E>( - &mut self, - entry: &'a gix_index::Entry, - worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a, E>, - entry_blob: impl ReadDataOnce<'a, E>, - ) -> Result, E>; -} - -/// Lazy borrowed access to blob data. -pub trait ReadDataOnce<'a, E> { - /// Returns the contents of this blob. - /// - /// This potentially performs IO and other expensive operations - /// and should only be called when necessary. - fn read_data(self) -> Result<&'a [u8], E>; -} - -/// Compares to blobs by comparing their size and oid, and only looks at the file if -/// the size matches, therefore it's very fast. 
-#[derive(Clone)] -pub struct FastEq; - -impl CompareBlobs for FastEq { - type Output = (); - - fn compare_blobs<'a, E>( - &mut self, - entry: &'a Entry, - worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a, E>, - _entry_blob: impl ReadDataOnce<'a, E>, - ) -> Result, E> { - // make sure to account for racily smudged entries here so that they don't always keep - // showing up as modified even after their contents have changed again, to a potentially - // unmodified state. That means that we want to ignore stat.size == 0 for non_empty_blobs. - if entry.stat.size as usize != worktree_blob_size && (entry.id.is_empty_blob() || entry.stat.size != 0) { - return Ok(Some(())); - } - let blob = worktree_blob.read_data()?; - let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, blob); - Ok((entry.id != file_hash).then_some(())) - } -} - -/// Compares files to blobs by *always* comparing their hashes. -/// -/// Same as [`FastEq`] but does not contain a fast path for files with mismatched files and -/// therefore always returns an OID that can be reused later. -#[derive(Clone)] -pub struct HashEq; - -impl CompareBlobs for HashEq { - type Output = ObjectId; - - fn compare_blobs<'a, E>( - &mut self, - entry: &'a Entry, - _worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a, E>, - _entry_blob: impl ReadDataOnce<'a, E>, - ) -> Result, E> { - let blob = worktree_blob.read_data()?; - let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, blob); - Ok((entry.id != file_hash).then_some(file_hash)) - } -} diff --git a/gix-worktree/src/status/function.rs b/gix-worktree/src/status/function.rs deleted file mode 100644 index 5e01628b416..00000000000 --- a/gix-worktree/src/status/function.rs +++ /dev/null @@ -1,331 +0,0 @@ -use std::{io, marker::PhantomData, path::Path}; - -use bstr::BStr; -use filetime::FileTime; -use gix_features::parallel::{in_parallel_if, Reduce}; - -use crate::{ - read, - status::{ - content, - content::CompareBlobs, - types::{Error, Options}, - Change, VisitEntry, - }, -}; - -/// Calculates the changes that need to be applied to an `index` to match the state of the `worktree` and makes them -/// observable in `collector`, along with information produced by `compare` which gets to see blobs that may have changes. -/// `options` are used to configure the operation. -/// -/// Note that `index` is updated with the latest seen stat information from the worktree, and its timestamp is adjusted to -/// the current time for which it will be considered fresh. -/// -/// Note that this isn't technically quite what this function does as this also provides some additional information, -/// like whether a file has conflicts, and files that were added with `git add` are shown as a special -/// changes despite not technically requiring a change to the index since `git add` already added the file to the index. 
-pub fn status<'index, T, Find, E>( - index: &'index mut gix_index::State, - worktree: &Path, - collector: &mut impl VisitEntry<'index, ContentChange = T>, - compare: impl CompareBlobs + Send + Clone, - find: Find, - options: Options, -) -> Result<(), Error> -where - T: Send, - E: std::error::Error + Send + Sync + 'static, - Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E> + Send + Clone, -{ - // the order is absolutely critical here we use the old timestamp to detect racy index entries - // (modified at or after the last index update) during the index update we then set those - // entries size to 0 (see below) to ensure they keep showing up as racy and reset the timestamp. - let timestamp = index.timestamp(); - index.set_timestamp(FileTime::now()); - let (chunk_size, thread_limit, _) = gix_features::parallel::optimize_chunk_size_and_thread_limit( - 100, - index.entries().len().into(), - options.thread_limit, - None, - ); - let (entries, path_backing) = index.entries_mut_and_pathbacking(); - in_parallel_if( - || true, // TODO: heuristic: when is parallelization not worth it? - entries.chunks_mut(chunk_size), - thread_limit, - { - let options = &options; - move |_| { - ( - State { - buf: Vec::new(), - odb_buf: Vec::new(), - timestamp, - path_backing, - worktree, - options, - }, - compare.clone(), - find.clone(), - ) - } - }, - |entries, (state, diff, find)| { - entries - .iter_mut() - .filter_map(|entry| state.process(entry, diff, find)) - .collect() - }, - ReduceChange { - collector, - phantom: PhantomData, - }, - ) -} - -struct State<'a, 'b> { - buf: Vec, - odb_buf: Vec, - timestamp: FileTime, - // path_cache: fs::Cache TODO path cache - path_backing: &'b [u8], - worktree: &'a Path, - options: &'a Options, -} - -type StatusResult<'index, T> = Result<(&'index gix_index::Entry, &'index BStr, Option>, bool), Error>; - -impl<'index> State<'_, 'index> { - fn process( - &mut self, - entry: &'index mut gix_index::Entry, - diff: &mut impl CompareBlobs, - find: &mut Find, - ) -> Option> - where - E: std::error::Error + Send + Sync + 'static, - Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E> + Send + Clone, - { - let conflict = match entry.stage() { - 0 => false, - 1 => true, - _ => return None, - }; - if entry.flags.intersects( - gix_index::entry::Flags::UPTODATE - | gix_index::entry::Flags::SKIP_WORKTREE - | gix_index::entry::Flags::ASSUME_VALID - | gix_index::entry::Flags::FSMONITOR_VALID, - ) { - return None; - } - let path = entry.path_in(self.path_backing); - let status = self.compute_status(&mut *entry, path, diff, find); - Some(status.map(move |status| (&*entry, path, status, conflict))) - } - - /// # On how racy-git is handled here - /// - /// Basically the racy detection is a safety mechanism that ensures we can always just compare the stat - /// information between index and worktree and if they match we don't need to look at the content. - /// This usually just works but if a file updates quickly we could run into the following situation: - /// - /// * save file version `A` from disk into worktree (git add) - /// * file is changed so fast that the mtime doesn't change - *we only looks at seconds by default* - /// * file contents change but file-size stays the same, so `"foo" -> "bar"` has the same size but different content - /// - /// Now both `mtime` and `size`, and all other stat information, is the same but the file has actually changed. - /// This case is called *racily clean*. 
*The file should show up as changed but due to a data race it doesn't.* - /// This is the racy git problem. - /// - /// To solve this we do the following trick: Whenever we modify the index, which includes `git status`, we save the - /// current timestamp before the modification starts. This timestamp fundamentally represents a checkpoint of sorts. - /// We "promise" ourselves that after the modification finishes all entries modified before this timestamp have the - /// racy git problem resolved. - /// - /// So now when we modify the index we must resolve the racy git problem somehow. To do that we only need to look at - /// unchanged entries. Changed entries are not interesting since they are already showing up as changed anyway so there - /// isn't really a race-condition to worry about. This also explains why removing the `return` here doesn't have an apparent effect. - /// This entire branch here is just the optimization of "don't even look at index entries where the stat hasn't changed". - /// If we don't have this optimization the result shouldn't change, our status implementation will just be super slow :D - - /// We calculate whether this change is `racy_clean`, so if the last `timestamp` is before or the same as the `mtime` of the entry - /// which is what `new_stat.is_racy(..)` does in the branch, and only if we are sure that there is no race condition - /// do we `return` early. Since we don't `return` early we just do a full content comparison below, - /// which always yields the correct result, there is no race condition there. - /// - /// If a file showed up as racily clean and didn't change then we don't need to do anything. After this status check is - /// complete and the file won't show up as racily clean anymore, since it's mtime is now before the new timestamp. - /// However if the file did actually change then we really ran into one of those rare race conditions in that case we, - /// and git does the same, set the size of the file in the index to 0. This will always make the file show up as changed. - /// This adds the need to treat all files of size 0 in the index as changed. This is not quite right of course because 0 sized files - /// could be entirely valid and unchanged. Therefore this only applies if the oid doesn't match the oid of an empty file, - /// which is a constant. - /// - /// Adapted from [here](https://github.com/Byron/gitoxide/pull/805#discussion_r1164676777). 
- fn compute_status( - &mut self, - entry: &mut gix_index::Entry, - git_path: &BStr, - diff: &mut impl CompareBlobs, - find: &mut Find, - ) -> Result>, Error> - where - E: std::error::Error + Send + Sync + 'static, - Find: for<'a> FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E> + Send + Clone, - { - // TODO fs cache - let worktree_path = gix_path::try_from_bstr(git_path).map_err(|_| Error::IllformedUtf8)?; - let worktree_path = self.worktree.join(worktree_path); - let metadata = match worktree_path.symlink_metadata() { - // TODO: check if any parent directory is a symlink - // we need to use fs::Cache for that - Ok(metadata) if metadata.is_dir() => { - // index entries are normally only for files/symlinks - // if a file turned into a directory it was removed - // the only exception here are submodules which are - // part of the index despite being directories - // - // TODO: submodules: - // if entry.mode.contains(Mode::COMMIT) && - // resolve_gitlink_ref(ce->name, "HEAD", &sub)) - return Ok(Some(Change::Removed)); - } - Ok(metadata) => metadata, - Err(err) if err.kind() == io::ErrorKind::NotFound => return Ok(Some(Change::Removed)), - Err(err) => { - return Err(err.into()); - } - }; - if entry.flags.contains(gix_index::entry::Flags::INTENT_TO_ADD) { - return Ok(Some(Change::IntentToAdd)); - } - let new_stat = gix_index::entry::Stat::from_fs(&metadata)?; - let executable_bit_changed = - match entry - .mode - .change_to_match_fs(&metadata, self.options.fs.symlink, self.options.fs.executable_bit) - { - Some(gix_index::entry::mode::Change::Type { .. }) => return Ok(Some(Change::Type)), - Some(gix_index::entry::mode::Change::ExecutableBit) => true, - None => false, - }; - - // Here we implement racy-git. See racy-git.txt in the git documentation for a detailed documentation. - // - // A file is racy if: - // 1. its `mtime` is at or after the last index timestamp and its entry stat information - // matches the on-disk file but the file contents are actually modified - // 2. it's size is 0 (set after detecting a file was racy previously) - // - // The first case is detected below by checking the timestamp if the file is marked unmodified. - // The second case is usually detected either because the on-disk file is not empty, hence - // the basic stat match fails, or by checking whether the size doesn't fit the oid. - let mut racy_clean = false; - if !executable_bit_changed - && new_stat.matches(&entry.stat, self.options.stat) - // TODO: find a test for the following line or remove it. Is this more often hit with smudge/clean filters? - && (!entry.id.is_empty_blob() || entry.stat.size == 0) - { - racy_clean = new_stat.is_racy(self.timestamp, self.options.stat); - if !racy_clean { - return Ok(None); - } - } - - let read_file = WorktreeBlob { - buf: &mut self.buf, - path: &worktree_path, - entry, - options: self.options, - }; - let read_blob = OdbBlob { - buf: &mut self.odb_buf, - id: &entry.id, - find, - }; - let content_change = diff.compare_blobs::(entry, metadata.len() as usize, read_file, read_blob)?; - // This file is racy clean! Set the size to 0 so we keep detecting this as the file is updated. - if content_change.is_some() && racy_clean { - entry.stat.size = 0; - } - if content_change.is_some() || executable_bit_changed { - Ok(Some(Change::Modification { - executable_bit_changed, - content_change, - })) - } else { - // don't diff against this file next time since we know the file is unchanged. 
- entry.stat = new_stat; - Ok(None) - } - } -} - -struct ReduceChange<'a, 'index, T: VisitEntry<'index>> { - collector: &'a mut T, - phantom: PhantomData, -} - -impl<'index, T, C: VisitEntry<'index, ContentChange = T>> Reduce for ReduceChange<'_, 'index, C> { - type Input = Vec>; - - type FeedProduce = (); - - type Output = (); - - type Error = Error; - - fn feed(&mut self, items: Self::Input) -> Result { - for item in items { - let (entry, path, change, conflict) = item?; - self.collector.visit_entry(entry, path, change, conflict); - } - Ok(()) - } - - fn finalize(self) -> Result { - Ok(()) - } -} - -struct WorktreeBlob<'a> { - buf: &'a mut Vec, - path: &'a Path, - entry: &'a gix_index::Entry, - options: &'a Options, -} - -struct OdbBlob<'a, Find, E> -where - E: std::error::Error + Send + Sync + 'static, - Find: FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, -{ - buf: &'a mut Vec, - id: &'a gix_hash::oid, - find: Find, -} - -impl<'a> content::ReadDataOnce<'a, Error> for WorktreeBlob<'a> { - fn read_data(self) -> Result<&'a [u8], Error> { - let res = read::data_to_buf_with_meta( - self.path, - self.buf, - self.entry.mode == gix_index::entry::Mode::SYMLINK, - &self.options.fs, - )?; - Ok(res) - } -} - -impl<'a, Find, E> content::ReadDataOnce<'a, Error> for OdbBlob<'a, Find, E> -where - E: std::error::Error + Send + Sync + 'static, - Find: FnMut(&gix_hash::oid, &'a mut Vec) -> Result, E>, -{ - fn read_data(mut self) -> Result<&'a [u8], Error> { - (self.find)(self.id, self.buf) - .map(|b| b.data) - .map_err(move |err| Error::Find(Box::new(err))) - } -} diff --git a/gix-worktree/src/status/mod.rs b/gix-worktree/src/status/mod.rs deleted file mode 100644 index 8294a54e8ac..00000000000 --- a/gix-worktree/src/status/mod.rs +++ /dev/null @@ -1,11 +0,0 @@ -//! Changes between an index and a worktree. -/// -mod types; -pub use types::{Change, Error, Options, VisitEntry}; - -mod recorder; -pub use recorder::Recorder; - -/// -pub mod content; -pub(crate) mod function; diff --git a/gix-worktree/src/status/recorder.rs b/gix-worktree/src/status/recorder.rs deleted file mode 100644 index ea10303ae60..00000000000 --- a/gix-worktree/src/status/recorder.rs +++ /dev/null @@ -1,27 +0,0 @@ -use bstr::BStr; -use gix_index as index; - -use crate::status::{Change, VisitEntry}; - -/// Convenience implementation of [`VisitEntry`] that collects all non-trivial changes into a `Vec`. -#[derive(Debug, Default)] -pub struct Recorder<'index, T = ()> { - /// collected changes, index entries without conflicts or changes are excluded. - pub records: Vec<(&'index BStr, Option>, bool)>, -} - -impl<'index, T: Send> VisitEntry<'index> for Recorder<'index, T> { - type ContentChange = T; - - fn visit_entry( - &mut self, - _entry: &'index index::Entry, - rela_path: &'index BStr, - status: Option>, - conflict: bool, - ) { - if conflict || status.is_some() { - self.records.push((rela_path, status, conflict)) - } - } -} diff --git a/gix-worktree/src/status/types.rs b/gix-worktree/src/status/types.rs deleted file mode 100644 index 3d488d24ef4..00000000000 --- a/gix-worktree/src/status/types.rs +++ /dev/null @@ -1,69 +0,0 @@ -use bstr::BStr; - -/// The error returned by [`status()`][crate::status()]. 
-#[derive(Debug, thiserror::Error)] -#[allow(missing_docs)] -pub enum Error { - #[error("Could not convert path to UTF8")] - IllformedUtf8, - #[error("The clock was off when reading file related metadata after updating a file on disk")] - Time(#[from] std::time::SystemTimeError), - #[error("IO error while writing blob or reading file metadata or changing filetype")] - Io(#[from] std::io::Error), - #[error("Failed to obtain blob from object database")] - Find(#[source] Box), -} - -#[derive(Clone, Default)] -/// Options that control how the index status with a worktree is computed. -pub struct Options { - /// Capabilities of the file system which affect the status computation. - pub fs: gix_fs::Capabilities, - /// If set, don't use more than this amount of threads. - /// Otherwise, usually use as many threads as there are logical cores. - /// A value of 0 is interpreted as no-limit - pub thread_limit: Option, - /// Options that control how stat comparisons are made when checking if a file is fresh. - pub stat: gix_index::entry::stat::Options, -} - -/// How an index entry needs to be changed to obtain the destination worktree state, i.e. `entry.apply(this_change) == worktree-entry`. -#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] -pub enum Change { - /// This corresponding file does not exist in the worktree anymore. - Removed, - /// The type of file changed compared to the worktree, i.e. a symlink s now a file. - Type, - /// This worktree file was modified in some form, like a permission change or content change or both, - /// as compared to this entry. - Modification { - /// Indicates that one of the stat changes was an executable bit change - /// which is a significant change itself. - executable_bit_changed: bool, - /// The output of the [`CompareBlobs`][crate::status::content::CompareBlobs] run on this entry. - /// If there is no content change and only the executable bit - /// changed than this is `None`. - content_change: Option, - }, - /// An index entry that correspond to an untracked worktree file marked with `git add --intent-to-add`. - /// - /// This means it's not available in the object database yet or the index was created from, - /// even though now an entry exists that represents the worktree file. - IntentToAdd, -} - -/// Observe changes by comparing an index entry to the worktree or another index. -pub trait VisitEntry<'index> { - /// Data generated by comparing an entry with a file. - type ContentChange; - /// Observe the `change` of `entry` at the repository-relative `rela_path`, indicating whether - /// or not it has a `conflict`. - /// If `change` is `None`, there is no change. 
- fn visit_entry( - &mut self, - entry: &'index gix_index::Entry, - rela_path: &'index BStr, - change: Option>, - conflict: bool, - ); -} diff --git a/gix-worktree/src/untracked.rs b/gix-worktree/src/untracked.rs deleted file mode 100644 index 6e77d7fa3ba..00000000000 --- a/gix-worktree/src/untracked.rs +++ /dev/null @@ -1 +0,0 @@ -// TODO: untracked file detection, needs fs::Cache diff --git a/gix-worktree/tests/fixtures/generated-archives/.gitignore b/gix-worktree/tests/fixtures/generated-archives/.gitignore index e8d0fd48dce..6f631797de0 100644 --- a/gix-worktree/tests/fixtures/generated-archives/.gitignore +++ b/gix-worktree/tests/fixtures/generated-archives/.gitignore @@ -1,7 +1,2 @@ make_ignore_and_attributes_setup.tar.xz -make_mixed_without_submodules.tar.xz -make_mixed_without_submodules_and_symlinks.tar.xz make_attributes_baseline.tar.xz -make_dangerous_symlink.tar.xz -status_unchanged.tar.xz -status_changed.tar.xz diff --git a/gix-worktree/tests/fixtures/generated-archives/make_ignorecase_collisions.tar.xz b/gix-worktree/tests/fixtures/generated-archives/make_ignorecase_collisions.tar.xz deleted file mode 100644 index 6e4ed4be056..00000000000 --- a/gix-worktree/tests/fixtures/generated-archives/make_ignorecase_collisions.tar.xz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:2984e2e61b28635014165351cd872ea7e4f09c28b8b4bbe50692a465ef648033 -size 10616 diff --git a/gix-worktree/tests/fixtures/generated-archives/make_special_exclude_case.tar.xz b/gix-worktree/tests/fixtures/generated-archives/make_special_exclude_case.tar.xz index 56edd71ff4a..dc93af2130b 100644 --- a/gix-worktree/tests/fixtures/generated-archives/make_special_exclude_case.tar.xz +++ b/gix-worktree/tests/fixtures/generated-archives/make_special_exclude_case.tar.xz @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:24f605623efc49819d1b30c52fe22da8f94f2d267e8030ec9bc3b9b845801f76 -size 9220 +oid sha256:1804dc740055b8a5afe65a2db14f29c8ae4691896e67342a8dcb11530fd448c6 +size 9240 diff --git a/gix-worktree/tests/fixtures/generated-archives/racy_git.tar.xz b/gix-worktree/tests/fixtures/generated-archives/racy_git.tar.xz deleted file mode 100644 index 2d045b26aab..00000000000 --- a/gix-worktree/tests/fixtures/generated-archives/racy_git.tar.xz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:35b728a127f3b6170bac44469ff36d5ad0be2a4247a8926f1aaffb97b5973efc -size 1596 diff --git a/gix-worktree/tests/fixtures/generated-archives/status_conflict.tar.xz b/gix-worktree/tests/fixtures/generated-archives/status_conflict.tar.xz deleted file mode 100644 index dbe191fbe1f..00000000000 --- a/gix-worktree/tests/fixtures/generated-archives/status_conflict.tar.xz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:cd6d32ab7a1e372d80a617926cac2463f6620baedf74642d78fe7f8c956fd031 -size 11036 diff --git a/gix-worktree/tests/fixtures/generated-archives/status_intent_to_add.tar.xz b/gix-worktree/tests/fixtures/generated-archives/status_intent_to_add.tar.xz deleted file mode 100644 index 76feea7dc82..00000000000 --- a/gix-worktree/tests/fixtures/generated-archives/status_intent_to_add.tar.xz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:876670d74c01249d361aa73d83ab20d846db7c922a3ca825f778b5f9d746c401 -size 9304 diff --git a/gix-worktree/tests/fixtures/generated-archives/status_removed.tar.xz b/gix-worktree/tests/fixtures/generated-archives/status_removed.tar.xz deleted file mode 
100644 index 7b1462fc83e..00000000000 --- a/gix-worktree/tests/fixtures/generated-archives/status_removed.tar.xz +++ /dev/null @@ -1,3 +0,0 @@ -version https://git-lfs.github.com/spec/v1 -oid sha256:e5fe85a65e3689e9e60598130be60761dc4ea129e04d7d5501320f7ebad1eb2b -size 10520 diff --git a/gix-worktree/tests/fixtures/make_dangerous_symlink.sh b/gix-worktree/tests/fixtures/make_dangerous_symlink.sh deleted file mode 100755 index 31437285a37..00000000000 --- a/gix-worktree/tests/fixtures/make_dangerous_symlink.sh +++ /dev/null @@ -1,24 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -# Every symlink is dangerous as it might either link to another directory and thus redirect -# all writes in the path, or it might point to a file and opening the symlink actually opens -# the target. -# We handle this by either validating symlinks specifically or create symlinks -empty_oid=$(git hash-object -w --stdin .git/info/exclude -# a sample .git/info/exclude -file-anywhere -/file-from-top - -dir-anywhere/ -/dir-from-top - -subdir-anywhere/file -subdir-anywhere/dir/ -EOF - -git commit --allow-empty -m "init" diff --git a/gix-worktree/tests/fixtures/make_ignorecase_collisions.sh b/gix-worktree/tests/fixtures/make_ignorecase_collisions.sh deleted file mode 100755 index d91bd542588..00000000000 --- a/gix-worktree/tests/fixtures/make_ignorecase_collisions.sh +++ /dev/null @@ -1,27 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -empty_oid=$(git hash-object -w --stdin .gitattributes -git add -A - -git update-index --index-info <<-EOF -100644 $content_oid FILE_X -100644 $content_oid FILE_x -100644 $content_oid file_X -100644 $content_oid file_x -100644 $empty_oid D/B -100644 $empty_oid D/C -100644 $empty_oid d -100644 $empty_oid X -120000 $symlink_target x -120000 $symlink_target link-to-X -EOF - -git commit -m "init" -git checkout -f HEAD; diff --git a/gix-worktree/tests/fixtures/make_mixed_without_submodules.sh b/gix-worktree/tests/fixtures/make_mixed_without_submodules.sh deleted file mode 100755 index 43fafbad944..00000000000 --- a/gix-worktree/tests/fixtures/make_mixed_without_submodules.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -touch empty -echo -n "content" > executable -chmod +x executable - -mkdir dir -echo "other content" > dir/content -echo "* filter=arrow" > .gitattributes -echo "executable -filter" >> .gitattributes -echo ".gitattributes -filter" >> .gitattributes - -mkdir dir/sub-dir -(cd dir/sub-dir && ln -sf ../content symlink) - -git add -A -git commit -m "Commit" diff --git a/gix-worktree/tests/fixtures/make_mixed_without_submodules_and_symlinks.sh b/gix-worktree/tests/fixtures/make_mixed_without_submodules_and_symlinks.sh deleted file mode 100755 index 0e0e95ae301..00000000000 --- a/gix-worktree/tests/fixtures/make_mixed_without_submodules_and_symlinks.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -touch empty -echo -n "content" > executable -chmod +x executable - -mkdir dir -echo "other content" > dir/content -echo "* filter=arrow" > .gitattributes -echo "executable -filter" >> .gitattributes -echo ".gitattributes -filter" >> .gitattributes - -mkdir dir/sub-dir -echo "even other content" > dir/sub-dir/file - -git add -A -git commit -m "Commit" diff --git a/gix-worktree/tests/fixtures/racy_git.sh b/gix-worktree/tests/fixtures/racy_git.sh deleted file mode 100755 index 7fdef456f87..00000000000 --- a/gix-worktree/tests/fixtures/racy_git.sh +++ /dev/null @@ -1,12 +0,0 @@ -#!/bin/bash -set -eu 
-o pipefail - -git init -q - -echo -n "foo" > content - -git add -A -git commit -m "Commit" - -# file size should not be changed by this -echo -n "bar" > content diff --git a/gix-worktree/tests/fixtures/status_changed.sh b/gix-worktree/tests/fixtures/status_changed.sh deleted file mode 100755 index 033c6a8336f..00000000000 --- a/gix-worktree/tests/fixtures/status_changed.sh +++ /dev/null @@ -1,26 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -touch empty -echo -n "content" > executable -chmod +x executable - -mkdir dir -echo -n "other content" > dir/content -echo -n "other content" > dir/content2 -mkdir dir/sub-dir -(cd dir/sub-dir && ln -sf ../content symlink) - -git add -A -git commit -m "Commit" - -chmod +x dir/content -echo "new content" > dir/content2 -chmod -x executable -echo -n "foo" > executable - -rm empty -ln -sf dir/content empty -git reset \ No newline at end of file diff --git a/gix-worktree/tests/fixtures/status_conflict.sh b/gix-worktree/tests/fixtures/status_conflict.sh deleted file mode 100755 index d78e81bfe7e..00000000000 --- a/gix-worktree/tests/fixtures/status_conflict.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -echo base > content -git add -A -git commit -m "base" - -git checkout -b feat -echo feat > content -git commit -am "feat" - -git checkout main -echo base-change > content -git commit -am "new base" - -git merge feat || : diff --git a/gix-worktree/tests/fixtures/status_intent_to_add.sh b/gix-worktree/tests/fixtures/status_intent_to_add.sh deleted file mode 100755 index 7d1601385c0..00000000000 --- a/gix-worktree/tests/fixtures/status_intent_to_add.sh +++ /dev/null @@ -1,9 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -touch content -echo -n "content" > content - -git add --intent-to-add -A diff --git a/gix-worktree/tests/fixtures/status_removed.sh b/gix-worktree/tests/fixtures/status_removed.sh deleted file mode 100755 index 30cdfb94993..00000000000 --- a/gix-worktree/tests/fixtures/status_removed.sh +++ /dev/null @@ -1,18 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -touch empty -echo -n "content" > executable -chmod +x executable - -mkdir dir -echo -n "other content" > dir/content -mkdir dir/sub-dir -(cd dir/sub-dir && ln -sf ../content symlink) - -git add -A -git commit -m "Commit" -rm -rf ./empty ./executable ./dir/content ./dir/sub-dir/symlink -git reset \ No newline at end of file diff --git a/gix-worktree/tests/fixtures/status_unchanged.sh b/gix-worktree/tests/fixtures/status_unchanged.sh deleted file mode 100755 index 67684549509..00000000000 --- a/gix-worktree/tests/fixtures/status_unchanged.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -set -eu -o pipefail - -git init -q - -touch empty -echo -n "content" > executable -chmod +x executable - -mkdir dir -echo -n "other content" > dir/content -mkdir dir/sub-dir -(cd dir/sub-dir && ln -sf ../content symlink) - -git add -A -git commit -m "Commit" - -touch ./empty ./executable ./dir/content ./dir/sub-dir/symlink - -git reset # ensure index timestamp is large enough to not mark everything racy \ No newline at end of file diff --git a/gix-worktree/tests/worktree/cache/create_directory.rs b/gix-worktree/tests/worktree/cache/create_directory.rs index 48bfcd862e2..f0f16b83172 100644 --- a/gix-worktree/tests/worktree/cache/create_directory.rs +++ b/gix-worktree/tests/worktree/cache/create_directory.rs @@ -1,7 +1,7 @@ use std::path::Path; +use gix_testtools::tempfile::{tempdir, TempDir}; use gix_worktree::{cache, Cache}; -use 
tempfile::{tempdir, TempDir}; #[allow(clippy::ptr_arg)] fn panic_on_find<'buf>(_oid: &gix_hash::oid, _buf: &'buf mut Vec) -> std::io::Result> { diff --git a/gix-worktree/tests/worktree/checkout.rs b/gix-worktree/tests/worktree/checkout.rs deleted file mode 100644 index db12d1fb4cc..00000000000 --- a/gix-worktree/tests/worktree/checkout.rs +++ /dev/null @@ -1,526 +0,0 @@ -#[cfg(unix)] -use std::os::unix::prelude::MetadataExt; -use std::{ - fs, - io::{ErrorKind, ErrorKind::AlreadyExists}, - path::{Path, PathBuf}, - sync::atomic::{AtomicBool, AtomicUsize, Ordering}, -}; - -use gix_features::progress; -use gix_object::bstr::ByteSlice; -use gix_odb::FindExt; -use gix_worktree::checkout::Collision; -use once_cell::sync::Lazy; -use tempfile::TempDir; - -use crate::fixture_path; - -static DRIVER: Lazy = Lazy::new(|| { - let mut cargo = std::process::Command::new(env!("CARGO")); - let res = cargo - .args(["build", "-p=gix-filter", "--example", "arrow"]) - .status() - .expect("cargo should run fine"); - assert!(res.success(), "cargo invocation should be successful"); - - let path = PathBuf::from(env!("CARGO_TARGET_TMPDIR")) - .ancestors() - .nth(1) - .expect("first parent in target dir") - .join("debug") - .join("examples") - .join(if cfg!(windows) { "arrow.exe" } else { "arrow" }); - assert!(path.is_file(), "Expecting driver to be located at {path:?}"); - path -}); - -fn driver_exe() -> String { - let mut exe = DRIVER.to_string_lossy().into_owned(); - if cfg!(windows) { - exe = exe.replace('\\', "/"); - } - exe -} - -#[test] -fn accidental_writes_through_symlinks_are_prevented_if_overwriting_is_forbidden() { - let mut opts = opts_from_probe(); - // without overwrite mode, everything is safe. - opts.overwrite_existing = false; - let (source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir(opts.clone(), "make_dangerous_symlink").unwrap(); - - let source_files = dir_structure(&source_tree); - let worktree_files = dir_structure(&destination); - - if opts.fs.ignore_case { - assert_eq!( - stripped_prefix(&source_tree, &source_files), - stripped_prefix(&destination, &worktree_files), - ); - if multi_threaded() { - assert_eq!(outcome.collisions.len(), 2); - } else { - assert_eq!( - outcome.collisions, - vec![ - Collision { - path: "FAKE-DIR".into(), - error_kind: AlreadyExists - }, - Collision { - path: "FAKE-FILE".into(), - error_kind: AlreadyExists - } - ] - ); - } - } else { - let expected = ["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE", "fake-dir/b", "fake-file"]; - assert_eq!(stripped_prefix(&source_tree, &source_files), paths(expected)); - assert_eq!(stripped_prefix(&destination, &worktree_files), paths(expected)); - assert!(outcome.collisions.is_empty()); - }; -} - -#[test] -fn writes_through_symlinks_are_prevented_even_if_overwriting_is_allowed() { - let mut opts = opts_from_probe(); - // with overwrite mode - opts.overwrite_existing = true; - let (source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir(opts.clone(), "make_dangerous_symlink").unwrap(); - - let source_files = dir_structure(&source_tree); - let worktree_files = dir_structure(&destination); - - if opts.fs.ignore_case { - assert_eq!( - stripped_prefix(&source_tree, &source_files), - paths(["A-dir/a", "A-file", "fake-dir/b", "fake-file"]), - ); - assert_eq!( - stripped_prefix(&destination, &worktree_files), - paths(["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE"]), - ); - assert!(outcome.collisions.is_empty()); - } else { - let expected = ["A-dir/a", "A-file", "FAKE-DIR", "FAKE-FILE", "fake-dir/b", 
"fake-file"]; - assert_eq!(stripped_prefix(&source_tree, &source_files), paths(expected)); - assert_eq!(stripped_prefix(&destination, &worktree_files), paths(expected)); - assert!(outcome.collisions.is_empty()); - }; -} - -#[test] -fn delayed_driver_process() -> crate::Result { - let mut opts = opts_from_probe(); - opts.overwrite_existing = true; - opts.filter_process_delay = gix_filter::driver::apply::Delay::Allow; - opts.destination_is_initially_empty = false; - setup_filter_pipeline(opts.filters.options_mut()); - let (_source, destination, _index, outcome) = - checkout_index_in_tmp_dir_opts(opts, "make_mixed_without_submodules_and_symlinks", |_| true, |_| Ok(()))?; - assert_eq!(outcome.collisions.len(), 0); - assert_eq!(outcome.errors.len(), 0); - assert_eq!(outcome.files_updated, 5); - - let dest = destination.path(); - assert_eq!( - std::fs::read(dest.join("executable"))?.as_bstr(), - "content", - "unfiltered" - ); - assert_eq!( - std::fs::read(dest.join("dir").join("content"))?.as_bstr(), - "➡other content\r\n" - ); - assert_eq!( - std::fs::read(dest.join("dir").join("sub-dir").join("file"))?.as_bstr(), - "➡even other content\r\n" - ); - Ok(()) -} - -#[test] -#[cfg_attr( - windows, - ignore = "on windows, the symlink to a directory doesn't seem to work and we really want to test with symlinks" -)] -fn overwriting_files_and_lone_directories_works() -> crate::Result { - for delay in [ - gix_filter::driver::apply::Delay::Allow, - gix_filter::driver::apply::Delay::Forbid, - ] { - let mut opts = opts_from_probe(); - opts.overwrite_existing = true; - opts.filter_process_delay = delay; - opts.destination_is_initially_empty = false; - setup_filter_pipeline(opts.filters.options_mut()); - let (source, destination, _index, outcome) = checkout_index_in_tmp_dir_opts( - opts.clone(), - "make_mixed_without_submodules", - |_| true, - |d| { - let empty = d.join("empty"); - symlink::symlink_dir(d.join(".."), &empty)?; // empty is symlink to the directory above - std::fs::write(d.join("executable"), b"foo")?; // executable is regular file and has different content - let dir = d.join("dir"); - std::fs::create_dir(&dir)?; - std::fs::create_dir(dir.join("content"))?; // 'content' is a directory now - - let dir = dir.join("sub-dir"); - std::fs::create_dir(&dir)?; - - symlink::symlink_dir(empty, dir.join("symlink"))?; // 'symlink' is a symlink to another file - Ok(()) - }, - )?; - - assert!(outcome.collisions.is_empty()); - - assert_eq!( - stripped_prefix(&destination, &dir_structure(&destination)), - paths(["dir/content", "dir/sub-dir/symlink", "empty", "executable"]) - ); - let meta = std::fs::symlink_metadata(destination.path().join("empty"))?; - assert!(meta.is_file(), "'empty' is now a file"); - assert_eq!(meta.len(), 0, "'empty' is indeed empty"); - - let exe = destination.path().join("executable"); - assert_eq!(std::fs::read(&exe)?, b"content", "'exe' has the correct content"); - - let meta = std::fs::symlink_metadata(exe)?; - assert!(meta.is_file()); - if opts.fs.executable_bit { - #[cfg(unix)] - assert_eq!(meta.mode() & 0o700, 0o700, "the executable bit is set where supported"); - } - - assert_eq!( - std::fs::read(source.join("dir/content"))?.as_bstr(), - "other content\n", - "in the worktree, we have LF" - ); - assert_eq!( - std::fs::read(destination.path().join("dir/content"))?.as_bstr(), - "➡other content\r\n", - "autocrlf is enabled, so we get CRLF when checking out as the pipeline is active, and we have a filter" - ); - - let symlink = destination.path().join("dir/sub-dir/symlink"); - // on 
windows, git won't create symlinks as its probe won't detect the capability, even though we do. - assert_eq!(std::fs::symlink_metadata(&symlink)?.is_symlink(), cfg!(unix)); - assert_eq!( - std::fs::read(symlink)?.as_bstr(), - "➡other content\r\n", - "autocrlf is enabled" - ); - } - Ok(()) -} - -#[test] -fn symlinks_become_files_if_disabled() -> crate::Result { - let mut opts = opts_from_probe(); - opts.fs.symlink = false; - let (source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir(opts.clone(), "make_mixed_without_submodules")?; - - assert_equality(&source_tree, &destination, opts.fs.symlink)?; - assert!(outcome.collisions.is_empty()); - Ok(()) -} - -#[test] -fn allow_or_disallow_symlinks() -> crate::Result { - let mut opts = opts_from_probe(); - for allowed in &[false, true] { - opts.fs.symlink = *allowed; - let (source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir(opts.clone(), "make_mixed_without_submodules")?; - - assert_equality(&source_tree, &destination, opts.fs.symlink)?; - assert!(outcome.collisions.is_empty()); - } - Ok(()) -} - -#[test] -fn keep_going_collects_results() { - let mut opts = opts_from_probe(); - opts.keep_going = true; - let count = AtomicUsize::default(); - let (_source_tree, destination, _index, outcome) = checkout_index_in_tmp_dir_opts( - opts, - "make_mixed_without_submodules", - |_id| { - !matches!( - count.fetch_update(Ordering::SeqCst, Ordering::SeqCst, |current| { - (current < 2).then_some(current + 1) - }), - Ok(_) - ) - }, - |_| Ok(()), - ) - .unwrap(); - - if multi_threaded() { - assert_eq!( - outcome.errors.len(), - 2, - "content changes due to non-deterministic nature of racy threads" - ) - } else { - assert_eq!( - outcome - .errors - .iter() - .map(|r| r.path.to_path_lossy().into_owned()) - .collect::>(), - paths(if cfg!(unix) { - [".gitattributes", "dir/content"] - } else { - // not actually a symlink anymore, even though symlinks are supported but git think differently. 
- ["dir/content", "dir/sub-dir/symlink"] - }) - ); - } - - if multi_threaded() { - let actual = dir_structure(&destination); - assert!( - (2..=3).contains(&actual.len()), - "it's 3 most of the time, but can be 2 of the 'empty' file is missing as the object couldn't be accessed.\ - It's unclear why there isn't more, as it would keep going" - ); - } else { - assert_eq!( - stripped_prefix(&destination, &dir_structure(&destination)), - paths(if cfg!(unix) { - Box::new(["dir/sub-dir/symlink", "empty", "executable"].into_iter()) as Box> - } else { - Box::new(["empty", "executable"].into_iter()) - }), - "some files could not be created" - ); - } - - assert!(outcome.collisions.is_empty()); -} - -#[test] -fn no_case_related_collisions_on_case_sensitive_filesystem() { - let opts = opts_from_probe(); - if opts.fs.ignore_case { - eprintln!("Skipping case-sensitive testing on what would be a case-insensitive file system"); - return; - } - let (source_tree, destination, index, outcome) = - checkout_index_in_tmp_dir(opts.clone(), "make_ignorecase_collisions").unwrap(); - - assert!(outcome.collisions.is_empty()); - let num_files = assert_equality(&source_tree, &destination, opts.fs.symlink).unwrap(); - assert_eq!( - num_files, - index.entries().len() - 1, - "it checks out all files (minus 1 to account for .gitattributes which is skipped in the worktree in our tests)" - ); - assert!( - destination.path().join(".gitattributes").is_file(), - "we do have attributes even though, dot files are ignored in `assert_equality`" - ); -} - -#[test] -fn collisions_are_detected_on_a_case_insensitive_filesystem_even_with_delayed_filters() { - let mut opts = opts_from_probe(); - if !opts.fs.ignore_case { - eprintln!("Skipping case-insensitive testing on what would be a case-sensitive file system"); - return; - } - setup_filter_pipeline(opts.filters.options_mut()); - opts.filter_process_delay = gix_filter::driver::apply::Delay::Allow; - let (source_tree, destination, _index, outcome) = - checkout_index_in_tmp_dir(opts, "make_ignorecase_collisions").unwrap(); - - let source_files = dir_structure(&source_tree); - assert_eq!( - stripped_prefix(&source_tree, &source_files), - paths(["d", "file_x", "link-to-X", "x"]), - "plenty of collisions prevent a checkout" - ); - - let dest_files = dir_structure(&destination); - if multi_threaded() { - assert!( - (4..=6).contains(&dest_files.len()), - "due to the clash happening at nearly any time, and keep-going is false, we get a variance of files" - ); - } else { - assert_eq!( - stripped_prefix(&destination, &dest_files), - paths(["D/B", "D/C", "FILE_X", "X", "link-to-X"]), - "we checkout files in order and generally handle collision detection differently, hence the difference" - ); - } - - let error_kind = ErrorKind::AlreadyExists; - #[cfg(windows)] - let error_kind_dir = ErrorKind::PermissionDenied; - #[cfg(not(windows))] - let error_kind_dir = error_kind; - - if multi_threaded() { - assert!( - (5..=6).contains(&outcome.collisions.len()), - "can only assert on number as it's racily creating files so unclear which one clashes, and due to keep-going = false there is variance" - ); - } else { - assert_eq!( - outcome.collisions, - vec![ - Collision { - path: "d".into(), - error_kind: error_kind_dir, - }, - Collision { - path: "FILE_x".into(), - error_kind, - }, - Collision { - path: "file_X".into(), - error_kind, - }, - Collision { - path: "file_x".into(), - error_kind, - }, - Collision { - path: "x".into(), - error_kind, - }, - ], - "these files couldn't be checked out" - ); - } -} - -fn 
multi_threaded() -> bool { - gix_features::parallel::num_threads(None) > 1 -} - -fn assert_equality(source_tree: &Path, destination: &TempDir, allow_symlinks: bool) -> crate::Result { - let source_files = dir_structure(source_tree); - let worktree_files = dir_structure(destination); - - assert_eq!( - stripped_prefix(source_tree, &source_files), - stripped_prefix(destination, &worktree_files), - ); - - let mut count = 0; - for (source_file, worktree_file) in source_files.iter().zip(worktree_files.iter()) { - count += 1; - if !allow_symlinks && source_file.is_symlink() { - assert!(!worktree_file.is_symlink()); - assert_eq!(fs::read(worktree_file)?.to_path()?, fs::read_link(source_file)?); - } else { - assert_eq!(fs::read(source_file)?, fs::read(worktree_file)?); - #[cfg(unix)] - assert_eq!( - fs::symlink_metadata(source_file)?.mode() & 0o700, - fs::symlink_metadata(worktree_file)?.mode() & 0o700, - "permissions of source and checked out file are comparable" - ); - } - } - Ok(count) -} - -pub fn dir_structure>(path: P) -> Vec { - let path = path.as_ref(); - let mut files: Vec<_> = walkdir::WalkDir::new(path) - .follow_links(false) - .into_iter() - .filter_entry(|e| e.path() == path || !e.file_name().to_string_lossy().starts_with('.')) - .flatten() - .filter_map(|e| (!e.path().symlink_metadata().map_or(true, |m| m.is_dir())).then(|| e.path().to_path_buf())) - .collect(); - files.sort(); - files -} - -fn checkout_index_in_tmp_dir( - opts: gix_worktree::checkout::Options, - name: &str, -) -> crate::Result<(PathBuf, TempDir, gix_index::File, gix_worktree::checkout::Outcome)> { - checkout_index_in_tmp_dir_opts(opts, name, |_d| true, |_| Ok(())) -} - -fn checkout_index_in_tmp_dir_opts( - opts: gix_worktree::checkout::Options, - name: &str, - mut allow_return_object: impl FnMut(&gix_hash::oid) -> bool + Send + Clone, - prep_dest: impl Fn(&Path) -> std::io::Result<()>, -) -> crate::Result<(PathBuf, TempDir, gix_index::File, gix_worktree::checkout::Outcome)> { - let source_tree = fixture_path(name); - let git_dir = source_tree.join(".git"); - let mut index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, Default::default())?; - let odb = gix_odb::at(git_dir.join("objects"))?.into_inner().into_arc()?; - let destination = tempfile::tempdir_in(std::env::current_dir()?)?; - prep_dest(destination.path()).expect("preparation must succeed"); - - let outcome = gix_worktree::checkout( - &mut index, - destination.path(), - move |oid, buf| { - if allow_return_object(oid) { - odb.find_blob(oid, buf) - } else { - Err(gix_odb::find::existing_object::Error::NotFound { oid: oid.to_owned() }) - } - }, - &mut progress::Discard, - &mut progress::Discard, - &AtomicBool::default(), - opts, - )?; - Ok((source_tree, destination, index, outcome)) -} - -fn stripped_prefix(prefix: impl AsRef, source_files: &[PathBuf]) -> Vec<&Path> { - source_files.iter().flat_map(|p| p.strip_prefix(&prefix)).collect() -} - -fn probe_gitoxide_dir() -> crate::Result { - Ok(gix_fs::Capabilities::probe( - std::env::current_dir()?.join("..").join(".git"), - )) -} - -fn opts_from_probe() -> gix_worktree::checkout::Options { - gix_worktree::checkout::Options { - fs: probe_gitoxide_dir().unwrap(), - destination_is_initially_empty: true, - thread_limit: gix_features::parallel::num_threads(None).into(), - ..Default::default() - } -} - -fn paths<'a>(p: impl IntoIterator) -> Vec { - p.into_iter().map(PathBuf::from).collect() -} - -fn setup_filter_pipeline(opts: &mut gix_filter::pipeline::Options) { - opts.eol_config.auto_crlf = 
gix_filter::eol::AutoCrlf::Enabled; - opts.drivers = vec![gix_filter::Driver { - name: "arrow".into(), - clean: None, - smudge: None, - process: Some((driver_exe() + " process").into()), - required: true, - }]; -} diff --git a/gix-worktree/tests/worktree/mod.rs b/gix-worktree/tests/worktree/mod.rs index 85ffef380a4..f51aa9528b1 100644 --- a/gix-worktree/tests/worktree/mod.rs +++ b/gix-worktree/tests/worktree/mod.rs @@ -1,8 +1,4 @@ mod cache; -mod checkout; -mod status; - -use std::path::{Path, PathBuf}; use gix_hash::ObjectId; pub type Result = std::result::Result>; @@ -10,8 +6,3 @@ pub type Result = std::result::Result>; pub fn hex_to_id(hex: &str) -> ObjectId { ObjectId::from_hex(hex.as_bytes()).expect("40 bytes hex") } - -pub fn fixture_path(name: &str) -> PathBuf { - let dir = gix_testtools::scripted_fixture_read_only(Path::new(name).with_extension("sh")).expect("script works"); - dir -} diff --git a/gix-worktree/tests/worktree/status.rs b/gix-worktree/tests/worktree/status.rs deleted file mode 100644 index 11689b5f6e2..00000000000 --- a/gix-worktree/tests/worktree/status.rs +++ /dev/null @@ -1,226 +0,0 @@ -use std::sync::{ - atomic::{AtomicUsize, Ordering}, - Arc, -}; - -use bstr::BStr; -use filetime::{set_file_mtime, FileTime}; -use gix_index as index; -use gix_index::Entry; -use gix_worktree::{ - status, - status::{ - content::{CompareBlobs, FastEq, ReadDataOnce}, - Change, Options, Recorder, - }, -}; - -use crate::fixture_path; - -// since tests are fixtures a bunch of stat information (like inode number) -// changes when extracting the data so we need to disable all advanced stat -// changes and only look at mtime seconds and file size to properly -// test all code paths (and to trigger racy git). -const TEST_OPTIONS: index::entry::stat::Options = index::entry::stat::Options { - trust_ctime: false, - check_stat: false, - use_nsec: false, - use_stdev: false, -}; - -fn fixture(name: &str, expected_status: &[(&BStr, Option, bool)]) { - let worktree = fixture_path(name); - let git_dir = worktree.join(".git"); - let mut index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, Default::default()).unwrap(); - let mut recorder = Recorder::default(); - status( - &mut index, - &worktree, - &mut recorder, - FastEq, - |_, _| Ok::<_, std::convert::Infallible>(gix_object::BlobRef { data: &[] }), - Options { - fs: gix_fs::Capabilities::probe(git_dir), - stat: TEST_OPTIONS, - ..Options::default() - }, - ) - .unwrap(); - recorder.records.sort_unstable_by_key(|(name, _, _)| *name); - assert_eq!(recorder.records, expected_status) -} - -#[test] -fn removed() { - fixture( - "status_removed", - &[ - (BStr::new(b"dir/content"), Some(Change::Removed), false), - (BStr::new(b"dir/sub-dir/symlink"), Some(Change::Removed), false), - (BStr::new(b"empty"), Some(Change::Removed), false), - (BStr::new(b"executable"), Some(Change::Removed), false), - ], - ); -} - -#[test] -fn intent_to_add() { - fixture( - "status_intent_to_add", - &[(BStr::new(b"content"), Some(Change::IntentToAdd), false)], - ); -} - -#[test] -fn conflict() { - fixture( - "status_conflict", - &[( - BStr::new(b"content"), - Some(Change::Modification { - executable_bit_changed: false, - content_change: Some(()), - }), - true, - )], - ); -} - -#[test] -fn unchanged() { - fixture("status_unchanged", &[]); -} - -#[test] -#[cfg_attr( - windows, - ignore = "needs work, on windows plenty of additional files are considered modified for some reason" -)] -fn modified() { - fixture( - "status_changed", - &[ - ( - BStr::new(b"dir/content"), 
- Some(Change::Modification { - executable_bit_changed: true, - content_change: None, - }), - false, - ), - ( - BStr::new(b"dir/content2"), - Some(Change::Modification { - executable_bit_changed: false, - content_change: Some(()), - }), - false, - ), - (BStr::new(b"empty"), Some(Change::Type), false), - ( - BStr::new(b"executable"), - Some(Change::Modification { - executable_bit_changed: true, - content_change: Some(()), - }), - false, - ), - ], - ); -} - -#[test] -fn racy_git() { - let timestamp = 940040400; - // we need a writable fixture because we have to mess with `mtimes` manually, because touch -d - // respects the locale so the test wouldn't work depending on the timezone you - // run your test in. - let dir = gix_testtools::scripted_fixture_writable("racy_git.sh").expect("script works"); - let worktree = dir.path(); - let git_dir = worktree.join(".git"); - let fs = gix_fs::Capabilities::probe(&git_dir); - let mut index = gix_index::File::at(git_dir.join("index"), gix_hash::Kind::Sha1, Default::default()).unwrap(); - - #[derive(Clone)] - struct CountCalls(Arc<AtomicUsize>, FastEq); - impl CompareBlobs for CountCalls { - type Output = (); - - fn compare_blobs<'a, E>( - &mut self, - entry: &'a Entry, - worktree_blob_size: usize, - worktree_blob: impl ReadDataOnce<'a, E>, - entry_blob: impl ReadDataOnce<'a, E>, - ) -> Result<Option<Self::Output>, E> { - self.0.fetch_add(1, Ordering::Relaxed); - self.1 - .compare_blobs(entry, worktree_blob_size, worktree_blob, entry_blob) - } - } - - // We artificially mess with the entry's `mtime` so that it's before the timestamp saved by git. - // This would usually mean an invalid fs/invalid index file and as a result the racy git - // mitigation doesn't work and the worktree shows up as unchanged even though the file did - // change. - // This case doesn't happen in the real world (except for file corruption) but - // it makes sure we are actually hitting the right codepath. - index.entries_mut()[0].stat.mtime.secs = timestamp; - set_file_mtime(worktree.join("content"), FileTime::from_unix_time(timestamp as i64, 0)) - .expect("changing filetime works"); - let mut recorder = Recorder::default(); - - let count = Arc::new(AtomicUsize::new(0)); - let counter = CountCalls(count.clone(), FastEq); - status( - &mut index, - worktree, - &mut recorder, - counter.clone(), - |_, _| Err(std::io::Error::new(std::io::ErrorKind::Other, "no odb access expected")), - Options { - fs, - stat: TEST_OPTIONS, - ..Options::default() - }, - ) - .unwrap(); - assert_eq!(count.load(Ordering::Relaxed), 0, "no blob content is accessed"); - assert_eq!(recorder.records, &[], "the testcase triggers racy git"); - - // Now we also backdate the index timestamp to match the artificially created - // mtime above. This is now a realistic real-world race condition which should trigger racy git - // and cause proper output.
- index.set_timestamp(FileTime::from_unix_time(timestamp as i64, 0)); - let mut recorder = Recorder::default(); - status( - &mut index, - worktree, - &mut recorder, - counter, - |_, _| Err(std::io::Error::new(std::io::ErrorKind::Other, "no odb access expected")), - Options { - fs, - stat: TEST_OPTIONS, - ..Options::default() - }, - ) - .unwrap(); - assert_eq!( - count.load(Ordering::Relaxed), - 1, - "now we needed to access the blob content" - ); - assert_eq!( - recorder.records, - &[( - BStr::new(b"content"), - Some(Change::Modification { - executable_bit_changed: false, - content_change: Some(()), - }), - false - )], - "racy change is correctly detected" - ); -} diff --git a/justfile b/justfile index 961bfd9f482..5284615fbeb 100755 --- a/justfile +++ b/justfile @@ -147,10 +147,6 @@ unit-tests: set -ex; \ cargo test; \ cargo test --features verbose-object-parsing-errors - cd gix-worktree; \ - set -ex; \ - cargo test; \ - cargo test --features "internal-testing-gix-features-parallel" cargo test -p gix-tempfile --features signals cargo test -p gix-tempfile cargo test -p gix-features
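Note: the deleted `racy_git` test above documents the rule it exercised: an index entry whose file mtime is not older than the index's own timestamp cannot be trusted by stat comparison alone, so its blob content must be compared. The sketch below is only an illustration of that rule, not the removed gix-worktree implementation; `IndexEntry` and `is_racy` are hypothetical names chosen for this example.

// Minimal, self-contained sketch of the racy-git rule (illustrative only).
use std::time::SystemTime;

struct IndexEntry {
    // mtime recorded for the file when the index entry was written
    mtime: SystemTime,
}

/// An entry is "racy" if its file mtime is not older than the index timestamp:
/// a same-second modification could then be invisible to pure stat comparison,
/// so the caller must fall back to comparing blob contents.
fn is_racy(entry: &IndexEntry, index_timestamp: SystemTime) -> bool {
    entry.mtime >= index_timestamp
}

fn main() {
    let index_timestamp = SystemTime::now();
    let entry = IndexEntry { mtime: index_timestamp };
    // Equal timestamps: stat data alone cannot prove the file is unchanged.
    assert!(is_racy(&entry, index_timestamp));
}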