diff --git a/eden/scm/lib/vfs/src/pathauditor.rs b/eden/scm/lib/vfs/src/pathauditor.rs index e96bb147f295c..ec3049f530a8e 100644 --- a/eden/scm/lib/vfs/src/pathauditor.rs +++ b/eden/scm/lib/vfs/src/pathauditor.rs @@ -5,6 +5,7 @@ * GNU General Public License version 2. */ +use std::borrow::Cow; use std::fs::symlink_metadata; use std::path::Path; use std::path::PathBuf; @@ -43,6 +44,13 @@ static INVALID_COMPONENTS: Lazy> = Lazy::new(|| { .collect() }); +// From encoding.py: These unicode characters are ignored by HFS+ (Apple Technote 1150, +// "Unicode Subtleties"), so we need to ignore them in some places for sanity. +const IGNORED_HFS_CHARS: [char; 16] = [ + '\u{200c}', '\u{200d}', '\u{200e}', '\u{200f}', '\u{202a}', '\u{202b}', '\u{202c}', '\u{202d}', + '\u{202e}', '\u{206a}', '\u{206b}', '\u{206c}', '\u{206d}', '\u{206e}', '\u{206f}', '\u{feff}', +]; + #[derive(thiserror::Error, Debug)] pub enum AuditError { #[error("Can't read/write file through ancestor symlink \"{0}\"")] @@ -135,14 +143,19 @@ fn valid_windows_component(component: &str) -> bool { /// It also checks that no trailing dots are part of the component and checks that shortnames /// on Windows are valid. fn audit_invalid_components(path: &str) -> Result<(), AuditError> { - let path = if cfg!(not(windows)) { - path.to_owned() + let path: Cow = if cfg!(not(windows)) { + Cow::Borrowed(path) } else { - path.to_lowercase() + Cow::Owned(path.to_lowercase()) }; for s in path.split(SEPARATORS) { - if s.is_empty() || INVALID_COMPONENTS.contains(&s) || !valid_windows_component(s) { - return Err(AuditError::InvalidComponent(s.to_owned())); + let s = if s.contains(IGNORED_HFS_CHARS) { + Cow::Owned(s.replace(IGNORED_HFS_CHARS, "")) + } else { + Cow::Borrowed(s) + }; + if s.is_empty() || INVALID_COMPONENTS.contains(&&*s) || !valid_windows_component(&s) { + return Err(AuditError::InvalidComponent(s.into_owned())); } } Ok(()) diff --git a/eden/scm/tests/test-commit.t b/eden/scm/tests/test-commit.t index 3635920050629..450d91ba72be5 100644 --- a/eden/scm/tests/test-commit.t +++ b/eden/scm/tests/test-commit.t @@ -3,6 +3,7 @@ $ eagerepo This is needed to avoid filelog() revset in "log", which isn't compatible w/ eagerepo. $ setconfig experimental.pathhistory=true + $ setconfig checkout.use-rust=true commit date test @@ -668,10 +669,15 @@ verify pathauditor blocks evil filepaths [255] #else $ hg co --clean tip - abort: path contains illegal component: .h\xe2\x80\x8cg/hgrc (esc) + abort: Can't write 'RepoPath(".h\u{200c}g/hgrc")' after handling error "Can't write into .h‌g/hgrc + + Caused by: + 0: Invalid component in ".h‌g/hgrc" + 1: Invalid path component ".hg"": Invalid path component ".hg" [255] #endif +#if windows $ cd $TESTTMP/audit2 $ cat > evil-commit.py < from __future__ import absolute_import @@ -709,6 +715,7 @@ verify pathauditor blocks evil filepaths $ hg co --clean tip abort: path contains illegal component: HG8B6C~2/hgrc [255] +#endif # test that an unmodified commit template message aborts