Skip to content

Commit

Permalink
checkout: take into account path chars ignored on HFS+
Browse files Browse the repository at this point in the history
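Summary: The HFS+ file system ignores certain Unicode characters in file names (for example U+200C, so ".h\u{200c}g" resolves to ".hg"). Path validation must therefore strip those characters from each component before checking it against the list of invalid components.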

Reviewed By: sggutier

Differential Revision: D54037548

fbshipit-source-id: 2a323aa45557cf206439a9d5711e911180ea42c2
  • Loading branch information
muirdm authored and facebook-github-bot committed Feb 28, 2024
1 parent 980eef7 commit bfa4d8e
Show file tree
Hide file tree
Showing 2 changed files with 26 additions and 6 deletions.
23 changes: 18 additions & 5 deletions eden/scm/lib/vfs/src/pathauditor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
* GNU General Public License version 2.
*/

use std::borrow::Cow;
use std::fs::symlink_metadata;
use std::path::Path;
use std::path::PathBuf;
Expand Down Expand Up @@ -43,6 +44,13 @@ static INVALID_COMPONENTS: Lazy<Vec<&'static str>> = Lazy::new(|| {
.collect()
});

// Code points that HFS+ ignores in file names (Apple Technote 1150, "Unicode Subtleties").
// The list is taken from encoding.py. Because the file system drops these characters, path
// checks have to drop them as well before comparing component names.
const IGNORED_HFS_CHARS: [char; 16] = [
    '\u{200c}', // ZERO WIDTH NON-JOINER
    '\u{200d}', // ZERO WIDTH JOINER
    '\u{200e}', // LEFT-TO-RIGHT MARK
    '\u{200f}', // RIGHT-TO-LEFT MARK
    '\u{202a}', // LEFT-TO-RIGHT EMBEDDING
    '\u{202b}', // RIGHT-TO-LEFT EMBEDDING
    '\u{202c}', // POP DIRECTIONAL FORMATTING
    '\u{202d}', // LEFT-TO-RIGHT OVERRIDE
    '\u{202e}', // RIGHT-TO-LEFT OVERRIDE
    '\u{206a}', // INHIBIT SYMMETRIC SWAPPING
    '\u{206b}', // ACTIVATE SYMMETRIC SWAPPING
    '\u{206c}', // INHIBIT ARABIC FORM SHAPING
    '\u{206d}', // ACTIVATE ARABIC FORM SHAPING
    '\u{206e}', // NATIONAL DIGIT SHAPES
    '\u{206f}', // NOMINAL DIGIT SHAPES
    '\u{feff}', // ZERO WIDTH NO-BREAK SPACE (byte order mark)
];

#[derive(thiserror::Error, Debug)]
pub enum AuditError {
#[error("Can't read/write file through ancestor symlink \"{0}\"")]
Expand Down Expand Up @@ -135,14 +143,19 @@ fn valid_windows_component(component: &str) -> bool {
/// It also checks that no trailing dots are part of the component and checks that shortnames
/// on Windows are valid.
fn audit_invalid_components(path: &str) -> Result<(), AuditError> {
let path = if cfg!(not(windows)) {
path.to_owned()
let path: Cow<str> = if cfg!(not(windows)) {
Cow::Borrowed(path)
} else {
path.to_lowercase()
Cow::Owned(path.to_lowercase())
};
for s in path.split(SEPARATORS) {
if s.is_empty() || INVALID_COMPONENTS.contains(&s) || !valid_windows_component(s) {
return Err(AuditError::InvalidComponent(s.to_owned()));
let s = if s.contains(IGNORED_HFS_CHARS) {
Cow::Owned(s.replace(IGNORED_HFS_CHARS, ""))
} else {
Cow::Borrowed(s)
};
if s.is_empty() || INVALID_COMPONENTS.contains(&&*s) || !valid_windows_component(&s) {
return Err(AuditError::InvalidComponent(s.into_owned()));
}
}
Ok(())
Expand Down
9 changes: 8 additions & 1 deletion eden/scm/tests/test-commit.t
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
$ eagerepo
This is needed to avoid the filelog() revset in "log", which isn't compatible with eagerepo.
$ setconfig experimental.pathhistory=true
$ setconfig checkout.use-rust=true

commit date test

Expand Down Expand Up @@ -668,10 +669,15 @@ verify pathauditor blocks evil filepaths
[255]
#else
$ hg co --clean tip
abort: path contains illegal component: .h\xe2\x80\x8cg/hgrc (esc)
abort: Can't write 'RepoPath(".h\u{200c}g/hgrc")' after handling error "Can't write into .h‌g/hgrc
Caused by:
0: Invalid component in ".h‌g/hgrc"
1: Invalid path component ".hg"": Invalid path component ".hg"
[255]
#endif

#if windows
$ cd $TESTTMP/audit2
$ cat > evil-commit.py <<EOF
> from __future__ import absolute_import
Expand Down Expand Up @@ -709,6 +715,7 @@ verify pathauditor blocks evil filepaths
$ hg co --clean tip
abort: path contains illegal component: HG8B6C~2/hgrc
[255]
#endif

# test that an unmodified commit template message aborts

Expand Down

0 comments on commit bfa4d8e

Please sign in to comment.