Skip to content

Commit

Permalink
filter mark_sensitive based on save output
Browse files (browse the repository at this point in the history)
  • Loading branch information
bpinsard committed Feb 21, 2024
1 parent 6ba6cba commit 2844d54
Show file tree
Hide file tree
Showing 2 changed files with 22 additions and 23 deletions.
38 changes: 18 additions & 20 deletions heudiconv/external/dlad.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,23 +146,27 @@ def add_to_datalad(
message="Added gitattributes to place all .heudiconv content"
" under annex",
)
ds.save(
save_res = ds.save(
".",
recursive=True
# not in effect! ?
# annex_add_opts=['--include-dotfiles']
)
annexed_files = [sr["path"] for sr in save_res if sr["key"]]

# Provide metadata for sensitive information
last_commit = "HEAD"
mark_sensitive(ds, "sourcedata", last_commit)
mark_sensitive(ds, "*_scans.tsv", last_commit) # top level
mark_sensitive(ds, "*/*_scans.tsv", last_commit) # within subj
mark_sensitive(ds, "*/*/*_scans.tsv", last_commit) # within sess/subj
mark_sensitive(ds, "*/anat", last_commit) # within subj
mark_sensitive(ds, "*/*/anat", last_commit) # within ses/subj
sensitive_patterns = [
"sourcedata",
"*_scans.tsv", # top level
"*/*_scans.tsv", # within subj
"*/*/*_scans.tsv", # within sess/subj
"*/anat", # within subj
"*/*/anat", # within ses/subj
]
for sp in sensitive_patterns:
mark_sensitive(ds, sp, annexed_files)
if dsh_path:
mark_sensitive(ds, ".heudiconv", last_commit) # entire .heudiconv!
mark_sensitive(ds, ".heudiconv") # entire .heudiconv!
superds.save(path=ds.path, message=msg, recursive=True)

assert not ds.repo.dirty
Expand All @@ -178,30 +182,24 @@ def add_to_datalad(
"""


def mark_sensitive(ds: Dataset, path_glob: str, commit: str = None) -> None:
def mark_sensitive(ds: Dataset, path_glob: str, files: list[str] = None) -> None:
"""
Parameters
----------
ds : Dataset to operate on
path_glob : str
glob of the paths within dataset to work on
commit : str
commit which files to mark
files : list[str]
subset of files to mark
Returns
-------
None
"""
paths = glob(op.join(ds.path, path_glob))
if commit:
paths_in_commit = [
op.join(ds.path, nf)
for nf in ds.repo.call_git(
["show", "--name-only", commit, "--format=oneline"]
).split("\n")[1:]
]
paths = [p for p in paths if p in paths_in_commit]
if files:
paths = [p for p in paths if p in files]
if not paths:
return
lgr.debug("Marking %d files with distribution-restrictions field", len(paths))
Expand Down
7 changes: 4 additions & 3 deletions heudiconv/external/tests/test_dlad.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ def test_mark_sensitive(tmp_path: Path) -> None:
assert all_meta == {"f1": target_rec, "f2": target_rec, "g2": target_rec}


def test_mark_sensitive_last_commit(tmp_path: Path) -> None:
def test_mark_sensitive_subset(tmp_path: Path) -> None:
ds = dl.Dataset(tmp_path).create(force=True)
create_tree(
str(tmp_path),
Expand All @@ -42,9 +42,10 @@ def test_mark_sensitive_last_commit(tmp_path: Path) -> None:
},
)
ds.save(".")
mark_sensitive(ds, "f*", "HEAD")
mark_sensitive(ds, "f*", [str(tmp_path / "f1")])
all_meta = dict(ds.repo.get_metadata("."))
target_rec = {"distribution-restrictions": ["sensitive"]}
# g2 since the same content
assert not all_meta.pop("g1", None) # nothing or empty record
assert all_meta == {"f1": target_rec, "f2": target_rec, "g2": target_rec}
assert not all_meta.pop("f2", None) # nothing or empty record
assert all_meta == {"f1": target_rec, "g2": target_rec}

0 comments on commit 2844d54

Please sign in to comment.