Skip to content

Commit

Permalink
mark sensitive only files added in the commit. metadata add rather than init to fix reruns issue
Browse files Browse the repository at this point in the history
  • Loading branch information
bpinsard committed Feb 20, 2024
1 parent d3385d7 commit 6ba6cba
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 10 deletions.
30 changes: 20 additions & 10 deletions heudiconv/external/dlad.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,16 +153,16 @@ def add_to_datalad(
# annex_add_opts=['--include-dotfiles']
)

# TODO: filter for only changed files?
# Provide metadata for sensitive information
mark_sensitive(ds, "sourcedata")
mark_sensitive(ds, "*_scans.tsv") # top level
mark_sensitive(ds, "*/*_scans.tsv") # within subj
mark_sensitive(ds, "*/*/*_scans.tsv") # within sess/subj
mark_sensitive(ds, "*/anat") # within subj
mark_sensitive(ds, "*/*/anat") # within ses/subj
last_commit = "HEAD"
mark_sensitive(ds, "sourcedata", last_commit)
mark_sensitive(ds, "*_scans.tsv", last_commit) # top level
mark_sensitive(ds, "*/*_scans.tsv", last_commit) # within subj
mark_sensitive(ds, "*/*/*_scans.tsv", last_commit) # within sess/subj
mark_sensitive(ds, "*/anat", last_commit) # within subj
mark_sensitive(ds, "*/*/anat", last_commit) # within ses/subj
if dsh_path:
mark_sensitive(ds, ".heudiconv") # entire .heudiconv!
mark_sensitive(ds, ".heudiconv", last_commit) # entire .heudiconv!
superds.save(path=ds.path, message=msg, recursive=True)

assert not ds.repo.dirty
Expand All @@ -178,26 +178,36 @@ def add_to_datalad(
"""


def mark_sensitive(ds: Dataset, path_glob: str, commit: "str | None" = None) -> None:
    """Tag paths matching a glob with ``distribution-restrictions=sensitive``.

    Parameters
    ----------
    ds : Dataset to operate on
    path_glob : str
      glob of the paths within dataset to work on
    commit : str, optional
      if given, only files reported by ``git show --name-only`` for that
      commit are marked.  A glob match that is a directory is narrowed down
      to the files of that commit located underneath it.

    Returns
    -------
    None
    """
    paths = glob(op.join(ds.path, path_glob))
    if commit:
        # The first output line is the "<hash> <subject>" header produced by
        # --format=oneline; the remaining lines are dataset-relative names.
        # NOTE(review): git may quote names with unusual characters
        # (core.quotePath); such entries would not match -- confirm if relevant.
        names = ds.repo.call_git(
            ["show", "--name-only", commit, "--format=oneline"]
        ).split("\n")[1:]
        # normpath so that git's "/"-separated names compare equal to what
        # glob() returns; empty trailing lines are dropped.
        committed = {op.normpath(op.join(ds.path, name)) for name in names if name}
        selected = []
        for path in paths:
            normalized = op.normpath(path)
            if normalized in committed:
                selected.append(path)
            elif op.isdir(path) and not op.islink(path):
                # git lists files, never directories: a directory match
                # (e.g. "sourcedata" or "*/anat") stands for the committed
                # files below it.  Names that no longer exist on disk
                # (e.g. deleted by the commit) are skipped.
                prefix = normalized + op.sep
                selected.extend(
                    sorted(
                        candidate
                        for candidate in committed
                        if candidate.startswith(prefix) and op.lexists(candidate)
                    )
                )
        # drop duplicates while keeping a stable order
        paths = list(dict.fromkeys(selected))
    if not paths:
        return
    lgr.debug("Marking %d files with distribution-restrictions field", len(paths))
    # set_metadata can be a bloody generator
    res = ds.repo.set_metadata(
        paths, add={"distribution-restrictions": "sensitive"}, recursive=True
    )
    if inspect.isgenerator(res):
        # exhaust the generator so the metadata is actually written
        res = list(res)
20 changes: 20 additions & 0 deletions heudiconv/external/tests/test_dlad.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,3 +28,23 @@ def test_mark_sensitive(tmp_path: Path) -> None:
# g2 since the same content
assert not all_meta.pop("g1", None) # nothing or empty record
assert all_meta == {"f1": target_rec, "f2": target_rec, "g2": target_rec}


def test_mark_sensitive_last_commit(tmp_path: Path) -> None:
    """Check ``mark_sensitive`` when restricted to the files of ``HEAD``.

    Four files are saved in a single commit, then only the ``f*`` glob is
    marked with ``commit="HEAD"``.
    """
    dataset = dl.Dataset(tmp_path).create(force=True)
    tree = {"f1": "d1", "f2": "d2", "g1": "d3", "g2": "d1"}
    create_tree(str(tmp_path), tree)
    dataset.save(".")

    mark_sensitive(dataset, "f*", "HEAD")

    expected = {"distribution-restrictions": ["sensitive"]}
    metadata = dict(dataset.repo.get_metadata("."))
    # g1 must carry nothing (or an empty record)
    assert not metadata.pop("g1", None)
    # g2 shows up as well since it has the same content ("d1") as f1
    assert metadata == {"f1": expected, "f2": expected, "g2": expected}

0 comments on commit 6ba6cba

Please sign in to comment.