Skip to content

Commit

Permalink
Merge pull request #57 from ResearchObject/data_paths_from_manifest
Browse files Browse the repository at this point in the history
Get data paths from manifest
  • Loading branch information
simleo authored Jun 23, 2023
2 parents 1cc00ac + bd0f8d7 commit 54dac55
Showing 1 changed file with 11 additions and 1 deletion.
12 changes: 11 additions & 1 deletion src/runcrate/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@

# NOTE(review): presumably the base name of the packed CWL workflow file;
# its users are not visible here -- confirm.
WORKFLOW_BASENAME = "packed.cwl"
# NOTE(review): presumably the base name of the JSON file holding the primary
# job's inputs -- confirm against its users.
INPUTS_FILE_BASENAME = "primary-job.json"
# Name of the manifest file looked up directly under the converter's root
# directory; each of its lines is parsed as "<hash> <relative path>".
MANIFEST_FILE = "manifest-sha1.txt"

CWL_TYPE_MAP = {
"string": "Text",
Expand Down Expand Up @@ -211,6 +212,7 @@ def __init__(self, root, workflow_name=None, license=None, readme=None):
self.hashes = {}
# map source files to destination files
self.file_map = {}
self.manifest = self._get_manifest()

@staticmethod
def _get_step_maps(cwl_defs):
Expand All @@ -225,6 +227,14 @@ def _get_step_maps(cwl_defs):
rval[k][f] = {"tool": get_fragment(s.run), "pos": pos_map[f]}
return rval

def _get_manifest(self):
    """Read the manifest under ``self.root`` into a hash-to-path table.

    Every non-blank line of ``MANIFEST_FILE`` is expected to contain a
    hash, a run of whitespace, and a path relative to ``self.root``.
    Blank (or whitespace-only) lines are ignored.

    Returns:
        dict: maps each hash string to ``self.root / relpath``. If the
        same hash occurs on several lines, the last one wins.

    Raises:
        OSError: if the manifest file cannot be opened.
        ValueError: if a non-blank line has no path after the hash, or
            if the file is not valid UTF-8.
    """
    manifest = {}
    # Decode explicitly as UTF-8 instead of the platform's locale default,
    # so that non-ASCII file names are read the same way everywhere
    # (BagIt manifests are normally UTF-8 encoded).
    with open(self.root / Path(MANIFEST_FILE), encoding="utf-8") as f:
        for line in f:
            entry = line.strip()
            if not entry:
                # A blank line carries no entry; unpacking it would fail.
                continue
            # Split on the first whitespace run only: paths may contain
            # spaces of their own.
            hash_, relpath = entry.split(None, 1)
            manifest[hash_] = self.root / relpath
    return manifest

def _resolve_plan(self, activity):
job_qname = activity.plan()
plan = activity.provenance.entity(job_qname)
Expand Down Expand Up @@ -576,7 +586,7 @@ def convert_param(self, prov_param, crate, convert_secondary=True, parent=None):
dest = Path(parent.id if parent else "") / hash_
action_p = crate.dereference(dest.as_posix())
if not action_p:
source = self.root / Path("data") / hash_[:2] / hash_
source = self.manifest[hash_]
action_p = crate.add_file(source, dest, properties={
"sha1": hash_,
})
Expand Down

0 comments on commit 54dac55

Please sign in to comment.