From bd0f8d71964b6449abbe7e42cfc638d3806ae6f9 Mon Sep 17 00:00:00 2001 From: simleo Date: Fri, 23 Jun 2023 10:14:04 +0200 Subject: [PATCH] get data paths from manifest --- src/runcrate/convert.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/runcrate/convert.py b/src/runcrate/convert.py index 3a08e09..a37ec6b 100644 --- a/src/runcrate/convert.py +++ b/src/runcrate/convert.py @@ -40,6 +40,7 @@ WORKFLOW_BASENAME = "packed.cwl" INPUTS_FILE_BASENAME = "primary-job.json" +MANIFEST_FILE = "manifest-sha1.txt" CWL_TYPE_MAP = { "string": "Text", @@ -211,6 +212,7 @@ def __init__(self, root, workflow_name=None, license=None, readme=None): self.hashes = {} # map source files to destination files self.file_map = {} + self.manifest = self._get_manifest() @staticmethod def _get_step_maps(cwl_defs): @@ -225,6 +227,14 @@ def _get_step_maps(cwl_defs): rval[k][f] = {"tool": get_fragment(s.run), "pos": pos_map[f]} return rval + def _get_manifest(self): + manifest = {} + with open(self.root / Path(MANIFEST_FILE)) as f: + for line in f: + hash_, relpath = line.strip().split(None, 1) + manifest[hash_] = self.root / relpath + return manifest + def _resolve_plan(self, activity): job_qname = activity.plan() plan = activity.provenance.entity(job_qname) @@ -576,7 +586,7 @@ def convert_param(self, prov_param, crate, convert_secondary=True, parent=None): dest = Path(parent.id if parent else "") / hash_ action_p = crate.dereference(dest.as_posix()) if not action_p: - source = self.root / Path("data") / hash_[:2] / hash_ + source = self.manifest[hash_] action_p = crate.add_file(source, dest, properties={ "sha1": hash_, })