Skip to content

Commit

Permalink
Merge pull request #18873 from mvdbeek/fix_history_import_when_implic…
Browse files Browse the repository at this point in the history
…it_hda_not_serialized

[24.1] Fix history import when parent_hda not serialized
  • Loading branch information
mvdbeek committed Sep 23, 2024
2 parents 6c85d45 + 69a3cda commit 2a7954c
Show file tree
Hide file tree
Showing 3 changed files with 78 additions and 15 deletions.
1 change: 1 addition & 0 deletions lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4547,6 +4547,7 @@ class DatasetInstance(RepresentById, UsesCreateAndUpdateTime, _HasTable):
copied_from_library_dataset_dataset_association: Optional["LibraryDatasetDatasetAssociation"]
dependent_jobs: List[JobToInputLibraryDatasetAssociation]
implicitly_converted_datasets: List["ImplicitlyConvertedDatasetAssociation"]
implicitly_converted_parent_datasets: List["ImplicitlyConvertedDatasetAssociation"]

validated_states = DatasetValidatedState

Expand Down
30 changes: 22 additions & 8 deletions lib/galaxy/model/store/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1023,7 +1023,7 @@ def _reassign_hids(self, object_import_tracker: "ObjectImportTracker", history:

if object_import_tracker.copy_hid_for:
# in an if to avoid flush if unneeded
for from_dataset, to_dataset in object_import_tracker.copy_hid_for.items():
for from_dataset, to_dataset in object_import_tracker.copy_hid_for:
to_dataset.hid = from_dataset.hid
self._session_add(to_dataset)
self._flush()
Expand Down Expand Up @@ -1276,18 +1276,24 @@ def _import_implicit_dataset_conversions(self, object_import_tracker: "ObjectImp
metadata_safe = False
idc = model.ImplicitlyConvertedDatasetAssociation(metadata_safe=metadata_safe, for_import=True)
idc.type = idc_attrs["file_type"]
if idc_attrs.get("parent_hda"):
idc.parent_hda = object_import_tracker.hdas_by_key[idc_attrs["parent_hda"]]
# We may not have exported the parent, so only set the parent_hda attribute if we did.
if (parent_hda_id := idc_attrs.get("parent_hda")) and (
parent_hda := object_import_tracker.hdas_by_key.get(parent_hda_id)
):
# exports created prior to 24.2 may not have a parent if the parent had been purged
idc.parent_hda = parent_hda
if idc_attrs.get("hda"):
idc.dataset = object_import_tracker.hdas_by_key[idc_attrs["hda"]]

# we have a the dataset and the parent, lets ensure they land up with the same HID
if idc.dataset and idc.parent_hda and idc.parent_hda in object_import_tracker.requires_hid:
# we have the dataset and the parent, lets ensure they land up with the same HID
if idc.dataset and idc.parent_hda:
try:
object_import_tracker.requires_hid.remove(idc.dataset)
except ValueError:
pass # we wanted to remove it anyway.
object_import_tracker.copy_hid_for[idc.parent_hda] = idc.dataset
# A HDA can be the parent of multiple implicitly converted dataset,
# that's thy we use [(source, target)] here
object_import_tracker.copy_hid_for.append((idc.parent_hda, idc.dataset))

self._session_add(idc)

Expand Down Expand Up @@ -1370,7 +1376,7 @@ class ObjectImportTracker:
hdca_copied_from_sinks: Dict[ObjectKeyType, ObjectKeyType]
jobs_by_key: Dict[ObjectKeyType, model.Job]
requires_hid: List["HistoryItem"]
copy_hid_for: Dict["HistoryItem", "HistoryItem"]
copy_hid_for: List[Tuple["HistoryItem", "HistoryItem"]]

def __init__(self) -> None:
self.libraries_by_key = {}
Expand All @@ -1388,7 +1394,7 @@ def __init__(self) -> None:
self.implicit_collection_jobs_by_key: Dict[str, ImplicitCollectionJobs] = {}
self.workflows_by_key: Dict[str, model.Workflow] = {}
self.requires_hid = []
self.copy_hid_for = {}
self.copy_hid_for = []

self.new_history: Optional[model.History] = None

Expand Down Expand Up @@ -2301,6 +2307,14 @@ def add_implicit_conversion_dataset(
include_files: bool,
conversion: model.ImplicitlyConvertedDatasetAssociation,
) -> None:
parent_hda = conversion.parent_hda
if parent_hda and parent_hda not in self.included_datasets:
# We should always include the parent of an implicit conversion
# to avoid holes in the provenance.
self.included_datasets[parent_hda] = (parent_hda, include_files)
grand_parent_association = parent_hda.implicitly_converted_parent_datasets
if grand_parent_association and (grand_parent_hda := grand_parent_association[0].parent_hda):
self.add_implicit_conversion_dataset(grand_parent_hda, include_files, grand_parent_association[0])
self.included_datasets[dataset] = (dataset, include_files)
self.dataset_implicit_conversions[dataset] = conversion

Expand Down
62 changes: 55 additions & 7 deletions test/unit/data/model/test_model_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,32 +122,80 @@ def test_import_export_history_allow_discarded_data():
assert imported_job.output_datasets[0].dataset == datasets[1]


def test_import_export_history_with_implicit_conversion():
def setup_history_with_implicit_conversion():
app = _mock_app()

u, h, d1, d2, j = _setup_simple_cat_job(app)

intermediate_ext = "bam"
intermediate_implicit_hda = model.HistoryDatasetAssociation(
extension=intermediate_ext, create_dataset=True, flush=False, history=h
)
intermediate_implicit_hda.hid = d2.hid
convert_ext = "fasta"
implicit_hda = model.HistoryDatasetAssociation(extension=convert_ext, create_dataset=True, flush=False, history=h)
implicit_hda.hid = d2.hid
# this adds and flushes the result...
d2.attach_implicitly_converted_dataset(app.model.context, implicit_hda, convert_ext)
intermediate_implicit_hda.attach_implicitly_converted_dataset(app.model.context, implicit_hda, convert_ext)
d2.attach_implicitly_converted_dataset(app.model.context, intermediate_implicit_hda, intermediate_ext)

app.object_store.update_from_file(intermediate_implicit_hda.dataset, file_name=TEST_PATH_2_CONVERTED, create=True)
app.object_store.update_from_file(implicit_hda.dataset, file_name=TEST_PATH_2_CONVERTED, create=True)

assert len(h.active_datasets) == 3
assert len(h.active_datasets) == 4
return app, h, implicit_hda


def test_import_export_history_with_implicit_conversion():
app, h, _ = setup_history_with_implicit_conversion()
imported_history = _import_export_history(app, h, export_files="copy", include_hidden=True)

assert len(imported_history.active_datasets) == 3
assert len(imported_history.active_datasets) == 4
recovered_hda_2 = imported_history.active_datasets[1]
assert recovered_hda_2.implicitly_converted_datasets
imported_conversion = recovered_hda_2.implicitly_converted_datasets[0]
assert imported_conversion.type == "fasta"
assert imported_conversion.dataset == imported_history.active_datasets[2]
intermediate_conversion = recovered_hda_2.implicitly_converted_datasets[0]
assert intermediate_conversion.type == "bam"
intermediate_hda = intermediate_conversion.dataset
assert intermediate_hda.implicitly_converted_datasets
final_conversion = intermediate_hda.implicitly_converted_datasets[0]

assert final_conversion.type == "fasta"
assert final_conversion.dataset == imported_history.active_datasets[-1]

# implicit conversions have the same HID... ensure this property is recovered...
assert imported_history.active_datasets[2].hid == imported_history.active_datasets[1].hid


def test_import_export_history_with_implicit_conversion_parents_purged():
app, h, implicit_hda = setup_history_with_implicit_conversion()
# Purge parents
parent = implicit_hda.implicitly_converted_parent_datasets[0].parent_hda
parent.dataset.purged = True
grandparent = parent.implicitly_converted_parent_datasets[0].parent_hda
grandparent.dataset.purged = True
app.model.context.commit()
imported_history = _import_export_history(app, h, export_files="copy", include_hidden=True)

assert len(imported_history.active_datasets) == 2
assert len(imported_history.datasets) == 4
imported_implicit_hda = imported_history.active_datasets[1]
assert imported_implicit_hda.extension == "fasta"

# implicit conversions have the same HID... ensure this property is recovered...
assert imported_implicit_hda.hid == implicit_hda.hid
assert imported_implicit_hda.implicitly_converted_parent_datasets
intermediate_implicit_conversion = imported_implicit_hda.implicitly_converted_parent_datasets[0]
intermediate_hda = intermediate_implicit_conversion.parent_hda
assert intermediate_hda.hid == implicit_hda.hid
assert intermediate_hda.extension == "bam"
assert intermediate_hda.implicitly_converted_datasets
assert intermediate_hda.implicitly_converted_parent_datasets
first_implicit_conversion = intermediate_hda.implicitly_converted_parent_datasets[0]
source_hda = first_implicit_conversion.parent_hda
assert source_hda.hid == implicit_hda.hid
assert source_hda.extension == "txt"


def test_import_export_history_with_implicit_conversion_and_extra_files():
app = _mock_app()

Expand Down

0 comments on commit 2a7954c

Please sign in to comment.