Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[24.1] Fix history import when parent_hda not serialized #18873

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4546,6 +4546,7 @@ class DatasetInstance(RepresentById, UsesCreateAndUpdateTime, _HasTable):
copied_from_history_dataset_association: Optional["HistoryDatasetAssociation"]
copied_from_library_dataset_dataset_association: Optional["LibraryDatasetDatasetAssociation"]
implicitly_converted_datasets: List["ImplicitlyConvertedDatasetAssociation"]
implicitly_converted_parent_datasets: List["ImplicitlyConvertedDatasetAssociation"]

validated_states = DatasetValidatedState

Expand Down
30 changes: 22 additions & 8 deletions lib/galaxy/model/store/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1023,7 +1023,7 @@ def _reassign_hids(self, object_import_tracker: "ObjectImportTracker", history:

if object_import_tracker.copy_hid_for:
# in an if to avoid flush if unneeded
for from_dataset, to_dataset in object_import_tracker.copy_hid_for.items():
for from_dataset, to_dataset in object_import_tracker.copy_hid_for:
to_dataset.hid = from_dataset.hid
self._session_add(to_dataset)
self._flush()
Expand Down Expand Up @@ -1276,18 +1276,24 @@ def _import_implicit_dataset_conversions(self, object_import_tracker: "ObjectImp
metadata_safe = False
idc = model.ImplicitlyConvertedDatasetAssociation(metadata_safe=metadata_safe, for_import=True)
idc.type = idc_attrs["file_type"]
if idc_attrs.get("parent_hda"):
idc.parent_hda = object_import_tracker.hdas_by_key[idc_attrs["parent_hda"]]
# We may not have exported the parent, so only set the parent_hda attribute if we did.
if (parent_hda_id := idc_attrs.get("parent_hda")) and (
parent_hda := object_import_tracker.hdas_by_key.get(parent_hda_id)
):
# exports created prior to 24.2 may not have a parent if the parent had been purged
idc.parent_hda = parent_hda
if idc_attrs.get("hda"):
idc.dataset = object_import_tracker.hdas_by_key[idc_attrs["hda"]]

# we have a the dataset and the parent, lets ensure they land up with the same HID
if idc.dataset and idc.parent_hda and idc.parent_hda in object_import_tracker.requires_hid:
# we have the dataset and the parent, lets ensure they land up with the same HID
if idc.dataset and idc.parent_hda:
try:
object_import_tracker.requires_hid.remove(idc.dataset)
except ValueError:
pass # we wanted to remove it anyway.
object_import_tracker.copy_hid_for[idc.parent_hda] = idc.dataset
# A HDA can be the parent of multiple implicitly converted dataset,
# that's thy we use [(source, target)] here
object_import_tracker.copy_hid_for.append((idc.parent_hda, idc.dataset))

self._session_add(idc)

Expand Down Expand Up @@ -1370,7 +1376,7 @@ class ObjectImportTracker:
hdca_copied_from_sinks: Dict[ObjectKeyType, ObjectKeyType]
jobs_by_key: Dict[ObjectKeyType, model.Job]
requires_hid: List["HistoryItem"]
copy_hid_for: Dict["HistoryItem", "HistoryItem"]
copy_hid_for: List[Tuple["HistoryItem", "HistoryItem"]]

def __init__(self) -> None:
self.libraries_by_key = {}
Expand All @@ -1388,7 +1394,7 @@ def __init__(self) -> None:
self.implicit_collection_jobs_by_key: Dict[str, ImplicitCollectionJobs] = {}
self.workflows_by_key: Dict[str, model.Workflow] = {}
self.requires_hid = []
self.copy_hid_for = {}
self.copy_hid_for = []

self.new_history: Optional[model.History] = None

Expand Down Expand Up @@ -2301,6 +2307,14 @@ def add_implicit_conversion_dataset(
include_files: bool,
conversion: model.ImplicitlyConvertedDatasetAssociation,
) -> None:
parent_hda = conversion.parent_hda
if parent_hda and parent_hda not in self.included_datasets:
# We should always include the parent of an implicit conversion
# to avoid holes in the provenance.
self.included_datasets[parent_hda] = (parent_hda, include_files)
grand_parent_association = parent_hda.implicitly_converted_parent_datasets
if grand_parent_association and (grand_parent_hda := grand_parent_association[0].parent_hda):
self.add_implicit_conversion_dataset(grand_parent_hda, include_files, grand_parent_association[0])
self.included_datasets[dataset] = (dataset, include_files)
self.dataset_implicit_conversions[dataset] = conversion

Expand Down
62 changes: 55 additions & 7 deletions test/unit/data/model/test_model_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,32 +122,80 @@ def test_import_export_history_allow_discarded_data():
assert imported_job.output_datasets[0].dataset == datasets[1]


def test_import_export_history_with_implicit_conversion():
def setup_history_with_implicit_conversion():
app = _mock_app()

u, h, d1, d2, j = _setup_simple_cat_job(app)

intermediate_ext = "bam"
intermediate_implicit_hda = model.HistoryDatasetAssociation(
extension=intermediate_ext, create_dataset=True, flush=False, history=h
)
intermediate_implicit_hda.hid = d2.hid
convert_ext = "fasta"
implicit_hda = model.HistoryDatasetAssociation(extension=convert_ext, create_dataset=True, flush=False, history=h)
implicit_hda.hid = d2.hid
# this adds and flushes the result...
d2.attach_implicitly_converted_dataset(app.model.context, implicit_hda, convert_ext)
intermediate_implicit_hda.attach_implicitly_converted_dataset(app.model.context, implicit_hda, convert_ext)
d2.attach_implicitly_converted_dataset(app.model.context, intermediate_implicit_hda, intermediate_ext)

app.object_store.update_from_file(intermediate_implicit_hda.dataset, file_name=TEST_PATH_2_CONVERTED, create=True)
app.object_store.update_from_file(implicit_hda.dataset, file_name=TEST_PATH_2_CONVERTED, create=True)

assert len(h.active_datasets) == 3
assert len(h.active_datasets) == 4
return app, h, implicit_hda


def test_import_export_history_with_implicit_conversion():
app, h, _ = setup_history_with_implicit_conversion()
imported_history = _import_export_history(app, h, export_files="copy", include_hidden=True)

assert len(imported_history.active_datasets) == 3
assert len(imported_history.active_datasets) == 4
recovered_hda_2 = imported_history.active_datasets[1]
assert recovered_hda_2.implicitly_converted_datasets
imported_conversion = recovered_hda_2.implicitly_converted_datasets[0]
assert imported_conversion.type == "fasta"
assert imported_conversion.dataset == imported_history.active_datasets[2]
intermediate_conversion = recovered_hda_2.implicitly_converted_datasets[0]
assert intermediate_conversion.type == "bam"
intermediate_hda = intermediate_conversion.dataset
assert intermediate_hda.implicitly_converted_datasets
final_conversion = intermediate_hda.implicitly_converted_datasets[0]

assert final_conversion.type == "fasta"
assert final_conversion.dataset == imported_history.active_datasets[-1]

# implicit conversions have the same HID... ensure this property is recovered...
assert imported_history.active_datasets[2].hid == imported_history.active_datasets[1].hid


def test_import_export_history_with_implicit_conversion_parents_purged():
app, h, implicit_hda = setup_history_with_implicit_conversion()
# Purge parents
parent = implicit_hda.implicitly_converted_parent_datasets[0].parent_hda
parent.dataset.purged = True
grandparent = parent.implicitly_converted_parent_datasets[0].parent_hda
grandparent.dataset.purged = True
app.model.context.commit()
imported_history = _import_export_history(app, h, export_files="copy", include_hidden=True)

assert len(imported_history.active_datasets) == 2
assert len(imported_history.datasets) == 4
imported_implicit_hda = imported_history.active_datasets[1]
assert imported_implicit_hda.extension == "fasta"

# implicit conversions have the same HID... ensure this property is recovered...
assert imported_implicit_hda.hid == implicit_hda.hid
assert imported_implicit_hda.implicitly_converted_parent_datasets
intermediate_implicit_conversion = imported_implicit_hda.implicitly_converted_parent_datasets[0]
intermediate_hda = intermediate_implicit_conversion.parent_hda
assert intermediate_hda.hid == implicit_hda.hid
assert intermediate_hda.extension == "bam"
assert intermediate_hda.implicitly_converted_datasets
assert intermediate_hda.implicitly_converted_parent_datasets
first_implicit_conversion = intermediate_hda.implicitly_converted_parent_datasets[0]
source_hda = first_implicit_conversion.parent_hda
assert source_hda.hid == implicit_hda.hid
assert source_hda.extension == "txt"


def test_import_export_history_with_implicit_conversion_and_extra_files():
app = _mock_app()

Expand Down
Loading