Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enable all-vs-all collection analysis patterns. #17366

Merged
merged 3 commits into from
May 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lib/galaxy/config/sample/tool_conf.xml.sample
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@
<tool file="${model_tools_path}/filter_from_file.xml" />
<tool file="${model_tools_path}/sort_collection_list.xml" />
<tool file="${model_tools_path}/harmonize_two_collections_list.xml" />
<tool file="${model_tools_path}/cross_product_flat.xml" />
<tool file="${model_tools_path}/cross_product_nested.xml" />
<tool file="${model_tools_path}/tag_collection_from_file.xml" />
<tool file="${model_tools_path}/apply_rules.xml" />
<tool file="${model_tools_path}/build_list.xml" />
Expand Down
12 changes: 12 additions & 0 deletions lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@
)
from galaxy.model.orm.now import now
from galaxy.model.orm.util import add_object_to_object_session
from galaxy.objectstore import ObjectStorePopulator
from galaxy.schema.invocation import (
InvocationCancellationUserRequest,
InvocationState,
Expand Down Expand Up @@ -4581,6 +4582,17 @@ def get_quota_source_label(self):

quota_source_label = property(get_quota_source_label)

def set_skipped(self, object_store_populator: ObjectStorePopulator):
    """Turn this dataset instance into a hidden, finished "skipped" placeholder.

    The instance is first handed to ``object_store_populator`` so that it is
    assigned an object store, then it is relabelled as an ``expression.json``
    dataset in the OK state with the blurb ``"skipped"`` and hidden from view.
    Its backing file is overwritten with the JSON literal ``null`` and the
    total size is recomputed afterwards.

    :param object_store_populator: populator whose ``set_object_store_id`` is
        called with this instance before the backing file is written.
    """
    assert self.dataset
    # The object store must be assigned before the file name is resolved below.
    object_store_populator.set_object_store_id(self)
    self.extension = "expression.json"
    self.state = self.states.OK
    self.blurb = "skipped"
    self.visible = False
    skipped_path = self.dataset.get_file_name()
    with open(skipped_path, "w") as handle:
        # A skipped output is represented on disk as a JSON null.
        json.dump(None, handle)
    self.set_total_size()

def get_file_name(self, sync_cache=True) -> str:
if self.dataset.purged:
return ""
Expand Down
89 changes: 89 additions & 0 deletions lib/galaxy/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from collections.abc import MutableMapping
from pathlib import Path
from typing import (
Any,
cast,
Dict,
List,
Expand Down Expand Up @@ -3305,6 +3306,94 @@ def produce_outputs(self, trans, out_data, output_collections, incoming, history
)


class CrossProductFlatCollectionTool(DatabaseOperationTool):
    """Model operation pairing every element of one collection with every element of another.

    Two flat outputs are produced, ``output_a`` and ``output_b``. For each
    (a, b) combination both outputs receive one entry under the same joined
    identifier: ``output_a`` gets a copy of a's element object and
    ``output_b`` gets a copy of b's element object.
    """

    tool_type = "cross_product_flat"
    require_terminal_states = False
    require_dataset_ok = False

    def produce_outputs(self, trans, out_data, output_collections, incoming, history, **kwds):
        """Build the two flat cross-product collections from ``incoming``.

        ``incoming`` must provide ``input_a``, ``input_b`` and
        ``join_identifier``; the latter is placed between the two element
        identifiers to form each output identifier.
        """
        collection_a = incoming["input_a"]
        collection_b = incoming["input_b"]
        separator = incoming["join_identifier"]

        elements_a = {}
        elements_b = {}
        copies = []

        for dce_a in collection_a.collection.elements:
            for dce_b in collection_b.collection.elements:
                source_a = dce_a.element_object
                source_b = dce_b.element_object
                # Every combination gets its own fresh copies, with tags carried over.
                copy_a = source_a.copy(copy_tags=source_a.tags, flush=False)
                copy_b = source_b.copy(copy_tags=source_b.tags, flush=False)
                copies.extend((copy_a, copy_b))

                key = f"{dce_a.element_identifier}{separator}{dce_b.element_identifier}"
                elements_a[key] = copy_a
                elements_b[key] = copy_b

        self._add_datasets_to_history(history, copies)
        for output_name, elements in (("output_a", elements_a), ("output_b", elements_b)):
            output_collections.create_collection(
                self.outputs[output_name], output_name, elements=elements, propagate_hda_tags=False
            )


class CrossProductNestedCollectionTool(DatabaseOperationTool):
    """Model operation building the cross product of two collections as nested collections.

    Two outputs are produced, ``output_a`` and ``output_b``. Each has one
    inner ``list`` per element of ``input_a`` (keyed by that element's
    identifier), and each inner list has one entry per element of ``input_b``
    (keyed by that element's identifier). Entries in ``output_a`` are copies
    of the A element object; entries in ``output_b`` are copies of the B
    element object.
    """

    tool_type = "cross_product_nested"
    require_terminal_states = False
    require_dataset_ok = False

    def produce_outputs(self, trans, out_data, output_collections, incoming, history, **kwds):
        """Build the two nested cross-product collections from ``incoming``.

        ``incoming`` must provide ``input_a`` and ``input_b``.
        """
        collection_a = incoming["input_a"]
        collection_b = incoming["input_b"]

        nested_a = {}
        nested_b = {}
        copies = []

        for dce_a in collection_a.collection.elements:
            row_a = {}
            row_b = {}

            for dce_b in collection_b.collection.elements:
                source_a = dce_a.element_object
                source_b = dce_b.element_object
                # Every combination gets its own fresh copies, with tags carried over.
                copy_a = source_a.copy(copy_tags=source_a.tags, flush=False)
                copy_b = source_b.copy(copy_tags=source_b.tags, flush=False)
                copies.extend((copy_a, copy_b))

                inner_key = dce_b.element_identifier
                row_a[inner_key] = copy_a
                row_b[inner_key] = copy_b

            outer_key = dce_a.element_identifier

            # Each outer entry describes a new inner "list" collection to create.
            sub_collection_a: Dict[str, Any] = {
                "src": "new_collection",
                "collection_type": "list",
                "elements": row_a,
            }
            nested_a[outer_key] = sub_collection_a

            sub_collection_b: Dict[str, Any] = {
                "src": "new_collection",
                "collection_type": "list",
                "elements": row_b,
            }
            nested_b[outer_key] = sub_collection_b

        self._add_datasets_to_history(history, copies)
        for output_name, elements in (("output_a", nested_a), ("output_b", nested_b)):
            output_collections.create_collection(
                self.outputs[output_name], output_name, elements=elements, propagate_hda_tags=False
            )


class BuildListCollectionTool(DatabaseOperationTool):
tool_type = "build_list"
require_terminal_states = False
Expand Down
9 changes: 1 addition & 8 deletions lib/galaxy/tools/actions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -675,14 +675,7 @@ def handle_output(name, output, hidden=None):
hdca.visible = False
object_store_populator = ObjectStorePopulator(trans.app, trans.user)
for data in out_data.values():
object_store_populator.set_object_store_id(data)
data.extension = "expression.json"
data.state = "ok"
data.blurb = "skipped"
data.visible = False
with open(data.dataset.get_file_name(), "w") as out:
out.write(json.dumps(None))
data.set_total_size()
data.set_skipped(object_store_populator)
job.preferred_object_store_id = preferred_object_store_id
self._record_inputs(trans, tool, job, incoming, inp_data, inp_dataset_collections)
self._record_outputs(job, out_data, output_collections)
Expand Down
7 changes: 5 additions & 2 deletions lib/galaxy/tools/actions/model_operations.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import logging
from typing import TYPE_CHECKING

from galaxy.objectstore import ObjectStorePopulator
from galaxy.tools.actions import (
DefaultToolAction,
OutputCollections,
Expand Down Expand Up @@ -137,8 +138,10 @@ def _produce_outputs(
if skip:
for output_collection in output_collections.out_collections.values():
output_collection.mark_as_populated()
object_store_populator = ObjectStorePopulator(trans.app, trans.user)
for hdca in output_collections.out_collection_instances.values():
hdca.visible = False
# Would we also need to replace the datasets with skipped datasets?

# Would we also need to replace the datasets with skipped datasets?
for data in hdca.dataset_instances:
data.set_skipped(object_store_populator)
trans.sa_session.add_all(out_data.values())
89 changes: 89 additions & 0 deletions lib/galaxy/tools/cross_product_flat.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
<tool id="__CROSS_PRODUCT_FLAT__"
      name="Flat Cross Product"
      version="1.0.0">
    <description></description>
    <type class="CrossProductFlatCollectionTool" module="galaxy.tools" />
    <macros>
        <import>model_operation_macros.xml</import>
    </macros>
    <expand macro="uses_a_model_operation_action" />
    <edam_operations>
        <edam_operation>operation_3436</edam_operation> <!-- DataHandling -> Aggregation -->
    </edam_operations>
    <inputs>
        <param type="data_collection" name="input_a" collection_type="list" label="Input Collection A" />
        <param type="data_collection" name="input_b" collection_type="list" label="Input Collection B" />
        <expand macro="join_identifier" />
    </inputs>
    <outputs>
        <collection name="output_a" format_source="input_a" type="list" label="${on_string} (A files)" >
        </collection>
        <collection name="output_b" format_source="input_b" type="list" label="${on_string} (B files)" >
        </collection>
    </outputs>
    <tests>
        <!-- 2 x 2 inputs: both outputs must contain the four joined identifiers,
             output_a holding the A content and output_b the B content. -->
        <test>
            <param name="input_a">
                <collection type="list">
                    <element name="a1" value="simple_line.txt" />
                    <element name="a2" value="simple_line_alternative.txt" />
                </collection>
            </param>
            <param name="input_b">
                <collection type="list">
                    <element name="b1" value="1.txt" />
                    <element name="b2" value="1.fasta" />
                </collection>
            </param>
            <param name="join_identifier" value="_" />
            <output_collection name="output_a" type="list">
                <element name="a1_b1">
                    <expand macro="assert_is_simple_line" />
                </element>
                <element name="a1_b2">
                    <expand macro="assert_is_simple_line" />
                </element>
                <element name="a2_b1">
                    <expand macro="assert_is_simple_line_alt" />
                </element>
                <element name="a2_b2">
                    <expand macro="assert_is_simple_line_alt" />
                </element>
            </output_collection>
            <output_collection name="output_b" type="list">
                <element name="a1_b1">
                    <expand macro="assert_is_1_dot_txt" />
                </element>
                <element name="a1_b2">
                    <expand macro="assert_is_1_dot_fasta" />
                </element>
                <element name="a2_b1">
                    <expand macro="assert_is_1_dot_txt" />
                </element>
                <element name="a2_b2">
                    <expand macro="assert_is_1_dot_fasta" />
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[

========
Synopsis
========

Pairs every element of one list with every element of another list (an all-vs-all "cross product") and returns the result as two flat lists.

===========
Description
===========

For each element ``a`` of **Input Collection A** and each element ``b`` of **Input Collection B**, one new element is added to both outputs. Its identifier is the identifier of ``a``, followed by the selected separator, followed by the identifier of ``b``.

- The **A files** output holds a copy of ``a`` under that identifier.
- The **B files** output holds a copy of ``b`` under that identifier.

Both outputs therefore contain the same identifiers, one per combination of an element from A with an element from B.

**Example**

With collection A containing ``a1`` and ``a2``, collection B containing ``b1`` and ``b2``, and the underscore separator, both outputs contain the identifiers ``a1_b1``, ``a1_b2``, ``a2_b1`` and ``a2_b2``. In the A files output ``a1_b1`` and ``a1_b2`` are copies of ``a1``; in the B files output ``a1_b1`` and ``a2_b1`` are copies of ``b1``.

----

.. class:: infomark

@QUOTA_USAGE_NOTE@

    ]]></help>
</tool>
93 changes: 93 additions & 0 deletions lib/galaxy/tools/cross_product_nested.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
<tool id="__CROSS_PRODUCT_NESTED__"
      name="Nested Cross Product"
      version="1.0.0">
    <description></description>
    <type class="CrossProductNestedCollectionTool" module="galaxy.tools" />
    <macros>
        <import>model_operation_macros.xml</import>
    </macros>
    <expand macro="uses_a_model_operation_action" />
    <expand macro="annotate_as_aggregation_operation" />
    <inputs>
        <param type="data_collection" name="input_a" collection_type="list" label="Input Collection A" />
        <param type="data_collection" name="input_b" collection_type="list" label="Input Collection B" />
    </inputs>
    <outputs>
        <collection name="output_a" format_source="input_a" type="list:list" label="${on_string} (A files)" >
        </collection>
        <collection name="output_b" format_source="input_b" type="list:list" label="${on_string} (B files)" >
        </collection>
    </outputs>
    <tests>
        <!-- 2 x 2 inputs: both outputs are nested by A identifier then B identifier,
             output_a holding the A content and output_b the B content. -->
        <test>
            <param name="input_a">
                <collection type="list">
                    <element name="a1" value="simple_line.txt" />
                    <element name="a2" value="simple_line_alternative.txt" />
                </collection>
            </param>
            <param name="input_b">
                <collection type="list">
                    <element name="b1" value="1.txt" />
                    <element name="b2" value="1.fasta" />
                </collection>
            </param>
            <output_collection name="output_a" type="list:list">
                <element name="a1">
                    <element name="b1">
                        <expand macro="assert_is_simple_line" />
                    </element>
                    <element name="b2">
                        <expand macro="assert_is_simple_line" />
                    </element>
                </element>
                <element name="a2">
                    <element name="b1">
                        <expand macro="assert_is_simple_line_alt" />
                    </element>
                    <element name="b2">
                        <expand macro="assert_is_simple_line_alt" />
                    </element>
                </element>
            </output_collection>
            <output_collection name="output_b" type="list:list">
                <element name="a1">
                    <element name="b1">
                        <expand macro="assert_is_1_dot_txt" />
                    </element>
                    <element name="b2">
                        <expand macro="assert_is_1_dot_fasta" />
                    </element>
                </element>
                <element name="a2">
                    <element name="b1">
                        <expand macro="assert_is_1_dot_txt" />
                    </element>
                    <element name="b2">
                        <expand macro="assert_is_1_dot_fasta" />
                    </element>
                </element>
            </output_collection>
        </test>
    </tests>
    <help><![CDATA[

========
Synopsis
========

Pairs every element of one list with every element of another list (an all-vs-all "cross product") and returns the result as two nested lists (``list:list``).

===========
Description
===========

Both outputs have the same structure: an outer list with one entry per element of **Input Collection A**, named after that element's identifier, and inside each entry an inner list with one element per element of **Input Collection B**, named after that element's identifier.

- In the **A files** output, every element of the inner list named after ``a`` is a copy of ``a``.
- In the **B files** output, the element named after ``b`` in every inner list is a copy of ``b``.

**Example**

With collection A containing ``a1`` and ``a2`` and collection B containing ``b1`` and ``b2``, both outputs contain the inner lists ``a1`` and ``a2``, each holding the elements ``b1`` and ``b2``. In the A files output both elements of the inner list ``a1`` are copies of ``a1``; in the B files output the elements ``b1`` and ``b2`` of every inner list are copies of ``b1`` and ``b2`` respectively.

----

.. class:: infomark

@QUOTA_USAGE_NOTE@

    ]]></help>
</tool>
17 changes: 6 additions & 11 deletions lib/galaxy/tools/flatten_collection.xml
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,12 @@
<edam_operations>
<edam_operation>operation_2409</edam_operation>
</edam_operations>
<macros>
<import>model_operation_macros.xml</import>
</macros>
<inputs>
<param type="data_collection" name="input" label="Input Collection" />
<param type="select" name="join_identifier" label="Join collection identifiers using" help="Separator for merging dataset identifiers">
<option value="_">underscore ( _ )</option>
<option value=":">colon ( : )</option>
<option value="-">dash ( - )</option>
</param>
<expand macro="join_identifier" />
</inputs>
<outputs>
<collection name="output" format_source="input" type="list" label="${on_string} (flattened)" >
Expand All @@ -35,14 +34,10 @@
</param>
<output_collection name="output" type="list">
<element name="i1_forward">
<assert_contents>
<has_text_matching expression="^This is a line of text.\n$" />
</assert_contents>
<expand macro="assert_is_simple_line" />
</element>
<element name="i1_reverse">
<assert_contents>
<has_text_matching expression="^This is a different line of text.\n$" />
</assert_contents>
<expand macro="assert_is_simple_line_alt" />
</element>
</output_collection>
</test>
Expand Down
Loading
Loading