diff --git a/pipeline_versions.txt b/pipeline_versions.txt index eb77d3561a..0c8b90a4f5 100644 --- a/pipeline_versions.txt +++ b/pipeline_versions.txt @@ -27,14 +27,14 @@ BroadInternalImputation 1.1.14 2024-11-04 BroadInternalArrays 1.1.14 2024-11-04 BroadInternalRNAWithUMIs 1.0.36 2024-11-04 RNAWithUMIsPipeline 1.0.18 2024-11-04 -Multiome 5.9.5 2024-12-12 -MultiSampleSmartSeq2SingleNucleus 2.0.7 2024-12-12 +Multiome 5.9.5 2025-01-13 +MultiSampleSmartSeq2SingleNucleus 2.0.7 2025-01-13 BuildIndices 3.1.0 2024-11-26 -SlideSeq 3.4.8 2024-12-12 -PairedTag 1.9.1 2024-12-12 -atac 2.5.3 2024-11-22 +SlideSeq 3.4.8 2025-01-13 +PairedTag 1.9.1 2025-01-13 +atac 2.5.4 2025-01-13 scATAC 1.3.2 2023-08-03 snm3C 4.0.4 2024-08-06 -Optimus 7.9.1 2024-12-12 +Optimus 7.9.1 2025-01-13 MultiSampleSmartSeq2 2.2.22 2024-09-11 SmartSeq2SingleSample 5.1.21 2024-09-11 diff --git a/pipelines/skylab/atac/atac.changelog.md b/pipelines/skylab/atac/atac.changelog.md index 578088a0d6..074e2e3614 100644 --- a/pipelines/skylab/atac/atac.changelog.md +++ b/pipelines/skylab/atac/atac.changelog.md @@ -1,3 +1,8 @@ +# 2.5.4 +2025-01-13 (Date of Last Commit) + +* Added reference_gtf_file to the output h5ad unstructured metadata + # 2.5.3 2024-11-22 (Date of Last Commit) diff --git a/pipelines/skylab/atac/atac.wdl b/pipelines/skylab/atac/atac.wdl index c0c748c042..32a06b6951 100644 --- a/pipelines/skylab/atac/atac.wdl +++ b/pipelines/skylab/atac/atac.wdl @@ -49,7 +49,7 @@ workflow ATAC { String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG" } - String pipeline_version = "2.5.3" + String pipeline_version = "2.5.4" # Determine docker prefix based on cloud provider String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/" @@ -57,7 +57,7 @@ workflow ATAC { String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix # Docker image names - String warp_tools_2_2_0 = "warp-tools:2.5.0" + String warp_tools_docker = "warp-tools:2.6.0" String cutadapt_docker = 
"cutadapt:1.0.0-4.4-1686752919" String samtools_docker = "samtools-dist-bwa:3.0.0" String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311" @@ -99,7 +99,7 @@ workflow ATAC { output_base_name = input_id, num_output_files = GetNumSplits.ranks_per_node_out, whitelist = whitelist, - docker_path = docker_prefix + warp_tools_2_2_0 + docker_path = docker_prefix + warp_tools_docker } scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) { @@ -521,6 +521,7 @@ task CreateFragmentFile { String atac_nhash_id = "" String input_id Int atac_expected_cells = 3000 + String gtf_path = annotations_gtf } parameter_meta { @@ -601,6 +602,12 @@ task CreateFragmentFile { atac_data = ad.read_h5ad("temp_metrics.h5ad") # Add nhash_id to h5ad file as unstructured metadata atac_data.uns['NHashID'] = atac_nhash_id + + # Add GTF to uns field + # Path string taken from this task's gtf_path input (which defaults to annotations_gtf) + gtf_path = "~{gtf_path}" # e.g., 'gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf' + + atac_data.uns["reference_gtf_file"] = gtf_path # calculate tsse metrics snap.metrics.tsse(atac_data, atac_gtf) # Write new atac file diff --git a/pipelines/skylab/multiome/Multiome.changelog.md b/pipelines/skylab/multiome/Multiome.changelog.md index f324ff841c..42b92421f5 100644 --- a/pipelines/skylab/multiome/Multiome.changelog.md +++ b/pipelines/skylab/multiome/Multiome.changelog.md @@ -1,7 +1,8 @@ # 5.9.5 -2024-12-12 (Date of Last Commit) +2025-01-13 (Date of Last Commit) * Added a boolean variable is_slidetags; default is false but it is set to true if the Slide-Tags pipeline is calling Optimus +* Added reference_gtf_file to the output h5ad unstructured metadata # 5.9.4 2024-12-05 (Date of Last Commit) diff --git a/pipelines/skylab/optimus/Optimus.changelog.md b/pipelines/skylab/optimus/Optimus.changelog.md index 59f45b5005..a1df2a0575 100644 --- a/pipelines/skylab/optimus/Optimus.changelog.md +++ b/pipelines/skylab/optimus/Optimus.changelog.md @@ -1,8 +1,10 @@ #
7.9.1 -2024-12-12 (Date of Last Commit) +2025-01-13 (Date of Last Commit) * Added a boolean variable is_slidetags; set to false by default, but set to true if the Slide-Tags pipeline is calling Optimus +* Added reference_gtf_file to the output h5ad unstructured metadata + # 7.9.0 2024-12-05 (Date of Last Commit) diff --git a/pipelines/skylab/optimus/Optimus.wdl b/pipelines/skylab/optimus/Optimus.wdl index 938b293e4b..a975931245 100644 --- a/pipelines/skylab/optimus/Optimus.wdl +++ b/pipelines/skylab/optimus/Optimus.wdl @@ -99,7 +99,7 @@ workflow Optimus { String pytools_docker = "pytools:1.0.0-1661263730" String empty_drops_docker = "empty-drops:1.0.1-4.2" String star_docker = "star:1.0.1-2.7.11a-1692706072" - String warp_tools_docker_2_2_0 = "warp-tools:2.5.0" + String warp_tools_docker = "warp-tools:2.6.0" String star_merge_docker = "star-merge-npz:1.3.0" String samtools_star = "samtools-star:1.0.0-1.11-2.7.11a-1731516196" @@ -175,7 +175,7 @@ workflow Optimus { chemistry = tenx_chemistry_version, sample_id = input_id, read_struct = read_struct, - warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0 + warp_tools_docker_path = docker_prefix + warp_tools_docker } scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) { @@ -208,7 +208,7 @@ workflow Optimus { mt_genes = mt_genes, original_gtf = annotations_gtf, input_id = input_id, - warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0 + warp_tools_docker_path = docker_prefix + warp_tools_docker } call Metrics.CalculateCellMetrics as CellMetrics { @@ -217,7 +217,7 @@ workflow Optimus { mt_genes = mt_genes, original_gtf = annotations_gtf, input_id = input_id, - warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0 + warp_tools_docker_path = docker_prefix + warp_tools_docker } call StarAlign.MergeStarOutput as MergeStarOutputs { @@ -266,7 +266,7 @@ workflow Optimus { empty_drops_result = RunEmptyDrops.empty_drops_result, counting_mode = counting_mode, pipeline_version = 
"Optimus_v~{pipeline_version}", - warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0 + warp_tools_docker_path = docker_prefix + warp_tools_docker } } if (count_exons && counting_mode=="sn_rna") { @@ -305,7 +305,7 @@ workflow Optimus { cell_id_exon = MergeStarOutputsExons.row_index, gene_id_exon = MergeStarOutputsExons.col_index, pipeline_version = "Optimus_v~{pipeline_version}", - warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0 + warp_tools_docker_path = docker_prefix + warp_tools_docker } } diff --git a/pipelines/skylab/paired_tag/PairedTag.changelog.md b/pipelines/skylab/paired_tag/PairedTag.changelog.md index ac48970ae8..1b272008cd 100644 --- a/pipelines/skylab/paired_tag/PairedTag.changelog.md +++ b/pipelines/skylab/paired_tag/PairedTag.changelog.md @@ -1,7 +1,8 @@ # 1.9.1 -2024-12-12 (Date of Last Commit) +2025-01-13 (Date of Last Commit) -* Added a boolean variable is_slidetags. Set to true if Slide-Tags pipeline calling Optimus, otherwise false. +* Added a boolean variable is_slidetags; default is false, but set to true if Slide-Tags pipeline is calling Optimus +* Added reference_gtf_file to the output h5ad unstructured metadata # 1.9.0 2024-12-05 (Date of Last Commit) diff --git a/pipelines/skylab/slideseq/SlideSeq.changelog.md b/pipelines/skylab/slideseq/SlideSeq.changelog.md index 1aecacb54c..cc1ba27ed1 100644 --- a/pipelines/skylab/slideseq/SlideSeq.changelog.md +++ b/pipelines/skylab/slideseq/SlideSeq.changelog.md @@ -1,7 +1,8 @@ # 3.4.8 -2024-12-12 (Date of Last Commit) +2025-01-13 (Date of Last Commit) * Added a boolean variable is_slidetags; this does not affect the outputs of the pipeline +* Added reference_gtf_file to the output h5ad unstructured metadata # 3.4.7 2024-12-3 (Date of Last Commit) diff --git a/pipelines/skylab/slideseq/SlideSeq.wdl b/pipelines/skylab/slideseq/SlideSeq.wdl index a19397b1d7..94ba0a2ca4 100644 --- a/pipelines/skylab/slideseq/SlideSeq.wdl +++ b/pipelines/skylab/slideseq/SlideSeq.wdl @@ -48,7 
+48,7 @@ workflow SlideSeq { # docker images String pytools_docker = "pytools:1.0.0-1661263730" String picard_cloud_docker = "picard-cloud:2.26.10" - String warp_tools_docker_2_2_0 = "warp-tools:2.5.0" + String warp_tools_docker = "warp-tools:2.6.0" String star_merge_docker = "star-merge-npz:1.3.0" String ubuntu_docker = "ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf" @@ -124,7 +124,7 @@ workflow SlideSeq { bam_input = MergeBam.output_bam, original_gtf = annotations_gtf, input_id = input_id, - warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0 + warp_tools_docker_path = docker_prefix + warp_tools_docker } call Metrics.CalculateUMIsMetrics as UMIsMetrics { input: @@ -138,7 +138,7 @@ workflow SlideSeq { bam_input = MergeBam.output_bam, original_gtf = annotations_gtf, input_id = input_id, - warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0 + warp_tools_docker_path = docker_prefix + warp_tools_docker } @@ -162,7 +162,7 @@ workflow SlideSeq { gene_id = MergeStarOutputs.col_index, add_emptydrops_data = "no", pipeline_version = "SlideSeq_v~{pipeline_version}", - warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0 + warp_tools_docker_path = docker_prefix + warp_tools_docker } } @@ -188,7 +188,7 @@ workflow SlideSeq { cell_id_exon = MergeStarOutputsExons.row_index, gene_id_exon = MergeStarOutputsExons.col_index, pipeline_version = "SlideSeq_v~{pipeline_version}", - warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0 + warp_tools_docker_path = docker_prefix + warp_tools_docker } } diff --git a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md index 908720365d..2bfa234f11 100644 --- a/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md +++ 
b/pipelines/skylab/smartseq2_single_nucleus_multisample/MultiSampleSmartSeq2SingleNucleus.changelog.md @@ -1,7 +1,8 @@ # 2.0.7 -2024-12-12 (Date of Last Commit) +2025-01-13 (Date of Last Commit) * Added a boolean variable is_slidetags; this does not affect the outputs of the pipeline +* Added reference_gtf_file to the output h5ad unstructured metadata # 2.0.6 2024-11-15 (Date of Last Commit) diff --git a/tasks/skylab/FastqProcessing.wdl b/tasks/skylab/FastqProcessing.wdl index 5263f53ef2..530eee652b 100644 --- a/tasks/skylab/FastqProcessing.wdl +++ b/tasks/skylab/FastqProcessing.wdl @@ -138,7 +138,7 @@ task FastqProcessingSlidSeq { # Runtime attributes - String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.5.0" + String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.6.0" Int cpu = 16 Int machine_mb = 40000 Int disk = ceil(size(r1_fastq, "GiB")*3 + size(r2_fastq, "GiB")*3) + 50 diff --git a/tasks/skylab/H5adUtils.wdl b/tasks/skylab/H5adUtils.wdl index af83a9e3f8..dedd01a509 100644 --- a/tasks/skylab/H5adUtils.wdl +++ b/tasks/skylab/H5adUtils.wdl @@ -34,6 +34,7 @@ task OptimusH5adGeneration { File? 
empty_drops_result String counting_mode = "sc_rna" String add_emptydrops_data = "yes" + String gtf_path = annotation_file String pipeline_version @@ -73,7 +74,8 @@ task OptimusH5adGeneration { ~{"--input_name_metadata_field " + input_name_metadata_field} \ --count_matrix ~{sparse_count_matrix} \ --expression_data_type "exonic" \ - --pipeline_version ~{pipeline_version} + --pipeline_version ~{pipeline_version} \ + --gtf_path ~{gtf_path} else python3 /warptools/scripts/create_snrna_optimus_full_h5ad.py \ --annotation_file ~{annotation_file} \ @@ -88,7 +90,8 @@ task OptimusH5adGeneration { ~{"--input_name_metadata_field " + input_name_metadata_field} \ --count_matrix ~{sparse_count_matrix} \ --expression_data_type "whole_transcript"\ - --pipeline_version ~{pipeline_version} + --pipeline_version ~{pipeline_version} \ + --gtf_path ~{gtf_path} fi # modify h5ad to include doublets, NHASHID, and build library metrics @@ -158,6 +161,7 @@ task SingleNucleusOptimusH5adOutput { File? library_metrics # Cell calls from starsolo in TSV format File? cellbarcodes + String gtf_path = annotation_file String pipeline_version @@ -194,7 +198,8 @@ task SingleNucleusOptimusH5adOutput { ~{"--input_id_metadata_field " + input_id_metadata_field} \ ~{"--input_name_metadata_field " + input_name_metadata_field} \ --expression_data_type "whole_transcript" \ - --pipeline_version ~{pipeline_version} + --pipeline_version ~{pipeline_version} \ + --gtf_path ~{gtf_path} # modify h5ad to include doublets, NHASHID, and build library metrics python3 /warptools/scripts/add_library_tso_doublets.py \