This repository has been archived by the owner on Jan 25, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 44
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
21 changed files
with
2,567 additions
and
2,447 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
{ | ||
"WholeGenomeGermlineSingleSample.sample_and_unmapped_bams": { | ||
"sample_name": "NA12878 PLUMBING", | ||
"base_file_name": "NA12878_PLUMBING", | ||
"flowcell_unmapped_bams": [ | ||
"gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06HDADXX130110.1.ATCACGAT.20k_reads.bam", | ||
"gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06HDADXX130110.2.ATCACGAT.20k_reads.bam", | ||
"gs://broad-public-datasets/NA12878_downsampled_for_testing/unmapped/H06JUADXX130110.1.ATCACGAT.20k_reads.bam" | ||
], | ||
"final_gvcf_base_name": "NA12878_PLUMBING", | ||
"unmapped_bam_suffix": ".bam" | ||
}, | ||
|
||
"WholeGenomeGermlineSingleSample.references": { | ||
"fingerprint_genotypes_file": "gs://dsde-data-na12878-public/NA12878.hg38.reference.fingerprint.vcf", | ||
"fingerprint_genotypes_index": "gs://dsde-data-na12878-public/NA12878.hg38.reference.fingerprint.vcf.idx", | ||
"contamination_sites_ud": "gs://broad-references/hg38/v0/Homo_sapiens_assembly38.contam.UD", | ||
"contamination_sites_bed": "gs://broad-references/hg38/v0/Homo_sapiens_assembly38.contam.bed", | ||
"contamination_sites_mu": "gs://broad-references/hg38/v0/Homo_sapiens_assembly38.contam.mu", | ||
"calling_interval_list": "gs://broad-references/hg38/v0/wgs_calling_regions.hg38.interval_list", | ||
"haplotype_scatter_count": 10, | ||
"break_bands_at_multiples_of": 100000, | ||
"reference_fasta": { | ||
"ref_dict": "gs://broad-references/hg38/v0/Homo_sapiens_assembly38.dict", | ||
"ref_fasta": "gs://broad-references/hg38/v0/Homo_sapiens_assembly38.fasta", | ||
"ref_fasta_index": "gs://broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.fai", | ||
"ref_alt": "gs://broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.alt", | ||
"ref_sa": "gs://broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.sa", | ||
"ref_amb": "gs://broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.amb", | ||
"ref_bwt": "gs://broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.bwt", | ||
"ref_ann": "gs://broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.ann", | ||
"ref_pac": "gs://broad-references/hg38/v0/Homo_sapiens_assembly38.fasta.64.pac" | ||
}, | ||
"known_indels_sites_vcfs": [ | ||
"gs://broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz", | ||
"gs://broad-references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz" | ||
], | ||
"known_indels_sites_indices": [ | ||
"gs://broad-references/hg38/v0/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz.tbi", | ||
"gs://broad-references/hg38/v0/Homo_sapiens_assembly38.known_indels.vcf.gz.tbi" | ||
], | ||
"dbsnp_vcf": "gs://broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf", | ||
"dbsnp_vcf_index": "gs://broad-references/hg38/v0/Homo_sapiens_assembly38.dbsnp138.vcf.idx", | ||
"evaluation_interval_list": "gs://broad-references/hg38/v0/wgs_evaluation_regions.hg38.interval_list" | ||
}, | ||
|
||
"WholeGenomeGermlineSingleSample.wgs_coverage_interval_list": "gs://broad-references/hg38/v0/wgs_coverage_regions.hg38.interval_list", | ||
|
||
"WholeGenomeGermlineSingleSample.papi_settings": { | ||
"preemptible_tries": 3, | ||
"agg_preemptible_tries": 3 | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,219 @@ | ||
version 1.0 | ||
|
||
## Copyright Broad Institute, 2018 | ||
## | ||
## This WDL pipeline implements data pre-processing and initial variant calling (GVCF | ||
## generation) according to the GATK Best Practices (June 2016) for germline SNP and | ||
## Indel discovery in human whole-genome data. | ||
## | ||
## Requirements/expectations : | ||
## - Human whole-genome paired-end sequencing data in unmapped BAM (uBAM) format | ||
## - One or more read groups, one per uBAM file, all belonging to a single sample (SM) | ||
## - Input uBAM files must additionally comply with the following requirements: | ||
## - - filenames all have the same suffix (we use ".unmapped.bam") | ||
## - - files must pass validation by ValidateSamFile | ||
## - - reads are provided in query-sorted order | ||
## - - all reads must have an RG tag | ||
## - GVCF output names must end in ".g.vcf.gz" | ||
## - Reference genome must be Hg38 with ALT contigs | ||
## | ||
## Runtime parameters are optimized for Broad's Google Cloud Platform implementation. | ||
## For program versions, see docker containers. | ||
## | ||
## LICENSING : | ||
## This script is released under the WDL source code license (BSD-3) (see LICENSE in | ||
## https://github.com/broadinstitute/wdl). Note however that the programs it calls may | ||
## be subject to different licenses. Users are responsible for checking that they are | ||
## authorized to run all programs before running this script. Please see the docker | ||
## page at https://hub.docker.com/r/broadinstitute/genomes-in-the-cloud/ for detailed | ||
## licensing information pertaining to the included programs. | ||
# Local import | ||
#import "../../../../pipelines/dna_seq/UnmappedBamToAlignedBam.wdl" as ToBam | ||
#import "../../../../tasks/AggregatedBamQC.wdl" as AggregatedQC | ||
#import "../../../../tasks/GermlineVariantDiscovery.wdl" as Calling | ||
#import "../../../../tasks/Qc.wdl" as QC | ||
#import "../../../../tasks/Utilities.wdl" as Utils | ||
#import "../../../../tasks/BamToCram.wdl" as ToCram | ||
#import "../../../../tasks/VariantCalling.wdl" as ToGvcf | ||
#import "../../../../structs/dna_seq/germline/GermlineStructs.wdl" | ||
# Git URL import | ||
import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/UnmappedBamToAlignedBam.wdl" as ToBam | ||
import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/AggregatedBamQC.wdl" as AggregatedQC | ||
import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/GermlineVariantDiscovery.wdl" as Calling | ||
import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/Qc.wdl" as QC | ||
import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/Utilities.wdl" as Utils | ||
import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/BamToCram.wdl" as ToCram | ||
import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/tasks/VariantCalling.wdl" as ToGvcf | ||
import "https://raw.githubusercontent.com/gatk-workflows/five-dollar-genome-analysis-pipeline/1.2.0/structs/GermlineStructs.wdl" | ||
|
||
# WORKFLOW DEFINITION | ||
# Top-level single-sample workflow. It wires together the imported callables: | ||
# UnmappedBamToAlignedBam -> AggregatedBamQC -> BamToCram, plus CollectWgsMetrics, | ||
# CollectRawWgsMetrics and VariantCalling (aliased BamToGvcf), all of which consume | ||
# UnmappedBamToAlignedBam.output_bam. Execution order follows these data dependencies. | ||
workflow WholeGenomeGermlineSingleSample { | ||
input { | ||
# Struct input; this workflow reads its base_file_name, sample_name and | ||
# final_gvcf_base_name members and forwards the whole struct to UnmappedBamToAlignedBam. | ||
SampleAndUnmappedBams sample_and_unmapped_bams | ||
# Struct input; members read here: reference_fasta (ref_fasta, ref_fasta_index, ref_dict), | ||
# calling_interval_list, evaluation_interval_list, haplotype_scatter_count, | ||
# break_bands_at_multiples_of, dbsnp_vcf and dbsnp_vcf_index. | ||
GermlineSingleSampleReferences references | ||
# Struct input; only agg_preemptible_tries is read directly in this workflow, | ||
# the struct itself is forwarded to UnmappedBamToAlignedBam and AggregatedBamQC. | ||
PapiSettings papi_settings | ||
# Interval list passed to both CollectWgsMetrics and CollectRawWgsMetrics. | ||
File wgs_coverage_interval_list | ||
|
||
# Optional; forwarded unchanged to UnmappedBamToAlignedBam and AggregatedBamQC. | ||
File? haplotype_database_file | ||
# When true, the BAM and its index are also exposed as workflow outputs (see the | ||
# conditional block before the output section). Defaults to false. | ||
Boolean provide_bam_output = false | ||
# Forwarded to the VariantCalling call (BamToGvcf). Defaults to true. | ||
Boolean use_gatk3_haplotype_caller = true | ||
} | ||
|
||
# Not overridable: | ||
# (these declarations sit outside the input block) | ||
# read_length is passed to both WGS metrics calls below. | ||
Int read_length = 250 | ||
# lod_threshold and cross_check_fingerprints_by are passed to UnmappedBamToAlignedBam. | ||
Float lod_threshold = -20.0 | ||
String cross_check_fingerprints_by = "READGROUP" | ||
# Shared basename handed to UnmappedBamToAlignedBam and AggregatedBamQC. | ||
String recalibrated_bam_basename = sample_and_unmapped_bams.base_file_name + ".aligned.duplicates_marked.recalibrated" | ||
|
||
# Produces output_bam / output_bam_index, duplicate_metrics, contamination and the | ||
# per-read-group metrics that are re-exported in the output section. | ||
call ToBam.UnmappedBamToAlignedBam { | ||
input: | ||
sample_and_unmapped_bams = sample_and_unmapped_bams, | ||
references = references, | ||
papi_settings = papi_settings, | ||
|
||
cross_check_fingerprints_by = cross_check_fingerprints_by, | ||
haplotype_database_file = haplotype_database_file, | ||
lod_threshold = lod_threshold, | ||
recalibrated_bam_basename = recalibrated_bam_basename | ||
} | ||
|
||
# Runs on the BAM and index emitted by UnmappedBamToAlignedBam. | ||
call AggregatedQC.AggregatedBamQC { | ||
input: | ||
base_recalibrated_bam = UnmappedBamToAlignedBam.output_bam, | ||
base_recalibrated_bam_index = UnmappedBamToAlignedBam.output_bam_index, | ||
base_name = sample_and_unmapped_bams.base_file_name, | ||
sample_name = sample_and_unmapped_bams.sample_name, | ||
recalibrated_bam_base_name = recalibrated_bam_basename, | ||
haplotype_database_file = haplotype_database_file, | ||
references = references, | ||
papi_settings = papi_settings | ||
} | ||
|
||
# Depends on AggregatedBamQC as well as UnmappedBamToAlignedBam: chimerism_metrics | ||
# is fed from AggregatedBamQC.agg_alignment_summary_metrics. | ||
call ToCram.BamToCram as BamToCram { | ||
input: | ||
input_bam = UnmappedBamToAlignedBam.output_bam, | ||
ref_fasta = references.reference_fasta.ref_fasta, | ||
ref_fasta_index = references.reference_fasta.ref_fasta_index, | ||
ref_dict = references.reference_fasta.ref_dict, | ||
duplication_metrics = UnmappedBamToAlignedBam.duplicate_metrics, | ||
chimerism_metrics = AggregatedBamQC.agg_alignment_summary_metrics, | ||
base_file_name = sample_and_unmapped_bams.base_file_name, | ||
agg_preemptible_tries = papi_settings.agg_preemptible_tries | ||
} | ||
|
||
# QC the sample WGS metrics (stringent thresholds) | ||
# Writes to "<base_file_name>.wgs_metrics"; preemptible_tries is taken from | ||
# papi_settings.agg_preemptible_tries. | ||
call QC.CollectWgsMetrics as CollectWgsMetrics { | ||
input: | ||
input_bam = UnmappedBamToAlignedBam.output_bam, | ||
input_bam_index = UnmappedBamToAlignedBam.output_bam_index, | ||
metrics_filename = sample_and_unmapped_bams.base_file_name + ".wgs_metrics", | ||
ref_fasta = references.reference_fasta.ref_fasta, | ||
ref_fasta_index = references.reference_fasta.ref_fasta_index, | ||
wgs_coverage_interval_list = wgs_coverage_interval_list, | ||
read_length = read_length, | ||
preemptible_tries = papi_settings.agg_preemptible_tries | ||
} | ||
|
||
# QC the sample raw WGS metrics (common thresholds) | ||
# Same inputs as CollectWgsMetrics above except the metrics filename suffix | ||
# (".raw_wgs_metrics"). | ||
call QC.CollectRawWgsMetrics as CollectRawWgsMetrics { | ||
input: | ||
input_bam = UnmappedBamToAlignedBam.output_bam, | ||
input_bam_index = UnmappedBamToAlignedBam.output_bam_index, | ||
metrics_filename = sample_and_unmapped_bams.base_file_name + ".raw_wgs_metrics", | ||
ref_fasta = references.reference_fasta.ref_fasta, | ||
ref_fasta_index = references.reference_fasta.ref_fasta_index, | ||
wgs_coverage_interval_list = wgs_coverage_interval_list, | ||
read_length = read_length, | ||
preemptible_tries = papi_settings.agg_preemptible_tries | ||
} | ||
|
||
# Variant calling on the BAM from UnmappedBamToAlignedBam; also consumes the | ||
# contamination value reported by that call. Its outputs are re-exported below as | ||
# output_vcf, output_vcf_index, gvcf_summary_metrics and gvcf_detail_metrics. | ||
call ToGvcf.VariantCalling as BamToGvcf { | ||
input: | ||
calling_interval_list = references.calling_interval_list, | ||
evaluation_interval_list = references.evaluation_interval_list, | ||
haplotype_scatter_count = references.haplotype_scatter_count, | ||
break_bands_at_multiples_of = references.break_bands_at_multiples_of, | ||
contamination = UnmappedBamToAlignedBam.contamination, | ||
input_bam = UnmappedBamToAlignedBam.output_bam, | ||
ref_fasta = references.reference_fasta.ref_fasta, | ||
ref_fasta_index = references.reference_fasta.ref_fasta_index, | ||
ref_dict = references.reference_fasta.ref_dict, | ||
dbsnp_vcf = references.dbsnp_vcf, | ||
dbsnp_vcf_index = references.dbsnp_vcf_index, | ||
base_file_name = sample_and_unmapped_bams.base_file_name, | ||
final_vcf_base_name = sample_and_unmapped_bams.final_gvcf_base_name, | ||
agg_preemptible_tries = papi_settings.agg_preemptible_tries, | ||
use_gatk3_haplotype_caller = use_gatk3_haplotype_caller | ||
} | ||
|
||
# Values declared inside this conditional are only defined when provide_bam_output | ||
# is true, which is why the matching outputs (output_bam, output_bam_index) are | ||
# declared as File? below. | ||
if (provide_bam_output) { | ||
File provided_output_bam = UnmappedBamToAlignedBam.output_bam | ||
File provided_output_bam_index = UnmappedBamToAlignedBam.output_bam_index | ||
} | ||
|
||
# Outputs that will be retained when execution is complete | ||
output { | ||
# Per-read-group metrics, re-exported from UnmappedBamToAlignedBam. | ||
Array[File] quality_yield_metrics = UnmappedBamToAlignedBam.quality_yield_metrics | ||
|
||
Array[File] unsorted_read_group_base_distribution_by_cycle_pdf = UnmappedBamToAlignedBam.unsorted_read_group_base_distribution_by_cycle_pdf | ||
Array[File] unsorted_read_group_base_distribution_by_cycle_metrics = UnmappedBamToAlignedBam.unsorted_read_group_base_distribution_by_cycle_metrics | ||
Array[File] unsorted_read_group_insert_size_histogram_pdf = UnmappedBamToAlignedBam.unsorted_read_group_insert_size_histogram_pdf | ||
Array[File] unsorted_read_group_insert_size_metrics = UnmappedBamToAlignedBam.unsorted_read_group_insert_size_metrics | ||
Array[File] unsorted_read_group_quality_by_cycle_pdf = UnmappedBamToAlignedBam.unsorted_read_group_quality_by_cycle_pdf | ||
Array[File] unsorted_read_group_quality_by_cycle_metrics = UnmappedBamToAlignedBam.unsorted_read_group_quality_by_cycle_metrics | ||
Array[File] unsorted_read_group_quality_distribution_pdf = UnmappedBamToAlignedBam.unsorted_read_group_quality_distribution_pdf | ||
Array[File] unsorted_read_group_quality_distribution_metrics = UnmappedBamToAlignedBam.unsorted_read_group_quality_distribution_metrics | ||
|
||
# Read-group level metrics, re-exported from AggregatedBamQC. | ||
File read_group_alignment_summary_metrics = AggregatedBamQC.read_group_alignment_summary_metrics | ||
File read_group_gc_bias_detail_metrics = AggregatedBamQC.read_group_gc_bias_detail_metrics | ||
File read_group_gc_bias_pdf = AggregatedBamQC.read_group_gc_bias_pdf | ||
File read_group_gc_bias_summary_metrics = AggregatedBamQC.read_group_gc_bias_summary_metrics | ||
|
||
# Optional output of UnmappedBamToAlignedBam. | ||
File? cross_check_fingerprints_metrics = UnmappedBamToAlignedBam.cross_check_fingerprints_metrics | ||
|
||
File selfSM = UnmappedBamToAlignedBam.selfSM | ||
# Same value that is passed to the BamToGvcf call above. | ||
Float contamination = UnmappedBamToAlignedBam.contamination | ||
|
||
File calculate_read_group_checksum_md5 = AggregatedBamQC.calculate_read_group_checksum_md5 | ||
|
||
# Aggregate-level metrics, re-exported from AggregatedBamQC. | ||
File agg_alignment_summary_metrics = AggregatedBamQC.agg_alignment_summary_metrics | ||
File agg_bait_bias_detail_metrics = AggregatedBamQC.agg_bait_bias_detail_metrics | ||
File agg_bait_bias_summary_metrics = AggregatedBamQC.agg_bait_bias_summary_metrics | ||
File agg_gc_bias_detail_metrics = AggregatedBamQC.agg_gc_bias_detail_metrics | ||
File agg_gc_bias_pdf = AggregatedBamQC.agg_gc_bias_pdf | ||
File agg_gc_bias_summary_metrics = AggregatedBamQC.agg_gc_bias_summary_metrics | ||
File agg_insert_size_histogram_pdf = AggregatedBamQC.agg_insert_size_histogram_pdf | ||
File agg_insert_size_metrics = AggregatedBamQC.agg_insert_size_metrics | ||
File agg_pre_adapter_detail_metrics = AggregatedBamQC.agg_pre_adapter_detail_metrics | ||
File agg_pre_adapter_summary_metrics = AggregatedBamQC.agg_pre_adapter_summary_metrics | ||
File agg_quality_distribution_pdf = AggregatedBamQC.agg_quality_distribution_pdf | ||
File agg_quality_distribution_metrics = AggregatedBamQC.agg_quality_distribution_metrics | ||
File agg_error_summary_metrics = AggregatedBamQC.agg_error_summary_metrics | ||
|
||
# Optional outputs of AggregatedBamQC. | ||
File? fingerprint_summary_metrics = AggregatedBamQC.fingerprint_summary_metrics | ||
File? fingerprint_detail_metrics = AggregatedBamQC.fingerprint_detail_metrics | ||
|
||
File wgs_metrics = CollectWgsMetrics.metrics | ||
File raw_wgs_metrics = CollectRawWgsMetrics.metrics | ||
|
||
File duplicate_metrics = UnmappedBamToAlignedBam.duplicate_metrics | ||
File output_bqsr_reports = UnmappedBamToAlignedBam.output_bqsr_reports | ||
|
||
# Exposed under gvcf_* names; sourced from BamToGvcf's vcf_* outputs. | ||
File gvcf_summary_metrics = BamToGvcf.vcf_summary_metrics | ||
File gvcf_detail_metrics = BamToGvcf.vcf_detail_metrics | ||
|
||
# Only populated when provide_bam_output is true. | ||
File? output_bam = provided_output_bam | ||
File? output_bam_index = provided_output_bam_index | ||
|
||
File output_cram = BamToCram.output_cram | ||
File output_cram_index = BamToCram.output_cram_index | ||
File output_cram_md5 = BamToCram.output_cram_md5 | ||
|
||
File validate_cram_file_report = BamToCram.validate_cram_file_report | ||
|
||
File output_vcf = BamToGvcf.output_vcf | ||
File output_vcf_index = BamToGvcf.output_vcf_index | ||
} | ||
} |
Oops, something went wrong.