-
Notifications
You must be signed in to change notification settings - Fork 51
/
sqanti3_wrapper.conf
142 lines (118 loc) · 5.06 KB
/
sqanti3_wrapper.conf
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
#!/bin/bash
# Conf for SQANTI3 wrapper
# Includes all variables needed to execute all optional parameters of SQANTI3_QC,
# SQANTI3_filter and SQANTI3_rescue.
# Using this wrapper makes it easy to know which parameters a run was executed with.
# Command line used to launch each SQANTI3 stage: interpreter followed by the
# stage's script. The script paths are relative.
sqanti3_qc="python3 sqanti3_qc.py"         # QC stage
sqanti3_filter="python3 sqanti3_filter.py" # filter stage
sqanti3_rescue="python3 sqanti3_rescue.py" # rescue stage
# Arguments shared by QC, filter and rescue.
# These parameters are (99.9% of the time) the same for every step, so they
# are defined once here and then referenced by name further down in this conf
# or in the wrapper script.

# Resources and cut-offs
cpus="4"
threshold="0.7"

# Reference files
reference_fasta="example/GRCh38.p13_chr22.fasta"
reference_gtf="example/gencode.v38.basic_chr22.gtf"

# JSON file holding the rules used by both the filter rules and the rescue rules
json_for_rules="utilities/filter/filter_default.json"
# Execution control
# Any step can be skipped. Be careful when skipping QC or filter: if a later
# step needs one of their outputs and that output does not exist yet, the run
# will fail.

# Step switches: "true" skips the step.
skip_qc="true"
skip_filter="true"
skip_rescue="false"

# Mode selectors: rules, ml or both.
filter_mode="both"
rescue_mode="both"
# Sqanti3.qc
# The settings below are grouped by the kind of value they currently hold.

# Input and output
QC_input="example/UHR_chr22.gtf" # Input data
QC_output_prefix="UHR_chr22"
QC_destination_folder="/tmp/sqanti3_wrapper/QC/"
QC_report_file="both" # pdf, html or both

# Settings that currently point at files under example/ and data/
QC_cage_peak_bed_file="data/ref_TSS_annotation/human.refTSS_v3.1.hg38.bed"
QC_polyA_motif_list="data/polyA_motifs/mouse_and_human.polyA_motif.txt"
QC_full_length_pacbio_abundance_tsv="example/UHR_abundance.tsv"
QC_short_reads_fofn="example/UHR_chr22_short_reads.fofn"

# true/false switches
QC_skipORF="true"
QC_is_fusion="false"
QC_is_fastq="false" # Required to be true if QC_input is a fastq file.
QC_saturation="true"

# Numeric settings
QC_chunks="1"

# Settings left empty in this configuration
# NOTE(review): presumably the wrapper omits an option whose value is empty;
# confirm against the wrapper script.
QC_min_ref_length="" # minimum reference transcript length. Default 0 bp
QC_force_id_ignore=""
QC_aligner_choice="" # minimap2, deSALT, gmap or uLTRA
QC_polyA_peak=""
QC_phylobed=""
QC_orf_input=""
QC_expression_matrix=""
QC_gmap_index=""
QC_coverage="" # Junctions coverage file
QC_sites=""
QC_window=""
QC_genename="" # Column name from GTF to define gene names
QC_isoAnnotLite=""
QC_gff3=""
QC_SR_bam=""
QC_isoform_hits=""
QC_ratio_TSS_metric="" # Which metric should be reported in the ratio_TSS column
# Sqanti3.filter
# Common elements for filter rules and ml
# Paths built from the QC settings strip one trailing "/" from the folder
# (${var%/}) so that a folder written as "dir/" yields "dir/file" rather than
# "dir//file".
filter_input_classification="${QC_destination_folder%/}/${QC_output_prefix}_classification.txt"
filter_corrected_gtf="${QC_destination_folder%/}/${QC_output_prefix}_corrected.gtf"
filter_isoforms="GMST/GMST_tmp.faa"
filter_isoannotgff3="example/SQANTI3_QC_output/UHR_chr22.gff3"
filter_sam=""
filter_faa="${QC_destination_folder%/}/${QC_output_prefix}_corrected.faa"
filter_monoexonic="true"
# NOTE(review): this variable used to be assigned twice in this section
# ("false" first, "" later). Only the last assignment took effect, so the
# effective empty value is the one kept here; confirm how the wrapper
# interprets an empty value versus "false".
filter_skip_report=""
filter_cpus="${cpus}"

# Filter rules parameters
# NOTE: "ouput" (sic) is part of the variable names below; they are read
# outside this section, so the spelling must not be changed here alone.
filter_rules_ouput_folder="/tmp/sqanti3_wrapper/sqanti3_filter_rules"
filter_rules_prefix="${QC_output_prefix}"
filter_rules_json_file="${json_for_rules}"

# Filter ml parameters
filter_ml_ouput_folder="/tmp/sqanti3_wrapper/sqanti3_filter_ml"
filter_ml_prefix="${QC_output_prefix}"
filter_ml_percent_training="0.8"
filter_ml_TP=""
filter_ml_TN=""
filter_ml_threshold="${threshold}"
filter_ml_remove_columns=""
filter_ml_intermediate_files=""
filter_ml_max_class_size=""
filter_ml_intrapriming=""
# Sqanti3.rescue
# Paths built from other settings strip one trailing "/" from the folder
# (${var%/}) so that a folder written as "dir/" yields "dir/file" rather than
# "dir//file".

# Rescue rules parameters
rescue_rules_filtered_classification="${filter_rules_ouput_folder%/}/${filter_rules_prefix}_RulesFilter_result_classification.txt"
rescue_rules_reference_classification="${QC_destination_folder%/}/${QC_output_prefix}_classification.txt"
rescue_rules_isoforms="${QC_destination_folder%/}/${QC_output_prefix}_corrected.fasta"
rescue_rules_gtf="${filter_rules_ouput_folder%/}/${filter_rules_prefix}.filtered.gtf"
rescue_rules_monoexons="all"
rescue_rules_mode="full"
rescue_rules_output_prefix="${QC_output_prefix}"
rescue_rules_output_folder="/tmp/sqanti3_wrapper/sqanti3_rescue_rules"

# Rescue ml parameters
# rescue_ml_monoexons and rescue_ml_mode are each set exactly once; an earlier
# revision of this section repeated both assignments with the same values.
rescue_ml_filtered_classification="${filter_ml_ouput_folder%/}/${filter_ml_prefix}_MLresult_classification.txt"
rescue_ml_reference_classification="${QC_destination_folder%/}/${QC_output_prefix}_classification.txt"
rescue_ml_isoforms="${QC_destination_folder%/}/${QC_output_prefix}_corrected.fasta"
rescue_ml_gtf="${filter_ml_ouput_folder%/}/${filter_ml_prefix}.filtered.gtf"
rescue_ml_monoexons="all"
rescue_ml_mode="full"
rescue_ml_output_prefix="${QC_output_prefix}"
rescue_ml_output_folder="/tmp/sqanti3_wrapper/sqanti3_rescue_ml"
rescue_ml_randomforest_rdata="${filter_ml_ouput_folder%/}/randomforest.RData"
# WARNING ZONE
# By default QC, filter and rescue take their references and static files from
# the common parameters. The per-step values below can be changed if needed
# (although you probably won't need to).

# QC warning zone
QC_cpus="${cpus}"
QC_reference_fasta="${reference_fasta}"
QC_reference_gtf="${reference_gtf}"

# Rescue warning zone: rules
rescue_rules_json_file="${json_for_rules}"
rescue_rules_reference_genome="${reference_fasta}"
rescue_rules_reference_gtf="${reference_gtf}"

# Rescue warning zone: ml
rescue_ml_threshold="${threshold}"
rescue_ml_reference_genome="${reference_fasta}"
rescue_ml_reference_gtf="${reference_gtf}"