Skip to content

Commit

Permalink
add new jasminesv features, update test data
Browse files Browse the repository at this point in the history
  • Loading branch information
simonbray committed Oct 21, 2021
1 parent 1f53a44 commit 542ae69
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 3 deletions.
36 changes: 33 additions & 3 deletions tools/jasminesv/jasminesv.xml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
## Flags:
'${ignore_strand}'
'${ignore_type}'
'${combine_translocations}'
#if $dup_to_ins.dup_to_ins:
'${dup_to_ins}'
#end if
Expand All @@ -58,6 +59,13 @@
#if $normalize.normalize_chrs and $normalize.chr_norm_file:
'chr_norm_file=${normalize.chr_norm_file}'
#end if
## Boolean flag: expands to the param's truevalue (--non_mutual_distance) when checked, or to an empty string otherwise.
'${non_mutual_distance}'
## Pass the per-sample distance thresholds file only when the sample_dists selector holds a non-empty value.
#if $sample_dists.sample_dists:
'sample_dists=${sample_dists.sample_dists_file}'
#end if
## min_overlap is an optional param; the argument is emitted only when the value evaluates as true.
## NOTE(review): an explicit 0.0 presumably also evaluates as false here and is then omitted - confirm this is intended.
#if $min_overlap:
'min_overlap=${min_overlap}'
#end if
## Required args
file_list='${vcffilelist}'
Expand All @@ -82,8 +90,8 @@ ${vcf_file}
Params
-->
<param argument="max_dist" type="integer" value="1000" min="0" label="The maximum distance variants can be apart when being merged" help="Setting both max_dist_linear and max_dist sets thresholds to minimum of max_dist and max_dist_linear * sv_length"/>
<param argument="min_dist" type="integer" value="-1" min="-1" label="The minimum distance threshold a variant can have when using max_dist_linear" />
<param argument="max_dist_linear" type="float" value="0." min="0.0" label="Make max_dist this proportion of the length of each variant" help="Setting both max_dist_linear and max_dist sets thresholds to minimum of max_dist and max_dist_linear * sv_length"/>
<param argument="min_dist" type="integer" value="100" min="-1" label="The minimum distance threshold a variant can have when using max_dist_linear" />
<param argument="max_dist_linear" type="float" value="0.0" min="0.0" label="Make max_dist this proportion of the length of each variant" help="Setting both max_dist_linear and max_dist sets thresholds to minimum of max_dist and max_dist_linear * sv_length"/>
<param argument="kd_tree_norm" type="integer" value="2" min="1" label="The power to use in kd-tree distances (1 is Manhattan, 2 is Euclidean, etc.)" />
<param argument="min_seq_id" type="float" value="0." min="0." label="The minimum sequence identity for two insertions to be merged" />
<param argument="k_jaccard" type="integer" value="9" min="1" label="The kmer size to use when computing Jaccard similarity of insertions" />
Expand All @@ -96,6 +104,7 @@ ${vcf_file}
-->
<param argument="--ignore_strand" type="boolean" checked="false" truevalue="--ignore_strand" falsevalue="" label="Allow variants with different strands to be merged" />
<param argument="--ignore_type" type="boolean" checked="false" truevalue="--ignore_type" falsevalue="" label="Allow variants with different types to be merged" />
<param argument="--combine_translocations" type="boolean" checked="false" truevalue="--combine_translocations" falsevalue="" label="Keep all translocations together to reduce number of groups" />
<conditional name="dup_to_ins">
<param argument="--dup_to_ins" type="select" checked="false" label="Convert duplications to insertions for SV merging and then convert them back?" help="Requires reference genome" >
<option value="--dup_to_ins">Convert duplications to insertions for SV merging and then convert them back</option>
Expand Down Expand Up @@ -131,6 +140,18 @@ ${vcf_file}
</when>
<when value=""/>
</conditional>
<param argument="--non_mutual_distance" type="boolean" checked="false" truevalue="--non_mutual_distance" falsevalue="" label="Do not require a pair of points to be within both of their distance thresholds" />
<!-- Optional per-sample distance thresholds: the file input is only offered when the selector is set to "sample_dists". -->
<conditional name="sample_dists">
    <!-- No 'checked' attribute here: it only applies to boolean params. The default option is chosen with selected="true". -->
    <param name="sample_dists" type="select" label="Specify distance thresholds for each sample?">
        <option value="sample_dists">Specify distance thresholds</option>
        <option value="" selected="true">Don't specify distance thresholds</option>
    </param>
    <when value="sample_dists">
        <!-- Data params take no default 'value'; the dataset is picked from the history. -->
        <param name="sample_dists_file" type="data" format="txt,tsv" label="A file containing distance thresholds for each sample, one per line"/>
    </when>
    <!-- Nothing to configure when no thresholds file is requested. -->
    <when value=""/>
</conditional>
<param argument="min_overlap" type="float" value="0.0" min="0.0" label="The minimum reciprocal overlap for DEL/INV/DUP SVs" optional="true"/>
</inputs>
<outputs>
<!-- standard -->
Expand Down Expand Up @@ -172,7 +193,16 @@ ${vcf_file}
</conditional>
<output name="out_vcf" file="out1.vcf"/>
</test>

<!-- test 1.0.11 -> 1.1.4 features -->
<test expect_num_outputs="1">
    <param name="vcf_list" value="a.vcf,b.vcf" ftype="vcf"/>
    <!-- new boolean flags, set to their truevalue -->
    <param name="combine_translocations" value="--combine_translocations"/>
    <param name="non_mutual_distance" value="--non_mutual_distance"/>
    <!-- The selector and its file input live inside the sample_dists conditional. Nesting them makes the target
         explicit, since the conditional and its selector share the same name. -->
    <conditional name="sample_dists">
        <param name="sample_dists" value="sample_dists"/>
        <param name="sample_dists_file" value="sample_dists.txt"/>
    </conditional>
    <param name="min_overlap" value="0.1"/>
    <output name="out_vcf" file="out1.vcf"/>
</test>
</tests>
<help><![CDATA[
.. class:: infomark
Expand Down
1 change: 1 addition & 0 deletions tools/jasminesv/test-data/out1.vcf
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
##INFO=<ID=AVG_START,Number=1,Type=String,Description="Average start position for variants merged into this one">
##INFO=<ID=AVG_END,Number=1,Type=String,Description="Average end position for variants merged into this one">
##INFO=<ID=AVG_LEN,Number=1,Type=String,Description="Average length for variants merged into this one">
##INFO=<ID=IMPRECISE,Number=0,Type=Flag,Description="Imprecise structural variation">
##FORMAT=<ID=GT,Number=1,Type=String,Description="Genotype">
1 100 0_1 CACGTACGTACGTACGTACGTACGTACTGACGTACGT C . PASS PRECISE;CHR2=1;END=136;SVTYPE=DEL;SUPTYPE=AL;SVLEN=-36;STRANDS=+-;RE=12;IS_SPECIFIC=1;STARTVARIANCE=0.000000;ENDVARIANCE=4.000000;AVG_LEN=-34.000000;AVG_START=100.000000;AVG_END=134.000000;SUPP_VEC_EXT=11;IDLIST_EXT=1,1;SUPP_EXT=2;SUPP_VEC=11;SUPP=2;SVMETHOD=JASMINE;IDLIST=1,1 GT 1/1
1 200 0_2 C CACGTACGTACGTACGTACGTACGTACTGACGTACGT . PASS PRECISE;CHR2=1;END=200;SVTYPE=INS;SUPTYPE=AL;SVLEN=36;STRANDS=+-;RE=10;IS_SPECIFIC=1;STARTVARIANCE=6.250000;ENDVARIANCE=6.250000;AVG_LEN=38.000000;AVG_START=202.500000;AVG_END=202.500000;SUPP_VEC_EXT=11;IDLIST_EXT=2,2;SUPP_EXT=2;SUPP_VEC=11;SUPP=2;SVMETHOD=JASMINE;IDLIST=2,2 GT 1/1
Expand Down
2 changes: 2 additions & 0 deletions tools/jasminesv/test-data/sample_dists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
10
50

0 comments on commit 542ae69

Please sign in to comment.