Skip to content

Commit

Permalink
Merge pull request #83 from wtsi-npg/devel
Browse files Browse the repository at this point in the history
release 0.16
  • Loading branch information
dozy committed Mar 16, 2015
2 parents 63dfdc4 + e324ec2 commit 2658486
Show file tree
Hide file tree
Showing 13 changed files with 752 additions and 310 deletions.
7 changes: 7 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
CHANGES LOG
-----------

release 0.16
- human split: new alignment_wtsi_stage2_humansplit_template.json, seqchksum_hs.json; addition of subst_params to alignment_common
- seqchksum comparisons: merge bamseqchksum files for outputs for comparison with initial bam file in seqchksum.json
- added comparison of cram and bam seqchksum within final_output_prep
- scramble reference optional in final_output_prep template (reference name passed as a parameter instead of via subgraph_io)
- realignment templates fixes/amendments: default value for common subst_params file; default to cram input

release 0.15
- fix construction of alternate hash command to construct sha512primesums512 seqchksum file

Expand Down
64 changes: 49 additions & 15 deletions bin/vtfp.pl
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,8 @@
my $absolute_program_paths=1;
my @keys = ();
my @vals = ();
GetOptions('help' => \$help, 'strict_checks!' => \$strict_checks, 'verbosity_level=i' => \$verbosity_level, 'template_path=s' => \$template_path, 'logfile=s' => \$logfile, 'outname:s' => \$outname, 'query_mode!' => \$query_mode, 'keys=s' => \@keys, 'values|vals=s' => \@vals, 'absolute_program_paths!' => \$absolute_program_paths);
my @nullkeys = ();
GetOptions('help' => \$help, 'strict_checks!' => \$strict_checks, 'verbosity_level=i' => \$verbosity_level, 'template_path=s' => \$template_path, 'logfile=s' => \$logfile, 'outname:s' => \$outname, 'query_mode!' => \$query_mode, 'keys=s' => \@keys, 'values|vals=s' => \@vals, 'nullkeys=s' => \@nullkeys, 'absolute_program_paths!' => \$absolute_program_paths);

if($help) {
croak q[Usage: ], $progname, q{ [-h] [-q] [-s] [-l <log_file>] [-o <output_config_name>] [-v <verbose_level>] [-keys <key> -vals <val> ...] <viv_template>};
Expand All @@ -54,7 +55,7 @@
@keys = split(/,/, join(',', @keys));
@vals = split(/,/, join(',', @vals));

my $subst_requests = initialise_subst_requests(\@keys, \@vals);
my $subst_requests = initialise_subst_requests(\@keys, \@vals, \@nullkeys);

$query_mode ||= 0;
$verbosity_level = $VLMIN unless defined $verbosity_level;
Expand All @@ -71,6 +72,9 @@
if($template_path) {
$template_path = [ (split q[:], $template_path) ];
}
else {
$template_path = [];
}

my $param_store;
my $globals = { node_prefixes => { auto_node_prefix => 0, used_prefixes => {}}, vt_file_stack => [], processed_sp_files => {}, template_path => $template_path, };
Expand Down Expand Up @@ -121,6 +125,10 @@
sub process_vtnode {
my ($vtnode_id, $vtf_name, $node_prefix, $param_store, $subst_requests, $globals) = @_;

unless(is_valid_name($vtf_name)) {
$logger->($VLFATAL, q[Missing or invalid name for VTFILE element id], $vtnode_id, q[ (], , join(q[->], @{$globals->{vt_file_stack}}), q[)]);
}

if(any { $_ eq $vtf_name} @{$globals->{vt_file_stack}}) {
$logger->($VLFATAL, q[Nesting of VTFILE ], $vtf_name, q[ within itself: ], join(q[->], @{$globals->{vt_file_stack}}));
}
Expand Down Expand Up @@ -227,7 +235,6 @@ sub process_subst_params {
for my $i (0..$#{$unprocessed_subst_params}) {

my $sp = $unprocessed_subst_params->[$i];
my $spname = $sp->{name};
my $spid = $sp->{id};
my $sptype = $sp->{type};
$sptype ||= q[PARAM];
Expand Down Expand Up @@ -268,10 +275,10 @@ sub process_subst_params {
################################
for my $spfile (@spfile_node_queue) {
subst_walk($spfile, $param_store, $subst_requests, []);
my $spname = $spfile->{name};
my $spname = is_valid_name($spfile->{name});
if(not $spname) {
# it would be better to cache these errors and report as many as possible before exit (TBI)
$logger->($VLFATAL, q[No name for SPFILE element (], , join(q[->], @$sp_file_stack), q[)]);
$logger->($VLFATAL, q[Missing or invalid name for SPFILE element id], $spfile->{id}, q[ (], , join(q[->], @$sp_file_stack), q[)]);
}

if(not $globals->{processed_sp_files}->{$spname}) { # but only process a given SPFILE once
Expand Down Expand Up @@ -381,7 +388,6 @@ sub subst_walk {
$logger->($VLFATAL, q[value for a subst directive must be a param name (not a reference), index for subst is: ], $i);
}

# $elem->[$i] = fetch_subst_value($param_name, $param_store, $subst_requests);
my $sval = fetch_subst_value($param_name, $param_store, $subst_requests);
if(ref $sval eq q[ARRAY]) {
splice @$elem, $i, 1, @$sval;
Expand Down Expand Up @@ -442,7 +448,7 @@ sub fetch_subst_value {
}

if(not defined $param_store->[0]->{varnames}->{$param_name}) { # create a "writeable" param_store entry at local level
my $new_param_entry = (not defined $param_entry)? { name => $param_name, }: dclone $param_entry;
my $new_param_entry = (not defined $param_entry)? { name => $param_name, _declared_by => [], }: dclone $param_entry;

$param_store->[0]->{varnames}->{$param_name} = $new_param_entry; # adding to the "local" variable store

Expand All @@ -454,13 +460,10 @@ sub fetch_subst_value {
}

for my $sr (@$subst_requests) {
$retval = $sr->{$param_name};
if(defined $retval) { last; }
}

if(defined $retval) {
$param_entry->{_value} = $retval;
return $retval;
if(exists $sr->{$param_name}) { # allow undef value
$param_entry->{_value} = $sr->{$param_name};
return $sr->{$param_name};
}
}

if($param_entry->{subst_constructor}) {
Expand Down Expand Up @@ -727,13 +730,17 @@ sub get_child_prefix {
# if a key is specified more than once, its value becomes a list ref
#####################################################################
sub initialise_subst_requests {
my ($keys, $vals) = @_;
my ($keys, $vals, $nullkeys) = @_;
my %subst_requests = ();

if(@$keys != @$vals) {
croak q[Mismatch between keys and vals];
}

for my $nullkey (@$nullkeys) {
$subst_requests{$nullkey} = undef;
}

for my $i (0..$#{$keys}) {
if(defined $subst_requests{$keys->[$i]}) {
if(ref $subst_requests{$keys->[$i]} ne q[ARRAY]) {
Expand All @@ -750,6 +757,33 @@ sub initialise_subst_requests {
return [ \%subst_requests ]; # note: the return value is a ref to a list of hash refs
}

#############################################################################################
# is_valid_name:
#  Valid names are true (defined, non-empty) plain strings; a reference of any kind is not a
#  valid name. Problems are reported through $logger at level $VLMIN; whether invalidity is
#  fatal is left to the caller.
#
#  Params:
#   $name - candidate name, expected to be a plain string
#   $id   - (optional) id of the element that owns the name; used only in log messages
#
#  Returns: $name when it is valid, otherwise a bare return (undef in scalar context)
#############################################################################################
sub is_valid_name {
    my ($name, $id) = @_;

    # the id is optional: substitute a placeholder so that log messages never receive undef
    my $id_label = defined $id ? $id : q[(unknown)];

    if(not $name) {
        $logger->($VLMIN, q[No name for element with id ], $id_label);

        return;
    }

    if(my $r = ref $name) {
        if($r eq q[ARRAY]) {
            # list the elements to make the offending template entry easier to find
            $logger->($VLMIN, q{Element with id }, $id_label, q{ has name of type ARRAY ref, it should be a string. Elements: [ }, join(q[;], @$name), q{]});
        }
        else {
            # HASH, CODE, SCALAR, blessed objects...: none of these can be used as a name
            $logger->($VLMIN, q[Element with id ], $id_label, q[ has name of type ], $r, q[ ref, it should be a string.]);
        }

        return;
    }

    return $name;
}

sub read_vtf_version_check {
my ($vtf_name, $version_minimum, $template_path) = @_;

Expand Down
5 changes: 3 additions & 2 deletions data/vtlib/README.vtlib
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ $ vtfp.pl -l aln_vtf.log -o aln.json \
-keys aligner_numthreads -vals <NUMBER_OF_THREADS_USED_BY_ALIGNER> \
cfgdata/wtsi_alignment_stage2_template.json

$ viv.pl -x -s -o v 3 -o viv_run.log aln.json
$ viv.pl -x -s -v 3 -o viv_run.log aln.json

* vtfp.pl flags (subst_params) description:
indatadir - input data, default to '.'
Expand All @@ -43,5 +43,6 @@ $ vtfp.pl -l aws2_bwa_mem.vtf.log -o aws2_bwa_mem.json -keys indatadir -vals ind
tophat2:
$ vtfp.pl -l aws2_tophat2.vtf.log -o aws2_tophat2.json -keys indatadir -vals indata -keys outdatadir -vals outdata_tophat2 -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals tophat2 -keys reposdir -vals /path/to/references -keys alignment_reference_genome_name -vals Homo_sapiens/1000Genomes_hs37d5/all/bowtie2/hs37d5.fa -keys reference_dict_name -vals Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys reference_genome_fasta_name -vals Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys aligner_numthreads -vals 8 cfgdata/alignment_wtsi_stage2_template.json


tophat2 with ysplit:
$ vtfp.pl -l aws2_tophat2_ysplit.vtf.log -o aws2_tophat2_ysplit.json -keys indatadir -vals indata -keys outdatadir -vals outdata_tophat2_ysplit -keys cfgdatadir -vals cfgdata -keys tmpdir -vals tmpdata -keys rpt -vals 13430_8#1 -keys alignment_method -vals tophat2 -keys reposdir -vals /path/to/reference_repository -keys alignment_reference_genome_name -vals references/Homo_sapiens/1000Genomes_hs37d5/all/bowtie2/hs37d5.fa -keys reference_dict_name -vals references/Homo_sapiens/1000Genomes_hs37d5/all/picard/hs37d5.fa.dict -keys reference_genome_fasta_name -vals references/Homo_sapiens/1000Genomes_hs37d5/all/fasta/hs37d5.fa -keys phix_reference_genome_fasta_name -vals references/PhiX/default/all/fasta/phix_unsnipped_short_no_N.fa -keys transcriptome_subpath -vals transcriptomes/Homo_sapiens/ensembl_75_transcriptome/1000Genomes_hs37d5/tophat2/1000Genomes_hs37d5.known -keys aligner_numthreads -vals 8 -keys library_type -vals fr-firststrand -keys final_output_prep_target_name -vals split_by_chromosome -keys split_bam_by_chromosome_flags -vals S=Y -keys split_bam_by_chromosome_flags -vals V=true -keys split_indicator -vals _yhuman cfgdata/alignment_wtsi_stage2_template.json

28 changes: 28 additions & 0 deletions data/vtlib/alignment_common.json
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
{"id":"reposdir","required":"no","default":"."},
{"id":"samtools_executable","required":"no","default":"samtools"},
{"id":"bwa_executable","required":"no","default":"bwa0_6"},
{"id":"scramble_executable","required":"no","default":"scramble"},
{"id":"aligner_numthreads","required":"no","default":"2"},
{"id":"bam_ext","required":"no","default":".bam"},
{"id":"cram_ext","required":"no","default":".cram"},
Expand All @@ -32,6 +33,20 @@
"postproc":{"op":"concat", "pad":""}
}
},
{"id":"hs_alignment_reference_genome_name","required":"yes"},
{
"id":"hs_alignment_reference_genome",
"description":"full path to properly formatted reference genome data for the aligner for human split",
"required":"yes",
"subst_constructor":{
"vals":[
{"subst":"reposdir"},
"/",
{"subst":"hs_alignment_reference_genome_name"}
],
"postproc":{"op":"concat", "pad":""}
}
},
{"id":"reference_genome_fasta_name","required":"yes"},
{
"id":"reference_genome_fasta",
Expand All @@ -57,6 +72,19 @@
],
"postproc":{"op":"concat", "pad":""}
}
},
{"id":"hs_reference_genome_fasta_name","required":"yes"},
{
"id":"hs_reference_genome_fasta",
"required":"yes",
"subst_constructor":{
"vals":[
{"subst":"reposdir"},
"/",
{"subst":"hs_reference_genome_fasta_name"}
],
"postproc":{"op":"concat", "pad":""}
}
}
]
}
Loading

0 comments on commit 2658486

Please sign in to comment.