Skip to content

Commit

Permalink
Merge pull request #102 from wtsi-npg/devel
Browse files Browse the repository at this point in the history
merge from devel to master to create release 39.11
  • Loading branch information
dozy authored Jan 14, 2019
2 parents 1530272 + 1d615ad commit ae8f402
Show file tree
Hide file tree
Showing 197 changed files with 655 additions and 1,080 deletions.
11 changes: 11 additions & 0 deletions Changes
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
LIST OF CHANGES

release 39.11
- auto QC loader query construction: do not use invalid option
- wh loader: skip loading results for multi-component entities
- to retrieve lane-level results for multi-component entities,
ask for lanes and plexes separately
- stop loading Illumina qc data that came from chached_query table
of the npg_qc database (pf_cluster_count, raw_cluster_count, pf_bases)
- load q30 and q40 yields from qX_yield autoqc check results
- load bam_flagstats target metrics into iseq_product_metrics table
- a script to launch warehouse loader script for certain runs

release 39.10
- two gbs metrics to be added to the iseq_product_metrics table

Expand Down
177 changes: 96 additions & 81 deletions MANIFEST

Large diffs are not rendered by default.

186 changes: 186 additions & 0 deletions bin/warehouse_loader_launcher
Original file line number Diff line number Diff line change
@@ -0,0 +1,186 @@
#!/usr/bin/env perl

use strict;
use warnings;
use FindBin qw($Bin);
use lib ( -d "$Bin/../lib/perl5" ? "$Bin/../lib/perl5" : "$Bin/../lib" );
use DateTime;
use DateTime::Duration;
use Getopt::Long;
use Pod::Usage;
use Readonly;

use npg_tracking::Schema;

our $VERSION = '0';

# Executable names of the two warehouse loaders this launcher can start.
Readonly::Scalar my $ML_WH_LOADER_COMMAND => 'npg_runs2mlwarehouse';
Readonly::Scalar my $SS_WH_LOADER_COMMAND => 'warehouse_loader';
# Run statuses that are considered when --run_status is not given.
Readonly::Array my @RUN_STATUSES =>
  ('secondary analysis in progress', 'qc review pending');
# Default look-back window for the status date, in hours.
Readonly::Scalar my $NUM_HOURS_LOOK_BACK => 3;

# Option values, initialised to their defaults.
my $look_back    = $NUM_HOURS_LOOK_BACK;
my $run_statuses = \@RUN_STATUSES;
my $dry_run      = 0;
my $old_wh       = 0;
my $help;

# GetOptions returns false when it meets an unknown option or a value of
# the wrong type. Stop with a usage message in that case: carrying on with
# defaults could, for example, launch a real load when --dry_run was
# mistyped.
GetOptions (
  'help'          => \$help,
  'dry_run!'      => \$dry_run,
  'sswh!'         => \$old_wh,
  'num_hours=i'   => \$look_back,
  'run_status=s@' => \$run_statuses,
) or pod2usage(2);

if ($help) { pod2usage(0); }

# Time of this invocation; printed in the banner below.
my $date = DateTime->now();

# The ml warehouse loader is the default, --sswh switches to the old one.
my $script_name = $ML_WH_LOADER_COMMAND;
if ($old_wh) {
  $script_name = $SS_WH_LOADER_COMMAND;
}

# A zero look-back value means that the status date is not restricted.
my $look_back_desc = 'without limit';
if ($look_back) {
  $look_back_desc = "$look_back hours";
}

# Report the effective settings on STDERR before doing any work.
warn sprintf "%s == Running warehouse_loader_launcher, looking back %s\n",
  $date, $look_back_desc;
warn sprintf "Considering statuses: %s\n", join q[, ], @{$run_statuses};
warn sprintf "Will use %s warehouse loader\n", $script_name;
warn "DRY RUN\n" if $dry_run;

# Search condition: runs whose current status is one of the requested ones.
my $query = {};
$query->{'run_statuses.iscurrent'}      = 1;
$query->{'run_status_dict.description'} = $run_statuses;

if ($look_back) {
  # Only consider statuses assigned after (now - $look_back hours).
  # NOTE(review): $date comes from DateTime->now(), which is UTC by
  # default; confirm that run_statuses.date values are comparable with
  # a UTC timestamp.
  $date->subtract_duration(DateTime::Duration->new(hours => $look_back));
  $date = "$date"; # the query gets the stringified date, not the object
  $query->{'run_statuses.date'} = { q[>] => $date };
}

# The status description lives in a dictionary table, hence the nested join.
my $search_attrs = { join => { 'run_statuses' => 'run_status_dict' } };
my @rows = npg_tracking::Schema->connect()
                               ->resultset('Run')
                               ->search($query, $search_attrs)
                               ->all();

# Keep only the runs for which the pattern returned by folder_path_glob
# matches at least one path on this host.
my @id_runs = ();
for my $row (@rows) {
  my @found = glob $row->folder_path_glob;
  next if !@found;

  my $id_run = $row->id_run;
  warn sprintf "Run %s: found %s\n", $id_run, join q[, ], @found;
  push @id_runs, $id_run;
}

if (@id_runs) {
  # This script might be run as a cron job. If we specify a full path
  # to the wh loader, we do not need to set PATH for the job.
  #
  # The command is kept as a list (executable, then one argument per
  # element) so that it can be given to system() without involving a
  # shell: a $Bin path that contains spaces or shell metacharacters is
  # then passed through untouched.
  my @command = ("$Bin/$script_name", q[--verbose],
                 map { (q[--id_run], $_) } sort { $a <=> $b } @id_runs);
  warn sprintf qq[Will run command:\n"%s"\n], join q[ ], @command;
  if (!$dry_run) {
    if (system(@command) != 0) {
      # Work out why the loader failed; see perldoc -f system.
      my $reason = ($? == -1) ? "could not be started: $!"
                 : ($? & 127) ? 'died with signal ' . ($? & 127)
                 :              'exited with status ' . ($? >> 8);
      die "Failed to execute command, loader $reason";
    }
  }
} else {
  warn "No eligible runs\n";
}

0;

__END__
=head1 NAME
warehouse_loader_launcher
=head1 SYNOPSIS
Finds runs that recently reached "secondary analysis in progress"
or "qc review pending" status and calls ml warehouse loader (default)
or the sequencescape (old) warehouse loader for those runs whose
run folder location is visible on the host where this script is running.
By default looks at statuses with dates within the last 3 hours.
=head1 USAGE
warehouse_loader_launcher
warehouse_loader_launcher --dry_run
warehouse_loader_launcher --num_hours 24
warehouse_loader_launcher --num_hours 0 # no time limit on status
warehouse_loader_launcher --run_status 'archival pending' --run_status 'archival in progress'
warehouse_loader_launcher --sswh # to launch the old warehouse loader
=head1 DESCRIPTION
=head1 REQUIRED ARGUMENTS
None
=head1 OPTIONS
--help - brief help message
--dry_run - a boolean flag; if true, the script prints what will
happen and exits
--num_hours - number of hours to look back at status dates
--run_status - an array of run status descriptions
--sswh - a boolean flag, switching from ml to ss warehouse
=head1 EXIT STATUS
0
=head1 CONFIGURATION
=head1 DIAGNOSTICS
=head1 DEPENDENCIES
=over
=item strict
=item warnings
=item lib
=item FindBin
=item Getopt::Long
=item Pod::Usage
=item npg_tracking::Schema
=item DateTime
=item DateTime::Duration
=item Readonly
=back
=head1 INCOMPATIBILITIES
=head1 BUGS AND LIMITATIONS
=head1 AUTHOR
Marina Gourtovaia
=head1 LICENSE AND COPYRIGHT
Copyright (C) 2018 by Genome Research Limited
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
78 changes: 42 additions & 36 deletions lib/npg_warehouse/loader/autoqc.pm
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,9 @@ use MooseX::StrictConstructor;
use Readonly;

use npg_qc::autoqc::qc_store;
use npg_qc::autoqc::qc_store::options qw/$ALL/;
use npg_qc::autoqc::qc_store::options qw/$LANES $PLEXES/;
use npg_qc::autoqc::qc_store::query;
use npg_qc::autoqc::results::collection;

our $VERSION = '0';

Expand Down Expand Up @@ -46,15 +47,15 @@ Readonly::Hash our %AUTOQC_MAPPING => {
'rna_intronic_rate' => 'intronic_rate',
'rna_transcripts_detected' => 'transcripts_detected',
'rna_globin_percent_tpm' => 'globin_pct_tpm',
'rna_mitochondrial_percent_tpm' => 'mt_pct_tpm',
},
genotype_call => {
'gbs_call_rate' => 'genotype_call_rate',
'gbs_pass_rate' => 'genotype_passed_rate',
},
};

Readonly::Scalar our $Q_TWENTY => 20;
Readonly::Scalar our $HUNDRED => 100;
Readonly::Scalar my $HUNDRED => 100;

=head1 NAME
Expand Down Expand Up @@ -86,20 +87,13 @@ has 'verbose' => ( isa => 'Bool',

=head2 autoqc_store
A driver to retrieve autoqc objects. If DB storage is not available,
it will give no error, so no need to mock DB for this one in tests.
Just mock the staging area in your tests
A driver to retrieve autoqc objects, required attribute.
=cut
has 'autoqc_store' => ( isa => 'npg_qc::autoqc::qc_store',
is => 'ro',
required => 0,
lazy_build => 1,
required => 1,
);
sub _build_autoqc_store {
my $self = shift;
return npg_qc::autoqc::qc_store->new(verbose => $self->verbose);
}

=head2 plex_key
Expand Down Expand Up @@ -180,22 +174,22 @@ sub _insert_size {
sub _qX_yield {
my ($self, $result, $autoqc) = @_;

if ($result->threshold_quality != $Q_TWENTY) {
croak 'Need Q20 quality, got ' . $result->threshold_quality;
}

my $data = {};
if (defined $result->yield1) {
$data->{q20_yield_kb_forward_read} = $result->yield1;
}
if (defined $result->yield2) {
$data->{q20_yield_kb_reverse_read} = $result->yield2;
foreach my $read (qw/1 2/) {
foreach my $quality (qw/20 30 40/) {
my $autoqc_method_name = sprintf 'yield%s_q%s', $read, $quality;
my $wh_column_name = sprintf 'q%s_yield_kb_%s_read',
$quality, ($read eq '1') ? 'forward' : 'reverse';
my $value = $result->$autoqc_method_name;
if (defined $value) {
$data->{$wh_column_name} = $result->$autoqc_method_name;
}
}
}
$self->_copy_fields($data, $autoqc, $result->position, $result->tag_index);
return;
}


sub _ref_match {
my ($self, $result, $autoqc) = @_;

Expand Down Expand Up @@ -335,6 +329,13 @@ sub _bam_flagstats {
? ($result->mate_mapped_defferent_chr_5 * $HUNDRED / $num_reads)
: 0.00);
$self->_copy_fields({chimeric_reads_percent => $chimeric_reads}, $autoqc, $position, $tag_index);
foreach my $method (qw(target_filter target_length target_mapped_reads
target_proper_pair_mapped_reads target_mapped_bases target_coverage_threshold
target_percent_gt_coverage_threshold)) {
if(my $r = $result->$method ) {
$self->_copy_fields({$method => $r}, $autoqc, $position, $tag_index);
}
}
return;
}

Expand Down Expand Up @@ -400,10 +401,6 @@ sub _genotype {
sub _autoqc_check {
my ($self, $result, $autoqc) = @_;

my $num_components = $result->composition->num_components();
if ($num_components > 1){
croak q[Too many components for check ] . $result->class_name;
}
my $component = $result->composition->get_component(0);
my $position = $component->position;
my $tag_index = $component->tag_index;
Expand All @@ -428,29 +425,38 @@ sub _autoqc_check {

=head2 retrieve
Retrieves autoqc results for a run
Retrieves autoqc results for a run. Skips results for multi-component entities.
=cut
sub retrieve {
my ($self, $id_run, $npg_schema) = @_;

my $query = npg_qc::autoqc::qc_store::query->new(
id_run => $id_run,
option => $ALL,
npg_tracking_schema=> $npg_schema,
propagate_npg_tracking_schema => 1);

my $autoqc = {};
my $collection = $self->autoqc_store->load($query);
my $query1 = npg_qc::autoqc::qc_store::query->new(
id_run => $id_run,
option => $LANES,
npg_tracking_schema => $npg_schema
);
my $query2 = npg_qc::autoqc::qc_store::query->new(
id_run => $id_run,
option => $PLEXES,
npg_tracking_schema => $npg_schema
);
my $collection = npg_qc::autoqc::results::collection->join_collections(
$self->autoqc_store->load($query1), $self->autoqc_store->load($query2));
$collection->sort_collection(q[check_name]); # tag metrics object are after tag decode stats now

my $i = $collection->size - 1;
my $autoqc = {};
while ($i >= 0) { # iterating from tail to head
my $result = $collection->get($i);
$i--;
if ($result->composition->num_components() > 1) {
next;
}
my $method_name = exists $AUTOQC_MAPPING{$result->class_name} ? q[_autoqc_check] : q[_] . $result->class_name;
if ($self->can($method_name)) {
$self->$method_name($result, $autoqc);
}
$i--;
}
return $autoqc;
}
Expand Down
Loading

0 comments on commit ae8f402

Please sign in to comment.