Merge pull request #102 from wtsi-npg/devel

merge from devel to master to create release 39.11
wtsi-npg · Jan 14, 2019 · ae8f402 · ae8f402
2 parents 1530272 + 1d615ad
commit ae8f402
Show file tree

Hide file tree

Showing 197 changed files with 655 additions and 1,080 deletions.
diff --git a/Changes b/Changes
@@ -1,5 +1,16 @@
 LIST OF CHANGES
 
+release 39.11
+ - auto QC loader query construction: do not use invalid option
+ - wh loader: skip loading results for multi-component entities
+ - to retrieve lane-level results for multi-component entities,
+   ask for lanes and plexes separately
+ - stop loading Illumina qc data that came from cached_query table
+   of the npg_qc database (pf_cluster_count, raw_cluster_count, pf_bases)
+ - load q30 and q40 yields from qX_yield autoqc check results     
+ - load bam_flagstats target metrics into iseq_product_metrics table
+ - a script to launch warehouse loader script for certain runs
+
 release 39.10
  - two gbs metrics to be added to the iseq_product_metrics table
 

diff --git a/MANIFEST b/MANIFEST
diff --git a/bin/warehouse_loader_launcher b/bin/warehouse_loader_launcher
@@ -0,0 +1,186 @@
+#!/usr/bin/env perl
+
+use strict;
+use warnings;
+use FindBin qw($Bin);
+use lib ( -d "$Bin/../lib/perl5" ? "$Bin/../lib/perl5" : "$Bin/../lib" );
+use DateTime;
+use DateTime::Duration;
+use Getopt::Long;
+use Pod::Usage;
+use Readonly;
+
+use npg_tracking::Schema;
+
+our $VERSION = '0';
+
+Readonly::Scalar my $ML_WH_LOADER_COMMAND => 'npg_runs2mlwarehouse';
+Readonly::Scalar my $SS_WH_LOADER_COMMAND => 'warehouse_loader';
+Readonly::Array  my @RUN_STATUSES =>
+                 ('secondary analysis in progress', 'qc review pending');
+Readonly::Scalar my $NUM_HOURS_LOOK_BACK  => 3;
+
+# Defaults; each of them can be overridden from the command line.
+my $look_back    = $NUM_HOURS_LOOK_BACK;
+my $run_statuses = \@RUN_STATUSES;
+my $dry_run      = 0;
+my $old_wh       = 0;
+my $help;
+
+# GetOptions returns false when the command line cannot be parsed
+# (unknown option, non-integer value for --num_hours, etc.). Stop with
+# a usage message instead of carrying on with partially applied options.
+GetOptions (
+            'help'          => \$help,
+            'dry_run!'      => \$dry_run,
+            'sswh!'         => \$old_wh,
+            'num_hours=i'   => \$look_back,
+            'run_status=s@' => \$run_statuses,
+           ) or pod2usage(2);
+
+if ($help) { pod2usage(0); }
+
+# Choose the loader: ml warehouse by default, the old sequencescape
+# warehouse loader if --sswh was given.
+my $script_name = $ML_WH_LOADER_COMMAND;
+if ($old_wh) {
+  $script_name = $SS_WH_LOADER_COMMAND;
+}
+
+# NOTE(review): DateTime->now() is in UTC - confirm that the run status
+# dates in the tracking database are comparable with UTC timestamps.
+my $date = DateTime->now();
+my $time_window = 'without limit';
+if ($look_back) {
+  $time_window = "$look_back hours";
+}
+
+# Report the settings this invocation is going to use.
+warn "$date == Running warehouse_loader_launcher, looking back $time_window\n";
+warn sprintf "Considering statuses: %s\n", join q[, ], @{$run_statuses};
+warn "Will use $script_name warehouse loader\n";
+if ($dry_run) {
+  warn "DRY RUN\n";
+}
+
+# Only current run statuses with one of the requested descriptions.
+my $query = {
+  'run_status_dict.description' => $run_statuses,
+  'run_statuses.iscurrent'      => 1,
+};
+if ($look_back) {
+  # A zero value means no time limit, so the date condition is added
+  # only for a non-zero look-back period.
+  $date->subtract(DateTime::Duration->new(hours => $look_back));
+  $date = sprintf q[%s], $date;
+  $query->{'run_statuses.date'} = { q[>] => $date };
+}
+
+# Fetch all runs whose current status satisfies the query.
+my $run_rs = npg_tracking::Schema->connect()->resultset('Run');
+my @rows   = $run_rs->search(
+  $query,
+  { join => { 'run_statuses' => 'run_status_dict' } },
+)->all();
+
+# Keep only the runs whose run folder glob matches something on this host.
+my @id_runs = ();
+foreach my $run (@rows) {
+  my @dirs = glob $run->folder_path_glob;
+  if (!@dirs) {
+    next;
+  }
+  my $id_run = $run->id_run;
+  warn sprintf "Run %s: found %s\n", $id_run, join q[, ], @dirs;
+  push @id_runs, $id_run;
+}
+
+if (@id_runs) {
+  # This script might be run as a cron job. If we specify a full path
+  # to the wh loader, we do not need to set PATH for the job.
+  # The command is kept as a list so that system() executes the loader
+  # directly; no shell gets a chance to re-parse the path or arguments.
+  my @command = ("$Bin/$script_name", q[--verbose],
+                 map { (q[--id_run], $_) } sort {$a <=> $b} @id_runs);
+  my $command = join q[ ], @command;
+  warn qq[Will run command:\n"$command"\n];
+  if (!$dry_run) {
+    # system() returns the wait status of the child, or -1 if the
+    # command could not be started; anything but zero is a failure.
+    my $status = system @command;
+    if ($status != 0) {
+      die "Failed to execute command, system call returned $status";
+    }
+  }
+} else {
+  warn "No eligible runs\n";
+}
+
+0;
+
+__END__
+
+=head1 NAME
+
+warehouse_loader_launcher
+
+=head1 SYNOPSIS
+
+Finds runs that recently reached "secondary analysis in progress"
+or "qc review pending" status and calls ml warehouse loader (default)
+or the sequencescape (old) warehouse loader for those runs whose
+run folder location is visible on the host where this script is running.
+
+By default looks at statuses dated within the last 3 hours.
+
+=head1 USAGE
+
+  warehouse_loader_launcher
+  warehouse_loader_launcher --dry_run
+  warehouse_loader_launcher --num_hours 24
+  warehouse_loader_launcher --num_hours 0  # no time limit on status
+  warehouse_loader_launcher --run_status 'archival pending' --run_status 'archival in progress'
+  warehouse_loader_launcher --sswh  # to launch the old warehouse loader
+
+=head1 DESCRIPTION
+  
+=head1 REQUIRED ARGUMENTS
+
+None
+
+=head1 OPTIONS
+
+  --help       - brief help message
+  --dry_run    - a boolean flag; if true, the script prints what will
+                 happen and exits
+  --num_hours  - number of hours to look back at status dates
+  --run_status - an array of run status descriptions
+  --sswh       - a boolean flag, switching from ml to ss warehouse
+
+=head1 EXIT STATUS
+
+0
+
+=head1 CONFIGURATION
+
+=head1 DIAGNOSTICS
+
+=head1 DEPENDENCIES
+
+=over
+
+=item strict
+
+=item warnings
+
+=item lib
+
+=item FindBin
+
+=item Getopt::Long
+
+=item Pod::Usage
+
+=item npg_tracking::Schema
+
+=item DateTime
+
+=item DateTime::Duration
+
+=item Readonly
+
+=back
+
+=head1 INCOMPATIBILITIES
+
+=head1 BUGS AND LIMITATIONS
+
+=head1 AUTHOR
+
+Marina Gourtovaia
+
+=head1 LICENSE AND COPYRIGHT
+
+Copyright (C) 2018 by Genome Research Limited
+
+This program is free software: you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
diff --git a/lib/npg_warehouse/loader/autoqc.pm b/lib/npg_warehouse/loader/autoqc.pm
@@ -6,8 +6,9 @@ use MooseX::StrictConstructor;
 use Readonly;
 
 use npg_qc::autoqc::qc_store;
-use npg_qc::autoqc::qc_store::options qw/$ALL/;
+use npg_qc::autoqc::qc_store::options qw/$LANES $PLEXES/;
 use npg_qc::autoqc::qc_store::query;
+use npg_qc::autoqc::results::collection;
 
 our $VERSION = '0';
 
@@ -46,15 +47,15 @@ Readonly::Hash   our %AUTOQC_MAPPING  => {
                            'rna_intronic_rate'             => 'intronic_rate',
                            'rna_transcripts_detected'      => 'transcripts_detected',
                            'rna_globin_percent_tpm'        => 'globin_pct_tpm',
+                           'rna_mitochondrial_percent_tpm'  => 'mt_pct_tpm',
                          },
      genotype_call    => {
                            'gbs_call_rate'                 => 'genotype_call_rate',
                            'gbs_pass_rate'                 => 'genotype_passed_rate',
                          },
 };
 
-Readonly::Scalar our $Q_TWENTY => 20;
-Readonly::Scalar our $HUNDRED  => 100;
+Readonly::Scalar my $HUNDRED  => 100;
 
 =head1 NAME
 
@@ -86,20 +87,13 @@ has 'verbose'      => ( isa        => 'Bool',
 
 =head2 autoqc_store
 
-A driver to retrieve autoqc objects. If DB storage is not available,
-it will give no error, so no need to mock DB for this one in tests.
-Just mock the staging area in your tests
+A driver to retrieve autoqc objects, required attribute.
 
 =cut
 has 'autoqc_store' =>    ( isa        => 'npg_qc::autoqc::qc_store',
                            is         => 'ro',
-                           required   => 0,
-                           lazy_build => 1,
+                           required   => 1,
                          );
-sub _build_autoqc_store {
-    my $self = shift;
-    return npg_qc::autoqc::qc_store->new(verbose => $self->verbose);
-}
 
 =head2 plex_key
 
@@ -180,22 +174,22 @@ sub _insert_size {
 sub _qX_yield {
     my ($self, $result, $autoqc) = @_;
 
-    if ($result->threshold_quality != $Q_TWENTY) {
-        croak 'Need Q20 quality, got ' . $result->threshold_quality;
-    }
-
     my $data = {};
-    if (defined $result->yield1) {
-        $data->{q20_yield_kb_forward_read} = $result->yield1;
-    }
-    if (defined $result->yield2) {
-        $data->{q20_yield_kb_reverse_read} = $result->yield2;
+    # Map the yield<read>_q<quality> accessors of the result object to the
+    # q<quality>_yield_kb_<forward|reverse>_read warehouse columns.
+    foreach my $read (qw/1 2/) {
+        foreach my $quality (qw/20 30 40/) {
+            my $autoqc_method_name = sprintf 'yield%s_q%s', $read, $quality;
+            my $wh_column_name     = sprintf 'q%s_yield_kb_%s_read',
+                $quality, ($read eq '1') ? 'forward' : 'reverse';
+            # Call the accessor once and reuse the value; undefined
+            # values are not copied.
+            my $value = $result->$autoqc_method_name;
+            if (defined $value) {
+                $data->{$wh_column_name} = $value;
+            }
+        }
     }
     $self->_copy_fields($data, $autoqc, $result->position, $result->tag_index);
     return;
 }
 
-
 sub _ref_match {
     my ($self, $result, $autoqc) = @_;
 
@@ -335,6 +329,13 @@ sub _bam_flagstats {
         ? ($result->mate_mapped_defferent_chr_5 * $HUNDRED / $num_reads)
         : 0.00);
     $self->_copy_fields({chimeric_reads_percent => $chimeric_reads}, $autoqc, $position, $tag_index);
+    foreach my $method (qw(target_filter target_length target_mapped_reads 
+           target_proper_pair_mapped_reads target_mapped_bases target_coverage_threshold 
+           target_percent_gt_coverage_threshold)) {
+       if(my $r = $result->$method ) {
+          $self->_copy_fields({$method => $r}, $autoqc, $position, $tag_index);
+       }
+    }
     return;
 }
 
@@ -400,10 +401,6 @@ sub _genotype {
 sub _autoqc_check {
     my ($self, $result, $autoqc) = @_;
 
-    my $num_components = $result->composition->num_components();
-    if ($num_components > 1){
-        croak q[Too many components for check ] . $result->class_name;
-    }
     my $component = $result->composition->get_component(0);
     my $position = $component->position;
     my $tag_index = $component->tag_index;
@@ -428,29 +425,38 @@ sub _autoqc_check {
 
 =head2 retrieve
 
-Retrieves autoqc results for a run
+Retrieves autoqc results for a run. Skips results for multi-component entities.
 
 =cut
 sub retrieve {
     my ($self, $id_run, $npg_schema) = @_;
 
-    my $query = npg_qc::autoqc::qc_store::query->new(
-                                                id_run => $id_run,
-                                                option => $ALL,
-                                                npg_tracking_schema=> $npg_schema,
-                                                propagate_npg_tracking_schema => 1);
-
-    my $autoqc = {};
-    my $collection = $self->autoqc_store->load($query);
+    my $query1 = npg_qc::autoqc::qc_store::query->new( # query with the $LANES option
+                                                id_run              => $id_run,
+                                                option              => $LANES,
+                                                npg_tracking_schema => $npg_schema
+                                                     );
+    my $query2 = npg_qc::autoqc::qc_store::query->new( # query with the $PLEXES option
+                                                id_run              => $id_run,
+                                                option              => $PLEXES,
+                                                npg_tracking_schema => $npg_schema
+                                                     );
+    my $collection = npg_qc::autoqc::results::collection->join_collections( # one collection from both loads
+                     $self->autoqc_store->load($query1), $self->autoqc_store->load($query2));
     $collection->sort_collection(q[check_name]); # tag metrics object are after tag decode stats now
+
     my $i = $collection->size - 1;
+    my $autoqc = {}; # accumulates the retrieved data, returned to the caller
     while ($i >= 0) { # iterating from tail to head
         my $result = $collection->get($i);
+        $i--; # decrement before any 'next' so that the loop always advances
+        if ($result->composition->num_components() > 1) { # multi-component entity, skipped
+            next;
+        }
         my $method_name = exists $AUTOQC_MAPPING{$result->class_name} ? q[_autoqc_check] : q[_] . $result->class_name;
         if ($self->can($method_name)) {
             $self->$method_name($result, $autoqc);
         }
-        $i--;
     }
     return $autoqc;
 }