diff --git a/README.md b/README.md index 3c4d704..9c02b3e 100644 --- a/README.md +++ b/README.md @@ -3,19 +3,18 @@ Sequence element enrichment analysis. This document contains installation instuctions. Usage can be found on the [wiki](https://github.com/johnlees/seer/wiki/Usage), and more information in the [paper](http://www.nature.com/articles/ncomms12797). -Installation -============== -###Use a pre-compiled release +## Installation +### Use a pre-compiled release Head to the [release](https://github.com/johnlees/seer/releases) page and download and unpack the tarball. If you have the dependencies installed use the dynamic version, otherwise use the static version (tested on Ubuntu only; static_all should work on other 64-bit Linux platforms). -###Use on a virtual machine +### Use on a virtual machine We have a virtual machine, containing SEER and other useful bioinformatics programs, which is available at ftp://ftp.sanger.ac.uk/pub/pathogens/pathogens-vm/pathogens-vm.latest.ova and can be imported as an appliance in [VirtualBox](https://www.virtualbox.org/). -###Compile source code +### Compile source code First clone the repository @@ -32,8 +31,7 @@ Currently tested on Linux only, installation should proceed as Full installation instructions are available below -Dependencies --------------- +## Dependencies seer currently depends on - gzstream @@ -49,7 +47,7 @@ You will also require You probably already have boost, HDF5 and dlib (as long as you did clone --recursive). -###Installation on Ubuntu/biolinux +### Installation on Ubuntu/biolinux Running the following commands will install seer @@ -75,7 +73,7 @@ Running the following commands will install seer cd .. cd src && make CXX=/usr/bin/g++-4.9 -###General installation instructions +### General installation instructions **gzstream** @@ -142,6 +140,5 @@ do by running make CXX=g++-4.9 -Usage, interpretation of results, and troubleshooting -============= +## Usage, interpretation of results, and troubleshooting See the [wiki](https://github.com/johnlees/seer/wiki/Usage) diff --git a/src/seerIO.cpp b/src/seerIO.cpp index 7a5dac2..5f12edf 100644 --- a/src/seerIO.cpp +++ b/src/seerIO.cpp @@ -138,6 +138,7 @@ arma::mat readHDF5(const std::string& file_name) arma::mat readMDS(const std::string& file_name, const std::vector& sample_names) { + std::map mds_idx; arma::mat MDS = readHDF5(file_name); // Check that the sample names match up @@ -147,31 +148,40 @@ arma::mat readMDS(const std::string& file_name, const std::vector& sampl std::ifstream samples_in(sample_name_file.c_str()); if (samples_in) { - arma::uvec keep_indices(sample_names.size()); - unsigned int sample_row = 0; + arma::uvec keep_indices(MDS.n_rows); unsigned int file_row = 0; + // Read in sample file to get MDS row order while (samples_in) { std::string sample_name; samples_in >> sample_name; - - // Must be ordered, and lines in sample_names be a subset of what - // is in the file. Otherwise a non-compatible mds will be returned - // which will throw - if (sample_name == sample_names.at(sample_row).iid()) + if (samples_in) { - keep_indices(sample_row) = file_row; - if (++sample_row >= sample_names.size()) - { - break; - } + mds_idx[sample_name] = file_row; } ++file_row; } + // Get MDS rows (using sample file read above) in same sorted order as + // sample vector + unsigned int sample_row = 0; + for (auto it = sample_names.begin(); it != sample_names.end(); ++it) + { + auto find_it = mds_idx.find(it->iid()); + if (find_it == mds_idx.end()) + { + throw std::runtime_error("Could not find sample " + it->iid() + " in the pheno file"); + } + else + { + keep_indices(sample_row) = find_it->second; + } + sample_row++; + } + // Only keep the rows where the pheno file has data - if (sample_row == sample_names.size()) + if (mds_idx.size() >= sample_names.size()) { MDS = MDS.rows(keep_indices); } diff --git a/src/seercommon.hpp b/src/seercommon.hpp index 221bbc0..9a16465 100644 --- a/src/seercommon.hpp +++ b/src/seercommon.hpp @@ -41,7 +41,7 @@ #include "covar.hpp" // Constants -const std::string VERSION = "1.2alpha2"; +const std::string VERSION = "1.2alpha3"; // Default options const double maf_default = 0.01; const long int max_length_default = 100;