diff --git a/README.md b/README.md
index 3c4d704..9c02b3e 100644
--- a/README.md
+++ b/README.md
@@ -3,19 +3,18 @@
Sequence element enrichment analysis. This document contains
installation instructions. Usage can be found on the [wiki](https://github.com/johnlees/seer/wiki/Usage), and more information in the [paper](http://www.nature.com/articles/ncomms12797).
-Installation
-==============
-###Use a pre-compiled release
+## Installation
+### Use a pre-compiled release
Head to the [release](https://github.com/johnlees/seer/releases) page and download and unpack the tarball. If you have the dependencies installed, use the dynamic version; otherwise use the static version (tested on Ubuntu only; static_all should work on other 64-bit Linux platforms).
-###Use on a virtual machine
+### Use on a virtual machine
We have a virtual machine, containing SEER and other useful bioinformatics programs, which is available at
ftp://ftp.sanger.ac.uk/pub/pathogens/pathogens-vm/pathogens-vm.latest.ova
and can be imported as an appliance in [VirtualBox](https://www.virtualbox.org/).
-###Compile source code
+### Compile source code
First clone the repository
@@ -32,8 +31,7 @@ Currently tested on Linux only, installation should proceed as
Full installation instructions are available below
-Dependencies
---------------
+## Dependencies
seer currently depends on
- gzstream
@@ -49,7 +47,7 @@ You will also require
You probably already have boost, HDF5 and dlib (as long as you did clone --recursive).
-###Installation on Ubuntu/biolinux
+### Installation on Ubuntu/biolinux
Running the following commands will install seer
@@ -75,7 +73,7 @@ Running the following commands will install seer
cd ..
cd src && make CXX=/usr/bin/g++-4.9
-###General installation instructions
+### General installation instructions
**gzstream**
@@ -142,6 +140,5 @@ do by running
make CXX=g++-4.9
-Usage, interpretation of results, and troubleshooting
-=============
+## Usage, interpretation of results, and troubleshooting
See the [wiki](https://github.com/johnlees/seer/wiki/Usage)
diff --git a/src/seerIO.cpp b/src/seerIO.cpp
index 7a5dac2..5f12edf 100644
--- a/src/seerIO.cpp
+++ b/src/seerIO.cpp
@@ -138,6 +138,7 @@ arma::mat readHDF5(const std::string& file_name)
arma::mat readMDS(const std::string& file_name, const std::vector& sample_names)
{
+ std::map mds_idx;
arma::mat MDS = readHDF5(file_name);
// Check that the sample names match up
@@ -147,31 +148,40 @@ arma::mat readMDS(const std::string& file_name, const std::vector& sampl
std::ifstream samples_in(sample_name_file.c_str());
if (samples_in)
{
- arma::uvec keep_indices(sample_names.size());
- unsigned int sample_row = 0;
+ arma::uvec keep_indices(MDS.n_rows);
unsigned int file_row = 0;
+ // Read in sample file to get MDS row order
while (samples_in)
{
std::string sample_name;
samples_in >> sample_name;
-
- // Must be ordered, and lines in sample_names be a subset of what
- // is in the file. Otherwise a non-compatible mds will be returned
- // which will throw
- if (sample_name == sample_names.at(sample_row).iid())
+ if (samples_in)
{
- keep_indices(sample_row) = file_row;
- if (++sample_row >= sample_names.size())
- {
- break;
- }
+ mds_idx[sample_name] = file_row;
}
++file_row;
}
+ // Get MDS rows (using sample file read above) in same sorted order as
+ // sample vector
+ unsigned int sample_row = 0;
+ for (auto it = sample_names.begin(); it != sample_names.end(); ++it)
+ {
+ auto find_it = mds_idx.find(it->iid());
+ if (find_it == mds_idx.end())
+ {
+ throw std::runtime_error("Could not find sample " + it->iid() + " in the pheno file");
+ }
+ else
+ {
+ keep_indices(sample_row) = find_it->second;
+ }
+ sample_row++;
+ }
+
// Only keep the rows where the pheno file has data
- if (sample_row == sample_names.size())
+ if (mds_idx.size() >= sample_names.size())
{
MDS = MDS.rows(keep_indices);
}
diff --git a/src/seercommon.hpp b/src/seercommon.hpp
index 221bbc0..9a16465 100644
--- a/src/seercommon.hpp
+++ b/src/seercommon.hpp
@@ -41,7 +41,7 @@
#include "covar.hpp"
// Constants
-const std::string VERSION = "1.2alpha2";
+const std::string VERSION = "1.2alpha3";
// Default options
const double maf_default = 0.01;
const long int max_length_default = 100;