Skip to content

Commit

Permalink
Updates to BUS format, fixes htslib config mistake
Browse files Browse the repository at this point in the history
  • Loading branch information
pmelsted committed Nov 16, 2018
1 parent b6f6dc5 commit 95dc71a
Show file tree
Hide file tree
Showing 5 changed files with 33 additions and 8 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ ExternalProject_Add(htslib
PREFIX ${PROJECT_SOURCE_DIR}/ext/htslib
SOURCE_DIR ${PROJECT_SOURCE_DIR}/ext/htslib
BUILD_IN_SOURCE 1
CONFIGURE_COMMAND ${PROJECT_SOURCE_DIR}/ext/htslib/configure
CONFIGURE_COMMAND autoheader && autoconf && ${PROJECT_SOURCE_DIR}/ext/htslib/configure
--prefix=${PREFIX} --disable-bz2 --disable-lzma --disable-libcurl
BUILD_COMMAND make lib-static
INSTALL_COMMAND ""
Expand Down
4 changes: 3 additions & 1 deletion src/BUSData.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
#include <vector>
#include <stdint.h>

const uint32_t BUSFORMAT_VERSION = 1;

struct BUSTranscript {
std::string name;
uint32_t transcriptLength;
Expand All @@ -13,7 +15,7 @@ struct BUSTranscript {


// Plain aggregate with three members: a text string, a list of BUSTranscript
// records, and a list of int32_t vectors.
struct BUSHeader {
// NOTE(review): `text` is listed twice below. This looks like the removed and
// added sides of a whitespace-only change in a diff view -- confirm that the
// real header declares it only once.
std::string text;
std::string text;
// One BUSTranscript record per entry.
std::vector<BUSTranscript> transcripts;
// NOTE(review): presumably one inner vector per equivalence class, holding
// transcript ids -- confirm against the code that fills this in.
std::vector<std::vector<int32_t>> ecs;
};
Expand Down
6 changes: 6 additions & 0 deletions src/BUSTools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,14 @@
#include "BUSData.h"

// Writes the BUS file header to `out`.
//
// Fields, in write order:
//   4 bytes  magic "BUS\0"
//   uint32   BUSFORMAT_VERSION
//   uint32   bclen
//   uint32   umilen
//   uint32   byte length of the header text
//   N bytes  header text, without a terminating NUL
//
// Every integer is written as a uint32_t in the host's native byte order, so
// each field is exactly four bytes regardless of the width of `int`. That
// keeps bclen and umilen at byte offsets 8 and 12, which matters for any code
// that later seeks past the magic and version to overwrite them in place.
//
// @param out     stream the header is written to; its state is not checked here.
// @param bclen   value stored in the bclen field.
// @param umilen  value stored in the umilen field.
void writeBUSHeader(std::ofstream &out, int bclen, int umilen) {
  // Emits one fixed-width 32-bit field as raw bytes.
  const auto write_u32 = [&out](uint32_t value) {
    out.write(reinterpret_cast<const char *>(&value), sizeof(value));
  };

  // The literal already ends in an explicit NUL; write exactly four bytes.
  out.write("BUS\0", 4);
  write_u32(BUSFORMAT_VERSION);
  write_u32(static_cast<uint32_t>(bclen));
  write_u32(static_cast<uint32_t>(umilen));

  const std::string header_text = "BUS file produced by kallisto";
  // The text is length-prefixed, so the prefix and payload must agree.
  const uint32_t text_len = static_cast<uint32_t>(header_text.size());
  write_u32(text_len);
  out.write(header_text.data(), text_len);
}

void writeBUSData(std::ofstream &out, const std::vector<BUSData> &bv) {
Expand Down
14 changes: 12 additions & 2 deletions src/ProcessReads.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1338,21 +1338,31 @@ void BUSProcessor::processBuffer() {

// copy the umi
int umilen = (busopt.umi.start == busopt.umi.stop) ? l[busopt.umi.fileno] - busopt.umi.start : busopt.umi.stop - busopt.umi.start;
if (l[busopt.umi.fileno] < busopt.umi.start + umilen) {
continue; // too short
}
memcpy(umi, s[busopt.umi.fileno] + busopt.umi.start, umilen);
umi[umilen] = 0;
if (umilen >= 0 && umilen <= 32) {
umi_len[umilen]++;
}


// TODO handle concatenated barcodes

auto &bcc = busopt.bc[0];
int blen = 0;
bool bad_bc = false;
for (auto &bcc : busopt.bc) {
int bclen = (bcc.start == bcc.stop) ? l[bcc.fileno] - bcc.start : bcc.stop - bcc.start;
if (l[bcc.fileno] < bcc.start + bclen) {
bad_bc = true;
break;
}
memcpy(bc+blen, s[bcc.fileno] + bcc.start, bclen);
blen += bclen;
}
if (bad_bc) {
continue;
}
bc[blen] = 0;

if (blen >= 0 && blen <= 32) {
Expand Down
15 changes: 11 additions & 4 deletions src/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1644,12 +1644,11 @@ int main(int argc, char *argv[]) {
}


if (write) {
//std::cout << bclen << "\t" << umilen << endl;
if (write) {
std::FILE* fp = std::fopen((opt.output + "/output.bus").c_str(), "r+b");
if (fp != nullptr) {
std::fseek(fp,0,SEEK_SET);
//write to int values
std::fseek(fp,8,SEEK_SET); // skip magic string and version
// write to uint32_t values
std::fwrite(&bclen, sizeof(bclen),1,fp);
std::fwrite(&umilen, sizeof(umilen),1,fp);
std::fclose(fp);
Expand All @@ -1660,6 +1659,14 @@ int main(int argc, char *argv[]) {

writeECList(opt.output + "/matrix.ec", index);

// write transcript names
std::ofstream transout_f((opt.output + "/transcripts.txt"));
for (const auto &t : index.target_names_) {
transout_f << t << "\n";
}
transout_f.close();


// gather stats
num_unique = 0;
for (int i = 0; i < index.num_trans; i++) {
Expand Down

0 comments on commit 95dc71a

Please sign in to comment.