From 90247ce90cbd243c7a6ff64079326398ca49e9aa Mon Sep 17 00:00:00 2001 From: Brent Pedersen Date: Fri, 22 Sep 2023 10:08:30 +0200 Subject: [PATCH] use xcf --- Cargo.toml | 2 +- src/bedder_bed.rs | 2 ++ src/bedder_vcf.rs | 15 ++++++++----- src/sniff.rs | 54 +++++++++++++++++++++++------------------------ 4 files changed, 40 insertions(+), 33 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 2291d94..2df8250 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,7 +26,7 @@ env_logger = "0.10.0" log = "0.4.19" linear-map = "1.2.0" hashbrown = "0.14.0" -xvcf = { version = "0.1.1", git = "https://github.com/brentp/xvcf-rs" } +xvcf = { version = "0.1.2", git = "https://github.com/brentp/xvcf-rs" } [features] default = ["bed", "vcf", "bcf", "csi", "core", "bam", "sam", "bgzf"] diff --git a/src/bedder_bed.rs b/src/bedder_bed.rs index fd84c2a..de97584 100644 --- a/src/bedder_bed.rs +++ b/src/bedder_bed.rs @@ -1,3 +1,5 @@ +#![allow(clippy::useless_conversion)] // these are needed to support e.g. smartstring + use crate::position::{Field, FieldError, Position, Positioned, Value}; use crate::string::String; pub use bed::record::Record; diff --git a/src/bedder_vcf.rs b/src/bedder_vcf.rs index 5e84045..f799e0e 100644 --- a/src/bedder_vcf.rs +++ b/src/bedder_vcf.rs @@ -1,8 +1,8 @@ +#![allow(clippy::useless_conversion)] // these are needed to support e.g. smartstring use crate::position::{Field, FieldError, Position, Positioned, Value}; use crate::string::String; -use noodles::bcf; use noodles::vcf::{self, record::Chromosome}; -use std::io::{self, BufRead, Read}; +use std::io::{self, Read}; use std::result; use vcf::record::info::field; use vcf::record::QualityScore; @@ -12,6 +12,7 @@ pub use xvcf; pub struct BedderVCF { reader: xvcf::Reader, record_number: u64, + header: vcf::Header, } impl BedderVCF @@ -19,24 +20,28 @@ where R: Read + 'static, { pub fn new(r: xvcf::Reader) -> io::Result> { + let h = r.header().clone(); let v = BedderVCF { reader: r, record_number: 0, + header: h, }; Ok(v) } } -pub fn from_reader(r: Box) -> io::Result> { +/* +pub fn _from_reader(r: Box) -> io::Result> { let reader = xvcf::Reader::from_reader(r, None)?; BedderVCF::new(reader) } +*/ fn match_info_value(info: &vcf::record::Info, name: &str) -> result::Result { //let info = record.info(); let key: vcf::record::info::field::Key = name .parse() - .map_err(|_| FieldError::InvalidFieldName(String::from(name.clone())))?; + .map_err(|_| FieldError::InvalidFieldName(String::from(name)))?; match info.get(&key) { Some(value) => match value { @@ -142,7 +147,7 @@ where ) -> Option> { let mut v = vcf::Record::default(); - match self.reader.next_record(&self.reader.header(), &mut v) { + match self.reader.next_record(&self.header, &mut v) { Ok(0) => None, // EOF Ok(_) => { self.record_number += 1; diff --git a/src/sniff.rs b/src/sniff.rs index 9983eb8..bed0310 100644 --- a/src/sniff.rs +++ b/src/sniff.rs @@ -3,10 +3,9 @@ use std::io::{BufRead, Read}; use std::path::Path; use crate::bedder_bed::BedderBed; -use crate::bedder_vcf::{BedderVCF, VCF}; +use crate::bedder_vcf::BedderVCF; use crate::position::PositionedIterator; use noodles::bgzf; -use noodles::vcf; /// File formats supported by this file detector. #[derive(Debug, PartialEq)] @@ -52,35 +51,36 @@ where format, compression ); - let br: Box = match compression { - Compression::None => Box::new(reader), - Compression::GZ => Box::new(std::io::BufReader::new(GzDecoder::new(reader))), - Compression::BGZF => match format { - // BCF|BAM will appear as bgzf so we don't want to do this outside - FileFormat::BCF | FileFormat::BAM => Box::new(reader), - _ => Box::new(bgzf::Reader::new(reader)), - }, - Compression::RAZF => unimplemented!(), - }; + /* + */ match format { - FileFormat::VCF => { - let mut vcf = vcf::reader::Builder.build_from_reader(br)?; - let hdr = vcf.read_header()?; - let bed_vcf = BedderVCF::new(VCF::VCF(vcf), hdr)?; - Ok(Box::new(bed_vcf)) - } - FileFormat::BCF => { - let mut bcf = noodles::bcf::Reader::new(br); - let hdr = bcf.read_header()?; - let bed_vcf = BedderVCF::new(VCF::BCF(bcf), hdr)?; + FileFormat::VCF | FileFormat::BCF => { + // get &str from path + let path = path.as_ref().to_str().unwrap(); + let x = xvcf::Reader::from_reader(Box::new(reader), Some(path))?; + let bed_vcf = BedderVCF::new(x)?; Ok(Box::new(bed_vcf)) } - - FileFormat::BED => { - let reader = BedderBed::new(br); - Ok(Box::new(reader)) + _ => { + let br: Box = match compression { + Compression::None => Box::new(reader), + Compression::GZ => Box::new(std::io::BufReader::new(GzDecoder::new(reader))), + Compression::BGZF => match format { + // BCF|BAM will appear as bgzf so we don't want to do this outside + FileFormat::BCF | FileFormat::BAM => Box::new(reader), + _ => Box::new(bgzf::Reader::new(reader)), + }, + Compression::RAZF => unimplemented!(), + }; + + match format { + FileFormat::BED => { + let reader = BedderBed::new(br); + Ok(Box::new(reader)) + } + _ => unimplemented!("{format:?} not yet supported"), + } } - _ => unimplemented!("{format:?} not yet supported"), } }