diff --git a/CHANGELOG.md b/CHANGELOG.md index 53b3dec5..8918fc26 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed + +- Forced reading the `duplicates` and `coverage` columns of KrakenUniq profiles as + float (#123). + ## [0.4.0] - (2023-07-02) ### Added diff --git a/src/taxpasta/infrastructure/application/krakenuniq/krakenuniq_profile_reader.py b/src/taxpasta/infrastructure/application/krakenuniq/krakenuniq_profile_reader.py index 98d41d0d..341342a0 100644 --- a/src/taxpasta/infrastructure/application/krakenuniq/krakenuniq_profile_reader.py +++ b/src/taxpasta/infrastructure/application/krakenuniq/krakenuniq_profile_reader.py @@ -52,7 +52,11 @@ def read(cls, profile: BufferOrFilepath) -> DataFrame[KrakenUniqProfile]: header=0, index_col=False, skipinitialspace=True, - dtype={"%": float}, + dtype={ + KrakenUniqProfile.percent: float, + KrakenUniqProfile.duplicates: float, + KrakenUniqProfile.coverage: float, + }, ) cls._check_num_columns(result, KrakenUniqProfile) return result diff --git a/tests/data/krakenuniq/ERR3201952.krakenuniq.report.txt b/tests/data/krakenuniq/ERR3201952.krakenuniq.report.txt new file mode 100644 index 00000000..46975d6c --- /dev/null +++ b/tests/data/krakenuniq/ERR3201952.krakenuniq.report.txt @@ -0,0 +1,4 @@ +# KrakenUniq v1.0.2 DATE:2023-07-13T11:10:57Z DB:./testdb-krakenuniq DB_SIZE:358660 WD:/home/james/git/nf-core/taxprofiler/testing/blah/work/d4/9641b418650bf507e0b05143a5800d +# CL:/usr/local/bin/krakenuniq --db testdb-krakenuniq --threads 2 --report-file ERR3201952.krakenuniq.report.txt ERR3201952.merged.fastq.gz +% reads taxReads kmers dup cov taxID rank taxName +100 4937 4937 10648888 1 NA 0 no rank unclassified diff --git a/tests/integration/test_krakenuniq_etl.py b/tests/integration/test_krakenuniq_etl.py index f450f880..4bd8309a 100644 --- a/tests/integration/test_krakenuniq_etl.py +++ b/tests/integration/test_krakenuniq_etl.py @@ -55,6 +55,7 @@ def other_profile(data_dir: Path, request: pytest.FixtureRequest) -> Path: [ "test3.krakenuniq.report.txt", "test1.krakenuniq.report.txt", + "ERR3201952.krakenuniq.report.txt", pytest.param( "test1-invalid.krakenuniq.report.txt", marks=pytest.mark.raises(exception=SchemaErrors),