Skip to content

Commit

Permalink
Merge pull request #19 from CountESS-Project/dev
Browse files (browse the repository at this point in the history)
version 1.3.0 updates
  • Loading branch information
afrubin committed Aug 29, 2023
2 parents c82cb73 + 69b29cd commit 5f01fcb
Show file tree
Hide file tree
Showing 12 changed files with 57 additions and 109 deletions.
4 changes: 0 additions & 4 deletions .flake8

This file was deleted.

15 changes: 7 additions & 8 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,19 @@ jobs:
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Install package
run: |
python -m pip install .
python -m pip install --upgrade pip
python -m pip install .[dev]
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
# exit-zero treats all errors as warnings
flake8 . --count --exit-zero --statistics
- name: Test with pytest
run: |
pytest
python -m pytest
- name: Type check with mypy
run: |
python -m mypy src/
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ repos:
rev: 5.0.4
hooks:
- id: flake8
additional_dependencies:
- Flake8-pyproject
9 changes: 9 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,17 @@ dev = [
"flake8",
"pre-commit",
"pytest",
"Flake8-pyproject",
"mypy",
]

[tool.black]
line-length = 120

[tool.flake8]
extend-ignore = ["E203", "E501"]
max-complexity = 10

[tool.hatch.version]
path = "src/fqfa/__init__.py"

Expand Down
2 changes: 1 addition & 1 deletion src/fqfa/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
)
from fqfa.util.translate import translate_dna, ncbi_genetic_code_to_dict

__version__ = "1.2.3"
__version__ = "1.3.0"

__all__ = [
"__version__",
Expand Down
9 changes: 5 additions & 4 deletions src/fqfa/fastq/fastq.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def parse_fastq_reads(handle: TextIO) -> Generator[FastqRead, None, None]:
raise ValueError("incomplete FASTQ record")
else:
lines = [x.rstrip() for x in lines] # remove trailing newlines
yield FastqRead(*lines)
# TODO: confirm why mypy rejects this; presumably *lines (a list of str of unknown length) could also fill the int field quality_encoding_value
yield FastqRead(*lines) # type: ignore[arg-type]


def parse_fastq_pe_reads(
Expand Down Expand Up @@ -80,9 +81,9 @@ def parse_fastq_pe_reads(
for fwd, rev in zip_longest(fwd_generator, rev_generator, fillvalue=None):
if None in (fwd, rev):
raise ValueError("mismatched FASTQ file lengths")
elif fwd.header.split()[0] != rev.header.split()[0]:
elif fwd.header.split()[0] != rev.header.split()[0]: # type: ignore[union-attr]
raise ValueError("forward and reverse read headers do not match")
else:
if revcomp:
rev.reverse_complement()
yield fwd, rev
rev.reverse_complement() # type: ignore[union-attr]
yield fwd, rev # type: ignore[misc]
16 changes: 7 additions & 9 deletions src/fqfa/fastq/fastqread.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from dataclasses import dataclass, field, InitVar
from typing import List, Optional, ClassVar, Callable, Match
from statistics import mean
from fqfa.util.nucleotide import reverse_complement
from fqfa.validator.create import create_validator
from fqfa.constants.iupac.dna import DNA_BASES
Expand Down Expand Up @@ -52,9 +51,7 @@ class FastqRead:
quality: List[int] = field(init=False)
quality_string: InitVar[str]
quality_encoding_value: int = 33
_sequence_validator: ClassVar[
Callable[[str], Optional[Match[str]]]
] = create_validator(DNA_BASES + ["N"])
_sequence_validator: ClassVar[Callable[[str], Optional[Match[str]]]] = create_validator(DNA_BASES + ["N"])

def __post_init__(self, quality_string: str) -> None:
"""Perform some basic checks on the input and converts the quality string into a
Expand Down Expand Up @@ -99,7 +96,10 @@ def __post_init__(self, quality_string: str) -> None:
# mypy false positive: https://github.com/python/mypy/issues/5485
raise ValueError("unexpected characters in sequence")

self.quality = [ord(c) - self.quality_encoding_value for c in quality_string]
quality_string_bytes = quality_string.encode("ascii")
qev = self.quality_encoding_value
self.quality = [qsb - qev for qsb in quality_string_bytes]

if min(self.quality) < 0:
raise ValueError("sequence quality value below 0")
if max(self.quality) > 93:
Expand All @@ -125,9 +125,7 @@ def __str__(self) -> str:
Reconstruction of the original FASTQ record.
"""
quality_string = "".join(
[chr(q + self.quality_encoding_value) for q in self.quality]
)
quality_string = "".join([chr(q + self.quality_encoding_value) for q in self.quality])
return "\n".join((self.header, self.sequence, self.header2, quality_string))

def average_quality(self) -> float:
Expand All @@ -139,7 +137,7 @@ def average_quality(self) -> float:
Mean quality value.
"""
return mean(self.quality)
return sum(self.quality) / len(self.quality)

def min_quality(self) -> int:
"""Calculates and returns the read's minimum quality value.
Expand Down
4 changes: 1 addition & 3 deletions src/fqfa/util/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,7 @@ def infer_sequence_type(seq: str, report_iupac: bool = True) -> Optional[str]:
return None


def infer_all_sequence_types( # noqa: max-complexity: 11
seq: str, report_iupac: bool = True
) -> Optional[List[str]]:
def infer_all_sequence_types(seq: str, report_iupac: bool = True) -> Optional[List[str]]: # noqa: max-complexity: 11
"""Return all inferred types for the given sequence.
Sequence types include:
Expand Down
17 changes: 3 additions & 14 deletions src/fqfa/util/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@
__all__ = ["translate_dna", "ncbi_genetic_code_to_dict"]


def translate_dna(
seq: str, table: Optional[Dict[str, str]] = None, frame: int = 0
) -> Tuple[str, Optional[str]]:
def translate_dna(seq: str, table: Optional[Dict[str, str]] = None, frame: int = 0) -> Tuple[str, Optional[str]]:
"""
Translate a DNA sequence into the corresponding amino acid sequence.
Expand Down Expand Up @@ -116,11 +114,7 @@ def ncbi_genetic_code_to_dict( # noqa: max-complexity: 11
If the AAs row contains a character other than an amino acid.
"""
lines = [
s.strip()
for s in ncbi_string.split("\n")
if len(s) > 0 and not s.startswith("#") and not s.isspace()
]
lines = [s.strip() for s in ncbi_string.split("\n") if len(s) > 0 and not s.startswith("#") and not s.isspace()]
if len(lines) != 5:
raise ValueError("transl_table string must have 5 lines")

Expand Down Expand Up @@ -152,12 +146,7 @@ def ncbi_genetic_code_to_dict( # noqa: max-complexity: 11
codon_dict: Dict[str, str] = dict()
for aa, codon in zip(
transl_table["AAs"],
(
"".join(nts)
for nts in zip(
transl_table["Base1"], transl_table["Base2"], transl_table["Base3"]
)
),
("".join(nts) for nts in zip(transl_table["Base1"], transl_table["Base2"], transl_table["Base3"])),
):
if codon not in codon_dict:
codon_dict[codon] = aa
Expand Down
52 changes: 13 additions & 39 deletions tests/test_fastqread.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@ def test_creation_no_errors(self) -> None:
self.assertEqual(test_read.sequence, self.test_kwargs["sequence"])
self.assertEqual(test_read.header2, self.test_kwargs["header2"])
self.assertListEqual(test_read.quality, self.test_quality)
self.assertEqual(
test_read.quality_encoding_value, self.test_kwargs["quality_encoding_value"]
)
self.assertEqual(test_read.quality_encoding_value, self.test_kwargs["quality_encoding_value"])

def test_creation_bad_header(self) -> None:
test_kwargs = self.test_kwargs.copy()
Expand Down Expand Up @@ -55,9 +53,7 @@ def test_creation_bad_bases(self) -> None:

# bad internal base/ambiguity character
test_kwargs = self.test_kwargs.copy()
test_kwargs["sequence"] = (
self.test_kwargs["sequence"][:1] + "W" + self.test_kwargs["sequence"][2:]
)
test_kwargs["sequence"] = self.test_kwargs["sequence"][:1] + "W" + self.test_kwargs["sequence"][2:]
self.assertRaises(ValueError, FastqRead, **test_kwargs)

# bad last base/number
Expand All @@ -74,9 +70,7 @@ def test_creation_bad_quality(self) -> None:
# bad internal value/unicode character
test_kwargs = self.test_kwargs.copy()
test_kwargs["quality_string"] = (
self.test_kwargs["quality_string"][:1]
+ "µ"
+ self.test_kwargs["quality_string"][2:]
self.test_kwargs["quality_string"][:1] + "µ" + self.test_kwargs["quality_string"][2:]
)
self.assertRaises(ValueError, FastqRead, **test_kwargs)

Expand Down Expand Up @@ -117,9 +111,7 @@ def test_trim_start(self) -> None:
test_read.trim(start=len(test_read))
self.assertEqual(len(test_read), 1)
self.assertEqual(len(test_read.sequence), len(test_read.quality))
self.assertEqual(
test_read.sequence, FastqRead(**self.test_kwargs).sequence[-1:]
)
self.assertEqual(test_read.sequence, FastqRead(**self.test_kwargs).sequence[-1:])

def test_trim_end(self) -> None:
test_read = FastqRead(**self.test_kwargs)
Expand All @@ -130,9 +122,7 @@ def test_trim_end(self) -> None:
test_read.trim(end=len(test_read) - 1)
self.assertEqual(len(test_read), len(FastqRead(**self.test_kwargs)) - 1)
self.assertEqual(len(test_read.sequence), len(test_read.quality))
self.assertEqual(
test_read.sequence, FastqRead(**self.test_kwargs).sequence[:-1]
)
self.assertEqual(test_read.sequence, FastqRead(**self.test_kwargs).sequence[:-1])

test_read = FastqRead(**self.test_kwargs)
test_read.trim(end=1)
Expand All @@ -145,17 +135,13 @@ def test_trim_both_ends(self) -> None:
test_read.trim(start=2, end=2)
self.assertEqual(len(test_read), 1)
self.assertEqual(len(test_read.sequence), len(test_read.quality))
self.assertEqual(
test_read.sequence, FastqRead(**self.test_kwargs).sequence[1:2]
)
self.assertEqual(test_read.sequence, FastqRead(**self.test_kwargs).sequence[1:2])

test_read = FastqRead(**self.test_kwargs)
test_read.trim(start=2, end=4)
self.assertEqual(len(test_read), 3)
self.assertEqual(len(test_read.sequence), len(test_read.quality))
self.assertEqual(
test_read.sequence, FastqRead(**self.test_kwargs).sequence[1:4]
)
self.assertEqual(test_read.sequence, FastqRead(**self.test_kwargs).sequence[1:4])

def test_trim_bad_parameters(self) -> None:
test_read = FastqRead(**self.test_kwargs)
Expand Down Expand Up @@ -201,21 +187,15 @@ def test_trim_length(self) -> None:
test_read.trim_length(start=2, length=4)
self.assertEqual(len(test_read), 4)
self.assertEqual(len(test_read.sequence), len(test_read.quality))
self.assertEqual(
test_read.sequence, FastqRead(**self.test_kwargs).sequence[1:5]
)
self.assertEqual(test_read.sequence, FastqRead(**self.test_kwargs).sequence[1:5])

def test_trim_length_bad_parameters(self) -> None:
test_read = FastqRead(**self.test_kwargs)

# bad start parameters
self.assertRaises(
ValueError, test_read.trim_length, start=-1, length=len(test_read)
)
self.assertRaises(ValueError, test_read.trim_length, start=-1, length=len(test_read))
self.assertEqual(test_read, FastqRead(**self.test_kwargs))
self.assertRaises(
ValueError, test_read.trim_length, start=0, length=len(test_read)
)
self.assertRaises(ValueError, test_read.trim_length, start=0, length=len(test_read))
self.assertEqual(test_read, FastqRead(**self.test_kwargs))
self.assertRaises(
ValueError,
Expand All @@ -236,24 +216,18 @@ def test_trim_length_bad_parameters(self) -> None:
# bad parameter combinations
self.assertRaises(ValueError, test_read.trim_length, start=-1, length=0)
self.assertEqual(test_read, FastqRead(**self.test_kwargs))
self.assertRaises(
ValueError, test_read.trim_length, start=2, length=len(test_read)
)
self.assertRaises(ValueError, test_read.trim_length, start=2, length=len(test_read))
self.assertEqual(test_read, FastqRead(**self.test_kwargs))

def test_reverse_complement(self) -> None:
test_read = FastqRead(**self.test_kwargs)
test_read.reverse_complement()

self.assertEqual(test_read.header, self.test_kwargs["header"])
self.assertEqual(
test_read.sequence, reverse_complement(self.test_kwargs["sequence"])
)
self.assertEqual(test_read.sequence, reverse_complement(self.test_kwargs["sequence"]))
self.assertEqual(test_read.header2, self.test_kwargs["header2"])
self.assertListEqual(test_read.quality, self.test_quality[::-1])
self.assertEqual(
test_read.quality_encoding_value, self.test_kwargs["quality_encoding_value"]
)
self.assertEqual(test_read.quality_encoding_value, self.test_kwargs["quality_encoding_value"])


if __name__ == "__main__":
Expand Down
28 changes: 7 additions & 21 deletions tests/test_util_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@ def test_protein(self):
self.assertEqual(infer_sequence_type("MDLSALRVEE"), "protein")

def test_protein_iupac(self):
self.assertEqual(
infer_sequence_type("LIVWZ", report_iupac=True), "protein-iupac"
)
self.assertEqual(infer_sequence_type("LIVWZ", report_iupac=True), "protein-iupac")
self.assertEqual(infer_sequence_type("LIVWZ", report_iupac=False), "protein")

def test_lowercase(self):
Expand All @@ -50,25 +48,19 @@ def test_dna(self):
infer_all_sequence_types("ACGT", report_iupac=True),
["dna", "dna-iupac", "protein", "protein-iupac"],
)
self.assertListEqual(
infer_all_sequence_types("ACGT", report_iupac=False), ["dna", "protein"]
)
self.assertListEqual(infer_all_sequence_types("ACGT", report_iupac=False), ["dna", "protein"])
self.assertListEqual(
infer_all_sequence_types("TTTTT", report_iupac=True),
["dna", "dna-iupac", "protein", "protein-iupac"],
)
self.assertListEqual(
infer_all_sequence_types("TTTTT", report_iupac=False), ["dna", "protein"]
)
self.assertListEqual(infer_all_sequence_types("TTTTT", report_iupac=False), ["dna", "protein"])

def test_dna_iupac(self):
self.assertListEqual(
infer_all_sequence_types("AWGT", report_iupac=True),
["dna-iupac", "protein", "protein-iupac"],
)
self.assertListEqual(
infer_all_sequence_types("AWGT", report_iupac=False), ["dna", "protein"]
)
self.assertListEqual(infer_all_sequence_types("AWGT", report_iupac=False), ["dna", "protein"])

def test_rna(self):
self.assertListEqual(infer_all_sequence_types("ACGU"), ["rna"])
Expand All @@ -79,17 +71,11 @@ def test_protein(self):
infer_all_sequence_types("LIVW", report_iupac=True),
["protein", "protein-iupac"],
)
self.assertListEqual(
infer_all_sequence_types("MDLSALRVEE", report_iupac=False), ["protein"]
)
self.assertListEqual(infer_all_sequence_types("MDLSALRVEE", report_iupac=False), ["protein"])

def test_protein_iupac(self):
self.assertListEqual(
infer_all_sequence_types("LIVWZ", report_iupac=True), ["protein-iupac"]
)
self.assertListEqual(
infer_all_sequence_types("LIVWZ", report_iupac=False), ["protein"]
)
self.assertListEqual(infer_all_sequence_types("LIVWZ", report_iupac=True), ["protein-iupac"])
self.assertListEqual(infer_all_sequence_types("LIVWZ", report_iupac=False), ["protein"])

def test_lowercase(self):
self.assertIsNone(infer_all_sequence_types("acgt"))
Expand Down
8 changes: 2 additions & 6 deletions tests/test_validator_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,9 @@ def test_create_from_list(self) -> None:

# invalid list arguments
self.assertRaises(ValueError, create_validator, ["A", "C", "GT"])
self.assertRaises(
ValueError, create_validator, ["A", "C", "GT"], case_sensitive=False
)
self.assertRaises(ValueError, create_validator, ["A", "C", "GT"], case_sensitive=False)
self.assertRaises(ValueError, create_validator, ["A", "C", ""])
self.assertRaises(
ValueError, create_validator, ["A", "C", ""], case_sensitive=False
)
self.assertRaises(ValueError, create_validator, ["A", "C", ""], case_sensitive=False)


if __name__ == "__main__":
Expand Down

0 comments on commit 5f01fcb

Please sign in to comment.