Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

version 1.3.0 updates #19

Merged
merged 9 commits into from
Aug 29, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 0 additions & 4 deletions .flake8

This file was deleted.

15 changes: 7 additions & 8 deletions .github/workflows/python-package.yml
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,19 @@ jobs:
uses: actions/setup-python@v3
with:
python-version: ${{ matrix.python-version }}
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pytest
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
- name: Install package
run: |
python -m pip install .
python -m pip install --upgrade pip
python -m pip install .[dev]
- name: Lint with flake8
run: |
# stop the build if there are Python syntax errors or undefined names
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
# exit-zero treats all errors as warnings
flake8 . --count --exit-zero --statistics
- name: Test with pytest
run: |
pytest
python -m pytest
- name: Type check with mypy
run: |
python -m mypy src/
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,3 +8,5 @@ repos:
rev: 5.0.4
hooks:
- id: flake8
additional_dependencies:
- Flake8-pyproject
9 changes: 9 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,17 @@ dev = [
"flake8",
"pre-commit",
"pytest",
"Flake8-pyproject",
"mypy",
]

[tool.black]
line-length = 120

[tool.flake8]
extend-ignore = ["E203", "E501"]
max-complexity = 10

[tool.hatch.version]
path = "src/fqfa/__init__.py"

Expand Down
2 changes: 1 addition & 1 deletion src/fqfa/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
)
from fqfa.util.translate import translate_dna, ncbi_genetic_code_to_dict

__version__ = "1.2.3"
__version__ = "1.3.0"

__all__ = [
"__version__",
Expand Down
9 changes: 5 additions & 4 deletions src/fqfa/fastq/fastq.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ def parse_fastq_reads(handle: TextIO) -> Generator[FastqRead, None, None]:
raise ValueError("incomplete FASTQ record")
else:
lines = [x.rstrip() for x in lines] # remove trailing newlines
yield FastqRead(*lines)
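# NOTE: mypy likely flags this because unpacking a List[str] of unknown length could also bind a str to the int parameter quality_encoding_value (TODO: confirm and unpack the four fields explicitly)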
yield FastqRead(*lines) # type: ignore[arg-type]


def parse_fastq_pe_reads(
Expand Down Expand Up @@ -80,9 +81,9 @@ def parse_fastq_pe_reads(
for fwd, rev in zip_longest(fwd_generator, rev_generator, fillvalue=None):
if None in (fwd, rev):
raise ValueError("mismatched FASTQ file lengths")
elif fwd.header.split()[0] != rev.header.split()[0]:
elif fwd.header.split()[0] != rev.header.split()[0]: # type: ignore[union-attr]
raise ValueError("forward and reverse read headers do not match")
else:
if revcomp:
rev.reverse_complement()
yield fwd, rev
rev.reverse_complement() # type: ignore[union-attr]
yield fwd, rev # type: ignore[misc]
16 changes: 7 additions & 9 deletions src/fqfa/fastq/fastqread.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

from dataclasses import dataclass, field, InitVar
from typing import List, Optional, ClassVar, Callable, Match
from statistics import mean
from fqfa.util.nucleotide import reverse_complement
from fqfa.validator.create import create_validator
from fqfa.constants.iupac.dna import DNA_BASES
Expand Down Expand Up @@ -52,9 +51,7 @@ class FastqRead:
quality: List[int] = field(init=False)
quality_string: InitVar[str]
quality_encoding_value: int = 33
_sequence_validator: ClassVar[
Callable[[str], Optional[Match[str]]]
] = create_validator(DNA_BASES + ["N"])
_sequence_validator: ClassVar[Callable[[str], Optional[Match[str]]]] = create_validator(DNA_BASES + ["N"])

def __post_init__(self, quality_string: str) -> None:
"""Perform some basic checks on the input and converts the quality string into a
Expand Down Expand Up @@ -99,7 +96,10 @@ def __post_init__(self, quality_string: str) -> None:
# mypy false positive: https://github.com/python/mypy/issues/5485
raise ValueError("unexpected characters in sequence")

self.quality = [ord(c) - self.quality_encoding_value for c in quality_string]
quality_string_bytes = quality_string.encode("ascii")
qev = self.quality_encoding_value
self.quality = [qsb - qev for qsb in quality_string_bytes]

if min(self.quality) < 0:
raise ValueError("sequence quality value below 0")
if max(self.quality) > 93:
Expand All @@ -125,9 +125,7 @@ def __str__(self) -> str:
Reconstruction of the original FASTQ record.

"""
quality_string = "".join(
[chr(q + self.quality_encoding_value) for q in self.quality]
)
quality_string = "".join([chr(q + self.quality_encoding_value) for q in self.quality])
return "\n".join((self.header, self.sequence, self.header2, quality_string))

def average_quality(self) -> float:
Expand All @@ -139,7 +137,7 @@ def average_quality(self) -> float:
Mean quality value.

"""
return mean(self.quality)
return sum(self.quality) / len(self.quality)

def min_quality(self) -> int:
"""Calculates and returns the read's minimum quality value.
Expand Down
4 changes: 1 addition & 3 deletions src/fqfa/util/infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,9 +59,7 @@ def infer_sequence_type(seq: str, report_iupac: bool = True) -> Optional[str]:
return None


def infer_all_sequence_types( # noqa: max-complexity: 11
seq: str, report_iupac: bool = True
) -> Optional[List[str]]:
def infer_all_sequence_types(seq: str, report_iupac: bool = True) -> Optional[List[str]]: # noqa: max-complexity: 11
"""Return all inferred types for the given sequence.

Sequence types include:
Expand Down
17 changes: 3 additions & 14 deletions src/fqfa/util/translate.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,7 @@
__all__ = ["translate_dna", "ncbi_genetic_code_to_dict"]


def translate_dna(
seq: str, table: Optional[Dict[str, str]] = None, frame: int = 0
) -> Tuple[str, Optional[str]]:
def translate_dna(seq: str, table: Optional[Dict[str, str]] = None, frame: int = 0) -> Tuple[str, Optional[str]]:
"""
Translate a DNA sequence into the corresponding amino acid sequence.

Expand Down Expand Up @@ -116,11 +114,7 @@ def ncbi_genetic_code_to_dict( # noqa: max-complexity: 11
If the AAs row contains a character other than an amino acid.

"""
lines = [
s.strip()
for s in ncbi_string.split("\n")
if len(s) > 0 and not s.startswith("#") and not s.isspace()
]
lines = [s.strip() for s in ncbi_string.split("\n") if len(s) > 0 and not s.startswith("#") and not s.isspace()]
if len(lines) != 5:
raise ValueError("transl_table string must have 5 lines")

Expand Down Expand Up @@ -152,12 +146,7 @@ def ncbi_genetic_code_to_dict( # noqa: max-complexity: 11
codon_dict: Dict[str, str] = dict()
for aa, codon in zip(
transl_table["AAs"],
(
"".join(nts)
for nts in zip(
transl_table["Base1"], transl_table["Base2"], transl_table["Base3"]
)
),
("".join(nts) for nts in zip(transl_table["Base1"], transl_table["Base2"], transl_table["Base3"])),
):
if codon not in codon_dict:
codon_dict[codon] = aa
Expand Down
52 changes: 13 additions & 39 deletions tests/test_fastqread.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,7 @@ def test_creation_no_errors(self) -> None:
self.assertEqual(test_read.sequence, self.test_kwargs["sequence"])
self.assertEqual(test_read.header2, self.test_kwargs["header2"])
self.assertListEqual(test_read.quality, self.test_quality)
self.assertEqual(
test_read.quality_encoding_value, self.test_kwargs["quality_encoding_value"]
)
self.assertEqual(test_read.quality_encoding_value, self.test_kwargs["quality_encoding_value"])

def test_creation_bad_header(self) -> None:
test_kwargs = self.test_kwargs.copy()
Expand Down Expand Up @@ -55,9 +53,7 @@ def test_creation_bad_bases(self) -> None:

# bad internal base/ambiguity character
test_kwargs = self.test_kwargs.copy()
test_kwargs["sequence"] = (
self.test_kwargs["sequence"][:1] + "W" + self.test_kwargs["sequence"][2:]
)
test_kwargs["sequence"] = self.test_kwargs["sequence"][:1] + "W" + self.test_kwargs["sequence"][2:]
self.assertRaises(ValueError, FastqRead, **test_kwargs)

# bad last base/number
Expand All @@ -74,9 +70,7 @@ def test_creation_bad_quality(self) -> None:
# bad internal value/unicode character
test_kwargs = self.test_kwargs.copy()
test_kwargs["quality_string"] = (
self.test_kwargs["quality_string"][:1]
+ "µ"
+ self.test_kwargs["quality_string"][2:]
self.test_kwargs["quality_string"][:1] + "µ" + self.test_kwargs["quality_string"][2:]
)
self.assertRaises(ValueError, FastqRead, **test_kwargs)

Expand Down Expand Up @@ -117,9 +111,7 @@ def test_trim_start(self) -> None:
test_read.trim(start=len(test_read))
self.assertEqual(len(test_read), 1)
self.assertEqual(len(test_read.sequence), len(test_read.quality))
self.assertEqual(
test_read.sequence, FastqRead(**self.test_kwargs).sequence[-1:]
)
self.assertEqual(test_read.sequence, FastqRead(**self.test_kwargs).sequence[-1:])

def test_trim_end(self) -> None:
test_read = FastqRead(**self.test_kwargs)
Expand All @@ -130,9 +122,7 @@ def test_trim_end(self) -> None:
test_read.trim(end=len(test_read) - 1)
self.assertEqual(len(test_read), len(FastqRead(**self.test_kwargs)) - 1)
self.assertEqual(len(test_read.sequence), len(test_read.quality))
self.assertEqual(
test_read.sequence, FastqRead(**self.test_kwargs).sequence[:-1]
)
self.assertEqual(test_read.sequence, FastqRead(**self.test_kwargs).sequence[:-1])

test_read = FastqRead(**self.test_kwargs)
test_read.trim(end=1)
Expand All @@ -145,17 +135,13 @@ def test_trim_both_ends(self) -> None:
test_read.trim(start=2, end=2)
self.assertEqual(len(test_read), 1)
self.assertEqual(len(test_read.sequence), len(test_read.quality))
self.assertEqual(
test_read.sequence, FastqRead(**self.test_kwargs).sequence[1:2]
)
self.assertEqual(test_read.sequence, FastqRead(**self.test_kwargs).sequence[1:2])

test_read = FastqRead(**self.test_kwargs)
test_read.trim(start=2, end=4)
self.assertEqual(len(test_read), 3)
self.assertEqual(len(test_read.sequence), len(test_read.quality))
self.assertEqual(
test_read.sequence, FastqRead(**self.test_kwargs).sequence[1:4]
)
self.assertEqual(test_read.sequence, FastqRead(**self.test_kwargs).sequence[1:4])

def test_trim_bad_parameters(self) -> None:
test_read = FastqRead(**self.test_kwargs)
Expand Down Expand Up @@ -201,21 +187,15 @@ def test_trim_length(self) -> None:
test_read.trim_length(start=2, length=4)
self.assertEqual(len(test_read), 4)
self.assertEqual(len(test_read.sequence), len(test_read.quality))
self.assertEqual(
test_read.sequence, FastqRead(**self.test_kwargs).sequence[1:5]
)
self.assertEqual(test_read.sequence, FastqRead(**self.test_kwargs).sequence[1:5])

def test_trim_length_bad_parameters(self) -> None:
test_read = FastqRead(**self.test_kwargs)

# bad start parameters
self.assertRaises(
ValueError, test_read.trim_length, start=-1, length=len(test_read)
)
self.assertRaises(ValueError, test_read.trim_length, start=-1, length=len(test_read))
self.assertEqual(test_read, FastqRead(**self.test_kwargs))
self.assertRaises(
ValueError, test_read.trim_length, start=0, length=len(test_read)
)
self.assertRaises(ValueError, test_read.trim_length, start=0, length=len(test_read))
self.assertEqual(test_read, FastqRead(**self.test_kwargs))
self.assertRaises(
ValueError,
Expand All @@ -236,24 +216,18 @@ def test_trim_length_bad_parameters(self) -> None:
# bad parameter combinations
self.assertRaises(ValueError, test_read.trim_length, start=-1, length=0)
self.assertEqual(test_read, FastqRead(**self.test_kwargs))
self.assertRaises(
ValueError, test_read.trim_length, start=2, length=len(test_read)
)
self.assertRaises(ValueError, test_read.trim_length, start=2, length=len(test_read))
self.assertEqual(test_read, FastqRead(**self.test_kwargs))

def test_reverse_complement(self) -> None:
test_read = FastqRead(**self.test_kwargs)
test_read.reverse_complement()

self.assertEqual(test_read.header, self.test_kwargs["header"])
self.assertEqual(
test_read.sequence, reverse_complement(self.test_kwargs["sequence"])
)
self.assertEqual(test_read.sequence, reverse_complement(self.test_kwargs["sequence"]))
self.assertEqual(test_read.header2, self.test_kwargs["header2"])
self.assertListEqual(test_read.quality, self.test_quality[::-1])
self.assertEqual(
test_read.quality_encoding_value, self.test_kwargs["quality_encoding_value"]
)
self.assertEqual(test_read.quality_encoding_value, self.test_kwargs["quality_encoding_value"])


if __name__ == "__main__":
Expand Down
28 changes: 7 additions & 21 deletions tests/test_util_infer.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@ def test_protein(self):
self.assertEqual(infer_sequence_type("MDLSALRVEE"), "protein")

def test_protein_iupac(self):
self.assertEqual(
infer_sequence_type("LIVWZ", report_iupac=True), "protein-iupac"
)
self.assertEqual(infer_sequence_type("LIVWZ", report_iupac=True), "protein-iupac")
self.assertEqual(infer_sequence_type("LIVWZ", report_iupac=False), "protein")

def test_lowercase(self):
Expand All @@ -50,25 +48,19 @@ def test_dna(self):
infer_all_sequence_types("ACGT", report_iupac=True),
["dna", "dna-iupac", "protein", "protein-iupac"],
)
self.assertListEqual(
infer_all_sequence_types("ACGT", report_iupac=False), ["dna", "protein"]
)
self.assertListEqual(infer_all_sequence_types("ACGT", report_iupac=False), ["dna", "protein"])
self.assertListEqual(
infer_all_sequence_types("TTTTT", report_iupac=True),
["dna", "dna-iupac", "protein", "protein-iupac"],
)
self.assertListEqual(
infer_all_sequence_types("TTTTT", report_iupac=False), ["dna", "protein"]
)
self.assertListEqual(infer_all_sequence_types("TTTTT", report_iupac=False), ["dna", "protein"])

def test_dna_iupac(self):
self.assertListEqual(
infer_all_sequence_types("AWGT", report_iupac=True),
["dna-iupac", "protein", "protein-iupac"],
)
self.assertListEqual(
infer_all_sequence_types("AWGT", report_iupac=False), ["dna", "protein"]
)
self.assertListEqual(infer_all_sequence_types("AWGT", report_iupac=False), ["dna", "protein"])

def test_rna(self):
self.assertListEqual(infer_all_sequence_types("ACGU"), ["rna"])
Expand All @@ -79,17 +71,11 @@ def test_protein(self):
infer_all_sequence_types("LIVW", report_iupac=True),
["protein", "protein-iupac"],
)
self.assertListEqual(
infer_all_sequence_types("MDLSALRVEE", report_iupac=False), ["protein"]
)
self.assertListEqual(infer_all_sequence_types("MDLSALRVEE", report_iupac=False), ["protein"])

def test_protein_iupac(self):
self.assertListEqual(
infer_all_sequence_types("LIVWZ", report_iupac=True), ["protein-iupac"]
)
self.assertListEqual(
infer_all_sequence_types("LIVWZ", report_iupac=False), ["protein"]
)
self.assertListEqual(infer_all_sequence_types("LIVWZ", report_iupac=True), ["protein-iupac"])
self.assertListEqual(infer_all_sequence_types("LIVWZ", report_iupac=False), ["protein"])

def test_lowercase(self):
self.assertIsNone(infer_all_sequence_types("acgt"))
Expand Down
8 changes: 2 additions & 6 deletions tests/test_validator_create.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,13 +64,9 @@ def test_create_from_list(self) -> None:

# invalid list arguments
self.assertRaises(ValueError, create_validator, ["A", "C", "GT"])
self.assertRaises(
ValueError, create_validator, ["A", "C", "GT"], case_sensitive=False
)
self.assertRaises(ValueError, create_validator, ["A", "C", "GT"], case_sensitive=False)
self.assertRaises(ValueError, create_validator, ["A", "C", ""])
self.assertRaises(
ValueError, create_validator, ["A", "C", ""], case_sensitive=False
)
self.assertRaises(ValueError, create_validator, ["A", "C", ""], case_sensitive=False)


if __name__ == "__main__":
Expand Down