diff --git a/.flake8 b/.flake8 deleted file mode 100644 index 3237997..0000000 --- a/.flake8 +++ /dev/null @@ -1,4 +0,0 @@ -[flake8] -extend-ignore = E203 -max-line-length = 88 -max-complexity = 10 diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 28f0840..f34b399 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -24,20 +24,19 @@ jobs: uses: actions/setup-python@v3 with: python-version: ${{ matrix.python-version }} - - name: Install dependencies - run: | - python -m pip install --upgrade pip - python -m pip install flake8 pytest - if [ -f requirements.txt ]; then pip install -r requirements.txt; fi - name: Install package run: | - python -m pip install . + python -m pip install --upgrade pip + python -m pip install .[dev] - name: Lint with flake8 run: | # stop the build if there are Python syntax errors or undefined names flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide + # exit-zero treats all errors as warnings flake8 . --count --exit-zero --statistics - name: Test with pytest run: | - pytest + python -m pytest + - name: Type check with mypy + run: | + python -m mypy src/ diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1912aef..adf0b9a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -8,3 +8,5 @@ repos: rev: 5.0.4 hooks: - id: flake8 + additional_dependencies: + - Flake8-pyproject diff --git a/pyproject.toml b/pyproject.toml index ca9bd2e..2a7ef4e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,8 +34,17 @@ dev = [ "flake8", "pre-commit", "pytest", + "Flake8-pyproject", + "mypy", ] +[tool.black] +line-length = 120 + +[tool.flake8] +extend-ignore = ["E203", "E501"] +max-complexity = 10 + [tool.hatch.version] path = "src/fqfa/__init__.py" diff --git a/src/fqfa/__init__.py b/src/fqfa/__init__.py index 1e7fbba..0ecbff1 100644 --- a/src/fqfa/__init__.py +++ b/src/fqfa/__init__.py @@ -9,7 +9,7 @@ ) from fqfa.util.translate import translate_dna, ncbi_genetic_code_to_dict -__version__ = "1.2.3" +__version__ = "1.3.0" __all__ = [ "__version__", diff --git a/src/fqfa/fastq/fastq.py b/src/fqfa/fastq/fastq.py index af5037c..1d2b294 100644 --- a/src/fqfa/fastq/fastq.py +++ b/src/fqfa/fastq/fastq.py @@ -37,7 +37,8 @@ def parse_fastq_reads(handle: TextIO) -> Generator[FastqRead, None, None]: raise ValueError("incomplete FASTQ record") else: lines = [x.rstrip() for x in lines] # remove trailing newlines - yield FastqRead(*lines) + # TODO: figure out why mypy doesn't like this + yield FastqRead(*lines) # type: ignore[arg-type] def parse_fastq_pe_reads( @@ -80,9 +81,9 @@ def parse_fastq_pe_reads( for fwd, rev in zip_longest(fwd_generator, rev_generator, fillvalue=None): if None in (fwd, rev): raise ValueError("mismatched FASTQ file lengths") - elif fwd.header.split()[0] != rev.header.split()[0]: + elif fwd.header.split()[0] != rev.header.split()[0]: # type: ignore[union-attr] raise ValueError("forward and reverse read headers do not match") else: if revcomp: - rev.reverse_complement() - yield fwd, rev + rev.reverse_complement() # type: ignore[union-attr] + yield fwd, rev # type: ignore[misc] diff --git a/src/fqfa/fastq/fastqread.py b/src/fqfa/fastq/fastqread.py index 106a07c..af7aba6 100644 --- a/src/fqfa/fastq/fastqread.py +++ b/src/fqfa/fastq/fastqread.py @@ -4,7 +4,6 @@ from dataclasses import dataclass, field, InitVar from typing import List, Optional, ClassVar, Callable, Match -from statistics import mean from fqfa.util.nucleotide import reverse_complement from fqfa.validator.create import create_validator from fqfa.constants.iupac.dna import DNA_BASES @@ -52,9 +51,7 @@ class FastqRead: quality: List[int] = field(init=False) quality_string: InitVar[str] quality_encoding_value: int = 33 - _sequence_validator: ClassVar[ - Callable[[str], Optional[Match[str]]] - ] = create_validator(DNA_BASES + ["N"]) + _sequence_validator: ClassVar[Callable[[str], Optional[Match[str]]]] = create_validator(DNA_BASES + ["N"]) def __post_init__(self, quality_string: str) -> None: """Perform some basic checks on the input and converts the quality string into a @@ -99,7 +96,10 @@ def __post_init__(self, quality_string: str) -> None: # mypy false positive: https://github.com/python/mypy/issues/5485 raise ValueError("unexpected characters in sequence") - self.quality = [ord(c) - self.quality_encoding_value for c in quality_string] + quality_string_bytes = quality_string.encode("ascii") + qev = self.quality_encoding_value + self.quality = [qsb - qev for qsb in quality_string_bytes] + if min(self.quality) < 0: raise ValueError("sequence quality value below 0") if max(self.quality) > 93: @@ -125,9 +125,7 @@ def __str__(self) -> str: Reconstruction of the original FASTQ record. """ - quality_string = "".join( - [chr(q + self.quality_encoding_value) for q in self.quality] - ) + quality_string = "".join([chr(q + self.quality_encoding_value) for q in self.quality]) return "\n".join((self.header, self.sequence, self.header2, quality_string)) def average_quality(self) -> float: @@ -139,7 +137,7 @@ def average_quality(self) -> float: Mean quality value. """ - return mean(self.quality) + return sum(self.quality) / len(self.quality) def min_quality(self) -> int: """Calculates and returns the read's minimum quality value. diff --git a/src/fqfa/util/infer.py b/src/fqfa/util/infer.py index f71a7eb..f7f447d 100644 --- a/src/fqfa/util/infer.py +++ b/src/fqfa/util/infer.py @@ -59,9 +59,7 @@ def infer_sequence_type(seq: str, report_iupac: bool = True) -> Optional[str]: return None -def infer_all_sequence_types( # noqa: max-complexity: 11 - seq: str, report_iupac: bool = True -) -> Optional[List[str]]: +def infer_all_sequence_types(seq: str, report_iupac: bool = True) -> Optional[List[str]]: # noqa: max-complexity: 11 """Return all inferred types for the given sequence. Sequence types include: diff --git a/src/fqfa/util/translate.py b/src/fqfa/util/translate.py index cd6f979..abf512d 100644 --- a/src/fqfa/util/translate.py +++ b/src/fqfa/util/translate.py @@ -9,9 +9,7 @@ __all__ = ["translate_dna", "ncbi_genetic_code_to_dict"] -def translate_dna( - seq: str, table: Optional[Dict[str, str]] = None, frame: int = 0 -) -> Tuple[str, Optional[str]]: +def translate_dna(seq: str, table: Optional[Dict[str, str]] = None, frame: int = 0) -> Tuple[str, Optional[str]]: """ Translate a DNA sequence into the corresponding amino acid sequence. @@ -116,11 +114,7 @@ def ncbi_genetic_code_to_dict( # noqa: max-complexity: 11 If the AAs row contains a character other than an amino acid. """ - lines = [ - s.strip() - for s in ncbi_string.split("\n") - if len(s) > 0 and not s.startswith("#") and not s.isspace() - ] + lines = [s.strip() for s in ncbi_string.split("\n") if len(s) > 0 and not s.startswith("#") and not s.isspace()] if len(lines) != 5: raise ValueError("transl_table string must have 5 lines") @@ -152,12 +146,7 @@ def ncbi_genetic_code_to_dict( # noqa: max-complexity: 11 codon_dict: Dict[str, str] = dict() for aa, codon in zip( transl_table["AAs"], - ( - "".join(nts) - for nts in zip( - transl_table["Base1"], transl_table["Base2"], transl_table["Base3"] - ) - ), + ("".join(nts) for nts in zip(transl_table["Base1"], transl_table["Base2"], transl_table["Base3"])), ): if codon not in codon_dict: codon_dict[codon] = aa diff --git a/tests/test_fastqread.py b/tests/test_fastqread.py index 0f0569e..3d34139 100644 --- a/tests/test_fastqread.py +++ b/tests/test_fastqread.py @@ -22,9 +22,7 @@ def test_creation_no_errors(self) -> None: self.assertEqual(test_read.sequence, self.test_kwargs["sequence"]) self.assertEqual(test_read.header2, self.test_kwargs["header2"]) self.assertListEqual(test_read.quality, self.test_quality) - self.assertEqual( - test_read.quality_encoding_value, self.test_kwargs["quality_encoding_value"] - ) + self.assertEqual(test_read.quality_encoding_value, self.test_kwargs["quality_encoding_value"]) def test_creation_bad_header(self) -> None: test_kwargs = self.test_kwargs.copy() @@ -55,9 +53,7 @@ def test_creation_bad_bases(self) -> None: # bad internal base/ambiguity character test_kwargs = self.test_kwargs.copy() - test_kwargs["sequence"] = ( - self.test_kwargs["sequence"][:1] + "W" + self.test_kwargs["sequence"][2:] - ) + test_kwargs["sequence"] = self.test_kwargs["sequence"][:1] + "W" + self.test_kwargs["sequence"][2:] self.assertRaises(ValueError, FastqRead, **test_kwargs) # bad last base/number @@ -74,9 +70,7 @@ def test_creation_bad_quality(self) -> None: # bad internal value/unicode character test_kwargs = self.test_kwargs.copy() test_kwargs["quality_string"] = ( - self.test_kwargs["quality_string"][:1] - + "µ" - + self.test_kwargs["quality_string"][2:] + self.test_kwargs["quality_string"][:1] + "µ" + self.test_kwargs["quality_string"][2:] ) self.assertRaises(ValueError, FastqRead, **test_kwargs) @@ -117,9 +111,7 @@ def test_trim_start(self) -> None: test_read.trim(start=len(test_read)) self.assertEqual(len(test_read), 1) self.assertEqual(len(test_read.sequence), len(test_read.quality)) - self.assertEqual( - test_read.sequence, FastqRead(**self.test_kwargs).sequence[-1:] - ) + self.assertEqual(test_read.sequence, FastqRead(**self.test_kwargs).sequence[-1:]) def test_trim_end(self) -> None: test_read = FastqRead(**self.test_kwargs) @@ -130,9 +122,7 @@ def test_trim_end(self) -> None: test_read.trim(end=len(test_read) - 1) self.assertEqual(len(test_read), len(FastqRead(**self.test_kwargs)) - 1) self.assertEqual(len(test_read.sequence), len(test_read.quality)) - self.assertEqual( - test_read.sequence, FastqRead(**self.test_kwargs).sequence[:-1] - ) + self.assertEqual(test_read.sequence, FastqRead(**self.test_kwargs).sequence[:-1]) test_read = FastqRead(**self.test_kwargs) test_read.trim(end=1) @@ -145,17 +135,13 @@ def test_trim_both_ends(self) -> None: test_read.trim(start=2, end=2) self.assertEqual(len(test_read), 1) self.assertEqual(len(test_read.sequence), len(test_read.quality)) - self.assertEqual( - test_read.sequence, FastqRead(**self.test_kwargs).sequence[1:2] - ) + self.assertEqual(test_read.sequence, FastqRead(**self.test_kwargs).sequence[1:2]) test_read = FastqRead(**self.test_kwargs) test_read.trim(start=2, end=4) self.assertEqual(len(test_read), 3) self.assertEqual(len(test_read.sequence), len(test_read.quality)) - self.assertEqual( - test_read.sequence, FastqRead(**self.test_kwargs).sequence[1:4] - ) + self.assertEqual(test_read.sequence, FastqRead(**self.test_kwargs).sequence[1:4]) def test_trim_bad_parameters(self) -> None: test_read = FastqRead(**self.test_kwargs) @@ -201,21 +187,15 @@ def test_trim_length(self) -> None: test_read.trim_length(start=2, length=4) self.assertEqual(len(test_read), 4) self.assertEqual(len(test_read.sequence), len(test_read.quality)) - self.assertEqual( - test_read.sequence, FastqRead(**self.test_kwargs).sequence[1:5] - ) + self.assertEqual(test_read.sequence, FastqRead(**self.test_kwargs).sequence[1:5]) def test_trim_length_bad_parameters(self) -> None: test_read = FastqRead(**self.test_kwargs) # bad start parameters - self.assertRaises( - ValueError, test_read.trim_length, start=-1, length=len(test_read) - ) + self.assertRaises(ValueError, test_read.trim_length, start=-1, length=len(test_read)) self.assertEqual(test_read, FastqRead(**self.test_kwargs)) - self.assertRaises( - ValueError, test_read.trim_length, start=0, length=len(test_read) - ) + self.assertRaises(ValueError, test_read.trim_length, start=0, length=len(test_read)) self.assertEqual(test_read, FastqRead(**self.test_kwargs)) self.assertRaises( ValueError, @@ -236,9 +216,7 @@ def test_trim_length_bad_parameters(self) -> None: # bad parameter combinations self.assertRaises(ValueError, test_read.trim_length, start=-1, length=0) self.assertEqual(test_read, FastqRead(**self.test_kwargs)) - self.assertRaises( - ValueError, test_read.trim_length, start=2, length=len(test_read) - ) + self.assertRaises(ValueError, test_read.trim_length, start=2, length=len(test_read)) self.assertEqual(test_read, FastqRead(**self.test_kwargs)) def test_reverse_complement(self) -> None: @@ -246,14 +224,10 @@ def test_reverse_complement(self) -> None: test_read.reverse_complement() self.assertEqual(test_read.header, self.test_kwargs["header"]) - self.assertEqual( - test_read.sequence, reverse_complement(self.test_kwargs["sequence"]) - ) + self.assertEqual(test_read.sequence, reverse_complement(self.test_kwargs["sequence"])) self.assertEqual(test_read.header2, self.test_kwargs["header2"]) self.assertListEqual(test_read.quality, self.test_quality[::-1]) - self.assertEqual( - test_read.quality_encoding_value, self.test_kwargs["quality_encoding_value"] - ) + self.assertEqual(test_read.quality_encoding_value, self.test_kwargs["quality_encoding_value"]) if __name__ == "__main__": diff --git a/tests/test_util_infer.py b/tests/test_util_infer.py index 9089178..bd71563 100644 --- a/tests/test_util_infer.py +++ b/tests/test_util_infer.py @@ -21,9 +21,7 @@ def test_protein(self): self.assertEqual(infer_sequence_type("MDLSALRVEE"), "protein") def test_protein_iupac(self): - self.assertEqual( - infer_sequence_type("LIVWZ", report_iupac=True), "protein-iupac" - ) + self.assertEqual(infer_sequence_type("LIVWZ", report_iupac=True), "protein-iupac") self.assertEqual(infer_sequence_type("LIVWZ", report_iupac=False), "protein") def test_lowercase(self): @@ -50,25 +48,19 @@ def test_dna(self): infer_all_sequence_types("ACGT", report_iupac=True), ["dna", "dna-iupac", "protein", "protein-iupac"], ) - self.assertListEqual( - infer_all_sequence_types("ACGT", report_iupac=False), ["dna", "protein"] - ) + self.assertListEqual(infer_all_sequence_types("ACGT", report_iupac=False), ["dna", "protein"]) self.assertListEqual( infer_all_sequence_types("TTTTT", report_iupac=True), ["dna", "dna-iupac", "protein", "protein-iupac"], ) - self.assertListEqual( - infer_all_sequence_types("TTTTT", report_iupac=False), ["dna", "protein"] - ) + self.assertListEqual(infer_all_sequence_types("TTTTT", report_iupac=False), ["dna", "protein"]) def test_dna_iupac(self): self.assertListEqual( infer_all_sequence_types("AWGT", report_iupac=True), ["dna-iupac", "protein", "protein-iupac"], ) - self.assertListEqual( - infer_all_sequence_types("AWGT", report_iupac=False), ["dna", "protein"] - ) + self.assertListEqual(infer_all_sequence_types("AWGT", report_iupac=False), ["dna", "protein"]) def test_rna(self): self.assertListEqual(infer_all_sequence_types("ACGU"), ["rna"]) @@ -79,17 +71,11 @@ def test_protein(self): infer_all_sequence_types("LIVW", report_iupac=True), ["protein", "protein-iupac"], ) - self.assertListEqual( - infer_all_sequence_types("MDLSALRVEE", report_iupac=False), ["protein"] - ) + self.assertListEqual(infer_all_sequence_types("MDLSALRVEE", report_iupac=False), ["protein"]) def test_protein_iupac(self): - self.assertListEqual( - infer_all_sequence_types("LIVWZ", report_iupac=True), ["protein-iupac"] - ) - self.assertListEqual( - infer_all_sequence_types("LIVWZ", report_iupac=False), ["protein"] - ) + self.assertListEqual(infer_all_sequence_types("LIVWZ", report_iupac=True), ["protein-iupac"]) + self.assertListEqual(infer_all_sequence_types("LIVWZ", report_iupac=False), ["protein"]) def test_lowercase(self): self.assertIsNone(infer_all_sequence_types("acgt")) diff --git a/tests/test_validator_create.py b/tests/test_validator_create.py index f707a79..d2713d6 100644 --- a/tests/test_validator_create.py +++ b/tests/test_validator_create.py @@ -64,13 +64,9 @@ def test_create_from_list(self) -> None: # invalid list arguments self.assertRaises(ValueError, create_validator, ["A", "C", "GT"]) - self.assertRaises( - ValueError, create_validator, ["A", "C", "GT"], case_sensitive=False - ) + self.assertRaises(ValueError, create_validator, ["A", "C", "GT"], case_sensitive=False) self.assertRaises(ValueError, create_validator, ["A", "C", ""]) - self.assertRaises( - ValueError, create_validator, ["A", "C", ""], case_sensitive=False - ) + self.assertRaises(ValueError, create_validator, ["A", "C", ""], case_sensitive=False) if __name__ == "__main__":