Skip to content

Commit

Permalink
fixed bugs; updated tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Ruibin-Liu committed Dec 7, 2023
1 parent 361343b commit a65dd49
Show file tree
Hide file tree
Showing 10 changed files with 1,087 additions and 168 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -161,3 +161,8 @@ cython_debug/

# vscode
.vscode/

# some test files
tests/tmp/
tests/kinases/
tests/panther/
2 changes: 1 addition & 1 deletion pyuniprot/DB.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ class PANTHER:
class SeqRange:
"""Store a sequence range record."""

seq_begein: int | str
seq_begin: int | str
seq_end: int | str


Expand Down
20 changes: 13 additions & 7 deletions pyuniprot/Uniprot.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class EC:
class DE:
"""Protein names"""

recommended_name: str
recommended_name: str | None
ec_record: EC | None
alt_names: list[str] | None
contains: str | None
Expand Down Expand Up @@ -291,7 +291,7 @@ def _get_category_lines(self) -> None:

line: str = u_file.readline()
references: list = []
while line[0:2] != "//":
while line and line[0:2] != "//":
if line.startswith("ID"):
items = line[2:-1].split()
entry_name = items[0]
Expand Down Expand Up @@ -321,9 +321,10 @@ def _get_category_lines(self) -> None:
continue
elif line.startswith("DE"):
alt_names: list[str] = []
ec_line = "DE EC="
ec_line: str = "DE EC="
ec_record: EC | None = None
contains = ""
contains: str | None = None
recommended_name: str | None = None
while line.startswith("DE"):
if line.startswith("DE RecName:"):
recommended_name = line[:-2].split("=")[-1]
Expand Down Expand Up @@ -480,8 +481,13 @@ def _get_category_lines(self) -> None:
seq = seq_covering.split("=")
chain_ids: list[str] = seq[0].split("/")
all_chain_ids.extend(chain_ids)
uniprot_resid_start: int = int(seq[1].split("-")[0])
uniprot_resid_end: int = int(seq[1].split("-")[1])
if len(seq) > 1 and "-" != seq[1]:
uniprot_resid_start: int | str = int(
seq[1].split("-")[0]
)
uniprot_resid_end: int | str = int(seq[1].split("-")[1])
else:
uniprot_resid_start, uniprot_resid_end = "", ""
all_seq_ranges.append(
SeqRange(uniprot_resid_start, uniprot_resid_end)
)
Expand Down Expand Up @@ -625,4 +631,4 @@ def _get_category_lines(self) -> None:
# print(line)

self._category_lines = content_dict
# print(self.category_lines)
# print(self.category_lines["FT"].feature_tables)
10 changes: 5 additions & 5 deletions P36952.txt → tests/P36952.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ ID SPB5_HUMAN Reviewed; 375 AA.
AC P36952; B2R6Y4; Q6N0B4; Q8WW89;
DT 01-JUN-1994, integrated into UniProtKB/Swiss-Prot.
DT 05-MAY-2009, sequence version 2.
DT 28-JUN-2023, entry version 197.
DT 08-NOV-2023, entry version 199.
DE RecName: Full=Serpin B5;
DE AltName: Full=Maspin;
DE AltName: Full=Peptidase inhibitor 5;
Expand Down Expand Up @@ -194,13 +194,14 @@ DR EPD; P36952; -.
DR jPOST; P36952; -.
DR MassIVE; P36952; -.
DR MaxQB; P36952; -.
DR PaxDb; P36952; -.
DR PaxDb; 9606-ENSP00000372221; -.
DR PeptideAtlas; P36952; -.
DR PRIDE; P36952; -.
DR ProteomicsDB; 55240; -. [P36952-1]
DR ProteomicsDB; 55241; -. [P36952-2]
DR Pumba; P36952; -.
DR TopDownProteomics; P36952-1; -. [P36952-1]
DR Antibodypedia; 4036; 620 antibodies from 43 providers.
DR Antibodypedia; 4036; 599 antibodies from 43 providers.
DR DNASU; 5268; -.
DR Ensembl; ENST00000382771.9; ENSP00000372221.4; ENSG00000206075.14. [P36952-1]
DR Ensembl; ENST00000489441.5; ENSP00000467158.1; ENSG00000206075.14. [P36952-2]
Expand Down Expand Up @@ -238,7 +239,7 @@ DR GenomeRNAi; 5268; -.
DR Pharos; P36952; Tbio.
DR PRO; PR:P36952; -.
DR Proteomes; UP000005640; Chromosome 18.
DR RNAct; P36952; protein.
DR RNAct; P36952; Protein.
DR Bgee; ENSG00000206075; Expressed in skin of abdomen and 95 other tissues.
DR ExpressionAtlas; P36952; baseline and differential.
DR Genevisible; P36952; HS.
Expand All @@ -249,7 +250,6 @@ DR GO; GO:0016528; C:sarcoplasm; IEA:Ensembl.
DR GO; GO:0004867; F:serine-type endopeptidase inhibitor activity; IBA:GO_Central.
DR GO; GO:0030198; P:extracellular matrix organization; IEA:Ensembl.
DR GO; GO:0002009; P:morphogenesis of an epithelium; IEA:Ensembl.
DR GO; GO:0010951; P:negative regulation of endopeptidase activity; IBA:GO_Central.
DR GO; GO:0060512; P:prostate gland morphogenesis; IEA:Ensembl.
DR GO; GO:0050678; P:regulation of epithelial cell proliferation; IEA:Ensembl.
DR CDD; cd02057; serpinB5_maspin; 1.
Expand Down
Loading

0 comments on commit a65dd49

Please sign in to comment.