From db722a81f97f910d056c2d9a761c002e43250cba Mon Sep 17 00:00:00 2001
From: Ruibin Liu
Date: Wed, 17 Apr 2024 16:02:15 -0400
Subject: [PATCH] Parse json instead for UniProt

---
 .pre-commit-config.yaml       |   2 +-
 pyuniprot/UniProt.py          | 186 ++++++++++++++++++++++++++++++++++
 pyuniprot/UniRef.py           |   4 +-
 pyuniprot/Uniprot.py          |   2 +
 pyuniprot/__init__.py         |   2 +
 pyuniprot/dict_to_property.py |  92 ++++++++++++++++++
 tests/test_dtop.py            |  80 ++++++++++++++
 tests/test_files/P36952.json  |   1 +
 tests/test_uniprot.py         | 120 ++++++++----------------
 tests/test_uniprot_legacy.py  | 124 ++++++++++++++++++++++++
 10 files changed, 528 insertions(+), 85 deletions(-)
 create mode 100644 pyuniprot/UniProt.py
 create mode 100644 pyuniprot/dict_to_property.py
 create mode 100644 tests/test_dtop.py
 create mode 100644 tests/test_files/P36952.json
 create mode 100644 tests/test_uniprot_legacy.py

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 513925a..9827388 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,7 +4,7 @@ repos:
     hooks:
       - id: check-builtin-literals
       - id: check-added-large-files
-      - id: check-case-conflict
+      # - id: check-case-conflict
       - id: check-toml
       - id: check-yaml
       - id: debug-statements
diff --git a/pyuniprot/UniProt.py b/pyuniprot/UniProt.py
new file mode 100644
index 0000000..89f6a7a
--- /dev/null
+++ b/pyuniprot/UniProt.py
@@ -0,0 +1,186 @@
+from __future__ import annotations
+
+import io
+import json
+import os
+import urllib.error
+import urllib.request
+from dataclasses import dataclass
+from pathlib import Path
+
+from .dict_to_property import DictToProp
+
+
+@dataclass
+class Sequence:
+    """Sequence."""
+
+    sequence: str
+    length: int
+    weight: int
+    crc_checksum_value: str
+    crc_bits: int
+
+
+class UniProt:
+    """The python object representing all information of a Uniprot JSON file."""
+
+    def __init__(
+        self,
+        uniprot_id: str,
+        save_json: bool = True,
+        local_download_dir: str | os.PathLike | None = None,
+    ) -> None:
+        """Init class with a Uniprot ID.
+
+        Args:
+            uniprot_id (str): Uniprot Access Number. The file
+                <local_download_dir>/<uniprot_id>.json is looked up first, and
+                if it is not found, the content is fetched from
+                https://rest.uniprot.org/uniprotkb/<uniprot_id>.
+            save_json (bool, optional): whether to save the fetched json
+                content to <local_download_dir>/<uniprot_id>.json when that
+                file does not already exist. Defaults to True.
+            local_download_dir (str | os.PathLike | None, optional): where to
+                save the downloaded Uniprot json file. Defaults to None and
+                the current working directory is used instead.
+
+        Raises:
+            ValueError: if the download fails with an HTTP error.
+        """
+        self._uniprot_id: str = uniprot_id
+        self.save_json: bool = save_json
+        if local_download_dir is None:
+            local_download_dir = os.getcwd()
+        self._local_download_dir: str | os.PathLike | None = local_download_dir
+        self._uniprot_json_url = f"https://rest.uniprot.org/uniprotkb/{self.uniprot_id}"
+        self._uniprot_json_file: str | os.PathLike | io.StringIO | None = None
+        json_file = Path(self.local_download_dir, f"{self.uniprot_id}.json")
+        if json_file.exists():
+            self._uniprot_json_file = json_file
+        # json.load() hands back the parsed document, not a str.
+        self._raw_json: dict | None = None
+        self._properties: dict = {}
+
+        self._get_raw_json()
+        self._get_properties()
+
+    @property
+    def uniprot_id(self):
+        return self._uniprot_id
+
+    @property
+    def local_download_dir(self):
+        return self._local_download_dir
+
+    @local_download_dir.setter
+    def local_download_dir(self, dir: str | os.PathLike):
+        """Set the directory to save downloaded Uniprot json files.
+
+        Args:
+            dir (str | os.PathLike): directory path str or Path.
+        """
+        self._local_download_dir = dir
+
+    @property
+    def uniprot_json_url(self):
+        return self._uniprot_json_url
+
+    @uniprot_json_url.setter
+    def uniprot_json_url(self, url: str):
+        """Set the Uniprot json file URL if not the official REST one.
+
+        Args:
+            url (str): URL link.
+        """
+        self._uniprot_json_url = url
+
+    @property
+    def uniprot_json_file(self):
+        return self._uniprot_json_file
+
+    @uniprot_json_file.setter
+    def uniprot_json_file(self, path: str | os.PathLike | io.StringIO):
+        """Set the UniProt json file path.
+
+        Args:
+            path (str | os.PathLike | io.StringIO): file-like or path to the file.
+
+        Raises:
+            FileExistsError: if <path> is an os.PathLike that is not in the
+                file system.
+        """
+        if isinstance(path, os.PathLike) and not Path(path).exists():
+            raise FileExistsError(f"Cannot find {path}.")
+        self._uniprot_json_file = path
+
+    @property
+    def raw_json(self):
+        return self._raw_json
+
+    @raw_json.setter
+    def raw_json(self, content: dict):
+        """Set the parsed uniprot JSON content.
+
+        Args:
+            content (dict): JSON document parsed into python objects.
+
+        Raises:
+            AttributeError: if it is already set.
+        """
+        if self.raw_json is None:
+            self._raw_json = content
+        else:
+            raise AttributeError("raw_json already set.")
+
+    def _get_raw_json(self) -> None:
+        """Get json content.
+
+        Reads the local json file when one is known; otherwise downloads the
+        entry first and, if ``save_json`` is set, writes it to the download
+        directory.
+
+        Raises:
+            ValueError: if the download fails with an HTTP error.
+        """
+        if self.uniprot_json_file is None:
+            try:
+                with urllib.request.urlopen(self.uniprot_json_url) as response:
+                    json_content = response.read().decode("utf-8")
+            except urllib.error.HTTPError as err:
+                raise ValueError(
+                    f"Cannot download from url {self.uniprot_json_url}."
+                ) from err
+
+            json_file: io.StringIO | os.PathLike = io.StringIO(json_content)
+            if self.save_json:
+                json_file = Path(self.local_download_dir, f"{self.uniprot_id}.json")
+                with open(json_file, "w", encoding="utf-8") as j_file:
+                    j_file.write(json_content)
+            self.uniprot_json_file = json_file
+
+        source = self.uniprot_json_file
+        if isinstance(source, io.StringIO):
+            self.raw_json = json.load(source)
+        else:
+            # A context manager closes the file even when parsing fails.
+            with open(source, "r", encoding="utf-8") as j_file:
+                self.raw_json = json.load(j_file)
+
+    def _get_properties(self) -> None:
+        """
+        Turn raw json to properties.
+        """
+        self._properties = DictToProp(self.raw_json)._properties
+
+    def __getattr__(self, key: str) -> str | list | DictToProp:
+        """Retrieve properties."""
+        # Go through __dict__: before __init__ has run (copy/pickle create the
+        # object without it) self._properties would re-enter __getattr__ and
+        # recurse without end.
+        properties = self.__dict__.get("_properties", {})
+        if key in properties:
+            return properties[key]
+        raise AttributeError(
+            f"'{type(self).__name__}' object has no attribute '{key}'"
+        )
diff --git a/pyuniprot/UniRef.py b/pyuniprot/UniRef.py
index 5fb60b5..6ed887f 100644
--- a/pyuniprot/UniRef.py
+++ b/pyuniprot/UniRef.py
@@ -170,9 +170,9 @@ def uniref_json(self, content: str):
         if self.uniref_json is None:
             self._uniref_json = content
         else:
-            raise AttributeError("category_lines already set.")
+            raise AttributeError("uniref_json already set.")
 
-    def _get_uniref_json(self):
+    def _get_uniref_json(self) -> None:
         """Get json content"""
         if self.uniref_json_file is None:
             try:
diff --git a/pyuniprot/Uniprot.py b/pyuniprot/Uniprot.py
index dbaed9a..a7b1358 100644
--- a/pyuniprot/Uniprot.py
+++ b/pyuniprot/Uniprot.py
@@ -1,3 +1,5 @@
+"""Legacy code to process txt file"""
+
 from __future__ import annotations
 
 import io
diff --git a/pyuniprot/__init__.py b/pyuniprot/__init__.py
index 367f44e..1661269 100644
--- a/pyuniprot/__init__.py
+++ b/pyuniprot/__init__.py
@@ -1,10 +1,12 @@
 from .Uniprot import Uniprot
+from .UniProt import UniProt
 from .UniRef import UniRef
 from .utils import get_alt_resids, get_isoforms
 from .version import __version__
 
 __all__ = [
     "Uniprot",
+    "UniProt",
     "UniRef",
     "get_isoforms",
     "get_alt_resids",
diff --git a/pyuniprot/dict_to_property.py b/pyuniprot/dict_to_property.py
new file mode 100644
index 0000000..826a9a3
--- /dev/null
+++ b/pyuniprot/dict_to_property.py
@@ -0,0 +1,92 @@
+from __future__ import annotations
+
+import keyword
+import warnings
+
+
+def validify_name(name: str) -> tuple[bool, str]:
+    """
+    Checks if the given name is a valid Python property name.
+    If not return a valid one as well.
+
+    Surrounding whitespace is stripped, spaces become underscores and every
+    other character that is neither alphanumeric nor an underscore is
+    dropped. An underscore is prepended when the result is empty, starts
+    with a digit or is a Python keyword.
+
+    Args:
+        name (str, required): dictionary key as str.
+
+    Returns:
+        (whether it is valid, validified one)
+    """
+    valid_name: str = name.strip().replace(" ", "_")
+    valid_name = "".join(c for c in valid_name if c.isalnum() or c == "_")
+
+    # Look at the first character only after the invalid ones are dropped:
+    # a key such as "#1a" is reduced to "1a" and still needs the prefix.
+    if (
+        not valid_name
+        or valid_name[0].isnumeric()
+        or keyword.iskeyword(valid_name)
+    ):
+        valid_name = "_" + valid_name
+
+    is_valid: bool = valid_name == name
+    if not is_valid:
+        warnings.warn(
+            f"key '{name}' is not a valid python variable. '{valid_name}' is used instead.",
+            RuntimeWarning,
+            stacklevel=2,
+        )
+
+    return is_valid, valid_name
+
+
+class DictToProp:
+    """Expose the keys of a nested python dict as attributes."""
+
+    def __init__(self, data):
+        self._data = data
+        if not isinstance(self._data, dict):
+            raise ValueError("Input is not a python dict")
+        self._properties = {}
+        self.create_properties()
+
+    def create_properties(self):
+        """Create properties based on the dictionary keys and values."""
+        for key, value in self._data.items():
+            key = validify_name(key)[1]
+            if isinstance(value, dict):
+                # The constructor already creates the nested properties;
+                # doing it again only repeats the work and the warnings.
+                self._properties[key] = DictToProp(value)
+            elif isinstance(value, list):
+                self._properties[key] = DictToProp.parse_list(value)
+            else:
+                self._properties[key] = value
+
+    @classmethod
+    def parse_list(cls, lst: list) -> list:
+        """Parse a list of dict recursively"""
+        result: list = []
+        for e in lst:
+            if isinstance(e, dict):
+                result.append(DictToProp(e))
+            elif isinstance(e, list):
+                result.append(DictToProp.parse_list(e))
+            else:
+                result.append(e)
+        return result
+
+    def __getattr__(self, key):
+        """Override the attribute access to retrieve properties."""
+        # Go through __dict__: before __init__ has run (copy/pickle create the
+        # object without it) self._properties would re-enter __getattr__ and
+        # recurse without end.
+        properties = self.__dict__.get("_properties", {})
+        if key in properties:
+            return properties[key]
+        raise AttributeError(
+            f"'{type(self).__name__}' object has no attribute '{key}'"
+        )
diff --git a/tests/test_dtop.py b/tests/test_dtop.py
new file mode 100644
index 0000000..118c7c4
--- /dev/null
+++ b/tests/test_dtop.py
@@ -0,0 +1,80 @@
+import os
+import sys
+
+import pytest
+
+from pyuniprot.dict_to_property import DictToProp, validify_name
+
+sys.path.append("..")
+CFD = os.path.dirname(__file__)
+CWD = os.getcwd()
+
+
+def test_validify_name():
+    """
+    Test the validify_name function.
+    """
+    empty = ""
+    warning_msg = "key '' is not a valid python variable. '_' is used instead."
+    with pytest.warns(RuntimeWarning, match=warning_msg):
+        assert validify_name(empty) == (False, "_"), "empty string not validified"
+
+    py_kw = "de#f"
+    warning_msg = "key 'de#f' is not a valid python variable. '_def' is used instead."
+    with pytest.warns(RuntimeWarning, match=warning_msg):
+        assert validify_name(py_kw) == (False, "_def"), "string 'de#f' not validified"
+
+    space_in = "t est"
+    warning_msg = "key 't est' is not a valid python variable. 't_est' is used instead."
+    with pytest.warns(RuntimeWarning, match=warning_msg):
+        assert validify_name(space_in) == (
+            False,
+            "t_est",
+        ), "string 't est' not validified"
+
+    wrong_start = "1a"
+    warning_msg = "key '1a' is not a valid python variable. '_1a' is used instead."
+    with pytest.warns(RuntimeWarning, match=warning_msg):
+        assert validify_name(wrong_start) == (
+            False,
+            "_1a",
+        ), "string '1a' not validified"
+
+    symbol_start = "#1a"
+    warning_msg = "key '#1a' is not a valid python variable. '_1a' is used instead."
+    with pytest.warns(RuntimeWarning, match=warning_msg):
+        assert validify_name(symbol_start) == (
+            False,
+            "_1a",
+        ), "string '#1a' not validified"
+
+    correct = "test"
+    assert validify_name(correct) == (True, "test"), "string 'test' not validified"
+
+
+@pytest.mark.filterwarnings("ignore")
+def test_DictToProp():
+    """
+    Test the DictToProp class.
+    """
+    test = {
+        "normal": 0,
+        "a_list": ["t", "e", "s", "t"],
+        "a_dict": {
+            "": "empty",
+            "def": "python keyword",
+            "t est": "space-in",
+            "1a": "wrong-start",
+            "_1234": "underscore-start",
+        },
+    }
+
+    t = DictToProp(test)
+
+    assert t.normal == 0, "normal propery failed"
+    assert t.a_list == ["t", "e", "s", "t"], "list property failed"
+    assert t.a_dict._ == "empty", "emtpy string key failed"
+    assert t.a_dict._def == "python keyword", "python keyword key failed"
+    assert t.a_dict.t_est == "space-in", "space-in key failed"
+    assert t.a_dict._1a == "wrong-start", "wrong-start key failed"
+    assert t.a_dict._1234 == "underscore-start", "underscore-start key failed"
diff --git a/tests/test_files/P36952.json b/tests/test_files/P36952.json
new file mode 100644
index 0000000..840c496
--- /dev/null
+++ b/tests/test_files/P36952.json
@@ -0,0 +1 @@
+{"entryType":"UniProtKB reviewed (Swiss-Prot)","primaryAccession":"P36952","secondaryAccessions":["B2R6Y4","Q6N0B4","Q8WW89"],"uniProtkbId":"SPB5_HUMAN","entryAudit":{"firstPublicDate":"1994-06-01","lastAnnotationUpdateDate":"2024-01-24","lastSequenceUpdateDate":"2009-05-05","entryVersion":200,"sequenceVersion":2},"annotationScore":5.0,"organism":{"scientificName":"Homo sapiens","commonName":"Human","taxonId":9606,"lineage":["Eukaryota","Metazoa","Chordata","Craniata","Vertebrata","Euteleostomi","Mammalia","Eutheria","Euarchontoglires","Primates","Haplorrhini","Catarrhini","Hominidae","Homo"]},"proteinExistence":"1: Evidence at protein level","proteinDescription":{"recommendedName":{"fullName":{"value":"Serpin B5"}},"alternativeNames":[{"fullName":{"value":"Maspin"}},{"fullName":{"value":"Peptidase inhibitor 5"},"shortNames":[{"value":"PI-5"}]}]},"genes":[{"geneName":{"value":"SERPINB5"},"synonyms":[{"value":"PI5"}]}],"comments":[{"texts":[{"value":"Tumor suppressor. It blocks the growth, invasion, and metastatic properties of mammary tumors. 
As it does not undergo the S (stressed) to R (relaxed) conformational transition characteristic of active serpins, it exhibits no serine protease inhibitory activity"}],"commentType":"FUNCTION"},{"texts":[{"evidences":[{"evidenceCode":"ECO:0000269","source":"PubMed","id":"16049006"}],"value":"Interacts with IRF6"}],"commentType":"SUBUNIT"},{"commentType":"INTERACTION","interactions":[{"interactantOne":{"uniProtKBAccession":"P36952","intActId":"EBI-2371394"},"interactantTwo":{"uniProtKBAccession":"Q9Y6M0","geneName":"PRSS21","intActId":"EBI-7054564"},"numberOfExperiments":7,"organismDiffer":false}]},{"commentType":"SUBCELLULAR LOCATION","subcellularLocations":[{"location":{"value":"Secreted, extracellular space","id":"SL-0112"}}]},{"commentType":"ALTERNATIVE PRODUCTS","events":["Alternative splicing"],"isoforms":[{"name":{"value":"1"},"isoformIds":["P36952-1"],"isoformSequenceStatus":"Displayed"},{"name":{"value":"2"},"isoformIds":["P36952-2"],"sequenceIds":["VSP_037145","VSP_037146"],"isoformSequenceStatus":"Described"}]},{"texts":[{"value":"Normal mammary epithelial cells"}],"commentType":"TISSUE SPECIFICITY"},{"texts":[{"evidences":[{"evidenceCode":"ECO:0000305"}],"value":"Belongs to the serpin family. Ov-serpin subfamily"}],"commentType":"SIMILARITY"},{"commentType":"WEB RESOURCE","resourceName":"Atlas of Genetics and Cytogenetics in Oncology and Haematology","resourceUrl":"https://atlasgeneticsoncology.org/gene/42267/SerpinB5","ftp":false}],"features":[{"type":"Chain","location":{"start":{"value":1,"modifier":"EXACT"},"end":{"value":375,"modifier":"EXACT"}},"description":"Serpin B5","featureId":"PRO_0000032486"},{"type":"Site","location":{"start":{"value":340,"modifier":"EXACT"},"end":{"value":341,"modifier":"EXACT"}},"description":"Reactive bond homolog","evidences":[{"evidenceCode":"ECO:0000250"}]},{"type":"Glycosylation","location":{"start":{"value":99,"modifier":"EXACT"},"end":{"value":99,"modifier":"EXACT"}},"description":"N-linked (GlcNAc...) 
asparagine","evidences":[{"evidenceCode":"ECO:0000255"}],"featureId":""},{"type":"Glycosylation","location":{"start":{"value":133,"modifier":"EXACT"},"end":{"value":133,"modifier":"EXACT"}},"description":"N-linked (GlcNAc...) asparagine","evidences":[{"evidenceCode":"ECO:0000255"}],"featureId":""},{"type":"Glycosylation","location":{"start":{"value":188,"modifier":"EXACT"},"end":{"value":188,"modifier":"EXACT"}},"description":"N-linked (GlcNAc...) asparagine","evidences":[{"evidenceCode":"ECO:0000255"}],"featureId":""},{"type":"Glycosylation","location":{"start":{"value":361,"modifier":"EXACT"},"end":{"value":361,"modifier":"EXACT"}},"description":"N-linked (GlcNAc...) asparagine","evidences":[{"evidenceCode":"ECO:0000255"}],"featureId":""},{"type":"Alternative sequence","location":{"start":{"value":190,"modifier":"EXACT"},"end":{"value":231,"modifier":"EXACT"}},"description":"in isoform 2","evidences":[{"evidenceCode":"ECO:0000303","source":"PubMed","id":"15489334"}],"featureId":"VSP_037145","alternativeSequence":{"originalSequence":"TDTKPVQMMNMEATFCMGNIDSINCKIIELPFQNKHLSMFIL","alternativeSequences":["VCGAACSSKRSPIIDVKNDRDRVGHKSIPMRNLRARPAKCLS"]}},{"type":"Alternative sequence","location":{"start":{"value":232,"modifier":"EXACT"},"end":{"value":375,"modifier":"EXACT"}},"description":"in isoform 2","evidences":[{"evidenceCode":"ECO:0000303","source":"PubMed","id":"15489334"}],"featureId":"VSP_037146","alternativeSequence":{}},{"type":"Natural variant","location":{"start":{"value":176,"modifier":"EXACT"},"end":{"value":176,"modifier":"EXACT"}},"description":"in 
dbSNP:rs2289519","featureCrossReferences":[{"database":"dbSNP","id":"rs2289519"}],"evidences":[{"evidenceCode":"ECO:0000269","source":"PubMed","id":"14702039"},{"evidenceCode":"ECO:0000269","source":"PubMed","id":"15489334"},{"evidenceCode":"ECO:0000269","source":"PubMed","id":"8290962"},{"evidenceCode":"ECO:0007744","source":"PubMed","id":"21269460"}],"featureId":"VAR_055223","alternativeSequence":{"originalSequence":"S","alternativeSequences":["P"]}},{"type":"Natural variant","location":{"start":{"value":187,"modifier":"EXACT"},"end":{"value":187,"modifier":"EXACT"}},"description":"in dbSNP:rs2289520","featureCrossReferences":[{"database":"dbSNP","id":"rs2289520"}],"evidences":[{"evidenceCode":"ECO:0000269","source":"PubMed","id":"8290962"}],"featureId":"VAR_055224","alternativeSequence":{"originalSequence":"V","alternativeSequences":["L"]}},{"type":"Natural variant","location":{"start":{"value":319,"modifier":"EXACT"},"end":{"value":319,"modifier":"EXACT"}},"description":"in dbSNP:rs1455555","featureCrossReferences":[{"database":"dbSNP","id":"rs1455555"}],"evidences":[{"evidenceCode":"ECO:0000269","source":"PubMed","id":"17974005"}],"featureId":"VAR_022115","alternativeSequence":{"originalSequence":"I","alternativeSequences":["V"]}},{"type":"Sequence conflict","location":{"start":{"value":66,"modifier":"EXACT"},"end":{"value":66,"modifier":"EXACT"}},"description":"in Ref. 1; AAA18957","evidences":[{"evidenceCode":"ECO:0000305"}],"alternativeSequence":{"originalSequence":"V","alternativeSequences":["I"]}},{"type":"Sequence conflict","location":{"start":{"value":245,"modifier":"EXACT"},"end":{"value":245,"modifier":"EXACT"}},"description":"in Ref. 
5; CAE45703","evidences":[{"evidenceCode":"ECO:0000305"}],"alternativeSequence":{"originalSequence":"K","alternativeSequences":["Q"]}},{"type":"Helix","location":{"start":{"value":2,"modifier":"EXACT"},"end":{"value":22,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Beta strand","location":{"start":{"value":28,"modifier":"EXACT"},"end":{"value":30,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1XQG"}]},{"type":"Helix","location":{"start":{"value":32,"modifier":"EXACT"},"end":{"value":45,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Helix","location":{"start":{"value":48,"modifier":"EXACT"},"end":{"value":57,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Helix","location":{"start":{"value":60,"modifier":"EXACT"},"end":{"value":62,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1XQG"}]},{"type":"Helix","location":{"start":{"value":66,"modifier":"EXACT"},"end":{"value":80,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Turn","location":{"start":{"value":81,"modifier":"EXACT"},"end":{"value":83,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Beta 
strand","location":{"start":{"value":84,"modifier":"EXACT"},"end":{"value":95,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Helix","location":{"start":{"value":96,"modifier":"EXACT"},"end":{"value":98,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Helix","location":{"start":{"value":102,"modifier":"EXACT"},"end":{"value":108,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Turn","location":{"start":{"value":109,"modifier":"EXACT"},"end":{"value":115,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Beta strand","location":{"start":{"value":116,"modifier":"EXACT"},"end":{"value":119,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Turn","location":{"start":{"value":121,"modifier":"EXACT"},"end":{"value":123,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Helix","location":{"start":{"value":125,"modifier":"EXACT"},"end":{"value":139,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Turn","location":{"start":{"value":140,"modifier":"EXACT"},"end":{"value":142,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Turn","location":{"start":{"value":147,"modifier":"EXACT"},"end":{"value":150,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Beta strand","location":{"start":{"value":159,"modifier":"EXACT"},"end":{"value":168,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Beta 
strand","location":{"start":{"value":171,"modifier":"EXACT"},"end":{"value":173,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Helix","location":{"start":{"value":177,"modifier":"EXACT"},"end":{"value":179,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Beta strand","location":{"start":{"value":181,"modifier":"EXACT"},"end":{"value":190,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Beta strand","location":{"start":{"value":192,"modifier":"EXACT"},"end":{"value":209,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Turn","location":{"start":{"value":210,"modifier":"EXACT"},"end":{"value":213,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Beta strand","location":{"start":{"value":214,"modifier":"EXACT"},"end":{"value":221,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Helix","location":{"start":{"value":222,"modifier":"EXACT"},"end":{"value":224,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Beta 
strand","location":{"start":{"value":225,"modifier":"EXACT"},"end":{"value":235,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Helix","location":{"start":{"value":239,"modifier":"EXACT"},"end":{"value":249,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Helix","location":{"start":{"value":252,"modifier":"EXACT"},"end":{"value":258,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Helix","location":{"start":{"value":261,"modifier":"EXACT"},"end":{"value":263,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Beta strand","location":{"start":{"value":265,"modifier":"EXACT"},"end":{"value":274,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Beta 
strand","location":{"start":{"value":276,"modifier":"EXACT"},"end":{"value":282,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Helix","location":{"start":{"value":284,"modifier":"EXACT"},"end":{"value":291,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Turn","location":{"start":{"value":295,"modifier":"EXACT"},"end":{"value":297,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1XU8"}]},{"type":"Turn","location":{"start":{"value":299,"modifier":"EXACT"},"end":{"value":301,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Turn","location":{"start":{"value":305,"modifier":"EXACT"},"end":{"value":307,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Beta strand","location":{"start":{"value":315,"modifier":"EXACT"},"end":{"value":326,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Turn","location":{"start":{"value":337,"modifier":"EXACT"},"end":{"value":341,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1XQG"}]},{"type":"Beta strand","location":{"start":{"value":344,"modifier":"EXACT"},"end":{"value":349,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Beta 
strand","location":{"start":{"value":354,"modifier":"EXACT"},"end":{"value":360,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Turn","location":{"start":{"value":361,"modifier":"EXACT"},"end":{"value":364,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]},{"type":"Beta strand","location":{"start":{"value":365,"modifier":"EXACT"},"end":{"value":372,"modifier":"EXACT"}},"description":"","evidences":[{"evidenceCode":"ECO:0007829","source":"PDB","id":"1WZ9"}]}],"keywords":[{"id":"KW-0002","category":"Technical term","name":"3D-structure"},{"id":"KW-0025","category":"Coding sequence diversity","name":"Alternative splicing"},{"id":"KW-0903","category":"Technical term","name":"Direct protein sequencing"},{"id":"KW-0325","category":"PTM","name":"Glycoprotein"},{"id":"KW-1185","category":"Technical term","name":"Reference proteome"},{"id":"KW-0964","category":"Cellular component","name":"Secreted"}],"references":[{"referenceNumber":1,"citation":{"id":"8290962","citationType":"journal article","authors":["Zou Z.","Anisowicz A.","Hendrix M.J.C.","Thor A.","Neveu M.","Sheng S.","Rafidi K.","Seftor E.","Sager R."],"citationCrossReferences":[{"database":"PubMed","id":"8290962"},{"database":"DOI","id":"10.1126/science.8290962"}],"title":"Maspin, a serpin with tumor-suppressing activity in human mammary epithelial cells.","publicationDate":"1994","journal":"Science","firstPage":"526","lastPage":"529","volume":"263"},"referencePositions":["NUCLEOTIDE SEQUENCE [MRNA] (ISOFORM 1)","VARIANTS PRO-176 AND LEU-187"],"referenceComments":[{"value":"Mammary gland","type":"TISSUE"}]},{"referenceNumber":2,"citation":{"id":"14702039","citationType":"journal article","authors":["Ota T.","Suzuki Y.","Nishikawa T.","Otsuki T.","Sugiyama T.","Irie R.","Wakamatsu A.","Hayashi K.","Sato H.","Nagai K.","Kimura K.","Makita H.","Sekine M.","Obayashi M.","Nishi 
T.","Shibahara T.","Tanaka T.","Ishii S.","Yamamoto J.","Saito K.","Kawai Y.","Isono Y.","Nakamura Y.","Nagahari K.","Murakami K.","Yasuda T.","Iwayanagi T.","Wagatsuma M.","Shiratori A.","Sudo H.","Hosoiri T.","Kaku Y.","Kodaira H.","Kondo H.","Sugawara M.","Takahashi M.","Kanda K.","Yokoi T.","Furuya T.","Kikkawa E.","Omura Y.","Abe K.","Kamihara K.","Katsuta N.","Sato K.","Tanikawa M.","Yamazaki M.","Ninomiya K.","Ishibashi T.","Yamashita H.","Murakawa K.","Fujimori K.","Tanai H.","Kimata M.","Watanabe M.","Hiraoka S.","Chiba Y.","Ishida S.","Ono Y.","Takiguchi S.","Watanabe S.","Yosida M.","Hotuta T.","Kusano J.","Kanehori K.","Takahashi-Fujii A.","Hara H.","Tanase T.-O.","Nomura Y.","Togiya S.","Komai F.","Hara R.","Takeuchi K.","Arita M.","Imose N.","Musashino K.","Yuuki H.","Oshima A.","Sasaki N.","Aotsuka S.","Yoshikawa Y.","Matsunawa H.","Ichihara T.","Shiohata N.","Sano S.","Moriya S.","Momiyama H.","Satoh N.","Takami S.","Terashima Y.","Suzuki O.","Nakagawa S.","Senoh A.","Mizoguchi H.","Goto Y.","Shimizu F.","Wakebe H.","Hishigaki H.","Watanabe T.","Sugiyama A.","Takemoto M.","Kawakami B.","Yamazaki M.","Watanabe K.","Kumagai A.","Itakura S.","Fukuzumi Y.","Fujimori Y.","Komiyama M.","Tashiro H.","Tanigami A.","Fujiwara T.","Ono T.","Yamada K.","Fujii Y.","Ozaki K.","Hirao M.","Ohmori Y.","Kawabata A.","Hikiji T.","Kobatake N.","Inagaki H.","Ikema Y.","Okamoto S.","Okitani R.","Kawakami T.","Noguchi S.","Itoh T.","Shigeta K.","Senba T.","Matsumura K.","Nakajima Y.","Mizuno T.","Morinaga M.","Sasaki M.","Togashi T.","Oyama M.","Hata H.","Watanabe M.","Komatsu T.","Mizushima-Sugano J.","Satoh T.","Shirai Y.","Takahashi Y.","Nakagawa K.","Okumura K.","Nagase T.","Nomura N.","Kikuchi H.","Masuho Y.","Yamashita R.","Nakai K.","Yada T.","Nakamura Y.","Ohara O.","Isogai T.","Sugano S."],"citationCrossReferences":[{"database":"PubMed","id":"14702039"},{"database":"DOI","id":"10.1038/ng1285"}],"title":"Complete sequencing and characterization of 21,243 
full-length human cDNAs.","publicationDate":"2004","journal":"Nat. Genet.","firstPage":"40","lastPage":"45","volume":"36"},"referencePositions":["NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 1)","VARIANT PRO-176"],"referenceComments":[{"value":"Esophagus","type":"TISSUE"}]},{"referenceNumber":3,"citation":{"id":"16177791","citationType":"journal article","authors":["Nusbaum C.","Zody M.C.","Borowsky M.L.","Kamal M.","Kodira C.D.","Taylor T.D.","Whittaker C.A.","Chang J.L.","Cuomo C.A.","Dewar K.","FitzGerald M.G.","Yang X.","Abouelleil A.","Allen N.R.","Anderson S.","Bloom T.","Bugalter B.","Butler J.","Cook A.","DeCaprio D.","Engels R.","Garber M.","Gnirke A.","Hafez N.","Hall J.L.","Norman C.H.","Itoh T.","Jaffe D.B.","Kuroki Y.","Lehoczky J.","Lui A.","Macdonald P.","Mauceli E.","Mikkelsen T.S.","Naylor J.W.","Nicol R.","Nguyen C.","Noguchi H.","O'Leary S.B.","Piqani B.","Smith C.L.","Talamas J.A.","Topham K.","Totoki Y.","Toyoda A.","Wain H.M.","Young S.K.","Zeng Q.","Zimmer A.R.","Fujiyama A.","Hattori M.","Birren B.W.","Sakaki Y.","Lander E.S."],"citationCrossReferences":[{"database":"PubMed","id":"16177791"},{"database":"DOI","id":"10.1038/nature03983"}],"title":"DNA sequence and analysis of human chromosome 18.","publicationDate":"2005","journal":"Nature","firstPage":"551","lastPage":"555","volume":"437"},"referencePositions":["NUCLEOTIDE SEQUENCE [LARGE SCALE GENOMIC DNA]"]},{"referenceNumber":4,"citation":{"id":"15489334","citationType":"journal article","authoringGroup":["The MGC Project Team"],"citationCrossReferences":[{"database":"PubMed","id":"15489334"},{"database":"DOI","id":"10.1101/gr.2596504"}],"title":"The status, quality, and expansion of the NIH full-length cDNA project: the Mammalian Gene Collection (MGC).","publicationDate":"2004","journal":"Genome Res.","firstPage":"2121","lastPage":"2127","volume":"14"},"referencePositions":["NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] (ISOFORM 2)","VARIANT 
PRO-176"],"referenceComments":[{"value":"Prostate","type":"TISSUE"}]},{"referenceNumber":5,"citation":{"id":"17974005","citationType":"journal article","authors":["Bechtel S.","Rosenfelder H.","Duda A.","Schmidt C.P.","Ernst U.","Wellenreuther R.","Mehrle A.","Schuster C.","Bahr A.","Bloecker H.","Heubner D.","Hoerlein A.","Michel G.","Wedler H.","Koehrer K.","Ottenwaelder B.","Poustka A.","Wiemann S.","Schupp I."],"citationCrossReferences":[{"database":"PubMed","id":"17974005"},{"database":"DOI","id":"10.1186/1471-2164-8-399"}],"title":"The full-ORF clone resource of the German cDNA consortium.","publicationDate":"2007","journal":"BMC Genomics","firstPage":"399","lastPage":"399","volume":"8"},"referencePositions":["NUCLEOTIDE SEQUENCE [LARGE SCALE MRNA] OF 245-375 (ISOFORM 1)","VARIANT VAL-319"],"referenceComments":[{"value":"Small intestine","type":"TISSUE"}]},{"referenceNumber":6,"citation":{"id":"7797587","citationType":"journal article","authors":["Pemberton P.A.","Wong D.T.","Gibson H.L.","Kiefer M.C.","Fitzpatrick P.A.","Sager R.","Barr P.J."],"citationCrossReferences":[{"database":"PubMed","id":"7797587"},{"database":"DOI","id":"10.1074/jbc.270.26.15832"}],"title":"The tumor suppressor maspin does not undergo the stressed to relaxed transition or inhibit trypsin-like serine proteases. Evidence that maspin is not a protease inhibitory serpin.","publicationDate":"1995","journal":"J. Biol. 
Chem.","firstPage":"15832","lastPage":"15837","volume":"270"},"referencePositions":["PROTEIN SEQUENCE OF 341-360","CHARACTERIZATION"]},{"referenceNumber":7,"citation":{"id":"16049006","citationType":"journal article","authors":["Bailey C.M.","Khalkhali-Ellis Z.","Kondo S.","Margaryan N.V.","Seftor R.E.B.","Wheaton W.W.","Amir S.","Pins M.R.","Schutte B.C.","Hendrix M.J.C."],"citationCrossReferences":[{"database":"PubMed","id":"16049006"},{"database":"DOI","id":"10.1074/jbc.m503523200"}],"title":"Mammary serine protease inhibitor (Maspin) binds directly to interferon regulatory factor 6: identification of a novel serpin partnership.","publicationDate":"2005","journal":"J. Biol. Chem.","firstPage":"34210","lastPage":"34217","volume":"280"},"referencePositions":["INTERACTION WITH IRF6"]},{"referenceNumber":8,"citation":{"id":"23186163","citationType":"journal article","authors":["Zhou H.","Di Palma S.","Preisinger C.","Peng M.","Polat A.N.","Heck A.J.","Mohammed S."],"citationCrossReferences":[{"database":"PubMed","id":"23186163"},{"database":"DOI","id":"10.1021/pr300630k"}],"title":"Toward a comprehensive characterization of a human cancer cell phosphoproteome.","publicationDate":"2013","journal":"J. 
Proteome Res.","firstPage":"260","lastPage":"271","volume":"12"},"referencePositions":["IDENTIFICATION BY MASS SPECTROMETRY [LARGE SCALE ANALYSIS]"],"referenceComments":[{"value":"Cervix carcinoma","type":"TISSUE"}]},{"referenceNumber":9,"citation":{"id":"15760906","citationType":"journal article","authors":["Law R.H.","Irving J.A.","Buckle A.M.","Ruzyla K.","Buzza M.","Bashtannyk-Puhalovich T.A.","Beddoe T.C.","Nguyen K.","Worrall D.M.","Bottomley S.P.","Bird P.I.","Rossjohn J.","Whisstock J.C."],"citationCrossReferences":[{"database":"PubMed","id":"15760906"},{"database":"DOI","id":"10.1074/jbc.m412043200"}],"title":"The high resolution crystal structure of the human tumor suppressor maspin reveals a novel conformational switch in the G-helix.","publicationDate":"2005","journal":"J. Biol. Chem.","firstPage":"22356","lastPage":"22364","volume":"280"},"referencePositions":["X-RAY CRYSTALLOGRAPHY (2.1 ANGSTROMS)"]},{"referenceNumber":10,"citation":{"id":"21269460","citationType":"journal article","authors":["Burkard T.R.","Planyavsky M.","Kaupe I.","Breitwieser F.P.","Buerckstuemmer T.","Bennett K.L.","Superti-Furga G.","Colinge J."],"citationCrossReferences":[{"database":"PubMed","id":"21269460"},{"database":"DOI","id":"10.1186/1752-0509-5-17"}],"title":"Initial characterization of the human central proteome.","publicationDate":"2011","journal":"BMC Syst. 
Biol.","firstPage":"17","lastPage":"17","volume":"5"},"referencePositions":["VARIANT [LARGE SCALE ANALYSIS] PRO-176","IDENTIFICATION BY MASS SPECTROMETRY [LARGE SCALE ANALYSIS]"]}],"uniProtKBCrossReferences":[{"database":"EMBL","id":"U04313","properties":[{"key":"ProteinId","value":"AAA18957.1"},{"key":"Status","value":"-"},{"key":"MoleculeType","value":"mRNA"}]},{"database":"EMBL","id":"AK312765","properties":[{"key":"ProteinId","value":"BAG35631.1"},{"key":"Status","value":"-"},{"key":"MoleculeType","value":"mRNA"}]},{"database":"EMBL","id":"AC036176","properties":[{"key":"ProteinId","value":"-"},{"key":"Status","value":"NOT_ANNOTATED_CDS"},{"key":"MoleculeType","value":"Genomic_DNA"}]},{"database":"EMBL","id":"BC020713","properties":[{"key":"ProteinId","value":"AAH20713.1"},{"key":"Status","value":"-"},{"key":"MoleculeType","value":"mRNA"}]},{"database":"EMBL","id":"BX640597","properties":[{"key":"ProteinId","value":"CAE45703.1"},{"key":"Status","value":"-"},{"key":"MoleculeType","value":"mRNA"}]},{"database":"CCDS","id":"CCDS32839.1","properties":[{"key":"Description","value":"-"}],"isoformId":"P36952-1"},{"database":"PIR","id":"A36898","properties":[{"key":"EntryName","value":"A36898"}]},{"database":"RefSeq","id":"NP_002630.2","properties":[{"key":"NucleotideSequenceId","value":"NM_002639.4"}],"isoformId":"P36952-1"},{"database":"PDB","id":"1WZ9","properties":[{"key":"Method","value":"X-ray"},{"key":"Resolution","value":"2.10 A"},{"key":"Chains","value":"A/B=1-375"}]},{"database":"PDB","id":"1XQG","properties":[{"key":"Method","value":"X-ray"},{"key":"Resolution","value":"3.10 A"},{"key":"Chains","value":"A/B=1-375"}]},{"database":"PDB","id":"1XQJ","properties":[{"key":"Method","value":"X-ray"},{"key":"Resolution","value":"3.10 A"},{"key":"Chains","value":"A=1-375"}]},{"database":"PDB","id":"1XU8","properties":[{"key":"Method","value":"X-ray"},{"key":"Resolution","value":"2.80 
A"},{"key":"Chains","value":"A/B=1-375"}]},{"database":"PDBsum","id":"1WZ9","properties":[{"key":"Description","value":"-"}]},{"database":"PDBsum","id":"1XQG","properties":[{"key":"Description","value":"-"}]},{"database":"PDBsum","id":"1XQJ","properties":[{"key":"Description","value":"-"}]},{"database":"PDBsum","id":"1XU8","properties":[{"key":"Description","value":"-"}]},{"database":"AlphaFoldDB","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"SMR","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"BioGRID","id":"111286","properties":[{"key":"Interactions","value":"164"}]},{"database":"IntAct","id":"P36952","properties":[{"key":"Interactions","value":"54"}]},{"database":"MINT","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"STRING","id":"9606.ENSP00000372221","properties":[{"key":"Description","value":"-"}]},{"database":"DrugBank","id":"DB04530","properties":[{"key":"GenericName","value":"S,S-(2-Hydroxyethyl)Thiocysteine"}]},{"database":"MEROPS","id":"I04.980","properties":[{"key":"Description","value":"-"}]},{"database":"GlyCosmos","id":"P36952","properties":[{"key":"glycosylation","value":"4 sites, No reported glycans"}]},{"database":"GlyGen","id":"P36952","properties":[{"key":"glycosylation","value":"5 sites, 1 O-linked glycan (1 
site)"}]},{"database":"iPTMnet","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"MetOSite","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"PhosphoSitePlus","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"SwissPalm","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"BioMuta","id":"SERPINB5","properties":[{"key":"Description","value":"-"}]},{"database":"DMDM","id":"229462757","properties":[{"key":"Description","value":"-"}]},{"database":"CPTAC","id":"CPTAC-582","properties":[{"key":"Description","value":"-"}]},{"database":"CPTAC","id":"CPTAC-583","properties":[{"key":"Description","value":"-"}]},{"database":"EPD","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"jPOST","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"MassIVE","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"MaxQB","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"PaxDb","id":"9606-ENSP00000372221","properties":[{"key":"Description","value":"-"}]},{"database":"PeptideAtlas","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"PRIDE","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"ProteomicsDB","id":"55240","properties":[{"key":"Description","value":"-"}],"isoformId":"P36952-1"},{"database":"ProteomicsDB","id":"55241","properties":[{"key":"Description","value":"-"}],"isoformId":"P36952-2"},{"database":"Pumba","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"TopDownProteomics","id":"P36952-1","properties":[{"key":"Description","value":"-"}],"isoformId":"P36952-1"},{"database":"Antibodypedia","id":"4036","properties":[{"key":"antibodies","value":"650 antibodies from 43 
providers"}]},{"database":"DNASU","id":"5268","properties":[{"key":"Description","value":"-"}]},{"database":"Ensembl","id":"ENST00000382771.9","properties":[{"key":"ProteinId","value":"ENSP00000372221.4"},{"key":"GeneId","value":"ENSG00000206075.14"}],"isoformId":"P36952-1"},{"database":"Ensembl","id":"ENST00000489441.5","properties":[{"key":"ProteinId","value":"ENSP00000467158.1"},{"key":"GeneId","value":"ENSG00000206075.14"}],"isoformId":"P36952-2"},{"database":"GeneID","id":"5268","properties":[{"key":"Description","value":"-"}]},{"database":"KEGG","id":"hsa:5268","properties":[{"key":"Description","value":"-"}]},{"database":"MANE-Select","id":"ENST00000382771.9","properties":[{"key":"ProteinId","value":"ENSP00000372221.4"},{"key":"RefSeqNucleotideId","value":"NM_002639.5"},{"key":"RefSeqProteinId","value":"NP_002630.2"}]},{"database":"UCSC","id":"uc002liy.3","properties":[{"key":"OrganismName","value":"human"}],"isoformId":"P36952-1"},{"database":"AGR","id":"HGNC:8949","properties":[{"key":"Description","value":"-"}]},{"database":"CTD","id":"5268","properties":[{"key":"Description","value":"-"}]},{"database":"DisGeNET","id":"5268","properties":[{"key":"Description","value":"-"}]},{"database":"GeneCards","id":"SERPINB5","properties":[{"key":"Description","value":"-"}]},{"database":"HGNC","id":"HGNC:8949","properties":[{"key":"GeneName","value":"SERPINB5"}]},{"database":"HPA","id":"ENSG00000206075","properties":[{"key":"ExpressionPatterns","value":"Tissue enhanced (esophagus, 
skin)"}]},{"database":"MIM","id":"154790","properties":[{"key":"Type","value":"gene"}]},{"database":"neXtProt","id":"NX_P36952","properties":[{"key":"Description","value":"-"}]},{"database":"OpenTargets","id":"ENSG00000206075","properties":[{"key":"Description","value":"-"}]},{"database":"PharmGKB","id":"PA35515","properties":[{"key":"Description","value":"-"}]},{"database":"VEuPathDB","id":"HostDB:ENSG00000206075","properties":[{"key":"Description","value":"-"}]},{"database":"eggNOG","id":"KOG2392","properties":[{"key":"ToxonomicScope","value":"Eukaryota"}]},{"database":"GeneTree","id":"ENSGT00940000160674","properties":[{"key":"Description","value":"-"}]},{"database":"HOGENOM","id":"CLU_023330_0_2_1","properties":[{"key":"Description","value":"-"}]},{"database":"InParanoid","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"OMA","id":"IFAPLCT","properties":[{"key":"Fingerprint","value":"-"}]},{"database":"OrthoDB","id":"3218836at2759","properties":[{"key":"Description","value":"-"}]},{"database":"PhylomeDB","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"TreeFam","id":"TF352619","properties":[{"key":"Description","value":"-"}]},{"database":"PathwayCommons","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"SignaLink","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"SIGNOR","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"BioGRID-ORCS","id":"5268","properties":[{"key":"hits","value":"8 hits in 1153 CRISPR 
screens"}]},{"database":"ChiTaRS","id":"SERPINB5","properties":[{"key":"OrganismName","value":"human"}]},{"database":"EvolutionaryTrace","id":"P36952","properties":[{"key":"Description","value":"-"}]},{"database":"GeneWiki","id":"Maspin","properties":[{"key":"Description","value":"-"}]},{"database":"GenomeRNAi","id":"5268","properties":[{"key":"Description","value":"-"}]},{"database":"Pharos","id":"P36952","properties":[{"key":"DevelopmentLevel","value":"Tbio"}]},{"database":"PRO","id":"PR:P36952","properties":[{"key":"Description","value":"-"}]},{"database":"Proteomes","id":"UP000005640","properties":[{"key":"Component","value":"Chromosome 18"}]},{"database":"RNAct","id":"P36952","properties":[{"key":"moleculeType","value":"Protein"}]},{"database":"Bgee","id":"ENSG00000206075","properties":[{"key":"ExpressionPatterns","value":"Expressed in skin of abdomen and 97 other cell types or tissues"}]},{"database":"ExpressionAtlas","id":"P36952","properties":[{"key":"ExpressionPatterns","value":"baseline and differential"}]},{"database":"Genevisible","id":"P36952","properties":[{"key":"OrganismId","value":"HS"}]},{"database":"GO","id":"GO:0001533","properties":[{"key":"GoTerm","value":"C:cornified envelope"},{"key":"GoEvidenceType","value":"IEA:Ensembl"}]},{"database":"GO","id":"GO:0005737","properties":[{"key":"GoTerm","value":"C:cytoplasm"},{"key":"GoEvidenceType","value":"IDA:UniProtKB"}],"evidences":[{"evidenceCode":"ECO:0000314","source":"PubMed","id":"16049006"}]},{"database":"GO","id":"GO:0005615","properties":[{"key":"GoTerm","value":"C:extracellular space"},{"key":"GoEvidenceType","value":"IBA:GO_Central"}],"evidences":[{"evidenceCode":"ECO:0000318","source":"PubMed","id":"21873635"}]},{"database":"GO","id":"GO:0016528","properties":[{"key":"GoTerm","value":"C:sarcoplasm"},{"key":"GoEvidenceType","value":"IEA:Ensembl"}]},{"database":"GO","id":"GO:0004867","properties":[{"key":"GoTerm","value":"F:serine-type endopeptidase inhibitor 
activity"},{"key":"GoEvidenceType","value":"IBA:GO_Central"}],"evidences":[{"evidenceCode":"ECO:0000318","source":"PubMed","id":"21873635"}]},{"database":"GO","id":"GO:0030198","properties":[{"key":"GoTerm","value":"P:extracellular matrix organization"},{"key":"GoEvidenceType","value":"IEA:Ensembl"}]},{"database":"GO","id":"GO:0002009","properties":[{"key":"GoTerm","value":"P:morphogenesis of an epithelium"},{"key":"GoEvidenceType","value":"IEA:Ensembl"}]},{"database":"GO","id":"GO:0060512","properties":[{"key":"GoTerm","value":"P:prostate gland morphogenesis"},{"key":"GoEvidenceType","value":"IEA:Ensembl"}]},{"database":"GO","id":"GO:0050678","properties":[{"key":"GoTerm","value":"P:regulation of epithelial cell proliferation"},{"key":"GoEvidenceType","value":"IEA:Ensembl"}]},{"database":"CDD","id":"cd02057","properties":[{"key":"EntryName","value":"serpinB5_maspin"},{"key":"MatchStatus","value":"1"}]},{"database":"Gene3D","id":"2.30.39.10","properties":[{"key":"EntryName","value":"Alpha-1-antitrypsin, domain 1"},{"key":"MatchStatus","value":"1"}]},{"database":"Gene3D","id":"3.30.497.10","properties":[{"key":"EntryName","value":"Antithrombin, subunit I, domain 
2"},{"key":"MatchStatus","value":"1"}]},{"database":"InterPro","id":"IPR000240","properties":[{"key":"EntryName","value":"Serpin_B9/Maspin"}]},{"database":"InterPro","id":"IPR023795","properties":[{"key":"EntryName","value":"Serpin_CS"}]},{"database":"InterPro","id":"IPR023796","properties":[{"key":"EntryName","value":"Serpin_dom"}]},{"database":"InterPro","id":"IPR000215","properties":[{"key":"EntryName","value":"Serpin_fam"}]},{"database":"InterPro","id":"IPR036186","properties":[{"key":"EntryName","value":"Serpin_sf"}]},{"database":"InterPro","id":"IPR042178","properties":[{"key":"EntryName","value":"Serpin_sf_1"}]},{"database":"InterPro","id":"IPR042185","properties":[{"key":"EntryName","value":"Serpin_sf_2"}]},{"database":"InterPro","id":"IPR033836","properties":[{"key":"EntryName","value":"SERPINB5_serpin_dom"}]},{"database":"PANTHER","id":"PTHR11461","properties":[{"key":"EntryName","value":"SERINE PROTEASE INHIBITOR, SERPIN"},{"key":"MatchStatus","value":"1"}]},{"database":"PANTHER","id":"PTHR11461:SF55","properties":[{"key":"EntryName","value":"SERPIN 
B5"},{"key":"MatchStatus","value":"1"}]},{"database":"Pfam","id":"PF00079","properties":[{"key":"EntryName","value":"Serpin"},{"key":"MatchStatus","value":"1"}]},{"database":"PRINTS","id":"PR00676","properties":[{"key":"EntryName","value":"MASPIN"}]},{"database":"SMART","id":"SM00093","properties":[{"key":"EntryName","value":"SERPIN"},{"key":"MatchStatus","value":"1"}]},{"database":"SUPFAM","id":"SSF56574","properties":[{"key":"EntryName","value":"Serpins"},{"key":"MatchStatus","value":"1"}]},{"database":"PROSITE","id":"PS00284","properties":[{"key":"EntryName","value":"SERPIN"},{"key":"MatchStatus","value":"1"}]}],"sequence":{"value":"MDALQLANSAFAVDLFKQLCEKEPLGNVLFSPICLSTSLSLAQVGAKGDTANEIGQVLHFENVKDVPFGFQTVTSDVNKLSSFYSLKLIKRLYVDKSLNLSTEFISSTKRPYAKELETVDFKDKLEETKGQINNSIKDLTDGHFENILADNSVNDQTKILVVNAAYFVGKWMKKFSESETKECPFRVNKTDTKPVQMMNMEATFCMGNIDSINCKIIELPFQNKHLSMFILLPKDVEDESTGLEKIEKQLNSESLSQWTNPSTMANAKVKLSIPKFKVEKMIDPKACLENLGLKHIFSEDTSDFSGMSETKGVALSNVIHKVCLEITEDGGDSIEVPGARILQHKDELNADHPFIYIIRHNKTRNIIFFGKFCSP","length":375,"molWeight":42100,"crc64":"9F24E18505912804","md5":"07D06FD58D1EAB7DD89B019EDD40CC71"},"extraAttributes":{"countByCommentType":{"FUNCTION":1,"SUBUNIT":1,"INTERACTION":1,"SUBCELLULAR LOCATION":1,"ALTERNATIVE PRODUCTS":2,"TISSUE SPECIFICITY":1,"SIMILARITY":1,"WEB RESOURCE":1},"countByFeatureType":{"Chain":1,"Site":1,"Glycosylation":4,"Alternative sequence":2,"Natural variant":3,"Sequence conflict":2,"Helix":14,"Beta strand":15,"Turn":11},"uniParcId":"UPI0000201E51"}} \ No newline at end of file diff --git a/tests/test_uniprot.py b/tests/test_uniprot.py index d475977..2afd9e7 100644 --- a/tests/test_uniprot.py +++ b/tests/test_uniprot.py @@ -2,25 +2,27 @@ import sys from pathlib import Path -from pyuniprot.Uniprot import Uniprot +import pytest + +from pyuniprot.UniProt import UniProt sys.path.append("..") CFD = os.path.dirname(__file__) CWD = os.getcwd() -def test_get_properties(): +@pytest.mark.filterwarnings("ignore") +def test_get_basic_properties(): 
""" - Test class properties + Test class properties (not json content) """ uniprot_id = "P36952" - uniprot = Uniprot(uniprot_id) + uniprot = UniProt(uniprot_id) assert ( - uniprot.uniprot_txt_url - == f"https://rest.uniprot.org/uniprotkb/{uniprot_id}.txt" - ), "Uniprot TXT file link not right." + uniprot.uniprot_json_url == f"https://rest.uniprot.org/uniprotkb/{uniprot_id}" + ), "UniProt json file link not right." - file_path = Path(uniprot.local_download_dir, f"{uniprot.uniprot_id}.txt") + file_path = Path(uniprot.local_download_dir, f"{uniprot.uniprot_id}.json") if not file_path.exists(): file_path = None @@ -28,97 +30,51 @@ def test_get_properties(): uniprot.local_download_dir == os.getcwd() ), "local_download_dir not user's CWD." assert ( - uniprot.uniprot_txt_file == file_path - ), "Uniprot TXT file path not right when not existed." + uniprot.uniprot_json_file == file_path + ), "UniProt json file path not right when not existed." - uniprot = Uniprot( + uniprot = UniProt( uniprot_id, local_download_dir=Path(CFD, "test_files"), ) assert uniprot.local_download_dir == Path( CFD, "test_files" ), "local_download_dir not pointing to test_files." - assert uniprot.uniprot_txt_file == Path( - CFD, "test_files", f"{uniprot.uniprot_id}.txt" - ), "Uniprot TXT file path not right when existed." + assert uniprot.uniprot_json_file == Path( + CFD, "test_files", f"{uniprot.uniprot_id}.json" + ), "UniProt json file path not right when existed." try: - os.remove(f"{CWD}/{uniprot_id}.txt") + os.remove(f"{CWD}/{uniprot_id}.json") except OSError: pass -def test_get_category_lines(): - """Test the _get_category_lines function.""" - uniprot_id = "P04637" - uniprot = Uniprot( - uniprot_id, save_txt=True, local_download_dir=Path(CFD, "test_files") - ) - category_lines = uniprot.category_lines - assert ( - category_lines["SQ"].length == 393 - ), "P04637 sequence length in SQ not read as integer 393." 
- - -def test_empty_file(): - """Test the _get_category_lines function.""" - uniprot_id = "P30042" - uniprot = Uniprot( - uniprot_id, save_txt=True, local_download_dir=Path(CFD, "test_files") - ) - category_lines = uniprot.category_lines - assert category_lines == {}, "P04637 sequence length in SQ not read as integer 393." - - -def test_no_panther(): - """Test the _get_category_lines function.""" - uniprot_id = "Q8IUI8" - uniprot = Uniprot( - uniprot_id, save_txt=True, local_download_dir=Path(CFD, "test_files") +@pytest.mark.filterwarnings("ignore") +def test_get_raw_json(): + """Test raw json.""" + uniprot_id = "P36952" + uniprot = UniProt( + uniprot_id, save_json=True, local_download_dir=Path(CFD, "test_files") ) - category_lines = uniprot.category_lines assert ( - len(category_lines["DR"].database_references["PANTHER"]) == 0 - ), "Q8IUI8 has wrong PANTHER," - - -def test_empty_resid(): - """Test the _get_category_lines function.""" - uniprot_id = "Q9NPA5" - uniprot = Uniprot( - uniprot_id, save_txt=True, local_download_dir=Path(CFD, "test_files") - ) - category_lines = uniprot.category_lines + uniprot.raw_json["primaryAccession"] == "P36952" + ), "raw json primaryAccession wrong" assert ( - category_lines["DR"] - .database_references["PDB"][0] - .uniprot_res_range[0] - .seq_begin - == "" - ), "Q9NPA5 first PDB resid is not ''." 
+ uniprot.raw_json["entryAudit"]["firstPublicDate"] == "1994-06-01" + ), "raw json entryAudit->firstPublicDate wrong" -def test_dr_records(): - """Test the different DR records as dataclasses.""" - uniprot_id = "P04637" - uniprot = Uniprot( - uniprot_id, save_txt=True, local_download_dir=Path(CFD, "test_files") +@pytest.mark.filterwarnings("ignore") +def test_get_json_as_properties(): + """Test json as properties.""" + uniprot_id = "P36952" + uniprot = UniProt( + uniprot_id, save_json=True, local_download_dir=Path(CFD, "test_files") ) - category_lines = uniprot.category_lines - dr = category_lines["DR"].database_references - assert dr["PDB"][0].pdb_id == "1A1U", "P04637 first PDB not 1A1U." - assert ( - dr["EMBL"][-1].nucleotide_sequence_id == "AY270155" - ), "P04637 last EMBL nucleotide_sequence_id not AY270155." - assert ( - dr["CCDS"][-1].ccds_id == "CCDS73971.1" - ), "P04637 last CCDS ccds_id not CCDS73971.1." - assert dr["PIR"][0].uid == "A25224", "P04637 PIR uid not A25224." - assert ( - dr["GO"][-1].accession_number == "0016032" - ), "P04637 last GO accession_number not 0016032." + assert uniprot.primaryAccession == "P36952", "prop primaryAccession wrong" assert ( - dr["Reactome"][-1].id == "R-HSA-983231" - ), "P04637 last Reactome id not R-HSA-983231." + uniprot.entryAudit.firstPublicDate == "1994-06-01" + ), "prop entryAudit->firstPublicDate wrong" assert ( - dr["RefSeq"][-1].protein_sequence_id == "NP_001263690.1" - ), "P04637 last RefSeq protein_sequence_id not NP_001263690.1." 
+ uniprot.uniProtKBCrossReferences[0].properties[0].value == "AAA18957.1" + ), "list prop wrong" diff --git a/tests/test_uniprot_legacy.py b/tests/test_uniprot_legacy.py new file mode 100644 index 0000000..d475977 --- /dev/null +++ b/tests/test_uniprot_legacy.py @@ -0,0 +1,124 @@ +import os +import sys +from pathlib import Path + +from pyuniprot.Uniprot import Uniprot + +sys.path.append("..") +CFD = os.path.dirname(__file__) +CWD = os.getcwd() + + +def test_get_properties(): + """ + Test class properties + """ + uniprot_id = "P36952" + uniprot = Uniprot(uniprot_id) + assert ( + uniprot.uniprot_txt_url + == f"https://rest.uniprot.org/uniprotkb/{uniprot_id}.txt" + ), "Uniprot TXT file link not right." + + file_path = Path(uniprot.local_download_dir, f"{uniprot.uniprot_id}.txt") + + if not file_path.exists(): + file_path = None + assert ( + uniprot.local_download_dir == os.getcwd() + ), "local_download_dir not user's CWD." + assert ( + uniprot.uniprot_txt_file == file_path + ), "Uniprot TXT file path not right when not existed." + + uniprot = Uniprot( + uniprot_id, + local_download_dir=Path(CFD, "test_files"), + ) + assert uniprot.local_download_dir == Path( + CFD, "test_files" + ), "local_download_dir not pointing to test_files." + assert uniprot.uniprot_txt_file == Path( + CFD, "test_files", f"{uniprot.uniprot_id}.txt" + ), "Uniprot TXT file path not right when existed." + try: + os.remove(f"{CWD}/{uniprot_id}.txt") + except OSError: + pass + + +def test_get_category_lines(): + """Test the _get_category_lines function.""" + uniprot_id = "P04637" + uniprot = Uniprot( + uniprot_id, save_txt=True, local_download_dir=Path(CFD, "test_files") + ) + category_lines = uniprot.category_lines + assert ( + category_lines["SQ"].length == 393 + ), "P04637 sequence length in SQ not read as integer 393." 
+ + +def test_empty_file(): + """Test the _get_category_lines function.""" + uniprot_id = "P30042" + uniprot = Uniprot( + uniprot_id, save_txt=True, local_download_dir=Path(CFD, "test_files") + ) + category_lines = uniprot.category_lines + assert category_lines == {}, "P04637 sequence length in SQ not read as integer 393." + + +def test_no_panther(): + """Test the _get_category_lines function.""" + uniprot_id = "Q8IUI8" + uniprot = Uniprot( + uniprot_id, save_txt=True, local_download_dir=Path(CFD, "test_files") + ) + category_lines = uniprot.category_lines + assert ( + len(category_lines["DR"].database_references["PANTHER"]) == 0 + ), "Q8IUI8 has wrong PANTHER," + + +def test_empty_resid(): + """Test the _get_category_lines function.""" + uniprot_id = "Q9NPA5" + uniprot = Uniprot( + uniprot_id, save_txt=True, local_download_dir=Path(CFD, "test_files") + ) + category_lines = uniprot.category_lines + assert ( + category_lines["DR"] + .database_references["PDB"][0] + .uniprot_res_range[0] + .seq_begin + == "" + ), "Q9NPA5 first PDB resid is not ''." + + +def test_dr_records(): + """Test the different DR records as dataclasses.""" + uniprot_id = "P04637" + uniprot = Uniprot( + uniprot_id, save_txt=True, local_download_dir=Path(CFD, "test_files") + ) + category_lines = uniprot.category_lines + dr = category_lines["DR"].database_references + assert dr["PDB"][0].pdb_id == "1A1U", "P04637 first PDB not 1A1U." + assert ( + dr["EMBL"][-1].nucleotide_sequence_id == "AY270155" + ), "P04637 last EMBL nucleotide_sequence_id not AY270155." + assert ( + dr["CCDS"][-1].ccds_id == "CCDS73971.1" + ), "P04637 last CCDS ccds_id not CCDS73971.1." + assert dr["PIR"][0].uid == "A25224", "P04637 PIR uid not A25224." + assert ( + dr["GO"][-1].accession_number == "0016032" + ), "P04637 last GO accession_number not 0016032." + assert ( + dr["Reactome"][-1].id == "R-HSA-983231" + ), "P04637 last Reactome id not R-HSA-983231." 
+ assert ( + dr["RefSeq"][-1].protein_sequence_id == "NP_001263690.1" + ), "P04637 last RefSeq protein_sequence_id not NP_001263690.1."