Skip to content

Commit

Permalink
Parse json instead for UniProt
Browse files Browse the repository at this point in the history
  • Loading branch information
Ruibin-Liu committed Apr 17, 2024
1 parent 3a68709 commit db722a8
Show file tree
Hide file tree
Showing 10 changed files with 506 additions and 85 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ repos:
hooks:
- id: check-builtin-literals
- id: check-added-large-files
- id: check-case-conflict
# - id: check-case-conflict
- id: check-toml
- id: check-yaml
- id: debug-statements
Expand Down
171 changes: 171 additions & 0 deletions pyuniprot/UniProt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
from __future__ import annotations

import io
import json
import os
import urllib.request
from dataclasses import dataclass
from pathlib import Path

from .dict_to_property import DictToProp


@dataclass
class Sequence:
    """Plain record bundling a sequence string with size and checksum metadata.

    NOTE(review): nothing in the visible part of this module constructs or
    reads ``Sequence``; the field meanings below are inferred from the field
    names only and should be confirmed against the intended caller.
    """

    # The sequence itself (presumably one-letter residue codes -- confirm).
    sequence: str
    # Presumably the number of residues in ``sequence``; not enforced here.
    length: int
    # Presumably the molecular weight of the sequence -- TODO confirm units.
    weight: int
    # Presumably the CRC checksum of ``sequence`` in its textual form.
    crc_checksum_value: str
    # Presumably the width of that CRC in bits (e.g. 64) -- confirm.
    crc_bits: int


class UniProt:
    """The python object representing all information of a Uniprot JSON file.

    The JSON entry is read from ``<local_download_dir>/<uniprot_id>.json`` when
    that file exists, otherwise it is downloaded from the UniProt REST API.
    Every top-level key of the parsed entry is then reachable as an attribute
    of the instance (see ``__getattr__``).
    """

    def __init__(
        self,
        uniprot_id: str,
        save_json: bool = True,
        local_download_dir: str | os.PathLike | None = None,
    ) -> None:
        """Init class with a Uniprot ID.

        Args:
            uniprot_id (str): Uniprot Access Number. It looks at the <local_download_dir> first for <uniprot_id>.json,
                and if not found, it will try to fetch content from https://rest.uniprot.org/uniprotkb/<uniprot_id>.
            save_json (bool, optional): whether to save the fetched json content to a <local_download_dir>/<uniprot_id>.json
                file when that file does not already exist. Defaults to True.
            local_download_dir (str | os.PathLike | None, optional): where to save the downloaded Uniprot json file.
                Defaults to None and the current working directory is used instead.

        Raises:
            ValueError: if the entry must be downloaded and the server answers with an HTTP error.
        """  # noqa
        self._uniprot_id: str = uniprot_id
        self.save_json: bool = save_json
        if local_download_dir is None:
            local_download_dir = os.getcwd()
        self._local_download_dir: str | os.PathLike = local_download_dir
        self._uniprot_json_url = f"https://rest.uniprot.org/uniprotkb/{self.uniprot_id}"
        self._uniprot_json_file: str | os.PathLike | io.StringIO | None = None
        # Prefer a previously saved copy over a network round trip.
        json_file = Path(self.local_download_dir, f"{self.uniprot_id}.json")
        if json_file.exists():
            self._uniprot_json_file = json_file
        # Parsed JSON document (the result of json.load), not the raw text.
        self._raw_json: dict | None = None
        self._properties: dict = {}

        self._get_raw_json()
        self._get_properties()

    @property
    def uniprot_id(self):
        """The UniProt accession this object was created with (read-only)."""
        return self._uniprot_id

    @property
    def local_download_dir(self):
        """Directory searched for, and used to save, ``<uniprot_id>.json``."""
        return self._local_download_dir

    @local_download_dir.setter
    def local_download_dir(self, dir: str | os.PathLike):
        """Set the directory to save downloaded Uniprot json files.

        Args:
            dir (str | os.PathLike): directory path str or Path.
        """
        self._local_download_dir = dir

    @property
    def uniprot_json_url(self):
        """URL the JSON entry is downloaded from when no local file is used."""
        return self._uniprot_json_url

    @uniprot_json_url.setter
    def uniprot_json_url(self, url: str):
        """Set the Uniprot json file URL if not the official REST one.

        Args:
            url (str): URL link.
        """
        self._uniprot_json_url = url

    @property
    def uniprot_json_file(self):
        """Path or in-memory buffer the JSON entry is (or was) read from."""
        return self._uniprot_json_file

    @uniprot_json_file.setter
    def uniprot_json_file(self, path: str | os.PathLike | io.StringIO):
        """Set the UniProt json file path

        Args:
            path (str | os.PathLike| io.StringIO): file-like or path to the file.

        Raises:
            FileExistsError: if <path> is not in the file system.
        """
        # Plain ``str`` paths are validated as well; in-memory buffers are
        # accepted as they are. FileExistsError (rather than the more natural
        # FileNotFoundError) is kept so existing ``except`` clauses still match.
        if isinstance(path, (str, os.PathLike)) and not Path(path).exists():
            raise FileExistsError(f"Cannot find {path}.")
        self._uniprot_json_file = path

    @property
    def raw_json(self):
        """The parsed JSON entry, or None before it has been loaded."""
        return self._raw_json

    @raw_json.setter
    def raw_json(self, content: dict):
        """Set the uniprot JSON content; it can be assigned only once.

        Args:
            content (dict): the parsed JSON document.

        Raises:
            AttributeError: if it is already set.
        """
        if self.raw_json is not None:
            raise AttributeError("raw_json already set.")
        self._raw_json = content

    def _get_raw_json(self) -> None:
        """Load the JSON entry, downloading (and optionally saving) it first if needed.

        Raises:
            ValueError: if the download fails with an HTTP error.
        """
        if self.uniprot_json_file is None:
            try:
                with urllib.request.urlopen(self.uniprot_json_url) as response:
                    json_content = response.read().decode("utf-8")
            # ``urllib.error`` is loaded as a side effect of ``import urllib.request``.
            except urllib.error.HTTPError as err:
                raise ValueError(
                    f"Cannot download from url {self.uniprot_json_url}."
                ) from err

            json_file: io.StringIO | os.PathLike = io.StringIO(json_content)
            if self.save_json:
                json_file = Path(
                    self.local_download_dir,
                    f"{self.uniprot_id}.json",
                )
                with open(json_file, "w", encoding="utf-8") as j_file:
                    j_file.write(json_content)

            self.uniprot_json_file = json_file

        source = self.uniprot_json_file
        if isinstance(source, io.StringIO):
            handle = source
        else:
            handle = open(source, "r", encoding="utf-8")
        # The context manager closes the handle even when parsing (or the
        # set-once ``raw_json`` setter) raises.
        with handle:
            self.raw_json = json.load(handle)

    def _get_properties(self) -> None:
        """
        Turn raw json to properties.
        """
        self._properties = DictToProp(self.raw_json)._properties

    def __getattr__(self, key: str) -> str | list | DictToProp:
        """Retrieve properties.

        Only called when normal attribute lookup fails.

        Raises:
            AttributeError: if ``key`` is not a property of the parsed entry.
        """
        # Go through ``__dict__`` directly: touching ``self._properties`` on an
        # instance that has not run ``__init__`` (copy, pickle, ``__new__``)
        # would re-enter this method and recurse without end.
        properties = self.__dict__.get("_properties", {})
        try:
            return properties[key]
        except KeyError:
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{key}'"
            ) from None
4 changes: 2 additions & 2 deletions pyuniprot/UniRef.py
Original file line number Diff line number Diff line change
Expand Up @@ -170,9 +170,9 @@ def uniref_json(self, content: str):
if self.uniref_json is None:
self._uniref_json = content
else:
raise AttributeError("category_lines already set.")
raise AttributeError("uniref_json already set.")

def _get_uniref_json(self):
def _get_uniref_json(self) -> None:
"""Get json content"""
if self.uniref_json_file is None:
try:
Expand Down
2 changes: 2 additions & 0 deletions pyuniprot/Uniprot.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Legacy code to process txt file"""

from __future__ import annotations

import io
Expand Down
2 changes: 2 additions & 0 deletions pyuniprot/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
from .Uniprot import Uniprot
from .UniProt import UniProt
from .UniRef import UniRef
from .utils import get_alt_resids, get_isoforms
from .version import __version__

__all__ = [
"Uniprot",
"UniProt",
"UniRef",
"get_isoforms",
"get_alt_resids",
Expand Down
93 changes: 93 additions & 0 deletions pyuniprot/dict_to_property.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
from __future__ import annotations

import keyword
import warnings


def validify_name(name: str) -> tuple[bool, str]:
    """
    Check whether the given name is a valid Python property name and
    return a sanitized replacement.

    The name is cleaned in this order: surrounding whitespace is stripped and
    inner spaces become underscores; every character that is neither
    alphanumeric nor an underscore is dropped; an underscore is prepended when
    the result is empty or starts with a numeric character; an underscore is
    prepended when the result is a Python keyword.

    A ``RuntimeWarning`` is issued whenever the name had to be changed.

    Args:
        name (str, required): dictionary key as str.

    Returns:
        (whether it is valid, validified one)
    """
    valid_name: str = name.strip().replace(" ", "_")

    valid_name = "".join(c for c in valid_name if c.isalnum() or c == "_")

    # Must come after the filtering above: removing characters can expose a
    # leading digit ("#1a" -> "1a") or leave nothing at all ("#" -> "").
    if not valid_name or valid_name[0].isnumeric():
        valid_name = "_" + valid_name

    if keyword.iskeyword(valid_name):
        valid_name = "_" + valid_name

    # Every step above only alters names that needed fixing, so the name was
    # valid exactly when it came through unchanged.
    is_valid: bool = valid_name == name

    if not is_valid:
        warnings.warn(
            f"key '{name}' is not a valid python variable. '{valid_name}' is used instead.",
            RuntimeWarning,
            stacklevel=2,
        )

    return is_valid, valid_name


class DictToProp:
    """Expose the keys of a (possibly nested) dict as attributes.

    Keys are converted to attribute names with ``validify_name``. Nested dicts
    become nested ``DictToProp`` instances and dicts found inside lists are
    converted as well, so ``obj.a.b[0].c`` style access works.
    """

    def __init__(self, data):
        """Wrap ``data`` and build its properties.

        Args:
            data (dict): the dictionary to expose.

        Raises:
            ValueError: if ``data`` is not a dict.
        """
        if not isinstance(data, dict):
            raise ValueError("Input is not a python dict")
        self._data = data
        self._properties = {}
        self.create_properties()

    def create_properties(self):
        """Create properties based on the dictionary keys and values."""
        for key, value in self._data.items():
            key = validify_name(key)[1]
            if isinstance(value, dict):
                # The constructor already builds the nested properties;
                # building them again here would double the work (and the
                # warnings) at every nesting level.
                self._properties[key] = DictToProp(value)
            elif isinstance(value, list):
                self._properties[key] = DictToProp.parse_list(value)
            else:
                self._properties[key] = value

    @classmethod
    def parse_list(cls, lst: list) -> list:
        """Parse a list recursively, wrapping every dict found in a DictToProp.

        Args:
            lst (list): list whose elements may be dicts, lists or plain values.

        Returns:
            A new list of the same shape; the input list is not modified.
        """
        result: list = []
        for e in lst:
            if isinstance(e, dict):
                result.append(DictToProp(e))
            elif isinstance(e, list):
                result.append(DictToProp.parse_list(e))
            else:
                result.append(e)
        return result

    def __getattr__(self, key):
        """Override the attribute access to retrieve properties.

        Only called when normal attribute lookup fails.

        Raises:
            AttributeError: if ``key`` is not one of the created properties.
        """
        # Go through ``__dict__`` directly: touching ``self._properties`` on an
        # instance that has not run ``__init__`` (copy, pickle, ``__new__``)
        # would re-enter this method and recurse without end.
        properties = self.__dict__.get("_properties", {})
        try:
            return properties[key]
        except KeyError:
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{key}'"
            ) from None
72 changes: 72 additions & 0 deletions tests/test_dtop.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
import os
import sys

import pytest

from pyuniprot.dict_to_property import DictToProp, validify_name

# Adds ".." (resolved against the current working directory) to the module
# search path. NOTE(review): this runs after the ``pyuniprot`` import above,
# so it cannot influence how that import is resolved.
sys.path.append("..")
# Directory containing this test file.
CFD = os.path.dirname(__file__)
# Working directory at import time. Neither constant is used by the tests
# visible in this file.
CWD = os.getcwd()


def test_validify_name():
    """
    Check validify_name against names that must be rewritten (each one has to
    trigger a RuntimeWarning with the expected text) and one that is kept.
    """
    # (input, sanitized name, expected warning text, assertion message)
    rewritten = [
        (
            "",
            "_",
            "key '' is not a valid python variable. '_' is used instead.",
            "empty string not validified",
        ),
        (
            "de#f",
            "_def",
            "key 'de#f' is not a valid python variable. '_def' is used instead.",
            "string 'de#f' not validified",
        ),
        (
            "t est",
            "t_est",
            "key 't est' is not a valid python variable. 't_est' is used instead.",
            "string 't est' not validified",
        ),
        (
            "1a",
            "_1a",
            "key '1a' is not a valid python variable. '_1a' is used instead.",
            "string '1a' not validified",
        ),
    ]
    for raw, sanitized, warning_msg, failure in rewritten:
        with pytest.warns(RuntimeWarning, match=warning_msg):
            assert validify_name(raw) == (False, sanitized), failure

    # An already valid name comes back untouched and flagged as valid.
    assert validify_name("test") == (True, "test"), "string 'test' not validified"


@pytest.mark.filterwarnings("ignore")
def test_DictToProp():
    """
    Check attribute access on DictToProp for plain values, lists and a nested
    dict whose keys all need (or nearly need) sanitizing.
    """
    nested = {
        "": "empty",
        "def": "python keyword",
        "t est": "space-in",
        "1a": "wrong-start",
        "_1234": "underscore-start",
    }
    wrapped = DictToProp(
        {
            "normal": 0,
            "a_list": ["t", "e", "s", "t"],
            "a_dict": nested,
        }
    )

    assert wrapped.normal == 0, "normal propery failed"
    assert wrapped.a_list == ["t", "e", "s", "t"], "list property failed"

    # (sanitized attribute name, expected value, assertion message)
    nested_expectations = (
        ("_", "empty", "emtpy string key failed"),
        ("_def", "python keyword", "python keyword key failed"),
        ("t_est", "space-in", "space-in key failed"),
        ("_1a", "wrong-start", "wrong-start key failed"),
        ("_1234", "underscore-start", "underscore-start key failed"),
    )
    inner = wrapped.a_dict
    for attribute, expected, failure in nested_expectations:
        assert getattr(inner, attribute) == expected, failure
1 change: 1 addition & 0 deletions tests/test_files/P36952.json

Large diffs are not rendered by default.

Loading

0 comments on commit db722a8

Please sign in to comment.