-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
3a68709
commit db722a8
Showing
10 changed files
with
506 additions
and
85 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,171 @@ | ||
from __future__ import annotations | ||
|
||
import io | ||
import json | ||
import os | ||
import urllib.request | ||
from dataclasses import dataclass | ||
from pathlib import Path | ||
|
||
from .dict_to_property import DictToProp | ||
|
||
|
||
@dataclass
class Sequence:
    """Plain record grouping a sequence string with its descriptive fields.

    The generated constructor accepts the fields positionally in the order
    they are declared below, or by keyword.
    """

    # The sequence text itself.
    sequence: str
    # presumably the number of residues in ``sequence`` -- TODO confirm
    length: int
    # presumably the molecular weight reported for the entry -- TODO confirm
    weight: int
    # Checksum value, kept as text.
    crc_checksum_value: str
    # presumably the bit width of the CRC algorithm -- TODO confirm
    crc_bits: int
|
||
|
||
class UniProt: | ||
"""The python object representing all information of a Uniprot JSON file.""" | ||
|
||
def __init__( | ||
self, | ||
uniprot_id: str, | ||
save_json: bool = True, | ||
local_download_dir: str | os.PathLike | None = None, | ||
) -> None: | ||
"""Init class with a Uniprot ID. | ||
Args: | ||
uniprot_id (str): Uniprot Access Number. It looks at the the <local_download_dir> first for <uniprot_id>.json, | ||
and if not found, it will try to fetch content from https://rest.uniprot.org/uniprotkb/<uniprot_id>. | ||
save_json (bool, optional): whether to save the fetched json content to a <local_download_dir>/<uniprot_id>.json | ||
file when that file no already existing. Defaults to False. | ||
local_download_dir (str | os.PathLike | None, optional): where to save the downloaded Uniprot json file. | ||
Defaults to None and the current working directory is used instead. | ||
""" # noqa | ||
self._uniprot_id: str = uniprot_id | ||
self.save_json: bool = save_json | ||
if local_download_dir is None: | ||
local_download_dir = os.getcwd() | ||
self._local_download_dir: str | os.PathLike | None = local_download_dir | ||
self._uniprot_json_url = f"https://rest.uniprot.org/uniprotkb/{self.uniprot_id}" | ||
self._uniprot_json_file: str | os.PathLike | io.StringIO | None = None | ||
json_file = Path(self.local_download_dir, f"{self.uniprot_id}.json") | ||
if json_file.exists(): | ||
self._uniprot_json_file = json_file | ||
self._raw_json: str | None = None # json is only a str in python | ||
self._properties: dict = {} | ||
|
||
self._get_raw_json() | ||
self._get_properties() | ||
|
||
@property | ||
def uniprot_id(self): | ||
return self._uniprot_id | ||
|
||
@property | ||
def local_download_dir(self): | ||
return self._local_download_dir | ||
|
||
@local_download_dir.setter | ||
def local_download_dir(self, dir: str | os.PathLike): | ||
"""Set the directory to save downloaded Uniprot json files. | ||
Args: | ||
dir (str | os.PathLike): directory path str or Path. | ||
""" | ||
self._local_download_dir = dir | ||
|
||
@property | ||
def uniprot_json_url(self): | ||
return self._uniprot_json_url | ||
|
||
@uniprot_json_url.setter | ||
def uniprot_json_url(self, url: str): | ||
"""Set the Uniprot json file URL if not the official REST one. | ||
Args: | ||
url (str): URL link. | ||
""" | ||
self._uniprot_json_url = url | ||
|
||
@property | ||
def uniprot_json_file(self): | ||
return self._uniprot_json_file | ||
|
||
@uniprot_json_file.setter | ||
def uniprot_json_file(self, path: str | os.PathLike | io.StringIO): | ||
"""Set the UniProt json file path | ||
Args: | ||
path (str | os.PathLike| io.StringIO): file-like or path to the file. | ||
Raises: | ||
FileExistsError: if <path> is not in the file system. | ||
""" | ||
if isinstance(path, os.PathLike) and not Path(path).exists(): | ||
raise FileExistsError(f"Cannot find {path}.") | ||
self._uniprot_json_file = path | ||
|
||
@property | ||
def raw_json(self): | ||
return self._raw_json | ||
|
||
@raw_json.setter | ||
def raw_json(self, content: str): | ||
"""Set the uniprot JSON by json content (str repr in python). | ||
Args: | ||
content (str): JSON as a python str. | ||
Raises: | ||
AttributeError: if it is already set. | ||
""" | ||
if self.raw_json is None: | ||
self._raw_json = content | ||
else: | ||
raise AttributeError("raw_json already set.") | ||
|
||
def _get_raw_json(self) -> None: | ||
"""Get json content""" | ||
if self.uniprot_json_file is None: | ||
try: | ||
with urllib.request.urlopen(self.uniprot_json_url) as response: | ||
raw_data = response.read() | ||
json_content = raw_data.decode("utf-8") | ||
json_file: io.StringIO | os.PathLike = io.StringIO(json_content) | ||
if self.save_json: | ||
json_file = Path( | ||
self.local_download_dir, | ||
f"{self.uniprot_id}.json", | ||
) | ||
with open(json_file, "w", encoding="utf-8") as j_file: | ||
j_file.write(json_content) | ||
|
||
self.uniprot_json_file = json_file | ||
except urllib.error.HTTPError: | ||
raise ValueError(f"Cannot download from url {self.uniprot_json_url}.") | ||
|
||
if not isinstance(self.uniprot_json_file, io.StringIO): | ||
j_file = open(self.uniprot_json_file, "r", encoding="utf-8") | ||
else: | ||
j_file = self.uniprot_json_file | ||
|
||
self.raw_json = json.load(j_file) | ||
|
||
try: | ||
j_file.close() | ||
except Exception: | ||
pass | ||
|
||
def _get_properties(self) -> None: | ||
""" | ||
Turn raw json to properties. | ||
""" | ||
self._properties = DictToProp(self.raw_json)._properties | ||
|
||
def __getattr__(self, key: str) -> str | list | DictToProp: | ||
"""Retrieve properties.""" | ||
if key in self._properties: | ||
return self._properties[key] | ||
else: | ||
raise AttributeError( | ||
f"'{type(self).__name__}' object has no attribute '{key}'" | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,5 @@ | ||
"""Legacy code to process txt file""" | ||
|
||
from __future__ import annotations | ||
|
||
import io | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
from __future__ import annotations | ||
|
||
import keyword | ||
import warnings | ||
|
||
|
||
def validify_name(name: str) -> tuple[bool, str]:
    """Check whether ``name`` is usable as a Python property name.

    If it is not, a replacement is derived by, in this order:

    1. stripping surrounding whitespace and replacing spaces with ``_``;
    2. dropping every character that is neither alphanumeric nor ``_``;
    3. prefixing ``_`` when the result is empty or starts with a numeric
       character;
    4. prefixing ``_`` when the result is a Python keyword.

    Steps 3 and 4 run after the clean-up so that a name such as ``"#1a"``
    cannot end up as ``"1a"`` (or ``"#"`` as an empty string).

    A ``RuntimeWarning`` is emitted whenever the name had to be changed.

    Args:
        name (str, required): dictionary key as str.

    Returns:
        (whether it is valid, validified one)
    """
    valid_name = name.strip().replace(" ", "_")
    valid_name = "".join(c for c in valid_name if c.isalnum() or c == "_")

    if not valid_name or valid_name[0].isnumeric():
        valid_name = "_" + valid_name

    if keyword.iskeyword(valid_name):
        valid_name = "_" + valid_name

    # Every step above changes the text when it applies, so the name was
    # valid exactly when nothing changed.
    is_valid = valid_name == name

    if not is_valid:
        warnings.warn(
            f"key '{name}' is not a valid python variable. '{valid_name}' is used instead.",
            RuntimeWarning,
            stacklevel=2,
        )

    return is_valid, valid_name
|
||
|
||
class DictToProp:
    """Expose the items of a (possibly nested) dict through attribute access.

    Each key is passed through ``validify_name`` and the returned name is used
    as the attribute name. Dict values become ``DictToProp`` instances, list
    values are converted with ``parse_list`` and every other value is stored
    unchanged.
    """

    def __init__(self, data):
        """Build the properties for ``data``.

        Args:
            data (dict): the mapping to expose.

        Raises:
            ValueError: if ``data`` is not a dict.
        """
        self._data = data
        if not isinstance(self._data, dict):
            raise ValueError("Input is not a python dict")
        self._properties = {}
        self.create_properties()

    def create_properties(self):
        """Create properties based on the dictionary keys and values.

        When two keys map to the same validified name, the later one
        overwrites the earlier one.
        """
        for key, value in self._data.items():
            attr_name = validify_name(key)[1]
            if isinstance(value, dict):
                # The constructor already builds the nested properties;
                # building them again here would double the work (and the
                # emitted warnings) at every nesting level.
                self._properties[attr_name] = DictToProp(value)
            elif isinstance(value, list):
                self._properties[attr_name] = DictToProp.parse_list(value)
            else:
                self._properties[attr_name] = value

    @classmethod
    def parse_list(cls, lst: list) -> list:
        """Parse a list of dict recursively.

        Args:
            lst (list): list whose dict elements, at any list depth, are
                converted to ``DictToProp`` instances.

        Returns:
            A new list with the same layout; non-dict, non-list elements are
            carried over unchanged.
        """
        parsed: list = []
        for element in lst:
            if isinstance(element, dict):
                # As above: construction alone builds the properties.
                parsed.append(DictToProp(element))
            elif isinstance(element, list):
                parsed.append(cls.parse_list(element))
            else:
                parsed.append(element)
        return parsed

    def __getattr__(self, key):
        """Override the attribute access to retrieve properties.

        Only invoked when normal attribute lookup fails.

        Raises:
            AttributeError: if ``key`` is not a known property.
        """
        # Read the mapping through ``__dict__``: using ``self._properties`` on
        # an instance that does not have it yet (e.g. one created by
        # ``copy``/``pickle``) would re-enter ``__getattr__`` and recurse
        # until RecursionError instead of raising AttributeError.
        properties = self.__dict__.get("_properties", {})
        try:
            return properties[key]
        except KeyError:
            raise AttributeError(
                f"'{type(self).__name__}' object has no attribute '{key}'"
            ) from None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
import os | ||
import sys | ||
|
||
import pytest | ||
|
||
from pyuniprot.dict_to_property import DictToProp, validify_name | ||
|
||
# Working directory of the process at import time.
CWD = os.getcwd()
# Directory that contains this test module.
CFD = os.path.dirname(__file__)
# NOTE(review): this runs after the package import above it in the file, so it
# cannot influence that import -- confirm whether it is still needed.
sys.path.append("..")
|
||
|
||
def test_validify_name():
    """
    Check validify_name against invalid keys (with their warnings) and a valid one.
    """
    # (input key, expected replacement, expected warning text, assertion message)
    invalid_cases = [
        (
            "",
            "_",
            "key '' is not a valid python variable. '_' is used instead.",
            "empty string not validified",
        ),
        (
            "de#f",
            "_def",
            "key 'de#f' is not a valid python variable. '_def' is used instead.",
            "string 'de#f' not validified",
        ),
        (
            "t est",
            "t_est",
            "key 't est' is not a valid python variable. 't_est' is used instead.",
            "string 't est' not validified",
        ),
        (
            "1a",
            "_1a",
            "key '1a' is not a valid python variable. '_1a' is used instead.",
            "string '1a' not validified",
        ),
    ]
    for raw_key, replacement, warning_msg, failure_note in invalid_cases:
        with pytest.warns(RuntimeWarning, match=warning_msg):
            assert validify_name(raw_key) == (False, replacement), failure_note

    # A name that is already valid is returned unchanged and flagged as valid.
    assert validify_name("test") == (True, "test"), "string 'test' not validified"
|
||
|
||
@pytest.mark.filterwarnings("ignore")
def test_DictToProp():
    """
    Check attribute access on DictToProp for plain, list and nested-dict values.
    """
    # Keys of the nested dict cover the renaming cases: empty key, keyword,
    # embedded space, leading digit and an already valid underscore name.
    awkward_keys = {
        "": "empty",
        "def": "python keyword",
        "t est": "space-in",
        "1a": "wrong-start",
        "_1234": "underscore-start",
    }
    source = {
        "normal": 0,
        "a_list": ["t", "e", "s", "t"],
        "a_dict": awkward_keys,
    }

    props = DictToProp(source)

    assert props.normal == 0, "normal propery failed"
    assert props.a_list == ["t", "e", "s", "t"], "list property failed"

    inner = props.a_dict
    assert inner._ == "empty", "emtpy string key failed"
    assert inner._def == "python keyword", "python keyword key failed"
    assert inner.t_est == "space-in", "space-in key failed"
    assert inner._1a == "wrong-start", "wrong-start key failed"
    assert inner._1234 == "underscore-start", "underscore-start key failed"
Large diffs are not rendered by default.
Oops, something went wrong.
Oops, something went wrong.