From e0d5d63bc1f1958e535a3735bc56045bc5e3ab01 Mon Sep 17 00:00:00 2001 From: Chaitanya Joshi Date: Sun, 20 Aug 2023 14:14:45 +0200 Subject: [PATCH] Fix handling id when loading from path (#332) * Fix handling id when loading from path Previously, the id for the pyg data object would become the entire (possibly lengthy) path to the pdb file OR only the filename + chain ID. However, in the latter case, the '.pdb' extension would still show up under the id field. This commit fixes the above to be consistent and to not contain the '.pdb' extension in the pyg data id. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Add non-standard Cystine From Wikipedia: "The thiol is susceptible to oxidation to give the disulfide derivative cystine, which serves an important structural role in many proteins. In this case, the symbol Cyx is sometimes used. The deprotonated form can generally be described by the symbol Cym as well." * Update CHANGELOG.md --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- CHANGELOG.md | 2 ++ graphein/protein/resi_atoms.py | 1 + graphein/protein/tensor/io.py | 6 ++++-- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 982829e03..5286fa214 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -22,6 +22,7 @@ * Fixes initialisation of `Protein` objects. 
[#317](https://github.com/a-r-j/graphein/issues/317) [#318](https://github.com/a-r-j/graphein/pull/318) * Fixes incorrect `rad` and `embed` argument logic in `graphein.protein.tensor.angles.dihedrals/sidechain_torsion` [#321](https://github.com/a-r-j/graphein/pull/321) * Fixes incorrect start padding in pNeRF output [#321](https://github.com/a-r-j/graphein/pull/321) +* Fixes setting ID for PyG data objects when loading from a path to a `.pdb` file [#332](https://github.com/a-r-j/graphein/pull/332) #### Other Changes * Adds transform composition to FoldComp Dataset [#312](https://github.com/a-r-j/graphein/pull/312) @@ -35,6 +36,7 @@ * Adds transform composition to FoldComp Dataset [#312](https://github.com/a-r-j/graphein/pull/312) * Improve FoldComp dataloading performance and include B factors (pLDDT) in output. [#313](https://github.com/a-r-j/graphein/pull/313) [#315](https://github.com/a-r-j/graphein/pull/315) * Add new helper functions to PDBManager [#322](https://github.com/a-r-j/graphein/pull/322) (@amorehead) +* Add non-standard 'CYX' to `RESI_THREE_TO_1`. ### 1.7.0 - 10 /04/2023 diff --git a/graphein/protein/resi_atoms.py b/graphein/protein/resi_atoms.py index a99e4919b..3bb74d089 100644 --- a/graphein/protein/resi_atoms.py +++ b/graphein/protein/resi_atoms.py @@ -652,6 +652,7 @@ "CSX": "C", "CXM": "M", "CYS": "C", + "CYX": "C", "DAL": "A", "DAR": "R", "DCY": "C", diff --git a/graphein/protein/tensor/io.py b/graphein/protein/tensor/io.py index cc24c6d26..d7021a325 100644 --- a/graphein/protein/tensor/io.py +++ b/graphein/protein/tensor/io.py @@ -166,9 +166,11 @@ def protein_to_pyg( # Get ID if path is not None: id = ( - path.split("/")[-1] + "_" + "".join(chain_selection) + os.path.splitext(path)[0].split("/")[-1] + + "_" + + "".join(chain_selection) if chain_selection != "all" - else path + else os.path.splitext(path)[0].split("/")[-1] ) elif pdb_code is not None: id = (