From 8269403f9f43dd83a2134c929178c0207cdb1ad4 Mon Sep 17 00:00:00 2001 From: Aviv Rosenberg Date: Sun, 3 Mar 2024 05:26:43 +0200 Subject: [PATCH 1/3] backbone: handle rare issue with negative bfactors --- src/pp5/backbone.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/pp5/backbone.py b/src/pp5/backbone.py index d3c5ad7..8948fed 100644 --- a/src/pp5/backbone.py +++ b/src/pp5/backbone.py @@ -37,7 +37,12 @@ def atom_location_sigma(atom: Atom) -> float: :param atom: The atom to calculate the sigma for. :return: The sigma in Angstroms. """ - return math.sqrt(atom.get_bfactor() / CONST_8PI2) + bfactor = atom.get_bfactor() + if bfactor < 0: + # In very rare cases, the B-factor of some atom in a PDB file is negative, + # which doesn't make sense (e.g. 1D9U:B). + return float("nan") + return math.sqrt(bfactor / CONST_8PI2) def residue_backbone_atoms(res: Residue) -> Sequence[Atom]: From 8d7dabe55817137fa62a6c37ecfa635972cbfbce Mon Sep 17 00:00:00 2001 From: Aviv Rosenberg Date: Sun, 3 Mar 2024 05:51:28 +0200 Subject: [PATCH 2/3] contacts: handle rare cases where a residue itself can be disordered --- src/pp5/contacts.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/pp5/contacts.py b/src/pp5/contacts.py index 430ea63..faa2693 100644 --- a/src/pp5/contacts.py +++ b/src/pp5/contacts.py @@ -16,7 +16,7 @@ import pandas as pd from Bio.PDB import NeighborSearch from Bio.PDB.Atom import Atom -from Bio.PDB.Residue import Residue +from Bio.PDB.Residue import Residue, DisorderedResidue import pp5 from pp5.codons import ACIDS_3TO1, UNKNOWN_AA @@ -184,6 +184,12 @@ def __init__( def assign(self, res: Residue) -> Dict[str, Optional[ResidueContacts]]: + # In rare cases, a residue may be disordered and contain other residues. + # This means there's a point mutation and both original and mutated residues + # are present in the crystal. We ignore this and just use the selected residue. + if isinstance(res, DisorderedResidue): + res = res.disordered_get() + # Get all atoms from within the residue, including side chain atoms all_atoms = tuple(res.get_atoms()) From b26845b08e626ca8b84eef03546a52e6ebb6eba3 Mon Sep 17 00:00:00 2001 From: Aviv Rosenberg Date: Sun, 3 Mar 2024 06:16:39 +0200 Subject: [PATCH 3/3] prec: handle rare sequence alignment issue --- src/pp5/prec.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/pp5/prec.py b/src/pp5/prec.py index b51ad3e..3f42276 100644 --- a/src/pp5/prec.py +++ b/src/pp5/prec.py @@ -1217,6 +1217,14 @@ def _find_unp_alignment(self, pdb_aa_seq: str, unp_aa_seq: str) -> Dict[int, int aligner = PairwiseAligner( substitution_matrix=BLOSUM80, open_gap_score=-10, extend_gap_score=-0.5 ) + + # In rare cases, there could be unknown letters in the sequences. This causes + # the alignment to break. Replace with "X" which the aligner can handle. + unknown_aas = set(pdb_aa_seq).union(set(unp_aa_seq)) - set(ACIDS_1TO3) + for unk_aa in unknown_aas: # usually there are none + unp_aa_seq = unp_aa_seq.replace(unk_aa, UNKNOWN_AA) + pdb_aa_seq = pdb_aa_seq.replace(unk_aa, UNKNOWN_AA) + multi_alignments = aligner.align(pdb_aa_seq, unp_aa_seq) alignment = sorted(multi_alignments, key=lambda a: a.score)[-1] LOGGER.info(f"{self}: PDB to UNP sequence alignment score={alignment.score}")