Skip to content

Commit

Permalink
Merge branch 'collection-errors'
Browse files (browse the repository at this point in the history)
  • Loading branch information
avivrosenberg committed Apr 8, 2024
2 parents 4becb18 + b26845b commit 1d42e6e
Show file tree
Hide file tree
Showing 3 changed files with 21 additions and 2 deletions.
7 changes: 6 additions & 1 deletion src/pp5/backbone.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,12 @@ def atom_location_sigma(atom: Atom) -> float:
:param atom: The atom to calculate the sigma for.
:return: The sigma in Angstroms.
"""
return math.sqrt(atom.get_bfactor() / CONST_8PI2)
bfactor = atom.get_bfactor()
if bfactor < 0:
# In very rare cases, the B-factor of some atom in a PDB file is negative,
# which doesn't make sense (e.g. 1D9U:B).
return float("nan")
return math.sqrt(bfactor / CONST_8PI2)


def residue_backbone_atoms(res: Residue) -> Sequence[Atom]:
Expand Down
8 changes: 7 additions & 1 deletion src/pp5/contacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import pandas as pd
from Bio.PDB import NeighborSearch
from Bio.PDB.Atom import Atom
from Bio.PDB.Residue import Residue
from Bio.PDB.Residue import Residue, DisorderedResidue

import pp5
from pp5.codons import ACIDS_3TO1, UNKNOWN_AA
Expand Down Expand Up @@ -184,6 +184,12 @@ def __init__(

def assign(self, res: Residue) -> Dict[str, Optional[ResidueContacts]]:

# In rare cases, a residue may be disordered and contain other residues.
# This means there's a point mutation and both original and mutated residues
# are present in the crystal. We ignore this and just use the selected residue.
if isinstance(res, DisorderedResidue):
res = res.disordered_get()

# Get all atoms from within the residue, including side chain atoms
all_atoms = tuple(res.get_atoms())

Expand Down
8 changes: 8 additions & 0 deletions src/pp5/prec.py
Original file line number Diff line number Diff line change
Expand Up @@ -1217,6 +1217,14 @@ def _find_unp_alignment(self, pdb_aa_seq: str, unp_aa_seq: str) -> Dict[int, int
aligner = PairwiseAligner(
substitution_matrix=BLOSUM80, open_gap_score=-10, extend_gap_score=-0.5
)

# In rare cases, there could be unknown letters in the sequences. This causes
# the alignment to break. Replace with "X" which the aligner can handle.
unknown_aas = set(pdb_aa_seq).union(set(unp_aa_seq)) - set(ACIDS_1TO3)
for unk_aa in unknown_aas: # usually there are none
unp_aa_seq = unp_aa_seq.replace(unk_aa, UNKNOWN_AA)
pdb_aa_seq = pdb_aa_seq.replace(unk_aa, UNKNOWN_AA)

multi_alignments = aligner.align(pdb_aa_seq, unp_aa_seq)
alignment = sorted(multi_alignments, key=lambda a: a.score)[-1]
LOGGER.info(f"{self}: PDB to UNP sequence alignment score={alignment.score}")
Expand Down

0 comments on commit 1d42e6e

Please sign in to comment.