Skip to content

Commit

Permalink
prec: handle rare sequence alignment issue
Browse files (browse the repository at this point in the history)
  • Loading branch information
avivrosenberg committed Mar 3, 2024
1 parent 8d7dabe commit b26845b
Showing 1 changed file with 8 additions and 0 deletions.
8 changes: 8 additions & 0 deletions src/pp5/prec.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -1217,6 +1217,14 @@ def _find_unp_alignment(self, pdb_aa_seq: str, unp_aa_seq: str) -> Dict[int, int
aligner = PairwiseAligner(
substitution_matrix=BLOSUM80, open_gap_score=-10, extend_gap_score=-0.5
)

# In rare cases, there could be unknown letters in the sequences. This causes
# the alignment to break. Replace with "X" which the aligner can handle.
unknown_aas = set(pdb_aa_seq).union(set(unp_aa_seq)) - set(ACIDS_1TO3)
for unk_aa in unknown_aas: # usually there are none
unp_aa_seq = unp_aa_seq.replace(unk_aa, UNKNOWN_AA)
pdb_aa_seq = pdb_aa_seq.replace(unk_aa, UNKNOWN_AA)

multi_alignments = aligner.align(pdb_aa_seq, unp_aa_seq)
alignment = sorted(multi_alignments, key=lambda a: a.score)[-1]
LOGGER.info(f"{self}: PDB to UNP sequence alignment score={alignment.score}")
Expand Down

0 comments on commit b26845b

Please sign in to comment.