Skip to content

Commit

Permalink
add docs
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesmkrieger committed Sep 12, 2024
1 parent 0441b56 commit 13850c3
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 21 deletions.
8 changes: 4 additions & 4 deletions prody/atomic/atomic.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,22 +251,22 @@ def getTitle(self):
return ag._title

def getSequence(self, **kwargs):
"""Returns one-letter sequence string for amino acids.
"""Returns one-letter sequence string for amino acids, unless *longSeq* is **True**.
When *allres* keyword argument is **True**, sequence will include all
residues (e.g. water molecules) in the chain and **X** will be used for
non-standard residue names."""

threeLetter = kwargs.get('threeLetter', False)
longSeq = kwargs.get('longSeq', False)

get = AAMAP.get
if hasattr(self, 'getResnames'):
if threeLetter:
if longSeq:
seq = ' '.join(self.getResnames())
else:
seq = ''.join([get(res, 'X') for res in self.getResnames()])
else:
res = self.getResname()
if threeLetter:
if longSeq:
seq = res
else:
seq = get(res, 'X')
Expand Down
11 changes: 6 additions & 5 deletions prody/atomic/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@
__all__ = ['Chain']

def getSequence(resnames, **kwargs):
"""Returns polypeptide sequence as from list of *resnames* (residue
name abbreviations)."""
"""Returns polypeptide sequence from a list of *resnames* using one-letter residue
name abbreviations by default, or long (usually three letter) abbreviations
if *longSeq* is **True**."""

threeLetter = kwargs.get('threeLetter', False)
if threeLetter:
longSeq = kwargs.get('longSeq', False)
if longSeq:
return ' '.join(resnames)

get = AAMAP.get
Expand Down Expand Up @@ -138,7 +139,7 @@ def getSequence(self, **kwargs):

if kwargs.get('allres', False):
get = AAMAP.get
if kwargs.get('threeLetter', False):
if kwargs.get('longSeq', False):
seq = ' '.join([res.getResname() for res in self])
else:
seq = ''.join([get(res.getResname(), 'X') for res in self])
Expand Down
25 changes: 18 additions & 7 deletions prody/proteins/cifheader.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,11 @@ def parseCIFHeader(pdb, *keys, **kwargs):

def getCIFHeaderDict(stream, *keys, **kwargs):
"""Returns header data in a dictionary. *stream* may be a list of PDB lines
or a stream."""
or a stream.
Polymers have sequences that usually use one-letter residue name abbreviations by default.
To obtain long (usually three letter) abbreviations, set *longSeq* to **True**.
"""

try:
lines = stream.readlines()
Expand Down Expand Up @@ -766,7 +770,10 @@ def _getReference(lines):


def _getPolymers(lines, **kwargs):
"""Returns list of polymers (macromolecules)."""
"""Returns list of polymers (macromolecules).
Sequence is usually one-letter abbreviations, but can be long
abbreviations (usually three letters) if *longSeq* is **True**"""

pdbid = _PDB_HEADER_MAP['identifier'](lines)
polymers = dict()
Expand All @@ -785,15 +792,15 @@ def _getPolymers(lines, **kwargs):
poly = polymers.get(ch, Polymer(ch))
polymers[ch] = poly

threeLetter = kwargs.get('threeLetter', False)
if threeLetter:
longSeq = kwargs.get('longSeq', False)
if longSeq:
poly.sequence += ''.join(item[
'_entity_poly.pdbx_seq_one_letter_code'].replace(';', '').split())
else:
poly.sequence += ''.join(item[
'_entity_poly.pdbx_seq_one_letter_code_can'].replace(';', '').split())

if threeLetter:
if longSeq:
for poly in polymers.values():
seq = poly.sequence
resnames = []
Expand Down Expand Up @@ -1264,13 +1271,17 @@ def _getOther(lines, key=None):
return data


def _getUnobservedSeq(lines):
def _getUnobservedSeq(lines, **kwargs):
"""Get sequence of unobserved residues.
This sequence usually uses one-letter residue name abbreviations by default.
To obtain long (usually three letter) abbreviations, set *longSeq* to **True**."""

key_unobs = '_pdbx_unobs_or_zero_occ_residues'

try:
unobs = parseSTARSection(lines, key_unobs, report=False)
polymers = _getPolymers(lines)
polymers = _getPolymers(lines, **kwargs)
except:
pass

Expand Down
14 changes: 10 additions & 4 deletions prody/proteins/header.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,10 @@ def parsePDBHeader(pdb, *keys, **kwargs):

def getHeaderDict(stream, *keys, **kwargs):
"""Returns header data in a dictionary. *stream* may be a list of PDB lines
or a stream."""
or a stream.
Polymers have sequences that usually use one-letter residue name abbreviations by default.
To obtain long (usually three letter) abbreviations, set *longSeq* to **True**."""

lines = defaultdict(list)
loc = 0
Expand Down Expand Up @@ -559,7 +562,10 @@ def _getReference(lines):


def _getPolymers(lines, **kwargs):
"""Returns list of polymers (macromolecules)."""
"""Returns list of polymers (macromolecules).
Polymers have sequences that usually use one-letter residue name abbreviations by default.
To obtain long (usually three letter) abbreviations, set *longSeq* to **True**."""

pdbid = lines['pdbid']
polymers = dict()
Expand All @@ -568,8 +574,8 @@ def _getPolymers(lines, **kwargs):
poly = polymers.get(ch, Polymer(ch))
polymers[ch] = poly

threeLetter = kwargs.get('threeLetter', False)
if threeLetter:
longSeq = kwargs.get('longSeq', False)
if longSeq:
if poly.sequence != '':
poly.sequence += ' '
poly.sequence += getSequence(line[19:].split(), **kwargs)
Expand Down
2 changes: 1 addition & 1 deletion prody/utilities/catchall.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def calcTree(names, distance_matrix, method='upgma', linkage=False):

method = method.lower().strip()

if method in ['ward', 'single', 'average', 'weighted', 'centroid', 'median']:
if method in ['ward', 'single', 'average', 'weighted', 'centroid', 'median', 'complete']:
from scipy.cluster.hierarchy import linkage as hlinkage
from scipy.spatial.distance import squareform

Expand Down

0 comments on commit 13850c3

Please sign in to comment.