Skip to content

Commit

Permalink
add docs
Browse files Browse the repository at this point in the history
  • Loading branch information
jamesmkrieger committed Sep 12, 2024
1 parent 0441b56 commit 13850c3
Show file tree
Hide file tree
Showing 5 changed files with 39 additions and 21 deletions.
8 changes: 4 additions & 4 deletions prody/atomic/atomic.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,22 +251,22 @@ def getTitle(self):
return ag._title

def getSequence(self, **kwargs):
"""Returns one-letter sequence string for amino acids.
"""Returns one-letter sequence string for amino acids, unless *longSeq* is **True**.
When *allres* keyword argument is **True**, sequence will include all
residues (e.g. water molecules) in the chain and **X** will be used for
non-standard residue names."""

threeLetter = kwargs.get('threeLetter', False)
longSeq = kwargs.get('longSeq', False)

get = AAMAP.get
if hasattr(self, 'getResnames'):
if threeLetter:
if longSeq:
seq = ' '.join(self.getResnames())
else:
seq = ''.join([get(res, 'X') for res in self.getResnames()])
else:
res = self.getResname()
if threeLetter:
if longSeq:
seq = res
else:
seq = get(res, 'X')
Expand Down
11 changes: 6 additions & 5 deletions prody/atomic/chain.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@
__all__ = ['Chain']

def getSequence(resnames, **kwargs):
"""Returns polypeptide sequence as from list of *resnames* (residue
name abbreviations)."""
"""Returns polypeptide sequence from a list of *resnames* using one-letter residue
name abbreviations by default, or long (usually three letter) abbreviations
if *longSeq* is **True**."""

threeLetter = kwargs.get('threeLetter', False)
if threeLetter:
longSeq = kwargs.get('longSeq', False)
if longSeq:
return ' '.join(resnames)

get = AAMAP.get
Expand Down Expand Up @@ -138,7 +139,7 @@ def getSequence(self, **kwargs):

if kwargs.get('allres', False):
get = AAMAP.get
if kwargs.get('threeLetter', False):
if kwargs.get('longSeq', False):
seq = ' '.join([res.getResname() for res in self])
else:
seq = ''.join([get(res.getResname(), 'X') for res in self])
Expand Down
25 changes: 18 additions & 7 deletions prody/proteins/cifheader.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,11 @@ def parseCIFHeader(pdb, *keys, **kwargs):

def getCIFHeaderDict(stream, *keys, **kwargs):
"""Returns header data in a dictionary. *stream* may be a list of PDB lines
or a stream."""
or a stream.
Polymers have sequences that usually use one-letter residue name abbreviations by default.
To obtain long (usually three letter) abbreviations, set *longSeq* to **True**.
"""

try:
lines = stream.readlines()
Expand Down Expand Up @@ -766,7 +770,10 @@ def _getReference(lines):


def _getPolymers(lines, **kwargs):
"""Returns list of polymers (macromolecules)."""
"""Returns list of polymers (macromolecules).
Sequence is usually one-letter abbreviations, but can be long
abbreviations (usually three letters) if *longSeq* is **True**"""

pdbid = _PDB_HEADER_MAP['identifier'](lines)
polymers = dict()
Expand All @@ -785,15 +792,15 @@ def _getPolymers(lines, **kwargs):
poly = polymers.get(ch, Polymer(ch))
polymers[ch] = poly

threeLetter = kwargs.get('threeLetter', False)
if threeLetter:
longSeq = kwargs.get('longSeq', False)
if longSeq:
poly.sequence += ''.join(item[
'_entity_poly.pdbx_seq_one_letter_code'].replace(';', '').split())
else:
poly.sequence += ''.join(item[
'_entity_poly.pdbx_seq_one_letter_code_can'].replace(';', '').split())

if threeLetter:
if longSeq:
for poly in polymers.values():
seq = poly.sequence
resnames = []
Expand Down Expand Up @@ -1264,13 +1271,17 @@ def _getOther(lines, key=None):
return data


def _getUnobservedSeq(lines):
def _getUnobservedSeq(lines, **kwargs):
"""Get sequence of unobserved residues.
This sequence usually uses one-letter residue name abbreviations by default.
To obtain long (usually three letter) abbreviations, set *longSeq* to **True**."""

key_unobs = '_pdbx_unobs_or_zero_occ_residues'

try:
unobs = parseSTARSection(lines, key_unobs, report=False)
polymers = _getPolymers(lines)
polymers = _getPolymers(lines, **kwargs)
except:
pass

Expand Down
14 changes: 10 additions & 4 deletions prody/proteins/header.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,7 +304,10 @@ def parsePDBHeader(pdb, *keys, **kwargs):

def getHeaderDict(stream, *keys, **kwargs):
"""Returns header data in a dictionary. *stream* may be a list of PDB lines
or a stream."""
or a stream.
Polymers have sequences that usually use one-letter residue name abbreviations by default.
To obtain long (usually three letter) abbreviations, set *longSeq* to **True**."""

lines = defaultdict(list)
loc = 0
Expand Down Expand Up @@ -559,7 +562,10 @@ def _getReference(lines):


def _getPolymers(lines, **kwargs):
"""Returns list of polymers (macromolecules)."""
"""Returns list of polymers (macromolecules).
Polymers have sequences that usually use one-letter residue name abbreviations by default.
To obtain long (usually three letter) abbreviations, set *longSeq* to **True**."""

pdbid = lines['pdbid']
polymers = dict()
Expand All @@ -568,8 +574,8 @@ def _getPolymers(lines, **kwargs):
poly = polymers.get(ch, Polymer(ch))
polymers[ch] = poly

threeLetter = kwargs.get('threeLetter', False)
if threeLetter:
longSeq = kwargs.get('longSeq', False)
if longSeq:
if poly.sequence != '':
poly.sequence += ' '
poly.sequence += getSequence(line[19:].split(), **kwargs)
Expand Down
2 changes: 1 addition & 1 deletion prody/utilities/catchall.py
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ def calcTree(names, distance_matrix, method='upgma', linkage=False):

method = method.lower().strip()

if method in ['ward', 'single', 'average', 'weighted', 'centroid', 'median']:
if method in ['ward', 'single', 'average', 'weighted', 'centroid', 'median', 'complete']:
from scipy.cluster.hierarchy import linkage as hlinkage
from scipy.spatial.distance import squareform

Expand Down

0 comments on commit 13850c3

Please sign in to comment.