From 13850c3fc58f13589cf3d6e5cc1f30b9b6e07d86 Mon Sep 17 00:00:00 2001 From: James Krieger Date: Thu, 12 Sep 2024 14:47:38 +0200 Subject: [PATCH] add docs --- prody/atomic/atomic.py | 8 ++++---- prody/atomic/chain.py | 11 ++++++----- prody/proteins/cifheader.py | 25 ++++++++++++++++++------- prody/proteins/header.py | 14 ++++++++++---- prody/utilities/catchall.py | 2 +- 5 files changed, 39 insertions(+), 21 deletions(-) diff --git a/prody/atomic/atomic.py b/prody/atomic/atomic.py index bc5153253..1541c1993 100644 --- a/prody/atomic/atomic.py +++ b/prody/atomic/atomic.py @@ -251,22 +251,22 @@ def getTitle(self): return ag._title def getSequence(self, **kwargs): - """Returns one-letter sequence string for amino acids. + """Returns one-letter sequence string for amino acids, unless *longSeq* is **True**. When *allres* keyword argument is **True**, sequence will include all residues (e.g. water molecules) in the chain and **X** will be used for non-standard residue names.""" - threeLetter = kwargs.get('threeLetter', False) + longSeq = kwargs.get('longSeq', False) get = AAMAP.get if hasattr(self, 'getResnames'): - if threeLetter: + if longSeq: seq = ' '.join(self.getResnames()) else: seq = ''.join([get(res, 'X') for res in self.getResnames()]) else: res = self.getResname() - if threeLetter: + if longSeq: seq = res else: seq = get(res, 'X') diff --git a/prody/atomic/chain.py b/prody/atomic/chain.py index 0b0b17094..db74231c9 100644 --- a/prody/atomic/chain.py +++ b/prody/atomic/chain.py @@ -9,11 +9,12 @@ __all__ = ['Chain'] def getSequence(resnames, **kwargs): - """Returns polypeptide sequence as from list of *resnames* (residue - name abbreviations).""" + """Returns polypeptide sequence from a list of *resnames* using one-letter residue + name abbreviations by default, or long (usually three letter) abbreviations + if *longSeq* is **True**.""" - threeLetter = kwargs.get('threeLetter', False) - if threeLetter: + longSeq = kwargs.get('longSeq', False) + if longSeq: return 
' '.join(resnames) get = AAMAP.get @@ -138,7 +139,7 @@ def getSequence(self, **kwargs): if kwargs.get('allres', False): get = AAMAP.get - if kwargs.get('threeLetter', False): + if kwargs.get('longSeq', False): seq = ' '.join([res.getResname() for res in self]) else: seq = ''.join([get(res.getResname(), 'X') for res in self]) diff --git a/prody/proteins/cifheader.py b/prody/proteins/cifheader.py index 34823b098..71c634486 100644 --- a/prody/proteins/cifheader.py +++ b/prody/proteins/cifheader.py @@ -127,7 +127,11 @@ def parseCIFHeader(pdb, *keys, **kwargs): def getCIFHeaderDict(stream, *keys, **kwargs): """Returns header data in a dictionary. *stream* may be a list of PDB lines - or a stream.""" + or a stream. + + Polymers have sequences that usually use one-letter residue name abbreviations by default. + To obtain long (usually three letter) abbreviations, set *longSeq* to **True**. + """ try: lines = stream.readlines() @@ -766,7 +770,10 @@ def _getReference(lines): def _getPolymers(lines, **kwargs): - """Returns list of polymers (macromolecules).""" + """Returns list of polymers (macromolecules). 
+ + Sequence is usually one-letter abbreviations, but can be long + abbreviations (usually three letters) if *longSeq* is **True**.""" pdbid = _PDB_HEADER_MAP['identifier'](lines) polymers = dict() @@ -785,15 +792,15 @@ def _getPolymers(lines, **kwargs): poly = polymers.get(ch, Polymer(ch)) polymers[ch] = poly - threeLetter = kwargs.get('threeLetter', False) - if threeLetter: + longSeq = kwargs.get('longSeq', False) + if longSeq: poly.sequence += ''.join(item[ '_entity_poly.pdbx_seq_one_letter_code'].replace(';', '').split()) else: poly.sequence += ''.join(item[ '_entity_poly.pdbx_seq_one_letter_code_can'].replace(';', '').split()) - if threeLetter: + if longSeq: for poly in polymers.values(): seq = poly.sequence resnames = [] @@ -1264,13 +1271,17 @@ def _getOther(lines, key=None): return data -def _getUnobservedSeq(lines): +def _getUnobservedSeq(lines, **kwargs): + """Get sequence of unobserved residues. + + This sequence usually uses one-letter residue name abbreviations by default. + To obtain long (usually three letter) abbreviations, set *longSeq* to **True**.""" key_unobs = '_pdbx_unobs_or_zero_occ_residues' try: unobs = parseSTARSection(lines, key_unobs, report=False) - polymers = _getPolymers(lines) + polymers = _getPolymers(lines, **kwargs) except: pass diff --git a/prody/proteins/header.py b/prody/proteins/header.py index b22e6483b..2de4e7123 100644 --- a/prody/proteins/header.py +++ b/prody/proteins/header.py @@ -304,7 +304,10 @@ def parsePDBHeader(pdb, *keys, **kwargs): def getHeaderDict(stream, *keys, **kwargs): """Returns header data in a dictionary. *stream* may be a list of PDB lines - or a stream.""" + or a stream. + + Polymers have sequences that usually use one-letter residue name abbreviations by default. 
+ To obtain long (usually three letter) abbreviations, set *longSeq* to **True**.""" lines = defaultdict(list) loc = 0 @@ -559,7 +562,10 @@ def _getReference(lines): def _getPolymers(lines, **kwargs): - """Returns list of polymers (macromolecules).""" + """Returns list of polymers (macromolecules). + + Polymers have sequences that usually use one-letter residue name abbreviations by default. + To obtain long (usually three letter) abbreviations, set *longSeq* to **True**.""" pdbid = lines['pdbid'] polymers = dict() @@ -568,8 +574,8 @@ def _getPolymers(lines, **kwargs): poly = polymers.get(ch, Polymer(ch)) polymers[ch] = poly - threeLetter = kwargs.get('threeLetter', False) - if threeLetter: + longSeq = kwargs.get('longSeq', False) + if longSeq: if poly.sequence != '': poly.sequence += ' ' poly.sequence += getSequence(line[19:].split(), **kwargs) diff --git a/prody/utilities/catchall.py b/prody/utilities/catchall.py index 33d166dcc..d88aeee70 100644 --- a/prody/utilities/catchall.py +++ b/prody/utilities/catchall.py @@ -314,7 +314,7 @@ def calcTree(names, distance_matrix, method='upgma', linkage=False): method = method.lower().strip() - if method in ['ward', 'single', 'average', 'weighted', 'centroid', 'median']: + if method in ['ward', 'single', 'average', 'weighted', 'centroid', 'median', 'complete']: from scipy.cluster.hierarchy import linkage as hlinkage from scipy.spatial.distance import squareform