Merge branch 'main' of github.com:prody/ProDy into devel-9

jamesmkrieger · Aug 14, 2024 · 18d2b2a · 18d2b2a
2 parents 94318ca + 34fa7be
commit 18d2b2a
Show file tree

Hide file tree

Showing 23 changed files with 53,335 additions and 221 deletions.
diff --git a/docs/about/people.rst b/docs/about/people.rst
@@ -29,8 +29,15 @@ including *SignDy* and Adaptive ANM.
 the *cryo-EM* module, :mod:`.protein.emdmap`.
 
 `Burak Kaynak`_ contributed significantly to the development of 
-:mod:`.domain_decomposition` and :mod:`.dynamics.essa`, 
-and is in the process of adding other modules too.
+:mod:`.domain_decomposition`, :mod:`.dynamics.essa`, and
+:mod:`.dynamics.clustenm`. 
+
+`Karolina Mikulska-Ruminska`_ contributed significantly to the development of 
+:mod:`.protein.interactions` (*InSty*), :mod:`.protein.waterbridges`
+(*WatFinder*), and :mod:`.dynamics.mechstiff` (*MechStiff*).
+
+`Anthony Bogetti`_ has been overseeing the overall development of *ProDy* since 
+2024.
 
 `Anindita Dutta`_ contributed to the development of *Evol*,
 :mod:`.database` and :mod:`.sequence` modules.
@@ -52,26 +59,32 @@ contributions and feedback from the following individuals:
 
 `Ying Liu`_ provided the code for Perturbation Response Scanning method.
 
+`Frane Doljanin`_ provided the code for water bridge detection.
+
 `Kian Ho`_ contributed with bug fixes and unit tests for DSSP functions.
 
 `Gökçen Eraslan`_ contributed with bug fixes and development and maintenance
 insights.
 
 
+
 .. _Ahmet Bakan: https://scholar.google.com/citations?user=-QAYVgMAAAAJ&hl=en
 .. _Cihan Kaya: https://www.linkedin.com/in/cihan-kaya/
-.. _Bahar Lab: http://www.ccbb.pitt.edu/faculty/bahar/
+.. _Bahar Lab: http://www.bahargroup.org/Faculty/bahar/
 .. _University of Pittsburgh: http://www.pitt.edu/
 .. _Anindita Dutta: http://www.linkedin.com/pub/anindita-dutta/5a/568/a90
 .. _Wenzhi Mao: http://www.linkedin.com/pub/wenzhi-mao/2a/29a/29
 .. _Lidio Meireles: http://www.linkedin.com/in/lidio
 .. _Ying Liu: http://www.linkedin.com/pub/ying-liu/15/48b/5a9
 .. _Kian Ho: https://github.com/kianho
 .. _Gökçen Eraslan: http://blog.yeredusuncedernegi.com/
-.. _Tim Lezon: http://www.csb.pitt.edu/Faculty/Lezon/
+.. _Tim Lezon: https://scholar.google.pl/citations?user=1MwNI3EAAAAJ&hl=pl&oi=ao
 .. _Chakra Chennubhotla: http://www.csb.pitt.edu/Faculty/Chakra/
 .. _She (John) Zhang: https://www.linkedin.com/in/she-zhang-49164399/
 .. _Hongchun Li: http://www.pitt.edu/~hongchun/
-.. _James Krieger: http://www.csb.pitt.edu/Faculty/bahar/lab.html
-.. _Yan Zhang: https://www.csb.pitt.edu/Faculty/bahar/lab.html
-.. _Burak Kaynak: https://www.csb.pitt.edu/Faculty/bahar/lab.html
+.. _James Krieger: https://scholar.google.pl/citations?user=DoiCjkUAAAAJ&hl=pl
+.. _Yan Zhang: https://scholar.google.pl/citations?user=VxwU0pgAAAAJ&hl=pl&oi=sra
+.. _Burak Kaynak: https://scholar.google.pl/citations?user=gP8RokwAAAAJ&hl=pl&oi=ao
+.. _Karolina Mikulska-Ruminska: https://scholar.google.pl/citations?user=IpyPHRwAAAAJ&hl=pl
+.. _Anthony Bogetti: https://scholar.google.pl/citations?hl=pl&user=9qQClIcAAAAJ
+.. _Frane Doljanin: https://github.com/fdoljanin
diff --git a/docs/release/index.rst b/docs/release/index.rst
@@ -10,6 +10,10 @@ Release Notes
    :maxdepth: 2
    :glob:
 
+   v2.4_series
+   v2.3_series
+   v2.2_series
+   v2.1_series
    v2.0_series
    v1.11_series
    v1.10_series

diff --git a/prody/apps/prody_apps/prody_catdcd.py b/prody/apps/prody_apps/prody_catdcd.py
@@ -80,8 +80,9 @@ def prody_catdcd(*dcd, **kwargs):
     out = prody.DCDFile(output, 'w')
     count = 0
     stride = kwargs.get('stride', 1)
-    goto = stride != 1
-    slc = slice(kwargs.get('first', 0), kwargs.get('last', -1),
+    first = kwargs.get('first', 0)
+    goto = stride != 1 or first != 0
+    slc = slice(first, kwargs.get('last', -1),
                 stride).indices(len(traj)+1)
     for i in range(*slc):
         if goto:

diff --git a/prody/database/bioexcel.py b/prody/database/bioexcel.py
@@ -58,14 +58,7 @@ def fetchBioexcelPDB(acc, **kwargs):
     if selection is not None:
         url += '?selection=' + selection.replace(" ","%20")
 
-    response = requestFromUrl(url, timeout)
-
-    if PY3K:
-        response = response.decode()
-
-    fo = open(filepath, 'w')
-    fo.write(response)
-    fo.close()
+    filepath = requestFromUrl(url, timeout, filepath, source='pdb')
 
     return filepath
 
@@ -118,11 +111,7 @@ def fetchBioexcelTrajectory(acc, **kwargs):
     if selection is not None:
         url += '&selection=' + selection.replace(" ","%20")
 
-    response = requestFromUrl(url, timeout)
-
-    fo = open(filepath, 'wb')
-    fo.write(response)
-    fo.close()
+    filepath = requestFromUrl(url, timeout, filepath, source='xtc')
 
     if convert:
         filepath = convertXtcToDcd(filepath, **kwargs)
@@ -150,20 +139,19 @@ def fetchBioexcelTopology(acc, **kwargs):
 
     See https://bioexcel-cv19.bsc.es/api/rest/docs for more info
     """
-    acc, convert, _, filepath, timeout, _ = checkInputs(acc, **kwargs)
-    if not filepath.endswith('.json'):
-        filepath += '.json'
-
-    url = prefix + acc + "/topology"
-
-    response = requestFromUrl(url, timeout)
+    if isfile(acc):
+        filepath = acc
+    else:
+        acc, convert, _, filepath, timeout, _ = checkInputs(acc, **kwargs)
+        if not filepath.endswith('.json') and not filepath.endswith('.psf'):
+            filepath += '.json'
 
-    if PY3K:
-        response = response.decode()
+    if filepath.endswith('.psf'):
+        convert = False
 
-    fo = open(filepath, 'w')
-    fo.write(response)
-    fo.close()
+    if not isfile(filepath):
+        url = prefix + acc + "/topology"
+        filepath = requestFromUrl(url, timeout, filepath, source='json')
 
     if convert:
         ag = parseBioexcelTopology(filepath, **kwargs)
@@ -204,21 +192,17 @@ def parseBioexcelTopology(query, **kwargs):
         ag._n_csets = 1
         ag._acsi = 0
 
-        nodes = ag.select('name N')
+        indices = np.ix_(*[np.array(data['atom_residue_indices'])])
 
-        residue_chids = [data['chain_names'][chain_index] for chain_index in data['residue_chain_indices']]
-        chids, _ = extendAtomicData(residue_chids, nodes, ag)
-        ag.setChids(chids)
+        chids = np.array([data['chain_names'][chain_index]
+                          for chain_index in data['residue_chain_indices']])
+        ag.setChids(chids[indices])
 
-        resnames, _ = extendAtomicData(data['residue_names'], nodes, ag)
-        ag.setResnames(resnames)
-
-        resnums, _ = extendAtomicData(data['residue_numbers'], nodes, ag)
-        ag.setResnums(resnums)
+        ag.setResnames(np.array(data['residue_names'])[indices])
+        ag.setResnums(np.array(data['residue_numbers'])[indices])
 
         if data['residue_icodes'] is not None:
-            icodes, _ = extendAtomicData(data['residue_icodes'], nodes, ag)
-            ag.setIcodes(icodes)
+            ag.setIcodes(np.array(data['residue_icodes'])[indices])
 
         # restore acsi and n_csets to defaults
         ag._acsi = None
@@ -240,6 +224,9 @@ def parseBioexcelTopology(query, **kwargs):
 def parseBioexcelTrajectory(query, **kwargs):
     """Parse a BioExcel-CV19 topology json into an :class:`.Ensemble`,
     fetching it if needed using **kwargs
+
+    :arg top: topology filename
+    :type top: str
     """
     kwargs['convert'] = True
     if isfile(query) and query.endswith('.dcd'):
@@ -260,18 +247,37 @@ def parseBioexcelPDB(query, **kwargs):
     fetching it if needed using **kwargs
     """
     kwargs['convert'] = True
-    if not isfile(query):
-        filename = fetchBioexcelPDB(query, **kwargs)
-    else:
+    if isfile(query):
         filename = query
+    elif isfile(query + '.pdb'):
+        filename = query + '.pdb'
+    else:
+        filename = fetchBioexcelPDB(query, **kwargs)
+
+    ag = parsePDB(filename)
+    if ag is None:
+        filename = fetchBioexcelPDB(query, **kwargs)
+        ag = parsePDB(filename)
 
-    return parsePDB(filename)
+    acc = basename(splitext(filename)[0])
+    ag2 = parseBioexcelTopology(acc, **kwargs)
+
+    ag.setElements(ag2.getElements())
+    return ag
 
 def convertXtcToDcd(filepath, **kwargs):
     """Convert xtc trajectories to dcd files using mdtraj.
     Returns path to output dcd file.
+
+    :arg top: topology filename
+    :type top: str    
     """
-    acc = basename(splitext(filepath)[0])
+    topFile = kwargs.get('top', None)
+    if topFile is not None:
+        acc = topFile
+    else:
+        acc = basename(splitext(filepath)[0])
+
     try:
         import mdtraj
     except ImportError:
@@ -284,25 +290,60 @@ def convertXtcToDcd(filepath, **kwargs):
 
     return filepath
 
-def requestFromUrl(url, timeout):
+def requestFromUrl(url, timeout, filepath, source=None):
     """Helper function to make a request from a url and return the response"""
     import requests
+    import json
+    import mdtraj
+    import tempfile
+
+    acc = url.split(prefix)[1].split('/')[0]
 
     LOGGER.timeit('_bioexcel')
     response = None
     sleep = 2
     while LOGGER.timing('_bioexcel') < timeout:
         try:
             response = requests.get(url).content
+
+            if source == 'json':
+                json.loads(response)
+
+                if PY3K:
+                    response = response.decode()
+
+                fo = open(filepath, 'w')
+                fo.write(response)
+                fo.close()
+
+            elif source == 'xtc':
+                fo = open(filepath, 'wb')
+                fo.write(response)
+                fo.close()
+
+                top = mdtraj.load_psf(fetchBioexcelTopology(acc, timeout=timeout))
+                mdtraj.load_xtc(filepath, top=top)
+
+            elif source == 'pdb':
+                if PY3K:
+                    response = response.decode()
+
+                fo = open(filepath, 'w')
+                fo.write(response)
+                fo.close()
+
+                ag = parsePDB(filepath)
+                numAtoms = ag.numAtoms()
+
         except Exception:
             pass
         else:
             break
 
-        sleep = 20 if int(sleep * 1.5) >= 20 else int(sleep * 1.5)
+        sleep = 100 if int(sleep * 1.5) >= 100 else int(sleep * 1.5)
         LOGGER.sleep(int(sleep), '. Trying to reconnect...')
 
-    return response
+    return filepath
 
 def checkSelection(**kwargs):
     """Helper function to check selection"""
@@ -333,7 +374,7 @@ def checkConvert(**kwargs):
     return convert
 
 def checkTimeout(**kwargs):
-    timeout = kwargs.get('timeout', 60)
+    timeout = kwargs.get('timeout', 200)
     if not isinstance(timeout, (Number, type(None))):
         raise TypeError('timeout should be number')
     return timeout

diff --git a/prody/database/dali.py b/prody/database/dali.py
@@ -154,16 +154,16 @@ def __init__(self, url, pdbId, chain, subset='fullPDB', localFile=False, **kwarg
         """
 
         self._url = url
-        self._pdbId = pdbId
-        self._chain = chain
+        self._pdbId = pdbId.lower()
+        self._chain = chain.upper()
         subset = subset.upper()
         if subset == "FULLPDB" or subset not in ["PDB25", "PDB50", "PDB90"]:
             self._subset = ""
         else:
             self._subset = "-"+subset[3:]
         timeout = kwargs.pop('timeout', 120)
 
-        self._title = pdbId + '-' + chain
+        self._title = self._pdbId + '-' + self._chain
         self._alignPDB = None
         self._filterDict = None
         self._max_index = None

diff --git a/prody/dynamics/__init__.py b/prody/dynamics/__init__.py
@@ -363,3 +363,7 @@
 from . import lda
 from .lda import *
 __all__.extend(lda.__all__)
+
+from . import logistic
+from .logistic import *
+__all__.extend(logistic.__all__)
diff --git a/prody/dynamics/clustenm.py b/prody/dynamics/clustenm.py
@@ -111,6 +111,8 @@ def __init__(self, title=None):
         self._targeted = False
         self._tmdk = 10.
 
+        self._cc = None
+
         super(ClustENM, self).__init__('Unknown')   # dummy title; will be replaced in the next line
         self._title = title