From aa2a88fd463512e42e43e338b807ed662d9de19c Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Wed, 22 May 2024 20:04:49 +0200 Subject: [PATCH 01/30] Start working on MMCIF parser --- package/MDAnalysis/topology/MMCIFParser.py | 150 +++++++++++++++++++++ package/MDAnalysis/topology/__init__.py | 1 + 2 files changed, 151 insertions(+) create mode 100644 package/MDAnalysis/topology/MMCIFParser.py diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py new file mode 100644 index 00000000000..5c155ec3c79 --- /dev/null +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -0,0 +1,150 @@ +# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- +# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 +# +# MDAnalysis --- https://www.mdanalysis.org +# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors +# (see the file AUTHORS for the full list of names) +# +# Released under the GNU Public Licence, v2 or any higher version +# +# Please cite your use of MDAnalysis in published work: +# +# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, +# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. +# MDAnalysis: A Python package for the rapid analysis of molecular dynamics +# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th +# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. +# doi: 10.25080/majora-629e541a-00e +# +# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. +# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. +# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 +# TODO + +""" +MMCIF Topology Parser # TODO +=================== + +.. versionadded:: 0.9.1 + +Reads an xyz file and pulls the atom information from it. Because +xyz only has atom name information, all information about residues +and segments won't be populated. + +Classes +------- + +.. autoclass:: XYZParser + :members: + +""" + +from .base import TopologyReaderBase +from ..core.topology import Topology +from ..core.topologyattrs import ( + Atomnames, + Atomids, + AltLocs, + ChainIDs, + Atomtypes, + Elements, + ICodes, + Masses, + Occupancies, + RecordTypes, + Resids, + Resnames, + Segids, + Tempfactors, + FormalCharges, +) + + +class MMCIFParser(TopologyReaderBase): + """Parse a list of atoms from an XYZ file. + + Creates the following attributes: + - Atomnames + - TODO + + + .. versionadded:: 2.8.0 + """ + + format = "MMCIF" + + def parse(self, **kwargs): + """Read the file and return the structure. + + Returns + ------- + MDAnalysis Topology object + """ + import gemmi + + structure = gemmi.read_structure(self.filename) + + ( + names, + atomtypes, + record_types, + serials, + altlocs, + chainids, + icodes, + tempfactors, + occupancies, + resids, + resnames, + segids, + elements, + formalcharges, + weights, + ) = list(zip(*[ + ( + at.name, + at.element.name, + res.het_flag, + at.serial, + at.altloc, + chain.name, + res.seqid.icode, + at.b_iso, + at.occ, + res.label_seq, + res.name, + res.segment, + at.element.name, + at.charge, + at.element.weight, + ) + for model in structure + for chain in model + for res in chain + for at in res + ])) + + print(resids) + + attrs = [ + Atomnames(names), + Atomtypes(atomtypes), + RecordTypes(record_types), + Atomids(serials), + AltLocs(altlocs), + ChainIDs(chainids), + ICodes(icodes), + Tempfactors(tempfactors), + Occupancies(occupancies), + Resids(resids), + Resnames(resnames), + Segids(segids), + Elements(elements), + FormalCharges(formalcharges), + Masses(weights), + ] + + n_atoms = n_residues = n_segments = 1 + top = Topology(n_atoms, n_residues, n_segments, attrs=attrs) + + return top diff --git a/package/MDAnalysis/topology/__init__.py b/package/MDAnalysis/topology/__init__.py index 32df510f47e..058d142737e 100644 --- a/package/MDAnalysis/topology/__init__.py +++ b/package/MDAnalysis/topology/__init__.py @@ -333,3 +333,4 @@ from . import MinimalParser from . import ITPParser from . import FHIAIMSParser +from . import MMCIFParser \ No newline at end of file From 218cf436fcd5bf7ccccd68059aab98fe8fe8a11f Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Wed, 22 May 2024 22:24:38 +0200 Subject: [PATCH 02/30] Add first (not working) version of MMCIFReader and MMCIF topology parser --- package/MDAnalysis/coordinates/MMCIF.py | 37 ++++++++++++ package/MDAnalysis/coordinates/__init__.py | 1 + package/MDAnalysis/topology/MMCIFParser.py | 68 ++++++++++++---------- 3 files changed, 75 insertions(+), 31 deletions(-) create mode 100644 package/MDAnalysis/coordinates/MMCIF.py diff --git a/package/MDAnalysis/coordinates/MMCIF.py b/package/MDAnalysis/coordinates/MMCIF.py new file mode 100644 index 00000000000..c896f98c62a --- /dev/null +++ b/package/MDAnalysis/coordinates/MMCIF.py @@ -0,0 +1,37 @@ +import numpy as np +import gemmi +import logging +from . import base + +logger = logging.getLogger("MDAnalysis.coordinates.MMCIF") + + +class MMCIFReader(base.SingleFrameReaderBase): + """Reads from an MMCIF file""" + + format = "MMCIF" + units = {"time": None, "length": "Angstrom"} + + def _read_first_frame(self): + structure = gemmi.read_structure(self.filename) + coords = np.array( + [ + [*at.pos.tolist()] + for model in structure + for chain in model + for res in chain + for at in res + ] + ) + self.n_atoms = len(coords) + self.ts = self._Timestep.from_coordinates(coords, **self._ts_kwargs) + self.ts.frame = 0 + + def Writer(self, filename, n_atoms=None, **kwargs): + raise NotImplementedError + + def close(self): + pass + + +class MMCIFWriter(base.WriterBase): ... diff --git a/package/MDAnalysis/coordinates/__init__.py b/package/MDAnalysis/coordinates/__init__.py index 9b6a7121bc9..b8b4ac41f7f 100644 --- a/package/MDAnalysis/coordinates/__init__.py +++ b/package/MDAnalysis/coordinates/__init__.py @@ -791,3 +791,4 @@ class can choose an appropriate reader automatically. from . import NAMDBIN from . import FHIAIMS from . import TNG +from . import MMCIF \ No newline at end of file diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index 5c155ec3c79..7d47a2d0629 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -48,13 +48,13 @@ ChainIDs, Atomtypes, Elements, - ICodes, + # ICodes, Masses, Occupancies, RecordTypes, Resids, Resnames, - Segids, + # Segids, Tempfactors, FormalCharges, ) @@ -84,6 +84,8 @@ def parse(self, **kwargs): structure = gemmi.read_structure(self.filename) + # here we freaking go + ( names, atomtypes, @@ -91,40 +93,42 @@ def parse(self, **kwargs): serials, altlocs, chainids, - icodes, + # icodes, tempfactors, occupancies, resids, resnames, - segids, + # segids, elements, formalcharges, weights, - ) = list(zip(*[ - ( - at.name, - at.element.name, - res.het_flag, - at.serial, - at.altloc, - chain.name, - res.seqid.icode, - at.b_iso, - at.occ, - res.label_seq, - res.name, - res.segment, - at.element.name, - at.charge, - at.element.weight, + ) = list( + zip( + *[ + ( + at.name, # names + at.element.name, # atomtypes + res.het_flag, # record_types + at.serial, # serials + at.altloc, # altlocs + chain.name, # chainids + # res.seqid.icode, # icodes + at.b_iso, # tempfactores + at.occ, # occupancies + res.seqid.num, # resids + res.name, # resnames + # res.segment, # segids + at.element.name, # elements + at.charge, # formalcharges + at.element.weight, # weights + ) + for model in structure + for chain in model + for res in chain + for at in res + ] ) - for model in structure - for chain in model - for res in chain - for at in res - ])) - - print(resids) + ) attrs = [ Atomnames(names), @@ -133,18 +137,20 @@ def parse(self, **kwargs): Atomids(serials), AltLocs(altlocs), ChainIDs(chainids), - ICodes(icodes), + # ICodes(icodes), Tempfactors(tempfactors), Occupancies(occupancies), Resids(resids), Resnames(resnames), - Segids(segids), + # Segids(segids), Elements(elements), FormalCharges(formalcharges), Masses(weights), ] - n_atoms = n_residues = n_segments = 1 + n_atoms = len(names) + n_residues = len(resids) + n_segments = 1 top = Topology(n_atoms, n_residues, n_segments, attrs=attrs) return top From 7f78e024dcc916972643300fe2808843415b32ac Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Wed, 22 May 2024 22:57:40 +0200 Subject: [PATCH 03/30] Do some squashing --- package/MDAnalysis/topology/MMCIFParser.py | 132 +++++++++++++-------- 1 file changed, 82 insertions(+), 50 deletions(-) diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index 7d47a2d0629..d7e8b431dd1 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -39,7 +39,8 @@ """ -from .base import TopologyReaderBase +import numpy as np +from .base import TopologyReaderBase, change_squash from ..core.topology import Topology from ..core.topologyattrs import ( Atomnames, @@ -48,13 +49,13 @@ ChainIDs, Atomtypes, Elements, - # ICodes, + ICodes, Masses, Occupancies, RecordTypes, Resids, Resnames, - # Segids, + Segids, Tempfactors, FormalCharges, ) @@ -85,72 +86,103 @@ def parse(self, **kwargs): structure = gemmi.read_structure(self.filename) # here we freaking go - ( - names, + # atom properties -- no squashing! + # -- + altlocs, atomtypes, - record_types, + elements, + formalcharges, + names, serials, - altlocs, - chainids, - # icodes, tempfactors, occupancies, + weights, + # -- + # residue properties -- some squashing... + # -- + icodes, + record_types, resids, resnames, - # segids, - elements, - formalcharges, - weights, - ) = list( - zip( - *[ - ( - at.name, # names - at.element.name, # atomtypes - res.het_flag, # record_types - at.serial, # serials - at.altloc, # altlocs - chain.name, # chainids - # res.seqid.icode, # icodes - at.b_iso, # tempfactores - at.occ, # occupancies - res.seqid.num, # resids - res.name, # resnames - # res.segment, # segids - at.element.name, # elements - at.charge, # formalcharges - at.element.weight, # weights - ) - for model in structure - for chain in model - for res in chain - for at in res - ] + segids, + # -- + # chain properties -- lots of squashing... + # -- + chainids, + ) = map( + np.array, + list( + zip( + *[ + ( + # atom properties -- no squashing! + # -- + at.altloc, # altlocs + at.element.name, # atomtypes + at.element.name, # elements + at.charge, # formalcharges + at.name, # names + at.serial, # serials + at.b_iso, # tempfactores + at.occ, # occupancies + at.element.weight, # weights + # -- + # residue properties -- some squashing... + # -- + res.seqid.icode, # icodes + res.het_flag, # record_types + res.seqid.num, # resids + res.name, # resnames + res.segment, # segids + # -- + # chain properties -- lots of squashing... + # -- + chain.name, # chainids + ) + for model in structure + for chain in model + for res in chain + for at in res + ] + ) + ), + ) + + # squash residue-based attributes + _, (res_icodes, res_record_types, res_resids, res_resnames, res_segids) = ( + change_squash( + (resids, resnames), + (icodes, record_types, resids, resnames, segids), ) ) + # squash chain-based attributes + _, (chain_chainids,) = change_squash((chainids,), (chainids,)) attrs = [ - Atomnames(names), + # per atom + AltLocs(altlocs), Atomtypes(atomtypes), - RecordTypes(record_types), + Elements(elements), + FormalCharges(formalcharges), + Atomnames(names), Atomids(serials), - AltLocs(altlocs), - ChainIDs(chainids), - # ICodes(icodes), Tempfactors(tempfactors), Occupancies(occupancies), - Resids(resids), - Resnames(resnames), - # Segids(segids), - Elements(elements), - FormalCharges(formalcharges), Masses(weights), + # per residue + # ICodes(res_icodes), + # RecordTypes(res_record_types), + Resids(res_resids), + Resnames(res_resnames), + Segids(res_segids), + # per chain + ChainIDs(chainids), ] n_atoms = len(names) - n_residues = len(resids) - n_segments = 1 + n_residues = len(res_resids) + n_segments = len(res_segids) top = Topology(n_atoms, n_residues, n_segments, attrs=attrs) return top From 6682d6e42c3992d8e16631248f2412ded1658072 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Wed, 22 May 2024 22:59:45 +0200 Subject: [PATCH 04/30] Remove inherited docs --- package/MDAnalysis/topology/MMCIFParser.py | 46 ---------------------- 1 file changed, 46 deletions(-) diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index d7e8b431dd1..5a88ab61f2e 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -1,42 +1,6 @@ -# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*- -# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 -# -# MDAnalysis --- https://www.mdanalysis.org -# Copyright (c) 2006-2017 The MDAnalysis Development Team and contributors -# (see the file AUTHORS for the full list of names) -# -# Released under the GNU Public Licence, v2 or any higher version -# -# Please cite your use of MDAnalysis in published work: -# -# R. J. Gowers, M. Linke, J. Barnoud, T. J. E. Reddy, M. N. Melo, S. L. Seyler, -# D. L. Dotson, J. Domanski, S. Buchoux, I. M. Kenney, and O. Beckstein. -# MDAnalysis: A Python package for the rapid analysis of molecular dynamics -# simulations. In S. Benthall and S. Rostrup editors, Proceedings of the 15th -# Python in Science Conference, pages 102-109, Austin, TX, 2016. SciPy. -# doi: 10.25080/majora-629e541a-00e -# -# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein. -# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations. -# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787 -# TODO - """ MMCIF Topology Parser # TODO =================== - -.. versionadded:: 0.9.1 - -Reads an xyz file and pulls the atom information from it. Because -xyz only has atom name information, all information about residues -and segments won't be populated. - -Classes -------- - -.. autoclass:: XYZParser - :members: - """ import numpy as np @@ -62,16 +26,6 @@ class MMCIFParser(TopologyReaderBase): - """Parse a list of atoms from an XYZ file. - - Creates the following attributes: - - Atomnames - - TODO - - - .. versionadded:: 2.8.0 - """ - format = "MMCIF" def parse(self, **kwargs): From 817f3a0e92b3dec9954cee98af3b76f11b8cd3fe Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Wed, 22 May 2024 23:29:37 +0200 Subject: [PATCH 05/30] Try improving the parsing --- package/MDAnalysis/topology/MMCIFParser.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index 5a88ab61f2e..511b137d366 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -73,7 +73,7 @@ def parse(self, **kwargs): # atom properties -- no squashing! # -- at.altloc, # altlocs - at.element.name, # atomtypes + at.name, # atomtypes at.element.name, # elements at.charge, # formalcharges at.name, # names @@ -82,7 +82,7 @@ def parse(self, **kwargs): at.occ, # occupancies at.element.weight, # weights # -- - # residue properties -- some squashing... + # residue properties # -- res.seqid.icode, # icodes res.het_flag, # record_types @@ -90,7 +90,7 @@ def parse(self, **kwargs): res.name, # resnames res.segment, # segids # -- - # chain properties -- lots of squashing... + # chain properties # -- chain.name, # chainids ) @@ -112,6 +112,7 @@ def parse(self, **kwargs): ) # squash chain-based attributes _, (chain_chainids,) = change_squash((chainids,), (chainids,)) + _, (seg_segids,) = change_squash((res_segids,), (res_segids,)) attrs = [ # per atom @@ -125,18 +126,19 @@ def parse(self, **kwargs): Occupancies(occupancies), Masses(weights), # per residue - # ICodes(res_icodes), - # RecordTypes(res_record_types), - Resids(res_resids), - Resnames(res_resnames), - Segids(res_segids), + ICodes(res_icodes), # for each atom + RecordTypes(record_types), # for atom too? + Resids(res_resids), # for residue + Resnames(res_resnames), # for residue + # + Segids(seg_segids), # for segment (currently for residue) # per chain - ChainIDs(chainids), + ChainIDs(chainids), # actually for atom ] n_atoms = len(names) n_residues = len(res_resids) - n_segments = len(res_segids) + n_segments = len(seg_segids) top = Topology(n_atoms, n_residues, n_segments, attrs=attrs) return top From 3cc8c808d9c093b4228088960e8b4ca7bd698c41 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Thu, 30 May 2024 12:16:02 +0200 Subject: [PATCH 06/30] Try three independent loops over the model --- package/MDAnalysis/topology/MMCIFParser.py | 73 +++++++++------------- 1 file changed, 30 insertions(+), 43 deletions(-) diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index 511b137d366..e8ec3bba07d 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -39,10 +39,8 @@ def parse(self, **kwargs): structure = gemmi.read_structure(self.filename) - # here we freaking go + # atom properties ( - # atom properties -- no squashing! - # -- altlocs, atomtypes, elements, @@ -52,26 +50,12 @@ def parse(self, **kwargs): tempfactors, occupancies, weights, - # -- - # residue properties -- some squashing... - # -- - icodes, - record_types, - resids, - resnames, - segids, - # -- - # chain properties -- lots of squashing... - # -- - chainids, ) = map( np.array, list( zip( *[ ( - # atom properties -- no squashing! - # -- at.altloc, # altlocs at.name, # atomtypes at.element.name, # elements @@ -81,39 +65,42 @@ def parse(self, **kwargs): at.b_iso, # tempfactores at.occ, # occupancies at.element.weight, # weights - # -- - # residue properties - # -- + ) + for model in structure + for chain in model + for res in chain + for at in res + ] + ) + ), + ) + # per-residue properties + ( + icodes, + record_types, + resids, + resnames, + segids, + ) = map( + np.array, + list( + zip( + *[ + ( res.seqid.icode, # icodes res.het_flag, # record_types res.seqid.num, # resids res.name, # resnames res.segment, # segids - # -- - # chain properties - # -- - chain.name, # chainids ) for model in structure for chain in model for res in chain - for at in res ] ) ), ) - # squash residue-based attributes - _, (res_icodes, res_record_types, res_resids, res_resnames, res_segids) = ( - change_squash( - (resids, resnames), - (icodes, record_types, resids, resnames, segids), - ) - ) - # squash chain-based attributes - _, (chain_chainids,) = change_squash((chainids,), (chainids,)) - _, (seg_segids,) = change_squash((res_segids,), (res_segids,)) - attrs = [ # per atom AltLocs(altlocs), @@ -126,19 +113,19 @@ def parse(self, **kwargs): Occupancies(occupancies), Masses(weights), # per residue - ICodes(res_icodes), # for each atom + ICodes(icodes), # for each atom RecordTypes(record_types), # for atom too? - Resids(res_resids), # for residue - Resnames(res_resnames), # for residue + Resids(resids), # for residue + Resnames(resnames), # for residue # - Segids(seg_segids), # for segment (currently for residue) + # Segids(segids), # for segment (currently for residue) # per chain - ChainIDs(chainids), # actually for atom + # ChainIDs(chainids), # actually for atom ] n_atoms = len(names) - n_residues = len(res_resids) - n_segments = len(seg_segids) + n_residues = len(resids) + n_segments = len(segids) top = Topology(n_atoms, n_residues, n_segments, attrs=attrs) return top From d21c220a9795137b99d11da50fccb59b0a54ae9e Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Fri, 13 Sep 2024 22:32:46 +0200 Subject: [PATCH 07/30] Add gemmi dependency --- package/pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/package/pyproject.toml b/package/pyproject.toml index 1880d88fd0d..2f7ba8b90d5 100644 --- a/package/pyproject.toml +++ b/package/pyproject.toml @@ -78,6 +78,7 @@ extra_formats = [ "pytng>=0.2.3", "gsd>3.0.0", "rdkit>=2020.03.1", + "gemmi", # for mmcif format ] analysis = [ "biopython>=1.80", From 2a1be15a5b0fe9c46103cbd598a4b4d00cfafa1e Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Fri, 20 Sep 2024 21:25:27 +0200 Subject: [PATCH 08/30] necessary params --- package/MDAnalysis/topology/MMCIFParser.py | 115 +++++++++++---------- 1 file changed, 58 insertions(+), 57 deletions(-) diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index e8ec3bba07d..c29790c53be 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -3,26 +3,29 @@ =================== """ +import gemmi import numpy as np -from .base import TopologyReaderBase, change_squash +import warnings + from ..core.topology import Topology from ..core.topologyattrs import ( - Atomnames, - Atomids, AltLocs, - ChainIDs, + Atomids, + Atomnames, Atomtypes, + ChainIDs, Elements, + FormalCharges, ICodes, Masses, Occupancies, RecordTypes, Resids, Resnames, - Segids, Tempfactors, - FormalCharges, + Segids, ) +from .base import TopologyReaderBase, change_squash class MMCIFParser(TopologyReaderBase): @@ -35,22 +38,32 @@ def parse(self, **kwargs): ------- MDAnalysis Topology object """ - import gemmi - structure = gemmi.read_structure(self.filename) + if len(structure) > 1: + warnings.warn( + "MMCIF model {self.filename} contains {len(model)=} different models, " + "but only the first one will be used to assign the topology" + ) + model = structure[0] + # atom properties ( - altlocs, - atomtypes, - elements, - formalcharges, - names, - serials, - tempfactors, - occupancies, - weights, - ) = map( + altlocs, # at.altlog + atomtypes, # at.name + elements, # at.element.name + formalcharges, # at.charge + names, # at.name + serials, # at.serial + tempfactors, # at.b_iso + occupancies, # at.occ + weights, # at.element.weight + record_types, # res.het_flag TODO: match to ATOM/HETATM + chainids, # chain.name + resids, # res.seqid.num + resnames, # res.name + icodes, # residue.seqid.icode + ) = map( # this is probably not pretty, but it's efficient -- one loop over the mmcif np.array, list( zip( @@ -65,8 +78,12 @@ def parse(self, **kwargs): at.b_iso, # tempfactores at.occ, # occupancies at.element.weight, # weights + res.het_flag, + chain.name, # chainids + res.seqid.num, + res.name, + res.seqid.icode, ) - for model in structure for chain in model for res in chain for at in res @@ -74,58 +91,42 @@ def parse(self, **kwargs): ) ), ) - # per-residue properties - ( - icodes, - record_types, - resids, - resnames, - segids, - ) = map( - np.array, - list( - zip( - *[ - ( - res.seqid.icode, # icodes - res.het_flag, # record_types - res.seqid.num, # resids - res.name, # resnames - res.segment, # segids - ) - for model in structure - for chain in model - for res in chain - ] - ) - ), - ) + + # fill in altlocs + altlocs = ["A" if not elem else elem for elem in altlocs] attrs = [ - # per atom + # AtomAttr subclasses AltLocs(altlocs), + Atomids(serials), + Atomnames(names), Atomtypes(atomtypes), + ChainIDs(chainids), # actually for atom Elements(elements), FormalCharges(formalcharges), - Atomnames(names), - Atomids(serials), - Tempfactors(tempfactors), - Occupancies(occupancies), Masses(weights), - # per residue - ICodes(icodes), # for each atom + Occupancies(occupancies), RecordTypes(record_types), # for atom too? + Tempfactors(tempfactors), + # ResidueAttr subclasses + ICodes(icodes), # for each atom Resids(resids), # for residue Resnames(resnames), # for residue - # - # Segids(segids), # for segment (currently for residue) - # per chain - # ChainIDs(chainids), # actually for atom + # SegmentAttr subclasses + Segids(segids), ] n_atoms = len(names) n_residues = len(resids) - n_segments = len(segids) + n_segments = len(set(chainids)) + + print(resids) + print(change_squash(resids, resids)) + top = Topology(n_atoms, n_residues, n_segments, attrs=attrs) + print(f"{n_atoms=}") + print(f"{n_residues=}") + print(f"{n_segments=}") + return top From 77645e67d10ba047c872cc37b42d5619b7458bfa Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Fri, 20 Sep 2024 21:50:04 +0200 Subject: [PATCH 09/30] finished sorting atom attrs --- package/MDAnalysis/topology/MMCIFParser.py | 54 +++++++++++++--------- 1 file changed, 31 insertions(+), 23 deletions(-) diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index c29790c53be..05ab2c086a9 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -49,18 +49,21 @@ def parse(self, **kwargs): # atom properties ( - altlocs, # at.altlog + altlocs, # at.altloc + serials, # at.serial + names, # at.name atomtypes, # at.name + # ------------------ + chainids, # chain.name elements, # at.element.name formalcharges, # at.charge - names, # at.name - serials, # at.serial - tempfactors, # at.b_iso - occupancies, # at.occ weights, # at.element.weight + # ------------------ + occupancies, # at.occ record_types, # res.het_flag TODO: match to ATOM/HETATM - chainids, # chain.name + tempfactors, # at.b_iso resids, # res.seqid.num + # ------------------ resnames, # res.name icodes, # residue.seqid.icode ) = map( # this is probably not pretty, but it's efficient -- one loop over the mmcif @@ -70,17 +73,20 @@ def parse(self, **kwargs): *[ ( at.altloc, # altlocs + at.serial, # serials + at.name, # names at.name, # atomtypes + # ------------------ + chain.name, # chainids at.element.name, # elements at.charge, # formalcharges - at.name, # names - at.serial, # serials - at.b_iso, # tempfactores - at.occ, # occupancies at.element.weight, # weights - res.het_flag, - chain.name, # chainids + # ------------------ + at.occ, # occupancies + res.het_flag, # record_types + at.b_iso, # tempfactors res.seqid.num, + # ------------------ res.name, res.seqid.icode, ) @@ -97,17 +103,19 @@ def parse(self, **kwargs): attrs = [ # AtomAttr subclasses - AltLocs(altlocs), - Atomids(serials), - Atomnames(names), - Atomtypes(atomtypes), - ChainIDs(chainids), # actually for atom - Elements(elements), - FormalCharges(formalcharges), - Masses(weights), - Occupancies(occupancies), - RecordTypes(record_types), # for atom too? - Tempfactors(tempfactors), + AltLocs(altlocs), # ✅ + Atomids(serials), # ✅ + Atomnames(names), # ✅ + Atomtypes(atomtypes), # ✅ + # ------------------ + ChainIDs(chainids), # ✅ + Elements(elements), # ✅; same as atomtypes + FormalCharges(formalcharges), # ✅ + Masses(weights), # ✅ + # ------------------ + Occupancies(occupancies), # ✅ + RecordTypes(record_types), # ✅ + Tempfactors(tempfactors), # ✅ # ResidueAttr subclasses ICodes(icodes), # for each atom Resids(resids), # for residue From 91e694282a8867616a617133936710f533028303 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Fri, 20 Sep 2024 22:47:06 +0200 Subject: [PATCH 10/30] add function for transformation into *idx --- package/MDAnalysis/topology/MMCIFParser.py | 108 +++++++++++++++------ 1 file changed, 76 insertions(+), 32 deletions(-) diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index 05ab2c086a9..bfd7b443f0e 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -6,6 +6,7 @@ import gemmi import numpy as np import warnings +import itertools from ..core.topology import Topology from ..core.topologyattrs import ( @@ -22,10 +23,15 @@ RecordTypes, Resids, Resnames, + Resnums, Tempfactors, Segids, ) -from .base import TopologyReaderBase, change_squash +from .base import TopologyReaderBase + + +def _into_idx(arr: list[int]) -> list[int]: + return [idx for idx, (_, group) in enumerate(itertools.groupby(arr)) for _ in group] class MMCIFParser(TopologyReaderBase): @@ -60,12 +66,9 @@ def parse(self, **kwargs): weights, # at.element.weight # ------------------ occupancies, # at.occ - record_types, # res.het_flag TODO: match to ATOM/HETATM + record_types, # res.het_flag tempfactors, # at.b_iso - resids, # res.seqid.num - # ------------------ - resnames, # res.name - icodes, # residue.seqid.icode + residx, # res.index? ) = map( # this is probably not pretty, but it's efficient -- one loop over the mmcif np.array, list( @@ -85,56 +88,97 @@ def parse(self, **kwargs): at.occ, # occupancies res.het_flag, # record_types at.b_iso, # tempfactors + res.seqid.num, # residx TODO: translate to continious index + ) + for chain in model + for res in chain + for at in res + ] + ) + ), + ) + + ( + icodes, # res.seqid.icode + resids, # res.seqid.num + resnames, # res.name + segidx, # chain.name TODO: translate into continious index + ) = map( + np.array, + list( + zip( + *[ + ( + res.seqid.icode, res.seqid.num, - # ------------------ res.name, - res.seqid.icode, + chain.name, ) for chain in model for res in chain - for at in res ] ) ), ) + segids = [chain.name for chain in model] + + # transform *idx into continious numpy arrays + residx = np.array(_into_idx(residx)) + segidx = np.array(_into_idx(segidx)) + # fill in altlocs altlocs = ["A" if not elem else elem for elem in altlocs] + record_types = [ + "ATOM" if record == "A" else "HETATM" if record == "H" else None + for record in record_types + ] + if any((elem is None for elem in record_types)): + raise ValueError("Found an atom that is neither ATOM or HETATM") attrs = [ # AtomAttr subclasses - AltLocs(altlocs), # ✅ - Atomids(serials), # ✅ - Atomnames(names), # ✅ - Atomtypes(atomtypes), # ✅ - # ------------------ - ChainIDs(chainids), # ✅ - Elements(elements), # ✅; same as atomtypes - FormalCharges(formalcharges), # ✅ - Masses(weights), # ✅ - # ------------------ - Occupancies(occupancies), # ✅ - RecordTypes(record_types), # ✅ - Tempfactors(tempfactors), # ✅ + AltLocs(altlocs), # at.altloc + Atomids(serials), # at.serial + Atomnames(names), # at.name + Atomtypes(atomtypes), # at.name + # --------------------------------------- + ChainIDs(chainids), # chain.name + Elements(elements), # at.element.name + FormalCharges(formalcharges), # at.charge + Masses(weights), # at.element.weight + # --------------------------------------- + Occupancies(occupancies), # at.occ + RecordTypes(record_types), # res.het_flat + Tempfactors(tempfactors), # at.b_iso + # # ResidueAttr subclasses - ICodes(icodes), # for each atom - Resids(resids), # for residue - Resnames(resnames), # for residue + ICodes(icodes), # res.seqid.icode + Resids(resids), # res.seqid.num + Resnames(resnames), # res.name + # # SegmentAttr subclasses - Segids(segids), + Segids(segids), # chain.name ] n_atoms = len(names) n_residues = len(resids) - n_segments = len(set(chainids)) + n_segments = len(segids) - print(resids) - print(change_squash(resids, resids)) - - top = Topology(n_atoms, n_residues, n_segments, attrs=attrs) + print(f"{len(residx)=}") + print(f"{residx=}") + print(f"{len(segidx)=}") + print(f"{segidx=}") print(f"{n_atoms=}") print(f"{n_residues=}") print(f"{n_segments=}") - return top + return Topology( + n_atoms, + n_residues, + n_segments, + attrs=attrs, + atom_resindex=np.arange(n_atoms), + residue_segindex=np.arange(n_residues), + ) From 9a0c08684bd628200a8fbcaa4a990d207502e689 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Fri, 20 Sep 2024 23:33:19 +0200 Subject: [PATCH 11/30] oh damn seems to finally be working --- package/MDAnalysis/topology/MMCIFParser.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index bfd7b443f0e..f1a070f379d 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -24,8 +24,8 @@ Resids, Resnames, Resnums, - Tempfactors, Segids, + Tempfactors, ) from .base import TopologyReaderBase @@ -68,7 +68,7 @@ def parse(self, **kwargs): occupancies, # at.occ record_types, # res.het_flag tempfactors, # at.b_iso - residx, # res.index? + residx, # _into_idx(res.seqid.num) ) = map( # this is probably not pretty, but it's efficient -- one loop over the mmcif np.array, list( @@ -88,7 +88,7 @@ def parse(self, **kwargs): at.occ, # occupancies res.het_flag, # record_types at.b_iso, # tempfactors - res.seqid.num, # residx TODO: translate to continious index + res.seqid.num, # residx, later translated into continious repr ) for chain in model for res in chain @@ -103,6 +103,7 @@ def parse(self, **kwargs): resids, # res.seqid.num resnames, # res.name segidx, # chain.name TODO: translate into continious index + resnums, ) = map( np.array, list( @@ -113,6 +114,7 @@ def parse(self, **kwargs): res.seqid.num, res.name, chain.name, + res.seqid.num, ) for chain in model for res in chain @@ -150,6 +152,7 @@ def parse(self, **kwargs): # --------------------------------------- Occupancies(occupancies), # at.occ RecordTypes(record_types), # res.het_flat + Resnums(resnums), # res.seqid.num Tempfactors(tempfactors), # at.b_iso # # ResidueAttr subclasses @@ -179,6 +182,6 @@ def parse(self, **kwargs): n_residues, n_segments, attrs=attrs, - atom_resindex=np.arange(n_atoms), - residue_segindex=np.arange(n_residues), + atom_resindex=residx, + residue_segindex=segidx, ) From 9c731df4db56c398fd899bd8390baf4429ea78a2 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Fri, 20 Sep 2024 23:36:13 +0200 Subject: [PATCH 12/30] remove TODOs --- package/MDAnalysis/topology/MMCIFParser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index f1a070f379d..7e88f4ac4b4 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -1,5 +1,5 @@ """ -MMCIF Topology Parser # TODO +MMCIF Topology Parser # =================== """ @@ -102,7 +102,7 @@ def parse(self, **kwargs): icodes, # res.seqid.icode resids, # res.seqid.num resnames, # res.name - segidx, # chain.name TODO: translate into continious index + segidx, # chain.name resnums, ) = map( np.array, From 8b40ec7219514ffc257a8830b16be16db5209902 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Sat, 21 Sep 2024 00:02:42 +0200 Subject: [PATCH 13/30] Remove debug prints --- package/MDAnalysis/coordinates/MMCIF.py | 3 ++- package/MDAnalysis/coordinates/__init__.py | 2 +- package/MDAnalysis/topology/MMCIFParser.py | 11 +---------- 3 files changed, 4 insertions(+), 12 deletions(-) diff --git a/package/MDAnalysis/coordinates/MMCIF.py b/package/MDAnalysis/coordinates/MMCIF.py index c896f98c62a..b6b633f9512 100644 --- a/package/MDAnalysis/coordinates/MMCIF.py +++ b/package/MDAnalysis/coordinates/MMCIF.py @@ -34,4 +34,5 @@ def close(self): pass -class MMCIFWriter(base.WriterBase): ... +class MMCIFWriter(base.WriterBase): + pass diff --git a/package/MDAnalysis/coordinates/__init__.py b/package/MDAnalysis/coordinates/__init__.py index b8b4ac41f7f..1d0488d38ff 100644 --- a/package/MDAnalysis/coordinates/__init__.py +++ b/package/MDAnalysis/coordinates/__init__.py @@ -791,4 +791,4 @@ class can choose an appropriate reader automatically. from . import NAMDBIN from . import FHIAIMS from . import TNG -from . import MMCIF \ No newline at end of file +from . import MMCIF diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index 7e88f4ac4b4..b9d5fcf694b 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -102,7 +102,7 @@ def parse(self, **kwargs): icodes, # res.seqid.icode resids, # res.seqid.num resnames, # res.name - segidx, # chain.name + segidx, # chain.name resnums, ) = map( np.array, @@ -168,15 +168,6 @@ def parse(self, **kwargs): n_residues = len(resids) n_segments = len(segids) - print(f"{len(residx)=}") - print(f"{residx=}") - print(f"{len(segidx)=}") - print(f"{segidx=}") - - print(f"{n_atoms=}") - print(f"{n_residues=}") - print(f"{n_segments=}") - return Topology( n_atoms, n_residues, From 401a4d360af8b4619636d4fc37197c5d2ff3e023 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Mon, 23 Sep 2024 00:48:07 +0200 Subject: [PATCH 14/30] try to pack things into separate class in utils? --- package/MDAnalysis/topology/tpr/utils.py | 146 +++++++++++++++++++++++ 1 file changed, 146 insertions(+) diff --git a/package/MDAnalysis/topology/tpr/utils.py b/package/MDAnalysis/topology/tpr/utils.py index 4e26dbfa565..0adb8bc200e 100644 --- a/package/MDAnalysis/topology/tpr/utils.py +++ b/package/MDAnalysis/topology/tpr/utils.py @@ -47,31 +47,43 @@ The module also contains the :func:`do_inputrec` to read the TPR header with. """ +from collections.abc import Callable +from typing import Any import numpy as np from mda_xdrlib import xdrlib import struct +from dataclasses import dataclass from . import obj from . import setting from ..base import squash_by from ...core.topology import Topology from ...core.topologyattrs import ( + AltLocs, Atomids, Atomnames, Atomtypes, Masses, Charges, Elements, + Occupancies, + RecordTypes, Resids, Resnames, Moltypes, Molnums, + Resnums, Segids, ChainIDs, Bonds, Angles, Dihedrals, Impropers, + FormalCharges, + AtomAttr, + ResidueAttr, + SegmentAttr, + Tempfactors, ) @@ -886,3 +898,137 @@ def do_blocka(data): ndo_int(data, block_nr + 1) ndo_int(data, block_nra) return block_nr, block_nra + + +# gemmi-related utils: https://gemmi.readthedocs.io/en/latest/ +# TODO: add docs here? + + +@dataclass +class AttrMapping: + level: str # 'atom', 'residue', 'segment' + target: AtomAttr | ResidueAttr | SegmentAttr + func: Callable[[int], Any] + + def __post_init__(self): + mapping = {'atom':0, 'residue':1, 'segment':2} + self.idx = mapping.get(self.level, None) + if self.idx is None: + raise ValueError(f"'level' can only be one of {list(mapping.keys())}, got {self.level=}") + + def apply(self): pass + +class GemmiToAttrs: + AltLocs = AttrMapping('atom', AltLocs, ) + Atomids = ... + Atomnames = ... + Atomtypes = ... + ChainIDs = ... + Elements = ... + FormalCharges = ... + Occupancies = ... + Resnums = ... + Tempfactors = ... + Icodes = ... + Resids = ... + Resnames = ... + Segids = ... + +def gemmiModel2AtomAttrs(model: 'gemmi.Structure') -> list[AtomAttr]: # type: ignore # noqa: F821 + ( + altlocs, # at.altloc + serials, # at.serial + names, # at.name + atomtypes, # at.name + # ------------------ + chainids, # chain.name + elements, # at.element.name + formalcharges, # at.charge + weights, # at.element.weight + # ------------------ + occupancies, # at.occ + record_types, # res.het_flag + tempfactors, # at.b_iso + residx, # _into_idx(res.seqid.num) + ) = map( # this is probably not pretty, but it's efficient -- one loop over the mmcif + np.array, + list( + zip( + *[ + ( + at.altloc, # altlocs + at.serial, # serials + at.name, # names + at.name, # atomtypes + # ------------------ + chain.name, # chainids + at.element.name, # elements + at.charge, # formalcharges + at.element.weight, # weights + # ------------------ + at.occ, # occupancies + res.het_flag, # record_types + at.b_iso, # tempfactors + res.seqid.num, # residx, later translated into continious repr + ) + for chain in model + for res in chain + for at in res + ] + ) + ), + ) + + altlocs = ["A" if not elem else elem for elem in altlocs] + record_types = [ + "ATOM" if record == "A" else "HETATM" if record == "H" else None + for record in record_types + ] + if any((elem is None for elem in record_types)): + raise ValueError("Found an atom that is neither ATOM or HETATM") + + attrs = [ + # AtomAttr subclasses + AltLocs(altlocs), # at.altloc + Atomids(serials), # at.serial + Atomnames(names), # at.name + Atomtypes(atomtypes), # at.name + # --------------------------------------- + ChainIDs(chainids), # chain.name + Elements(elements), # at.element.name + FormalCharges(formalcharges), # at.charge + Masses(weights), # at.element.weight + # --------------------------------------- + Occupancies(occupancies), # at.occ + RecordTypes(record_types), # res.het_flat + Tempfactors(tempfactors), # at.b_iso + # + ] + + return attrs + +def gemmiModel2ResidueAttrs(model: 'gemmi.Structure') -> list[ResidueAttr]: # type: ignore # noqa: F821 + ( + icodes, # res.seqid.icode + resids, # res.seqid.num + resnames, # res.name + segidx, # chain.name + resnums, + ) = map( + np.array, + list( + zip( + *[ + ( + res.seqid.icode, + res.seqid.num, + res.name, + chain.name, + res.seqid.num, + ) + for chain in model + for res in chain + ] + ) + ), + ) From 9c336bd6d1f13521e1ce0505b4acd452827ecf55 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Mon, 23 Sep 2024 00:59:49 +0200 Subject: [PATCH 15/30] remove unnecessary functions --- package/MDAnalysis/topology/tpr/utils.py | 134 ----------------------- 1 file changed, 134 deletions(-) diff --git a/package/MDAnalysis/topology/tpr/utils.py b/package/MDAnalysis/topology/tpr/utils.py index 0adb8bc200e..efcac4111e4 100644 --- a/package/MDAnalysis/topology/tpr/utils.py +++ b/package/MDAnalysis/topology/tpr/utils.py @@ -898,137 +898,3 @@ def do_blocka(data): ndo_int(data, block_nr + 1) ndo_int(data, block_nra) return block_nr, block_nra - - -# gemmi-related utils: https://gemmi.readthedocs.io/en/latest/ -# TODO: add docs here? - - -@dataclass -class AttrMapping: - level: str # 'atom', 'residue', 'segment' - target: AtomAttr | ResidueAttr | SegmentAttr - func: Callable[[int], Any] - - def __post_init__(self): - mapping = {'atom':0, 'residue':1, 'segment':2} - self.idx = mapping.get(self.level, None) - if self.idx is None: - raise ValueError(f"'level' can only be one of {list(mapping.keys())}, got {self.level=}") - - def apply(self): pass - -class GemmiToAttrs: - AltLocs = AttrMapping('atom', AltLocs, ) - Atomids = ... - Atomnames = ... - Atomtypes = ... - ChainIDs = ... - Elements = ... - FormalCharges = ... - Occupancies = ... - Resnums = ... - Tempfactors = ... - Icodes = ... - Resids = ... - Resnames = ... - Segids = ... - -def gemmiModel2AtomAttrs(model: 'gemmi.Structure') -> list[AtomAttr]: # type: ignore # noqa: F821 - ( - altlocs, # at.altloc - serials, # at.serial - names, # at.name - atomtypes, # at.name - # ------------------ - chainids, # chain.name - elements, # at.element.name - formalcharges, # at.charge - weights, # at.element.weight - # ------------------ - occupancies, # at.occ - record_types, # res.het_flag - tempfactors, # at.b_iso - residx, # _into_idx(res.seqid.num) - ) = map( # this is probably not pretty, but it's efficient -- one loop over the mmcif - np.array, - list( - zip( - *[ - ( - at.altloc, # altlocs - at.serial, # serials - at.name, # names - at.name, # atomtypes - # ------------------ - chain.name, # chainids - at.element.name, # elements - at.charge, # formalcharges - at.element.weight, # weights - # ------------------ - at.occ, # occupancies - res.het_flag, # record_types - at.b_iso, # tempfactors - res.seqid.num, # residx, later translated into continious repr - ) - for chain in model - for res in chain - for at in res - ] - ) - ), - ) - - altlocs = ["A" if not elem else elem for elem in altlocs] - record_types = [ - "ATOM" if record == "A" else "HETATM" if record == "H" else None - for record in record_types - ] - if any((elem is None for elem in record_types)): - raise ValueError("Found an atom that is neither ATOM or HETATM") - - attrs = [ - # AtomAttr subclasses - AltLocs(altlocs), # at.altloc - Atomids(serials), # at.serial - Atomnames(names), # at.name - Atomtypes(atomtypes), # at.name - # --------------------------------------- - ChainIDs(chainids), # chain.name - Elements(elements), # at.element.name - FormalCharges(formalcharges), # at.charge - Masses(weights), # at.element.weight - # --------------------------------------- - Occupancies(occupancies), # at.occ - RecordTypes(record_types), # res.het_flat - Tempfactors(tempfactors), # at.b_iso - # - ] - - return attrs - -def gemmiModel2ResidueAttrs(model: 'gemmi.Structure') -> list[ResidueAttr]: # type: ignore # noqa: F821 - ( - icodes, # res.seqid.icode - resids, # res.seqid.num - resnames, # res.name - segidx, # chain.name - resnums, - ) = map( - np.array, - list( - zip( - *[ - ( - res.seqid.icode, - res.seqid.num, - res.name, - chain.name, - res.seqid.num, - ) - for chain in model - for res in chain - ] - ) - ), - ) From def88e45875db73eed375b3a8bc5c1fbf0d0cf0f Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Tue, 24 Sep 2024 01:00:49 +0200 Subject: [PATCH 16/30] copy all loops into separate functions --- package/MDAnalysis/topology/MMCIFParser.py | 129 +++++++++++++++++++++ 1 file changed, 129 insertions(+) diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index b9d5fcf694b..29243b7e1ac 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -3,6 +3,13 @@ =================== """ +try: + import gemmi +except ImportError: + HAS_GEMMI = False +else: + HAS_GEMMI = True + import gemmi import numpy as np import warnings @@ -26,6 +33,9 @@ Resnums, Segids, Tempfactors, + AtomAttr, + ResidueAttr, + SegmentAttr, ) from .base import TopologyReaderBase @@ -34,6 +44,125 @@ def _into_idx(arr: list[int]) -> list[int]: return [idx for idx, (_, group) in enumerate(itertools.groupby(arr)) for _ in group] +def get_Atomattrs(model: gemmi.Model) -> tuple[list[AtomAttr], np.ndarray]: + ( + altlocs, # at.altloc + serials, # at.serial + names, # at.name + atomtypes, # at.name + # ------------------ + chainids, # chain.name + elements, # at.element.name + formalcharges, # at.charge + weights, # at.element.weight + # ------------------ + occupancies, # at.occ + record_types, # res.het_flag + tempfactors, # at.b_iso + residx, # _into_idx(res.seqid.num) + ) = map( # this is probably not pretty, but it's efficient -- one loop over the mmcif + np.array, + list( + zip( + *[ + ( + at.altloc, # altlocs + at.serial, # serials + at.name, # names + at.name, # atomtypes + # ------------------ + chain.name, # chainids + at.element.name, # elements + at.charge, # formalcharges + at.element.weight, # weights + # ------------------ + at.occ, # occupancies + res.het_flag, # record_types + at.b_iso, # tempfactors + res.seqid.num, # residx, later translated into continious repr + ) + for chain in model + for res in chain + for at in res + ] + ) + ), + ) + + # transform *idx into continious numpy arrays + residx = np.array(_into_idx(residx)) + + # fill in altlocs + altlocs = ["A" if not elem else elem for elem in altlocs] + record_types = [ + "ATOM" if record == "A" else "HETATM" if record == "H" else None + for record in record_types + ] + if any((elem is None for elem in record_types)): + raise ValueError("Found an atom that is neither ATOM or HETATM") + + attrs = [ + AltLocs(altlocs), # at.altloc + Atomids(serials), # at.serial + Atomnames(names), # at.name + Atomtypes(atomtypes), # at.name + # --------------------------------------- + ChainIDs(chainids), # chain.name + Elements(elements), # at.element.name + FormalCharges(formalcharges), # at.charge + Masses(weights), # at.element.weight + # --------------------------------------- + Occupancies(occupancies), # at.occ + RecordTypes(record_types), # res.het_flat + Tempfactors(tempfactors), # at.b_iso + ] + + n_atoms = len(names) # TODO: replace in class itself with len(attrs[0]) + return attrs, residx + + +def get_Residueattrs(model: gemmi.Model) -> tuple[list[ResidueAttr], np.ndarray]: + ( + icodes, # res.seqid.icode + resids, # res.seqid.num + resnames, # res.name + segidx, # chain.name + resnums, + ) = map( + np.array, + list( + zip( + *[ + ( + res.seqid.icode, + res.seqid.num, + res.name, + chain.name, + res.seqid.num, + ) + for chain in model + for res in chain + ] + ) + ), + ) + segidx = np.array(_into_idx(segidx)) + attrs = [ + Resnums(resnums), # res.seqid.num + ICodes(icodes), # res.seqid.icode + Resids(resids), # res.seqid.num + Resnames(resnames), # res.name + ] + n_residues = len(resids) # TODO: replace in class itself with len(attrs[0]) + return attrs, segidx + + +def get_Segmentattrs(model: gemmi.Model) -> list[SegmentAttr]: + segids = [chain.name for chain in model] + n_segments = len(segids) # TODO: replace in class itself with len(attrs[0]) + return [Segids(segids)] + + class MMCIFParser(TopologyReaderBase): format = "MMCIF" From cabfd370a64725b9845da08fae944f57b43a53c1 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Tue, 24 Sep 2024 01:04:12 +0200 Subject: [PATCH 17/30] Move loops over structures into functions --- package/MDAnalysis/topology/MMCIFParser.py | 122 ++------------------- 1 file changed, 7 insertions(+), 115 deletions(-) diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index 29243b7e1ac..08d39e9b21f 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -117,7 +117,6 @@ def get_Atomattrs(model: gemmi.Model) -> tuple[list[AtomAttr], np.ndarray]: Tempfactors(tempfactors), # at.b_iso ] - n_atoms = len(names) # TODO: replace in class itself with len(attrs[0]) return attrs, residx @@ -153,13 +152,11 @@ def get_Residueattrs(model: gemmi.Model) -> tuple[list[ResidueAttr], np.ndarray] Resids(resids), # res.seqid.num Resnames(resnames), # res.name ] - n_residues = len(resids) # TODO: replace in class itself with len(attrs[0]) return attrs, segidx def get_Segmentattrs(model: gemmi.Model) -> list[SegmentAttr]: segids = [chain.name for chain in model] - n_segments = len(segids) # TODO: replace in class itself with len(attrs[0]) return [Segids(segids)] @@ -182,120 +179,15 @@ def parse(self, **kwargs): ) model = structure[0] - # atom properties - ( - altlocs, # at.altloc - serials, # at.serial - names, # at.name - atomtypes, # at.name - # ------------------ - chainids, # chain.name - elements, # at.element.name - formalcharges, # at.charge - weights, # at.element.weight - # ------------------ - occupancies, # at.occ - record_types, # res.het_flag - tempfactors, # at.b_iso - residx, # _into_idx(res.seqid.num) - ) = map( # this is probably not pretty, but it's efficient -- one loop over the mmcif - np.array, - list( - zip( - *[ - ( - at.altloc, # altlocs - at.serial, # serials - at.name, # names - at.name, # atomtypes - # ------------------ - chain.name, # chainids - at.element.name, # elements - at.charge, # formalcharges - at.element.weight, # weights - # ------------------ - at.occ, # occupancies - res.het_flag, # record_types - at.b_iso, # tempfactors - res.seqid.num, # residx, later translated into continious repr - ) - for chain in model - for res in chain - for at in res - ] - ) - ), - ) - - ( - icodes, # res.seqid.icode - resids, # res.seqid.num - resnames, # res.name - segidx, # chain.name - resnums, - ) = map( - np.array, - list( - zip( - *[ - ( - res.seqid.icode, - res.seqid.num, - res.name, - chain.name, - res.seqid.num, - ) - for chain in model - for res in chain - ] - ) - ), - ) - - segids = [chain.name for chain in model] - - # transform *idx into continious numpy arrays - residx = np.array(_into_idx(residx)) - segidx = np.array(_into_idx(segidx)) - - # fill in altlocs - altlocs = ["A" if not elem else elem for elem in altlocs] - record_types = [ - "ATOM" if record == "A" else "HETATM" if record == "H" else None - for record in record_types - ] - if any((elem is None for elem in record_types)): - raise ValueError("Found an atom that is neither ATOM or HETATM") + atomAttrs, residx = get_Atomattrs(model) + residAttrs, segidx = get_Residueattrs(model) + segmentAttrs = get_Segmentattrs(model) - attrs = [ - # AtomAttr subclasses - AltLocs(altlocs), # at.altloc - Atomids(serials), # at.serial - Atomnames(names), # at.name - Atomtypes(atomtypes), # at.name - # --------------------------------------- - ChainIDs(chainids), # chain.name - Elements(elements), # at.element.name - FormalCharges(formalcharges), # at.charge - Masses(weights), # at.element.weight - # --------------------------------------- - Occupancies(occupancies), # at.occ - RecordTypes(record_types), # res.het_flat - Resnums(resnums), # res.seqid.num - Tempfactors(tempfactors), # at.b_iso - # - # ResidueAttr subclasses - ICodes(icodes), # res.seqid.icode - Resids(resids), # res.seqid.num - Resnames(resnames), # res.name - # - # SegmentAttr subclasses - Segids(segids), # chain.name - ] + attrs = atomAttrs + residAttrs + segmentAttrs - n_atoms = len(names) - n_residues = len(resids) - n_segments = len(segids) + n_atoms = len(atomAttrs[0]) + n_residues = len(residAttrs[0]) + n_segments = len(segmentAttrs[0]) return Topology( n_atoms, From 4c9d930a949427c9ce637954f8a86b8122f0ad85 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Tue, 24 Sep 2024 01:14:42 +0200 Subject: [PATCH 18/30] Move coordinate fetching into function for the coordinate reader as well --- package/MDAnalysis/coordinates/MMCIF.py | 34 +++++++++++++++---------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/package/MDAnalysis/coordinates/MMCIF.py b/package/MDAnalysis/coordinates/MMCIF.py index b6b633f9512..244a7719512 100644 --- a/package/MDAnalysis/coordinates/MMCIF.py +++ b/package/MDAnalysis/coordinates/MMCIF.py @@ -1,11 +1,24 @@ import numpy as np -import gemmi import logging from . import base +import warnings + +try: + import gemmi + + HAS_GEMMI = True +except ImportError: + HAS_GEMMI = False logger = logging.getLogger("MDAnalysis.coordinates.MMCIF") +def get_Coordinates(model: gemmi.Model) -> np.ndarray: + return np.array( + [[*at.pos.tolist()] for chain in model for res in chain for at in res] + ) + + class MMCIFReader(base.SingleFrameReaderBase): """Reads from an MMCIF file""" @@ -14,15 +27,12 @@ class MMCIFReader(base.SingleFrameReaderBase): def _read_first_frame(self): structure = gemmi.read_structure(self.filename) - coords = np.array( - [ - [*at.pos.tolist()] - for model in structure - for chain in model - for res in chain - for at in res - ] - ) + if len(structure) > 1: + warnings.warn( + f"File {self.filename} has {len(structure)} models, but only the first one will be read" + ) + model = structure[0] + coords = get_Coordinates(model) self.n_atoms = len(coords) self.ts = self._Timestep.from_coordinates(coords, **self._ts_kwargs) self.ts.frame = 0 @@ -32,7 +42,3 @@ def Writer(self, filename, n_atoms=None, **kwargs): def close(self): pass - - -class MMCIFWriter(base.WriterBase): - pass From 184491a0635af5353463875859922e8bc92f3b06 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Tue, 24 Sep 2024 01:20:42 +0200 Subject: [PATCH 19/30] Fix imports --- package/MDAnalysis/coordinates/MMCIF.py | 6 ++++-- package/MDAnalysis/topology/MMCIFParser.py | 12 ++++++------ 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/package/MDAnalysis/coordinates/MMCIF.py b/package/MDAnalysis/coordinates/MMCIF.py index 244a7719512..2cfefa354ff 100644 --- a/package/MDAnalysis/coordinates/MMCIF.py +++ b/package/MDAnalysis/coordinates/MMCIF.py @@ -1,8 +1,10 @@ -import numpy as np import logging -from . import base import warnings +import numpy as np + +from . import base + try: import gemmi diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index 08d39e9b21f..f7b8d4d5724 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -10,14 +10,15 @@ else: HAS_GEMMI = True -import gemmi -import numpy as np -import warnings import itertools +import warnings + +import numpy as np from ..core.topology import Topology from ..core.topologyattrs import ( AltLocs, + AtomAttr, Atomids, Atomnames, Atomtypes, @@ -29,13 +30,12 @@ Occupancies, RecordTypes, Resids, + ResidueAttr, Resnames, Resnums, Segids, - Tempfactors, - AtomAttr, - ResidueAttr, SegmentAttr, + Tempfactors, ) from .base import TopologyReaderBase From 3de856511e46afffd3f25f3d97f01c6b65be4232 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Mon, 30 Sep 2024 14:45:00 +0200 Subject: [PATCH 20/30] Start adding documentation --- package/MDAnalysis/coordinates/MMCIF.py | 24 ++++-- package/MDAnalysis/topology/MMCIFParser.py | 99 +++++++++++++++------- 2 files changed, 89 insertions(+), 34 deletions(-) diff --git a/package/MDAnalysis/coordinates/MMCIF.py b/package/MDAnalysis/coordinates/MMCIF.py index 2cfefa354ff..6b9ae180ae2 100644 --- a/package/MDAnalysis/coordinates/MMCIF.py +++ b/package/MDAnalysis/coordinates/MMCIF.py @@ -15,7 +15,18 @@ logger = logging.getLogger("MDAnalysis.coordinates.MMCIF") -def get_Coordinates(model: gemmi.Model) -> np.ndarray: +def get_coordinates(model: gemmi.Model) -> np.ndarray: + """Get coordinates of all atoms in the `gemmi.Model` object. + + Parameters + ---------- + model + input `gemmi.Model`, e.g. `gemmi.read_structure('file.cif')[0]` + + Returns + ------- + np.ndarray, shape [n, 3], where `n` is the number of atoms in the structure. + """ return np.array( [[*at.pos.tolist()] for chain in model for res in chain for at in res] ) @@ -33,14 +44,17 @@ def _read_first_frame(self): warnings.warn( f"File {self.filename} has {len(structure)} models, but only the first one will be read" ) + if len(structure) > 1: + warnings.warn( + "MMCIF model {self.filename} contains {len(model)=} different models, " + "but only the first one will be used to assign the topology" + ) # TODO: if the structures represent timestamps, can parse them with :func:`get_coordinates`. + model = structure[0] - coords = get_Coordinates(model) + coords = get_coordinates(model) self.n_atoms = len(coords) self.ts = self._Timestep.from_coordinates(coords, **self._ts_kwargs) self.ts.frame = 0 - def Writer(self, filename, n_atoms=None, **kwargs): - raise NotImplementedError - def close(self): pass diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index f7b8d4d5724..debd75c8676 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -40,11 +40,46 @@ from .base import TopologyReaderBase -def _into_idx(arr: list[int]) -> list[int]: +def _into_idx(arr: list) -> list[int]: + """Replace consecutive identical elements of an array with their indices. + + Example + ------- + .. code-block:: python + + arr: list[int] = [1, 1, 5, 5, 7, 3, 3] + assert _into_idx(arr) == [0, 0, 1, 1, 2, 3, 3] + + Parameters + ---------- + arr + input array of elements that can be compared with `__eq__` + + Returns + ------- + list[int] -- array where these elements are replaced with their unique indices, in order of appearance. + """ return [idx for idx, (_, group) in enumerate(itertools.groupby(arr)) for _ in group] def get_Atomattrs(model: gemmi.Model) -> tuple[list[AtomAttr], np.ndarray]: + """Extract all attributes that are subclasses of :class:`..core.topologyattrs.AtomAttr` from a ``gemmi.Model`` object, + and a `segidx` index. + + Parameters + ---------- + model + input `gemmi.Model`, e.g. `gemmi.read_structure('file.cif')[0]` + + Returns + ------- + tuple[list[AtomAttr], np.ndarray] -- first element is list of all extracted attributes, second element is `segidx` + + Raises + ------ + ValueError + if any of the records is neither 'ATOM' nor 'HETATM' + """ ( altlocs, # at.altloc serials, # at.serial @@ -60,30 +95,34 @@ def get_Atomattrs(model: gemmi.Model) -> tuple[list[AtomAttr], np.ndarray]: record_types, # res.het_flag tempfactors, # at.b_iso residx, # _into_idx(res.seqid.num) - ) = map( # this is probably not pretty, but it's efficient -- one loop over the mmcif + ) = map( # this construct takes np.ndarray of all lists of attributes, extracted from the `gemmi.Model` np.array, list( zip( *[ ( - at.altloc, # altlocs - at.serial, # serials - at.name, # names - at.name, # atomtypes + # tuple of attributes + # extracted from residue, atom or chain in the structure + # ------------------ + atom.altloc, # altlocs + atom.serial, # serials + atom.name, # names + atom.name, # atomtypes # ------------------ chain.name, # chainids - at.element.name, # elements - at.charge, # formalcharges - at.element.weight, # weights + atom.element.name, # elements + atom.charge, # formalcharges + atom.element.weight, # weights # ------------------ - at.occ, # occupancies - res.het_flag, # record_types - at.b_iso, # tempfactors - res.seqid.num, # residx, later translated into continious repr + atom.occ, # occupancies + residue.het_flag, # record_types + atom.b_iso, # tempfactors + residue.seqid.num, # residx, later translated into continious repr ) + # the main loop over the `gemmi.Model` object for chain in model - for res in chain - for at in res + for residue in chain + for atom in residue ] ) ), @@ -92,8 +131,10 @@ def get_Atomattrs(model: gemmi.Model) -> tuple[list[AtomAttr], np.ndarray]: # transform *idx into continious numpy arrays residx = np.array(_into_idx(residx)) - # fill in altlocs + # fill in altlocs, since gemmi has '' as default altlocs = ["A" if not elem else elem for elem in altlocs] + + # convert default gemmi record types to default MDAnalysis record types record_types = [ "ATOM" if record == "A" else "HETATM" if record == "H" else None for record in record_types @@ -102,19 +143,19 @@ def get_Atomattrs(model: gemmi.Model) -> tuple[list[AtomAttr], np.ndarray]: raise ValueError("Found an atom that is neither ATOM or HETATM") attrs = [ - AltLocs(altlocs), # at.altloc - Atomids(serials), # at.serial - Atomnames(names), # at.name - Atomtypes(atomtypes), # at.name - # --------------------------------------- - ChainIDs(chainids), # chain.name - Elements(elements), # at.element.name - FormalCharges(formalcharges), # at.charge - Masses(weights), # at.element.weight - # --------------------------------------- - Occupancies(occupancies), # at.occ - RecordTypes(record_types), # res.het_flat - Tempfactors(tempfactors), # at.b_iso + AltLocs(altlocs), + Atomids(serials), + Atomnames(names), + Atomtypes(atomtypes), + # ---------------------------- + ChainIDs(chainids), + Elements(elements), + FormalCharges(formalcharges), + Masses(weights), + # ---------------------------- + Occupancies(occupancies), + RecordTypes(record_types), + Tempfactors(tempfactors), ] return attrs, residx From ca6ebbba5367c060b00da9ade28d1f8c45cabff1 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Tue, 1 Oct 2024 23:01:44 +0200 Subject: [PATCH 21/30] Reference MMCIFParser in PDBParser --- package/MDAnalysis/topology/PDBParser.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/package/MDAnalysis/topology/PDBParser.py b/package/MDAnalysis/topology/PDBParser.py index e1e08dd04c6..56c29a1dd16 100644 --- a/package/MDAnalysis/topology/PDBParser.py +++ b/package/MDAnalysis/topology/PDBParser.py @@ -43,9 +43,17 @@ if unknown. Partial charges are not set. Elements are parsed if they are valid. If partially missing or incorrect, empty records are assigned. +.. Note:: + + You can also use :mod:`~MDAnalysis.topology.MMCIFParser` to parse PDB files + that you're having troubles parsing with standard PDB parser. ``MMCIFParser`` + uses ``gemmi`` library (https://github.com/project-gemmi/gemmi) that is developed + together with RCSB, and might work better for your particular situation. + See Also -------- * :mod:`MDAnalysis.topology.ExtendedPDBParser` +* :mod:`MDAnalysis.topology.MMCIFParser` * :class:`MDAnalysis.coordinates.PDB.PDBReader` * :class:`MDAnalysis.core.universe.Universe` From 45077ade2b2ef35ea1d21af971c3d336a1fc5b43 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Tue, 1 Oct 2024 23:15:45 +0200 Subject: [PATCH 22/30] Add documentation for trajectory and topology parsers --- package/MDAnalysis/coordinates/MMCIF.py | 27 ++++++- package/MDAnalysis/topology/MMCIFParser.py | 86 ++++++++++++++++++---- 2 files changed, 98 insertions(+), 15 deletions(-) diff --git a/package/MDAnalysis/coordinates/MMCIF.py b/package/MDAnalysis/coordinates/MMCIF.py index 6b9ae180ae2..f127c92b0d5 100644 --- a/package/MDAnalysis/coordinates/MMCIF.py +++ b/package/MDAnalysis/coordinates/MMCIF.py @@ -33,13 +33,29 @@ def get_coordinates(model: gemmi.Model) -> np.ndarray: class MMCIFReader(base.SingleFrameReaderBase): - """Reads from an MMCIF file""" + """Reads from an MMCIF file using ``gemmi`` library as a backend. + + Notes + ----- + + If the structure represents an ensemble, only the first structure in the ensemble + is read here (and a warning is thrown). Also, if the structure has a placeholder "CRYST1" + record (1, 1, 1, 90, 90, 90), it's set to ``None`` instead. + + .. versionadded:: 2.8.0 + """ format = "MMCIF" units = {"time": None, "length": "Angstrom"} def _read_first_frame(self): structure = gemmi.read_structure(self.filename) + cell_dims = np.array( + [ + getattr(structure.cell, name) + for name in ("a", "b", "c", "alpha", "beta", "gamma") + ] + ) if len(structure) > 1: warnings.warn( f"File {self.filename} has {len(structure)} models, but only the first one will be read" @@ -54,6 +70,15 @@ def _read_first_frame(self): coords = get_coordinates(model) self.n_atoms = len(coords) self.ts = self._Timestep.from_coordinates(coords, **self._ts_kwargs) + if np.allclose(cell_dims, np.array([1.0, 1.0, 1.0, 90.0, 90.0, 90.0])): + warnings.warn( + "1 A^3 CRYST1 record," + " this is usually a placeholder." + " Unit cell dimensions will be set to None." + ) + self.ts.dimensions = None + else: + self.ts.dimensions = cell_dims self.ts.frame = 0 def close(self): diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index debd75c8676..757e314ce19 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -58,13 +58,15 @@ def _into_idx(arr: list) -> list[int]: Returns ------- list[int] -- array where these elements are replaced with their unique indices, in order of appearance. + + .. versionadded:: 2.8.0 """ return [idx for idx, (_, group) in enumerate(itertools.groupby(arr)) for _ in group] def get_Atomattrs(model: gemmi.Model) -> tuple[list[AtomAttr], np.ndarray]: """Extract all attributes that are subclasses of :class:`..core.topologyattrs.AtomAttr` from a ``gemmi.Model`` object, - and a `segidx` index. + and a `residx` index with indices of all atoms in residues. Parameters ---------- @@ -79,6 +81,8 @@ def get_Atomattrs(model: gemmi.Model) -> tuple[list[AtomAttr], np.ndarray]: ------ ValueError if any of the records is neither 'ATOM' nor 'HETATM' + + .. versionadded:: 2.8.0 """ ( altlocs, # at.altloc @@ -162,10 +166,24 @@ def get_Atomattrs(model: gemmi.Model) -> tuple[list[AtomAttr], np.ndarray]: def get_Residueattrs(model: gemmi.Model) -> tuple[list[ResidueAttr], np.ndarray]: + """Extract all attributes that are subclasses of :class:`..core.topologyattrs.ResidueAttr` from a ``gemmi.Model`` object, + and a `segidx` index witn indices of all residues in segments. + + Parameters + ---------- + model + input `gemmi.Model`, e.g. `gemmi.read_structure('file.cif')[0]` + + Returns + ------- + tuple[list[ResidueAttr], np.ndarray] -- first element is list of all extracted attributes, second element is `segidx` + + .. versionadded:: 2.8.0 + """ ( - icodes, # res.seqid.icode - resids, # res.seqid.num - resnames, # res.name + icodes, # residue.seqid.icode + resids, # residue.seqid.num + resnames, # residue.name segidx, # chain.name resnums, ) = map( @@ -174,34 +192,73 @@ def get_Residueattrs(model: gemmi.Model) -> tuple[list[ResidueAttr], np.ndarray] zip( *[ ( - res.seqid.icode, - res.seqid.num, - res.name, + residue.seqid.icode, + residue.seqid.num, + residue.name, chain.name, - res.seqid.num, + residue.seqid.num, ) for chain in model - for res in chain + for residue in chain ] ) ), ) segidx = np.array(_into_idx(segidx)) attrs = [ - Resnums(resnums), # res.seqid.num - ICodes(icodes), # res.seqid.icode - Resids(resids), # res.seqid.num - Resnames(resnames), # res.name + Resnums(resnums), + ICodes(icodes), + Resids(resids), + Resnames(resnames), ] return attrs, segidx -def get_Segmentattrs(model: gemmi.Model) -> list[SegmentAttr]: +def get_Segmentattrs(model: gemmi.Model) -> SegmentAttr: + """Extract all attributes that are subclasses of :class:`..core.topologyattrs.SegmentAttr` from a ``gemmi.Model`` object. + + Parameters + ---------- + model + input `gemmi.Model`, e.g. `gemmi.read_structure('file.cif')[0]` + + Returns + ------- + list[SegmentAttr] -- list of all extracted attributes + + .. versionadded:: 2.8.0 + """ segids = [chain.name for chain in model] return [Segids(segids)] class MMCIFParser(TopologyReaderBase): + """Parser that obtains a list of atoms from a standard MMCIF/PDBx file using ``gemmi`` library (https://github.com/project-gemmi/gemmi). + + Creates the following Attributes (if present): + - :class:`..core.topologyattrs.AtomAttr` subclasses: + - :class:`..core.topologyattrs.AltLocs` + - :class:`..core.topologyattrs.Atomids` + - :class:`..core.topologyattrs.Atomnames` + - :class:`..core.topologyattrs.Atomtypes` + - :class:`..core.topologyattrs.ChainIDs` + - :class:`..core.topologyattrs.Elements` + - :class:`..core.topologyattrs.FormalCharges` + - :class:`..core.topologyattrs.Masses` + - :class:`..core.topologyattrs.Occupancies` + - :class:`..core.topologyattrs.RecordTypes` + - :class:`..core.topologyattrs.Tempfactors` + - :class:`..core.topologyattrs.ResidueAttr` subclasses: + - :class:`..core.topologyattrs.Resnums` + - :class:`..core.topologyattrs.ICodes` + - :class:`..core.topologyattrs.Resids` + - :class:`..core.topologyattrs.Resnames` + - :class:`..core.topologyattrs.SegmentAttr` subclasses: + - :class:`..core.topologyattrs.Segids` + + .. versionadded:: 2.8.0 + """ + format = "MMCIF" def parse(self, **kwargs): @@ -226,6 +283,7 @@ def parse(self, **kwargs): attrs = atomAttrs + residAttrs + segmentAttrs + # due to the list(map(...)) construction, all elements in array have equal lengths n_atoms = len(atomAttrs[0]) n_residues = len(residAttrs[0]) n_segments = len(segmentAttrs[0]) From 9a1a59ac1e0ada37803c470baa7e5a6940c6f7b9 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Wed, 2 Oct 2024 19:31:12 +0200 Subject: [PATCH 23/30] Add mmcif tests --- .../MDAnalysisTests/coordinates/test_mmcif.py | 44 + .../MDAnalysisTests/data/mmcif/1YJP.cif.gz | Bin 0 -> 8187 bytes .../MDAnalysisTests/data/mmcif/7ETN.cif.gz | Bin 0 -> 13252 bytes testsuite/MDAnalysisTests/datafiles.py | 883 ++++++++++-------- .../MDAnalysisTests/topology/test_mmcif.py | 48 + 5 files changed, 609 insertions(+), 366 deletions(-) create mode 100644 testsuite/MDAnalysisTests/coordinates/test_mmcif.py create mode 100644 testsuite/MDAnalysisTests/data/mmcif/1YJP.cif.gz create mode 100644 testsuite/MDAnalysisTests/data/mmcif/7ETN.cif.gz create mode 100644 testsuite/MDAnalysisTests/topology/test_mmcif.py diff --git a/testsuite/MDAnalysisTests/coordinates/test_mmcif.py b/testsuite/MDAnalysisTests/coordinates/test_mmcif.py new file mode 100644 index 00000000000..ec25cd54de5 --- /dev/null +++ b/testsuite/MDAnalysisTests/coordinates/test_mmcif.py @@ -0,0 +1,44 @@ +import glob +import os +from io import StringIO + +import MDAnalysis as mda +import numpy as np +import pytest +from numpy.testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_equal, +) + +from MDAnalysisTests.datafiles import MMCIF as MMCIF_FOLDER + +# FIXME: rewrite tests to read trajectories only once + + +@pytest.mark.parametrize("mmcif_filename", glob.glob(f"{MMCIF_FOLDER}/*.cif*")) +def test_works_with_explicit_format(mmcif_filename): + u = mda.Universe(mmcif_filename, format="MMCIF") + assert u.trajectory.n_atoms > 0 + + +@pytest.mark.parametrize("mmcif_filename", glob.glob(f"{MMCIF_FOLDER}/*.cif*")) +def test_works_without_explicit_format(mmcif_filename): + u = mda.Universe(mmcif_filename) + assert u.trajectory.n_atoms > 0 + + +@pytest.mark.parametrize( + "mmcif_filename,natoms_protein,natoms_total", + [ + (f"{MMCIF_FOLDER}/1YJP.cif", 59, 66), + (f"{MMCIF_FOLDER}/1YJP.cif.gz", 59, 66), + (f"{MMCIF_FOLDER}/7ETN.cif", 70, 70), + (f"{MMCIF_FOLDER}/7ETN.cif.gz", 70, 70), + ], +) +def test_n_atoms(mmcif_filename, natoms_protein, natoms_total): + u = mda.Universe(mmcif_filename) + assert len(u.atoms) == natoms_total + assert len(u.select("protein").atoms) == natoms_protein diff --git a/testsuite/MDAnalysisTests/data/mmcif/1YJP.cif.gz b/testsuite/MDAnalysisTests/data/mmcif/1YJP.cif.gz new file mode 100644 index 0000000000000000000000000000000000000000..5bb7250751513b9ee2205e510166c97d4292ba83 GIT binary patch literal 8187 zcmVyT zur1SZZ0iFw;&*&E&gRz(pu&GG*Y;xzh%OE3rF>zpvtNJ3YuzL^@}^|RiG*5+$W-^- zbl&p1DZ#+N2=XnA{$VWaKaN`iR+%*KyX*-5CjGsvc=Ivje0w@FZy*+aW*ehLQ|(vH zzRHXZjA$`}>0mL#4~XGAS{xg3q6^Era4qA^)#5mSW7`*2cwxK7nWx2Z5eE>g3mfL4 zuf_2I$MG&Khsg7%fiAAICe4eQ4~hJ;kI?W2iTD^ze^3pL^oM!us`6{T&MH~fD%+KH zj-z#im|atOjqZEy=x`^pdR65+=0e)(aEyk$8|oA~GGSh;a=$w=T_{d(EDj-`IEfR_#=Pq0(t&^_FcC9G@g@$_9*c@6nX|u0QlB2a*nC3+jH$Cl|ddUR-G94{R6b5L) z>!Nf&MGJ}F*LA{@zJy7kOBwwk4)l%YPuiX)A;-Bf+j%OS$W9nGbO25FEG zlR4@a$&7+Dby>(g{(Oq4tNI77xXGHga$Uoye9ek-hd!3Ok%NTt6}Ov;!28jyLkkfW zc&|<9rj*(nm}(wCm8~7@`)L?gjk4w-VkZUJZ|hwK8GfFvWzR$%L%IhIRVI%S)<3k zUo2(=pzFnr3|W@tmt?nJ7J2=aDHm6*0VqH2myg-{APOnGc2&MfK{UysmoEkkhq0>4 zy1rO~RDTe|HA5)NfQ$lvM zFJOFluWDoYPP+A&e$O}i&BX(Bp~&CLa&0^kR{VTAz-etBXo*$z!R>-cxYusid#+bj}< zE(B_cZG&^aN}lG|_czjDcP2DWY4mHt7}VK~^IL&;rn&)>U4eJzJ{ zYE^@pZQiK|VxL*&#lF?mDxo1rw9y$T6l>HZjcvD z*TsJ4`Xkh>V|;@MhRFy_bN>0~UUp5WSsudE#F;6ksce-+@fy=QT zg4X=jIX9lGbXylx{T*$&U{Uj1`tJq2J#Du6uEc;1%G>S0OVxk{8EKP&dVtrRZ8W8^ zFm`Xqr9sXl!Q?phND&;zP!U?EoDRBf8ZZkvTfUpk{`t?@=M#w_0o)!TLLIMlx*O}zo3wh( znnZY7ZI;+tO}Xujk28QH8y1}ldOT8|GHItz)4q!$!lEvu%l2X5Vl=IrHWYOMZ#b7r zbQ-pC_wd=+(q)t}6j`ptkW2ljnU?D-afdb!|K- zqsfjQF4}mAP7k^_GTyFmcd! zW~TM~hI$$;SoU061pGJ*<;l&kdTe)!v zzppR1rQ6p%=G-y|W&MCRtBZN_$?fC#n!5f=tRr5lbfc-5SF4@(KkLFwSLXf;-Z^R) zS?G&xF!=WEn~9hd?E?mS^R3ww#%)=lVc{~4adtDA{q|(E*Ss8D9Z_3GWaYKW)_ZoR z>+)z3*SBXZuavw2L z*`y*CWOUhMUlhpzgG(73r3bIuG?=yy-QVEY_uD3UPM1Yivn86|YUkk&q(z}yg805` z3U>EG>EFLD=Ht)C&He4|e0=?U|1=wM?7lXc`Y3osS2QItZRuJG1DpEMuzPq=?A90^ zH(6S%s?p=^GAOLJSYcs*ZO^paLEOi*fg<7?<7=Cq4z<5aL3jCE+&!cnK^1RTWxKRW z`}}$u!qNNIYLlzm)}wVtaP-Ug|H@ETb~?*V`y{}>&(#%`KGkRU5Z zG$JSK{N<(E4%i~mHN!|Z%beWlQu}Tb7p1|G`g18Tx`QdMBZNW94A<{W_FcCW(L-e) zIvu#azR@5LEWtgF=kv*8GM-;gHAF;#Vk3xj&)FlpwH&=v+H!tU6#1?$^EG4$k9k#< z)kx7mm9BGo7imBRT4e~TZ|T>J#jIg3)1#thNLz8ivs{(xPOcFYo_7xuC}Dq@JWOVH z&(o2P4=y*Z-l}qw_6LOWS-qv(ZB|^Y_KmT~FKkFb6{69gcoJH^f-Ea3Qiyt+1O|GdhI7dkLnDuk06VWfNYs@=pQ3!WbUS=$U~RP4w{19!=`U~ zn^}CGT~Fsv&-15QkM7X&^k}k}-x^nw@#6!oK-}Nh?x@FrC|ps1cy&~vr$~ytZcd=6 z-?wN9O$Dlyc9Repj}k1ibW`A6R)chd>Kn4|Vq2SQ_yECkPX?(dzbRdaSXQRPeal~! z{@P|Fp|dJ2u~B)oYgAcX?Z;V|g9i)mxs_ z|Jg&K(s9A2<%a_IV8Y0<9fk@Q&8n~28sbfklZ*_<>n2TZQG^rl?o~Xdr%t-2)f4GF zt}J{_i+$G5JY>9W)_{Vb#H3ffZ}D6+l=OL(32?dM*RzR_y6fyB44pJ$+#IRWIfC7yd1~$JV8ggQq)p7797k={V&)`Bqk64p3TAFhr@>NJ|-8wyIF! zTMnt`vSTE4ANIZ}UtWkkC+V`j{Q(*9HqiR%Io*H_i5(z~vL%+?%X$YmiP-K?C&*cxOSl&uU&lZNc&WmoeRgfk^6s65=b4 zK2gj-qS(EtN<$1ZS=MFoqcmTR(3Gpyen%#!Ht#z|qssw@ha0kQsLys#+c$4uXQ1@H ztw=O9UB@Kh_=dBV2DfjN$<_bb|AyZmr*VFQ#{E$m=O?b?{KR$KAD}s4mBx|r$+tgF zbJ*58G^?^ATk^@_r7{EWep{lqx?TMYO~?24COVr$U)SGoootzrHJ$vRUAHuNu^*+W z-zvJ{?=TG}$&e@{i%;P9uB&~%`cl88+b_w@$NQ6L4n*w%nyXXCN1}Or_z{255>8OU zpP?ZU!1(cGqT1Bm>_82q-%iSRhG~YgD~;Z6pB`zrp5y(p(`oP`r_itx4?iESD)hI- z6ipsmu@*Hpr+I!@V78R6_u%lyHy_(=fv1cz*{gdz^-*ZJxohUhL7FQ}8oTr9lDdVEYln)gINVEHoGutQsqEe*lu9y zHm|dl&YWJm+y|nKco;Rbr zY1{3wEkkzaNt2pic95>ei@x)^)N(>YP;Ziot|niycW~Y7{yMzUQ-MbKw%^F^jQP{^ z?*e`B$D6B!oP5do2IC$k=#L$-RABNZLb;uvxfvTX_`y>$ zXIJE-)6qKErg>@PG|$hQ6Nhexk~#~}`1;efg6Esjtj z!C=P+<&vpnv2=2ATwTX;2kMbabb3D6b-n|*+aLQh0wG=~ovq=1sLnRa;@up)d9TIR zR@`hq>zc*U7I}yVTe(wNT@GQF&=))^2AQ0oSm&hW1R8Siy4{>Oq=e)(+nW3f*NA-x zy@UXRyP7I)oA$YLm_ONXF6|{4pI`MhsWVE#9WhO@ExQMjpY+iKvYpI&V7n)gG_^a6 z^{AV)sq*h^Y3ewVa|)6>jP#XkQ##>V&c9DXa!x@yF3@o{?kNJf|2~b=ae)xSHb(T| zVe@YAk(AEvB);z(-_PIA{}4Am`HSSb;@gQ|6b@(qZw@L5A1=oB;~A}q${lk^hc2K; z|80M8&M*EjF6^I>;M5L5;pDooM*Wg@y$_cY!t;tO*ZV(I#@B;)yhQ zE!Gvs^4q36-W0TiH+D-HvZmRM(+Ux_SpRx%v0~OlHX~1|s1|E-M3c>$#-hn#O;-+Y za@!`CH+ih-#>z z{q#;?xowlcV$x41T=%Zen_AtwOnOQ8Zb*-$O1gI=+QfAaO)+Z{y7w&FB%80znuKyZ zhc-#d@m$sB&7F3dLmX9BBD)GTdh%NGgtL}4g>93=nC;CxslX$FMc@nv z7J<_XEW{>jJk2D>ml@HF3~^+LBSTy%CS5Buo(#E?A-)8eDiQ=zI3qJ*UGA4kBq9(T zCr%Sd!Hvud4v8fQF)-0nONO{I#1?Up1VS!B8>|pQ;-R2%MU*5#N zAD=cVA}11M1tby>64V4FmLW2?HdG=f5}*5NWjAFxnq& z*B5b89c?!d5OuWW*xH$mQixSYn@3tQrgDGUfJ-5=cG|exAMG~$_D8#o!u_6NFm7Wi z38DhxOAw_$ULeA$svR#B5LG)~BtcYev53T~cG|G5@l{gvR_B5o#8vIIvAnMxIi#y| zfk*bP{5owaa83Q2 zOyi1#gQOFJ#uJd~M7fSHG7>@~WD5jjs?5eVTR?o0qX3#u&!JomFM}vAquhH$o2vsyM$N|{4oNzOCUXr6)AD? z!E6^Fu8p&qarV?hLiY5rk4vdYx9yHuciiX^H@JilP9`aByYNd-LQeFyrT^NBXEvnh2WgIXyPHg?tf%X~p=Q zhyXHKBQR~tp-aAyahhRBp2Wc@X7u(3V@S$FjDLNG5CegsV}*n-#7fT4R~RC-VCWKt zeF@paq+AnR7&7!AdF2f4Kw+pz=wSqY>N5;Y-@ym7!kbP1XB0`oFp?QQDGYs~L;64= zo7{T`h5>|QU>GA`5EQ~TW9XZfL!K_Vhl+BB zst!Y;LxrJlI*_(ummy082|Y-d8AA})RTwG~MnZ=QL(hysQ+VnSl0L#PXc^jOY|9K) z9mYh$D@~VOkFbd7Xtm~2}7`r*d+)5P*4vv8jDd5J(;0uhcIEg zc1RckCptG=mzYfy`+^}cY*%lGpzE$3GKNt|hSx@~;F^IKwG1sVN|~W*hfpsuIQ?)uHDThK}t@UG^1*iXA#+rrdNHLM%b2fI$iwgZr|gm>agI z4$6(fFhU(p>>k6ANC-0!iZtR6V~cz2z)^HKQ4Bja=<-UBp>L5c+c@XYEFm&69a_Gk zL!_2c5vym|K4ED4%nmVg@WA)jLWd+s`Fg{K_}w*ZGErgXSwx3!KvIzaQ=HZxy0Of# zlPozs!zM!rbB#(Eq7Hr27R~`|)KM6!61qLZrVI%quty)mGddVgxG&;ue7#}2UeB;; zm#x;YBNx4m?S)e25GWEVhVA-2!}f_`M=<`xAL3|3E+!aSW@IZ26~lIeo?%mlu15{q zgSZIuHx^xXX^`^uh7Gw@*RXvum&1?_ii;i_Gj2yn7^pf_3>!?UYuG;XHZhqgc7%O} zsppER=PD9*3_I>+4n*UoZ__RZ0SWYxFQ;>8hk-tG@T^|ufD8kgvZ7(zBx!f;$WSE= z^qGTa_c8}$7=lPt!jR4s*O3OrQyHofI=##R8A9@9Qys?Wv4LSIF^nCVp=#Kk+shoN zgjV1ah9ofrbBtXnb8r<26~p$tUgkgtB@U<#LzhHT%aRz9(k#$t4xZo394NyO12M1& zEYersm&yu9VW?&fF!j5c17&E(w96r#b3rJ@WlLeGW)6Vf+97@Rs!g7{4}*ePI?idx z&#)R5a~V7ljxv0w6dgN^xE;z2WANB93!@%72S;QMAblh=?AT$QMnOVRjY0@i2Su?% zpUf0uhpL2*O#^+1ek%q+Eg@lKsSIU1^hr#abS3m5?L~%|)IdCqVB*Lru`(#vipTaz zOqn=6hNObRDT1CGYZGu1t%RS3n z8NwV60vhNeDxuRRODLhj5UC}Gq)z$QbKkICKcbE>CUMzzqJ5`J}9HL`5G%!i|$@ zfT5$85c=#yL2HMQaC%_PZI{WK1AByVkD;rV5YKK!q@D=LD92%I*p5r%GJEWCkD;e$ z2o*A26d-dB+#@f|9PqYe+#EeaU*BcS+GDx`M9s#6Q6UW7nAXee@yb1h!GI2-t{{6n hgUBHa;~=CAJ(rpddn7Yw_$TA+{{RIlR$94v0079wvikr4 literal 0 HcmV?d00001 diff --git a/testsuite/MDAnalysisTests/data/mmcif/7ETN.cif.gz b/testsuite/MDAnalysisTests/data/mmcif/7ETN.cif.gz new file mode 100644 index 0000000000000000000000000000000000000000..c5d77f66adcee75f9705f43de3a6ed7161cb8998 GIT binary patch literal 13252 zcmZ{KWl&sA*Db-_B@h?_3~s@~AOi#m?jGFT-Ga+t69)HS!GhZm+#$FpxVvlckLRtr zKkuJYr*`jNTUPg~uG5D)1|9wRK;9DZh2O&6k;BZ!5}_!-!EF&#d*bzkskMyP-eg-y zgV4Uh$t>h)nyZ+Vl>Ik5TV0RkOTI(4OukkU%scQ&YO~=@2=Q@dDa0)Qp-Z0s*E<}1 zeXU5_9HW4i?8ho=?y9E9r(yes*nIRHsSOhk{YulWI%-&ZwWrjJH9)qBg2y-hEJ z_#sUpl2*5oPcAN?lSAvC@H&wg8Di-qylGygV%|6gUKxoX1_IuqnH80W;d_;exe9WI z$H=TTEEJm$v!k0)=xpuEX5-1B!E~iDvhs&=!E8}0{>537KB4NWc|Pqu zbsU|!W7Cd>Qt~p*D`MzB@@Cs**3{wvx1}~Y78o6nNO17{g^mF7{`n^I3fA*E#yua;qTkyR`5g4ZKlL$d*H_ zm0t<&fG0zs%)`5TH?M0Ox1vPnSjxdzyM=Z zT66!FlyMXKANKBh{1Z{RmnCgKr`fVvffrHG$P&(zHZ>=|(AvXFZ_I$tWV>k=*QRG3 z*>=HO{_FK--9~@!z?nnKrZ)}zEZ|=B6$;vfgZEb{12lMcORIq!>1>S39$OyAR5Rs5N}hw3l1QQ|*DzuW zOkDP_3f^%Nqr=XkH%r=@=0#f;XsKw|&Ya!wp-w_-&KICDvkiT<&nG=gzTC zDDk-%!=#Vb(&deV9iS|=428D@BxTM!{Le`CH~aD5fWGB#wxTbICiS!y(`khhjUDG* zAtL^X?gF-Y>HU4Y7P<_heWo7xU28vmt??dgUC_&wBV)(M?j9~5L~0{{+~ETGZ^@dF ztV8U*{B^1ptme0Izja-v8rJq4R9B+4R9hGRraZXwZFn*S&w|Yd0IX}@@=Cm(n+;ui z1!YLjCf?M3&1_n%`k0I^hJF6RI{==4^tnFk1jlZ^@8Fx1q6*NIXcWe(IY-T14SZOd zCziZFXb2FbCAj@|3>tcL zX>h_T9-7lk{Y{T`@SS+%;R%aB{IGmZjxg|@qvYb)$v*wQBv>le?SF%Id4vjmc zmzrzTud?$rd(3=OTyHO2aJ#=j-H0RnvwEr>sJB7bcOW#=NogU%w@W?4L2rocu%t-7 zFxm*s|1|ozMD}*UvmPK$nbJ~jkuUcXJXv+E7 zN2@kpUr$C4HR8J1B*60x+-D7QMtF=L$H<>@-lKd@EUnD&YE)aHK>)x~mG`D>AmF+_ zA(2I4_Eh(ZP=#>J#N6s&HNcb>a2NDFOaLk0V06Gw_vKXiWp`wAr7^PT)NC@>fhd2K zchOS)bYLRpjqPFJcqTjl@bx-ESFpjR|I^vNB@hj2*d#$Iyz`esqQLWcQ7UR(AJ0`BnLaH#vTvd>0{Vw>{q^sh{3|%Y|Nb(gw~xFbFI6~c0bCq!nDiy@knM?3 zSbydry{*0)@tZF?K<-kwSGg?zt@;NoKkma*@7^ZivGpht^Gd2iUBKUZB?xzMp4CFH zlGyB+h+^_&RHxw2MrEr>d3U#$@bayP%th9}(7Ib)_fhb^!&P{WR9FJ}P_q+a0}j&r z6B>jxL=yx<*N1WC-P|#!H>e15xMI9wDPuBSU_^ytKu`;3?h~wy)YugbPxMpS95S7cfoa2~L z6uRvNw^?2lr_yHl^9XqFa;bNkAMgZUm;+JxlRch3-yclp;`S-{NISwLVNOPzP!^As z(DU8x0AdfWl0Iv)*)MsYL>2A51>5}ruo+&Sm;%0cdiO~ak}OKRYm zNCZO-hC14~mO=wz>a)@X`_GLk-*D@lKMYr@9fdd>hX=3K7l=A)M|wVMJ$YP7I#?p*)+SW`?dd_6To3b^6iS@NFDowi)_m+NXf#rZXk8;=d zG{-zM$CVpc0yQ42A;EVZ+dKZNIO@uxEjAHV`1fbu_OR=Qt_BHw2~a2Yp++q|6dyS< zo^Ttp52OLDoxdF01s2SsrR!IRE4J^N?ta|!g;+O$r3&4c4QSi$BC#vzDe!+1mKN#_ zj=0K*^y1?%#t^G2H!8|(jPEbsi53iJ;!$_ft@<~EFaPWySrH&F>+>ZIc-ghDoi@dA z@IlwU=-jYrL!l4HItSyk!Bu|(8#hZNs7{MlW*<DeDB*9JS1ss==6w}xMfC%Vu|EdiTa3|PX^cZ3u31aA zyTZO6C{Am!*53&)5lS4(@t9+Fc|2ul3UPs<4%C&jLe5*uZWRqJkt6O7f6efK=}r}X z6aK^AH@BBnMkis^|2A{{f>-koFBhTwP1FU$Eu2Q<5n?<3Btkm#CkvOZb?`{%@$4NJ zqg?A2hMok=pq~|o^tj;>LkgU}gRMS9ldx+dRI0!rY-Tm;zhy0C01CopZA%s~-O{1G zh;(j;f7?r3Rm`1e6-P(5^1g$|$Drz#9Z#PJ4ZVhx6(21JfMk%SQ{c~=(T}Zx+mrWn zmf&=qKd2=S;&JaCRw)!G@2Bbal)(!(@I}6d93mb7yVqy`4bT3rJSu%Fzj}$vRi`Qd zT3|bNT)L>u=SdN&{>{n(8%Z>`C{M6L`Nx+V=MO5w;pHC#zazWFB)_eXkvf{?GTa7{vY81lAFWB9Dk2>Q`r#s(sqAyu2i>%}-r*k}paJ3lPBy($ME@X5Zvu z^Zt7r)qcmYLmax&w{ljrnJ1uesEU2#d`ii}n`QQPMpikyHt(GGhdS)GsbYuq=y-#^ z)vwaq8{yX!TF;yv-~;leb%%P2ol0v?6#M+9EsoyI^+oAdDTDH3w>q1g1BYb{EmXMi zX25x?llOdg*}JU;zC+s%+onr)XpZ4H{Ty9E^UY|_giF|mc?b_SSij?D7Hf+Fka5il z|EeWwn>;BXkf7RO-fA5Y*pSBWkD*R=$6|v!8+fuUPEx6TY|{Jom%f>_Au{#_b82^< zu6K{C=Xhv`vi79t*vr@J{ezp$ro%eJEL*(_qi6LF@qw^)Lk(wrB-5&R$w0;C`tE6x z>sgPCE#w-W&*yBSQt<;hBc{cC7v??oE=#9KG(oE_Qs6C5#2KWUaQ70g(Q)5EUqL2Y zonYzB(mv>i>nBmCRio8_JI~wg#vaD;k=P0gYpvYnz_!f;5PN=1`Q^B&^z()~?D1|u z7j5VD9Has6=Tot{Zp--yg>v%*MEhEY2x5}_pblMWx9_r~^!^48EGdr@{L1F%mx0pe z;QT`JgOr*(ugTw|Zl$p<=B~_P=Vk57Ams1{!O^KR#ga$7WLqR1hHJ?~Qs-UNZ?8V4 z+}D5VD}Yv;PNhVSL4|jb-CVr8jyg~m(hpub+}VYkOnImLh(JxsK_GAh;DY+bky;R!hc{}ZgIS;$1^bd+G zb;%XvK(qHsHd+)%4d}=QKwxZ__-TpWJClyGR6Zjne$j(~+(^o)$KsxH_P^`tdK0^r zZDAI7hHaQ4(dynGtM3*qW(|fs98TFd)uDPgv!nZY(N->>?%~SOgHG2}SvL==70_$e z&vJmzRd}rpqMag7-vs?HZO1-+M5o$k>sM03aer6luOuB+a;yO`#&YJ!(Z0M{9k4aZ zjb?=Fjrp&a)yX~neK|`Ge2D%;YG{CrbZO}oWjH#76s#T#&dNIE>%yXp2yr_)>~k^5 zj!ze~g&R5amvF{%LWB-F-VHAwAC^w;Yns|v8N2NbHkf|DP|7ZLr#KhU)v7LrNwrDH}|TGRMOR5PTuz!;-&x)t$H zW&I&KxnIuv;N)y+LQGoaZ&wA@vbV&C1+8tGOd(<@1_lQq$kRlCxJ21K*2CQv@--~n zD$d~I9)!O0Bz!0hf{}0=v8{W{c6RU~EUjjr9g}Z#TrMnn@5*)jFKj5*9a(5sv8wuyDESjnZNTwz!Zu&6r%3zu%`R`-}s&?amWey$uu@ z--hD48jC5b;M}*X;z)0~Hif-p_bw3t>wwVXhc7+~1+Woo%1r6=SbE)EO%B0fw-9T> zFHwO6E)D}f2W8H1sv$qg(9MrgBpR$NFq~BA9goA1ZO386)5I72WwztPD+@fTei(e_ zJaR6_Y|v=e6rlzrsqdcAcEvulMp20ksr4N<+@#30o=vB0p$Z=RGfKbL1{0Czb7bNr zXLGLR3^AKinJ38Vc)e^HRnte)^!!qgZ8_zNJu5$)RcXxU+|1=fFJP7AWfhGLG78M( zG&eBvYWTs-DEKbY%fJO~4VS(UnY{MUMcJ4ot>l!}(~)T(@jBzx>;-jQze>APavxpJ z7ri~%`#6#rPmJ_id6~oOo)nQ#6*03AUAkp@C zsy1u*JwX{B$AZ;yDy1D|PdZREEv_k)*g$6EbWA^!^Sj;Z_fg4$EJbn=<1HM06ke-9 zwXEW*thDdXa-V~-DC4^A=zh$r;%EG}c}$3hLu)Rr$uQpEX*JJ~_DHChBME&b$kkw* z?sNz&A9@J}QdRo9ZuxEFUaN^nYaG+pe5l8F9dBKb;Nf5`;Su@Wv0HWn&|^H@zSMLl zAJbY}Xue|$zKGz!F}^V19u;u&1#lnXyZ7$Eq-rf;;OFNoJLcJjHylR0OP;s0&VY_RuxhXOJqVlR zX8H`$q&qJtpnF-=6>0g^dWV|yDf@7jV3#IDxBySW1fWxz#@~)p`;LkO79Lt`1Pe)o#X2%S1px+ROxSI(8jl}&aeT&hwJ90Fvsd(rAjk#b@t? z)zlBSAKQ&sQN4Zrwl3<&%LhV7$lWCP-6JJMl|hBq-m!_Abbu=h4rpsQp%P+_Zwo^& z-6=P*~befKp)+JUi4G2+m7k17qY$>UAo3o`W?*)%QChImXCY z@v8Lm@zpAS&6MQs;!S^W!DEjw^-6aSRj&--4)T5T%?g017;-1L-ec!uEW3M-bKd!P zQ;d%)M-!_m^jItAZoPTMsjMN3qPX9<#tGj4R5Dj?0n!qEaQ5;bSyp`c`n(S<=fM+O zMs>CBJ^b^4!v-vpl}6Yln;bpHG2;DfR5iX%qE3y{_%+<&q!He<&t6sO*qKuGMdzVx z_*Wl;|Dw>fs95LZ+z$O^rO!bXnkJW%*R^ST$%5b@-Adz{d_wty=#Cei=I}bfF(nu3 zZ`5a<=d76dvOc|va#i7pr}@{8`}yEKq66CZCcSptM?-R#3#Ih`u$)-o}bKNguDTpwMcy+s0j!Q%$TVak`- zKS?BnsoVWiV9lYdtOJ)~{z4Jb0Ivk6U!&BE;VGx(-uEH`6gZL-Z z7SWGJRXOl!0Zk;+y}fCb#KO;AeM}7o$DT>0!!_WGyTP6lTbi9M8`P?IW^X;qx9tRL zN^bCqBW&j91}x~_iVyL-aMna^NiTC%$~=f>khX2K*kCFwmk^E7wO;Pswk})-rDc>aiNaz2+Ri%vsK?nk(TsD_I%drvxt} z-7nptn>wZe+TJy;G$$eD-q5}oIrFMpL1DT>e8H$p^>O9)0lxn_Mk^vhFHl|Rj`4^M zCL;-Ub#7BxwZTwbb87S6o}iwd67f_B?ZB9h(R4~>tXb<(d(ttm@sjy|B{|GBBxVuu zvtv1JOQUYo%U_9aoQ}Amhgm9beAkq=+lKbM{28TPnkO$~&opT8!4Q`I=+Ne)(=l?C z8u&#v3wNjtO#3Pk=Zfd-TB1gcz=$TZW%W}MiYXNl3qDQjFQj^P zrl*|ts_7Sd+Wdq?1n1v7F0;~RhH;_FwadSyf?|!g3vPEGC;gt+04sbjDDCwgsUZR( zw7s){b3--1lmpwWj5+|A!NmhD`I>Ynof=U#adw8oj$u?Ly-{{!_jg7IoPR^03LQJ9 zD+3t(kyZfT4`R(0^a`8atCmr#=2KhjB$$kh()H++NT>FGNZCoSEUfVyi-hU;9K~03Fk|N@S zre)VM;tCA_*{ISvBO{edrdep3PvMuqr%CRzxq@KJaGyf7jO3jJz#ec88f#5t3b5%9 z%aD#xZCa6IlHEcRNeF{!yyD1lUk}PL4QqN7HIe$#|4gKZ(x2WylC^^w?asec%%;P?!kP+7Arm}0hd0y>Fz zQX!3Gpz<`RRytQe!v?F~^|Njq{=Q3u^80dBHjv<$bm&k4K1`TvbBn1Amy(}8C1qC} zV8`^pU-j=@IoBFQOjXercfpVVT(|WkuEDVMVfiq-Via=MesPS{(etkB0u@A8ZebMH z(QdL#*c5XfytszF*>mAX0NZ;EB6Il?c7pcVln=#Wd9YqooazFJv`H+R|l_lP?xbXJEPvax3Iag-!H%EjhO#OYO zC1UMPt)tG*#PKMP;~XH>{EcC2gc0&3aV+{clPg9kAZy5Le7$cALA}YPxEQ~U`X(_Ld)0G!iP6Nt6vzLjGqr{m!m(rF{ zNSmC*CvtG-?M0@d!r`ismX=UJpL#VxtZ@j~oe>G8C7ksUuV_nZ=+j?AUMWg52LbZ| zddep9l~jr|Tv#(S-LyRg5@bxx?cLXV=%@$y!ap3y8|Y~jYJ^834Cey{K8n;3q?K#x z+HR6+m)Bw^>kHB|&AD9TL<(1Gr4JStXbK`6*d{|lf8{{(35!J;SPra{apQk|DKaap z)Y?~(J%|rYS%ldQjPG~KzE)lo_vf_T^YP6vw zbro`$TDWn^#o}}aXS_DTXz#W@P$!`KTVEOA+2~z{&rH#r(9JrLH9i%#_|_aWV^1{q zD4&U?xc{gtJS`aV;9c*(kdMTzwz{Ajb$wY9A+b|`D9`k27@wb&Xv_wuGw1Ng z5P5>EwOMC{ZVx?aOLC}ffqP1gWG8`71yLsi3xZAQXh17o#c2TdP9a8&x2W~NFj7T_ zn=_oOO*k8U&}Y%}nhZAMQHvhy_E1)3t}LFc80Pv9FFowxZBS@SUhlH>#9#0JjowZI z>|27aZNd}o_t8n{Aej23?w1cZ+JgCTN#}5MXkYCx844>ilbl?M3h{6408Ax)+=6xF zst8tkO00Sg*02?>2{K&mGh-q20engb%94-u(Z7qN7{p5F$g!PfRWl!ISVWJnv?Qix z@9ft@!gfQ1p^HAIRJH=CLK6C-w=R%B(9$o;MWOa6WDv`mspDL)g}+8U$7T$z!*^GgqVGVj zBPxE=!4(=1oQf$@nSM7ttf04~1}>`DBXAQx{oZf4vkNDPGQOW#CS^dWKOY}$tfrr< z(F@)2w{Ek+a36|00wW6DS}$0JVywn1$=EM)Z7qit5#&tuZV96PXO0B933pLQuBOdR zFte!UEiYnI;^i z1E=o3WVcO6MFh9?Zxn9SXwKTe_n4T|(NGi8Pz)J)Br+asuW-FoSrEk@+a?kgZp!U@ z?L>)DOle|mxJPr)18X4@u|7?DGDaMxi+0BTPo!HiQYo4pdutd;smdlub{km0fQhf| zV*|I7K9#dyqEfBSdZSN1EQ@+8a!?b)l|`h{`v=~_@Sb3rh8bdSkp9*#Rx!wIiM*?S zIoRxbe>j{mwxIhd3+y~7#H${sE=`G^QP3Q5{i6ifs`;%UmG$>1Y*|nMS zT|wh;iriz}*{Y}tz0BoJ>iId>RzKMh6;B0gHHDEGpXHSHRY^@63G*%EUp9*(n`}7C zX!rV@tPz36CarRw2XhtSj=lYwOz?U&1cXXaj!sRcdE7|c*IJ0#44-`?9Pw!GgsdlN?IWY z{m-^kBM;9(A0ovM>z<9b)Jl4bt6cjE(BwfZ`Q@yJNMbNbYf~Wu0V{F_B%5&Ou9wO~ zGsVWBcKWTVv%+n}3Py9e524)*KhqTE3S+BVr|p4xj4*1`*olDQA$~s7@c!{@MPZO)W6L59bUlq1|`RQUPpCGN7rmB zm^Jyi+kNf4TGhroZjkB$ji`0}ox%w-}cD=sJn5u2q1YCa7fLf4r z-OYtn9DuSPO}j&(9evnf!k%o`#%h6VvCed0pTA~J#<4zE{>^uwLMG$3J8$g3L#nBc zUO@{GfQS*vvr~r^DVRyKiQDpLcf+~`n?(*n$0kRE=xuYV#-P+C?>Yc47T`YN(8ow< z84662vZ>&ba*lUV#xh?r0KsDTbjN!CbWzD#rmGo)iaqH6^Tc)GF`B!WK%85tF+ewm zd&Ik-o!Z0-MTD~c2*)v*%Ckt1Tk2CR`{ALqA#ooKEyPM4>LL?h5)n|bAE{<~RS)&{ z5YKf?=M4Q$3l1uZFRemmk*NdZHzbCK8$dN}notv(k0ElDuT;L*P_fF;2k9hQG{e`_ zOKS8Zf6qZsN(fNSC|*Y?K_Wxm&Lh1jzgnf{66v`Ei`OZQ5=@NNJsV@grDtlS9G4s6 zf95+)_{Nf|&`Vut6$y#j#lu*>5Hm-(5lg+M*6nW!VtL{t;K!6e*3AEhdzih(=lok6 z{c_)vv=W{4vj9PjRL)hmZVYUIg8H4S`RN!@$ODPhoY~fOJ=L)Dg*O&KKcOt$a z%a@W}!Lky{R7V*QG)oO-GH#&5Ce}AtLW>j}{$_SX93C~+jfg-?ofyjUj}AMM$}H#o zz+Pi~r~gj2B2hy1;KRC#mHO6OgSJp*`Kb3vAv|n4qLc^rJM=@G3f(wAehx+vrAN!f zwMqDFV$ExBHKziboT~w(e;dU86WYc4!JnPus|K++WxtvbwLzjhBr%hEih3F1T(NiS zE3aT%WFIFpG0T-`PQK;deCP+ia$Ln^`k!}gj7!#UVDrpQY>`sRKs|0|P>8pzE0(nU zdiUDLK$|B8SgUhC?tmA{HEc_Z`0Nvw31Zl=O!Sg_Bk=8*ER2U!)^S~OEE^Kj{;71h zcl5#<#3*YBG=we5rHDsUBq@0Jj>ybK_Q+n)z5;k25z=09q_J@Swf2ZqTT;re616|k>8U2q`j5CwS+zEBxbU$ zz-AID7^be;Tf?`4b7+xz(rv)Ia9Nw=be&id$+RHiqP$YY*E`lqb3@o;_ty8{l9fY+ zfraQCV+2`dr@wpUVR|5n7?A2uR!QNatN7n>>`1%y9ItheeMBSS8O-}3r;5&p8f^j^ zSV?{@tjR33&r}=L6Y+apuTt%d4OOF;#R>h8gB6d8*+We|{U2*}3+W2J zJM;wP|G1OfI-EoQ*1odV=u!G1ZOw^1mdf8;<^gl3bG;2uq7|{GsjZR=57=8w>ht6&8hOs#_J83eHaX|%&!ZhA4@3;ei;5+ zYwifs@99#L4a?^G>H81JnttfZn?=-2)K2OG+p^XoR;W~#R52k#d{s%L-6Kht)uY#- z7{;cFj3%zNDV~ZuX$W3uN#in%vW`N-YBdE?ChRD5x|U#PZ!)MJ8l!AHkaa52e@2JH zRfMT1oTN_{M$}&a3f)-up}aRmh`Nj!xnM_MY@=Hsn|&E5pq2!KQY2ua?bnW+jQ77C zQ#GqRQi4&I*DTFMKuyl)l3i?Wd@<2*o0M9yJ*ia8#xS{f`88S-9{Imj-;gL-QSt4t zDZNr=Ifz-qL2AeRwk1svO(u=ji(kX%&oOX_T_4dAV-C1xmF|%JyTBUcC6O#`@V^NS zCc(+SCm=n^v7nvl&gKgFWngoJ88>L%xyIhW1lmLl_b}3jk0`Vr1hpKFnO0^HXy`DMQbPX+5kT*--*dD;;b6bZ5W{7$po4CV6_pxkRdFiDxls3P;tr9Ul%!v*RHl|Ev0R(st>G;-^CmOu@Pl>-w8x%TbBIrtG40 z8TJpfiU@J(MDH@^1p2cr9AQh{+sz6@p9mc?qN#h}?5rBN3Y81O)v}EjHqU8 zQ3ebdu6`5;W+6&5k#3)6@8 zRBT^#p;OAReMGcFKZF*vk<^CyeCucYx(3lB{=nQsCJxcV zXNlqth`%OVf>A6xFz3K2V|c0d4}x^nM};F+9Dt!3`g^Uxn{AC7%;IlX95b)cRTkY{Ch2Q+CkG{_gwjx#uZsPvk6aI;j zwP{nL;f$la_9+|Brb2P!b4qVBoNx_k*%>4QP1{TFCTlo*1@whXtlOKt6*eZtZK1Wd zGY61=kOjtLo(w0$2V_KG;*5VL3mR=8iyw*|m3ylv?EBr$TGy~PqVjM4glF1MzO;dA zyZ*<(PnV-zJomKMP;-_;X{tcvo^ZTBN`$cyh5hG_ zvq_NAR~s+|F#i=;G->9beNM0l0bdu7hXdtapfjZBs4 z3mw$uRlmbCE3U=OnGp3m8Fw<`uq0+L66N-#bxDsGGG3&jLdg0P8Yq(Ex;15#@hn$G zSiB&TI3@4~RKxi4_-x5+YZ-lovuJZR@DF{h7w~j>rY6vn^iGz?c7wE(zo=`#3W4$F z=rx$^K>t?nU||EnZZV3IY+UO(O{Z+c)!#1G!ggSK{p|(6i-_tPv8*=W`}ob3ku*Sn zyS(hSJ7)tH zaVE2srJAu7{_ZsYX2=`H^u(I|k5|3|+!vX|;IrS~LaMxypD%(+itBk)F)Ll$i^OLzse{{n4p~#Kq26T^-Wo+hzcu$Y%7MZGxtAT z&dFb`i<4c6=Y^CYkkxK)v7FKS%HBv}h8t(;;tqP4Q}=FFJQii(80~J-KyS{;*%_h0 zq%}h^7qz0pt$SA9v53eHSIvNszp#L)MxeUwyhOY~pZ9mp8~;eCwwy4Dll6-PW}V@; z$LqVK!5FD$v`jQ{OnAv z;j8+&V@mV0+F~lN>zJ>?5jk}1-_RXRs%(0+^6@=4*ZF&qBd(g3O<$o1f#=kr@RVrM zUN}Y;FZV^);ucX0_`iMJSDyOB%YS2BhAga!N`0EBeDn;5G6tt1hgcxZkx@BC-SSU- zzUy`T_TbIyvXs@xSvr|m%KhkF>~HEd2yU%Ux=$^;E{m2fZCcsI3~Tz$=o$0+kN5V7 za-oELvhejU;k-G(TH6I=neO(CyE%WPXnjO(W6X#ghe({(s?Oh(f9UCEC{~_j^sCm7 z{@nE&5}U1~6y+n-9HsLEZ@S_0d#3BhqWBkY6mLc+Q?E1sR`H4^^VXEIe`4DLQFB&b zKzcqzE#bSH%zpHv^(j?`xpgBu@@IxCy(jBF-WYMJ{s?!er3WQ9^4yh$W0otujvII0 zu6f^=DD~^tw)pC)W$CQ=UxCksTVv(?-XkrKj3@R&$e_c=t&rvZ%X5wItc>;?6@03P zv@p50ALM`zsJIa(h(hZC literal 0 HcmV?d00001 diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index ef0bea4036a..baa835ecba8 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -37,14 +37,21 @@ """ __all__ = [ - "PSF", "DCD", "CRD", # CHARMM (AdK example, DIMS trajectory from JMB 2009 paper) + "PSF", + "DCD", + "CRD", # CHARMM (AdK example, DIMS trajectory from JMB 2009 paper) "DCD2", # CHARMM (AdK example, DIMS trajectory from PLOS Comput Biol paper) - "PSF_notop", "PSF_BAD", # Same as PSF but no bonds etc, malformed version of previous + "PSF_notop", + "PSF_BAD", # Same as PSF but no bonds etc, malformed version of previous "DCD_empty", - "PSF_TRICLINIC", "DCD_TRICLINIC", # CHARMM c36 new unitcell, NPT 125 TIP3P (box vectors, see Issue 187 for details) - "PSF_NAMD", "PDB_NAMD", # NAMD - "PSF_NAMD_TRICLINIC", "DCD_NAMD_TRICLINIC", # NAMD, triclinic unitcell (Issue 187) - "PSF_NAMD_GBIS", "DCD_NAMD_GBIS", # NAMD, implicit solvent, 100 steps, #1819 + "PSF_TRICLINIC", + "DCD_TRICLINIC", # CHARMM c36 new unitcell, NPT 125 TIP3P (box vectors, see Issue 187 for details) + "PSF_NAMD", + "PDB_NAMD", # NAMD + "PSF_NAMD_TRICLINIC", + "DCD_NAMD_TRICLINIC", # NAMD, triclinic unitcell (Issue 187) + "PSF_NAMD_GBIS", + "DCD_NAMD_GBIS", # NAMD, implicit solvent, 100 steps, #1819 "PSF_nosegid", # psf without a segid, Issue 121 "PSF_cmap", # ala3 PSF from ParmEd test files with cmap "PSF_inscode", # PSF file with insertion codes @@ -58,61 +65,127 @@ "PDB_icodes", # stripped down version of 1osm, has icodes! "PDB_varying", # varying occupancies and tempfactors "XPDB_small", - "PDB_full", # PDB 4E43 (full HEADER, TITLE, COMPND, REMARK, altloc) + "PDB_full", # PDB 4E43 (full HEADER, TITLE, COMPND, REMARK, altloc) "ALIGN", # Various way to align atom names in PDB files - "RNA_PSF", "RNA_PDB", # nucleic acid (PDB 1K5I in CHARMM36m) + "RNA_PSF", + "RNA_PDB", # nucleic acid (PDB 1K5I in CHARMM36m) "INC_PDB", # incomplete PDB file (Issue #396) # for testing cryst before/after model headers - "PDB_cm", "PDB_cm_bz2", "PDB_cm_gz", - "PDB_mc", "PDB_mc_bz2", "PDB_mc_gz", + "PDB_cm", + "PDB_cm_bz2", + "PDB_cm_gz", + "PDB_mc", + "PDB_mc_bz2", + "PDB_mc_gz", "PDB_chainidnewres", # Issue 1110 - "PDB_sameresid_diffresname", #Case where two residues share the same resid + "PDB_sameresid_diffresname", # Case where two residues share the same resid "PDB_chainidrepeat", # Issue #1107 - "PDB", "GRO", "XTC", "TRR", "TPR", "GRO_velocity", # Gromacs (AdK) + "PDB", + "GRO", + "XTC", + "TRR", + "TPR", + "GRO_velocity", # Gromacs (AdK) "GRO_incomplete_vels", "COORDINATES_GRO_BZ2", - "GRO_large", #atom number truncation at > 100,000 particles, Issue 550 + "GRO_large", # atom number truncation at > 100,000 particles, Issue 550 "GRO_residwrap", # resids wrapping because of 5 digit field (Issue #728) "GRO_residwrap_0base", # corner case of #728 with resid=0 for first atom "GRO_sameresid_diffresname", # Case where two residues share the same resid - "PDB_xvf", "TPR_xvf", "TRR_xvf", # Gromacs coords/veloc/forces (cobrotoxin, OPLS-AA, Gromacs 4.5.5 tpr) + "PDB_xvf", + "TPR_xvf", + "TRR_xvf", # Gromacs coords/veloc/forces (cobrotoxin, OPLS-AA, Gromacs 4.5.5 tpr) "H5MD_xvf", # TPR_xvf + TRR_xvf converted to h5md format "H5MD_energy", # H5MD trajectory with observables/atoms/energy "H5MD_malformed", # H5MD trajectory with malformed observable group "XVG_BZ2", # Compressed xvg file about cobrotoxin "PDB_xlserial", - "TPR400", "TPR402", "TPR403", "TPR404", "TPR405", "TPR406", "TPR407", - "TPR450", "TPR451", "TPR452", "TPR453", "TPR454", "TPR455", "TPR455Double", - "TPR460", "TPR461", "TPR502", "TPR504", "TPR505", "TPR510", "TPR2016", - "TPR2018", "TPR2019B3", "TPR2020B2", "TPR2020", "TPR2020Double", - "TPR2021", "TPR2021Double", "TPR2022RC1", "TPR2023", "TPR2024", - "TPR510_bonded", "TPR2016_bonded", "TPR2018_bonded", "TPR2019B3_bonded", - "TPR2020B2_bonded", "TPR2020_bonded", "TPR2020_double_bonded", - "TPR2021_bonded", "TPR2021_double_bonded", "TPR2022RC1_bonded", - "TPR334_bonded", "TPR2023_bonded", "TPR2024_bonded", - "TPR_EXTRA_2021", "TPR_EXTRA_2020", "TPR_EXTRA_2018", - "TPR_EXTRA_2016", "TPR_EXTRA_407", "TPR_EXTRA_2022RC1", - "TPR_EXTRA_2023", "TPR_EXTRA_2024", - "PDB_sub_sol", "PDB_sub_dry", # TRRReader sub selection + "TPR400", + "TPR402", + "TPR403", + "TPR404", + "TPR405", + "TPR406", + "TPR407", + "TPR450", + "TPR451", + "TPR452", + "TPR453", + "TPR454", + "TPR455", + "TPR455Double", + "TPR460", + "TPR461", + "TPR502", + "TPR504", + "TPR505", + "TPR510", + "TPR2016", + "TPR2018", + "TPR2019B3", + "TPR2020B2", + "TPR2020", + "TPR2020Double", + "TPR2021", + "TPR2021Double", + "TPR2022RC1", + "TPR2023", + "TPR2024", + "TPR510_bonded", + "TPR2016_bonded", + "TPR2018_bonded", + "TPR2019B3_bonded", + "TPR2020B2_bonded", + "TPR2020_bonded", + "TPR2020_double_bonded", + "TPR2021_bonded", + "TPR2021_double_bonded", + "TPR2022RC1_bonded", + "TPR334_bonded", + "TPR2023_bonded", + "TPR2024_bonded", + "TPR_EXTRA_2021", + "TPR_EXTRA_2020", + "TPR_EXTRA_2018", + "TPR_EXTRA_2016", + "TPR_EXTRA_407", + "TPR_EXTRA_2022RC1", + "TPR_EXTRA_2023", + "TPR_EXTRA_2024", + "PDB_sub_sol", + "PDB_sub_dry", # TRRReader sub selection "TRR_sub_sol", "XTC_sub_sol", - "XYZ", "XYZ_psf", "XYZ_bz2", - "XYZ_mini", "XYZ_five", # 3 and 5 atoms xyzs for an easy topology - "TXYZ", "ARC", "ARC_PBC", # Tinker files + "XYZ", + "XYZ_psf", + "XYZ_bz2", + "XYZ_mini", + "XYZ_five", # 3 and 5 atoms xyzs for an easy topology + "TXYZ", + "ARC", + "ARC_PBC", # Tinker files "PRM", "PRM_chainid_bz2", "TRJ", "TRJ_bz2", # Amber (no periodic box) "INPCRD", - "PRMpbc", "TRJpbc_bz2", # Amber (periodic box) - "PRM7", "NCDFtruncoct", # Amber (cpptrj test trajectory, see Issue 488) - "PRM12", "TRJ12_bz2", # Amber (v12 format, Issue 100) - "PRMncdf", "TRJncdf", "NCDF", # Amber (netcdf) - "PFncdf_Top", "PFncdf_Trj", # Amber ncdf with Positions and Forces - "CPPTRAJ_TRAJ_TOP", "CPPTRAJ_TRAJ", # Amber ncdf extracted from CPPTRAJ without time variable + "PRMpbc", + "TRJpbc_bz2", # Amber (periodic box) + "PRM7", + "NCDFtruncoct", # Amber (cpptrj test trajectory, see Issue 488) + "PRM12", + "TRJ12_bz2", # Amber (v12 format, Issue 100) + "PRMncdf", + "TRJncdf", + "NCDF", # Amber (netcdf) + "PFncdf_Top", + "PFncdf_Trj", # Amber ncdf with Positions and Forces + "CPPTRAJ_TRAJ_TOP", + "CPPTRAJ_TRAJ", # Amber ncdf extracted from CPPTRAJ without time variable "PRMcs", # Amber (format, Issue 1331) "PRMNCRST", # Amber ncrst with positions/forces/velocities - "PRM_NCBOX", "TRJ_NCBOX", # Amber parm7 + nc w/ pos/forces/vels/box + "PRM_NCBOX", + "TRJ_NCBOX", # Amber parm7 + nc w/ pos/forces/vels/box "PRMNEGATIVE", # Amber negative ATOMIC_NUMBER (Issue 2306) "PRMErr1", # Amber TOP files to check raised errors "PRMErr2", @@ -120,7 +193,8 @@ "PRMErr4", "PRMErr5", "PRM_UreyBradley", # prmtop from ParmEd test files with Urey-Bradley angles - "PRM7_ala2", "RST7_ala2", # prmtop and rst files from ParmEd example files + "PRM7_ala2", + "RST7_ala2", # prmtop and rst files from ParmEd example files "PRM19SBOPC", # prmtop w/ ff19SB CMAP terms and OPC water (Issue #2449) "PQR", # PQR v1 "PQR_icodes", # PQR v2 with icodes @@ -138,7 +212,8 @@ "DMS_NO_SEGID", # ADK closed with no segids or chains "CONECT", # HIV Reverse Transcriptase with inhibitor "CONECT_ERROR", # PDB file with corrupt CONECT - "TRZ", "TRZ_psf", + "TRZ", + "TRZ_psf", "TRIC", "XTC_multi_frame", "TRR_multi_frame", @@ -146,15 +221,32 @@ "TNG_traj_gro", # topology for argon_npt_compressed_traj "TNG_traj_uneven_blocks", # TNG trajectory with pos and vel deposited on different strides "TNG_traj_vels_forces", # similar to above but with velocities and forces - "merge_protein", "merge_ligand", "merge_water", - "mol2_molecules", "mol2_molecule", "mol2_broken_molecule", - "mol2_zinc", "mol2_comments_header", "mol2_ligand", "mol2_sodium_ion", - "capping_input", "capping_output", "capping_ace", "capping_nma", - "contacts_villin_folded", "contacts_villin_unfolded", "contacts_file", - "LAMMPSdata", "trz4data", "LAMMPSdata_mini", - "LAMMPSdata2", "LAMMPSdcd2", - "LAMMPScnt", "LAMMPScnt2", # triclinic box - "LAMMPShyd", "LAMMPShyd2", + "merge_protein", + "merge_ligand", + "merge_water", + "mol2_molecules", + "mol2_molecule", + "mol2_broken_molecule", + "mol2_zinc", + "mol2_comments_header", + "mol2_ligand", + "mol2_sodium_ion", + "capping_input", + "capping_output", + "capping_ace", + "capping_nma", + "contacts_villin_folded", + "contacts_villin_unfolded", + "contacts_file", + "LAMMPSdata", + "trz4data", + "LAMMPSdata_mini", + "LAMMPSdata2", + "LAMMPSdcd2", + "LAMMPScnt", + "LAMMPScnt2", # triclinic box + "LAMMPShyd", + "LAMMPShyd2", "LAMMPSdata_many_bonds", "LAMMPSdata_deletedatoms", # with deleted atoms "LAMMPSdata_triclinic", # lammpsdata file to test triclinic dimension parsing, albite with most atoms deleted @@ -167,22 +259,29 @@ "LAMMPSDUMP_triclinic", # lammpsdump file to test triclinic dimension parsing, albite with most atoms deleted "LAMMPSDUMP_image_vf", # Lammps dump file with image flags, velocities, and forces. "LAMMPS_image_vf", # Lammps data file to go with LAMMPSDUMP_image_vf - "LAMMPSDUMP_chain1", # Lammps dump file with chain reader - "LAMMPSDUMP_chain2", # Lammps dump file with chain reader - "LAMMPS_chain", # Lammps data file with chain reader + "LAMMPSDUMP_chain1", # Lammps dump file with chain reader + "LAMMPSDUMP_chain2", # Lammps dump file with chain reader + "LAMMPS_chain", # Lammps data file with chain reader "LAMMPSDUMP_additional_columns", # lammpsdump file with additional data (an additional charge column) "unordered_res", # pdb file with resids non sequential "GMS_ASYMOPT", # GAMESS C1 optimization - "GMS_SYMOPT", # GAMESS D4h optimization + "GMS_SYMOPT", # GAMESS D4h optimization "GMS_ASYMSURF", # GAMESS C1 surface - "two_water_gro", "two_water_gro_nonames", # for bond guessing, 2 water molecules, one with weird names + "two_water_gro", + "two_water_gro_nonames", # for bond guessing, 2 water molecules, one with weird names "two_water_gro_multiframe", "two_water_gro_widebox", # Issue #548 - "DLP_CONFIG", "DLP_CONFIG_order", "DLP_CONFIG_minimal", # dl_poly 4 config file - "DLP_HISTORY", "DLP_HISTORY_order", "DLP_HISTORY_minimal", # dl_poly 4 history file + "DLP_CONFIG", + "DLP_CONFIG_order", + "DLP_CONFIG_minimal", # dl_poly 4 config file + "DLP_HISTORY", + "DLP_HISTORY_order", + "DLP_HISTORY_minimal", # dl_poly 4 history file "DLP_HISTORY_minimal_cell", # dl_poly 4 history file with cell parameters "DLP_HISTORY_classic", # dl_poly classic history file - "waterPSF","waterDCD","rmsfArray", + "waterPSF", + "waterDCD", + "rmsfArray", "HoomdXMLdata", "Make_Whole", # for testing the function lib.mdamath.make_whole, has 9 atoms "fullerene", # for make_whole, a nice friendly C60 with bonds @@ -199,37 +298,60 @@ "COORDINATES_DCD", "COORDINATES_TOPOLOGY", "NUCLsel", - "GRO_empty_atom", "GRO_missing_atomname", # for testing GROParser exception raise - "ENT", #for testing ENT file extension + "GRO_empty_atom", + "GRO_missing_atomname", # for testing GROParser exception raise + "ENT", # for testing ENT file extension "RANDOM_WALK", "RANDOM_WALK_TOPO", # garbage topology to go along with XTC positions above - "AUX_XVG", "XVG_BAD_NCOL", #for testing .xvg auxiliary reader - "AUX_XVG_LOWF", "AUX_XVG_HIGHF", - "AUX_EDR", "AUX_EDR_TPR", - "AUX_EDR_XTC", "AUX_EDR_RAW", + "AUX_XVG", + "XVG_BAD_NCOL", # for testing .xvg auxiliary reader + "AUX_XVG_LOWF", + "AUX_XVG_HIGHF", + "AUX_EDR", + "AUX_EDR_TPR", + "AUX_EDR_XTC", + "AUX_EDR_RAW", "AUX_EDR_SINGLE_FRAME", # for testing .edr auxiliary reader - "MMTF", "MMTF_gz", 'MMTF_skinny', # skinny - some optional fields stripped out + "MMTF", + "MMTF_gz", + "MMTF_skinny", # skinny - some optional fields stripped out "MMTF_skinny2", "ALIGN_BOUND", # two component bound system "ALIGN_UNBOUND", # two component unbound system "legacy_DCD_ADK_coords", # frames 5 and 29 read in for adk_dims.dcd using legacy DCD reader "legacy_DCD_NAMD_coords", # frame 0 read in for SiN_tric_namd.dcd using legacy DCD reader "legacy_DCD_c36_coords", # frames 1 and 4 read in for tip125_tric_C36.dcd using legacy DCD reader - "GSD", "GSD_bonds", "GSD_long", - "TRC_PDB_VAC", "TRC_TRAJ1_VAC", "TRC_TRAJ2_VAC", # 2x 3 frames of vacuum trajectory from GROMOS11 tutorial + "GSD", + "GSD_bonds", + "GSD_long", + "TRC_PDB_VAC", + "TRC_TRAJ1_VAC", + "TRC_TRAJ2_VAC", # 2x 3 frames of vacuum trajectory from GROMOS11 tutorial "TRC_CLUSTER_VAC", # three frames without TIMESTEP and GENBOX block but with unsupported POSITION block - "TRC_TRICLINIC_SOLV", "TRC_TRUNCOCT_VAC", - "TRC_GENBOX_ORIGIN", "TRC_GENBOX_EULER", + "TRC_TRICLINIC_SOLV", + "TRC_TRUNCOCT_VAC", + "TRC_GENBOX_ORIGIN", + "TRC_GENBOX_EULER", "TRC_EMPTY", # Empty file containing only one space - "TRC_PDB_SOLV", "TRC_TRAJ_SOLV", # 2 frames of solvated trajectory from GROMOS11 tutorial - "GRO_MEMPROT", "XTC_MEMPROT", # YiiP transporter in POPE:POPG lipids with Na+, Cl-, Zn2+ dummy model without water - "DihedralArray", "DihedralsArray", # time series of single dihedral - "RamaArray", "GLYRamaArray", # time series of phi/psi angles - "JaninArray", "LYSJaninArray", # time series of chi1/chi2 angles - "PDB_rama", "PDB_janin", # for testing failures of Ramachandran and Janin classes + "TRC_PDB_SOLV", + "TRC_TRAJ_SOLV", # 2 frames of solvated trajectory from GROMOS11 tutorial + "GRO_MEMPROT", + "XTC_MEMPROT", # YiiP transporter in POPE:POPG lipids with Na+, Cl-, Zn2+ dummy model without water + "DihedralArray", + "DihedralsArray", # time series of single dihedral + "RamaArray", + "GLYRamaArray", # time series of phi/psi angles + "JaninArray", + "LYSJaninArray", # time series of chi1/chi2 angles + "PDB_rama", + "PDB_janin", # for testing failures of Ramachandran and Janin classes "BATArray", # time series of bond-angle-torsion coordinates array from Molecule_comments_header.mol2 # DOS line endings - "WIN_PDB_multiframe", "WIN_DLP_HISTORY", "WIN_TRJ", "WIN_LAMMPSDUMP", "WIN_ARC", + "WIN_PDB_multiframe", + "WIN_DLP_HISTORY", + "WIN_TRJ", + "WIN_LAMMPSDUMP", + "WIN_ARC", "GRO_huge_box", # for testing gro parser with hige box sizes "ITP", # for GROMACS generated itps "ITP_nomass", # for ATB generated itps @@ -259,225 +381,243 @@ from importlib import resources import MDAnalysisTests.data -_data_ref = resources.files('MDAnalysisTests.data') - -WIN_PDB_multiframe = (_data_ref / 'windows/WIN_nmr_neopetrosiamide.pdb').as_posix() -WIN_DLP_HISTORY = (_data_ref / 'windows/WIN_HISTORY').as_posix() -WIN_TRJ = (_data_ref / 'windows/WIN_ache.mdcrd').as_posix() -WIN_ARC = (_data_ref / 'windows/WIN_test.arc').as_posix() -WIN_LAMMPSDUMP = (_data_ref / 'windows/WIN_wat.lammpstrj').as_posix() - -legacy_DCD_NAMD_coords = (_data_ref / 'legacy_DCD_NAMD_coords.npy').as_posix() -legacy_DCD_ADK_coords = (_data_ref / 'legacy_DCD_adk_coords.npy').as_posix() -legacy_DCD_c36_coords = (_data_ref / 'legacy_DCD_c36_coords.npy').as_posix() -AUX_XVG_LOWF = (_data_ref / 'test_lowf.xvg').as_posix() -AUX_XVG_HIGHF = (_data_ref / 'test_highf.xvg').as_posix() -XVG_BAD_NCOL = (_data_ref / 'bad_num_col.xvg').as_posix() -AUX_XVG = (_data_ref / 'test.xvg').as_posix() -AUX_EDR = (_data_ref / 'test.edr').as_posix() -AUX_EDR_RAW = (_data_ref / 'aux_edr_raw.txt').as_posix() -AUX_EDR_TPR = (_data_ref / 'aux_edr.tpr').as_posix() -AUX_EDR_XTC = (_data_ref / 'aux_edr.xtc').as_posix() -AUX_EDR_SINGLE_FRAME = (_data_ref / 'single_frame.edr').as_posix() -ENT = (_data_ref / 'testENT.ent').as_posix() -GRO_missing_atomname = (_data_ref / 'missing_atomname.gro').as_posix() -GRO_empty_atom = (_data_ref / 'empty_atom.gro').as_posix() -GRO_huge_box = (_data_ref / 'huge_box.gro').as_posix() - -COORDINATES_GRO = (_data_ref / 'coordinates/test.gro').as_posix() -COORDINATES_GRO_INCOMPLETE_VELOCITY = (_data_ref / 'coordinates/test_incomplete_vel.gro').as_posix() -COORDINATES_GRO_BZ2 = (_data_ref / 'coordinates/test.gro.bz2').as_posix() -COORDINATES_XYZ = (_data_ref / 'coordinates/test.xyz').as_posix() -COORDINATES_XYZ_BZ2 = (_data_ref / 'coordinates/test.xyz.bz2').as_posix() -COORDINATES_XTC = (_data_ref / 'coordinates/test.xtc').as_posix() -COORDINATES_TRR = (_data_ref / 'coordinates/test.trr').as_posix() -COORDINATES_TNG = (_data_ref / 'coordinates/test.tng').as_posix() -COORDINATES_H5MD = (_data_ref / 'coordinates/test.h5md').as_posix() -COORDINATES_DCD = (_data_ref / 'coordinates/test.dcd').as_posix() -COORDINATES_TOPOLOGY = (_data_ref / 'coordinates/test_topology.pdb').as_posix() - -PSF = (_data_ref / 'adk.psf').as_posix() -PSF_notop = (_data_ref / 'adk_notop.psf').as_posix() -PSF_BAD = (_data_ref / 'adk_notop_BAD.psf').as_posix() -DCD = (_data_ref / 'adk_dims.dcd').as_posix() -DCD_empty = (_data_ref / 'empty.dcd').as_posix() -CRD = (_data_ref / 'adk_open.crd').as_posix() -PSF_TRICLINIC = (_data_ref / 'tip125_tric_C36.psf').as_posix() -DCD_TRICLINIC = (_data_ref / 'tip125_tric_C36.dcd').as_posix() -DCD2 = (_data_ref / 'adk_dims2.dcd').as_posix() - -PSF_NAMD = (_data_ref / 'namd_cgenff.psf').as_posix() -PDB_NAMD = (_data_ref / 'namd_cgenff.pdb').as_posix() -PDB_multipole = (_data_ref / 'water_methane_acetic-acid_ammonia.pdb').as_posix() -PSF_NAMD_TRICLINIC = (_data_ref / 'SiN_tric_namd.psf').as_posix() -DCD_NAMD_TRICLINIC = (_data_ref / 'SiN_tric_namd.dcd').as_posix() -PSF_NAMD_GBIS = (_data_ref / 'adk_closed_NAMD.psf').as_posix() -DCD_NAMD_GBIS = (_data_ref / 'adk_gbis_tmd-fast1_NAMD.dcd').as_posix() - -PSF_nosegid = (_data_ref / 'nosegid.psf').as_posix() - -PSF_cmap = (_data_ref / 'parmed_ala3.psf').as_posix() - -PSF_inscode = (_data_ref / '1a2c_ins_code.psf').as_posix() - -PDB_varying = (_data_ref / 'varying_occ_tmp.pdb').as_posix() -PDB_small = (_data_ref / 'adk_open.pdb').as_posix() -PDB_closed = (_data_ref / 'adk_closed.pdb').as_posix() - -ALIGN = (_data_ref / 'align.pdb').as_posix() -RNA_PSF = (_data_ref / 'analysis/1k5i_c36.psf.gz').as_posix() -RNA_PDB = (_data_ref / 'analysis/1k5i_c36.pdb.gz').as_posix() -INC_PDB = (_data_ref / 'incomplete.pdb').as_posix() -PDB_cm = (_data_ref / 'cryst_then_model.pdb').as_posix() -PDB_cm_gz = (_data_ref / 'cryst_then_model.pdb.gz').as_posix() -PDB_cm_bz2 = (_data_ref / 'cryst_then_model.pdb.bz2').as_posix() -PDB_mc = (_data_ref / 'model_then_cryst.pdb').as_posix() -PDB_mc_gz = (_data_ref / 'model_then_cryst.pdb.gz').as_posix() -PDB_mc_bz2 = (_data_ref / 'model_then_cryst.pdb.bz2').as_posix() -PDB_chainidnewres = (_data_ref / 'chainIDnewres.pdb.gz').as_posix() -PDB_sameresid_diffresname = (_data_ref / 'sameresid_diffresname.pdb').as_posix() -PDB_chainidrepeat = (_data_ref / 'chainIDrepeat.pdb.gz').as_posix() -PDB_multiframe = (_data_ref / 'nmr_neopetrosiamide.pdb').as_posix() -PDB_helix = (_data_ref / 'A6PA6_alpha.pdb').as_posix() -PDB_conect = (_data_ref / 'conect_parsing.pdb').as_posix() -PDB_conect2TER = (_data_ref / 'CONECT2TER.pdb').as_posix() -PDB_singleconect = (_data_ref / 'SINGLECONECT.pdb').as_posix() -PDB_icodes = (_data_ref / '1osm.pdb.gz').as_posix() -PDB_CRYOEM_BOX = (_data_ref / '5a7u.pdb').as_posix() -PDB_CHECK_RIGHTHAND_PA = (_data_ref / '6msm.pdb.bz2').as_posix() -FHIAIMS = (_data_ref / 'fhiaims.in').as_posix() - -GRO = (_data_ref / 'adk_oplsaa.gro').as_posix() -GRO_velocity = (_data_ref / 'sample_velocity_file.gro').as_posix() -GRO_incomplete_vels = (_data_ref / 'grovels.gro').as_posix() -GRO_large = (_data_ref / 'bigbox.gro.bz2').as_posix() -GRO_residwrap = (_data_ref / 'residwrap.gro').as_posix() -GRO_residwrap_0base = (_data_ref / 'residwrap_0base.gro').as_posix() -GRO_sameresid_diffresname = (_data_ref / 'sameresid_diffresname.gro').as_posix() -PDB = (_data_ref / 'adk_oplsaa.pdb').as_posix() -XTC = (_data_ref / 'adk_oplsaa.xtc').as_posix() -TRR = (_data_ref / 'adk_oplsaa.trr').as_posix() -TPR = (_data_ref / 'adk_oplsaa.tpr').as_posix() -PDB_sub_dry = (_data_ref / 'cobrotoxin_dry_neutral_0.pdb').as_posix() -TRR_sub_sol = (_data_ref / 'cobrotoxin.trr').as_posix() -XTC_sub_sol = (_data_ref / 'cobrotoxin.xtc').as_posix() -PDB_sub_sol = (_data_ref / 'cobrotoxin.pdb').as_posix() -PDB_xlserial = (_data_ref / 'xl_serial.pdb').as_posix() -GRO_MEMPROT = (_data_ref / 'analysis/YiiP_lipids.gro.gz').as_posix() -XTC_MEMPROT = (_data_ref / 'analysis/YiiP_lipids.xtc').as_posix() -XTC_multi_frame = (_data_ref / 'xtc_test_only_10_frame_10_atoms.xtc').as_posix() -TRR_multi_frame = (_data_ref / 'trr_test_only_10_frame_10_atoms.trr').as_posix() -TNG_traj = (_data_ref / 'argon_npt_compressed.tng').as_posix() -TNG_traj_gro = (_data_ref / 'argon_npt_compressed.gro.gz').as_posix() -TNG_traj_uneven_blocks = (_data_ref / 'argon_npt_compressed_uneven.tng').as_posix() -TNG_traj_vels_forces = (_data_ref / 'argon_npt_compressed_vels_forces.tng').as_posix() -PDB_xvf = (_data_ref / 'cobrotoxin.pdb').as_posix() -TPR_xvf = (_data_ref / 'cobrotoxin.tpr').as_posix() -TRR_xvf = (_data_ref / 'cobrotoxin.trr').as_posix() -H5MD_xvf = (_data_ref / 'cobrotoxin.h5md').as_posix() -H5MD_energy = (_data_ref / 'cu.h5md').as_posix() -H5MD_malformed = (_data_ref / 'cu_malformed.h5md').as_posix() -XVG_BZ2 = (_data_ref / 'cobrotoxin_protein_forces.xvg.bz2').as_posix() - -XPDB_small = (_data_ref / '5digitResid.pdb').as_posix() +_data_ref = resources.files("MDAnalysisTests.data") + +WIN_PDB_multiframe = (_data_ref / "windows/WIN_nmr_neopetrosiamide.pdb").as_posix() +WIN_DLP_HISTORY = (_data_ref / "windows/WIN_HISTORY").as_posix() +WIN_TRJ = (_data_ref / "windows/WIN_ache.mdcrd").as_posix() +WIN_ARC = (_data_ref / "windows/WIN_test.arc").as_posix() +WIN_LAMMPSDUMP = (_data_ref / "windows/WIN_wat.lammpstrj").as_posix() + +legacy_DCD_NAMD_coords = (_data_ref / "legacy_DCD_NAMD_coords.npy").as_posix() +legacy_DCD_ADK_coords = (_data_ref / "legacy_DCD_adk_coords.npy").as_posix() +legacy_DCD_c36_coords = (_data_ref / "legacy_DCD_c36_coords.npy").as_posix() +AUX_XVG_LOWF = (_data_ref / "test_lowf.xvg").as_posix() +AUX_XVG_HIGHF = (_data_ref / "test_highf.xvg").as_posix() +XVG_BAD_NCOL = (_data_ref / "bad_num_col.xvg").as_posix() +AUX_XVG = (_data_ref / "test.xvg").as_posix() +AUX_EDR = (_data_ref / "test.edr").as_posix() +AUX_EDR_RAW = (_data_ref / "aux_edr_raw.txt").as_posix() +AUX_EDR_TPR = (_data_ref / "aux_edr.tpr").as_posix() +AUX_EDR_XTC = (_data_ref / "aux_edr.xtc").as_posix() +AUX_EDR_SINGLE_FRAME = (_data_ref / "single_frame.edr").as_posix() +ENT = (_data_ref / "testENT.ent").as_posix() +GRO_missing_atomname = (_data_ref / "missing_atomname.gro").as_posix() +GRO_empty_atom = (_data_ref / "empty_atom.gro").as_posix() +GRO_huge_box = (_data_ref / "huge_box.gro").as_posix() + +COORDINATES_GRO = (_data_ref / "coordinates/test.gro").as_posix() +COORDINATES_GRO_INCOMPLETE_VELOCITY = ( + _data_ref / "coordinates/test_incomplete_vel.gro" +).as_posix() +COORDINATES_GRO_BZ2 = (_data_ref / "coordinates/test.gro.bz2").as_posix() +COORDINATES_XYZ = (_data_ref / "coordinates/test.xyz").as_posix() +COORDINATES_XYZ_BZ2 = (_data_ref / "coordinates/test.xyz.bz2").as_posix() +COORDINATES_XTC = (_data_ref / "coordinates/test.xtc").as_posix() +COORDINATES_TRR = (_data_ref / "coordinates/test.trr").as_posix() +COORDINATES_TNG = (_data_ref / "coordinates/test.tng").as_posix() +COORDINATES_H5MD = (_data_ref / "coordinates/test.h5md").as_posix() +COORDINATES_DCD = (_data_ref / "coordinates/test.dcd").as_posix() +COORDINATES_TOPOLOGY = (_data_ref / "coordinates/test_topology.pdb").as_posix() + +PSF = (_data_ref / "adk.psf").as_posix() +PSF_notop = (_data_ref / "adk_notop.psf").as_posix() +PSF_BAD = (_data_ref / "adk_notop_BAD.psf").as_posix() +DCD = (_data_ref / "adk_dims.dcd").as_posix() +DCD_empty = (_data_ref / "empty.dcd").as_posix() +CRD = (_data_ref / "adk_open.crd").as_posix() +PSF_TRICLINIC = (_data_ref / "tip125_tric_C36.psf").as_posix() +DCD_TRICLINIC = (_data_ref / "tip125_tric_C36.dcd").as_posix() +DCD2 = (_data_ref / "adk_dims2.dcd").as_posix() + +PSF_NAMD = (_data_ref / "namd_cgenff.psf").as_posix() +PDB_NAMD = (_data_ref / "namd_cgenff.pdb").as_posix() +PDB_multipole = (_data_ref / "water_methane_acetic-acid_ammonia.pdb").as_posix() +PSF_NAMD_TRICLINIC = (_data_ref / "SiN_tric_namd.psf").as_posix() +DCD_NAMD_TRICLINIC = (_data_ref / "SiN_tric_namd.dcd").as_posix() +PSF_NAMD_GBIS = (_data_ref / "adk_closed_NAMD.psf").as_posix() +DCD_NAMD_GBIS = (_data_ref / "adk_gbis_tmd-fast1_NAMD.dcd").as_posix() + +PSF_nosegid = (_data_ref / "nosegid.psf").as_posix() + +PSF_cmap = (_data_ref / "parmed_ala3.psf").as_posix() + +PSF_inscode = (_data_ref / "1a2c_ins_code.psf").as_posix() + +PDB_varying = (_data_ref / "varying_occ_tmp.pdb").as_posix() +PDB_small = (_data_ref / "adk_open.pdb").as_posix() +PDB_closed = (_data_ref / "adk_closed.pdb").as_posix() + +ALIGN = (_data_ref / "align.pdb").as_posix() +RNA_PSF = (_data_ref / "analysis/1k5i_c36.psf.gz").as_posix() +RNA_PDB = (_data_ref / "analysis/1k5i_c36.pdb.gz").as_posix() +INC_PDB = (_data_ref / "incomplete.pdb").as_posix() +PDB_cm = (_data_ref / "cryst_then_model.pdb").as_posix() +PDB_cm_gz = (_data_ref / "cryst_then_model.pdb.gz").as_posix() +PDB_cm_bz2 = (_data_ref / "cryst_then_model.pdb.bz2").as_posix() +PDB_mc = (_data_ref / "model_then_cryst.pdb").as_posix() +PDB_mc_gz = (_data_ref / "model_then_cryst.pdb.gz").as_posix() +PDB_mc_bz2 = (_data_ref / "model_then_cryst.pdb.bz2").as_posix() +PDB_chainidnewres = (_data_ref / "chainIDnewres.pdb.gz").as_posix() +PDB_sameresid_diffresname = (_data_ref / "sameresid_diffresname.pdb").as_posix() +PDB_chainidrepeat = (_data_ref / "chainIDrepeat.pdb.gz").as_posix() +PDB_multiframe = (_data_ref / "nmr_neopetrosiamide.pdb").as_posix() +PDB_helix = (_data_ref / "A6PA6_alpha.pdb").as_posix() +PDB_conect = (_data_ref / "conect_parsing.pdb").as_posix() +PDB_conect2TER = (_data_ref / "CONECT2TER.pdb").as_posix() +PDB_singleconect = (_data_ref / "SINGLECONECT.pdb").as_posix() +PDB_icodes = (_data_ref / "1osm.pdb.gz").as_posix() +PDB_CRYOEM_BOX = (_data_ref / "5a7u.pdb").as_posix() +PDB_CHECK_RIGHTHAND_PA = (_data_ref / "6msm.pdb.bz2").as_posix() +FHIAIMS = (_data_ref / "fhiaims.in").as_posix() + +GRO = (_data_ref / "adk_oplsaa.gro").as_posix() +GRO_velocity = (_data_ref / "sample_velocity_file.gro").as_posix() +GRO_incomplete_vels = (_data_ref / "grovels.gro").as_posix() +GRO_large = (_data_ref / "bigbox.gro.bz2").as_posix() +GRO_residwrap = (_data_ref / "residwrap.gro").as_posix() +GRO_residwrap_0base = (_data_ref / "residwrap_0base.gro").as_posix() +GRO_sameresid_diffresname = (_data_ref / "sameresid_diffresname.gro").as_posix() +PDB = (_data_ref / "adk_oplsaa.pdb").as_posix() +XTC = (_data_ref / "adk_oplsaa.xtc").as_posix() +TRR = (_data_ref / "adk_oplsaa.trr").as_posix() +TPR = (_data_ref / "adk_oplsaa.tpr").as_posix() +PDB_sub_dry = (_data_ref / "cobrotoxin_dry_neutral_0.pdb").as_posix() +TRR_sub_sol = (_data_ref / "cobrotoxin.trr").as_posix() +XTC_sub_sol = (_data_ref / "cobrotoxin.xtc").as_posix() +PDB_sub_sol = (_data_ref / "cobrotoxin.pdb").as_posix() +PDB_xlserial = (_data_ref / "xl_serial.pdb").as_posix() +GRO_MEMPROT = (_data_ref / "analysis/YiiP_lipids.gro.gz").as_posix() +XTC_MEMPROT = (_data_ref / "analysis/YiiP_lipids.xtc").as_posix() +XTC_multi_frame = (_data_ref / "xtc_test_only_10_frame_10_atoms.xtc").as_posix() +TRR_multi_frame = (_data_ref / "trr_test_only_10_frame_10_atoms.trr").as_posix() +TNG_traj = (_data_ref / "argon_npt_compressed.tng").as_posix() +TNG_traj_gro = (_data_ref / "argon_npt_compressed.gro.gz").as_posix() +TNG_traj_uneven_blocks = (_data_ref / "argon_npt_compressed_uneven.tng").as_posix() +TNG_traj_vels_forces = (_data_ref / "argon_npt_compressed_vels_forces.tng").as_posix() +PDB_xvf = (_data_ref / "cobrotoxin.pdb").as_posix() +TPR_xvf = (_data_ref / "cobrotoxin.tpr").as_posix() +TRR_xvf = (_data_ref / "cobrotoxin.trr").as_posix() +H5MD_xvf = (_data_ref / "cobrotoxin.h5md").as_posix() +H5MD_energy = (_data_ref / "cu.h5md").as_posix() +H5MD_malformed = (_data_ref / "cu_malformed.h5md").as_posix() +XVG_BZ2 = (_data_ref / "cobrotoxin_protein_forces.xvg.bz2").as_posix() + +XPDB_small = (_data_ref / "5digitResid.pdb").as_posix() # number is the gromacs version -TPR400 = (_data_ref / 'tprs/2lyz_gmx_4.0.tpr').as_posix() -TPR402 = (_data_ref / 'tprs/2lyz_gmx_4.0.2.tpr').as_posix() -TPR403 = (_data_ref / 'tprs/2lyz_gmx_4.0.3.tpr').as_posix() -TPR404 = (_data_ref / 'tprs/2lyz_gmx_4.0.4.tpr').as_posix() -TPR405 = (_data_ref / 'tprs/2lyz_gmx_4.0.5.tpr').as_posix() -TPR406 = (_data_ref / 'tprs/2lyz_gmx_4.0.6.tpr').as_posix() -TPR407 = (_data_ref / 'tprs/2lyz_gmx_4.0.7.tpr').as_posix() -TPR450 = (_data_ref / 'tprs/2lyz_gmx_4.5.tpr').as_posix() -TPR451 = (_data_ref / 'tprs/2lyz_gmx_4.5.1.tpr').as_posix() -TPR452 = (_data_ref / 'tprs/2lyz_gmx_4.5.2.tpr').as_posix() -TPR453 = (_data_ref / 'tprs/2lyz_gmx_4.5.3.tpr').as_posix() -TPR454 = (_data_ref / 'tprs/2lyz_gmx_4.5.4.tpr').as_posix() -TPR455 = (_data_ref / 'tprs/2lyz_gmx_4.5.5.tpr').as_posix() -TPR502 = (_data_ref / 'tprs/2lyz_gmx_5.0.2.tpr').as_posix() -TPR504 = (_data_ref / 'tprs/2lyz_gmx_5.0.4.tpr').as_posix() -TPR505 = (_data_ref / 'tprs/2lyz_gmx_5.0.5.tpr').as_posix() -TPR510 = (_data_ref / 'tprs/2lyz_gmx_5.1.tpr').as_posix() -TPR2016 = (_data_ref / 'tprs/2lyz_gmx_2016.tpr').as_posix() -TPR2018 = (_data_ref / 'tprs/2lyz_gmx_2018.tpr').as_posix() -TPR2019B3 = (_data_ref / 'tprs/2lyz_gmx_2019-beta3.tpr').as_posix() -TPR2020B2 = (_data_ref / 'tprs/2lyz_gmx_2020-beta2.tpr').as_posix() -TPR2020 = (_data_ref / 'tprs/2lyz_gmx_2020.tpr').as_posix() -TPR2021 = (_data_ref / 'tprs/2lyz_gmx_2021.tpr').as_posix() -TPR2022RC1 = (_data_ref / 'tprs/2lyz_gmx_2022-rc1.tpr').as_posix() -TPR2023 = (_data_ref / 'tprs/2lyz_gmx_2023.tpr').as_posix() -TPR2024 = (_data_ref / 'tprs/2lyz_gmx_2024.tpr').as_posix() +TPR400 = (_data_ref / "tprs/2lyz_gmx_4.0.tpr").as_posix() +TPR402 = (_data_ref / "tprs/2lyz_gmx_4.0.2.tpr").as_posix() +TPR403 = (_data_ref / "tprs/2lyz_gmx_4.0.3.tpr").as_posix() +TPR404 = (_data_ref / "tprs/2lyz_gmx_4.0.4.tpr").as_posix() +TPR405 = (_data_ref / "tprs/2lyz_gmx_4.0.5.tpr").as_posix() +TPR406 = (_data_ref / "tprs/2lyz_gmx_4.0.6.tpr").as_posix() +TPR407 = (_data_ref / "tprs/2lyz_gmx_4.0.7.tpr").as_posix() +TPR450 = (_data_ref / "tprs/2lyz_gmx_4.5.tpr").as_posix() +TPR451 = (_data_ref / "tprs/2lyz_gmx_4.5.1.tpr").as_posix() +TPR452 = (_data_ref / "tprs/2lyz_gmx_4.5.2.tpr").as_posix() +TPR453 = (_data_ref / "tprs/2lyz_gmx_4.5.3.tpr").as_posix() +TPR454 = (_data_ref / "tprs/2lyz_gmx_4.5.4.tpr").as_posix() +TPR455 = (_data_ref / "tprs/2lyz_gmx_4.5.5.tpr").as_posix() +TPR502 = (_data_ref / "tprs/2lyz_gmx_5.0.2.tpr").as_posix() +TPR504 = (_data_ref / "tprs/2lyz_gmx_5.0.4.tpr").as_posix() +TPR505 = (_data_ref / "tprs/2lyz_gmx_5.0.5.tpr").as_posix() +TPR510 = (_data_ref / "tprs/2lyz_gmx_5.1.tpr").as_posix() +TPR2016 = (_data_ref / "tprs/2lyz_gmx_2016.tpr").as_posix() +TPR2018 = (_data_ref / "tprs/2lyz_gmx_2018.tpr").as_posix() +TPR2019B3 = (_data_ref / "tprs/2lyz_gmx_2019-beta3.tpr").as_posix() +TPR2020B2 = (_data_ref / "tprs/2lyz_gmx_2020-beta2.tpr").as_posix() +TPR2020 = (_data_ref / "tprs/2lyz_gmx_2020.tpr").as_posix() +TPR2021 = (_data_ref / "tprs/2lyz_gmx_2021.tpr").as_posix() +TPR2022RC1 = (_data_ref / "tprs/2lyz_gmx_2022-rc1.tpr").as_posix() +TPR2023 = (_data_ref / "tprs/2lyz_gmx_2023.tpr").as_posix() +TPR2024 = (_data_ref / "tprs/2lyz_gmx_2024.tpr").as_posix() # double precision -TPR455Double = (_data_ref / 'tprs/drew_gmx_4.5.5.double.tpr').as_posix() -TPR460 = (_data_ref / 'tprs/ab42_gmx_4.6.tpr').as_posix() -TPR461 = (_data_ref / 'tprs/ab42_gmx_4.6.1.tpr').as_posix() -TPR2020Double = (_data_ref / 'tprs/2lyz_gmx_2020_double.tpr').as_posix() -TPR2021Double = (_data_ref / 'tprs/2lyz_gmx_2021_double.tpr').as_posix() +TPR455Double = (_data_ref / "tprs/drew_gmx_4.5.5.double.tpr").as_posix() +TPR460 = (_data_ref / "tprs/ab42_gmx_4.6.tpr").as_posix() +TPR461 = (_data_ref / "tprs/ab42_gmx_4.6.1.tpr").as_posix() +TPR2020Double = (_data_ref / "tprs/2lyz_gmx_2020_double.tpr").as_posix() +TPR2021Double = (_data_ref / "tprs/2lyz_gmx_2021_double.tpr").as_posix() # all bonded interactions -TPR334_bonded = (_data_ref / 'tprs/all_bonded/dummy_3.3.4.tpr').as_posix() -TPR510_bonded = (_data_ref / 'tprs/all_bonded/dummy_5.1.tpr').as_posix() -TPR2016_bonded = (_data_ref / 'tprs/all_bonded/dummy_2016.tpr').as_posix() -TPR2018_bonded = (_data_ref / 'tprs/all_bonded/dummy_2018.tpr').as_posix() -TPR2019B3_bonded = (_data_ref / 'tprs/all_bonded/dummy_2019-beta3.tpr').as_posix() -TPR2020B2_bonded = (_data_ref / 'tprs/all_bonded/dummy_2020-beta2.tpr').as_posix() -TPR2020_bonded = (_data_ref / 'tprs/all_bonded/dummy_2020.tpr').as_posix() -TPR2020_double_bonded = (_data_ref / 'tprs/all_bonded/dummy_2020_double.tpr').as_posix() -TPR2021_bonded = (_data_ref / 'tprs/all_bonded/dummy_2021.tpr').as_posix() -TPR2021_double_bonded = (_data_ref / 'tprs/all_bonded/dummy_2021_double.tpr').as_posix() -TPR2022RC1_bonded = (_data_ref / 'tprs/all_bonded/dummy_2022-rc1.tpr').as_posix() -TPR2023_bonded = (_data_ref / 'tprs/all_bonded/dummy_2023.tpr').as_posix() -TPR2024_bonded = (_data_ref / 'tprs/all_bonded/dummy_2024.tpr').as_posix() +TPR334_bonded = (_data_ref / "tprs/all_bonded/dummy_3.3.4.tpr").as_posix() +TPR510_bonded = (_data_ref / "tprs/all_bonded/dummy_5.1.tpr").as_posix() +TPR2016_bonded = (_data_ref / "tprs/all_bonded/dummy_2016.tpr").as_posix() +TPR2018_bonded = (_data_ref / "tprs/all_bonded/dummy_2018.tpr").as_posix() +TPR2019B3_bonded = (_data_ref / "tprs/all_bonded/dummy_2019-beta3.tpr").as_posix() +TPR2020B2_bonded = (_data_ref / "tprs/all_bonded/dummy_2020-beta2.tpr").as_posix() +TPR2020_bonded = (_data_ref / "tprs/all_bonded/dummy_2020.tpr").as_posix() +TPR2020_double_bonded = (_data_ref / "tprs/all_bonded/dummy_2020_double.tpr").as_posix() +TPR2021_bonded = (_data_ref / "tprs/all_bonded/dummy_2021.tpr").as_posix() +TPR2021_double_bonded = (_data_ref / "tprs/all_bonded/dummy_2021_double.tpr").as_posix() +TPR2022RC1_bonded = (_data_ref / "tprs/all_bonded/dummy_2022-rc1.tpr").as_posix() +TPR2023_bonded = (_data_ref / "tprs/all_bonded/dummy_2023.tpr").as_posix() +TPR2024_bonded = (_data_ref / "tprs/all_bonded/dummy_2024.tpr").as_posix() # all interactions -TPR_EXTRA_2024 = (_data_ref / 'tprs/virtual_sites/extra-interactions-2024.tpr').as_posix() -TPR_EXTRA_2023 = (_data_ref / 'tprs/virtual_sites/extra-interactions-2023.tpr').as_posix() -TPR_EXTRA_2022RC1 = (_data_ref / 'tprs/virtual_sites/extra-interactions-2022-rc1.tpr').as_posix() -TPR_EXTRA_2021 = (_data_ref / 'tprs/virtual_sites/extra-interactions-2021.tpr').as_posix() -TPR_EXTRA_2020 = (_data_ref / 'tprs/virtual_sites/extra-interactions-2020.tpr').as_posix() -TPR_EXTRA_2018 = (_data_ref / 'tprs/virtual_sites/extra-interactions-2018.tpr').as_posix() -TPR_EXTRA_2016 = (_data_ref / 'tprs/virtual_sites/extra-interactions-2016.3.tpr').as_posix() -TPR_EXTRA_407 = (_data_ref / 'tprs/virtual_sites/extra-interactions-4.0.7.tpr').as_posix() - -XYZ_psf = (_data_ref / '2r9r-1b.psf').as_posix() -XYZ_bz2 = (_data_ref / '2r9r-1b.xyz.bz2').as_posix() -XYZ = (_data_ref / '2r9r-1b.xyz').as_posix() -XYZ_mini = (_data_ref / 'mini.xyz').as_posix() -XYZ_five = (_data_ref / 'five.xyz').as_posix() -TXYZ = (_data_ref / 'coordinates/test.txyz').as_posix() -ARC = (_data_ref / 'coordinates/test.arc').as_posix() -ARC_PBC = (_data_ref / 'coordinates/new_hexane.arc').as_posix() - -PRM = (_data_ref / 'Amber/ache.prmtop').as_posix() -TRJ = (_data_ref / 'Amber/ache.mdcrd').as_posix() -INPCRD = (_data_ref / 'Amber/test.inpcrd').as_posix() -TRJ_bz2 = (_data_ref / 'Amber/ache.mdcrd.bz2').as_posix() -PFncdf_Top = (_data_ref / 'Amber/posfor.top').as_posix() -PFncdf_Trj = (_data_ref / 'Amber/posfor.ncdf').as_posix() +TPR_EXTRA_2024 = ( + _data_ref / "tprs/virtual_sites/extra-interactions-2024.tpr" +).as_posix() +TPR_EXTRA_2023 = ( + _data_ref / "tprs/virtual_sites/extra-interactions-2023.tpr" +).as_posix() +TPR_EXTRA_2022RC1 = ( + _data_ref / "tprs/virtual_sites/extra-interactions-2022-rc1.tpr" +).as_posix() +TPR_EXTRA_2021 = ( + _data_ref / "tprs/virtual_sites/extra-interactions-2021.tpr" +).as_posix() +TPR_EXTRA_2020 = ( + _data_ref / "tprs/virtual_sites/extra-interactions-2020.tpr" +).as_posix() +TPR_EXTRA_2018 = ( + _data_ref / "tprs/virtual_sites/extra-interactions-2018.tpr" +).as_posix() +TPR_EXTRA_2016 = ( + _data_ref / "tprs/virtual_sites/extra-interactions-2016.3.tpr" +).as_posix() +TPR_EXTRA_407 = ( + _data_ref / "tprs/virtual_sites/extra-interactions-4.0.7.tpr" +).as_posix() + +XYZ_psf = (_data_ref / "2r9r-1b.psf").as_posix() +XYZ_bz2 = (_data_ref / "2r9r-1b.xyz.bz2").as_posix() +XYZ = (_data_ref / "2r9r-1b.xyz").as_posix() +XYZ_mini = (_data_ref / "mini.xyz").as_posix() +XYZ_five = (_data_ref / "five.xyz").as_posix() +TXYZ = (_data_ref / "coordinates/test.txyz").as_posix() +ARC = (_data_ref / "coordinates/test.arc").as_posix() +ARC_PBC = (_data_ref / "coordinates/new_hexane.arc").as_posix() + +PRM = (_data_ref / "Amber/ache.prmtop").as_posix() +TRJ = (_data_ref / "Amber/ache.mdcrd").as_posix() +INPCRD = (_data_ref / "Amber/test.inpcrd").as_posix() +TRJ_bz2 = (_data_ref / "Amber/ache.mdcrd.bz2").as_posix() +PFncdf_Top = (_data_ref / "Amber/posfor.top").as_posix() +PFncdf_Trj = (_data_ref / "Amber/posfor.ncdf").as_posix() PRM_chainid_bz2 = (_data_ref / "Amber/ache_chainid.prmtop.bz2").as_posix() -CPPTRAJ_TRAJ_TOP = (_data_ref / 'Amber/cpptraj_traj.prmtop').as_posix() -CPPTRAJ_TRAJ = (_data_ref / 'Amber/cpptraj_traj.nc').as_posix() +CPPTRAJ_TRAJ_TOP = (_data_ref / "Amber/cpptraj_traj.prmtop").as_posix() +CPPTRAJ_TRAJ = (_data_ref / "Amber/cpptraj_traj.nc").as_posix() -PRMpbc = (_data_ref / 'Amber/capped-ala.prmtop').as_posix() -TRJpbc_bz2 = (_data_ref / 'Amber/capped-ala.mdcrd.bz2').as_posix() +PRMpbc = (_data_ref / "Amber/capped-ala.prmtop").as_posix() +TRJpbc_bz2 = (_data_ref / "Amber/capped-ala.mdcrd.bz2").as_posix() -PRMncdf = (_data_ref / 'Amber/bala.prmtop').as_posix() -TRJncdf = (_data_ref / 'Amber/bala.trj').as_posix() -NCDF = (_data_ref / 'Amber/bala.ncdf').as_posix() +PRMncdf = (_data_ref / "Amber/bala.prmtop").as_posix() +TRJncdf = (_data_ref / "Amber/bala.trj").as_posix() +NCDF = (_data_ref / "Amber/bala.ncdf").as_posix() -PRM12 = (_data_ref / 'Amber/anti.top').as_posix() -TRJ12_bz2 = (_data_ref / 'Amber/anti_md1.mdcrd.bz2').as_posix() +PRM12 = (_data_ref / "Amber/anti.top").as_posix() +TRJ12_bz2 = (_data_ref / "Amber/anti_md1.mdcrd.bz2").as_posix() -PRM7 = (_data_ref / 'Amber/tz2.truncoct.parm7.bz2').as_posix() -NCDFtruncoct = (_data_ref / 'Amber/tz2.truncoct.nc').as_posix() +PRM7 = (_data_ref / "Amber/tz2.truncoct.parm7.bz2").as_posix() +NCDFtruncoct = (_data_ref / "Amber/tz2.truncoct.nc").as_posix() -PRMcs = (_data_ref / 'Amber/chitosan.prmtop').as_posix() +PRMcs = (_data_ref / "Amber/chitosan.prmtop").as_posix() -PRMNCRST = (_data_ref / 'Amber/ace_mbondi3.parm7').as_posix() +PRMNCRST = (_data_ref / "Amber/ace_mbondi3.parm7").as_posix() -PRM_NCBOX = (_data_ref / 'Amber/ace_tip3p.parm7').as_posix() -TRJ_NCBOX = (_data_ref / 'Amber/ace_tip3p.nc').as_posix() +PRM_NCBOX = (_data_ref / "Amber/ace_tip3p.parm7").as_posix() +TRJ_NCBOX = (_data_ref / "Amber/ace_tip3p.nc").as_posix() -PRMNEGATIVE = (_data_ref / 'Amber/ace_mbondi3.negative.parm7').as_posix() +PRMNEGATIVE = (_data_ref / "Amber/ace_mbondi3.negative.parm7").as_posix() PRMErr1 = (_data_ref / "Amber/ace_mbondi3.error1.parm7").as_posix() PRMErr2 = (_data_ref / "Amber/ace_mbondi3.error2.parm7").as_posix() @@ -485,37 +625,41 @@ PRMErr4 = (_data_ref / "Amber/ace_mbondi3.error4.parm7").as_posix() PRMErr5 = (_data_ref / "Amber/ache_chainid.error5.prmtop.bz2").as_posix() -PRM_UreyBradley = (_data_ref / 'Amber/parmed_fad.prmtop').as_posix() -PRM7_ala2 = (_data_ref / 'Amber/parmed_ala2_solv.parm7').as_posix() -RST7_ala2 = (_data_ref / 'Amber/parmed_ala2_solv.rst7').as_posix() +PRM_UreyBradley = (_data_ref / "Amber/parmed_fad.prmtop").as_posix() +PRM7_ala2 = (_data_ref / "Amber/parmed_ala2_solv.parm7").as_posix() +RST7_ala2 = (_data_ref / "Amber/parmed_ala2_solv.rst7").as_posix() -PRM19SBOPC = (_data_ref / 'Amber/ala.ff19SB.OPC.parm7.bz2').as_posix() +PRM19SBOPC = (_data_ref / "Amber/ala.ff19SB.OPC.parm7.bz2").as_posix() -PQR = (_data_ref / 'adk_open.pqr').as_posix() -PQR_icodes = (_data_ref / '1A2C.pqr').as_posix() +PQR = (_data_ref / "adk_open.pqr").as_posix() +PQR_icodes = (_data_ref / "1A2C.pqr").as_posix() PDBQT_input = (_data_ref / "pdbqt_inputpdbqt.pdbqt").as_posix() PDBQT_querypdb = (_data_ref / "pdbqt_querypdb.pdb").as_posix() PDBQT_tyrosol = (_data_ref / "tyrosol.pdbqt.bz2").as_posix() -FASTA = (_data_ref / 'test.fasta').as_posix() -HELANAL_BENDING_MATRIX = (_data_ref / 'helanal_bending_matrix_AdK_DIMS_H8.dat').as_posix() -HELANAL_BENDING_MATRIX_SUBSET = (_data_ref / 'helanal_bending_matrix_AdK_DIMS_H8_frames10to79.dat').as_posix() +FASTA = (_data_ref / "test.fasta").as_posix() +HELANAL_BENDING_MATRIX = ( + _data_ref / "helanal_bending_matrix_AdK_DIMS_H8.dat" +).as_posix() +HELANAL_BENDING_MATRIX_SUBSET = ( + _data_ref / "helanal_bending_matrix_AdK_DIMS_H8_frames10to79.dat" +).as_posix() -PDB_HOLE = (_data_ref / '1grm_single.pdb').as_posix() -MULTIPDB_HOLE = (_data_ref / '1grm_elNemo_mode7.pdb.bz2').as_posix() +PDB_HOLE = (_data_ref / "1grm_single.pdb").as_posix() +MULTIPDB_HOLE = (_data_ref / "1grm_elNemo_mode7.pdb.bz2").as_posix() -DMS = (_data_ref / 'adk_closed.dms').as_posix() -DMS_DOMAINS = (_data_ref / 'adk_closed_domains.dms').as_posix() -DMS_NO_SEGID = (_data_ref / 'adk_closed_no_segid.dms').as_posix() +DMS = (_data_ref / "adk_closed.dms").as_posix() +DMS_DOMAINS = (_data_ref / "adk_closed_domains.dms").as_posix() +DMS_NO_SEGID = (_data_ref / "adk_closed_no_segid.dms").as_posix() -CONECT = (_data_ref / '1hvr.pdb').as_posix() -CONECT_ERROR = (_data_ref / 'conect_error.pdb').as_posix() +CONECT = (_data_ref / "1hvr.pdb").as_posix() +CONECT_ERROR = (_data_ref / "conect_error.pdb").as_posix() -TRZ = (_data_ref / 'trzfile.trz').as_posix() -TRZ_psf = (_data_ref / 'trz_psf.psf').as_posix() +TRZ = (_data_ref / "trzfile.trz").as_posix() +TRZ_psf = (_data_ref / "trz_psf.psf").as_posix() -TRIC = (_data_ref / 'dppc_vesicle_hg.gro').as_posix() +TRIC = (_data_ref / "dppc_vesicle_hg.gro").as_posix() PDB_full = (_data_ref / "4E43.pdb").as_posix() @@ -551,7 +695,7 @@ LAMMPScnt2 = (_data_ref / "lammps/cnt-hexagonal-class1.data2").as_posix() LAMMPShyd = (_data_ref / "lammps/hydrogen-class1.data").as_posix() LAMMPShyd2 = (_data_ref / "lammps/hydrogen-class1.data2").as_posix() -LAMMPSdata_deletedatoms = (_data_ref / 'lammps/deletedatoms.data').as_posix() +LAMMPSdata_deletedatoms = (_data_ref / "lammps/deletedatoms.data").as_posix() LAMMPSdata_triclinic = (_data_ref / "lammps/albite_triclinic.data").as_posix() LAMMPSdata_PairIJ = (_data_ref / "lammps/pairij_coeffs.data.bz2").as_posix() LAMMPSDUMP = (_data_ref / "lammps/wat.lammpstrj.bz2").as_posix() @@ -565,14 +709,18 @@ LAMMPSDUMP_chain2 = (_data_ref / "lammps/chain_dump_2.lammpstrj").as_posix() LAMMPS_chain = (_data_ref / "lammps/chain_initial.data").as_posix() LAMMPSdata_many_bonds = (_data_ref / "lammps/a_lot_of_bond_types.data").as_posix() -LAMMPSdata_additional_columns = (_data_ref / "lammps/additional_columns.data").as_posix() -LAMMPSDUMP_additional_columns = (_data_ref / "lammps/additional_columns.lammpstrj").as_posix() +LAMMPSdata_additional_columns = ( + _data_ref / "lammps/additional_columns.data" +).as_posix() +LAMMPSDUMP_additional_columns = ( + _data_ref / "lammps/additional_columns.lammpstrj" +).as_posix() unordered_res = (_data_ref / "unordered_res.pdb").as_posix() -GMS_ASYMOPT = (_data_ref / "gms/c1opt.gms.gz").as_posix() -GMS_SYMOPT = (_data_ref / "gms/symopt.gms").as_posix() -GMS_ASYMSURF = (_data_ref / "gms/surf2wat.gms").as_posix() +GMS_ASYMOPT = (_data_ref / "gms/c1opt.gms.gz").as_posix() +GMS_SYMOPT = (_data_ref / "gms/symopt.gms").as_posix() +GMS_ASYMSURF = (_data_ref / "gms/surf2wat.gms").as_posix() two_water_gro = (_data_ref / "two_water_gro.gro").as_posix() two_water_gro_multiframe = (_data_ref / "two_water_gro_multiframe.gro").as_posix() @@ -588,88 +736,91 @@ DLP_HISTORY_minimal_cell = (_data_ref / "dlpoly/HISTORY_minimal_cell").as_posix() DLP_HISTORY_classic = (_data_ref / "dlpoly/HISTORY_classic").as_posix() -waterPSF = (_data_ref / 'watdyn.psf').as_posix() -waterDCD = (_data_ref / 'watdyn.dcd').as_posix() +waterPSF = (_data_ref / "watdyn.psf").as_posix() +waterDCD = (_data_ref / "watdyn.dcd").as_posix() -rmsfArray = (_data_ref / 'adk_oplsaa_CA_rmsf.npy').as_posix() +rmsfArray = (_data_ref / "adk_oplsaa_CA_rmsf.npy").as_posix() -HoomdXMLdata = (_data_ref / 'C12x64.xml.bz2').as_posix() +HoomdXMLdata = (_data_ref / "C12x64.xml.bz2").as_posix() -Make_Whole = (_data_ref / 'make_whole.gro').as_posix() -fullerene = (_data_ref / 'fullerene.pdb.gz').as_posix() +Make_Whole = (_data_ref / "make_whole.gro").as_posix() +fullerene = (_data_ref / "fullerene.pdb.gz").as_posix() -Plength = (_data_ref / 'plength.gro').as_posix() -Martini_membrane_gro = (_data_ref / 'martini_dppc_chol_bilayer.gro').as_posix() +Plength = (_data_ref / "plength.gro").as_posix() +Martini_membrane_gro = (_data_ref / "martini_dppc_chol_bilayer.gro").as_posix() # Contains one of each residue in 'nucleic' selections -NUCLsel = (_data_ref / 'nucl_res.pdb').as_posix() +NUCLsel = (_data_ref / "nucl_res.pdb").as_posix() -RANDOM_WALK = (_data_ref / 'xyz_random_walk.xtc').as_posix() -RANDOM_WALK_TOPO = (_data_ref / 'RANDOM_WALK_TOPO.pdb').as_posix() +RANDOM_WALK = (_data_ref / "xyz_random_walk.xtc").as_posix() +RANDOM_WALK_TOPO = (_data_ref / "RANDOM_WALK_TOPO.pdb").as_posix() -MMTF = (_data_ref / '173D.mmtf').as_posix() -MMTF_gz = (_data_ref / '5KIH.mmtf.gz').as_posix() -MMTF_skinny = (_data_ref / '1ubq-less-optional.mmtf').as_posix() -MMTF_skinny2 = (_data_ref / '3NJW-onlyrequired.mmtf').as_posix() +MMTF = (_data_ref / "173D.mmtf").as_posix() +MMTF_gz = (_data_ref / "5KIH.mmtf.gz").as_posix() +MMTF_skinny = (_data_ref / "1ubq-less-optional.mmtf").as_posix() +MMTF_skinny2 = (_data_ref / "3NJW-onlyrequired.mmtf").as_posix() MMTF_NOCRYST = (_data_ref / "6QYR.mmtf.gz").as_posix() -ALIGN_BOUND = (_data_ref / 'analysis/align_bound.pdb.gz').as_posix() -ALIGN_UNBOUND = (_data_ref / 'analysis/align_unbound.pdb.gz').as_posix() - -GSD = (_data_ref / 'example.gsd').as_posix() -GSD_bonds = (_data_ref / 'example_bonds.gsd').as_posix() -GSD_long = (_data_ref / 'example_longer.gsd').as_posix() - -TRC_PDB_VAC = (_data_ref / 'gromos11/gromos11_traj_vac.pdb.gz').as_posix() -TRC_TRAJ1_VAC = (_data_ref / 'gromos11/gromos11_traj_vac_1.trc.gz').as_posix() -TRC_TRAJ2_VAC = (_data_ref / 'gromos11/gromos11_traj_vac_2.trc.gz').as_posix() -TRC_PDB_SOLV = (_data_ref / 'gromos11/gromos11_traj_solv.pdb.gz').as_posix() -TRC_TRAJ_SOLV = (_data_ref / 'gromos11/gromos11_traj_solv.trc.gz').as_posix() -TRC_CLUSTER_VAC = (_data_ref / 'gromos11/gromos11_cluster_vac.trj.gz').as_posix() -TRC_TRICLINIC_SOLV = (_data_ref / 'gromos11/gromos11_triclinic_solv.trc.gz').as_posix() -TRC_TRUNCOCT_VAC = (_data_ref / 'gromos11/gromos11_truncOcta_vac.trc.gz').as_posix() -TRC_GENBOX_ORIGIN = (_data_ref / 'gromos11/gromos11_genbox_origin.trc.gz').as_posix() -TRC_GENBOX_EULER = (_data_ref / 'gromos11/gromos11_genbox_euler.trc.gz').as_posix() -TRC_EMPTY = (_data_ref / 'gromos11/gromos11_empty.trc').as_posix() - -DihedralArray = (_data_ref / 'adk_oplsaa_dihedral.npy').as_posix() -DihedralsArray = (_data_ref / 'adk_oplsaa_dihedral_list.npy').as_posix() -RamaArray = (_data_ref / 'adk_oplsaa_rama.npy').as_posix() -GLYRamaArray = (_data_ref / 'adk_oplsaa_GLY_rama.npy').as_posix() -JaninArray = (_data_ref / 'adk_oplsaa_janin.npy').as_posix() -LYSJaninArray = (_data_ref / 'adk_oplsaa_LYS_janin.npy').as_posix() -PDB_rama = (_data_ref / '19hc.pdb.gz').as_posix() -PDB_janin = (_data_ref / '1a28.pdb.gz').as_posix() - -BATArray = (_data_ref / 'mol2_comments_header_bat.npy').as_posix() - -ITP = (_data_ref / 'gromacs_ala10.itp').as_posix() -ITP_nomass = (_data_ref / 'itp_nomass.itp').as_posix() -ITP_atomtypes = (_data_ref / 'atomtypes.itp').as_posix() -ITP_charges = (_data_ref / 'atomtypes_charge.itp').as_posix() -ITP_edited = (_data_ref / 'edited_itp.itp').as_posix() +ALIGN_BOUND = (_data_ref / "analysis/align_bound.pdb.gz").as_posix() +ALIGN_UNBOUND = (_data_ref / "analysis/align_unbound.pdb.gz").as_posix() + +GSD = (_data_ref / "example.gsd").as_posix() +GSD_bonds = (_data_ref / "example_bonds.gsd").as_posix() +GSD_long = (_data_ref / "example_longer.gsd").as_posix() + +TRC_PDB_VAC = (_data_ref / "gromos11/gromos11_traj_vac.pdb.gz").as_posix() +TRC_TRAJ1_VAC = (_data_ref / "gromos11/gromos11_traj_vac_1.trc.gz").as_posix() +TRC_TRAJ2_VAC = (_data_ref / "gromos11/gromos11_traj_vac_2.trc.gz").as_posix() +TRC_PDB_SOLV = (_data_ref / "gromos11/gromos11_traj_solv.pdb.gz").as_posix() +TRC_TRAJ_SOLV = (_data_ref / "gromos11/gromos11_traj_solv.trc.gz").as_posix() +TRC_CLUSTER_VAC = (_data_ref / "gromos11/gromos11_cluster_vac.trj.gz").as_posix() +TRC_TRICLINIC_SOLV = (_data_ref / "gromos11/gromos11_triclinic_solv.trc.gz").as_posix() +TRC_TRUNCOCT_VAC = (_data_ref / "gromos11/gromos11_truncOcta_vac.trc.gz").as_posix() +TRC_GENBOX_ORIGIN = (_data_ref / "gromos11/gromos11_genbox_origin.trc.gz").as_posix() +TRC_GENBOX_EULER = (_data_ref / "gromos11/gromos11_genbox_euler.trc.gz").as_posix() +TRC_EMPTY = (_data_ref / "gromos11/gromos11_empty.trc").as_posix() + +DihedralArray = (_data_ref / "adk_oplsaa_dihedral.npy").as_posix() +DihedralsArray = (_data_ref / "adk_oplsaa_dihedral_list.npy").as_posix() +RamaArray = (_data_ref / "adk_oplsaa_rama.npy").as_posix() +GLYRamaArray = (_data_ref / "adk_oplsaa_GLY_rama.npy").as_posix() +JaninArray = (_data_ref / "adk_oplsaa_janin.npy").as_posix() +LYSJaninArray = (_data_ref / "adk_oplsaa_LYS_janin.npy").as_posix() +PDB_rama = (_data_ref / "19hc.pdb.gz").as_posix() +PDB_janin = (_data_ref / "1a28.pdb.gz").as_posix() + +BATArray = (_data_ref / "mol2_comments_header_bat.npy").as_posix() + +ITP = (_data_ref / "gromacs_ala10.itp").as_posix() +ITP_nomass = (_data_ref / "itp_nomass.itp").as_posix() +ITP_atomtypes = (_data_ref / "atomtypes.itp").as_posix() +ITP_charges = (_data_ref / "atomtypes_charge.itp").as_posix() +ITP_edited = (_data_ref / "edited_itp.itp").as_posix() ITP_tip5p = (_data_ref / "tip5p.itp").as_posix() -ITP_spce = (_data_ref / 'spce.itp').as_posix() +ITP_spce = (_data_ref / "spce.itp").as_posix() -GMX_TOP = (_data_ref / 'gromacs_ala10.top').as_posix() -GMX_DIR = (_data_ref / 'gromacs/').as_posix() -GMX_TOP_BAD = (_data_ref / 'bad_top.top').as_posix() -ITP_no_endif = (_data_ref / 'no_endif_spc.itp').as_posix() +GMX_TOP = (_data_ref / "gromacs_ala10.top").as_posix() +GMX_DIR = (_data_ref / "gromacs/").as_posix() +GMX_TOP_BAD = (_data_ref / "bad_top.top").as_posix() +ITP_no_endif = (_data_ref / "no_endif_spc.itp").as_posix() -NAMDBIN = (_data_ref / 'adk_open.coor').as_posix() +NAMDBIN = (_data_ref / "adk_open.coor").as_posix() -SDF_molecule = (_data_ref / 'molecule.sdf').as_posix() +SDF_molecule = (_data_ref / "molecule.sdf").as_posix() -PDB_elements = (_data_ref / 'elements.pdb').as_posix() -PDB_charges = (_data_ref / 'charges.pdb').as_posix() +PDB_elements = (_data_ref / "elements.pdb").as_posix() +PDB_charges = (_data_ref / "charges.pdb").as_posix() PDBX = (_data_ref / "4x8u.pdbx").as_posix() -SURFACE_PDB = (_data_ref / 'surface.pdb.bz2').as_posix() -SURFACE_TRR = (_data_ref / 'surface.trr').as_posix() +SURFACE_PDB = (_data_ref / "surface.pdb.bz2").as_posix() +SURFACE_TRR = (_data_ref / "surface.trr").as_posix() # DSSP testing: from https://github.com/ShintaroMinami/PyDSSP -DSSP = (_data_ref / 'dssp').as_posix() +DSSP = (_data_ref / "dssp").as_posix() + +# MMCIF testing -- valid structures from RCSB +MMCIF = (_data_ref / "mmcif").as_posix() # This should be the last line: clean up namespace del resources diff --git a/testsuite/MDAnalysisTests/topology/test_mmcif.py b/testsuite/MDAnalysisTests/topology/test_mmcif.py new file mode 100644 index 00000000000..29d44ce7fe5 --- /dev/null +++ b/testsuite/MDAnalysisTests/topology/test_mmcif.py @@ -0,0 +1,48 @@ +import glob +import os +from io import StringIO + +import MDAnalysis as mda +import numpy as np +import pytest +from numpy.testing import ( + assert_allclose, + assert_almost_equal, + assert_array_almost_equal, + assert_equal, +) + +from MDAnalysisTests.datafiles import MMCIF as MMCIF_FOLDER + +# FIXME: rewrite tests to read trajectories only once + + +@pytest.mark.parametrize( + "mmcif_filename,n_chains", + [ + (f"{MMCIF_FOLDER}/1YJP.cif", 1), + (f"{MMCIF_FOLDER}/1YJP.cif.gz", 1), + (f"{MMCIF_FOLDER}/7ETN.cif", 2), + (f"{MMCIF_FOLDER}/7ETN.cif.gz", 2), + ], +) +def test_chains(mmcif_filename, n_chains): + u = mda.Universe(mmcif_filename) + assert len(u.segments) == n_chains + + +@pytest.mark.parametrize( + "mmcif_filename,sequence", + [ + (f"{MMCIF_FOLDER}/1YJP.cif", ["GLY", "ASN", "ASN", "GLN", "GLN", "ASN", "TRY"]), + ( + f"{MMCIF_FOLDER}/1YJP.cif.gz", + ["GLY", "ASN", "ASN", "GLN", "GLN", "ASN", "TRY"], + ), + (f"{MMCIF_FOLDER}/7ETN.cif", ["PRO", "PHE", "LEU", "ILE"]), + (f"{MMCIF_FOLDER}/7ETN.cif.gz", ["PRO", "PHE", "LEU", "ILE"]), + ], +) +def test_sequence(mmcif_filename, sequence): + u = mda.Universe(mmcif_filename) + assert [res.resname for res in u.residues] == sequence From 27c10d64114591d9a237459a04e2e8e5abd20e2c Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Wed, 2 Oct 2024 19:51:52 +0200 Subject: [PATCH 24/30] Update format specifications --- package/MDAnalysis/coordinates/MMCIF.py | 2 +- package/MDAnalysis/topology/MMCIFParser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/package/MDAnalysis/coordinates/MMCIF.py b/package/MDAnalysis/coordinates/MMCIF.py index f127c92b0d5..d34b6fb2958 100644 --- a/package/MDAnalysis/coordinates/MMCIF.py +++ b/package/MDAnalysis/coordinates/MMCIF.py @@ -45,7 +45,7 @@ class MMCIFReader(base.SingleFrameReaderBase): .. versionadded:: 2.8.0 """ - format = "MMCIF" + format = ["cif", "cif.gz", "mmcif"] units = {"time": None, "length": "Angstrom"} def _read_first_frame(self): diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index 757e314ce19..aed69fc5b6f 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -259,7 +259,7 @@ class MMCIFParser(TopologyReaderBase): .. versionadded:: 2.8.0 """ - format = "MMCIF" + format = ["cif", "cif.gz", "mmcif"] def parse(self, **kwargs): """Read the file and return the structure. From 950cfcf51fba10318464f2050c304695ea9211c5 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Wed, 2 Oct 2024 19:52:03 +0200 Subject: [PATCH 25/30] Write simple tests --- .../MDAnalysisTests/coordinates/test_mmcif.py | 25 +++++++++++++++---- .../MDAnalysisTests/topology/test_mmcif.py | 9 ++++--- 2 files changed, 26 insertions(+), 8 deletions(-) diff --git a/testsuite/MDAnalysisTests/coordinates/test_mmcif.py b/testsuite/MDAnalysisTests/coordinates/test_mmcif.py index ec25cd54de5..b29012c2dd7 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_mmcif.py +++ b/testsuite/MDAnalysisTests/coordinates/test_mmcif.py @@ -1,6 +1,4 @@ import glob -import os -from io import StringIO import MDAnalysis as mda import numpy as np @@ -34,11 +32,28 @@ def test_works_without_explicit_format(mmcif_filename): [ (f"{MMCIF_FOLDER}/1YJP.cif", 59, 66), (f"{MMCIF_FOLDER}/1YJP.cif.gz", 59, 66), - (f"{MMCIF_FOLDER}/7ETN.cif", 70, 70), - (f"{MMCIF_FOLDER}/7ETN.cif.gz", 70, 70), + (f"{MMCIF_FOLDER}/7ETN.cif", 150, 150), + (f"{MMCIF_FOLDER}/7ETN.cif.gz", 150, 150), ], ) def test_n_atoms(mmcif_filename, natoms_protein, natoms_total): u = mda.Universe(mmcif_filename) assert len(u.atoms) == natoms_total - assert len(u.select("protein").atoms) == natoms_protein + assert len(u.select_atoms("protein").atoms) == natoms_protein + + +@pytest.mark.parametrize( + "mmcif_filename,cell", + [ + ( + f"{MMCIF_FOLDER}/1YJP.cif.gz", + np.array([21.937, 4.866, 23.477, 90.00, 107.08, 90.00]), + ), + ( + f"{MMCIF_FOLDER}/7ETN.cif.gz", + np.array([5.264, 24.967, 20.736, 90.00, 94.85, 90.00]), + ), + ], +) +def test_cell(mmcif_filename, cell): + assert np.allclose(mda.Universe(mmcif_filename).coord._unitcell, cell) diff --git a/testsuite/MDAnalysisTests/topology/test_mmcif.py b/testsuite/MDAnalysisTests/topology/test_mmcif.py index 29d44ce7fe5..341b5fe5296 100644 --- a/testsuite/MDAnalysisTests/topology/test_mmcif.py +++ b/testsuite/MDAnalysisTests/topology/test_mmcif.py @@ -34,10 +34,10 @@ def test_chains(mmcif_filename, n_chains): @pytest.mark.parametrize( "mmcif_filename,sequence", [ - (f"{MMCIF_FOLDER}/1YJP.cif", ["GLY", "ASN", "ASN", "GLN", "GLN", "ASN", "TRY"]), + (f"{MMCIF_FOLDER}/1YJP.cif", ["GLY", "ASN", "ASN", "GLN", "GLN", "ASN", "TYR"]), ( f"{MMCIF_FOLDER}/1YJP.cif.gz", - ["GLY", "ASN", "ASN", "GLN", "GLN", "ASN", "TRY"], + ["GLY", "ASN", "ASN", "GLN", "GLN", "ASN", "TYR"], ), (f"{MMCIF_FOLDER}/7ETN.cif", ["PRO", "PHE", "LEU", "ILE"]), (f"{MMCIF_FOLDER}/7ETN.cif.gz", ["PRO", "PHE", "LEU", "ILE"]), @@ -45,4 +45,7 @@ def test_chains(mmcif_filename, n_chains): ) def test_sequence(mmcif_filename, sequence): u = mda.Universe(mmcif_filename) - assert [res.resname for res in u.residues] == sequence + in_structure = [ + str(res.resname) for res in u.select_atoms("protein and chainid A").residues + ] + assert in_structure == sequence, ":".join(in_structure) From ef29338ea7a32e62177e7a825056f63f30ba8fd6 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Thu, 24 Oct 2024 19:52:38 +0200 Subject: [PATCH 26/30] update github action with gemmi --- .github/actions/setup-deps/action.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/actions/setup-deps/action.yaml b/.github/actions/setup-deps/action.yaml index cbfd91df7e1..b1a08cda783 100644 --- a/.github/actions/setup-deps/action.yaml +++ b/.github/actions/setup-deps/action.yaml @@ -23,6 +23,8 @@ inputs: default: 'cython' fasteners: default: 'fasteners' + gemmi: + default: 'gemmi' griddataformats: default: 'griddataformats' gsd: @@ -130,6 +132,7 @@ runs: ${{ inputs.dask }} ${{ inputs.distopia }} ${{ inputs.gsd }} + ${{ inputs.gemmi }} ${{ inputs.h5py }} ${{ inputs.hole2 }} ${{ inputs.joblib }} From caca17e1d1dcb08c835dbba495cb5bf0ceac924c Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Thu, 24 Oct 2024 20:13:33 +0200 Subject: [PATCH 27/30] fix gemmi import errors --- package/MDAnalysis/coordinates/MMCIF.py | 2 +- package/MDAnalysis/topology/MMCIFParser.py | 6 +++--- testsuite/MDAnalysisTests/coordinates/test_mmcif.py | 7 +++++-- testsuite/MDAnalysisTests/topology/test_mmcif.py | 5 +++-- 4 files changed, 12 insertions(+), 8 deletions(-) diff --git a/package/MDAnalysis/coordinates/MMCIF.py b/package/MDAnalysis/coordinates/MMCIF.py index d34b6fb2958..ee8df98d781 100644 --- a/package/MDAnalysis/coordinates/MMCIF.py +++ b/package/MDAnalysis/coordinates/MMCIF.py @@ -15,7 +15,7 @@ logger = logging.getLogger("MDAnalysis.coordinates.MMCIF") -def get_coordinates(model: gemmi.Model) -> np.ndarray: +def get_coordinates(model: 'gemmi.Model') -> np.ndarray: """Get coordinates of all atoms in the `gemmi.Model` object. Parameters diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index aed69fc5b6f..2a0adb71704 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -64,7 +64,7 @@ def _into_idx(arr: list) -> list[int]: return [idx for idx, (_, group) in enumerate(itertools.groupby(arr)) for _ in group] -def get_Atomattrs(model: gemmi.Model) -> tuple[list[AtomAttr], np.ndarray]: +def get_Atomattrs(model: 'gemmi.Model') -> tuple[list[AtomAttr], np.ndarray]: """Extract all attributes that are subclasses of :class:`..core.topologyattrs.AtomAttr` from a ``gemmi.Model`` object, and a `residx` index with indices of all atoms in residues. @@ -165,7 +165,7 @@ def get_Atomattrs(model: gemmi.Model) -> tuple[list[AtomAttr], np.ndarray]: return attrs, residx -def get_Residueattrs(model: gemmi.Model) -> tuple[list[ResidueAttr], np.ndarray]: +def get_Residueattrs(model: 'gemmi.Model') -> tuple[list[ResidueAttr], np.ndarray]: """Extract all attributes that are subclasses of :class:`..core.topologyattrs.ResidueAttr` from a ``gemmi.Model`` object, and a `segidx` index witn indices of all residues in segments. @@ -214,7 +214,7 @@ def get_Residueattrs(model: gemmi.Model) -> tuple[list[ResidueAttr], np.ndarray] return attrs, segidx -def get_Segmentattrs(model: gemmi.Model) -> SegmentAttr: +def get_Segmentattrs(model: 'gemmi.Model') -> SegmentAttr: """Extract all attributes that are subclasses of :class:`..core.topologyattrs.SegmentAttr` from a ``gemmi.Model`` object. Parameters diff --git a/testsuite/MDAnalysisTests/coordinates/test_mmcif.py b/testsuite/MDAnalysisTests/coordinates/test_mmcif.py index b29012c2dd7..10b01f465c3 100644 --- a/testsuite/MDAnalysisTests/coordinates/test_mmcif.py +++ b/testsuite/MDAnalysisTests/coordinates/test_mmcif.py @@ -11,22 +11,24 @@ ) from MDAnalysisTests.datafiles import MMCIF as MMCIF_FOLDER - -# FIXME: rewrite tests to read trajectories only once +from MDAnalysis.coordinates.MMCIF import HAS_GEMMI +@pytest.mark.skipif(not HAS_GEMMI, reason="gemmi not installed") @pytest.mark.parametrize("mmcif_filename", glob.glob(f"{MMCIF_FOLDER}/*.cif*")) def test_works_with_explicit_format(mmcif_filename): u = mda.Universe(mmcif_filename, format="MMCIF") assert u.trajectory.n_atoms > 0 +@pytest.mark.skipif(not HAS_GEMMI, reason="gemmi not installed") @pytest.mark.parametrize("mmcif_filename", glob.glob(f"{MMCIF_FOLDER}/*.cif*")) def test_works_without_explicit_format(mmcif_filename): u = mda.Universe(mmcif_filename) assert u.trajectory.n_atoms > 0 +@pytest.mark.skipif(not HAS_GEMMI, reason="gemmi not installed") @pytest.mark.parametrize( "mmcif_filename,natoms_protein,natoms_total", [ @@ -42,6 +44,7 @@ def test_n_atoms(mmcif_filename, natoms_protein, natoms_total): assert len(u.select_atoms("protein").atoms) == natoms_protein +@pytest.mark.skipif(not HAS_GEMMI, reason="gemmi not installed") @pytest.mark.parametrize( "mmcif_filename,cell", [ diff --git a/testsuite/MDAnalysisTests/topology/test_mmcif.py b/testsuite/MDAnalysisTests/topology/test_mmcif.py index 341b5fe5296..b4ff08a8a55 100644 --- a/testsuite/MDAnalysisTests/topology/test_mmcif.py +++ b/testsuite/MDAnalysisTests/topology/test_mmcif.py @@ -13,10 +13,10 @@ ) from MDAnalysisTests.datafiles import MMCIF as MMCIF_FOLDER - -# FIXME: rewrite tests to read trajectories only once +from MDAnalysis.coordinates.MMCIF import HAS_GEMMI +@pytest.mark.skipif(not HAS_GEMMI, reason="gemmi not installed") @pytest.mark.parametrize( "mmcif_filename,n_chains", [ @@ -31,6 +31,7 @@ def test_chains(mmcif_filename, n_chains): assert len(u.segments) == n_chains +@pytest.mark.skipif(not HAS_GEMMI, reason="gemmi not installed") @pytest.mark.parametrize( "mmcif_filename,sequence", [ From f0e49ccc7abfeab0c7eed577351de7992a92b79d Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Thu, 24 Oct 2024 21:25:59 +0200 Subject: [PATCH 28/30] add mmcif testfiles --- testsuite/MDAnalysisTests/data/mmcif/1YJP.cif | 813 +++++++++++ testsuite/MDAnalysisTests/data/mmcif/7ETN.cif | 1251 +++++++++++++++++ 2 files changed, 2064 insertions(+) create mode 100644 testsuite/MDAnalysisTests/data/mmcif/1YJP.cif create mode 100644 testsuite/MDAnalysisTests/data/mmcif/7ETN.cif diff --git a/testsuite/MDAnalysisTests/data/mmcif/1YJP.cif b/testsuite/MDAnalysisTests/data/mmcif/1YJP.cif new file mode 100644 index 00000000000..b0e30c0ddc6 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/mmcif/1YJP.cif @@ -0,0 +1,813 @@ +data_1YJP +# +_entry.id 1YJP +# +_audit.revision_id 1 +_audit.creation_date 2005-01-14 +_audit.update_record 'initial release' +# +_audit_conform.dict_name mmcif_pdbx.dic +_audit_conform.dict_version 5.387 +_audit_conform.dict_location http://mmcif.pdb.org/dictionaries/ascii/mmcif_pdbx.dic +# +loop_ +_database_2.database_id +_database_2.database_code +_database_2.pdbx_database_accession +_database_2.pdbx_DOI +PDB 1YJP pdb_00001yjp 10.2210/pdb1yjp/pdb +RCSB RCSB031590 ? ? +WWPDB D_1000031590 ? ? +# +loop_ +_pdbx_audit_revision_history.ordinal +_pdbx_audit_revision_history.data_content_type +_pdbx_audit_revision_history.major_revision +_pdbx_audit_revision_history.minor_revision +_pdbx_audit_revision_history.revision_date +1 'Structure model' 1 0 2005-06-14 +2 'Structure model' 1 1 2008-04-30 +3 'Structure model' 1 2 2011-07-13 +4 'Structure model' 1 3 2017-10-11 +5 'Structure model' 1 4 2024-02-14 +# +_pdbx_audit_revision_details.ordinal 1 +_pdbx_audit_revision_details.revision_ordinal 1 +_pdbx_audit_revision_details.data_content_type 'Structure model' +_pdbx_audit_revision_details.provider repository +_pdbx_audit_revision_details.type 'Initial release' +_pdbx_audit_revision_details.description ? +_pdbx_audit_revision_details.details ? +# +loop_ +_pdbx_audit_revision_group.ordinal +_pdbx_audit_revision_group.revision_ordinal +_pdbx_audit_revision_group.data_content_type +_pdbx_audit_revision_group.group +1 2 'Structure model' 'Version format compliance' +2 3 'Structure model' 'Version format compliance' +3 4 'Structure model' 'Refinement description' +4 5 'Structure model' 'Data collection' +5 5 'Structure model' 'Database references' +# +loop_ +_pdbx_audit_revision_category.ordinal +_pdbx_audit_revision_category.revision_ordinal +_pdbx_audit_revision_category.data_content_type +_pdbx_audit_revision_category.category +1 4 'Structure model' software +2 5 'Structure model' chem_comp_atom +3 5 'Structure model' chem_comp_bond +4 5 'Structure model' database_2 +# +loop_ +_pdbx_audit_revision_item.ordinal +_pdbx_audit_revision_item.revision_ordinal +_pdbx_audit_revision_item.data_content_type +_pdbx_audit_revision_item.item +1 5 'Structure model' '_database_2.pdbx_DOI' +2 5 'Structure model' '_database_2.pdbx_database_accession' +# +_pdbx_database_status.entry_id 1YJP +_pdbx_database_status.deposit_site RCSB +_pdbx_database_status.process_site RCSB +_pdbx_database_status.recvd_initial_deposition_date 2005-01-15 +_pdbx_database_status.status_code REL +_pdbx_database_status.status_code_sf REL +_pdbx_database_status.status_code_mr ? +_pdbx_database_status.SG_entry ? +_pdbx_database_status.pdb_format_compatible Y +_pdbx_database_status.status_code_cs ? +_pdbx_database_status.methods_development_category ? +_pdbx_database_status.status_code_nmr_data ? +# +_pdbx_database_related.db_name PDB +_pdbx_database_related.db_id 1YJO +_pdbx_database_related.details . +_pdbx_database_related.content_type unspecified +# +loop_ +_audit_author.name +_audit_author.pdbx_ordinal +'Nelson, R.' 1 +'Sawaya, M.R.' 2 +'Balbirnie, M.' 3 +'Madsen, A.O.' 4 +'Riekel, C.' 5 +'Grothe, R.' 6 +'Eisenberg, D.' 7 +# +loop_ +_citation.id +_citation.title +_citation.journal_abbrev +_citation.journal_volume +_citation.page_first +_citation.page_last +_citation.year +_citation.journal_id_ASTM +_citation.country +_citation.journal_id_ISSN +_citation.journal_id_CSD +_citation.book_publisher +_citation.pdbx_database_id_PubMed +_citation.pdbx_database_id_DOI +primary 'Structure of the cross-beta spine of amyloid-like fibrils.' Nature 435 773 778 2005 +NATUAS UK 0028-0836 0006 ? 15944695 10.1038/nature03680 +1 'Refinement of Macromolecular Structures by the Maximum-Likelihood Method' 'Acta Crystallogr.,Sect.D' 53 240 255 1997 +ABCRE6 DK 0907-4449 0766 ? ? ? +# +loop_ +_citation_author.citation_id +_citation_author.name +_citation_author.ordinal +_citation_author.identifier_ORCID +primary 'Nelson, R.' 1 ? +primary 'Sawaya, M.R.' 2 ? +primary 'Balbirnie, M.' 3 ? +primary 'Madsen, A.O.' 4 ? +primary 'Riekel, C.' 5 ? +primary 'Grothe, R.' 6 ? +primary 'Eisenberg, D.' 7 ? +1 'Murshudov, G.N.' 8 ? +1 'Vagin, A.A.' 9 ? +1 'Dodson, E.J.' 10 ? +# +loop_ +_entity.id +_entity.type +_entity.src_method +_entity.pdbx_description +_entity.formula_weight +_entity.pdbx_number_of_molecules +_entity.pdbx_ec +_entity.pdbx_mutation +_entity.pdbx_fragment +_entity.details +1 polymer syn 'Eukaryotic peptide chain release factor GTP-binding subunit' 836.807 1 ? ? 'prion determining domain of Sup35' ? +2 water nat water 18.015 7 ? ? ? ? +# +_entity_name_com.entity_id 1 +_entity_name_com.name +'ERF2, Translation release factor 3, ERF3, ERF-3, Omnipotent suppressor protein 2, G1 to S phase transition protein 1' +# +_entity_poly.entity_id 1 +_entity_poly.type 'polypeptide(L)' +_entity_poly.nstd_linkage no +_entity_poly.nstd_monomer no +_entity_poly.pdbx_seq_one_letter_code GNNQQNY +_entity_poly.pdbx_seq_one_letter_code_can GNNQQNY +_entity_poly.pdbx_strand_id A +_entity_poly.pdbx_target_identifier ? +# +_pdbx_entity_nonpoly.entity_id 2 +_pdbx_entity_nonpoly.name water +_pdbx_entity_nonpoly.comp_id HOH +# +loop_ +_entity_poly_seq.entity_id +_entity_poly_seq.num +_entity_poly_seq.mon_id +_entity_poly_seq.hetero +1 1 GLY n +1 2 ASN n +1 3 ASN n +1 4 GLN n +1 5 GLN n +1 6 ASN n +1 7 TYR n +# +_pdbx_entity_src_syn.entity_id 1 +_pdbx_entity_src_syn.pdbx_src_id 1 +_pdbx_entity_src_syn.pdbx_alt_source_flag sample +_pdbx_entity_src_syn.pdbx_beg_seq_num ? +_pdbx_entity_src_syn.pdbx_end_seq_num ? +_pdbx_entity_src_syn.organism_scientific ? +_pdbx_entity_src_syn.organism_common_name ? +_pdbx_entity_src_syn.ncbi_taxonomy_id ? +_pdbx_entity_src_syn.details 'This sequence is from the prion determining domain of Saccharomyces cerevisiae Sup35' +# +loop_ +_chem_comp.id +_chem_comp.type +_chem_comp.mon_nstd_flag +_chem_comp.name +_chem_comp.pdbx_synonyms +_chem_comp.formula +_chem_comp.formula_weight +ASN 'L-peptide linking' y ASPARAGINE ? 'C4 H8 N2 O3' 132.118 +GLN 'L-peptide linking' y GLUTAMINE ? 'C5 H10 N2 O3' 146.144 +GLY 'peptide linking' y GLYCINE ? 'C2 H5 N O2' 75.067 +HOH non-polymer . WATER ? 'H2 O' 18.015 +TYR 'L-peptide linking' y TYROSINE ? 'C9 H11 N O3' 181.189 +# +loop_ +_pdbx_poly_seq_scheme.asym_id +_pdbx_poly_seq_scheme.entity_id +_pdbx_poly_seq_scheme.seq_id +_pdbx_poly_seq_scheme.mon_id +_pdbx_poly_seq_scheme.ndb_seq_num +_pdbx_poly_seq_scheme.pdb_seq_num +_pdbx_poly_seq_scheme.auth_seq_num +_pdbx_poly_seq_scheme.pdb_mon_id +_pdbx_poly_seq_scheme.auth_mon_id +_pdbx_poly_seq_scheme.pdb_strand_id +_pdbx_poly_seq_scheme.pdb_ins_code +_pdbx_poly_seq_scheme.hetero +A 1 1 GLY 1 1 1 GLY GLY A . n +A 1 2 ASN 2 2 2 ASN ASN A . n +A 1 3 ASN 3 3 3 ASN ASN A . n +A 1 4 GLN 4 4 4 GLN GLN A . n +A 1 5 GLN 5 5 5 GLN GLN A . n +A 1 6 ASN 6 6 6 ASN ASN A . n +A 1 7 TYR 7 7 7 TYR TYR A . n +# +loop_ +_pdbx_nonpoly_scheme.asym_id +_pdbx_nonpoly_scheme.entity_id +_pdbx_nonpoly_scheme.mon_id +_pdbx_nonpoly_scheme.ndb_seq_num +_pdbx_nonpoly_scheme.pdb_seq_num +_pdbx_nonpoly_scheme.auth_seq_num +_pdbx_nonpoly_scheme.pdb_mon_id +_pdbx_nonpoly_scheme.auth_mon_id +_pdbx_nonpoly_scheme.pdb_strand_id +_pdbx_nonpoly_scheme.pdb_ins_code +B 2 HOH 1 8 8 HOH HOH A . +B 2 HOH 2 9 9 HOH HOH A . +B 2 HOH 3 10 10 HOH HOH A . +B 2 HOH 4 11 11 HOH HOH A . +B 2 HOH 5 12 12 HOH HOH A . +B 2 HOH 6 13 13 HOH HOH A . +B 2 HOH 7 14 14 HOH HOH A . +# +loop_ +_software.name +_software.version +_software.date +_software.type +_software.contact_author +_software.contact_author_email +_software.classification +_software.location +_software.language +_software.citation_id +_software.pdbx_ordinal +REFMAC . ? program 'Murshudov, G.N.' ccp4@dl.ac.uk refinement http://www.ccp4.ac.uk/main.html Fortran ? 1 +DENZO . ? ? ? ? 'data reduction' ? ? ? 2 +SCALEPACK . ? ? ? ? 'data scaling' ? ? ? 3 +# +_cell.entry_id 1YJP +_cell.length_a 21.937 +_cell.length_b 4.866 +_cell.length_c 23.477 +_cell.angle_alpha 90.00 +_cell.angle_beta 107.08 +_cell.angle_gamma 90.00 +_cell.Z_PDB 2 +_cell.pdbx_unique_axis ? +# +_symmetry.entry_id 1YJP +_symmetry.space_group_name_H-M 'P 1 21 1' +_symmetry.pdbx_full_space_group_name_H-M ? +_symmetry.cell_setting ? +_symmetry.Int_Tables_number 4 +_symmetry.space_group_name_Hall ? +# +_exptl.method 'X-RAY DIFFRACTION' +_exptl.entry_id 1YJP +_exptl.crystals_number 1 +# +_exptl_crystal.id 1 +_exptl_crystal.density_meas ? +_exptl_crystal.density_percent_sol 14.03 +_exptl_crystal.density_Matthews 1.43 +_exptl_crystal.description ? +_exptl_crystal.F_000 ? +_exptl_crystal.preparation ? +# +_exptl_crystal_grow.crystal_id 1 +_exptl_crystal_grow.method 'VAPOR DIFFUSION, HANGING DROP' +_exptl_crystal_grow.pH 7 +_exptl_crystal_grow.temp 298 +_exptl_crystal_grow.temp_details ? +_exptl_crystal_grow.pdbx_details 'water, pH 7, VAPOR DIFFUSION, HANGING DROP, temperature 298K' +_exptl_crystal_grow.pdbx_pH_range . +# +_diffrn.id 1 +_diffrn.ambient_temp 100 +_diffrn.ambient_temp_details ? +_diffrn.crystal_id 1 +# +_diffrn_detector.diffrn_id 1 +_diffrn_detector.detector CCD +_diffrn_detector.type MARRESEARCH +_diffrn_detector.pdbx_collection_date 2004-06-12 +_diffrn_detector.details 'Ellipsoidal Mirror' +# +_diffrn_radiation.diffrn_id 1 +_diffrn_radiation.wavelength_id 1 +_diffrn_radiation.pdbx_diffrn_protocol 'SINGLE WAVELENGTH' +_diffrn_radiation.monochromator 'channel-cut Si-111 monochromator' +_diffrn_radiation.pdbx_monochromatic_or_laue_m_l M +_diffrn_radiation.pdbx_scattering_type x-ray +# +_diffrn_radiation_wavelength.id 1 +_diffrn_radiation_wavelength.wavelength 0.975 +_diffrn_radiation_wavelength.wt 1.0 +# +_diffrn_source.diffrn_id 1 +_diffrn_source.source SYNCHROTRON +_diffrn_source.type 'ESRF BEAMLINE ID13' +_diffrn_source.pdbx_wavelength 0.975 +_diffrn_source.pdbx_wavelength_list 0.975 +_diffrn_source.pdbx_synchrotron_site ESRF +_diffrn_source.pdbx_synchrotron_beamline ID13 +# +_reflns.d_resolution_low 80.00 +_reflns.d_resolution_high 1.80 +_reflns.number_obs 509 +_reflns.percent_possible_obs 89.5 +_reflns.pdbx_Rmerge_I_obs 0.204 +_reflns.pdbx_chi_squared 1.057 +_reflns.entry_id 1YJP +_reflns.observed_criterion_sigma_F 0 +_reflns.observed_criterion_sigma_I 0 +_reflns.number_all 509 +_reflns.pdbx_Rsym_value ? +_reflns.pdbx_netI_over_sigmaI 3.75 +_reflns.B_iso_Wilson_estimate 45.6 +_reflns.pdbx_redundancy 2.0 +_reflns.R_free_details ? +_reflns.limit_h_max ? +_reflns.limit_h_min ? +_reflns.limit_k_max ? +_reflns.limit_k_min ? +_reflns.limit_l_max ? +_reflns.limit_l_min ? +_reflns.observed_criterion_F_max ? +_reflns.observed_criterion_F_min ? +_reflns.pdbx_scaling_rejects ? +_reflns.pdbx_diffrn_id 1 +_reflns.pdbx_ordinal 1 +# +_reflns_shell.d_res_low 1.94 +_reflns_shell.d_res_high 1.80 +_reflns_shell.number_unique_all 85 +_reflns_shell.percent_possible_all 84.2 +_reflns_shell.Rmerge_I_obs 0.491 +_reflns_shell.pdbx_redundancy ? +_reflns_shell.pdbx_chi_squared 1.092 +_reflns_shell.number_unique_obs ? +_reflns_shell.meanI_over_sigI_obs 1.5 +_reflns_shell.pdbx_Rsym_value ? +_reflns_shell.percent_possible_obs ? +_reflns_shell.number_measured_all ? +_reflns_shell.number_measured_obs ? +_reflns_shell.pdbx_diffrn_id ? +_reflns_shell.pdbx_ordinal 1 +# +_refine.entry_id 1YJP +_refine.ls_d_res_high 1.80 +_refine.ls_d_res_low 22.44 +_refine.pdbx_ls_sigma_F 0 +_refine.pdbx_ls_sigma_I 0 +_refine.ls_number_reflns_all 474 +_refine.ls_number_reflns_obs 474 +_refine.ls_number_reflns_R_free 20 +_refine.ls_percent_reflns_obs ? +_refine.ls_R_factor_all 0.18139 +_refine.ls_R_factor_obs 0.18139 +_refine.ls_R_factor_R_work 0.18086 +_refine.ls_R_factor_R_free 0.19014 +_refine.ls_redundancy_reflns_obs ? +_refine.pdbx_data_cutoff_high_absF ? +_refine.pdbx_data_cutoff_low_absF ? +_refine.ls_number_parameters ? +_refine.ls_number_restraints ? +_refine.ls_percent_reflns_R_free ? +_refine.ls_R_factor_R_free_error ? +_refine.ls_R_factor_R_free_error_details ? +_refine.pdbx_method_to_determine_struct 'FOURIER SYNTHESIS' +_refine.pdbx_starting_model ? +_refine.pdbx_ls_cross_valid_method THROUGHOUT +_refine.pdbx_R_Free_selection_details RANDOM +_refine.pdbx_stereochem_target_val_spec_case ? +_refine.pdbx_stereochemistry_target_values 'Engh & Huber' +_refine.solvent_model_details ? +_refine.solvent_model_param_bsol ? +_refine.solvent_model_param_ksol ? +_refine.occupancy_max ? +_refine.occupancy_min ? +_refine.pdbx_isotropic_thermal_model ? +_refine.B_iso_mean ? +_refine.aniso_B[1][1] ? +_refine.aniso_B[1][2] ? +_refine.aniso_B[1][3] ? +_refine.aniso_B[2][2] ? +_refine.aniso_B[2][3] ? +_refine.aniso_B[3][3] ? +_refine.details ? +_refine.B_iso_min ? +_refine.B_iso_max ? +_refine.correlation_coeff_Fo_to_Fc ? +_refine.correlation_coeff_Fo_to_Fc_free ? +_refine.pdbx_solvent_vdw_probe_radii ? +_refine.pdbx_solvent_ion_probe_radii ? +_refine.pdbx_solvent_shrinkage_radii ? +_refine.overall_SU_R_Cruickshank_DPI ? +_refine.overall_SU_R_free ? +_refine.overall_SU_B ? +_refine.overall_SU_ML ? +_refine.pdbx_overall_ESU_R ? +_refine.pdbx_overall_ESU_R_Free ? +_refine.pdbx_data_cutoff_high_rms_absF ? +_refine.ls_wR_factor_R_free ? +_refine.ls_wR_factor_R_work ? +_refine.overall_FOM_free_R_set ? +_refine.overall_FOM_work_R_set ? +_refine.pdbx_refine_id 'X-RAY DIFFRACTION' +_refine.pdbx_diffrn_id 1 +_refine.pdbx_TLS_residual_ADP_flag ? +_refine.pdbx_overall_phase_error ? +_refine.pdbx_overall_SU_R_free_Cruickshank_DPI ? +_refine.pdbx_overall_SU_R_Blow_DPI ? +_refine.pdbx_overall_SU_R_free_Blow_DPI ? +# +_refine_hist.pdbx_refine_id 'X-RAY DIFFRACTION' +_refine_hist.cycle_id LAST +_refine_hist.pdbx_number_atoms_protein 59 +_refine_hist.pdbx_number_atoms_nucleic_acid 0 +_refine_hist.pdbx_number_atoms_ligand 0 +_refine_hist.number_atoms_solvent 7 +_refine_hist.number_atoms_total 66 +_refine_hist.d_res_high 1.80 +_refine_hist.d_res_low 22.44 +# +loop_ +_refine_ls_restr.type +_refine_ls_restr.dev_ideal +_refine_ls_restr.dev_ideal_target +_refine_ls_restr.number +_refine_ls_restr.weight +_refine_ls_restr.pdbx_refine_id +_refine_ls_restr.pdbx_restraint_function +r_angle_refined_deg 1.228 ? ? ? 'X-RAY DIFFRACTION' ? +r_bond_refined_d 0.014 ? ? ? 'X-RAY DIFFRACTION' ? +# +_struct.entry_id 1YJP +_struct.title 'Structure of GNNQQNY from yeast prion Sup35' +_struct.pdbx_model_details ? +_struct.pdbx_CASP_flag ? +_struct.pdbx_model_type_details ? +# +_struct_keywords.entry_id 1YJP +_struct_keywords.pdbx_keywords 'PROTEIN BINDING' +_struct_keywords.text 'beta sheet, steric zipper, glutamine zipper, asparagine zipper, PROTEIN BINDING' +# +loop_ +_struct_asym.id +_struct_asym.pdbx_blank_PDB_chainid_flag +_struct_asym.pdbx_modified +_struct_asym.entity_id +_struct_asym.details +A N N 1 ? +B N N 2 ? +# +_struct_ref.id 1 +_struct_ref.db_name UNP +_struct_ref.db_code ERF2_YEAST +_struct_ref.pdbx_db_accession P05453 +_struct_ref.entity_id 1 +_struct_ref.pdbx_seq_one_letter_code GNNQQNY +_struct_ref.pdbx_align_begin 7 +_struct_ref.pdbx_db_isoform ? +# +_struct_ref_seq.align_id 1 +_struct_ref_seq.ref_id 1 +_struct_ref_seq.pdbx_PDB_id_code 1YJP +_struct_ref_seq.pdbx_strand_id A +_struct_ref_seq.seq_align_beg 1 +_struct_ref_seq.pdbx_seq_align_beg_ins_code ? +_struct_ref_seq.seq_align_end 7 +_struct_ref_seq.pdbx_seq_align_end_ins_code ? +_struct_ref_seq.pdbx_db_accession P05453 +_struct_ref_seq.db_align_beg 7 +_struct_ref_seq.pdbx_db_align_beg_ins_code ? +_struct_ref_seq.db_align_end 13 +_struct_ref_seq.pdbx_db_align_end_ins_code ? +_struct_ref_seq.pdbx_auth_seq_align_beg 1 +_struct_ref_seq.pdbx_auth_seq_align_end 7 +# +_pdbx_struct_assembly.id 1 +_pdbx_struct_assembly.details author_defined_assembly +_pdbx_struct_assembly.method_details ? +_pdbx_struct_assembly.oligomeric_details dimeric +_pdbx_struct_assembly.oligomeric_count 2 +# +_pdbx_struct_assembly_gen.assembly_id 1 +_pdbx_struct_assembly_gen.oper_expression 1,2 +_pdbx_struct_assembly_gen.asym_id_list A,B +# +loop_ +_pdbx_struct_oper_list.id +_pdbx_struct_oper_list.type +_pdbx_struct_oper_list.name +_pdbx_struct_oper_list.symmetry_operation +_pdbx_struct_oper_list.matrix[1][1] +_pdbx_struct_oper_list.matrix[1][2] +_pdbx_struct_oper_list.matrix[1][3] +_pdbx_struct_oper_list.vector[1] +_pdbx_struct_oper_list.matrix[2][1] +_pdbx_struct_oper_list.matrix[2][2] +_pdbx_struct_oper_list.matrix[2][3] +_pdbx_struct_oper_list.vector[2] +_pdbx_struct_oper_list.matrix[3][1] +_pdbx_struct_oper_list.matrix[3][2] +_pdbx_struct_oper_list.matrix[3][3] +_pdbx_struct_oper_list.vector[3] +1 'identity operation' 1_555 x,y,z 1.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 1.0000000000 +0.0000000000 0.0000000000 0.0000000000 0.0000000000 1.0000000000 0.0000000000 +2 'crystal symmetry operation' 2_555 -x,y+1/2,-z -1.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 1.0000000000 +0.0000000000 2.4330000000 0.0000000000 0.0000000000 -1.0000000000 0.0000000000 +# +_pdbx_database_remark.id 300 +_pdbx_database_remark.text +;BIOMOLECULE: 1 +THIS ENTRY CONTAINS THE CRYSTALLOGRAPHIC ASYMMETRIC UNIT +WHICH CONSISTS OF 1 CHAIN(S). The second beta strand of +the beta sandwich is generated as described in remark 350. +Beta sheets are generated from unit cell translations +along the unit cell b dimension: x,y+1,z. +; +# +loop_ +_chem_comp_atom.comp_id +_chem_comp_atom.atom_id +_chem_comp_atom.type_symbol +_chem_comp_atom.pdbx_aromatic_flag +_chem_comp_atom.pdbx_stereo_config +_chem_comp_atom.pdbx_ordinal +ASN N N N N 1 +ASN CA C N S 2 +ASN C C N N 3 +ASN O O N N 4 +ASN CB C N N 5 +ASN CG C N N 6 +ASN OD1 O N N 7 +ASN ND2 N N N 8 +ASN OXT O N N 9 +ASN H H N N 10 +ASN H2 H N N 11 +ASN HA H N N 12 +ASN HB2 H N N 13 +ASN HB3 H N N 14 +ASN HD21 H N N 15 +ASN HD22 H N N 16 +ASN HXT H N N 17 +GLN N N N N 18 +GLN CA C N S 19 +GLN C C N N 20 +GLN O O N N 21 +GLN CB C N N 22 +GLN CG C N N 23 +GLN CD C N N 24 +GLN OE1 O N N 25 +GLN NE2 N N N 26 +GLN OXT O N N 27 +GLN H H N N 28 +GLN H2 H N N 29 +GLN HA H N N 30 +GLN HB2 H N N 31 +GLN HB3 H N N 32 +GLN HG2 H N N 33 +GLN HG3 H N N 34 +GLN HE21 H N N 35 +GLN HE22 H N N 36 +GLN HXT H N N 37 +GLY N N N N 38 +GLY CA C N N 39 +GLY C C N N 40 +GLY O O N N 41 +GLY OXT O N N 42 +GLY H H N N 43 +GLY H2 H N N 44 +GLY HA2 H N N 45 +GLY HA3 H N N 46 +GLY HXT H N N 47 +HOH O O N N 48 +HOH H1 H N N 49 +HOH H2 H N N 50 +TYR N N N N 51 +TYR CA C N S 52 +TYR C C N N 53 +TYR O O N N 54 +TYR CB C N N 55 +TYR CG C Y N 56 +TYR CD1 C Y N 57 +TYR CD2 C Y N 58 +TYR CE1 C Y N 59 +TYR CE2 C Y N 60 +TYR CZ C Y N 61 +TYR OH O N N 62 +TYR OXT O N N 63 +TYR H H N N 64 +TYR H2 H N N 65 +TYR HA H N N 66 +TYR HB2 H N N 67 +TYR HB3 H N N 68 +TYR HD1 H N N 69 +TYR HD2 H N N 70 +TYR HE1 H N N 71 +TYR HE2 H N N 72 +TYR HH H N N 73 +TYR HXT H N N 74 +# +loop_ +_chem_comp_bond.comp_id +_chem_comp_bond.atom_id_1 +_chem_comp_bond.atom_id_2 +_chem_comp_bond.value_order +_chem_comp_bond.pdbx_aromatic_flag +_chem_comp_bond.pdbx_stereo_config +_chem_comp_bond.pdbx_ordinal +ASN N CA sing N N 1 +ASN N H sing N N 2 +ASN N H2 sing N N 3 +ASN CA C sing N N 4 +ASN CA CB sing N N 5 +ASN CA HA sing N N 6 +ASN C O doub N N 7 +ASN C OXT sing N N 8 +ASN CB CG sing N N 9 +ASN CB HB2 sing N N 10 +ASN CB HB3 sing N N 11 +ASN CG OD1 doub N N 12 +ASN CG ND2 sing N N 13 +ASN ND2 HD21 sing N N 14 +ASN ND2 HD22 sing N N 15 +ASN OXT HXT sing N N 16 +GLN N CA sing N N 17 +GLN N H sing N N 18 +GLN N H2 sing N N 19 +GLN CA C sing N N 20 +GLN CA CB sing N N 21 +GLN CA HA sing N N 22 +GLN C O doub N N 23 +GLN C OXT sing N N 24 +GLN CB CG sing N N 25 +GLN CB HB2 sing N N 26 +GLN CB HB3 sing N N 27 +GLN CG CD sing N N 28 +GLN CG HG2 sing N N 29 +GLN CG HG3 sing N N 30 +GLN CD OE1 doub N N 31 +GLN CD NE2 sing N N 32 +GLN NE2 HE21 sing N N 33 +GLN NE2 HE22 sing N N 34 +GLN OXT HXT sing N N 35 +GLY N CA sing N N 36 +GLY N H sing N N 37 +GLY N H2 sing N N 38 +GLY CA C sing N N 39 +GLY CA HA2 sing N N 40 +GLY CA HA3 sing N N 41 +GLY C O doub N N 42 +GLY C OXT sing N N 43 +GLY OXT HXT sing N N 44 +HOH O H1 sing N N 45 +HOH O H2 sing N N 46 +TYR N CA sing N N 47 +TYR N H sing N N 48 +TYR N H2 sing N N 49 +TYR CA C sing N N 50 +TYR CA CB sing N N 51 +TYR CA HA sing N N 52 +TYR C O doub N N 53 +TYR C OXT sing N N 54 +TYR CB CG sing N N 55 +TYR CB HB2 sing N N 56 +TYR CB HB3 sing N N 57 +TYR CG CD1 doub Y N 58 +TYR CG CD2 sing Y N 59 +TYR CD1 CE1 sing Y N 60 +TYR CD1 HD1 sing N N 61 +TYR CD2 CE2 doub Y N 62 +TYR CD2 HD2 sing N N 63 +TYR CE1 CZ doub Y N 64 +TYR CE1 HE1 sing N N 65 +TYR CE2 CZ sing Y N 66 +TYR CE2 HE2 sing N N 67 +TYR CZ OH sing N N 68 +TYR OH HH sing N N 69 +TYR OXT HXT sing N N 70 +# +_atom_sites.entry_id 1YJP +_atom_sites.fract_transf_matrix[1][1] 0.045585 +_atom_sites.fract_transf_matrix[1][2] 0.000000 +_atom_sites.fract_transf_matrix[1][3] 0.014006 +_atom_sites.fract_transf_matrix[2][1] 0.000000 +_atom_sites.fract_transf_matrix[2][2] 0.205508 +_atom_sites.fract_transf_matrix[2][3] 0.000000 +_atom_sites.fract_transf_matrix[3][1] 0.000000 +_atom_sites.fract_transf_matrix[3][2] 0.000000 +_atom_sites.fract_transf_matrix[3][3] 0.044560 +_atom_sites.fract_transf_vector[1] 0.00000 +_atom_sites.fract_transf_vector[2] 0.00000 +_atom_sites.fract_transf_vector[3] 0.00000 +# +loop_ +_atom_type.symbol +C +N +O +# +loop_ +_atom_site.group_PDB +_atom_site.id +_atom_site.type_symbol +_atom_site.label_atom_id +_atom_site.label_alt_id +_atom_site.label_comp_id +_atom_site.label_asym_id +_atom_site.label_entity_id +_atom_site.label_seq_id +_atom_site.pdbx_PDB_ins_code +_atom_site.Cartn_x +_atom_site.Cartn_y +_atom_site.Cartn_z +_atom_site.occupancy +_atom_site.B_iso_or_equiv +_atom_site.pdbx_formal_charge +_atom_site.auth_seq_id +_atom_site.auth_comp_id +_atom_site.auth_asym_id +_atom_site.auth_atom_id +_atom_site.pdbx_PDB_model_num +ATOM 1 N N . GLY A 1 1 ? -9.009 4.612 6.102 1.00 16.77 ? 1 GLY A N 1 +ATOM 2 C CA . GLY A 1 1 ? -9.052 4.207 4.651 1.00 16.57 ? 1 GLY A CA 1 +ATOM 3 C C . GLY A 1 1 ? -8.015 3.140 4.419 1.00 16.16 ? 1 GLY A C 1 +ATOM 4 O O . GLY A 1 1 ? -7.523 2.521 5.381 1.00 16.78 ? 1 GLY A O 1 +ATOM 5 N N . ASN A 1 2 ? -7.656 2.923 3.155 1.00 15.02 ? 2 ASN A N 1 +ATOM 6 C CA . ASN A 1 2 ? -6.522 2.038 2.831 1.00 14.10 ? 2 ASN A CA 1 +ATOM 7 C C . ASN A 1 2 ? -5.241 2.537 3.427 1.00 13.13 ? 2 ASN A C 1 +ATOM 8 O O . ASN A 1 2 ? -4.978 3.742 3.426 1.00 11.91 ? 2 ASN A O 1 +ATOM 9 C CB . ASN A 1 2 ? -6.346 1.881 1.341 1.00 15.38 ? 2 ASN A CB 1 +ATOM 10 C CG . ASN A 1 2 ? -7.584 1.342 0.692 1.00 14.08 ? 2 ASN A CG 1 +ATOM 11 O OD1 . ASN A 1 2 ? -8.025 0.227 1.016 1.00 17.46 ? 2 ASN A OD1 1 +ATOM 12 N ND2 . ASN A 1 2 ? -8.204 2.155 -0.169 1.00 11.72 ? 2 ASN A ND2 1 +ATOM 13 N N . ASN A 1 3 ? -4.438 1.590 3.905 1.00 12.26 ? 3 ASN A N 1 +ATOM 14 C CA . ASN A 1 3 ? -3.193 1.904 4.589 1.00 11.74 ? 3 ASN A CA 1 +ATOM 15 C C . ASN A 1 3 ? -1.955 1.332 3.895 1.00 11.10 ? 3 ASN A C 1 +ATOM 16 O O . ASN A 1 3 ? -1.872 0.119 3.648 1.00 10.42 ? 3 ASN A O 1 +ATOM 17 C CB . ASN A 1 3 ? -3.259 1.378 6.042 1.00 12.15 ? 3 ASN A CB 1 +ATOM 18 C CG . ASN A 1 3 ? -2.006 1.739 6.861 1.00 12.82 ? 3 ASN A CG 1 +ATOM 19 O OD1 . ASN A 1 3 ? -1.702 2.925 7.072 1.00 15.05 ? 3 ASN A OD1 1 +ATOM 20 N ND2 . ASN A 1 3 ? -1.271 0.715 7.306 1.00 13.48 ? 3 ASN A ND2 1 +ATOM 21 N N . GLN A 1 4 ? -1.005 2.228 3.598 1.00 10.29 ? 4 GLN A N 1 +ATOM 22 C CA . GLN A 1 4 ? 0.384 1.888 3.199 1.00 10.53 ? 4 GLN A CA 1 +ATOM 23 C C . GLN A 1 4 ? 1.435 2.606 4.088 1.00 10.24 ? 4 GLN A C 1 +ATOM 24 O O . GLN A 1 4 ? 1.547 3.843 4.115 1.00 8.86 ? 4 GLN A O 1 +ATOM 25 C CB . GLN A 1 4 ? 0.656 2.148 1.711 1.00 9.80 ? 4 GLN A CB 1 +ATOM 26 C CG . GLN A 1 4 ? 1.944 1.458 1.213 1.00 10.25 ? 4 GLN A CG 1 +ATOM 27 C CD . GLN A 1 4 ? 2.504 2.044 -0.089 1.00 12.43 ? 4 GLN A CD 1 +ATOM 28 O OE1 . GLN A 1 4 ? 2.744 3.268 -0.190 1.00 14.62 ? 4 GLN A OE1 1 +ATOM 29 N NE2 . GLN A 1 4 ? 2.750 1.161 -1.091 1.00 9.05 ? 4 GLN A NE2 1 +ATOM 30 N N . GLN A 1 5 ? 2.154 1.821 4.871 1.00 10.38 ? 5 GLN A N 1 +ATOM 31 C CA . GLN A 1 5 ? 3.270 2.361 5.640 1.00 11.39 ? 5 GLN A CA 1 +ATOM 32 C C . GLN A 1 5 ? 4.594 1.768 5.172 1.00 11.52 ? 5 GLN A C 1 +ATOM 33 O O . GLN A 1 5 ? 4.768 0.546 5.054 1.00 12.05 ? 5 GLN A O 1 +ATOM 34 C CB . GLN A 1 5 ? 3.056 2.183 7.147 1.00 11.96 ? 5 GLN A CB 1 +ATOM 35 C CG . GLN A 1 5 ? 1.829 2.950 7.647 1.00 10.81 ? 5 GLN A CG 1 +ATOM 36 C CD . GLN A 1 5 ? 1.344 2.414 8.954 1.00 13.10 ? 5 GLN A CD 1 +ATOM 37 O OE1 . GLN A 1 5 ? 0.774 1.325 9.002 1.00 10.65 ? 5 GLN A OE1 1 +ATOM 38 N NE2 . GLN A 1 5 ? 1.549 3.187 10.039 1.00 12.30 ? 5 GLN A NE2 1 +ATOM 39 N N . ASN A 1 6 ? 5.514 2.664 4.856 1.00 11.99 ? 6 ASN A N 1 +ATOM 40 C CA . ASN A 1 6 ? 6.831 2.310 4.318 1.00 12.30 ? 6 ASN A CA 1 +ATOM 41 C C . ASN A 1 6 ? 7.854 2.761 5.324 1.00 13.40 ? 6 ASN A C 1 +ATOM 42 O O . ASN A 1 6 ? 8.219 3.943 5.374 1.00 13.92 ? 6 ASN A O 1 +ATOM 43 C CB . ASN A 1 6 ? 7.065 3.016 2.993 1.00 12.13 ? 6 ASN A CB 1 +ATOM 44 C CG . ASN A 1 6 ? 5.961 2.735 2.003 1.00 12.77 ? 6 ASN A CG 1 +ATOM 45 O OD1 . ASN A 1 6 ? 5.798 1.604 1.551 1.00 14.27 ? 6 ASN A OD1 1 +ATOM 46 N ND2 . ASN A 1 6 ? 5.195 3.747 1.679 1.00 10.07 ? 6 ASN A ND2 1 +ATOM 47 N N . TYR A 1 7 ? 8.292 1.817 6.147 1.00 14.70 ? 7 TYR A N 1 +ATOM 48 C CA . TYR A 1 7 ? 9.159 2.144 7.299 1.00 15.18 ? 7 TYR A CA 1 +ATOM 49 C C . TYR A 1 7 ? 10.603 2.331 6.885 1.00 15.91 ? 7 TYR A C 1 +ATOM 50 O O . TYR A 1 7 ? 11.041 1.811 5.855 1.00 15.76 ? 7 TYR A O 1 +ATOM 51 C CB . TYR A 1 7 ? 9.061 1.065 8.369 1.00 15.35 ? 7 TYR A CB 1 +ATOM 52 C CG . TYR A 1 7 ? 7.665 0.929 8.902 1.00 14.45 ? 7 TYR A CG 1 +ATOM 53 C CD1 . TYR A 1 7 ? 6.771 0.021 8.327 1.00 15.68 ? 7 TYR A CD1 1 +ATOM 54 C CD2 . TYR A 1 7 ? 7.210 1.756 9.920 1.00 14.80 ? 7 TYR A CD2 1 +ATOM 55 C CE1 . TYR A 1 7 ? 5.480 -0.094 8.796 1.00 13.46 ? 7 TYR A CE1 1 +ATOM 56 C CE2 . TYR A 1 7 ? 5.904 1.649 10.416 1.00 14.33 ? 7 TYR A CE2 1 +ATOM 57 C CZ . TYR A 1 7 ? 5.047 0.729 9.831 1.00 15.09 ? 7 TYR A CZ 1 +ATOM 58 O OH . TYR A 1 7 ? 3.766 0.589 10.291 1.00 14.39 ? 7 TYR A OH 1 +ATOM 59 O OXT . TYR A 1 7 ? 11.358 2.999 7.612 1.00 17.49 ? 7 TYR A OXT 1 +HETATM 60 O O . HOH B 2 . ? -6.471 5.227 7.124 1.00 22.62 ? 8 HOH A O 1 +HETATM 61 O O . HOH B 2 . ? 10.431 1.858 3.216 1.00 19.71 ? 9 HOH A O 1 +HETATM 62 O O . HOH B 2 . ? -11.286 1.756 -1.468 1.00 17.08 ? 10 HOH A O 1 +HETATM 63 O O . HOH B 2 . ? 11.808 4.179 9.970 1.00 23.99 ? 11 HOH A O 1 +HETATM 64 O O . HOH B 2 . ? 13.605 1.327 9.198 1.00 26.17 ? 12 HOH A O 1 +HETATM 65 O O . HOH B 2 . ? -2.749 3.429 10.024 1.00 39.15 ? 13 HOH A O 1 +HETATM 66 O O . HOH B 2 . ? -1.500 0.682 10.967 1.00 43.49 ? 14 HOH A O 1 +# diff --git a/testsuite/MDAnalysisTests/data/mmcif/7ETN.cif b/testsuite/MDAnalysisTests/data/mmcif/7ETN.cif new file mode 100644 index 00000000000..944c4838572 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/mmcif/7ETN.cif @@ -0,0 +1,1251 @@ +data_7ETN +# +_entry.id 7ETN +# +_audit_conform.dict_name mmcif_pdbx.dic +_audit_conform.dict_version 5.392 +_audit_conform.dict_location http://mmcif.pdb.org/dictionaries/ascii/mmcif_pdbx.dic +# +loop_ +_database_2.database_id +_database_2.database_code +_database_2.pdbx_database_accession +_database_2.pdbx_DOI +PDB 7ETN pdb_00007etn 10.2210/pdb7etn/pdb +WWPDB D_1300022157 ? ? +# +loop_ +_pdbx_audit_revision_history.ordinal +_pdbx_audit_revision_history.data_content_type +_pdbx_audit_revision_history.major_revision +_pdbx_audit_revision_history.minor_revision +_pdbx_audit_revision_history.revision_date +1 'Structure model' 1 0 2022-05-25 +2 'Structure model' 1 1 2024-05-29 +# +_pdbx_audit_revision_details.ordinal 1 +_pdbx_audit_revision_details.revision_ordinal 1 +_pdbx_audit_revision_details.data_content_type 'Structure model' +_pdbx_audit_revision_details.provider repository +_pdbx_audit_revision_details.type 'Initial release' +_pdbx_audit_revision_details.description ? +_pdbx_audit_revision_details.details ? +# +_pdbx_audit_revision_group.ordinal 1 +_pdbx_audit_revision_group.revision_ordinal 2 +_pdbx_audit_revision_group.data_content_type 'Structure model' +_pdbx_audit_revision_group.group 'Data collection' +# +loop_ +_pdbx_audit_revision_category.ordinal +_pdbx_audit_revision_category.revision_ordinal +_pdbx_audit_revision_category.data_content_type +_pdbx_audit_revision_category.category +1 2 'Structure model' chem_comp_atom +2 2 'Structure model' chem_comp_bond +# +_pdbx_database_status.status_code REL +_pdbx_database_status.status_code_sf REL +_pdbx_database_status.status_code_mr ? +_pdbx_database_status.entry_id 7ETN +_pdbx_database_status.recvd_initial_deposition_date 2021-05-13 +_pdbx_database_status.SG_entry N +_pdbx_database_status.deposit_site PDBJ +_pdbx_database_status.process_site PDBJ +_pdbx_database_status.status_code_cs ? +_pdbx_database_status.status_code_nmr_data ? +_pdbx_database_status.methods_development_category ? +_pdbx_database_status.pdb_format_compatible Y +# +loop_ +_audit_author.name +_audit_author.pdbx_ordinal +_audit_author.identifier_ORCID +'Kurumida, Y.' 1 0000-0003-2696-154X +'Ikeda, K.' 2 0000-0003-3217-5713 +'Nakamichi, Y.' 3 0000-0001-7089-663X +'Hirano, A.' 4 0000-0002-4138-0308 +'Kobayashi, K.' 5 0000-0002-2662-2452 +'Saito, Y.' 6 0000-0002-4853-0153 +'Kameda, T.' 7 0000-0001-9508-5366 +# +_citation.abstract ? +_citation.abstract_id_CAS ? +_citation.book_id_ISBN ? +_citation.book_publisher ? +_citation.book_publisher_city ? +_citation.book_title ? +_citation.coordinate_linkage ? +_citation.country ? +_citation.database_id_Medline ? +_citation.details ? +_citation.id primary +_citation.journal_abbrev 'To Be Published' +_citation.journal_id_ASTM ? +_citation.journal_id_CSD 0353 +_citation.journal_id_ISSN ? +_citation.journal_full ? +_citation.journal_issue ? +_citation.journal_volume ? +_citation.language ? +_citation.page_first ? +_citation.page_last ? +_citation.title 'Crystal structure of Pro-Phe-Leu-Ile' +_citation.year ? +_citation.database_id_CSD ? +_citation.pdbx_database_id_DOI ? +_citation.pdbx_database_id_PubMed ? +_citation.pdbx_database_id_patent ? +_citation.unpublished_flag ? +# +loop_ +_citation_author.citation_id +_citation_author.name +_citation_author.ordinal +_citation_author.identifier_ORCID +primary 'Kurumida, Y.' 1 0000-0003-2696-154X +primary 'Ikeda, K.' 2 0000-0003-3217-5713 +primary 'Nakamichi, Y.' 3 0000-0001-7089-663X +primary 'Hirano, A.' 4 0000-0002-4138-0308 +primary 'Kobayashi, K.' 5 0000-0002-2662-2452 +primary 'Saito, Y.' 6 0000-0002-4853-0153 +primary 'Kameda, T.' 7 0000-0001-9508-5366 +# +_entity.id 1 +_entity.type polymer +_entity.src_method syn +_entity.pdbx_description PRO-PHE-LEU-ILE +_entity.formula_weight 488.619 +_entity.pdbx_number_of_molecules 2 +_entity.pdbx_ec ? +_entity.pdbx_mutation ? +_entity.pdbx_fragment ? +_entity.details ? +# +_entity_poly.entity_id 1 +_entity_poly.type 'polypeptide(L)' +_entity_poly.nstd_linkage no +_entity_poly.nstd_monomer no +_entity_poly.pdbx_seq_one_letter_code PFLI +_entity_poly.pdbx_seq_one_letter_code_can PFLI +_entity_poly.pdbx_strand_id A,B +_entity_poly.pdbx_target_identifier ? +# +loop_ +_entity_poly_seq.entity_id +_entity_poly_seq.num +_entity_poly_seq.mon_id +_entity_poly_seq.hetero +1 1 PRO n +1 2 PHE n +1 3 LEU n +1 4 ILE n +# +_pdbx_entity_src_syn.entity_id 1 +_pdbx_entity_src_syn.pdbx_src_id 1 +_pdbx_entity_src_syn.pdbx_alt_source_flag sample +_pdbx_entity_src_syn.pdbx_beg_seq_num 1 +_pdbx_entity_src_syn.pdbx_end_seq_num 4 +_pdbx_entity_src_syn.organism_scientific 'synthetic construct' +_pdbx_entity_src_syn.organism_common_name ? +_pdbx_entity_src_syn.ncbi_taxonomy_id 32630 +_pdbx_entity_src_syn.details ? +# +loop_ +_chem_comp.id +_chem_comp.type +_chem_comp.mon_nstd_flag +_chem_comp.name +_chem_comp.pdbx_synonyms +_chem_comp.formula +_chem_comp.formula_weight +ILE 'L-peptide linking' y ISOLEUCINE ? 'C6 H13 N O2' 131.173 +LEU 'L-peptide linking' y LEUCINE ? 'C6 H13 N O2' 131.173 +PHE 'L-peptide linking' y PHENYLALANINE ? 'C9 H11 N O2' 165.189 +PRO 'L-peptide linking' y PROLINE ? 'C5 H9 N O2' 115.130 +# +loop_ +_pdbx_poly_seq_scheme.asym_id +_pdbx_poly_seq_scheme.entity_id +_pdbx_poly_seq_scheme.seq_id +_pdbx_poly_seq_scheme.mon_id +_pdbx_poly_seq_scheme.ndb_seq_num +_pdbx_poly_seq_scheme.pdb_seq_num +_pdbx_poly_seq_scheme.auth_seq_num +_pdbx_poly_seq_scheme.pdb_mon_id +_pdbx_poly_seq_scheme.auth_mon_id +_pdbx_poly_seq_scheme.pdb_strand_id +_pdbx_poly_seq_scheme.pdb_ins_code +_pdbx_poly_seq_scheme.hetero +A 1 1 PRO 1 1 1 PRO PRO A . n +A 1 2 PHE 2 2 2 PHE PHE A . n +A 1 3 LEU 3 3 3 LEU LEU A . n +A 1 4 ILE 4 4 4 ILE ILE A . n +B 1 1 PRO 1 1 1 PRO PRO B . n +B 1 2 PHE 2 2 2 PHE PHE B . n +B 1 3 LEU 3 3 3 LEU LEU B . n +B 1 4 ILE 4 4 4 ILE ILE B . n +# +loop_ +_software.citation_id +_software.classification +_software.compiler_name +_software.compiler_version +_software.contact_author +_software.contact_author_email +_software.date +_software.description +_software.dependencies +_software.hardware +_software.language +_software.location +_software.mods +_software.name +_software.os +_software.os_version +_software.type +_software.version +_software.pdbx_ordinal +? refinement ? ? ? ? ? ? ? ? ? ? ? SHELX ? ? ? . 1 +? 'data reduction' ? ? ? ? ? ? ? ? ? ? ? XDS ? ? ? . 2 +? 'data scaling' ? ? ? ? ? ? ? ? ? ? ? XSCALE ? ? ? . 3 +? 'data extraction' ? ? ? ? ? ? ? ? ? ? ? PDB_EXTRACT ? ? ? 3.27 4 +? phasing ? ? ? ? ? ? ? ? ? ? ? SHELXT ? ? ? . 5 +# +_cell.angle_alpha 90.000 +_cell.angle_alpha_esd ? +_cell.angle_beta 94.850 +_cell.angle_beta_esd ? +_cell.angle_gamma 90.000 +_cell.angle_gamma_esd ? +_cell.entry_id 7ETN +_cell.details ? +_cell.formula_units_Z ? +_cell.length_a 5.264 +_cell.length_a_esd ? +_cell.length_b 24.967 +_cell.length_b_esd ? +_cell.length_c 20.736 +_cell.length_c_esd ? +_cell.volume ? +_cell.volume_esd ? +_cell.Z_PDB 4 +_cell.reciprocal_angle_alpha ? +_cell.reciprocal_angle_beta ? +_cell.reciprocal_angle_gamma ? +_cell.reciprocal_angle_alpha_esd ? +_cell.reciprocal_angle_beta_esd ? +_cell.reciprocal_angle_gamma_esd ? +_cell.reciprocal_length_a ? +_cell.reciprocal_length_b ? +_cell.reciprocal_length_c ? +_cell.reciprocal_length_a_esd ? +_cell.reciprocal_length_b_esd ? +_cell.reciprocal_length_c_esd ? +_cell.pdbx_unique_axis ? +# +_symmetry.entry_id 7ETN +_symmetry.cell_setting ? +_symmetry.Int_Tables_number 4 +_symmetry.space_group_name_Hall ? +_symmetry.space_group_name_H-M 'P 1 21 1' +_symmetry.pdbx_full_space_group_name_H-M ? +# +_exptl.absorpt_coefficient_mu ? +_exptl.absorpt_correction_T_max ? +_exptl.absorpt_correction_T_min ? +_exptl.absorpt_correction_type ? +_exptl.absorpt_process_details ? +_exptl.entry_id 7ETN +_exptl.crystals_number 1 +_exptl.details ? +_exptl.method 'X-RAY DIFFRACTION' +_exptl.method_details ? +# +_exptl_crystal.colour ? +_exptl_crystal.density_diffrn ? +_exptl_crystal.density_Matthews ? +_exptl_crystal.density_method ? +_exptl_crystal.density_percent_sol ? +_exptl_crystal.description ? +_exptl_crystal.F_000 ? +_exptl_crystal.id 1 +_exptl_crystal.preparation ? +_exptl_crystal.size_max ? +_exptl_crystal.size_mid ? +_exptl_crystal.size_min ? +_exptl_crystal.size_rad ? +_exptl_crystal.colour_lustre ? +_exptl_crystal.colour_modifier ? +_exptl_crystal.colour_primary ? +_exptl_crystal.density_meas ? +_exptl_crystal.density_meas_esd ? +_exptl_crystal.density_meas_gt ? +_exptl_crystal.density_meas_lt ? +_exptl_crystal.density_meas_temp ? +_exptl_crystal.density_meas_temp_esd ? +_exptl_crystal.density_meas_temp_gt ? +_exptl_crystal.density_meas_temp_lt ? +_exptl_crystal.pdbx_crystal_image_url ? +_exptl_crystal.pdbx_crystal_image_format ? +_exptl_crystal.pdbx_mosaicity ? +_exptl_crystal.pdbx_mosaicity_esd ? +# +_exptl_crystal_grow.apparatus ? +_exptl_crystal_grow.atmosphere ? +_exptl_crystal_grow.crystal_id 1 +_exptl_crystal_grow.details ? +_exptl_crystal_grow.method 'VAPOR DIFFUSION, HANGING DROP' +_exptl_crystal_grow.method_ref ? +_exptl_crystal_grow.pH 7.0 +_exptl_crystal_grow.pressure ? +_exptl_crystal_grow.pressure_esd ? +_exptl_crystal_grow.seeding ? +_exptl_crystal_grow.seeding_ref ? +_exptl_crystal_grow.temp 293 +_exptl_crystal_grow.temp_details ? +_exptl_crystal_grow.temp_esd ? +_exptl_crystal_grow.time ? +_exptl_crystal_grow.pdbx_details '100 mM MOPS, 5 mM EDTA-2Na' +_exptl_crystal_grow.pdbx_pH_range ? +# +_diffrn.ambient_environment ? +_diffrn.ambient_temp 100 +_diffrn.ambient_temp_details ? +_diffrn.ambient_temp_esd ? +_diffrn.crystal_id 1 +_diffrn.crystal_support ? +_diffrn.crystal_treatment ? +_diffrn.details ? +_diffrn.id 1 +_diffrn.ambient_pressure ? +_diffrn.ambient_pressure_esd ? +_diffrn.ambient_pressure_gt ? +_diffrn.ambient_pressure_lt ? +_diffrn.ambient_temp_gt ? +_diffrn.ambient_temp_lt ? +_diffrn.pdbx_serial_crystal_experiment N +# +_diffrn_detector.details ? +_diffrn_detector.detector PIXEL +_diffrn_detector.diffrn_id 1 +_diffrn_detector.type 'DECTRIS EIGER X 16M' +_diffrn_detector.area_resol_mean ? +_diffrn_detector.dtime ? +_diffrn_detector.pdbx_frames_total ? +_diffrn_detector.pdbx_collection_time_total ? +_diffrn_detector.pdbx_collection_date 2020-02-17 +_diffrn_detector.pdbx_frequency ? +# +_diffrn_radiation.collimation ? +_diffrn_radiation.diffrn_id 1 +_diffrn_radiation.filter_edge ? +_diffrn_radiation.inhomogeneity ? +_diffrn_radiation.monochromator ? +_diffrn_radiation.polarisn_norm ? +_diffrn_radiation.polarisn_ratio ? +_diffrn_radiation.probe ? +_diffrn_radiation.type ? +_diffrn_radiation.xray_symbol ? +_diffrn_radiation.wavelength_id 1 +_diffrn_radiation.pdbx_monochromatic_or_laue_m_l M +_diffrn_radiation.pdbx_wavelength_list ? +_diffrn_radiation.pdbx_wavelength ? +_diffrn_radiation.pdbx_diffrn_protocol 'SINGLE WAVELENGTH' +_diffrn_radiation.pdbx_analyzer ? +_diffrn_radiation.pdbx_scattering_type x-ray +# +_diffrn_radiation_wavelength.id 1 +_diffrn_radiation_wavelength.wavelength 0.7 +_diffrn_radiation_wavelength.wt 1.0 +# +_diffrn_source.current ? +_diffrn_source.details ? +_diffrn_source.diffrn_id 1 +_diffrn_source.power ? +_diffrn_source.size ? +_diffrn_source.source SYNCHROTRON +_diffrn_source.target ? +_diffrn_source.type 'SPRING-8 BEAMLINE BL44XU' +_diffrn_source.voltage ? +_diffrn_source.take-off_angle ? +_diffrn_source.pdbx_wavelength_list 0.7 +_diffrn_source.pdbx_wavelength ? +_diffrn_source.pdbx_synchrotron_beamline BL44XU +_diffrn_source.pdbx_synchrotron_site SPring-8 +# +_reflns.B_iso_Wilson_estimate 6.112 +_reflns.entry_id 7ETN +_reflns.data_reduction_details ? +_reflns.data_reduction_method ? +_reflns.d_resolution_high 0.820 +_reflns.d_resolution_low 20.55 +_reflns.details ? +_reflns.limit_h_max ? +_reflns.limit_h_min ? +_reflns.limit_k_max ? +_reflns.limit_k_min ? +_reflns.limit_l_max ? +_reflns.limit_l_min ? +_reflns.number_all ? +_reflns.number_obs 9768 +_reflns.observed_criterion ? +_reflns.observed_criterion_F_max ? +_reflns.observed_criterion_F_min ? +_reflns.observed_criterion_I_max ? +_reflns.observed_criterion_I_min ? +_reflns.observed_criterion_sigma_F ? +_reflns.observed_criterion_sigma_I ? +_reflns.percent_possible_obs 95.500 +_reflns.R_free_details ? +_reflns.Rmerge_F_all ? +_reflns.Rmerge_F_obs ? +_reflns.Friedel_coverage ? +_reflns.number_gt ? +_reflns.threshold_expression ? +_reflns.pdbx_redundancy 2.795 +_reflns.pdbx_Rmerge_I_obs 0.081 +_reflns.pdbx_Rmerge_I_all ? +_reflns.pdbx_Rsym_value ? +_reflns.pdbx_netI_over_av_sigmaI ? +_reflns.pdbx_netI_over_sigmaI 12.020 +_reflns.pdbx_res_netI_over_av_sigmaI_2 ? +_reflns.pdbx_res_netI_over_sigmaI_2 ? +_reflns.pdbx_chi_squared 0.865 +_reflns.pdbx_scaling_rejects ? +_reflns.pdbx_d_res_high_opt ? +_reflns.pdbx_d_res_low_opt ? +_reflns.pdbx_d_res_opt_method ? +_reflns.phase_calculation_details ? +_reflns.pdbx_Rrim_I_all 0.101 +_reflns.pdbx_Rpim_I_all ? +_reflns.pdbx_d_opt ? +_reflns.pdbx_number_measured_all ? +_reflns.pdbx_diffrn_id 1 +_reflns.pdbx_ordinal 1 +_reflns.pdbx_CC_half 0.990 +_reflns.pdbx_CC_star ? +_reflns.pdbx_R_split ? +_reflns.pdbx_aniso_diffraction_limit_axis_1_ortho[1] ? +_reflns.pdbx_aniso_diffraction_limit_axis_1_ortho[2] ? +_reflns.pdbx_aniso_diffraction_limit_axis_1_ortho[3] ? +_reflns.pdbx_aniso_diffraction_limit_axis_2_ortho[1] ? +_reflns.pdbx_aniso_diffraction_limit_axis_2_ortho[2] ? +_reflns.pdbx_aniso_diffraction_limit_axis_2_ortho[3] ? +_reflns.pdbx_aniso_diffraction_limit_axis_3_ortho[1] ? +_reflns.pdbx_aniso_diffraction_limit_axis_3_ortho[2] ? +_reflns.pdbx_aniso_diffraction_limit_axis_3_ortho[3] ? +_reflns.pdbx_aniso_diffraction_limit_1 ? +_reflns.pdbx_aniso_diffraction_limit_2 ? +_reflns.pdbx_aniso_diffraction_limit_3 ? +_reflns.pdbx_aniso_B_tensor_eigenvector_1_ortho[1] ? +_reflns.pdbx_aniso_B_tensor_eigenvector_1_ortho[2] ? +_reflns.pdbx_aniso_B_tensor_eigenvector_1_ortho[3] ? +_reflns.pdbx_aniso_B_tensor_eigenvector_2_ortho[1] ? +_reflns.pdbx_aniso_B_tensor_eigenvector_2_ortho[2] ? +_reflns.pdbx_aniso_B_tensor_eigenvector_2_ortho[3] ? +_reflns.pdbx_aniso_B_tensor_eigenvector_3_ortho[1] ? +_reflns.pdbx_aniso_B_tensor_eigenvector_3_ortho[2] ? +_reflns.pdbx_aniso_B_tensor_eigenvector_3_ortho[3] ? +_reflns.pdbx_aniso_B_tensor_eigenvalue_1 ? +_reflns.pdbx_aniso_B_tensor_eigenvalue_2 ? +_reflns.pdbx_aniso_B_tensor_eigenvalue_3 ? +_reflns.pdbx_orthogonalization_convention ? +_reflns.pdbx_percent_possible_ellipsoidal ? +_reflns.pdbx_percent_possible_spherical ? +_reflns.pdbx_percent_possible_ellipsoidal_anomalous ? +_reflns.pdbx_percent_possible_spherical_anomalous ? +_reflns.pdbx_redundancy_anomalous ? +_reflns.pdbx_CC_half_anomalous ? +_reflns.pdbx_absDiff_over_sigma_anomalous ? +_reflns.pdbx_percent_possible_anomalous ? +_reflns.pdbx_observed_signal_threshold ? +_reflns.pdbx_signal_type ? +_reflns.pdbx_signal_details ? +_reflns.pdbx_signal_software_id ? +# +loop_ +_reflns_shell.d_res_high +_reflns_shell.d_res_low +_reflns_shell.meanI_over_sigI_all +_reflns_shell.meanI_over_sigI_obs +_reflns_shell.number_measured_all +_reflns_shell.number_measured_obs +_reflns_shell.number_possible +_reflns_shell.number_unique_all +_reflns_shell.number_unique_obs +_reflns_shell.percent_possible_all +_reflns_shell.percent_possible_obs +_reflns_shell.Rmerge_F_all +_reflns_shell.Rmerge_F_obs +_reflns_shell.Rmerge_I_all +_reflns_shell.Rmerge_I_obs +_reflns_shell.meanI_over_sigI_gt +_reflns_shell.meanI_over_uI_all +_reflns_shell.meanI_over_uI_gt +_reflns_shell.number_measured_gt +_reflns_shell.number_unique_gt +_reflns_shell.percent_possible_gt +_reflns_shell.Rmerge_F_gt +_reflns_shell.Rmerge_I_gt +_reflns_shell.pdbx_redundancy +_reflns_shell.pdbx_Rsym_value +_reflns_shell.pdbx_chi_squared +_reflns_shell.pdbx_netI_over_sigmaI_all +_reflns_shell.pdbx_netI_over_sigmaI_obs +_reflns_shell.pdbx_Rrim_I_all +_reflns_shell.pdbx_Rpim_I_all +_reflns_shell.pdbx_rejects +_reflns_shell.pdbx_ordinal +_reflns_shell.pdbx_diffrn_id +_reflns_shell.pdbx_CC_half +_reflns_shell.pdbx_CC_star +_reflns_shell.pdbx_R_split +_reflns_shell.pdbx_percent_possible_ellipsoidal +_reflns_shell.pdbx_percent_possible_spherical +_reflns_shell.pdbx_percent_possible_ellipsoidal_anomalous +_reflns_shell.pdbx_percent_possible_spherical_anomalous +_reflns_shell.pdbx_redundancy_anomalous +_reflns_shell.pdbx_CC_half_anomalous +_reflns_shell.pdbx_absDiff_over_sigma_anomalous +_reflns_shell.pdbx_percent_possible_anomalous +0.820 0.840 ? 4.470 ? ? ? ? 702 91.200 ? ? ? ? 0.346 ? ? ? ? ? ? ? ? 2.771 ? ? ? ? 0.428 ? ? 1 1 0.861 ? ? ? ? ? ? ? ? ? ? +0.840 0.860 ? 4.830 ? ? ? ? 699 90.400 ? ? ? ? 0.311 ? ? ? ? ? ? ? ? 2.645 ? ? ? ? 0.385 ? ? 2 1 0.879 ? ? ? ? ? ? ? ? ? ? +0.860 0.890 ? 6.220 ? ? ? ? 637 89.700 ? ? ? ? 0.220 ? ? ? ? ? ? ? ? 2.625 ? ? ? ? 0.274 ? ? 3 1 0.929 ? ? ? ? ? ? ? ? ? ? +0.890 0.920 ? 6.830 ? ? ? ? 636 95.900 ? ? ? ? 0.196 ? ? ? ? ? ? ? ? 2.714 ? ? ? ? 0.240 ? ? 4 1 0.949 ? ? ? ? ? ? ? ? ? ? +0.920 0.950 ? 8.570 ? ? ? ? 631 96.300 ? ? ? ? 0.164 ? ? ? ? ? ? ? ? 2.894 ? ? ? ? 0.202 ? ? 5 1 0.945 ? ? ? ? ? ? ? ? ? ? +0.950 0.980 ? 9.520 ? ? ? ? 647 96.600 ? ? ? ? 0.142 ? ? ? ? ? ? ? ? 2.847 ? ? ? ? 0.174 ? ? 6 1 0.962 ? ? ? ? ? ? ? ? ? ? +0.980 1.020 ? 10.870 ? ? ? ? 591 93.200 ? ? ? ? 0.126 ? ? ? ? ? ? ? ? 2.897 ? ? ? ? 0.154 ? ? 7 1 0.969 ? ? ? ? ? ? ? ? ? ? +1.020 1.060 ? 12.320 ? ? ? ? 584 93.400 ? ? ? ? 0.122 ? ? ? ? ? ? ? ? 2.788 ? ? ? ? 0.150 ? ? 8 1 0.966 ? ? ? ? ? ? ? ? ? ? +1.060 1.110 ? 13.730 ? ? ? ? 537 97.600 ? ? ? ? 0.112 ? ? ? ? ? ? ? ? 2.834 ? ? ? ? 0.138 ? ? 9 1 0.970 ? ? ? ? ? ? ? ? ? ? +1.110 1.160 ? 14.490 ? ? ? ? 532 97.300 ? ? ? ? 0.102 ? ? ? ? ? ? ? ? 2.586 ? ? ? ? 0.127 ? ? 10 1 0.972 ? ? ? ? ? ? ? ? ? ? +1.160 1.220 ? 14.630 ? ? ? ? 530 97.800 ? ? ? ? 0.093 ? ? ? ? ? ? ? ? 2.649 ? ? ? ? 0.116 ? ? 11 1 0.974 ? ? ? ? ? ? ? ? ? ? +1.220 1.300 ? 15.250 ? ? ? ? 522 96.700 ? ? ? ? 0.098 ? ? ? ? ? ? ? ? 2.943 ? ? ? ? 0.119 ? ? 12 1 0.977 ? ? ? ? ? ? ? ? ? ? +1.300 1.390 ? 15.890 ? ? ? ? 429 98.800 ? ? ? ? 0.080 ? ? ? ? ? ? ? ? 3.009 ? ? ? ? 0.099 ? ? 13 1 0.985 ? ? ? ? ? ? ? ? ? ? +1.390 1.500 ? 16.950 ? ? ? ? 415 99.300 ? ? ? ? 0.086 ? ? ? ? ? ? ? ? 2.995 ? ? ? ? 0.107 ? ? 14 1 0.966 ? ? ? ? ? ? ? ? ? ? +1.500 1.640 ? 17.860 ? ? ? ? 412 99.300 ? ? ? ? 0.077 ? ? ? ? ? ? ? ? 2.888 ? ? ? ? 0.096 ? ? 15 1 0.975 ? ? ? ? ? ? ? ? ? ? +1.640 1.830 ? 18.390 ? ? ? ? 372 98.700 ? ? ? ? 0.065 ? ? ? ? ? ? ? ? 2.780 ? ? ? ? 0.080 ? ? 16 1 0.987 ? ? ? ? ? ? ? ? ? ? +1.830 2.120 ? 18.910 ? ? ? ? 289 98.600 ? ? ? ? 0.062 ? ? ? ? ? ? ? ? 2.682 ? ? ? ? 0.078 ? ? 17 1 0.989 ? ? ? ? ? ? ? ? ? ? +2.120 2.590 ? 20.510 ? ? ? ? 297 99.700 ? ? ? ? 0.057 ? ? ? ? ? ? ? ? 2.747 ? ? ? ? 0.072 ? ? 18 1 0.992 ? ? ? ? ? ? ? ? ? ? +2.590 3.670 ? 22.830 ? ? ? ? 190 99.000 ? ? ? ? 0.049 ? ? ? ? ? ? ? ? 3.079 ? ? ? ? 0.060 ? ? 19 1 0.993 ? ? ? ? ? ? ? ? ? ? +3.670 20.55 ? 22.750 ? ? ? ? 116 99.100 ? ? ? ? 0.066 ? ? ? ? ? ? ? ? 2.836 ? ? ? ? 0.084 ? ? 20 1 0.983 ? ? ? ? ? ? ? ? ? ? +# +_refine.aniso_B[1][1] ? +_refine.aniso_B[1][2] ? +_refine.aniso_B[1][3] ? +_refine.aniso_B[2][2] ? +_refine.aniso_B[2][3] ? +_refine.aniso_B[3][3] ? +_refine.B_iso_max 9.850 +_refine.B_iso_mean 4.8465 +_refine.B_iso_min 2.950 +_refine.correlation_coeff_Fo_to_Fc ? +_refine.correlation_coeff_Fo_to_Fc_free ? +_refine.details ? +_refine.diff_density_max ? +_refine.diff_density_max_esd ? +_refine.diff_density_min ? +_refine.diff_density_min_esd ? +_refine.diff_density_rms ? +_refine.diff_density_rms_esd ? +_refine.entry_id 7ETN +_refine.pdbx_refine_id 'X-RAY DIFFRACTION' +_refine.ls_abs_structure_details ? +_refine.ls_abs_structure_Flack ? +_refine.ls_abs_structure_Flack_esd ? +_refine.ls_abs_structure_Rogers ? +_refine.ls_abs_structure_Rogers_esd ? +_refine.ls_d_res_high 0.82 +_refine.ls_d_res_low 20.55 +_refine.ls_extinction_coef ? +_refine.ls_extinction_coef_esd ? +_refine.ls_extinction_expression ? +_refine.ls_extinction_method ? +_refine.ls_goodness_of_fit_all ? +_refine.ls_goodness_of_fit_all_esd ? +_refine.ls_goodness_of_fit_obs ? +_refine.ls_goodness_of_fit_obs_esd ? +_refine.ls_hydrogen_treatment ? +_refine.ls_matrix_type ? +_refine.ls_number_constraints ? +_refine.ls_number_parameters ? +_refine.ls_number_reflns_all ? +_refine.ls_number_reflns_obs 9285 +_refine.ls_number_reflns_R_free 483 +_refine.ls_number_reflns_R_work ? +_refine.ls_number_restraints ? +_refine.ls_percent_reflns_obs 96.0 +_refine.ls_percent_reflns_R_free ? +_refine.ls_R_factor_all ? +_refine.ls_R_factor_obs ? +_refine.ls_R_factor_R_free 0.0873 +_refine.ls_R_factor_R_free_error ? +_refine.ls_R_factor_R_free_error_details ? +_refine.ls_R_factor_R_work 0.0692 +_refine.ls_R_Fsqd_factor_obs ? +_refine.ls_R_I_factor_obs ? +_refine.ls_redundancy_reflns_all ? +_refine.ls_redundancy_reflns_obs ? +_refine.ls_restrained_S_all ? +_refine.ls_restrained_S_obs ? +_refine.ls_shift_over_esd_max ? +_refine.ls_shift_over_esd_mean ? +_refine.ls_structure_factor_coef ? +_refine.ls_weighting_details ? +_refine.ls_weighting_scheme ? +_refine.ls_wR_factor_all ? +_refine.ls_wR_factor_obs ? +_refine.ls_wR_factor_R_free ? +_refine.ls_wR_factor_R_work ? +_refine.occupancy_max ? +_refine.occupancy_min ? +_refine.solvent_model_details ? +_refine.solvent_model_param_bsol ? +_refine.solvent_model_param_ksol ? +_refine.pdbx_R_complete ? +_refine.ls_R_factor_gt ? +_refine.ls_goodness_of_fit_gt ? +_refine.ls_goodness_of_fit_ref ? +_refine.ls_shift_over_su_max ? +_refine.ls_shift_over_su_max_lt ? +_refine.ls_shift_over_su_mean ? +_refine.ls_shift_over_su_mean_lt ? +_refine.pdbx_ls_sigma_I ? +_refine.pdbx_ls_sigma_F ? +_refine.pdbx_ls_sigma_Fsqd ? +_refine.pdbx_data_cutoff_high_absF ? +_refine.pdbx_data_cutoff_high_rms_absF ? +_refine.pdbx_data_cutoff_low_absF ? +_refine.pdbx_isotropic_thermal_model ? +_refine.pdbx_ls_cross_valid_method 'FREE R-VALUE' +_refine.pdbx_method_to_determine_struct 'AB INITIO PHASING' +_refine.pdbx_starting_model ? +_refine.pdbx_stereochemistry_target_values ? +_refine.pdbx_R_Free_selection_details ? +_refine.pdbx_stereochem_target_val_spec_case ? +_refine.pdbx_overall_ESU_R ? +_refine.pdbx_overall_ESU_R_Free ? +_refine.pdbx_solvent_vdw_probe_radii ? +_refine.pdbx_solvent_ion_probe_radii ? +_refine.pdbx_solvent_shrinkage_radii ? +_refine.pdbx_real_space_R ? +_refine.pdbx_density_correlation ? +_refine.pdbx_pd_number_of_powder_patterns ? +_refine.pdbx_pd_number_of_points ? +_refine.pdbx_pd_meas_number_of_points ? +_refine.pdbx_pd_proc_ls_prof_R_factor ? +_refine.pdbx_pd_proc_ls_prof_wR_factor ? +_refine.pdbx_pd_Marquardt_correlation_coeff ? +_refine.pdbx_pd_Fsqrd_R_factor ? +_refine.pdbx_pd_ls_matrix_band_width ? +_refine.pdbx_overall_phase_error ? +_refine.pdbx_overall_SU_R_free_Cruickshank_DPI ? +_refine.pdbx_overall_SU_R_free_Blow_DPI ? +_refine.pdbx_overall_SU_R_Blow_DPI ? +_refine.pdbx_TLS_residual_ADP_flag ? +_refine.pdbx_diffrn_id 1 +_refine.overall_SU_B ? +_refine.overall_SU_ML ? +_refine.overall_SU_R_Cruickshank_DPI ? +_refine.overall_SU_R_free ? +_refine.overall_FOM_free_R_set ? +_refine.overall_FOM_work_R_set ? +_refine.pdbx_average_fsc_overall ? +_refine.pdbx_average_fsc_work ? +_refine.pdbx_average_fsc_free ? +# +_refine_hist.pdbx_refine_id 'X-RAY DIFFRACTION' +_refine_hist.cycle_id LAST +_refine_hist.details ? +_refine_hist.d_res_high 0.82 +_refine_hist.d_res_low 20.55 +_refine_hist.number_atoms_solvent 0 +_refine_hist.number_atoms_total 70 +_refine_hist.number_reflns_all ? +_refine_hist.number_reflns_obs ? +_refine_hist.number_reflns_R_free ? +_refine_hist.number_reflns_R_work ? +_refine_hist.R_factor_all ? +_refine_hist.R_factor_obs ? +_refine_hist.R_factor_R_free ? +_refine_hist.R_factor_R_work ? +_refine_hist.pdbx_number_residues_total ? +_refine_hist.pdbx_B_iso_mean_ligand ? +_refine_hist.pdbx_B_iso_mean_solvent ? +_refine_hist.pdbx_number_atoms_protein 70 +_refine_hist.pdbx_number_atoms_nucleic_acid 0 +_refine_hist.pdbx_number_atoms_ligand 0 +_refine_hist.pdbx_number_atoms_lipid ? +_refine_hist.pdbx_number_atoms_carb ? +_refine_hist.pdbx_pseudo_atom_details ? +# +_struct.entry_id 7ETN +_struct.title 'Crystal structure of Pro-Phe-Leu-Ile' +_struct.pdbx_model_details ? +_struct.pdbx_formula_weight ? +_struct.pdbx_formula_weight_method ? +_struct.pdbx_model_type_details ? +_struct.pdbx_CASP_flag N +# +_struct_keywords.entry_id 7ETN +_struct_keywords.text 'Synthetic peptide, UNKNOWN FUNCTION' +_struct_keywords.pdbx_keywords 'UNKNOWN FUNCTION' +# +loop_ +_struct_asym.id +_struct_asym.pdbx_blank_PDB_chainid_flag +_struct_asym.pdbx_modified +_struct_asym.entity_id +_struct_asym.details +A N N 1 ? +B N N 1 ? +# +_struct_ref.id 1 +_struct_ref.db_name PDB +_struct_ref.db_code 7ETN +_struct_ref.pdbx_db_accession 7ETN +_struct_ref.pdbx_db_isoform ? +_struct_ref.entity_id 1 +_struct_ref.pdbx_seq_one_letter_code ? +_struct_ref.pdbx_align_begin 1 +# +loop_ +_struct_ref_seq.align_id +_struct_ref_seq.ref_id +_struct_ref_seq.pdbx_PDB_id_code +_struct_ref_seq.pdbx_strand_id +_struct_ref_seq.seq_align_beg +_struct_ref_seq.pdbx_seq_align_beg_ins_code +_struct_ref_seq.seq_align_end +_struct_ref_seq.pdbx_seq_align_end_ins_code +_struct_ref_seq.pdbx_db_accession +_struct_ref_seq.db_align_beg +_struct_ref_seq.pdbx_db_align_beg_ins_code +_struct_ref_seq.db_align_end +_struct_ref_seq.pdbx_db_align_end_ins_code +_struct_ref_seq.pdbx_auth_seq_align_beg +_struct_ref_seq.pdbx_auth_seq_align_end +1 1 7ETN A 1 ? 4 ? 7ETN 1 ? 4 ? 1 4 +2 1 7ETN B 1 ? 4 ? 7ETN 1 ? 4 ? 1 4 +# +loop_ +_pdbx_struct_assembly.id +_pdbx_struct_assembly.details +_pdbx_struct_assembly.method_details +_pdbx_struct_assembly.oligomeric_details +_pdbx_struct_assembly.oligomeric_count +1 author_defined_assembly ? monomeric 1 +2 author_defined_assembly ? monomeric 1 +# +loop_ +_pdbx_struct_assembly_gen.assembly_id +_pdbx_struct_assembly_gen.oper_expression +_pdbx_struct_assembly_gen.asym_id_list +1 1 A +2 1 B +# +_pdbx_struct_assembly_auth_evidence.id 1 +_pdbx_struct_assembly_auth_evidence.assembly_id 1 +_pdbx_struct_assembly_auth_evidence.experimental_support none +_pdbx_struct_assembly_auth_evidence.details ? +# +_pdbx_struct_oper_list.id 1 +_pdbx_struct_oper_list.type 'identity operation' +_pdbx_struct_oper_list.name 1_555 +_pdbx_struct_oper_list.symmetry_operation x,y,z +_pdbx_struct_oper_list.matrix[1][1] 1.0000000000 +_pdbx_struct_oper_list.matrix[1][2] 0.0000000000 +_pdbx_struct_oper_list.matrix[1][3] 0.0000000000 +_pdbx_struct_oper_list.vector[1] 0.0000000000 +_pdbx_struct_oper_list.matrix[2][1] 0.0000000000 +_pdbx_struct_oper_list.matrix[2][2] 1.0000000000 +_pdbx_struct_oper_list.matrix[2][3] 0.0000000000 +_pdbx_struct_oper_list.vector[2] 0.0000000000 +_pdbx_struct_oper_list.matrix[3][1] 0.0000000000 +_pdbx_struct_oper_list.matrix[3][2] 0.0000000000 +_pdbx_struct_oper_list.matrix[3][3] 1.0000000000 +_pdbx_struct_oper_list.vector[3] 0.0000000000 +# +loop_ +_chem_comp_atom.comp_id +_chem_comp_atom.atom_id +_chem_comp_atom.type_symbol +_chem_comp_atom.pdbx_aromatic_flag +_chem_comp_atom.pdbx_stereo_config +_chem_comp_atom.pdbx_ordinal +ILE N N N N 1 +ILE CA C N S 2 +ILE C C N N 3 +ILE O O N N 4 +ILE CB C N S 5 +ILE CG1 C N N 6 +ILE CG2 C N N 7 +ILE CD1 C N N 8 +ILE OXT O N N 9 +ILE H H N N 10 +ILE H2 H N N 11 +ILE HA H N N 12 +ILE HB H N N 13 +ILE HG12 H N N 14 +ILE HG13 H N N 15 +ILE HG21 H N N 16 +ILE HG22 H N N 17 +ILE HG23 H N N 18 +ILE HD11 H N N 19 +ILE HD12 H N N 20 +ILE HD13 H N N 21 +ILE HXT H N N 22 +LEU N N N N 23 +LEU CA C N S 24 +LEU C C N N 25 +LEU O O N N 26 +LEU CB C N N 27 +LEU CG C N N 28 +LEU CD1 C N N 29 +LEU CD2 C N N 30 +LEU OXT O N N 31 +LEU H H N N 32 +LEU H2 H N N 33 +LEU HA H N N 34 +LEU HB2 H N N 35 +LEU HB3 H N N 36 +LEU HG H N N 37 +LEU HD11 H N N 38 +LEU HD12 H N N 39 +LEU HD13 H N N 40 +LEU HD21 H N N 41 +LEU HD22 H N N 42 +LEU HD23 H N N 43 +LEU HXT H N N 44 +PHE N N N N 45 +PHE CA C N S 46 +PHE C C N N 47 +PHE O O N N 48 +PHE CB C N N 49 +PHE CG C Y N 50 +PHE CD1 C Y N 51 +PHE CD2 C Y N 52 +PHE CE1 C Y N 53 +PHE CE2 C Y N 54 +PHE CZ C Y N 55 +PHE OXT O N N 56 +PHE H H N N 57 +PHE H2 H N N 58 +PHE HA H N N 59 +PHE HB2 H N N 60 +PHE HB3 H N N 61 +PHE HD1 H N N 62 +PHE HD2 H N N 63 +PHE HE1 H N N 64 +PHE HE2 H N N 65 +PHE HZ H N N 66 +PHE HXT H N N 67 +PRO N N N N 68 +PRO CA C N S 69 +PRO C C N N 70 +PRO O O N N 71 +PRO CB C N N 72 +PRO CG C N N 73 +PRO CD C N N 74 +PRO OXT O N N 75 +PRO H H N N 76 +PRO HA H N N 77 +PRO HB2 H N N 78 +PRO HB3 H N N 79 +PRO HG2 H N N 80 +PRO HG3 H N N 81 +PRO HD2 H N N 82 +PRO HD3 H N N 83 +PRO HXT H N N 84 +# +loop_ +_chem_comp_bond.comp_id +_chem_comp_bond.atom_id_1 +_chem_comp_bond.atom_id_2 +_chem_comp_bond.value_order +_chem_comp_bond.pdbx_aromatic_flag +_chem_comp_bond.pdbx_stereo_config +_chem_comp_bond.pdbx_ordinal +ILE N CA sing N N 1 +ILE N H sing N N 2 +ILE N H2 sing N N 3 +ILE CA C sing N N 4 +ILE CA CB sing N N 5 +ILE CA HA sing N N 6 +ILE C O doub N N 7 +ILE C OXT sing N N 8 +ILE CB CG1 sing N N 9 +ILE CB CG2 sing N N 10 +ILE CB HB sing N N 11 +ILE CG1 CD1 sing N N 12 +ILE CG1 HG12 sing N N 13 +ILE CG1 HG13 sing N N 14 +ILE CG2 HG21 sing N N 15 +ILE CG2 HG22 sing N N 16 +ILE CG2 HG23 sing N N 17 +ILE CD1 HD11 sing N N 18 +ILE CD1 HD12 sing N N 19 +ILE CD1 HD13 sing N N 20 +ILE OXT HXT sing N N 21 +LEU N CA sing N N 22 +LEU N H sing N N 23 +LEU N H2 sing N N 24 +LEU CA C sing N N 25 +LEU CA CB sing N N 26 +LEU CA HA sing N N 27 +LEU C O doub N N 28 +LEU C OXT sing N N 29 +LEU CB CG sing N N 30 +LEU CB HB2 sing N N 31 +LEU CB HB3 sing N N 32 +LEU CG CD1 sing N N 33 +LEU CG CD2 sing N N 34 +LEU CG HG sing N N 35 +LEU CD1 HD11 sing N N 36 +LEU CD1 HD12 sing N N 37 +LEU CD1 HD13 sing N N 38 +LEU CD2 HD21 sing N N 39 +LEU CD2 HD22 sing N N 40 +LEU CD2 HD23 sing N N 41 +LEU OXT HXT sing N N 42 +PHE N CA sing N N 43 +PHE N H sing N N 44 +PHE N H2 sing N N 45 +PHE CA C sing N N 46 +PHE CA CB sing N N 47 +PHE CA HA sing N N 48 +PHE C O doub N N 49 +PHE C OXT sing N N 50 +PHE CB CG sing N N 51 +PHE CB HB2 sing N N 52 +PHE CB HB3 sing N N 53 +PHE CG CD1 doub Y N 54 +PHE CG CD2 sing Y N 55 +PHE CD1 CE1 sing Y N 56 +PHE CD1 HD1 sing N N 57 +PHE CD2 CE2 doub Y N 58 +PHE CD2 HD2 sing N N 59 +PHE CE1 CZ doub Y N 60 +PHE CE1 HE1 sing N N 61 +PHE CE2 CZ sing Y N 62 +PHE CE2 HE2 sing N N 63 +PHE CZ HZ sing N N 64 +PHE OXT HXT sing N N 65 +PRO N CA sing N N 66 +PRO N CD sing N N 67 +PRO N H sing N N 68 +PRO CA C sing N N 69 +PRO CA CB sing N N 70 +PRO CA HA sing N N 71 +PRO C O doub N N 72 +PRO C OXT sing N N 73 +PRO CB CG sing N N 74 +PRO CB HB2 sing N N 75 +PRO CB HB3 sing N N 76 +PRO CG CD sing N N 77 +PRO CG HG2 sing N N 78 +PRO CG HG3 sing N N 79 +PRO CD HD2 sing N N 80 +PRO CD HD3 sing N N 81 +PRO OXT HXT sing N N 82 +# +_atom_sites.entry_id 7ETN +_atom_sites.Cartn_transf_matrix[1][1] ? +_atom_sites.Cartn_transf_matrix[1][2] ? +_atom_sites.Cartn_transf_matrix[1][3] ? +_atom_sites.Cartn_transf_matrix[2][1] ? +_atom_sites.Cartn_transf_matrix[2][2] ? +_atom_sites.Cartn_transf_matrix[2][3] ? +_atom_sites.Cartn_transf_matrix[3][1] ? +_atom_sites.Cartn_transf_matrix[3][2] ? +_atom_sites.Cartn_transf_matrix[3][3] ? +_atom_sites.Cartn_transf_vector[1] ? +_atom_sites.Cartn_transf_vector[2] ? +_atom_sites.Cartn_transf_vector[3] ? +_atom_sites.fract_transf_matrix[1][1] 0.189970 +_atom_sites.fract_transf_matrix[1][2] 0.000000 +_atom_sites.fract_transf_matrix[1][3] 0.016119 +_atom_sites.fract_transf_matrix[2][1] 0.000000 +_atom_sites.fract_transf_matrix[2][2] 0.040053 +_atom_sites.fract_transf_matrix[2][3] 0.000000 +_atom_sites.fract_transf_matrix[3][1] 0.000000 +_atom_sites.fract_transf_matrix[3][2] 0.000000 +_atom_sites.fract_transf_matrix[3][3] 0.048399 +_atom_sites.fract_transf_vector[1] 0.000000 +_atom_sites.fract_transf_vector[2] 0.000000 +_atom_sites.fract_transf_vector[3] 0.000000 +_atom_sites.solution_primary ? +_atom_sites.solution_secondary ? +_atom_sites.solution_hydrogens ? +_atom_sites.special_details ? +# +loop_ +_atom_type.symbol +C +H +N +O +# +loop_ +_atom_site.group_PDB +_atom_site.id +_atom_site.type_symbol +_atom_site.label_atom_id +_atom_site.label_alt_id +_atom_site.label_comp_id +_atom_site.label_asym_id +_atom_site.label_entity_id +_atom_site.label_seq_id +_atom_site.pdbx_PDB_ins_code +_atom_site.Cartn_x +_atom_site.Cartn_y +_atom_site.Cartn_z +_atom_site.occupancy +_atom_site.B_iso_or_equiv +_atom_site.pdbx_formal_charge +_atom_site.auth_seq_id +_atom_site.auth_comp_id +_atom_site.auth_asym_id +_atom_site.auth_atom_id +_atom_site.pdbx_PDB_model_num +ATOM 1 N N . PRO A 1 1 ? 2.263 3.421 12.086 1.00 3.24 ? 1 PRO A N 1 +ATOM 2 C CA . PRO A 1 1 ? 2.768 4.596 11.318 1.00 3.25 ? 1 PRO A CA 1 +ATOM 3 C C . PRO A 1 1 ? 1.656 5.634 11.180 1.00 3.38 ? 1 PRO A C 1 +ATOM 4 O O . PRO A 1 1 ? 0.486 5.366 11.445 1.00 4.36 ? 1 PRO A O 1 +ATOM 5 C CB . PRO A 1 1 ? 3.198 4.020 9.966 1.00 3.76 ? 1 PRO A CB 1 +ATOM 6 C CG . PRO A 1 1 ? 3.584 2.584 10.304 1.00 3.71 ? 1 PRO A CG 1 +ATOM 7 C CD . PRO A 1 1 ? 2.514 2.172 11.283 1.00 3.95 ? 1 PRO A CD 1 +ATOM 8 H H2 . PRO A 1 1 ? 1.392 3.512 12.241 1.00 3.89 ? 1 PRO A H2 1 +ATOM 9 H H3 . PRO A 1 1 ? 2.683 3.368 12.869 1.00 3.89 ? 1 PRO A H3 1 +ATOM 10 H HA . PRO A 1 1 ? 3.540 4.990 11.776 1.00 3.90 ? 1 PRO A HA 1 +ATOM 11 H HB2 . PRO A 1 1 ? 2.467 4.044 9.328 1.00 4.52 ? 1 PRO A HB2 1 +ATOM 12 H HB3 . PRO A 1 1 ? 3.952 4.509 9.603 1.00 4.52 ? 1 PRO A HB3 1 +ATOM 13 H HG2 . PRO A 1 1 ? 3.572 2.022 9.513 1.00 4.46 ? 1 PRO A HG2 1 +ATOM 14 H HG3 . PRO A 1 1 ? 4.465 2.544 10.708 1.00 4.46 ? 1 PRO A HG3 1 +ATOM 15 H HD2 . PRO A 1 1 ? 1.710 1.889 10.820 1.00 4.74 ? 1 PRO A HD2 1 +ATOM 16 H HD3 . PRO A 1 1 ? 2.823 1.449 11.851 1.00 4.74 ? 1 PRO A HD3 1 +ATOM 17 N N . PHE A 1 2 ? 2.024 6.832 10.677 1.00 3.27 ? 2 PHE A N 1 +ATOM 18 C CA . PHE A 1 2 ? 0.997 7.762 10.219 1.00 3.34 ? 2 PHE A CA 1 +ATOM 19 C C . PHE A 1 2 ? 0.196 7.205 9.048 1.00 3.14 ? 2 PHE A C 1 +ATOM 20 O O . PHE A 1 2 ? -1.036 7.320 9.010 1.00 3.49 ? 2 PHE A O 1 +ATOM 21 C CB . PHE A 1 2 ? 1.641 9.127 9.888 1.00 3.81 ? 2 PHE A CB 1 +ATOM 22 C CG . PHE A 1 2 ? 0.650 10.129 9.333 1.00 3.64 ? 2 PHE A CG 1 +ATOM 23 C CD1 . PHE A 1 2 ? -0.279 10.766 10.177 1.00 3.85 ? 2 PHE A CD1 1 +ATOM 24 C CD2 . PHE A 1 2 ? 0.609 10.419 7.968 1.00 4.35 ? 2 PHE A CD2 1 +ATOM 25 C CE1 . PHE A 1 2 ? -1.208 11.660 9.659 1.00 4.03 ? 2 PHE A CE1 1 +ATOM 26 C CE2 . PHE A 1 2 ? -0.322 11.324 7.449 1.00 4.71 ? 2 PHE A CE2 1 +ATOM 27 C CZ . PHE A 1 2 ? -1.245 11.942 8.300 1.00 4.31 ? 2 PHE A CZ 1 +ATOM 28 H H . PHE A 1 2 ? 2.856 7.045 10.628 1.00 3.92 ? 2 PHE A H 1 +ATOM 29 H HA . PHE A 1 2 ? 0.373 7.902 10.962 1.00 4.00 ? 2 PHE A HA 1 +ATOM 30 H HB2 . PHE A 1 2 ? 2.040 9.491 10.694 1.00 4.57 ? 2 PHE A HB2 1 +ATOM 31 H HB3 . PHE A 1 2 ? 2.350 8.993 9.240 1.00 4.57 ? 2 PHE A HB3 1 +ATOM 32 H HD1 . PHE A 1 2 ? -0.271 10.587 11.090 1.00 4.62 ? 2 PHE A HD1 1 +ATOM 33 H HD2 . PHE A 1 2 ? 1.210 10.002 7.394 1.00 5.22 ? 2 PHE A HD2 1 +ATOM 34 H HE1 . PHE A 1 2 ? -1.814 12.074 10.230 1.00 4.83 ? 2 PHE A HE1 1 +ATOM 35 H HE2 . PHE A 1 2 ? -0.328 11.515 6.539 1.00 5.66 ? 2 PHE A HE2 1 +ATOM 36 H HZ . PHE A 1 2 ? -1.875 12.535 7.960 1.00 5.18 ? 2 PHE A HZ 1 +ATOM 37 N N . LEU A 1 3 ? 0.874 6.589 8.064 1.00 3.41 ? 3 LEU A N 1 +ATOM 38 C CA . LEU A 1 3 ? 0.168 5.898 6.984 1.00 3.60 ? 3 LEU A CA 1 +ATOM 39 C C . LEU A 1 3 ? 1.055 4.744 6.488 1.00 3.47 ? 3 LEU A C 1 +ATOM 40 O O . LEU A 1 3 ? 2.285 4.750 6.676 1.00 3.72 ? 3 LEU A O 1 +ATOM 41 C CB . LEU A 1 3 ? -0.257 6.837 5.824 1.00 4.05 ? 3 LEU A CB 1 +ATOM 42 C CG . LEU A 1 3 ? 0.846 7.520 5.010 1.00 4.83 ? 3 LEU A CG 1 +ATOM 43 C CD1 . LEU A 1 3 ? 1.475 6.588 3.975 1.00 5.29 ? 3 LEU A CD1 1 +ATOM 44 C CD2 . LEU A 1 3 ? 0.298 8.755 4.319 1.00 6.57 ? 3 LEU A CD2 1 +ATOM 45 H H . LEU A 1 3 ? 1.734 6.601 8.069 1.00 4.09 ? 3 LEU A H 1 +ATOM 46 H HA . LEU A 1 3 ? -0.646 5.506 7.363 1.00 4.32 ? 3 LEU A HA 1 +ATOM 47 H HB2 . LEU A 1 3 ? -0.803 6.322 5.210 1.00 4.86 ? 3 LEU A HB2 1 +ATOM 48 H HB3 . LEU A 1 3 ? -0.823 7.531 6.197 1.00 4.86 ? 3 LEU A HB3 1 +ATOM 49 H HG . LEU A 1 3 ? 1.550 7.805 5.630 1.00 5.80 ? 3 LEU A HG 1 +ATOM 50 H HD11 . LEU A 1 3 ? 1.937 5.876 4.424 1.00 7.93 ? 3 LEU A HD11 1 +ATOM 51 H HD12 . LEU A 1 3 ? 2.095 7.082 3.434 1.00 7.93 ? 3 LEU A HD12 1 +ATOM 52 H HD13 . LEU A 1 3 ? 0.786 6.220 3.416 1.00 7.93 ? 3 LEU A HD13 1 +ATOM 53 H HD21 . LEU A 1 3 ? -0.001 9.385 4.979 1.00 9.85 ? 3 LEU A HD21 1 +ATOM 54 H HD22 . LEU A 1 3 ? -0.439 8.506 3.757 1.00 9.85 ? 3 LEU A HD22 1 +ATOM 55 H HD23 . LEU A 1 3 ? 0.988 9.155 3.784 1.00 9.85 ? 3 LEU A HD23 1 +ATOM 56 N N . ILE A 1 4 ? 0.379 3.780 5.862 1.00 3.53 ? 4 ILE A N 1 +ATOM 57 C CA . ILE A 1 4 ? 0.996 2.768 4.989 1.00 3.42 ? 4 ILE A CA 1 +ATOM 58 C C . ILE A 1 4 ? 0.175 2.736 3.692 1.00 3.28 ? 4 ILE A C 1 +ATOM 59 O O . ILE A 1 4 ? 0.723 2.471 2.600 1.00 3.62 ? 4 ILE A O 1 +ATOM 60 C CB . ILE A 1 4 ? 1.063 1.368 5.663 1.00 3.79 ? 4 ILE A CB 1 +ATOM 61 C CG1 . ILE A 1 4 ? 1.662 1.421 7.059 1.00 4.33 ? 4 ILE A CG1 1 +ATOM 62 C CG2 . ILE A 1 4 ? 1.839 0.397 4.758 1.00 4.58 ? 4 ILE A CG2 1 +ATOM 63 C CD1 . ILE A 1 4 ? 1.604 0.099 7.814 1.00 5.48 ? 4 ILE A CD1 1 +ATOM 64 O OXT . ILE A 1 4 ? -1.067 2.955 3.832 1.00 4.27 ? 4 ILE A OXT 1 +ATOM 65 H H . ILE A 1 4 ? -0.473 3.747 5.975 1.00 4.24 ? 4 ILE A H 1 +ATOM 66 H HA . ILE A 1 4 ? 1.908 3.055 4.774 1.00 4.10 ? 4 ILE A HA 1 +ATOM 67 H HB . ILE A 1 4 ? 0.146 1.032 5.744 1.00 4.55 ? 4 ILE A HB 1 +ATOM 68 H HG12 . ILE A 1 4 ? 2.588 1.700 6.990 1.00 5.19 ? 4 ILE A HG12 1 +ATOM 69 H HG13 . ILE A 1 4 ? 1.190 2.094 7.574 1.00 5.19 ? 4 ILE A HG13 1 +ATOM 70 H HG21 . ILE A 1 4 ? 1.425 0.368 3.892 1.00 6.87 ? 4 ILE A HG21 1 +ATOM 71 H HG22 . ILE A 1 4 ? 1.828 -0.480 5.147 1.00 6.87 ? 4 ILE A HG22 1 +ATOM 72 H HG23 . ILE A 1 4 ? 2.746 0.698 4.670 1.00 6.87 ? 4 ILE A HG23 1 +ATOM 73 H HD11 . ILE A 1 4 ? 1.915 0.230 8.713 1.00 8.23 ? 4 ILE A HD11 1 +ATOM 74 H HD12 . ILE A 1 4 ? 2.162 -0.546 7.373 1.00 8.23 ? 4 ILE A HD12 1 +ATOM 75 H HD13 . ILE A 1 4 ? 0.699 -0.220 7.833 1.00 8.23 ? 4 ILE A HD13 1 +ATOM 76 N N . PRO B 1 1 ? 3.067 3.241 1.445 1.00 3.36 ? 1 PRO B N 1 +ATOM 77 C CA . PRO B 1 1 ? 3.628 4.468 0.782 1.00 3.20 ? 1 PRO B CA 1 +ATOM 78 C C . PRO B 1 1 ? 2.545 5.540 0.751 1.00 3.17 ? 1 PRO B C 1 +ATOM 79 O O . PRO B 1 1 ? 1.358 5.252 0.861 1.00 3.75 ? 1 PRO B O 1 +ATOM 80 C CB . PRO B 1 1 ? 4.005 4.028 -0.635 1.00 3.64 ? 1 PRO B CB 1 +ATOM 81 C CG . PRO B 1 1 ? 4.291 2.530 -0.474 1.00 3.80 ? 1 PRO B CG 1 +ATOM 82 C CD . PRO B 1 1 ? 3.202 2.099 0.487 1.00 3.91 ? 1 PRO B CD 1 +ATOM 83 H H2 . PRO B 1 1 ? 3.514 3.066 2.194 1.00 4.03 ? 1 PRO B H2 1 +ATOM 84 H H3 . PRO B 1 1 ? 2.211 3.369 1.651 1.00 4.03 ? 1 PRO B H3 1 +ATOM 85 H HA . PRO B 1 1 ? 4.418 4.790 1.265 1.00 3.85 ? 1 PRO B HA 1 +ATOM 86 H HB2 . PRO B 1 1 ? 3.273 4.180 -1.254 1.00 4.37 ? 1 PRO B HB2 1 +ATOM 87 H HB3 . PRO B 1 1 ? 4.790 4.502 -0.951 1.00 4.37 ? 1 PRO B HB3 1 +ATOM 88 H HG2 . PRO B 1 1 ? 4.218 2.064 -1.322 1.00 4.56 ? 1 PRO B HG2 1 +ATOM 89 H HG3 . PRO B 1 1 ? 5.173 2.378 -0.100 1.00 4.56 ? 1 PRO B HG3 1 +ATOM 90 H HD2 . PRO B 1 1 ? 2.368 1.944 0.017 1.00 4.69 ? 1 PRO B HD2 1 +ATOM 91 H HD3 . PRO B 1 1 ? 3.455 1.287 0.953 1.00 4.69 ? 1 PRO B HD3 1 +ATOM 92 N N . PHE B 1 2 ? 2.961 6.812 0.548 1.00 3.26 ? 2 PHE B N 1 +ATOM 93 C CA . PHE B 1 2 ? 1.940 7.825 0.248 1.00 3.07 ? 2 PHE B CA 1 +ATOM 94 C C . PHE B 1 2 ? 1.201 7.506 -1.064 1.00 2.95 ? 2 PHE B C 1 +ATOM 95 O O . PHE B 1 2 ? -0.034 7.564 -1.125 1.00 3.35 ? 2 PHE B O 1 +ATOM 96 C CB . PHE B 1 2 ? 2.602 9.219 0.215 1.00 3.55 ? 2 PHE B CB 1 +ATOM 97 C CG . PHE B 1 2 ? 1.618 10.276 -0.225 1.00 3.35 ? 2 PHE B CG 1 +ATOM 98 C CD1 . PHE B 1 2 ? 0.661 10.791 0.659 1.00 3.64 ? 2 PHE B CD1 1 +ATOM 99 C CD2 . PHE B 1 2 ? 1.587 10.693 -1.570 1.00 3.73 ? 2 PHE B CD2 1 +ATOM 100 C CE1 . PHE B 1 2 ? -0.311 11.680 0.210 1.00 3.67 ? 2 PHE B CE1 1 +ATOM 101 C CE2 . PHE B 1 2 ? 0.624 11.576 -2.020 1.00 3.98 ? 2 PHE B CE2 1 +ATOM 102 C CZ . PHE B 1 2 ? -0.347 12.063 -1.134 1.00 3.89 ? 2 PHE B CZ 1 +ATOM 103 H H . PHE B 1 2 ? 3.794 7.021 0.593 1.00 3.91 ? 2 PHE B H 1 +ATOM 104 H HA . PHE B 1 2 ? 1.283 7.818 0.975 1.00 3.69 ? 2 PHE B HA 1 +ATOM 105 H HB2 . PHE B 1 2 ? 2.937 9.438 1.099 1.00 4.26 ? 2 PHE B HB2 1 +ATOM 106 H HB3 . PHE B 1 2 ? 3.355 9.204 -0.396 1.00 4.26 ? 2 PHE B HB3 1 +ATOM 107 H HD1 . PHE B 1 2 ? 0.676 10.537 1.553 1.00 4.37 ? 2 PHE B HD1 1 +ATOM 108 H HD2 . PHE B 1 2 ? 2.225 10.369 -2.165 1.00 4.47 ? 2 PHE B HD2 1 +ATOM 109 H HE1 . PHE B 1 2 ? -0.938 12.020 0.806 1.00 4.40 ? 2 PHE B HE1 1 +ATOM 110 H HE2 . PHE B 1 2 ? 0.620 11.847 -2.909 1.00 4.78 ? 2 PHE B HE2 1 +ATOM 111 H HZ . PHE B 1 2 ? -1.012 12.638 -1.438 1.00 4.67 ? 2 PHE B HZ 1 +ATOM 112 N N . LEU B 1 3 ? 1.989 7.190 -2.102 1.00 3.24 ? 3 LEU B N 1 +ATOM 113 C CA . LEU B 1 3 ? 1.492 6.878 -3.438 1.00 3.42 ? 3 LEU B CA 1 +ATOM 114 C C . LEU B 1 3 ? 2.271 5.651 -3.931 1.00 3.35 ? 3 LEU B C 1 +ATOM 115 O O . LEU B 1 3 ? 3.502 5.608 -3.818 1.00 3.82 ? 3 LEU B O 1 +ATOM 116 C CB . LEU B 1 3 ? 1.705 8.077 -4.385 1.00 4.12 ? 3 LEU B CB 1 +ATOM 117 C CG . LEU B 1 3 ? 1.416 7.857 -5.867 1.00 4.61 ? 3 LEU B CG 1 +ATOM 118 C CD1 . LEU B 1 3 ? -0.011 7.487 -6.144 1.00 5.83 ? 3 LEU B CD1 1 +ATOM 119 C CD2 . LEU B 1 3 ? 1.781 9.134 -6.646 1.00 5.56 ? 3 LEU B CD2 1 +ATOM 120 H H . LEU B 1 3 ? 2.839 7.169 -1.973 1.00 3.89 ? 3 LEU B H 1 +ATOM 121 H HA . LEU B 1 3 ? 0.537 6.664 -3.390 1.00 4.11 ? 3 LEU B HA 1 +ATOM 122 H HB2 . LEU B 1 3 ? 1.145 8.807 -4.076 1.00 4.94 ? 3 LEU B HB2 1 +ATOM 123 H HB3 . LEU B 1 3 ? 2.627 8.366 -4.299 1.00 4.94 ? 3 LEU B HB3 1 +ATOM 124 H HG . LEU B 1 3 ? 1.990 7.129 -6.186 1.00 5.53 ? 3 LEU B HG 1 +ATOM 125 H HD11 . LEU B 1 3 ? -0.140 7.393 -7.091 1.00 8.74 ? 3 LEU B HD11 1 +ATOM 126 H HD12 . LEU B 1 3 ? -0.591 8.175 -5.809 1.00 8.74 ? 3 LEU B HD12 1 +ATOM 127 H HD13 . LEU B 1 3 ? -0.216 6.656 -5.709 1.00 8.74 ? 3 LEU B HD13 1 +ATOM 128 H HD21 . LEU B 1 3 ? 2.724 9.295 -6.573 1.00 8.33 ? 3 LEU B HD21 1 +ATOM 129 H HD22 . LEU B 1 3 ? 1.299 9.880 -6.280 1.00 8.33 ? 3 LEU B HD22 1 +ATOM 130 H HD23 . LEU B 1 3 ? 1.546 9.023 -7.570 1.00 8.33 ? 3 LEU B HD23 1 +ATOM 131 N N . ILE B 1 4 ? 1.536 4.683 -4.483 1.00 3.54 ? 4 ILE B N 1 +ATOM 132 C CA . ILE B 1 4 ? 2.085 3.515 -5.179 1.00 3.57 ? 4 ILE B CA 1 +ATOM 133 C C . ILE B 1 4 ? 1.159 3.180 -6.361 1.00 3.34 ? 4 ILE B C 1 +ATOM 134 O O . ILE B 1 4 ? 1.646 2.661 -7.402 1.00 3.64 ? 4 ILE B O 1 +ATOM 135 C CB . ILE B 1 4 ? 2.262 2.317 -4.203 1.00 3.73 ? 4 ILE B CB 1 +ATOM 136 C CG1 . ILE B 1 4 ? 2.772 1.074 -4.943 1.00 4.52 ? 4 ILE B CG1 1 +ATOM 137 C CG2 . ILE B 1 4 ? 0.994 2.032 -3.416 1.00 5.64 ? 4 ILE B CG2 1 +ATOM 138 C CD1 . ILE B 1 4 ? 3.110 -0.104 -4.045 1.00 5.40 ? 4 ILE B CD1 1 +ATOM 139 O OXT . ILE B 1 4 ? -0.063 3.470 -6.212 1.00 3.90 ? 4 ILE B OXT 1 +ATOM 140 H H . ILE B 1 4 ? 0.681 4.749 -4.426 1.00 4.25 ? 4 ILE B H 1 +ATOM 141 H HA . ILE B 1 4 ? 2.966 3.754 -5.536 1.00 4.29 ? 4 ILE B HA 1 +ATOM 142 H HB . ILE B 1 4 ? 2.952 2.572 -3.556 1.00 4.48 ? 4 ILE B HB 1 +ATOM 143 H HG12 . ILE B 1 4 ? 2.096 0.794 -5.580 1.00 5.43 ? 4 ILE B HG12 1 +ATOM 144 H HG13 . ILE B 1 4 ? 3.565 1.317 -5.446 1.00 5.43 ? 4 ILE B HG13 1 +ATOM 145 H HG21 . ILE B 1 4 ? 0.713 2.830 -2.962 1.00 8.45 ? 4 ILE B HG21 1 +ATOM 146 H HG22 . ILE B 1 4 ? 1.166 1.342 -2.771 1.00 8.45 ? 4 ILE B HG22 1 +ATOM 147 H HG23 . ILE B 1 4 ? 0.302 1.745 -4.016 1.00 8.45 ? 4 ILE B HG23 1 +ATOM 148 H HD11 . ILE B 1 4 ? 3.521 -0.797 -4.567 1.00 8.10 ? 4 ILE B HD11 1 +ATOM 149 H HD12 . ILE B 1 4 ? 2.307 -0.440 -3.642 1.00 8.10 ? 4 ILE B HD12 1 +ATOM 150 H HD13 . ILE B 1 4 ? 3.718 0.181 -3.358 1.00 8.10 ? 4 ILE B HD13 1 +# +loop_ +_atom_site_anisotrop.id +_atom_site_anisotrop.type_symbol +_atom_site_anisotrop.pdbx_label_atom_id +_atom_site_anisotrop.pdbx_label_alt_id +_atom_site_anisotrop.pdbx_label_comp_id +_atom_site_anisotrop.pdbx_label_asym_id +_atom_site_anisotrop.pdbx_label_seq_id +_atom_site_anisotrop.pdbx_PDB_ins_code +_atom_site_anisotrop.U[1][1] +_atom_site_anisotrop.U[2][2] +_atom_site_anisotrop.U[3][3] +_atom_site_anisotrop.U[1][2] +_atom_site_anisotrop.U[1][3] +_atom_site_anisotrop.U[2][3] +_atom_site_anisotrop.pdbx_auth_seq_id +_atom_site_anisotrop.pdbx_auth_comp_id +_atom_site_anisotrop.pdbx_auth_asym_id +_atom_site_anisotrop.pdbx_auth_atom_id +1 N N . PRO A 1 ? 0.0347 0.0300 0.0511 0.0011 0.0008 0.0024 1 PRO A N +2 C CA . PRO A 1 ? 0.0332 0.0318 0.0511 0.0003 0.0019 0.0044 1 PRO A CA +3 C C . PRO A 1 ? 0.0368 0.0339 0.0499 0.0007 -0.0010 0.0060 1 PRO A C +4 O O . PRO A 1 ? 0.0344 0.0404 0.0808 0.0001 0.0057 0.0158 1 PRO A O +5 C CB . PRO A 1 ? 0.0484 0.0389 0.0472 0.0048 0.0050 0.0025 1 PRO A CB +6 C CG . PRO A 1 ? 0.0427 0.0384 0.0516 0.0053 -0.0007 -0.0028 1 PRO A CG +7 C CD . PRO A 1 ? 0.0459 0.0301 0.0652 0.0006 -0.0029 -0.0030 1 PRO A CD +17 N N . PHE A 2 ? 0.0333 0.0303 0.0532 0.0004 0.0024 0.0028 2 PHE A N +18 C CA . PHE A 2 ? 0.0374 0.0287 0.0531 0.0022 0.0031 0.0004 2 PHE A CA +19 C C . PHE A 2 ? 0.0335 0.0284 0.0504 0.0016 0.0027 0.0055 2 PHE A C +20 O O . PHE A 2 ? 0.0386 0.0336 0.0526 0.0022 0.0044 0.0028 2 PHE A O +21 C CB . PHE A 2 ? 0.0403 0.0282 0.0675 -0.0025 0.0021 -0.0024 2 PHE A CB +22 C CG . PHE A 2 ? 0.0439 0.0263 0.0600 -0.0032 0.0029 -0.0015 2 PHE A CG +23 C CD1 . PHE A 2 ? 0.0482 0.0304 0.0588 0.0008 0.0045 0.0014 2 PHE A CD1 +24 C CD2 . PHE A 2 ? 0.0595 0.0404 0.0554 0.0058 0.0087 -0.0001 2 PHE A CD2 +25 C CE1 . PHE A 2 ? 0.0477 0.0338 0.0624 0.0024 0.0030 -0.0014 2 PHE A CE1 +26 C CE2 . PHE A 2 ? 0.0682 0.0425 0.0578 0.0064 -0.0008 0.0020 2 PHE A CE2 +27 C CZ . PHE A 2 ? 0.0559 0.0306 0.0677 0.0023 -0.0046 0.0056 2 PHE A CZ +37 N N . LEU A 3 ? 0.0359 0.0347 0.0514 0.0024 0.0011 0.0008 3 LEU A N +38 C CA . LEU A 3 ? 0.0410 0.0342 0.0534 0.0013 -0.0007 -0.0021 3 LEU A CA +39 C C . LEU A 3 ? 0.0430 0.0328 0.0481 0.0002 0.0014 0.0018 3 LEU A C +40 O O . LEU A 3 ? 0.0411 0.0390 0.0526 -0.0003 -0.0009 -0.0063 3 LEU A O +41 C CB . LEU A 3 ? 0.0512 0.0395 0.0540 0.0040 -0.0014 -0.0012 3 LEU A CB +42 C CG . LEU A 3 ? 0.0682 0.0428 0.0616 -0.0112 -0.0010 0.0039 3 LEU A CG +43 C CD1 . LEU A 3 ? 0.0648 0.0592 0.0650 -0.0051 0.0119 0.0075 3 LEU A CD1 +44 C CD2 . LEU A 3 ? 0.1260 0.0423 0.0664 -0.0019 0.0020 0.0031 3 LEU A CD2 +56 N N . ILE A 4 ? 0.0398 0.0360 0.0504 -0.0009 0.0016 -0.0029 4 ILE A N +57 C CA . ILE A 4 ? 0.0442 0.0358 0.0422 0.0009 -0.0016 -0.0029 4 ILE A CA +58 C C . ILE A 4 ? 0.0376 0.0293 0.0503 -0.0002 0.0033 0.0013 4 ILE A C +59 O O . ILE A 4 ? 0.0389 0.0401 0.0504 -0.0035 0.0051 -0.0019 4 ILE A O +60 C CB . ILE A 4 ? 0.0465 0.0376 0.0514 -0.0012 -0.0032 0.0006 4 ILE A CB +61 C CG1 . ILE A 4 ? 0.0581 0.0403 0.0562 0.0057 -0.0059 -0.0012 4 ILE A CG1 +62 C CG2 . ILE A 4 ? 0.0636 0.0340 0.0659 0.0069 -0.0092 -0.0009 4 ILE A CG2 +63 C CD1 . ILE A 4 ? 0.0898 0.0464 0.0598 0.0059 -0.0084 0.0013 4 ILE A CD1 +64 O OXT . ILE A 4 ? 0.0387 0.0559 0.0581 0.0023 0.0003 0.0020 4 ILE A OXT +76 N N . PRO B 1 ? 0.0318 0.0332 0.0549 0.0014 0.0030 0.0037 1 PRO B N +77 C CA . PRO B 1 ? 0.0358 0.0295 0.0493 -0.0015 0.0030 0.0031 1 PRO B CA +78 C C . PRO B 1 ? 0.0314 0.0311 0.0509 0.0001 0.0065 0.0018 1 PRO B C +79 O O . PRO B 1 ? 0.0365 0.0349 0.0627 -0.0001 0.0010 0.0038 1 PRO B O +80 C CB . PRO B 1 ? 0.0447 0.0321 0.0534 0.0041 0.0047 0.0046 1 PRO B CB +81 C CG . PRO B 1 ? 0.0466 0.0335 0.0557 0.0050 0.0026 0.0018 1 PRO B CG +82 C CD . PRO B 1 ? 0.0419 0.0302 0.0676 0.0010 0.0019 0.0006 1 PRO B CD +92 N N . PHE B 2 ? 0.0371 0.0278 0.0514 -0.0009 -0.0003 0.0016 2 PHE B N +93 C CA . PHE B 2 ? 0.0359 0.0283 0.0456 0.0028 0.0003 -0.0003 2 PHE B CA +94 C C . PHE B 2 ? 0.0353 0.0257 0.0445 0.0022 0.0045 0.0021 2 PHE B C +95 O O . PHE B 2 ? 0.0379 0.0321 0.0498 -0.0007 0.0014 0.0013 2 PHE B O +96 C CB . PHE B 2 ? 0.0432 0.0267 0.0571 -0.0032 -0.0021 -0.0016 2 PHE B CB +97 C CG . PHE B 2 ? 0.0414 0.0257 0.0527 -0.0028 -0.0012 -0.0016 2 PHE B CG +98 C CD1 . PHE B 2 ? 0.0494 0.0290 0.0516 -0.0018 0.0027 -0.0036 2 PHE B CD1 +99 C CD2 . PHE B 2 ? 0.0457 0.0341 0.0534 -0.0024 0.0013 -0.0036 2 PHE B CD2 +100 C CE1 . PHE B 2 ? 0.0458 0.0317 0.0536 0.0004 0.0077 -0.0022 2 PHE B CE1 +101 C CE2 . PHE B 2 ? 0.0533 0.0325 0.0566 -0.0002 0.0035 0.0015 2 PHE B CE2 +102 C CZ . PHE B 2 ? 0.0491 0.0286 0.0614 0.0006 -0.0024 0.0002 2 PHE B CZ +112 N N . LEU B 3 ? 0.0389 0.0326 0.0442 0.0013 0.0005 -0.0018 3 LEU B N +113 C CA . LEU B 3 ? 0.0452 0.0344 0.0427 0.0010 -0.0027 -0.0037 3 LEU B CA +114 C C . LEU B 3 ? 0.0444 0.0327 0.0425 -0.0018 0.0033 0.0006 3 LEU B C +115 O O . LEU B 3 ? 0.0424 0.0387 0.0554 -0.0005 -0.0036 -0.0065 3 LEU B O +116 C CB . LEU B 3 ? 0.0597 0.0356 0.0520 0.0032 -0.0029 -0.0004 3 LEU B CB +117 C CG . LEU B 3 ? 0.0707 0.0415 0.0524 0.0031 0.0042 0.0013 3 LEU B CG +118 C CD1 . LEU B 3 ? 0.0688 0.0703 0.0691 -0.0028 -0.0048 0.0111 3 LEU B CD1 +119 C CD2 . LEU B 3 ? 0.0772 0.0519 0.0695 0.0038 0.0042 0.0085 3 LEU B CD2 +131 N N . ILE B 4 ? 0.0367 0.0342 0.0557 -0.0001 0.0011 -0.0038 4 ILE B N +132 C CA . ILE B 4 ? 0.0380 0.0343 0.0553 -0.0031 0.0015 -0.0028 4 ILE B CA +133 C C . ILE B 4 ? 0.0394 0.0279 0.0520 0.0008 0.0036 0.0004 4 ILE B C +134 O O . ILE B 4 ? 0.0420 0.0349 0.0531 -0.0011 0.0028 -0.0052 4 ILE B O +135 C CB . ILE B 4 ? 0.0460 0.0359 0.0513 0.0021 -0.0007 0.0015 4 ILE B CB +136 C CG1 . ILE B 4 ? 0.0629 0.0362 0.0625 0.0021 -0.0004 -0.0019 4 ILE B CG1 +137 C CG2 . ILE B 4 ? 0.0640 0.0618 0.0757 0.0120 0.0151 0.0264 4 ILE B CG2 +138 C CD1 . ILE B 4 ? 0.0788 0.0387 0.0755 0.0059 0.0052 0.0074 4 ILE B CD1 +139 O OXT . ILE B 4 ? 0.0374 0.0492 0.0528 0.0027 0.0012 -0.0011 4 ILE B OXT +# From b7ada7c4a15a7aebe7919d35cd4e0238417ac4e0 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Thu, 24 Oct 2024 21:42:21 +0200 Subject: [PATCH 29/30] add mmcif to __all__ --- testsuite/MDAnalysisTests/datafiles.py | 1 + 1 file changed, 1 insertion(+) diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index baa835ecba8..bcecc2dc862 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -313,6 +313,7 @@ "AUX_EDR_RAW", "AUX_EDR_SINGLE_FRAME", # for testing .edr auxiliary reader "MMTF", + "MMCIF", "MMTF_gz", "MMTF_skinny", # skinny - some optional fields stripped out "MMTF_skinny2", From e80632c6a8efece7b4f02a481825d310eb4dcaa1 Mon Sep 17 00:00:00 2001 From: Egor Marin Date: Fri, 25 Oct 2024 11:17:18 +0000 Subject: [PATCH 30/30] add black instead of ruff --- package/MDAnalysis/coordinates/MMCIF.py | 2 +- package/MDAnalysis/topology/MMCIFParser.py | 14 ++++++++++---- testsuite/MDAnalysisTests/topology/test_mmcif.py | 8 ++++++-- 3 files changed, 17 insertions(+), 7 deletions(-) diff --git a/package/MDAnalysis/coordinates/MMCIF.py b/package/MDAnalysis/coordinates/MMCIF.py index ee8df98d781..0107a52c210 100644 --- a/package/MDAnalysis/coordinates/MMCIF.py +++ b/package/MDAnalysis/coordinates/MMCIF.py @@ -15,7 +15,7 @@ logger = logging.getLogger("MDAnalysis.coordinates.MMCIF") -def get_coordinates(model: 'gemmi.Model') -> np.ndarray: +def get_coordinates(model: "gemmi.Model") -> np.ndarray: """Get coordinates of all atoms in the `gemmi.Model` object. Parameters diff --git a/package/MDAnalysis/topology/MMCIFParser.py b/package/MDAnalysis/topology/MMCIFParser.py index 2a0adb71704..6367cb88f9f 100644 --- a/package/MDAnalysis/topology/MMCIFParser.py +++ b/package/MDAnalysis/topology/MMCIFParser.py @@ -61,10 +61,14 @@ def _into_idx(arr: list) -> list[int]: .. versionadded:: 2.8.0 """ - return [idx for idx, (_, group) in enumerate(itertools.groupby(arr)) for _ in group] + return [ + idx + for idx, (_, group) in enumerate(itertools.groupby(arr)) + for _ in group + ] -def get_Atomattrs(model: 'gemmi.Model') -> tuple[list[AtomAttr], np.ndarray]: +def get_Atomattrs(model: "gemmi.Model") -> tuple[list[AtomAttr], np.ndarray]: """Extract all attributes that are subclasses of :class:`..core.topologyattrs.AtomAttr` from a ``gemmi.Model`` object, and a `residx` index with indices of all atoms in residues. @@ -165,7 +169,9 @@ def get_Atomattrs(model: 'gemmi.Model') -> tuple[list[AtomAttr], np.ndarray]: return attrs, residx -def get_Residueattrs(model: 'gemmi.Model') -> tuple[list[ResidueAttr], np.ndarray]: +def get_Residueattrs( + model: "gemmi.Model", +) -> tuple[list[ResidueAttr], np.ndarray]: """Extract all attributes that are subclasses of :class:`..core.topologyattrs.ResidueAttr` from a ``gemmi.Model`` object, and a `segidx` index witn indices of all residues in segments. @@ -214,7 +220,7 @@ def get_Residueattrs(model: 'gemmi.Model') -> tuple[list[ResidueAttr], np.ndarra return attrs, segidx -def get_Segmentattrs(model: 'gemmi.Model') -> SegmentAttr: +def get_Segmentattrs(model: "gemmi.Model") -> SegmentAttr: """Extract all attributes that are subclasses of :class:`..core.topologyattrs.SegmentAttr` from a ``gemmi.Model`` object. Parameters diff --git a/testsuite/MDAnalysisTests/topology/test_mmcif.py b/testsuite/MDAnalysisTests/topology/test_mmcif.py index b4ff08a8a55..0498fc2feed 100644 --- a/testsuite/MDAnalysisTests/topology/test_mmcif.py +++ b/testsuite/MDAnalysisTests/topology/test_mmcif.py @@ -35,7 +35,10 @@ def test_chains(mmcif_filename, n_chains): @pytest.mark.parametrize( "mmcif_filename,sequence", [ - (f"{MMCIF_FOLDER}/1YJP.cif", ["GLY", "ASN", "ASN", "GLN", "GLN", "ASN", "TYR"]), + ( + f"{MMCIF_FOLDER}/1YJP.cif", + ["GLY", "ASN", "ASN", "GLN", "GLN", "ASN", "TYR"], + ), ( f"{MMCIF_FOLDER}/1YJP.cif.gz", ["GLY", "ASN", "ASN", "GLN", "GLN", "ASN", "TYR"], @@ -47,6 +50,7 @@ def test_chains(mmcif_filename, n_chains): def test_sequence(mmcif_filename, sequence): u = mda.Universe(mmcif_filename) in_structure = [ - str(res.resname) for res in u.select_atoms("protein and chainid A").residues + str(res.resname) + for res in u.select_atoms("protein and chainid A").residues ] assert in_structure == sequence, ":".join(in_structure)