Skip to content

Commit

Permalink
Load xyz energies, forces, box (duartegroup#117)
Browse files Browse the repository at this point in the history
Add optional box, energy and force loading to load_xyz() in ConfigurationSet. Add dtype object for np arrays to allow loading structures with a different number of atoms. (Solves one of the issues in duartegroup#88 ) Add test for plotting function and functions in ConfigurationSet.
  • Loading branch information
JoeHolownia authored Dec 6, 2024
1 parent 808e07d commit 1e93696
Show file tree
Hide file tree
Showing 5 changed files with 326 additions and 41 deletions.
193 changes: 158 additions & 35 deletions mlptrain/configurations/configuration_set.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,15 @@
import mlptrain
import os
import re
import numpy as np
from time import time
from multiprocessing import Pool
from typing import Optional, List, Union
from autode.atoms import elements, Atom
from mlptrain.config import Config
from mlptrain.log import logger
from mlptrain.forces import Forces
from mlptrain.energy import Energy
from mlptrain.configurations.configuration import Configuration
from mlptrain.box import Box

Expand Down Expand Up @@ -50,7 +53,7 @@ def true_energies(self) -> List[Optional[float]]:
@property
def true_forces(self) -> Optional[np.ndarray]:
"""
True force tensor. shape = (N, n_atoms, 3)
List of true config forces. List of np.ndarray with shape: (n_atoms, 3)
-----------------------------------------------------------------------
Returns:
Expand Down Expand Up @@ -265,15 +268,34 @@ def compare(
name = self._comparison_name(*args)

if os.path.exists(f'{name}.npz'):
logger.info(f'Loading energies and forces from {name}.npz')
self.load(f'{name}.npz')

else:
for arg in args:
# if is an mlp model with a 'predict' function
if hasattr(arg, 'predict'):
arg.predict(self)

# if is a string reference to a QM calculation method
elif isinstance(arg, str):
self.single_point(method=arg)
# if true energies and forces do not already exist for this config set
non_null_true_energies = [
x for x in self.true_energies if x is not None
]
if (
len(non_null_true_energies) == 0
and self.true_forces.size == 0
):
logger.info(
f'Running single point calcs with method {arg}'
)
self.single_point(method=arg)
else:
logger.info(
f'Not using method {arg}, true energies and forces '
f'are already defined'
)

else:
raise ValueError(f'Cannot compare using {arg}')
Expand Down Expand Up @@ -318,42 +340,114 @@ def save_xyz(
return None

def load_xyz(
self, filename: str, charge: int, mult: int, box: Optional[Box] = None
self,
filename: str,
charge: int,
mult: int,
box: Optional[Box] = None,
load_energies: bool = False,
load_forces: bool = False,
) -> None:
"""
Load configurations from a .xyz file. Will not load any energies or
forces
Load configurations from a .xyz file with optional box, energies and forces if specified.
Note: this currently assumes that all configurations have the same charge and multiplicity.
-----------------------------------------------------------------------
Arguments:
filename:
filename: name of the input .xyz file
charge: total charge on all configurations in the set
mult: total spin multiplicity on all configurations in the set
charge: Total charge on the configuration
box: optionally specify a Box or None, if the configurations
are in vacuum (or if 'Lattice' is specified in extended .xyz)
mult: Total spin multiplicity
load_energies: bool - whether to load 'true' configurational energies or not
box: Box or None, if the configurations are in vacuum
load_forces: bool - whether to load 'true' forces from atom lines or not
"""
file_lines = open(filename, 'r', errors='ignore').readlines()
atoms = []

def is_xyz_line(_l):
return len(_l.split()) in (4, 7) and _l.split()[0] in elements

def append_configuration(_atoms):
self.append(Configuration(_atoms, charge, mult, box))
return None

for idx, line in enumerate(file_lines):
if is_xyz_line(line):
atoms.append(Atom(*line.split()[:4]))

elif len(atoms) > 0:
append_configuration(atoms)
atoms.clear()

if len(atoms) > 0:
append_configuration(atoms)
with open(filename, 'r', errors='ignore') as xyz_file:
# get first num atoms line
line_id = 1
line = xyz_file.readline()
while line:
# load everything for a single configuration in this loop
energy, atoms, forces = None, [], []
num_atoms = int(line)

# get comments / property line
line_id += 1
line = xyz_file.readline()

# get dictionary of properties and values (matching key=value with regex)
pattern = r'(\w+)=("[^"]*"|\S+)'
config_info = re.findall(pattern, line)
config_info_dict = {
key: value.strip('"') for key, value in config_info
}

# if using extended xyz format, get properties
config_box = box
if len(config_info) > 1:
if (
box is None
and config_info_dict.get('Lattice') is not None
):
# set box size from xyz properties line if it is specified
lattice_info = [
float(x)
for x in re.findall(
'[0-9]+', config_info_dict['Lattice']
)
]

config_box = Box(
[
lattice_info[0],
lattice_info[8],
lattice_info[16],
]
)

if load_energies:
assert (
config_info_dict.get('energy') is not None
), "Property 'energy' not specified on properties line..."
energy = float(config_info_dict['energy'])

# get atom lines
for _ in range(num_atoms):
line_id += 1
line = xyz_file.readline()
assert is_xyz_line(
line
), f'There was an error in parsing your xyz file on line: {line_id}'
line_split = line.split()
atoms.append(Atom(*line_split[:4]))

if load_forces:
# add forces to forces dict in configuration
if len(line_split) > 4:
force = tuple([float(x) for x in line_split[4:]])
assert (
len(force) == 3
), f'Force is not a 3D vector: {force}'
forces.append(force)

# create configuration, add forces, energy and append it to config set
configuration = Configuration(atoms, charge, mult, config_box)
configuration.forces = Forces(true=np.array(forces))
configuration.energy = Energy(true=energy)
self.append(configuration)

# get num atoms line for next config
line_id += 1
line = xyz_file.readline()

return None

Expand Down Expand Up @@ -434,7 +528,10 @@ def _coordinates(self) -> np.ndarray:
(np.ndarray): Coordinates tensor (n, n_atoms, 3),
where n is len(self)
"""
return np.array([np.asarray(c.coordinates, dtype=float) for c in self])
return np.array(
[np.asarray(c.coordinates, dtype=float) for c in self],
dtype=object,
)

@property
def plumed_coordinates(self) -> Optional[np.ndarray]:
Expand Down Expand Up @@ -471,9 +568,11 @@ def plumed_coordinates(self) -> Optional[np.ndarray]:

for i, coords in enumerate(all_coordinates):
if coords is None:
all_coordinates[i] = np.array([np.nan for _ in range(n_cvs)])
all_coordinates[i] = np.array(
[np.nan for _ in range(n_cvs)], dtype=float
)

return np.array(all_coordinates)
return np.array(all_coordinates, dtype=object)

@property
def _atomic_numbers(self) -> np.ndarray:
Expand All @@ -486,7 +585,8 @@ def _atomic_numbers(self) -> np.ndarray:
"""

return np.array(
[[atom.atomic_number for atom in c.atoms] for c in self]
[[atom.atomic_number for atom in c.atoms] for c in self],
dtype=object,
)

@property
Expand Down Expand Up @@ -514,7 +614,7 @@ def _multiplicities(self) -> np.ndarray:
return np.array([c.mult for c in self])

def _forces(self, kind: str) -> Optional[np.ndarray]:
"""True or predicted forces. Returns a 3D tensor"""
"""True or predicted forces. Returns a 3D np.ndarray."""

all_forces = []
for config in self:
Expand All @@ -524,7 +624,7 @@ def _forces(self, kind: str) -> Optional[np.ndarray]:

all_forces.append(getattr(config.forces, kind))

return np.array(all_forces)
return np.array(all_forces, dtype=object)

def _save_npz(self, filename: str) -> None:
"""Save a compressed numpy array of all the data in this set"""
Expand Down Expand Up @@ -557,14 +657,18 @@ def _load_npz(self, filename: str) -> None:
box = Box(size=data['L'][i])

config = Configuration(
atoms=_atoms_from_z_r(data['Z'][i], coords),
atoms=_atoms_from_z_r(
data['Z'][i], np.array(coords, dtype=float)
),
charge=int(data['C'][i]),
mult=int(data['M'][i]),
box=None if box.has_zero_volume else box,
)

if data['R_plumed'].ndim > 0:
config.plumed_coordinates = data['R_plumed'][i]
config.plumed_coordinates = np.array(
data['R_plumed'][i], dtype=float
)

if data['E_true'].ndim > 0:
config.energy.true = data['E_true'][i]
Expand All @@ -579,10 +683,12 @@ def _load_npz(self, filename: str) -> None:
config.energy.inherited_bias = data['E_inherited_bias'][i]

if data['F_true'].ndim > 0:
config.forces.true = data['F_true'][i]
config.forces.true = np.array(data['F_true'][i], dtype=float)

if data['F_predicted'].ndim > 0:
config.forces.predicted = data['F_predicted'][i]
config.forces.predicted = np.array(
data['F_predicted'][i], dtype=float
)

self.append(config)

Expand Down Expand Up @@ -647,6 +753,23 @@ def _run_parallel_method(self, function, **kwargs):
logger.info(f'Calculations done in {(time() - start_time) / 60:.1f} m')
return None

def __str__(self):
return (
f'ConfigurationSet Summary:\n'
f' Coords Dimensions: {self._coordinates.shape if self._coordinates is not None else None}\n'
f' Plumed Coords Dimensions: {self.plumed_coordinates.shape if self.plumed_coordinates is not None else None}\n'
f' Has True Energies: {any(x is not None for x in self.true_energies)}\n'
f' Has Predicted Energies: {any(x is not None for x in self.predicted_energies)}\n'
f' Has Bias Energies: {any(x is not None for x in self.bias_energies)}\n'
f' Has Inherit. Bias Energies: {any(x is not None for x in self.inherited_bias_energies)}\n'
f' True Forces Dim: {self.true_forces.shape}\n'
f' Predicted Forces Dim: {self.predicted_forces.shape}\n'
f' Atomic Numbers Dim: {self._atomic_numbers.shape}\n'
f' Unique Box Sizes: {np.unique(self._box_sizes)}\n'
f' Unique Charges: {np.unique(self._charges)}\n'
f' Unique Multiplicities: {np.unique(self._multiplicities)}'
)

@staticmethod
def _comparison_name(*args):
"""Name of a comparison between different methods"""
Expand Down
28 changes: 23 additions & 5 deletions mlptrain/configurations/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@


def parity_plot(
config_set: 'mlptrain.ConfigurationSet', name: str = 'paritiy'
config_set: 'mlptrain.ConfigurationSet', name: str = 'parity'
) -> None:
"""
Plot parity plots of energies, forces and temporal differences (if present)
Expand Down Expand Up @@ -131,14 +131,32 @@ def _add_force_component_plot(config_set, axis) -> None:
plt.get_cmap('Purples'),
]

min_f = min(
[np.min(config_set.true_forces), np.min(config_set.predicted_forces)]
# get the min and max force components in any of (x, y, z) directions for plotting
min_true_f = min(
[np.min(config_forces) for config_forces in config_set.true_forces]
)

max_f = min(
[np.max(config_set.true_forces), np.max(config_set.predicted_forces)]
min_pred_f = min(
[
np.min(config_forces)
for config_forces in config_set.predicted_forces
]
)

max_true_f = max(
[np.max(config_forces) for config_forces in config_set.true_forces]
)

max_pred_f = max(
[
np.max(config_forces)
for config_forces in config_set.predicted_forces
]
)

min_f = min([min_true_f, min_pred_f])
max_f = min([max_true_f, max_pred_f])

for idx, k in enumerate(['x', 'y', 'z']):
xs, ys = [], []
for config in config_set:
Expand Down
1 change: 0 additions & 1 deletion tests/data/methane.xyz
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@ H -1.19178 2.07309 0.94983
H -1.19178 1.76033 -0.76926
H -1.19178 3.40548 -0.18057
5
5

C -0.83511 2.41296 0.00000
H 0.23489 2.41296 0.00000
Expand Down
Loading

0 comments on commit 1e93696

Please sign in to comment.