Skip to content

Commit

Permalink
feat: create direct interface to force-field error analysis (#108)
Browse files Browse the repository at this point in the history
* Implement container for multiple calculations (Calculations)
* Implemented from_paths and from_files for Calculations
* Make sure that the constructor is not called, just like in calculation
* Add calculations.forces.read() and calculations.stresses.read()

* Create MLFFErrorAnalysis from_files and from_paths
* Add force attributes to error analysis class
* Add stress attributes to mlfferroranalysis
* Implement a _from_data for testing mlff_error_analysis
* Implement energy error for error analysis
* Get the stress error for error analysis
* Infer nions and nconfig from parent classes

* Change error_analysis script with new MLFFErrorAnalysis class
* Make error_analysis a script with the core package as well
* Use os.system instead of subprocess to test error-analysis script

* Add documentation and refactor MLFFErrorAnalysis to have x.mlff.energies type attributes

---------

Co-authored-by: Sudarshan Vijay <sudarshan.vijay@vasp.at>
Co-authored-by: Sudarshan Vijay <47235474+sudarshanv01@users.noreply.github.com>
Co-authored-by: jona <jonathan.lahnsteiner@vasp.at>
  • Loading branch information
4 people authored Sep 18, 2023
1 parent b80bf00 commit de084b5
Show file tree
Hide file tree
Showing 17 changed files with 1,220 additions and 635 deletions.
2 changes: 2 additions & 0 deletions core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ hypothesis = ">=6.48.1"
black = ">=22.6.0"
isort = ">=5.10.1"

[tool.poetry.scripts]
error-analysis = "py4vasp.scripts.error_analysis:main"

[tool.isort]
profile = "black"
Expand Down
5 changes: 4 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ authors = [
"Henrique Miranda <miranda.henrique@gmail.com>",
"Orest Dubay <orest-d@users.noreply.github.com>",
"Jonathan Lahnsteiner <jonathan.lahnsteiner@gmx.at>",
"Eisuke Kawashima <e-kwsm@users.noreply.github.com>"
"Eisuke Kawashima <e-kwsm@users.noreply.github.com>",
"Sudarshan Vijay <sudarshan.vijay@protonmail.com>"
]
license = "Apache-2.0"
readme = "README.md"
Expand Down Expand Up @@ -45,6 +46,8 @@ pre-commit = ">=3.3.3"
sphinx = ">=5.0.2"
sphinx-automodapi = ">=0.14.1"

[tool.poetry.scripts]
error-analysis = "py4vasp.scripts.error_analysis:main"

[tool.isort]
profile = "black"
Expand Down
1 change: 1 addition & 0 deletions src/py4vasp/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright © VASP Software GmbH,
# Licensed under the Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
from py4vasp._calculation import Calculation
from py4vasp._calculations import Calculations
from py4vasp._third_party.graph import plot
from py4vasp._third_party.interactive import set_error_handling

Expand Down
320 changes: 320 additions & 0 deletions src/py4vasp/_analysis/mlff.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,320 @@
# Copyright © VASP Software GmbH,
# Licensed under the Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0)
from types import SimpleNamespace
from typing import Dict

import numpy as np

from py4vasp import Calculations, exception


class MLFFErrorAnalysis:
"""A class to handle the error analysis of MLFF calculations.
This class is used to perform error analysis of MLFF calculations. It
provides methods to calculate the error in energy, forces and stresses
from MLFF and DFT calculations. See the documentation for the methods
for more details on the type of error calculated.
Notes
-----
The class is designed to be instantiated using the class methods
:meth:`from_paths` and :meth:`from_files`. Please use these methods instead
of directly calling the class.
Examples
--------
>>> from py4vasp import MLFFErrorAnalysis
>>> mlff_error_analysis = MLFFErrorAnalysis.from_paths(
... dft_data="path/to/dft/data",
... mlff_data="path/to/mlff/data",
... )
>>> energy_error = mlff_error_analysis.get_energy_error_per_atom()
>>> force_error = mlff_error_analysis.get_force_rmse()
>>> stress_error = mlff_error_analysis.get_stress_rmse()
>>> # If you want to normalize the error by the number of configurations
>>> energy_error = mlff_error_analysis.get_energy_error_per_atom(
... normalize_by_configurations=True
... )
>>> force_error = mlff_error_analysis.get_force_rmse(
... normalize_by_configurations=True
... )
>>> stress_error = mlff_error_analysis.get_stress_rmse(
... normalize_by_configurations=True
... )
"""

def __init__(self, *args, **kwargs):
self.mlff = SimpleNamespace()
self.dft = SimpleNamespace()

@classmethod
def _from_data(cls, _calculations):
mlff_error_analysis = cls(_internal=True)
mlff_error_analysis._calculations = _calculations
set_appropriate_attrs(mlff_error_analysis)
return mlff_error_analysis

@classmethod
def from_paths(cls, dft_data, mlff_data):
"""Create an instance of MLFFErrorAnalysis from paths to the data.
Starting from paths for DFT and MLFF data, this method creates an
instance of MLFFErrorAnalysis. The paths are used to read the data
from the files.
Parameters
----------
dft_data : str or pathlib.Path
Path to the DFT data. Accepts wildcards.
mlff_data : str or pathlib.Path
Path to the MLFF data. Accepts wildcards.
"""
mlff_error_analysis = cls(_internal=True)
calculations = Calculations.from_paths(dft_data=dft_data, mlff_data=mlff_data)
mlff_error_analysis._calculations = calculations
set_appropriate_attrs(mlff_error_analysis)
return mlff_error_analysis

@classmethod
def from_files(cls, dft_data, mlff_data):
"""Create an instance of MLFFErrorAnalysis from files.
Starting from files for DFT and MLFF data, this method creates an
instance of MLFFErrorAnalysis. The files are used to read the data
from the files.
Parameters
----------
dft_data : str or pathlib.Path
Path to the DFT data. Accepts wildcards.
mlff_data : str or pathlib.Path
Path to the MLFF data. Accepts wildcards.
"""
mlff_error_analysis = cls(_internal=True)
calculations = Calculations.from_files(dft_data=dft_data, mlff_data=mlff_data)
mlff_error_analysis._calculations = calculations
set_appropriate_attrs(mlff_error_analysis)
return mlff_error_analysis

def get_energy_error_per_atom(self, normalize_by_configurations=False):
"""Get the error in energy per atom.
This method calculates the error in energy per atom between the MLFF
and DFT calculations. The error is calculated as
:math:`\\frac{E_{MLFF} - E_{DFT}}{N_{ions}}`, where :math:`E_{MLFF}`
and :math:`E_{DFT}` are the energies from the MLFF and DFT calculations
respectively, and :math:`N_{ions}` is the number of ions in the
structure. If ``normalize_by_configurations`` is set to ``True``, the
error is averaged over the number of configurations.
Parameters
----------
normalize_by_configurations : bool, optional
If set to ``True``, the error is averaged over the number of
configurations. Defaults to ``False``.
"""
error = (self.mlff.energies - self.dft.energies) / self.dft.nions
if normalize_by_configurations:
error = np.sum(np.abs(error), axis=-1) / self.dft.nconfig
return error

def _get_rmse(self, dft_quantity, mlff_quantity, degrees_of_freedom):
norm_error = np.linalg.norm(dft_quantity - mlff_quantity, axis=-1)
error = np.sqrt(np.sum(norm_error**2, axis=-1) / degrees_of_freedom)
return error

def get_force_rmse(self, normalize_by_configurations=False):
"""Get the root mean square error in forces.
This method calculates the root mean square error in forces between the
MLFF and DFT calculations. The error is calculated as
:math:`\\sqrt{\\frac{\\sum_{i=1}^{N_{ions}}{\\sum_{j=1}^{3}{(F_{MLFF} - F_{DFT})^2}}}{3N_{ions}}}`,
where :math:`F_{MLFF}` and :math:`F_{DFT}` are the forces from the MLFF
and DFT calculations respectively, and :math:`N_{ions}` is the number
of ions in the structure. If ``normalize_by_configurations`` is set to
``True``, the error is averaged over the number of configurations.
Parameters
----------
normalize_by_configurations : bool, optional
If set to ``True``, the error is averaged over the number of
configurations. Defaults to ``False``.
"""
deg_freedom = 3 * self.dft.nions
error = self._get_rmse(self.dft.forces, self.mlff.forces, deg_freedom)
if normalize_by_configurations:
error = np.sum(error, axis=-1) / self.dft.nconfig
return error

def get_stress_rmse(self, normalize_by_configurations=False):
"""Get the root mean square error in stresses.
This method calculates the root mean square error in stresses between
the MLFF and DFT calculations. The error is calculated as
:math:`\\sqrt{\\frac{\\sum_{i=1}^{6}{(\\sigma_{MLFF} - \\sigma_{DFT})^2}}{6}}`,
where :math:`\\sigma_{MLFF}` and :math:`\\sigma_{DFT}` are the stresses
from the MLFF and DFT calculations respectively. If
``normalize_by_configurations`` is set to ``True``, the error is
averaged over the number of configurations.
"""
deg_freedom = 6
dft_stresses = np.triu(self.dft.stresses)
mlff_stresses = np.triu(self.mlff.stresses)
error = self._get_rmse(dft_stresses, mlff_stresses, deg_freedom)
if normalize_by_configurations:
error = np.sum(error, axis=-1) / self.dft.nconfig
return error


def set_appropriate_attrs(cls):
set_paths_and_files(cls)
set_number_of_ions(cls)
set_number_of_configurations(cls)
set_energies(cls)
set_force_related_attributes(cls)
set_stresses(cls)
validate_data(cls)


def validate_data(cls):
"""Validate the data passed to the class.
This method validates the data passed to the class. It checks if the
number of ions, lattice vectors and positions are consistent between the
DFT and MLFF calculations. If not, it raises an exception.
"""
try:
np.testing.assert_almost_equal(cls.dft.positions, cls.mlff.positions)
np.testing.assert_almost_equal(
cls.dft.lattice_vectors, cls.mlff.lattice_vectors
)
np.testing.assert_almost_equal(cls.dft.nions, cls.mlff.nions)
except AssertionError:
raise exception.IncorrectUsage(
"""\
Please pass a consistent set of data between DFT and MLFF calculations."""
)


def set_number_of_configurations(cls):
"""Set the number of configurations in the data.
This method sets the number of configurations in the data. It uses the
number of calculations performed to set the number of configurations.
Parameters
----------
cls : MLFFErrorAnalysis
An instance of MLFFErrorAnalysis.
"""
number_of_calculations = cls._calculations.number_of_calculations()
cls.dft.nconfig = number_of_calculations["dft_data"]
cls.mlff.nconfig = number_of_calculations["mlff_data"]


def set_number_of_ions(cls):
"""Set the number of ions in the data.
This method sets the number of ions in the data. It uses the number of
elements in the structures to set the number of ions.
Parameters
----------
cls : MLFFErrorAnalysis
An instance of MLFFErrorAnalysis.
"""
force_data = cls._calculations.forces.read()
structures_dft = _dict_to_list(force_data["dft_data"], "structure")
structures_mlff = _dict_to_list(force_data["mlff_data"], "structure")
elements_dft = _dict_to_array(structures_dft, "elements")
elements_mlff = _dict_to_array(structures_mlff, "elements")
nions_dft = np.array([len(_elements) for _elements in elements_dft])
nions_mlff = np.array([len(_elements) for _elements in elements_mlff])
cls.dft.nions = nions_dft
cls.mlff.nions = nions_mlff


def set_paths_and_files(cls):
"""Set the paths and files for the data.
This method sets the paths and files for the data. It uses the
:meth:`Calculations.paths` and :meth:`Calculations.files` methods to set
the paths and files.
Parameters
----------
cls : MLFFErrorAnalysis
An instance of MLFFErrorAnalysis.
"""
paths = cls._calculations.paths()
cls.dft.paths = paths["dft_data"]
cls.mlff.paths = paths["mlff_data"]
if hasattr(cls._calculations, "_files"):
files = cls._calculations.files()
cls.dft.files = files["dft_data"]
cls.mlff.files = files["mlff_data"]


def set_energies(cls):
"""Set the energies for the data.
This method sets the energies for the data. It uses the
:meth:`Calculations.energies` method to set the energies.
Parameters
----------
cls : MLFFErrorAnalysis
An instance of MLFFErrorAnalysis.
"""
tag = "free energy TOTEN"
energies_data = cls._calculations.energies.read()
cls.mlff.energies = _dict_to_array(energies_data["mlff_data"], tag)
cls.dft.energies = _dict_to_array(energies_data["dft_data"], tag)


def _dict_to_array(data: Dict, key: str) -> np.ndarray:
return np.array([_data[key] for _data in data])


def _dict_to_list(data: Dict, key: str) -> list:
return [_data[key] for _data in data]


def set_force_related_attributes(cls):
"""Set the force related attributes for the data.
This method sets the force related attributes for the data. It uses the
:meth:`Calculations.forces` method to set the forces, lattice vectors and
positions.
Parameters
----------
cls : MLFFErrorAnalysis
An instance of MLFFErrorAnalysis.
"""
force_data = cls._calculations.forces.read()
cls.dft.forces = _dict_to_array(force_data["dft_data"], "forces")
cls.mlff.forces = _dict_to_array(force_data["mlff_data"], "forces")
dft_structures = _dict_to_list(force_data["dft_data"], "structure")
mlff_structures = _dict_to_list(force_data["mlff_data"], "structure")
cls.dft.lattice_vectors = _dict_to_array(dft_structures, "lattice_vectors")
cls.mlff.lattice_vectors = _dict_to_array(mlff_structures, "lattice_vectors")
cls.dft.positions = _dict_to_array(dft_structures, "positions")
cls.mlff.positions = _dict_to_array(mlff_structures, "positions")


def set_stresses(cls):
"""Set the stresses for the data.
This method sets the stresses for the data. It uses the
:meth:`Calculations.stresses` method to set the stresses.
Parameters
----------
cls : MLFFErrorAnalysis
An instance of MLFFErrorAnalysis.
"""
stress_data = cls._calculations.stresses.read()
cls.dft.stresses = _dict_to_array(stress_data["dft_data"], "stress")
cls.mlff.stresses = _dict_to_array(stress_data["mlff_data"], "stress")
Loading

0 comments on commit de084b5

Please sign in to comment.