diff --git a/core/pyproject.toml b/core/pyproject.toml index 49bbdfe0..20e48d04 100644 --- a/core/pyproject.toml +++ b/core/pyproject.toml @@ -33,6 +33,8 @@ hypothesis = ">=6.48.1" black = ">=22.6.0" isort = ">=5.10.1" +[tool.poetry.scripts] +error-analysis = "py4vasp.scripts.error_analysis:main" [tool.isort] profile = "black" diff --git a/pyproject.toml b/pyproject.toml index 296aed3e..1ba7c435 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,8 @@ authors = [ "Henrique Miranda ", "Orest Dubay ", "Jonathan Lahnsteiner ", - "Eisuke Kawashima " + "Eisuke Kawashima ", + "Sudarshan Vijay " ] license = "Apache-2.0" readme = "README.md" @@ -45,6 +46,8 @@ pre-commit = ">=3.3.3" sphinx = ">=5.0.2" sphinx-automodapi = ">=0.14.1" +[tool.poetry.scripts] +error-analysis = "py4vasp.scripts.error_analysis:main" [tool.isort] profile = "black" diff --git a/src/py4vasp/__init__.py b/src/py4vasp/__init__.py index 11874119..1db3d4b2 100644 --- a/src/py4vasp/__init__.py +++ b/src/py4vasp/__init__.py @@ -1,6 +1,7 @@ # Copyright © VASP Software GmbH, # Licensed under the Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0) from py4vasp._calculation import Calculation +from py4vasp._calculations import Calculations from py4vasp._third_party.graph import plot from py4vasp._third_party.interactive import set_error_handling diff --git a/src/py4vasp/_analysis/mlff.py b/src/py4vasp/_analysis/mlff.py new file mode 100644 index 00000000..4b78b221 --- /dev/null +++ b/src/py4vasp/_analysis/mlff.py @@ -0,0 +1,320 @@ +# Copyright © VASP Software GmbH, +# Licensed under the Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +from types import SimpleNamespace +from typing import Dict + +import numpy as np + +from py4vasp import Calculations, exception + + +class MLFFErrorAnalysis: + """A class to handle the error analysis of MLFF calculations. + + This class is used to perform error analysis of MLFF calculations. It + provides methods to calculate the error in energy, forces and stresses + from MLFF and DFT calculations. See the documentation for the methods + for more details on the type of error calculated. + + Notes + ----- + The class is designed to be instantiated using the class methods + :meth:`from_paths` and :meth:`from_files`. Please use these methods instead + of directly calling the class. + + Examples + -------- + >>> from py4vasp import MLFFErrorAnalysis + >>> mlff_error_analysis = MLFFErrorAnalysis.from_paths( + ... dft_data="path/to/dft/data", + ... mlff_data="path/to/mlff/data", + ... ) + >>> energy_error = mlff_error_analysis.get_energy_error_per_atom() + >>> force_error = mlff_error_analysis.get_force_rmse() + >>> stress_error = mlff_error_analysis.get_stress_rmse() + >>> # If you want to normalize the error by the number of configurations + >>> energy_error = mlff_error_analysis.get_energy_error_per_atom( + ... normalize_by_configurations=True + ... ) + >>> force_error = mlff_error_analysis.get_force_rmse( + ... normalize_by_configurations=True + ... ) + >>> stress_error = mlff_error_analysis.get_stress_rmse( + ... normalize_by_configurations=True + ... ) + """ + + def __init__(self, *args, **kwargs): + self.mlff = SimpleNamespace() + self.dft = SimpleNamespace() + + @classmethod + def _from_data(cls, _calculations): + mlff_error_analysis = cls(_internal=True) + mlff_error_analysis._calculations = _calculations + set_appropriate_attrs(mlff_error_analysis) + return mlff_error_analysis + + @classmethod + def from_paths(cls, dft_data, mlff_data): + """Create an instance of MLFFErrorAnalysis from paths to the data. + + Starting from paths for DFT and MLFF data, this method creates an + instance of MLFFErrorAnalysis. The paths are used to read the data + from the files. + + Parameters + ---------- + dft_data : str or pathlib.Path + Path to the DFT data. Accepts wildcards. + mlff_data : str or pathlib.Path + Path to the MLFF data. Accepts wildcards. + """ + mlff_error_analysis = cls(_internal=True) + calculations = Calculations.from_paths(dft_data=dft_data, mlff_data=mlff_data) + mlff_error_analysis._calculations = calculations + set_appropriate_attrs(mlff_error_analysis) + return mlff_error_analysis + + @classmethod + def from_files(cls, dft_data, mlff_data): + """Create an instance of MLFFErrorAnalysis from files. + + Starting from files for DFT and MLFF data, this method creates an + instance of MLFFErrorAnalysis. The files are used to read the data + from the files. + + Parameters + ---------- + dft_data : str or pathlib.Path + Path to the DFT data. Accepts wildcards. + mlff_data : str or pathlib.Path + Path to the MLFF data. Accepts wildcards. + """ + mlff_error_analysis = cls(_internal=True) + calculations = Calculations.from_files(dft_data=dft_data, mlff_data=mlff_data) + mlff_error_analysis._calculations = calculations + set_appropriate_attrs(mlff_error_analysis) + return mlff_error_analysis + + def get_energy_error_per_atom(self, normalize_by_configurations=False): + """Get the error in energy per atom. + + This method calculates the error in energy per atom between the MLFF + and DFT calculations. The error is calculated as + :math:`\\frac{E_{MLFF} - E_{DFT}}{N_{ions}}`, where :math:`E_{MLFF}` + and :math:`E_{DFT}` are the energies from the MLFF and DFT calculations + respectively, and :math:`N_{ions}` is the number of ions in the + structure. If ``normalize_by_configurations`` is set to ``True``, the + error is averaged over the number of configurations. + + Parameters + ---------- + normalize_by_configurations : bool, optional + If set to ``True``, the error is averaged over the number of + configurations. Defaults to ``False``. + """ + error = (self.mlff.energies - self.dft.energies) / self.dft.nions + if normalize_by_configurations: + error = np.sum(np.abs(error), axis=-1) / self.dft.nconfig + return error + + def _get_rmse(self, dft_quantity, mlff_quantity, degrees_of_freedom): + norm_error = np.linalg.norm(dft_quantity - mlff_quantity, axis=-1) + error = np.sqrt(np.sum(norm_error**2, axis=-1) / degrees_of_freedom) + return error + + def get_force_rmse(self, normalize_by_configurations=False): + """Get the root mean square error in forces. + + This method calculates the root mean square error in forces between the + MLFF and DFT calculations. The error is calculated as + :math:`\\sqrt{\\frac{\\sum_{i=1}^{N_{ions}}{\\sum_{j=1}^{3}{(F_{MLFF} - F_{DFT})^2}}}{3N_{ions}}}`, + where :math:`F_{MLFF}` and :math:`F_{DFT}` are the forces from the MLFF + and DFT calculations respectively, and :math:`N_{ions}` is the number + of ions in the structure. If ``normalize_by_configurations`` is set to + ``True``, the error is averaged over the number of configurations. + + Parameters + ---------- + normalize_by_configurations : bool, optional + If set to ``True``, the error is averaged over the number of + configurations. Defaults to ``False``. + """ + deg_freedom = 3 * self.dft.nions + error = self._get_rmse(self.dft.forces, self.mlff.forces, deg_freedom) + if normalize_by_configurations: + error = np.sum(error, axis=-1) / self.dft.nconfig + return error + + def get_stress_rmse(self, normalize_by_configurations=False): + """Get the root mean square error in stresses. + + This method calculates the root mean square error in stresses between + the MLFF and DFT calculations. The error is calculated as + :math:`\\sqrt{\\frac{\\sum_{i=1}^{6}{(\\sigma_{MLFF} - \\sigma_{DFT})^2}}{6}}`, + where :math:`\\sigma_{MLFF}` and :math:`\\sigma_{DFT}` are the stresses + from the MLFF and DFT calculations respectively. If + ``normalize_by_configurations`` is set to ``True``, the error is + averaged over the number of configurations. + """ + deg_freedom = 6 + dft_stresses = np.triu(self.dft.stresses) + mlff_stresses = np.triu(self.mlff.stresses) + error = self._get_rmse(dft_stresses, mlff_stresses, deg_freedom) + if normalize_by_configurations: + error = np.sum(error, axis=-1) / self.dft.nconfig + return error + + +def set_appropriate_attrs(cls): + set_paths_and_files(cls) + set_number_of_ions(cls) + set_number_of_configurations(cls) + set_energies(cls) + set_force_related_attributes(cls) + set_stresses(cls) + validate_data(cls) + + +def validate_data(cls): + """Validate the data passed to the class. + + This method validates the data passed to the class. It checks if the + number of ions, lattice vectors and positions are consistent between the + DFT and MLFF calculations. If not, it raises an exception. + """ + try: + np.testing.assert_almost_equal(cls.dft.positions, cls.mlff.positions) + np.testing.assert_almost_equal( + cls.dft.lattice_vectors, cls.mlff.lattice_vectors + ) + np.testing.assert_almost_equal(cls.dft.nions, cls.mlff.nions) + except AssertionError: + raise exception.IncorrectUsage( + """\ +Please pass a consistent set of data between DFT and MLFF calculations.""" + ) + + +def set_number_of_configurations(cls): + """Set the number of configurations in the data. + + This method sets the number of configurations in the data. It uses the + number of calculations performed to set the number of configurations. + + Parameters + ---------- + cls : MLFFErrorAnalysis + An instance of MLFFErrorAnalysis. + """ + number_of_calculations = cls._calculations.number_of_calculations() + cls.dft.nconfig = number_of_calculations["dft_data"] + cls.mlff.nconfig = number_of_calculations["mlff_data"] + + +def set_number_of_ions(cls): + """Set the number of ions in the data. + + This method sets the number of ions in the data. It uses the number of + elements in the structures to set the number of ions. + + Parameters + ---------- + cls : MLFFErrorAnalysis + An instance of MLFFErrorAnalysis. + """ + force_data = cls._calculations.forces.read() + structures_dft = _dict_to_list(force_data["dft_data"], "structure") + structures_mlff = _dict_to_list(force_data["mlff_data"], "structure") + elements_dft = _dict_to_array(structures_dft, "elements") + elements_mlff = _dict_to_array(structures_mlff, "elements") + nions_dft = np.array([len(_elements) for _elements in elements_dft]) + nions_mlff = np.array([len(_elements) for _elements in elements_mlff]) + cls.dft.nions = nions_dft + cls.mlff.nions = nions_mlff + + +def set_paths_and_files(cls): + """Set the paths and files for the data. + + This method sets the paths and files for the data. It uses the + :meth:`Calculations.paths` and :meth:`Calculations.files` methods to set + the paths and files. + + Parameters + ---------- + cls : MLFFErrorAnalysis + An instance of MLFFErrorAnalysis. + """ + paths = cls._calculations.paths() + cls.dft.paths = paths["dft_data"] + cls.mlff.paths = paths["mlff_data"] + if hasattr(cls._calculations, "_files"): + files = cls._calculations.files() + cls.dft.files = files["dft_data"] + cls.mlff.files = files["mlff_data"] + + +def set_energies(cls): + """Set the energies for the data. + + This method sets the energies for the data. It uses the + :meth:`Calculations.energies` method to set the energies. + + Parameters + ---------- + cls : MLFFErrorAnalysis + An instance of MLFFErrorAnalysis. + """ + tag = "free energy TOTEN" + energies_data = cls._calculations.energies.read() + cls.mlff.energies = _dict_to_array(energies_data["mlff_data"], tag) + cls.dft.energies = _dict_to_array(energies_data["dft_data"], tag) + + +def _dict_to_array(data: Dict, key: str) -> np.ndarray: + return np.array([_data[key] for _data in data]) + + +def _dict_to_list(data: Dict, key: str) -> list: + return [_data[key] for _data in data] + + +def set_force_related_attributes(cls): + """Set the force related attributes for the data. + + This method sets the force related attributes for the data. It uses the + :meth:`Calculations.forces` method to set the forces, lattice vectors and + positions. + + Parameters + ---------- + cls : MLFFErrorAnalysis + An instance of MLFFErrorAnalysis. + """ + force_data = cls._calculations.forces.read() + cls.dft.forces = _dict_to_array(force_data["dft_data"], "forces") + cls.mlff.forces = _dict_to_array(force_data["mlff_data"], "forces") + dft_structures = _dict_to_list(force_data["dft_data"], "structure") + mlff_structures = _dict_to_list(force_data["mlff_data"], "structure") + cls.dft.lattice_vectors = _dict_to_array(dft_structures, "lattice_vectors") + cls.mlff.lattice_vectors = _dict_to_array(mlff_structures, "lattice_vectors") + cls.dft.positions = _dict_to_array(dft_structures, "positions") + cls.mlff.positions = _dict_to_array(mlff_structures, "positions") + + +def set_stresses(cls): + """Set the stresses for the data. + + This method sets the stresses for the data. It uses the + :meth:`Calculations.stresses` method to set the stresses. + + Parameters + ---------- + cls : MLFFErrorAnalysis + An instance of MLFFErrorAnalysis. + """ + stress_data = cls._calculations.stresses.read() + cls.dft.stresses = _dict_to_array(stress_data["dft_data"], "stress") + cls.mlff.stresses = _dict_to_array(stress_data["mlff_data"], "stress") diff --git a/src/py4vasp/_calculations.py b/src/py4vasp/_calculations.py new file mode 100644 index 00000000..32cb2bf4 --- /dev/null +++ b/src/py4vasp/_calculations.py @@ -0,0 +1,131 @@ +# Copyright © VASP Software GmbH, +# Licensed under the Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +import inspect +import pathlib +from typing import Dict, List + +from py4vasp import combine, exception +from py4vasp._util import convert + + +class Calculations: + """A class to handle multiple Calculations all at once. + + This class combines the functionality of the Calculation class for more than one + calculation. Create a Calculations object using either a wildcard for a set of + paths or files or pass in paths and files directly. Then you can access the + properties of all calculations via the attributes of the object. + + Examples + -------- + >>> calcs = Calculations.from_paths(calc1="path_to_calc1", calc2="path_to_calc2") + >>> calcs.energies.read() # returns a dictionary with the energies of calc1 and calc2 + >>> calcs.forces.read() # returns a dictionary with the forces of calc1 and calc2 + >>> calcs.stresses.read() # returns a dictionary with the stresses of calc1 and calc2 + + Notes + ----- + To create new instances, you should use the classmethod :meth:`from_paths` or + :meth:`from_files`. This will ensure that the paths to your VASP calculations are + properly set and all features work as intended. Note that this is an alpha version + and the API might change in the future. + """ + + def __init__(self, *args, **kwargs): + if not kwargs.get("_internal"): + message = """\ +Please setup new CompareCalculations instance using the classmethod CompareCalculations.from_paths() +or CompareCalculations.from_files() instead of the constructor CompareCalculations().""" + raise exception.IncorrectUsage(message) + + def _path_finder(**kwargs): + for key, value in kwargs.items(): + if not (isinstance(value, pathlib.Path) or isinstance(value, str)): + message = """\ +Please provide a path to a VASP calculation as a string or pathlib.Path.""" + raise exception.IncorrectUsage(message) + paths = pathlib.Path(value).expanduser().absolute() + if "*" in paths.as_posix(): + paths = sorted(list(paths.parent.glob(paths.name))) + else: + paths = [paths] + yield key, paths + + @classmethod + def from_paths(cls, **kwargs): + """Set up a Calculations object for paths. + + Setup a calculation for paths by passing in a dictionary with the name of the + calculation as key and the path to the calculation as value. + + Parameters + ---------- + **kwargs : Dict[str, str or pathlib.Path] + A dictionary with the name of the calculation as key and the path to the + calculation as value. Wildcards are allowed. + """ + calculations = cls(_internal=True) + calculations._paths = {} + for key, paths in cls._path_finder(**kwargs): + calculations._paths[key] = paths + calculations = _add_all_combination_classes( + calculations, _add_attribute_from_path + ) + return calculations + + @classmethod + def from_files(cls, **kwargs): + """Set up a Calculations object from files. + + Setup a calculation for files by passing in a dictionary with the name of the + calculation as key and the path to the calculation as value. Note that this + limits the amount of information, you have access to, so prefer creating the + instance with the :meth:`from_paths` if possible. + + Parameters + ---------- + **kwargs : Dict[str, str or pathlib.Path] + A dictionary with the name of the calculation as key and the files to the + calculation as value. Wildcards are allowed. + """ + calculations = cls(_internal=True) + calculations._paths = {} + calculations._files = {} + for key, paths in cls._path_finder(**kwargs): + basedir_paths = [path.parent for path in paths] + calculations._paths[key] = basedir_paths + calculations._files[key] = paths + calculations = _add_all_combination_classes( + calculations, _add_attribute_from_file + ) + return calculations + + def paths(self) -> Dict[str, List[pathlib.Path]]: + """Return the paths of the calculations.""" + return self._paths + + def files(self) -> Dict[str, List[pathlib.Path]]: + """Return the files of the calculations.""" + return self._files + + def number_of_calculations(self) -> Dict[str, int]: + """Return the number of calculations for each calculation.""" + return {key: len(value) for key, value in self._paths.items()} + + +def _add_attribute_from_path(calc, class_): + instance = class_.from_paths(calc.paths()) + setattr(calc, convert.quantity_name(class_.__name__), instance) + return calc + + +def _add_attribute_from_file(calc, class_): + instance = class_.from_files(calc.files()) + setattr(calc, convert.quantity_name(class_.__name__), instance) + return calc + + +def _add_all_combination_classes(calc, add_single_class): + for _, class_ in inspect.getmembers(combine, inspect.isclass): + calc = add_single_class(calc, class_) + return calc diff --git a/src/py4vasp/_combine/base.py b/src/py4vasp/_combine/base.py new file mode 100644 index 00000000..5f228a5e --- /dev/null +++ b/src/py4vasp/_combine/base.py @@ -0,0 +1,99 @@ +# Copyright © VASP Software GmbH, +# Licensed under the Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +import importlib +import inspect +import pathlib +from typing import Dict, List + +from py4vasp import data, exception + + +def _match_combine_with_refinement(combine_name: str): + combine_to_refinement_name = { + "Energies": "Energy", + "Forces": "Force", + "Stresses": "Stress", + } + for _, class_ in inspect.getmembers(data, inspect.isclass): + if class_.__name__ == combine_to_refinement_name[combine_name]: + return class_ + else: + raise exception.IncorrectUsage( + f"Could not find refinement class for {combine_name}." + ) + + +class BaseCombine: + """A class to handle multiple refinements all at once. + + This class combines the functionality of the refinement class for more than one + refinement. Create a BaseCombine object using either a wildcard for a set of + paths or files or pass in paths and files directly. Then you can access the + properties of all refinements via the attributes of the object. + + Notes + ----- + To create new instances, you should use the classmethod :meth:`from_paths` or + :meth:`from_files`. This will ensure that the paths to your VASP calculations are + properly set and all features work as intended. Note that this is an alpha version + and the API might change in the future. + """ + + def __init__(self): + pass + + @classmethod + def from_paths(cls, paths: Dict[str, List[pathlib.Path]]): + """Set up a BaseCombine object for paths. + + Setup the object for paths by passing in a dictionary with the name of the + calculation as key and the path to the calculation as value. + + Parameters + ---------- + paths : Dict[str, List[pathlib.Path]] + A dictionary with the name of the calculation as key and the path to the + calculation as value. + """ + base = cls() + refinement = _match_combine_with_refinement(cls.__name__) + setattr(base, f"_{cls.__name__.lower()}", {}) + for key, path in paths.items(): + all_refinements = [refinement.from_path(_path) for _path in path] + base.__getattribute__(f"_{cls.__name__.lower()}")[key] = all_refinements + return base + + @classmethod + def from_files(cls, files: Dict[str, List[pathlib.Path]]): + """Set up a BaseCombine object for files. + + Setup the object for files by passing in a dictionary with the name of the + calculation as key and the path to the calculation as value. + + Parameters + ---------- + files : Dict[str, List[pathlib.Path]] + A dictionary with the name of the calculation as key and the path to the + calculation as value. + """ + base = cls() + refinement = _match_combine_with_refinement(cls.__name__) + setattr(base, f"_{cls.__name__.lower()}", {}) + for key, file in files.items(): + all_refinements = [refinement.from_file(_file) for _file in file] + base.__getattribute__(f"_{cls.__name__.lower()}")[key] = all_refinements + return base + + def _to_dict(self, *args, **kwargs): + _data = {} + _class_name = f"_{self.__class__.__name__.lower()}" + keyval_refinements = self.__getattribute__(_class_name).items() + for key, refinement in keyval_refinements: + _data[key] = [ + _refinement.read(*args, **kwargs) for _refinement in refinement + ] + return _data + + def read(self, *args, **kwargs): + """Read the data from the :meth:`read` method of the refinement class.""" + return self._to_dict(*args, **kwargs) diff --git a/src/py4vasp/_combine/energies.py b/src/py4vasp/_combine/energies.py new file mode 100644 index 00000000..630021d1 --- /dev/null +++ b/src/py4vasp/_combine/energies.py @@ -0,0 +1,9 @@ +# Copyright © VASP Software GmbH, +# Licensed under the Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +from py4vasp._combine.base import BaseCombine + + +class Energies(BaseCombine): + def __init__(self): + pass diff --git a/src/py4vasp/_combine/forces.py b/src/py4vasp/_combine/forces.py new file mode 100644 index 00000000..d9de7cbe --- /dev/null +++ b/src/py4vasp/_combine/forces.py @@ -0,0 +1,9 @@ +# Copyright © VASP Software GmbH, +# Licensed under the Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +from py4vasp._combine.base import BaseCombine + + +class Forces(BaseCombine): + def __init__(self): + pass diff --git a/src/py4vasp/_combine/stresses.py b/src/py4vasp/_combine/stresses.py new file mode 100644 index 00000000..275b7241 --- /dev/null +++ b/src/py4vasp/_combine/stresses.py @@ -0,0 +1,9 @@ +# Copyright © VASP Software GmbH, +# Licensed under the Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0) + +from py4vasp._combine.base import BaseCombine + + +class Stresses(BaseCombine): + def __init__(self): + pass diff --git a/src/py4vasp/_data/energy.py b/src/py4vasp/_data/energy.py index d544ac65..f84dd8d6 100644 --- a/src/py4vasp/_data/energy.py +++ b/src/py4vasp/_data/energy.py @@ -87,9 +87,10 @@ def to_dict(self, selection=None): return dict(self._read_data(tree, self._steps)) def _default_dict(self): + raw_values = np.array(self._raw_data.values).T return { convert.text_to_string(label).strip(): value[self._steps] - for label, value in zip(self._raw_data.labels, self._raw_data.values.T) + for label, value in zip(self._raw_data.labels, raw_values) } @base.data_access diff --git a/src/py4vasp/combine.py b/src/py4vasp/combine.py new file mode 100644 index 00000000..2178569c --- /dev/null +++ b/src/py4vasp/combine.py @@ -0,0 +1,5 @@ +# Copyright © VASP Software GmbH, +# Licensed under the Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +from py4vasp._combine.energies import Energies +from py4vasp._combine.forces import Forces +from py4vasp._combine.stresses import Stresses diff --git a/src/py4vasp/scripts/error_analysis.py b/src/py4vasp/scripts/error_analysis.py index 2fa1f206..decd75ca 100644 --- a/src/py4vasp/scripts/error_analysis.py +++ b/src/py4vasp/scripts/error_analysis.py @@ -5,9 +5,8 @@ from argparse import RawTextHelpFormatter import numpy as np -import pandas as pd -import py4vasp +from py4vasp._analysis.mlff import MLFFErrorAnalysis from py4vasp._third_party.graph import Graph, Series @@ -46,18 +45,15 @@ def get_options(args=sys.argv[1:]): requiredNamed.add_argument( "-dft", "--DFTfiles", - nargs="+", required=True, - help="Your vaspout.h5 input files obtained from DFT calculations." - + " Supply in a form, as for example vaspout_{1..200}.h5", + type=str, + help="Your vaspout.h5 input files obtained from DFT calculations.", ) requiredNamed.add_argument( "-ml", "--MLfiles", - nargs="+", required=True, - help="Your vaspout.h5 input files obtained from machine learning calculations" - + " Supply in a form, as for example vaspout_{1..200}.h5", + type=str, ) parser.add_argument( "-plt", @@ -83,607 +79,67 @@ def get_options(args=sys.argv[1:]): return options -class Reader: - """ - Reading a single vaspout.h5 file - - Parameters - ---------- - fname : str - Input filename - - Attributes - ---------- - fname : str - file name of hdf5 file - energy : float - total energy computed by vasp - force : ndarray - force array ( NIONS , 3 ) - lattice : ndarray - Bravais matrix for structure used for VASP calculation - positions : ndarray - atomic positions in direct coordinates - NIONS : int - number of ions in current structure - stress : ndarray - stress tensor computed by vasp - - Methods - ------- - read_energy - reading the energy, initialize energy - read_force - reading force, initialize force, positions, NIONS, lattice - read_stress - reading stress tensor upper triangle - """ - - def __init__(self, fname): - self.fname = fname - self.calc = py4vasp.Calculation.from_file(self.fname) - self.read_energy() - self.read_forces() - self.read_stress() - - def read_energy(self): - """read energy from vaspout.h5 file""" - energy = self.calc.energy.read() - self.energy = energy["free energy TOTEN"] - - def read_forces(self): - """read from vaspout.h5 file the following arrays: - forces, positions, NIONS, lattice - """ - force = self.calc.force.read() - self.lattice = force["structure"]["lattice_vectors"] - self.positions = force["structure"]["positions"] - self.NIONS = self.positions.shape[0] - self.force = force["forces"] - - def read_stress(self): - """read upper triangle - of stress tensor from vaspout.h5 file - """ - stress = self.calc.stress.read() - self.stress = stress["stress"][np.triu_indices(3)] - - -class AnalyseErrorSingleFile: - """compare a VASP MLFF and DFT file - - Parameters - ---------- - MLFF : str - vaspout.h5 file from machine learning calculation - DFT : str - vaspout.h5 file from DFT calculation - - Attributes - ---------- - MLdata : Reader - contains raw machine learning data - DFTdata : Reader - contains DFT data - force_error : float - root mean square error force - energy_error : float - energy error per atom - stress_error : float - root mean square error of stress tensor - - Methods - ------- - write_array_to_screen - writing array to screen - check_structures_for_equivalence - compare structure parameters, positions, lattice, number ions for - equivalence between structures - compute_force_error - compute root mean square error between forces - compute_energy_error_atom - computing the error in energy per atom - compute_stress_error - compute the root mean square error of upper triangle of stress tensor - root_mean_square_error_numpy_array - compute the root mean square error between two numpy arrays - root_mean_square_error_list - compute root mean square error between two python lists containing - ndarrays arrays - """ - - def __init__(self, MLFF, DFT): - self.ml_fname = MLFF - self.dft_fname = DFT - - print("Analysing errors between", self.ml_fname, " and ", self.dft_fname) - - # reading input data with Reader class - self.ml_data = Reader(self.ml_fname) - self.dft_data = Reader(self.dft_fname) - - # error checking if the two structures to compare are the same - self.check_structures_for_equivalence() - - # computing errors of the data extracted from vaspout.h5 file - self.compute_force_error() - self.compute_energy_error_atom() - self.compute_stress_error() - - @staticmethod - def write_array_to_screen(data): - """writing an array to the screen""" - for row in data: - print(row) - - def check_structures_for_equivalence(self): - np.testing.assert_equal( - self.ml_data.NIONS, - self.dft_data.NIONS, - err_msg=f"Number of ions does not match in {self.ml_fname} and {self.dft_fname}", - ) - - np.testing.assert_array_almost_equal( - self.ml_data.positions, - self.dft_data.positions, - decimal=6, - err_msg="The positions of your input data does not match in files " - + f"{self.ml_fname} and {self.dft_fname}", - ) - - np.testing.assert_array_almost_equal( - self.ml_data.lattice, - self.dft_data.lattice, - decimal=6, - err_msg="The lattice of your input data does not match in files " - + f"{self.ml_fname} and {self.dft_fname}", - ) - - def compute_force_error(self): - """compute the root mean square error between force arrays - of the DFT and machine learning approach - - Returns - ------- - force_error : float - root mean square error of force: - over x,y,z components and ions - """ - error = np.linalg.norm(self.dft_data.force[:, :] - self.ml_data.force[:, :]) - self.force_error = error / np.sqrt(self.ml_data.NIONS * 3) - - def compute_energy_error_atom(self): - """computing the energy error per atom between machine learning and DFT - error = ( E_{DFT}-E_{MLFF} ) / NIONS - - Returns - ------- - energy_error : float - energy difference divided by number atoms - """ - energy_difference = self.ml_data.energy - self.dft_data.energy - self.energy_error = energy_difference / self.ml_data.NIONS - - def compute_stress_error(self): - """computing the root mean square error of the stress tensor - between DFT and machine learning calculation - - Returns - ------- - stress_error --> root mean square error of stress tensor over its components - """ - self.stress_error = np.linalg.norm(self.ml_data.stress - self.dft_data.stress) - self.stress_error /= np.sqrt(self.ml_data.stress.shape[0]) - - @staticmethod - def root_mean_square_error_numpy_array(data_A, data_B): - """computing root mean square error between two data sets - - Parameters - ---------- - dataA, dataB : ndarray - numpy ndarray where sizes have to match - - Returns - ------- - float - root mean square error between dataA and dataB - """ - return np.linalg.norm(data_A - data_B) / np.sqrt((np.product(data_A.shape))) - - @classmethod - def root_mean_square_error_list(cls, data_A, data_B): - """computing root mean square errors between equally sized python - lists containing numpy arrays - - Parameters - ---------- - data_A, data_B : list - python list containing numpy arrays of equal size and order as in - the data_B array - """ - np.testing.assert_equal( - len(data_A), - len(data_B), - err_msg=f"Error in root_mean_square_error_list in {cls.__name__}" - + "the dimensions of the list do not match", - ) - error = 0 - number_elements = 0 - for a, b in zip(data_A, data_B): - error += np.sum((a - b) ** 2) - number_elements += a.size - - error = np.sqrt(error / number_elements) - return error - - -class AnalyseError: - """compute errors between two vaspout files - - Parameters - ---------- - ML : str - list of machine learning vaspout.h5 files - Dft : str - list of DFT vaspout.h5 files to compare MLFF - - Attributes - ---------- - indx_start : int - is always one and gives the start point of xaxis - indx_end : int - end point of xaxis, determined by number of files - eV_to_meV : float - conversion factor from electron volts (eV) to meV - xaxis : ndarray - equally spaced axis between indx_start and indx_end - indx_start = 1 - indx_end = len( ML ) - len( ML ) points - energy_error : ndarray - energy errors per atom for all supplied file pairs - force_error : ndarray - root mean square errors between the supplied file pairs - stress_error : ndarray - root mean square error between supplied file pairs - energy : ndarray - DFT and MLFF energies per atom of all supplied files - force : list - forces of the DFT and MLFF calculations extracted from supplied files - stress : ndarray - upper triangular part of stress tensor of DFT and MLFF calculations - - Methods - ------- - make_x_axis - generate xaxis for plots, structure index - compute_errors - computing the errors between the two file sets for every file pair - separate - compute_average_errors - compute root mean square errors between total data set - print_average_errors - printing average errors to screen - plot_energy_error - generate plot for energy error per atom - plot_force_error - plot root mean square error for forces - plot_stress_error - plot root mean square errors of stress tensor - make_plot - plot energy, force, stress error in subplots - prepare_output_array - concatenate two arrays along second axis - format_float - convert ndarray to formatted output string - writing_energy_error_file - writing energy errors per atom to file - writing_force_error_file - writing root mean square errors to file - writing_stress_error_file - writing root mean square error of force to file - write_csv_output_file - writing all collected errors to csv file - """ - - def __init__(self, ML, Dft): - self.ml_files = ML - self.dft_files = Dft - np.testing.assert_equal( - len(self.ml_files), - len(self.dft_files), - err_msg="Different number of dft and mlff files supplied. Please check your input", - ) - - self.indx_start = 1 - self.indx_end = len(ML) - - # scaling factor to meV - self.eV_to_meV = 1000 - - self.make_x_axis() - - self.compute_errors() - self.compute_average_errors() - self.print_average_errors() - - def make_x_axis(self): - """create a x-axis for plots - - IndxStart and IndxEnd determine start and end - the interval will be decomposed into - number of supplied input files - """ - self.xaxis = np.linspace(self.indx_start, self.indx_end, len(self.ml_files)) - - def compute_errors(self): - """computing the errors in the data with the help of the class - AnalyseErrorSingleFile - - Returns - ------- - energy_error : ndarray - 2d array of size (Nstructures , 1 ) - error per atom between DFT and MLFF calculation, - computed in AnalyseErrorSingleFile - force_error : ndarray - 2d array of size ( Nstructures , 1 ) - root mean square error in force between the DFT and MLFF calculation - computed with AnalyseErrorSingleFile - stress_error : ndarray - 2d array of size (Nstructures , 1 ) - root mean square error of stress tensor between a machine learning - calculation and DFT calculation - energy : ndarray - 2d array of size (Nstructures , 2 ) - self.energy[ : , 0 ] -> DFT energies in [eV] - self.energy[ : , 1 ] -> MLFF energies in [eV] - force : list - python list of len 2 - index 0 is the dft data - index 1 is the machine learning data - both indexes then contain python lists of the len of reference - configurations; and in those numpy arrays are contained - NIONS,3 - list list numpy array - force[dft/ml][ reference configuration ][ NIONS , 3 ] - units are in eV/Angstroem - stress : ndarray - 3d array of size ( Nstructures , 2 , size stress ) - 1st index -> structure index - 2nd index refers to DFT -> 0 / MLFF -> 1 - 3rd index are components of stress tensor. - units are in kbar - """ - self.force_error = np.zeros(len(self.ml_files)) - self.energy_error = np.zeros(len(self.ml_files)) - self.stress_error = np.zeros(len(self.ml_files)) - - # extract NIONS - x = AnalyseErrorSingleFile(self.ml_files[0], self.dft_files[0]) - self.energy = { - "dft": np.zeros([len(self.ml_files), 1]), - "ml": np.zeros([len(self.ml_files), 1]), - } - - self.stress = { - "dft": np.zeros([len(self.ml_files), x.ml_data.stress.shape[0]]), - "ml": np.zeros([len(self.ml_files), x.ml_data.stress.shape[0]]), - } - # force has to be a list to make variable atom numbers possible - self.force = {"dft": [], "ml": []} - - for i in range(len(self.ml_files)): - x = AnalyseErrorSingleFile(self.ml_files[i], self.dft_files[i]) - # extract energies - self.energy_error[i] = x.energy_error * self.eV_to_meV - self.energy["dft"][i] = x.dft_data.energy / x.dft_data.NIONS - self.energy["ml"][i] = x.ml_data.energy / x.ml_data.NIONS - - # extract forces - self.force_error[i] = x.force_error * self.eV_to_meV - self.force["dft"].append(x.dft_data.force) - self.force["ml"].append(x.ml_data.force) - - # extract stress - self.stress_error[i] = x.stress_error - self.stress["dft"][i, :] = x.dft_data.stress - self.stress["ml"][i, :] = x.ml_data.stress - - def compute_average_errors(self): - """computing single error values for energy, forces, stress - over the whole data set - - Returns - ------- - rmse_energy : float - is the root mean square error of the energies computed over the - supplied configurations in [meV] - rmse_force : float - root mean square error of forces computed over all configurations, - atoms and x y and z direction in [meV]/Angstroem - rmse_stress : float - root mean square error of stress tensor computed over all configurations - and components of the tensor - """ - self.rmse_energy = AnalyseErrorSingleFile.root_mean_square_error_numpy_array( - self.energy["dft"], self.energy["ml"] - ) - self.rmse_energy *= self.eV_to_meV - - self.rmse_force = AnalyseErrorSingleFile.root_mean_square_error_list( - self.force["dft"], self.force["ml"] - ) - self.rmse_force *= self.eV_to_meV - - self.rmse_stress = AnalyseErrorSingleFile.root_mean_square_error_numpy_array( - self.stress["dft"], self.stress["ml"] - ) - - def print_average_errors(self): - """printing the root mean square errors of energy, force and stress tensor - in the test set to the screen - """ - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") - print("Root mean square error of energy in meV/atom", self.rmse_energy) - print("Root mean square error of force in meV/Angstroem", self.rmse_force) - print("Root mean square error of stress tensor in kbar", self.rmse_stress) - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") - - def plot_energy_error(self): - """plotting energy error/atom with sign versus the input configurations""" - graph = Graph( - series=Series(self.xaxis, self.energy_error[:, 0]), - xlabel="configuration", - ylabel="energy error [meV/atom]", - ) - figure = graph.to_plotly() - figure.show() - return figure - - def plot_force_error(self): - """plotting force rmse meV/Angstroem versus input configurations""" - graph = Graph( - series=Series(self.xaxis, self.force_error[:, 0]), - xlabel="configuration", - ylabel="rmse force [meV/Å]$", - ) - figure = graph.to_plotly() - figure.show() - return figure - - def plot_stress_error(self): - """plotting force rmse meV/Angstroem versus input configurations""" - graph = Graph( - series=Series(self.xaxis, self.stress_error[:, 0]), - xlabel="configuration", - ylabel="rmse stress [kbar]", - ) - figure = graph.to_plotly() +def write_energy_error_file(cls, fname="EnergyError.out"): + files = cls._calculations.files() + energy_error_per_atom = cls.get_energy_error_per_atom() + dft_files = files["dft_data"] + mlff_files = files["mlff_data"] + writeout = np.array([dft_files, mlff_files, energy_error_per_atom]).T + header = "file_path_dft, file_path_mlff, energy difference in eV/atom (value > 0 MLFF predicts too high value)" + np.savetxt(fname, writeout, fmt="%s", delimiter=",", header=header) + + +def write_force_error_file(cls, fname="ForceError.out"): + files = cls._calculations.files() + force_error = cls.get_force_rmse() + dft_files = files["dft_data"] + mlff_files = files["mlff_data"] + writeout = np.array([dft_files, mlff_files, force_error]).T + header = "file_path_dft, file_path_mlff, force rmse in eV/Angstrom" + np.savetxt(fname, writeout, fmt="%s", delimiter=",", header=header) + + +def write_stress_error_file(cls, fname="StressError.out"): + files = cls._calculations.files() + stress_error = cls.get_stress_rmse() + dft_files = files["dft_data"] + mlff_files = files["mlff_data"] + writeout = np.array([dft_files, mlff_files, stress_error]).T + header = "file_path_dft, file_path_mlff, stress rmse in kbar" + np.savetxt(fname, writeout, fmt="%s", delimiter=",", header=header) + + +def make_plot(cls, show=False, pdf=False, graph_name="ErrorAnalysis.pdf"): + energy_error = cls.get_energy_error_per_atom() + force_error = cls.get_force_rmse() + stress_error = cls.get_stress_rmse() + xaxis = np.arange(1, len(energy_error) + 1) + energy = Series(xaxis, energy_error[:], subplot=1) + force = Series(xaxis, force_error[:], subplot=2) + stress = Series(xaxis, stress_error[:], subplot=3) + graph = Graph((energy, force, stress)) + graph.xlabel = ("configuration",) * 3 + graph.ylabel = ( + "error energy [eV/atom]", + "rmse force [eV/Å]", + "rmse stress [kbar]", + ) + figure = graph.to_plotly() + if pdf: + figure.write_image(graph_name) + if show: figure.show() - return figure - - def make_plot(self, show=False, pdf=False, graph_name="ErrorAnalysis.pdf"): - """make plot summarizing the energy error per atom [meV/atom] - the root mean square error of force [meV/Angstroem] - and the root mean square error of the stress tensor in [kbar] - """ - energy = Series(self.xaxis, self.energy_error[:], subplot=1) - force = Series(self.xaxis, self.force_error[:], subplot=2) - stress = Series(self.xaxis, self.stress_error[:], subplot=3) - - graph = Graph((energy, force, stress)) - graph.xlabel = ("configuration",) * 3 - graph.ylabel = ( - "error energy [meV/atom]", - "rmse force [meV/Å]", - "rmse stress [kbar]", - ) - figure = graph.to_plotly() - if pdf: - figure.write_image(graph_name) - if show: - figure.show() - return figure - - @staticmethod - def prepare_output_array(x, y): - """concatenate two input arrays of shape (N , ) to a 2d array of shape (N,2) - that can be used by numpy.savetxt - - Parameters - ---------- - x, y : ndarray - numpy array of shape ( N , ) - - Returns - ---------- - data : ndarray - numpy array of shape ( N , 2 ) containing x,y - """ - xx = np.reshape(x, [x.shape[0], 1]) - yy = np.reshape(y, [y.shape[0], 1]) - data = np.concatenate((xx, yy), axis=1) - return data - - @staticmethod - def format_float(data): - return "".join(["{:20.8f}".format(x) for x in data]) - - def writing_energy_error_file(self, fname="EnergyError.out"): - """writing the energy error to a output file the output file format will be - structure index | energy per atom in meV - """ - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") - print("Writing the energy error in meV/atom to file ", fname) - print("the format is structure index vs error") - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") - data = self.prepare_output_array(self.xaxis, self.energy_error[:]) - with open(fname, "w") as outfile: - outfile.write( - f"# RMSE energy/atom in meV/atom {self.format_float([self.rmse_energy])}\n" - ) - outfile.write( - "# structure index energy difference in meV/atom " - + "(value > 0 MLFF predicts too high value)\n" - ) - for row in data: - outfile.write(self.format_float(row) + "\n") - - def writing_force_error_file(self, fname="ForceError.out"): - """writing the force error to a output file the output file format will be - structure index | root mean square force error in meV/Angstroem - """ - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") - print("Writing the force error in meV/Angstroem per atom to file ", fname) - print("the format is structure index vs error") - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") - data = self.prepare_output_array(self.xaxis, self.force_error[:]) - with open(fname, "w") as outfile: - outfile.write( - f"# RMSE force in meV/Angstroem {self.format_float([self.rmse_force])}\n" - ) - outfile.write("# structure index RMSE in meV/Angstroem \n") - for row in data: - outfile.write(self.format_float(row) + "\n") - - def writing_stress_error_file(self, fname="StressError.out"): - """writing the stress error to a output file the output file format will be - structure index | root mean square stress error in [kBar] - """ - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") - print("Writing the stress error in [kBar] to file ", fname) - print("the format is structure index vs error") - print("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~") - - data = self.prepare_output_array(self.xaxis, self.stress_error[:]) - with open(fname, "w") as outfile: - outfile.write( - f"# RMSE stress in kilo-bar {self.format_float([self.rmse_stress])}\n" - ) - outfile.write("# structure index RMSE in kbar \n") - for row in data: - outfile.write(self.format_float(row) + "\n") - - def write_csv_output_file(self, fname="ErrorAnalysis.csv"): - """write collected errors to a csv file""" - data = { - "energy error": self.energy_error[:], - "force error": self.force_error[:], - "stress error": self.stress_error[:], - } - dataframe = pd.DataFrame(data=data) - dataframe.to_csv(fname) + return figure def main(): options = get_options(sys.argv[1:]) - x = AnalyseError(options.MLfiles, options.DFTfiles) - if options.MakePlot or options.pdfplot: - x.make_plot(options.MakePlot, options.pdfplot) + mlff_error_analysis = MLFFErrorAnalysis.from_files( + dft_data=options.DFTfiles, mlff_data=options.MLfiles + ) if options.XYtextFile: - x.writing_energy_error_file() - x.writing_force_error_file() - x.writing_stress_error_file() - x.write_csv_output_file() + write_energy_error_file(mlff_error_analysis) + write_force_error_file(mlff_error_analysis) + write_stress_error_file(mlff_error_analysis) + if options.MakePlot or options.pdfplot: + make_plot(mlff_error_analysis, options.MakePlot, options.pdfplot) diff --git a/tests/analysis/__init__.py b/tests/analysis/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/analysis/test_mlff.py b/tests/analysis/test_mlff.py new file mode 100644 index 00000000..ebe55101 --- /dev/null +++ b/tests/analysis/test_mlff.py @@ -0,0 +1,358 @@ +# Copyright © VASP Software GmbH, +# Licensed under the Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +from collections import defaultdict +from dataclasses import dataclass +from pathlib import Path +from typing import Dict +from unittest.mock import patch + +import numpy as np +import numpy.typing as npt +import pytest + +from py4vasp import exception +from py4vasp._analysis.mlff import MLFFErrorAnalysis +from py4vasp.data import Energy, Force, Stress + +from ..conftest import raw_data + + +class BaseCalculations: + def read(self): + return self._data + + +class Energies(BaseCalculations): + def __init__(self, data): + self._data = data + + +class Forces(BaseCalculations): + def __init__(self, data): + self._data = data + + +class Stresses(BaseCalculations): + def __init__(self, data): + self._data = data + + +class MockCalculations: + @classmethod + def set_attributes( + cls, + _energies: Dict[str, np.ndarray], + _forces: Dict[str, np.ndarray], + _stresses: Dict[str, np.ndarray], + _paths: Dict[str, Path], + ): + _cls = cls() + setattr(_cls, "energies", Energies(data=_energies)) + setattr(_cls, "forces", Forces(data=_forces)) + setattr(_cls, "stresses", Stresses(data=_stresses)) + setattr(_cls, "_paths", _paths) + return _cls + + def number_of_calculations(self): + num_ions = len(self.forces.read()["mlff_data"]) + return {"dft_data": num_ions, "mlff_data": num_ions} + + def paths(self): + return self._paths + + +@pytest.fixture +def mock_calculations(raw_data): + data = defaultdict(lambda: defaultdict(list)) + for datatype in ["dft_data", "mlff_data"]: + raw_energy = raw_data.energy("relax", randomize=True) + energy = Energy.from_data(raw_energy) + energy_data = energy.read() + data["_energies"][datatype].append(energy_data) + raw_force = raw_data.force("Sr2TiO4", randomize=True) + force = Force.from_data(raw_force) + force_data = force.read() + data["_forces"][datatype].append(force_data) + raw_stress = raw_data.stress("Sr2TiO4", randomize=True) + stress = Stress.from_data(raw_stress) + stress_data = stress.read() + data["_stresses"][datatype].append(stress_data) + data["_paths"][datatype].append(Path(__file__) / "calc") + data = {key: dict(value) for key, value in data.items()} + _mock_calculations = MockCalculations.set_attributes(**data) + return _mock_calculations + + +@pytest.fixture +def mock_multiple_calculations(raw_data): + data = defaultdict(lambda: defaultdict(list)) + for datatype in ["dft_data", "mlff_data"]: + for i in range(4): + raw_energy = raw_data.energy("relax", randomize=True) + energy = Energy.from_data(raw_energy) + energy_data = energy.read() + data["_energies"][datatype].append(energy_data) + raw_force = raw_data.force("Sr2TiO4", randomize=True) + force = Force.from_data(raw_force) + force_data = force.read() + data["_forces"][datatype].append(force_data) + raw_stress = raw_data.stress("Sr2TiO4", randomize=True) + stress = Stress.from_data(raw_stress) + stress_data = stress.read() + data["_stresses"][datatype].append(stress_data) + data["_paths"][datatype].append(Path(__file__) / "calc") + data = {key: dict(value) for key, value in data.items()} + _mock_calculations = MockCalculations.set_attributes(**data) + return _mock_calculations + + +@pytest.fixture +def mock_calculations_incorrect(raw_data): + data = defaultdict(lambda: defaultdict(list)) + for datatype in ["dft_data", "mlff_data"]: + raw_energy = raw_data.energy("relax", randomize=True) + energy = Energy.from_data(raw_energy) + energy_data = energy.read() + data["_energies"][datatype].append(energy_data) + if datatype == "mlff_data": + species = "Sr2TiO4" + else: + species = "Fe3O4" + raw_force = raw_data.force(species, randomize=True) + force = Force.from_data(raw_force) + force_data = force.read() + data["_forces"][datatype].append(force_data) + raw_stress = raw_data.stress("Sr2TiO4", randomize=True) + stress = Stress.from_data(raw_stress) + stress_data = stress.read() + data["_stresses"][datatype].append(stress_data) + data["_paths"][datatype].append(Path(__file__) / "calc") + data = {key: dict(value) for key, value in data.items()} + _mock_calculations = MockCalculations.set_attributes(**data) + return _mock_calculations + + +@patch("py4vasp._data.base.Refinery.from_path", autospec=True) +@patch("py4vasp.raw.access", autospec=True) +def test_read_inputs_from_path(mock_access, mock_from_path): + absolute_path_dft = Path(__file__) / "dft" + absolute_path_mlff = Path(__file__) / "mlff" + error_analysis = MLFFErrorAnalysis.from_paths( + dft_data=absolute_path_dft, mlff_data=absolute_path_mlff + ) + assert isinstance(error_analysis.mlff.energies, np.ndarray) + assert isinstance(error_analysis.dft.energies, np.ndarray) + assert isinstance(error_analysis.mlff.forces, np.ndarray) + assert isinstance(error_analysis.dft.forces, np.ndarray) + assert isinstance(error_analysis.mlff.lattice_vectors, np.ndarray) + assert isinstance(error_analysis.dft.lattice_vectors, np.ndarray) + assert isinstance(error_analysis.mlff.positions, np.ndarray) + assert isinstance(error_analysis.dft.positions, np.ndarray) + assert isinstance(error_analysis.mlff.nconfig, int) + assert isinstance(error_analysis.dft.nconfig, int) + assert isinstance(error_analysis.mlff.nions, np.ndarray) + assert isinstance(error_analysis.dft.nions, np.ndarray) + assert isinstance(error_analysis.mlff.stresses, np.ndarray) + assert isinstance(error_analysis.dft.stresses, np.ndarray) + + +@patch("py4vasp._data.base.Refinery.from_path", autospec=True) +@patch("py4vasp.raw.access", autospec=True) +def test_read_inputs_from_files(mock_analysis, mock_from_path): + absolute_files_dft = Path(__file__) / "dft*.h5" + absolute_files_mlff = Path(__file__) / "mlff*.h5" + error_analysis = MLFFErrorAnalysis.from_files( + dft_data=absolute_files_dft, mlff_data=absolute_files_mlff + ) + assert isinstance(error_analysis.mlff.energies, np.ndarray) + assert isinstance(error_analysis.dft.energies, np.ndarray) + assert isinstance(error_analysis.mlff.forces, np.ndarray) + assert isinstance(error_analysis.dft.forces, np.ndarray) + assert isinstance(error_analysis.mlff.lattice_vectors, np.ndarray) + assert isinstance(error_analysis.dft.lattice_vectors, np.ndarray) + assert isinstance(error_analysis.mlff.positions, np.ndarray) + assert isinstance(error_analysis.dft.positions, np.ndarray) + assert isinstance(error_analysis.mlff.nconfig, int) + assert isinstance(error_analysis.dft.nconfig, int) + assert isinstance(error_analysis.mlff.nions, np.ndarray) + assert isinstance(error_analysis.dft.nions, np.ndarray) + assert isinstance(error_analysis.mlff.stresses, np.ndarray) + assert isinstance(error_analysis.dft.stresses, np.ndarray) + + +def test_read_from_data(mock_calculations): + expected_energies = mock_calculations.energies.read() + expected_forces = mock_calculations.forces.read() + expected_stresses = mock_calculations.stresses.read() + mlff_error_analysis = MLFFErrorAnalysis._from_data(mock_calculations) + output_energies = mlff_error_analysis._calculations.energies.read() + output_forces = mlff_error_analysis._calculations.forces.read() + output_stresses = mlff_error_analysis._calculations.stresses.read() + assert output_energies == expected_energies + assert output_forces == expected_forces + assert output_stresses == expected_stresses + + +def _iter_properties(tag, data, return_array=True): + if return_array: + return np.array([_data[tag] for _data in data]) + else: + return [_data[tag] for _data in data] + + +@pytest.mark.parametrize("mocker", ["mock_calculations", "mock_multiple_calculations"]) +def test_attributes_from_data(mocker, request): + mock_calculations = request.getfixturevalue(mocker) + energies_dict = mock_calculations.energies.read() + mlff_energies = _iter_properties("free energy TOTEN", energies_dict["mlff_data"]) + dft_energies = _iter_properties("free energy TOTEN", energies_dict["dft_data"]) + forces_dict = mock_calculations.forces.read() + mlff_forces = _iter_properties("forces", forces_dict["mlff_data"]) + dft_forces = _iter_properties("forces", forces_dict["dft_data"]) + mlff_structures = _iter_properties( + "structure", forces_dict["mlff_data"], return_array=False + ) + dft_structures = _iter_properties( + "structure", forces_dict["dft_data"], return_array=False + ) + mlff_lattice_vectors = _iter_properties("lattice_vectors", mlff_structures) + dft_lattice_vectors = _iter_properties("lattice_vectors", dft_structures) + mlff_positions = _iter_properties("positions", mlff_structures) + dft_positions = _iter_properties("positions", dft_structures) + mlff_config, mlff_nions, _ = mlff_positions.shape + dft_config, dft_nions, _ = dft_positions.shape + stresses_dict = mock_calculations.stresses.read() + mlff_stresses = _iter_properties("stress", stresses_dict["mlff_data"]) + dft_stresses = _iter_properties("stress", stresses_dict["dft_data"]) + mlff_error_analysis = MLFFErrorAnalysis._from_data(mock_calculations) + assert np.array_equal(mlff_error_analysis.mlff.energies, mlff_energies) + assert np.array_equal(mlff_error_analysis.dft.energies, dft_energies) + assert np.array_equal(mlff_error_analysis.mlff.forces, mlff_forces) + assert np.array_equal(mlff_error_analysis.dft.forces, dft_forces) + assert np.array_equal( + mlff_error_analysis.mlff.lattice_vectors, mlff_lattice_vectors + ) + assert np.array_equal(mlff_error_analysis.dft.lattice_vectors, dft_lattice_vectors) + assert np.array_equal(mlff_error_analysis.mlff.positions, mlff_positions) + assert np.array_equal(mlff_error_analysis.dft.positions, dft_positions) + assert mlff_error_analysis.mlff.nconfig == mlff_config + assert mlff_error_analysis.dft.nconfig == dft_config + assert np.array_equal(mlff_error_analysis.mlff.stresses, mlff_stresses) + assert np.array_equal(mlff_error_analysis.dft.stresses, dft_stresses) + + +def test_validator(mock_calculations_incorrect): + with pytest.raises(exception.IncorrectUsage): + mlff_error_analysis = MLFFErrorAnalysis._from_data(mock_calculations_incorrect) + + +def test_energy_per_atom_computation(mock_calculations): + mlff_error_analysis = MLFFErrorAnalysis._from_data(mock_calculations) + + def rmse_error_energy(mlff_energy, dft_energy, natoms): + error = (mlff_energy - dft_energy) / natoms + return error + + expected_energy_error = rmse_error_energy( + mlff_energy=mlff_error_analysis.mlff.energies, + dft_energy=mlff_error_analysis.dft.energies, + natoms=mlff_error_analysis.mlff.nions, + ) + output_energy_error = mlff_error_analysis.get_energy_error_per_atom() + assert np.array_equal(expected_energy_error, output_energy_error) + + +def test_multiple_energy_per_atom_computation(mock_multiple_calculations): + mlff_error_analysis = MLFFErrorAnalysis._from_data(mock_multiple_calculations) + + def rmse_error_energy(mlff_energy, dft_energy, natoms, nconfig): + error = (mlff_energy - dft_energy) / natoms + return np.sum(np.abs(error), axis=-1) / nconfig + + expected_energy_error = rmse_error_energy( + mlff_energy=mlff_error_analysis.mlff.energies, + dft_energy=mlff_error_analysis.dft.energies, + natoms=mlff_error_analysis.mlff.nions, + nconfig=mlff_error_analysis.mlff.nconfig, + ) + output_energy_error = mlff_error_analysis.get_energy_error_per_atom( + normalize_by_configurations=True + ) + assert np.array_equal(expected_energy_error, output_energy_error) + + +def test_force_error_computation(mock_calculations): + mlff_error_analysis = MLFFErrorAnalysis._from_data(mock_calculations) + + def rmse_error_force(mlff_force, dft_force, natoms): + norm_error = np.linalg.norm(dft_force - mlff_force, axis=-1) + rmse = np.sqrt(np.sum(norm_error**2, axis=-1) / (3 * natoms)) + return rmse + + expected_force_error = rmse_error_force( + mlff_force=mlff_error_analysis.mlff.forces, + dft_force=mlff_error_analysis.dft.forces, + natoms=mlff_error_analysis.mlff.nions, + ) + output_force_error = mlff_error_analysis.get_force_rmse() + assert np.array_equal(expected_force_error, output_force_error) + + +def test_multiple_force_computation(mock_multiple_calculations): + mlff_error_analysis = MLFFErrorAnalysis._from_data(mock_multiple_calculations) + + def rmse_error_force(mlff_force, dft_force, natoms, nconfig): + norm_error = np.linalg.norm(dft_force - mlff_force, axis=-1) + rmse = np.sqrt(np.sum(norm_error**2, axis=-1) / (3 * natoms)) + return np.sum(rmse, axis=-1) / nconfig + + expected_force_error = rmse_error_force( + mlff_force=mlff_error_analysis.mlff.forces, + dft_force=mlff_error_analysis.dft.forces, + natoms=mlff_error_analysis.mlff.nions, + nconfig=mlff_error_analysis.mlff.nconfig, + ) + output_force_error = mlff_error_analysis.get_force_rmse( + normalize_by_configurations=True + ) + assert np.array_equal(expected_force_error, output_force_error) + + +def test_stress_error_computation(mock_calculations): + mlff_error_analysis = MLFFErrorAnalysis._from_data(mock_calculations) + + def rmse_error_stress(mlff_stress, dft_stress, natoms): + mlff_stress = np.triu(mlff_stress) + dft_stress = np.triu(dft_stress) + norm_error = np.linalg.norm(dft_stress - mlff_stress, axis=-1) + rmse = np.sqrt(np.sum(norm_error**2, axis=-1) / 6) + return rmse + + expected_stress_error = rmse_error_stress( + mlff_stress=mlff_error_analysis.mlff.stresses, + dft_stress=mlff_error_analysis.dft.stresses, + natoms=mlff_error_analysis.mlff.nions, + ) + output_stress_error = mlff_error_analysis.get_stress_rmse() + assert np.array_equal(expected_stress_error, output_stress_error) + + +def test_multiple_stress_computation(mock_multiple_calculations): + mlff_error_analysis = MLFFErrorAnalysis._from_data(mock_multiple_calculations) + + def rmse_error_stress(mlff_stress, dft_stress, nconfig): + mlff_stress = np.triu(mlff_stress) + dft_stress = np.triu(dft_stress) + norm_error = np.linalg.norm(dft_stress - mlff_stress, axis=-1) + rmse = np.sqrt(np.sum(norm_error**2, axis=-1) / 6) + return np.sum(rmse, axis=-1) / nconfig + + expected_stress_error = rmse_error_stress( + mlff_stress=mlff_error_analysis.mlff.stresses, + dft_stress=mlff_error_analysis.dft.stresses, + nconfig=mlff_error_analysis.mlff.nconfig, + ) + output_stress_error = mlff_error_analysis.get_stress_rmse( + normalize_by_configurations=True + ) + assert np.array_equal(expected_stress_error, output_stress_error) diff --git a/tests/conftest.py b/tests/conftest.py index 6660ba33..ae1c44b3 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -8,6 +8,7 @@ from numpy.testing import assert_array_almost_equal_nulp from py4vasp import exception, raw +from py4vasp._data.base import _DataWrapper number_steps = 4 number_atoms = 7 @@ -164,11 +165,11 @@ def elastic_modulus(selection): return _elastic_modulus() @staticmethod - def energy(selection): + def energy(selection, randomize: bool = False): if selection == "MD": - return _MD_energy() + return _MD_energy(randomize) elif selection == "relax": - return _relax_energy() + return _relax_energy(randomize) else: raise exception.NotImplemented() @@ -184,11 +185,11 @@ def force_constant(selection): raise exception.NotImplemented() @staticmethod - def force(selection): + def force(selection, randomize: bool = False): if selection == "Sr2TiO4": - return _Sr2TiO4_forces() + return _Sr2TiO4_forces(randomize) elif selection == "Fe3O4": - return _Fe3O4_forces() + return _Fe3O4_forces(randomize) else: raise exception.NotImplemented() @@ -246,11 +247,11 @@ def projector(selection): raise exception.NotImplemented() @staticmethod - def stress(selection): + def stress(selection, randomize: bool = False): if selection == "Sr2TiO4": - return _Sr2TiO4_stress() + return _Sr2TiO4_stress(randomize) elif selection == "Fe3O4": - return _Fe3O4_stress() + return _Fe3O4_stress(randomize) else: raise exception.NotImplemented() @@ -415,7 +416,7 @@ def _polarization(): return raw.Polarization(electron=np.array((1, 2, 3)), ion=np.array((4, 5, 6))) -def _MD_energy(): +def _MD_energy(randomize: bool = False): labels = ( "ion-electron TOTEN ", "kinetic energy EKIN ", @@ -425,22 +426,27 @@ def _MD_energy(): "nose kinetic EPS ", "total energy ETOTAL ", ) - return _create_energy(labels) + return _create_energy(labels, randomize=randomize) -def _relax_energy(): +def _relax_energy(randomize: bool = False): labels = ( "free energy TOTEN ", "energy without entropy ", "energy(sigma->0) ", ) - return _create_energy(labels) + return _create_energy(labels, randomize=randomize) -def _create_energy(labels): +def _create_energy(labels, randomize: bool = False): labels = np.array(labels, dtype="S") shape = (number_steps, len(labels)) - return raw.Energy(labels=labels, values=np.arange(np.prod(shape)).reshape(shape)) + if randomize: + return raw.Energy(labels=labels, values=np.random.random(shape)) + else: + return raw.Energy( + labels=labels, values=np.arange(np.prod(shape)).reshape(shape) + ) def _qpoints(): @@ -661,10 +667,15 @@ def _Sr2TiO4_force_constants(): ) -def _Sr2TiO4_forces(): +def _Sr2TiO4_forces(randomize): shape = (number_steps, number_atoms, axes) + if randomize: + forces = np.random.random(shape) + else: + forces = np.arange(np.prod(shape)).reshape(shape) return raw.Force( - structure=_Sr2TiO4_structure(), forces=np.arange(np.prod(shape)).reshape(shape) + structure=_Sr2TiO4_structure(), + forces=forces, ) @@ -685,11 +696,13 @@ def _Sr2TiO4_projectors(use_orbitals): ) -def _Sr2TiO4_stress(): +def _Sr2TiO4_stress(randomize): shape = (number_steps, axes, axes) - return raw.Stress( - structure=_Sr2TiO4_structure(), stress=np.arange(np.prod(shape)).reshape(shape) - ) + if randomize: + stresses = np.random.random(shape) + else: + stresses = np.arange(np.prod(shape)).reshape(shape) + return raw.Stress(structure=_Sr2TiO4_structure(), stress=stresses) def _Sr2TiO4_structure(): @@ -779,11 +792,13 @@ def _Fe3O4_dos(projectors): return raw_dos -def _Fe3O4_forces(): +def _Fe3O4_forces(randomize): shape = (number_steps, number_atoms, axes) - return raw.Force( - structure=_Fe3O4_structure(), forces=np.arange(np.prod(shape)).reshape(shape) - ) + if randomize: + forces = np.random.random(shape) + else: + forces = np.arange(np.prod(shape)).reshape(shape) + return raw.Force(structure=_Fe3O4_structure(), forces=forces) def _Fe3O4_projectors(use_orbitals): @@ -794,10 +809,15 @@ def _Fe3O4_projectors(use_orbitals): ) -def _Fe3O4_stress(): +def _Fe3O4_stress(randomize): shape = (number_steps, axes, axes) + if randomize: + stresses = np.random.random(shape) + else: + stresses = np.arange(np.prod(shape)).reshape(shape) return raw.Stress( - structure=_Fe3O4_structure(), stress=np.arange(np.prod(shape)).reshape(shape) + structure=_Fe3O4_structure(), + stress=stresses, ) diff --git a/tests/scripts/test_error_analysis.py b/tests/scripts/test_error_analysis.py new file mode 100644 index 00000000..e9c7a547 --- /dev/null +++ b/tests/scripts/test_error_analysis.py @@ -0,0 +1,6 @@ +import os + + +def test_error_analysis(): + errcode = os.system("error-analysis --help") + assert errcode == 0 diff --git a/tests/test_calculations.py b/tests/test_calculations.py new file mode 100644 index 00000000..04c048e4 --- /dev/null +++ b/tests/test_calculations.py @@ -0,0 +1,156 @@ +# Copyright © VASP Software GmbH, +# Licensed under the Apache License 2.0 (http://www.apache.org/licenses/LICENSE-2.0) +import os +from pathlib import Path +from unittest.mock import patch + +import pytest + +from py4vasp import Calculations + + +def test_error_when_using_constructor(): + with pytest.raises(Exception): + Calculations() + + +def test_creation_from_paths(): + # Test creation from absolute paths + absolute_path_1 = Path(__file__) / "path_1" + absolute_path_2 = Path(__file__) / "path_2" + calculations = Calculations.from_paths( + path_name_1=absolute_path_1, path_name_2=absolute_path_2 + ) + output_paths = calculations.paths() + assert output_paths["path_name_1"] == [absolute_path_1] + assert output_paths["path_name_2"] == [absolute_path_2] + output_number_of_calculations = calculations.number_of_calculations() + assert output_number_of_calculations["path_name_1"] == 1 + assert output_number_of_calculations["path_name_2"] == 1 + # Test creation from relative paths + relative_path_1 = os.path.relpath(absolute_path_1, Path.cwd()) + relative_path_2 = os.path.relpath(absolute_path_2, Path.cwd()) + calculations = Calculations.from_paths( + path_name_1=relative_path_1, path_name_2=relative_path_2 + ) + output_paths = calculations.paths() + assert output_paths["path_name_1"] == [absolute_path_1] + assert output_paths["path_name_2"] == [absolute_path_2] + output_number_of_calculations = calculations.number_of_calculations() + assert output_number_of_calculations["path_name_1"] == 1 + assert output_number_of_calculations["path_name_2"] == 1 + # Test creation with string paths + calculations = Calculations.from_paths( + path_name_1=absolute_path_1.as_posix(), path_name_2=absolute_path_2.as_posix() + ) + output_paths = calculations.paths() + assert output_paths["path_name_1"] == [absolute_path_1] + assert output_paths["path_name_2"] == [absolute_path_2] + output_number_of_calculations = calculations.number_of_calculations() + assert output_number_of_calculations["path_name_1"] == 1 + assert output_number_of_calculations["path_name_2"] == 1 + + +def test_creation_from_paths_with_incorrect_input(): + with pytest.raises(Exception): + Calculations.from_paths(path_name_1=1, path_name_2=2) + + +def test_creation_from_paths_with_wildcards(tmp_path): + paths_1 = [tmp_path / "path1_a", tmp_path / "path1_b"] + absolute_paths_1 = [path.resolve() for path in paths_1] + paths_2 = [tmp_path / "path2_a", tmp_path / "path2_b"] + absolute_paths_2 = [path.resolve() for path in paths_2] + create_paths = lambda paths: [path.mkdir() for path in paths] + create_paths(paths_1) + create_paths(paths_2) + calculations = Calculations.from_paths( + path_name_1=tmp_path / "path1_*", path_name_2=tmp_path / "path2_*" + ) + output_paths = calculations.paths() + assert all( + [ + output_paths["path_name_1"][i] == absolute_paths_1[i] + for i in range(len(absolute_paths_1)) + ] + ) + assert all( + [ + output_paths["path_name_2"][i] == absolute_paths_2[i] + for i in range(len(absolute_paths_2)) + ] + ) + output_number_of_calculations = calculations.number_of_calculations() + assert output_number_of_calculations["path_name_1"] == 2 + assert output_number_of_calculations["path_name_2"] == 2 + + +def test_creation_from_file(): + absolute_path_1 = Path(__file__) / "example_1.h5" + absolute_path_2 = Path(__file__) / "example_2.h5" + calculations = Calculations.from_files( + path_name_1=absolute_path_1, path_name_2=absolute_path_2 + ) + output_paths = calculations.paths() + assert output_paths["path_name_1"] == [absolute_path_1.parent] + assert output_paths["path_name_2"] == [absolute_path_2.parent] + output_number_of_calculations = calculations.number_of_calculations() + assert output_number_of_calculations["path_name_1"] == 1 + assert output_number_of_calculations["path_name_2"] == 1 + + +def test_create_from_files_with_wildcards(tmp_path): + paths_1 = [tmp_path / "example1_a.h5", tmp_path / "example1_b.h5"] + absolute_paths_1 = [path.resolve() for path in paths_1] + paths_2 = [tmp_path / "example2_a.h5", tmp_path / "example2_b.h5"] + absolute_paths_2 = [path.resolve() for path in paths_2] + create_files = lambda paths: [path.touch() for path in paths] + create_files(paths_1) + create_files(paths_2) + calculations = Calculations.from_files( + file_1=tmp_path / "example1_*.h5", + file_2=tmp_path / "example2_*.h5", + ) + output_paths = calculations.paths() + assert all( + [ + output_paths["file_1"][i] == absolute_paths_1[i].parent + for i in range(len(absolute_paths_1)) + ] + ) + assert all( + [ + output_paths["file_2"][i] == absolute_paths_2[i].parent + for i in range(len(absolute_paths_2)) + ] + ) + output_number_of_calculations = calculations.number_of_calculations() + assert output_number_of_calculations["file_1"] == 2 + assert output_number_of_calculations["file_2"] == 2 + + +@patch("py4vasp._data.base.Refinery.from_path", autospec=True) +@patch("py4vasp.raw.access", autospec=True) +def test_has_attributes(mock_access, mock_from_path): + calculations = Calculations.from_paths(path_name_1="path_1", path_name_2="path_2") + assert hasattr(calculations, "energies") + assert hasattr(calculations.energies, "read") + output_read = calculations.energies.read() + assert isinstance(output_read, dict) + assert output_read.keys() == {"path_name_1", "path_name_2"} + assert isinstance(output_read["path_name_1"], list) + assert isinstance(output_read["path_name_2"], list) + assert hasattr(calculations, "forces") + assert hasattr(calculations.forces, "read") + output_read = calculations.forces.read() + assert isinstance(output_read, dict) + assert output_read.keys() == {"path_name_1", "path_name_2"} + assert isinstance(output_read["path_name_1"], list) + assert isinstance(output_read["path_name_2"], list) + assert hasattr(calculations, "stresses") + assert hasattr(calculations.stresses, "read") + output_read = calculations.stresses.read() + assert isinstance(output_read, dict) + assert output_read.keys() == {"path_name_1", "path_name_2"} + assert isinstance(output_read["path_name_1"], list) + assert isinstance(output_read["path_name_2"], list)