From 47d0faa8d49ccb08023492df84afaa344b8910dd Mon Sep 17 00:00:00 2001 From: Veronika Juraskova Date: Mon, 25 Nov 2024 10:54:14 +0000 Subject: [PATCH 1/2] Add test_calculate (#111) Add test for calculate.py and pin versions of mace and openmm --- environment_mace.yml | 9 +- mlptrain/config.py | 25 ++-- mlptrain/configurations/calculate.py | 4 +- tests/test_calculate.py | 211 +++++++++++++++++++++++++++ 4 files changed, 233 insertions(+), 16 deletions(-) create mode 100644 tests/test_calculate.py diff --git a/environment_mace.yml b/environment_mace.yml index 70c2ede8..5385a265 100644 --- a/environment_mace.yml +++ b/environment_mace.yml @@ -21,12 +21,11 @@ dependencies: - scipy - xtb - scikit-learn - - openmm - - openmm-torch + - openmm=8.1.2 + - openmm-torch=1.4 - nnpops - - openmm-ml + - openmm-ml=1.2 - git - - openmm-ml - pip: - ase@git+https://gitlab.com/ase/ase.git@f2615a6e9a # For PLUMED - - mace-torch + - mace-torch==0.3.6 diff --git a/mlptrain/config.py b/mlptrain/config.py index 0f8dd597..d25cb67e 100644 --- a/mlptrain/config.py +++ b/mlptrain/config.py @@ -2,16 +2,23 @@ class _ConfigClass: - """mlptrain configuration""" + """ + MLP training configurations + + This class contains default parameters for electronic structure computations and training of available MLPs. + Default settings for electronic structures is None to avoid accidentally running the wrong level of theory. + The desired level can be specified by, e.g. 
+    ```
+    from mlptrain.config import Config
+
+    Config.orca_keywords = ['PBE', 'def2-SVP', 'EnGrad']
+    Config.gaussian_keywords = ['PBEPBE', 'Def2SVP', 'Force(NoStep)', 'integral=ultrafinegrid']
+    ```
+    """

     n_cores = 4

-    _orca_keywords = ['PBE', 'def2-SVP', 'EnGrad']
-    _gaussian_keywords = [
-        'PBEPBE',
-        'Def2SVP',
-        'Force(NoStep)',
-        'integral=ultrafinegrid',
-    ]
+    _orca_keywords = None
+    _gaussian_keywords = None

     # Default parameters for a GAP potential
     gap_default_params = {
@@ -87,7 +94,7 @@ def gaussian_keywords(self):

     @gaussian_keywords.setter
     def gaussian_keywords(self, value):
-        """ORCA keywords must be gradient"""
+        """Gaussian keywords must be gradient"""
         self._gaussian_keywords = value


diff --git a/mlptrain/configurations/calculate.py b/mlptrain/configurations/calculate.py
index d52f08eb..2da9aa0a 100644
--- a/mlptrain/configurations/calculate.py
+++ b/mlptrain/configurations/calculate.py
@@ -87,7 +87,7 @@ def _method_and_keywords(
 def _orca_keywords() -> 'autode.wrappers.keywords.Keywords':
     """Keywords e.g. functional and basis set to use for an ORCA calculation"""

-    if Config.orca_keywords is None:
+    if not Config.orca_keywords:
         raise ValueError(
             'For ORCA training GTConfig.orca_keywords must be'
             ' set. e.g.\nmlt.Config.orca_keywords '
@@ -101,7 +101,7 @@ def _gaussian_keywords() -> 'autode.wrappers.keywords.Keywords':
     """Keywords e.g. functional and basis set to use for an Gaussian
     calculation, either Gaussian09 or Gaussian16"""

-    if Config.gaussian_keywords is None:
+    if not Config.gaussian_keywords:
         raise ValueError(
             'To train with Gaussian QM calculations '
             'mlt.Config.gaussian_keywords must be set.'
diff --git a/tests/test_calculate.py b/tests/test_calculate.py new file mode 100644 index 00000000..5f5e1841 --- /dev/null +++ b/tests/test_calculate.py @@ -0,0 +1,211 @@ +import pytest +import numpy as np +from mlptrain.configurations import Configuration +from mlptrain.config import Config # Import Config from mlptrain.config +from mlptrain.energy import Energy +from mlptrain.forces import Forces +from mlptrain.configurations.calculate import ( + run_autode, + _method_and_keywords, + _orca_keywords, + _gaussian_keywords, +) + + +class MockSpecies: + """Mock class for autode.species.Species""" + + def __init__(self, name, atoms, charge, mult): + self.name = name + self.atoms = atoms + self.charge = charge + self.mult = mult + + +class MockCalculation: + """Mock class for autode.calculation.Calculation""" + + def __init__(self, name, molecule, method, keywords, n_cores): + self.name = name + self.molecule = molecule + self.method = method + self.keywords = keywords + self.n_cores = n_cores + self._energy_success = ( + True # Flag for controlling energy calculation success + ) + self._gradient_success = ( + True # Flag for controlling gradient calculation success + ) + + def run(self): + """Simulate the run behavior.""" + pass # Assume the calculation runs successfully + + def get_gradients(self): + """Return mock gradients or raise an error if _gradient_success is False""" + if self._gradient_success: + return MockUnitConversion(np.array([-0.1, 0.2, -0.3])) + else: + raise Exception('Could not get gradients') + + def get_energy(self): + """Return mock energy or None if _energy_success is False""" + return MockUnitConversion(-1.0) if self._energy_success else None + + def get_atomic_charges(self): + """Return mock partial charges""" + return np.array([0.5, -0.5]) + + @property + def output(self): + """Simulate output attribute with file lines""" + + class Output: + file_lines = ['Some output log line'] * 50 + exists = True + + return Output() + + +class 
MockUnitConversion: + """Mock class for autode's units conversion, simulating to()""" + + def __init__(self, value): + self.value = value + + def to(self, unit): + return self.value + + +# Fixtures + + +@pytest.fixture +def configuration(): + """Fixture for a mock Configuration object""" + config = Configuration(atoms=['H', 'O'], charge=0, mult=1) + config.forces = Forces() + config.energy = Energy() + config.partial_charges = None + return config + + +@pytest.fixture +def mock_method_and_keywords(monkeypatch): + """Fixture to mock _method_and_keywords function""" + + def _mock_method_and_keywords(method_name): + return 'mock_method', 'mock_keywords' + + monkeypatch.setattr( + 'mlptrain.configurations.calculate._method_and_keywords', + _mock_method_and_keywords, + ) + + +@pytest.fixture +def mock_autode(monkeypatch): + """Fixture to mock autode Species and Calculation""" + monkeypatch.setattr('autode.species.Species', MockSpecies) + monkeypatch.setattr('autode.calculation.Calculation', MockCalculation) + + +@pytest.fixture +def set_config(monkeypatch): + """Fixture to set required config values for ORCA and Gaussian keywords""" + gauss_kws = Config.gaussian_keywords + orca_kws = Config.orca_keywords + Config.orca_keywords = ['PBE', 'def2-SVP', 'EnGrad'] + Config.gaussian_keywords = ['B3LYP', '6-31G(d)', 'Force'] + yield + Config.gaussian_keywords = gauss_kws + Config.orca_keywords = orca_kws + + +# Tests + + +def test_run_autode_success( + mock_autode, mock_method_and_keywords, configuration +): + """Test run_autode for a successful calculation""" + + run_autode(configuration, method_name='mock_method', n_cores=1) + + # Assertions to verify configuration attributes were set as expected + assert (configuration.forces.true == np.array([0.1, -0.2, 0.3])).all() + assert configuration.energy.true == -1.0 + assert (configuration.partial_charges == [0.5, -0.5]).all() + + +def test_run_autode_failed_energy( + mock_autode, mock_method_and_keywords, configuration, capsys +): 
+ """Test run_autode when energy calculation fails but gradients succeed""" + + # Set up mock Calculation to simulate energy calculation failure + calc_instance = MockCalculation( + name='tmp', molecule=None, method=None, keywords=None, n_cores=1 + ) + calc_instance._energy_success = False # Fail energy calculation + calc_instance._gradient_success = True # Succeed in gradient calculation + + with pytest.MonkeyPatch.context() as mp: + mp.setattr( + 'autode.calculation.Calculation', + lambda *args, **kwargs: calc_instance, + ) + run_autode(configuration, method_name='mock_method', n_cores=1) + + captured = capsys.readouterr() + assert 'Failed to calculate the energy' in captured.err + assert configuration.energy.true is None + + +def test_method_and_keywords_success(set_config): + """Test _method_and_keywords for valid methods""" + methods = {'orca': 'orca', 'g09': 'g09', 'g16': 'g16', 'xtb': 'xtb'} + for method_name, expected in methods.items(): + method, keywords = _method_and_keywords(method_name) + assert ( + method.name == expected + ) # Mocked ORCA, G09, etc., should have these names + + +def test_method_and_keywords_invalid(): + """Test _method_and_keywords raises ValueError for an invalid method""" + with pytest.raises(ValueError, match='Unknown method'): + _method_and_keywords('invalid_method') + + +@pytest.mark.xfail +def test_orca_keywords_success(set_config): + """Test _orca_keywords retrieves the ORCA keywords from Config""" + keywords = _orca_keywords() + assert keywords == Config.orca_keywords + + +def test_orca_keywords_no_config(): + """Test _orca_keywords raises ValueError when ORCA keywords are not set""" + with pytest.raises( + ValueError, + match='For ORCA training GTConfig.orca_keywords must be set', + ): + _orca_keywords() + + +@pytest.mark.xfail +def test_gaussian_keywords_success(set_config): + """Test _gaussian_keywords retrieves the Gaussian keywords from Config""" + keywords = _gaussian_keywords() + assert keywords == 
Config.gaussian_keywords
+
+
+def test_gaussian_keywords_no_config():
+    """Test _gaussian_keywords raises ValueError when Gaussian keywords are not set"""
+    with pytest.raises(
+        ValueError,
+        match='To train with Gaussian QM calculations mlt.Config.gaussian_keywords must be set',
+    ):
+        _gaussian_keywords()

From db7c4a74872c9ccae07e073a48a4ed96aeda2c15 Mon Sep 17 00:00:00 2001
From: Veronika Juraskova
Date: Mon, 25 Nov 2024 11:21:50 +0000
Subject: [PATCH 2/2] Add PBC and improve AL extra time setting

Add option to run AL in PBC for solvated systems. Improve AL extra time
setting for metadynamics runs to avoid the formation of distorted
structures.
---
 mlptrain/training/active.py | 31 +++++++++++++++++++++++++++++--
 1 file changed, 29 insertions(+), 2 deletions(-)

diff --git a/mlptrain/training/active.py b/mlptrain/training/active.py
index 9e4fbb58..4f45ce0a 100644
--- a/mlptrain/training/active.py
+++ b/mlptrain/training/active.py
@@ -14,6 +14,7 @@
 from mlptrain.training.selection import SelectionMethod, AbsDiffE
 from mlptrain.configurations import ConfigurationSet
 from mlptrain.log import logger
+from mlptrain.box import Box


 def train(
@@ -38,6 +39,8 @@
     constraints: Optional[List] = None,
     bias: Optional = None,
     md_program: str = 'ASE',
+    pbc: bool = False,
+    box_size: Optional[list] = None,
 ) -> None:
     """
     Train a system using active learning, by propagating dynamics using ML
@@ -130,6 +133,13 @@
         dynamics

         md_program: (str) 'ASE' or 'OpenMM'
+
+        pbc: (bool) If True, MLP-MD propagates with periodic boundary conditions.
+             The solvent should therefore be placed in a box and not a sphere.
+             The training data are still treated as clusters in
+             electronic structure computations.
+
+        box_size: (List | None) Size of the box in which MLP-MD is propagated.
""" if md_program.lower() == 'openmm': if not isinstance(mlp, mlptrain.potentials.MACE): @@ -145,6 +155,9 @@ def train( _check_bias(bias=bias, temp=temp, inherit_metad_bias=inherit_metad_bias) + if pbc and box_size is None: + raise ValueError('For PBC in MD, the box_size cannot be None') + if restart_iter is not None: _initialise_restart( mlp=mlp, @@ -179,6 +192,10 @@ def train( for iteration in range(max_active_iters): if restart_iter is not None and iteration <= restart_iter: continue + if isinstance(bias, PlumedBias) and iteration > bias_start_iter: + extra_time = 0 + else: + extra_time = mlp.training_data.t_min(-n_configs_iter) previous_n_train = mlp.n_train @@ -203,13 +220,15 @@ def train( bbond_energy=bbond_energy, fbond_energy=fbond_energy, init_temp=init_active_temp, - extra_time=mlp.training_data.t_min(-n_configs_iter), + extra_time=extra_time, constraints=constraints, bias=deepcopy(bias), inherit_metad_bias=inherit_metad_bias, bias_start_iter=bias_start_iter, iteration=iteration, md_program=md_program, + pbc=pbc, + box_size=box_size, ) # Active learning finds no configurations @@ -399,6 +418,9 @@ def _gen_active_config( else kwargs.pop('init_active_temp') ) + pbc = False if 'pbc' not in kwargs else kwargs.pop('pbc') + box_size = None if 'box_size' not in kwargs else kwargs.pop('box_size') + if extra_time > 0: logger.info(f'Running an extra {extra_time:.1f} fs of MD') @@ -410,6 +432,8 @@ def _gen_active_config( ): kwargs = _modify_kwargs_for_metad_bias_inheritance(kwargs) + if pbc: + config.box = Box(box_size) if kwargs['md_program'].lower() == 'openmm': traj = run_mlp_md_openmm( config, @@ -435,6 +459,8 @@ def _gen_active_config( traj.t0 = curr_time # Increment the initial time (t0) + for frame in traj: + frame.box = Box([100, 100, 100]) # Evaluate the selector on the final frame selector(traj.final_frame, mlp, method_name=method_name, n_cores=n_cores) @@ -584,6 +610,7 @@ def _gen_and_set_init_training_configs( config = mlp.system.random_configuration( 
min_dist=dist, with_intra=True ) + config.box = Box([100, 100, 100]) init_configs.append(config) except RuntimeError: @@ -592,7 +619,7 @@ def _gen_and_set_init_training_configs( logger.info(f'Added {num} configurations with min dist = {dist:.3f} Å') init_configs.single_point(method_name) mlp.training_data += init_configs - return None + return init_configs def _initialise_restart(