Merge branch 'main' into numpy-update

juraskov · Nov 29, 2024 · 65941ce · 65941ce
2 parents f5f25e3 + db7c4a7
commit 65941ce
Show file tree

Hide file tree

Showing 37 changed files with 558 additions and 189 deletions.
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
@@ -0,0 +1,12 @@
+---
+version: 2
+updates:
+  # Maintain dependencies for GitHub Actions
+  - package-ecosystem: "github-actions"
+    directory: "/"
+    schedule:
+      interval: "monthly"
+    groups:
+      gha-dependencies:
+        patterns:
+          - '*'
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
@@ -4,6 +4,9 @@ on:
   push:
   pull_request:
 
+env:
+  FORCE_COLOR: 1
+
 jobs:
   pre-commit:
     env:
@@ -15,4 +18,4 @@ jobs:
         uses: actions/checkout@v4
 
       - name: Run pre-commit
-        uses: pre-commit/action@v3.0.0
+        uses: pre-commit/action@v3.0.1
diff --git a/.github/workflows/pytest.yml b/.github/workflows/pytest.yml
@@ -6,25 +6,28 @@ on:
     branches: [main]
   pull_request:
 
+env:
+  FORCE_COLOR: 1
+
 jobs:
   test:
     name: GAP (${{ matrix.python-version }}, ${{ matrix.os }})
     runs-on: ${{ matrix.os }}
-    
+
     strategy:
       fail-fast: true
       matrix:
-        os: ["ubuntu-latest"]
+        os: ["ubuntu-22.04"]
         python-version: ["3.9"]
-      
+
     defaults:
       run:
         shell: bash -l {0}
-        
+
     steps:
     - uses: actions/checkout@v4
-        
-    - uses: mamba-org/setup-micromamba@v1
+
+    - uses: mamba-org/setup-micromamba@v2
       with:
         # the create command looks like this:
         # `micromamba create -n test-env python=3.9 -f environment.yml`
@@ -38,7 +41,7 @@ jobs:
       run: ./install_gap.sh
 
     - name: Test basic install
-      run: pytest --cov -k "not test_openmm"
+      run: pytest --cov=mlptrain -k "not test_openmm"
 
     - name: Upload coverage reports to Codecov
       uses: codecov/codecov-action@v4
@@ -47,25 +50,66 @@ jobs:
           token: ${{ secrets.CODECOV_TOKEN }}
           slug: duartegroup/mlp-train
 
+  test-ace:
+    name: ACE (${{ matrix.python-version }}, ${{ matrix.os }})
+    runs-on: ${{ matrix.os }}
+
+    strategy:
+      fail-fast: true
+      matrix:
+        os: ["ubuntu-22.04"]
+        python-version: ["3.9"]
+
+    defaults:
+      run:
+        shell: bash -l {0}
+
+    steps:
+    - uses: actions/checkout@v4
+
+    - uses: julia-actions/setup-julia@v2
+      with:
+        version: '1.6'
+
+    - uses: mamba-org/setup-micromamba@v2
+      with:
+        environment-file: environment_ace.yml
+        environment-name: gha-test-env
+        cache-environment: true
+        create-args: >-
+          python=${{ matrix.python-version }}
+
+    - name: ACE install
+      run: ./install_ace.sh
+
+    - name: Test ACE
+      run: pytest --cov=mlptrain -k "not test_openmm"
+
+    - name: Upload coverage reports to Codecov
+      uses: codecov/codecov-action@v4
+      with:
+          flags: python-${{ matrix.python-version }}-ace
+          token: ${{ secrets.CODECOV_TOKEN }}
+          slug: duartegroup/mlp-train
 
   test-mace:
     name: MACE (${{ matrix.python-version }}, ${{ matrix.os }})
     runs-on: ${{ matrix.os }}
-    
+
     strategy:
       fail-fast: true
       matrix:
-        os: ["ubuntu-latest"]
+        os: ["ubuntu-22.04"]
         python-version: ["3.9"]
-      
+
     defaults:
       run:
         shell: bash -l {0}
-        
+
     steps:
     - uses: actions/checkout@v4
-        
-    - uses: mamba-org/setup-micromamba@v1
+
+    - uses: mamba-org/setup-micromamba@v2
       with:
         environment-file: environment_mace.yml
         environment-name: gha-test-env
@@ -77,7 +121,7 @@ jobs:
       run: ./install_mace.sh
 
     - name: Test MACE install
-      run: pytest --cov
+      run: pytest --cov=mlptrain
 
     - name: Upload coverage reports to Codecov
       uses: codecov/codecov-action@v4

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -10,7 +10,7 @@ repos:
       - id: check-shebang-scripts-are-executable
       - id: check-added-large-files
         args: ['--maxkb=500', '--enforce-all']
-        exclude: mlptrain/sampling/tests/data.zip
+        exclude: tests/data/data.zip
       - id: check-yaml
       - id: check-toml
 

diff --git a/README.md b/README.md
@@ -1,9 +1,9 @@
+[![DOI](https://img.shields.io/badge/doi-10.6084/m9.figshare.25816864.v1-blue.svg?style=flat&labelColor=whitesmoke&logo=data%3Aimage%2Fpng%3Bbase64%2CiVBORw0KGgoAAAANSUhEUgAAAB8AAAAfCAYAAAAfrhY5AAAJsklEQVR42qWXd1DTaRrHf%2BiB2Hdt5zhrAUKz4IKEYu9IGiGFFJJQ0gkJCAKiWFDWBRdFhCQUF3UVdeVcRQEBxUI3yY9iEnQHb3bdW1fPubnyz%2F11M7lvEHfOQee2ZOYzPyDv%2B3yf9%2Fk95YX4fx%2BltfUt08GcFEuPR4U9hDDZ%2FVngIlhb%2FSiI6InkTgLzgDcgfvtnovhH4BzoVlrbwr55QnhCtBW4QHXnFrZbPBaQoBh4%2FSYH2EnpBEtqcDMVzB93wA%2F8AFwa23XFGcc8CkT3mxz%2BfXWtq9T9IQlLIXYEuHojudb%2BCM7Hgdq8ydi%2FAHiBXyY%2BLjwFlAEnS6Jnar%2FvnQVhvdzasad0eKvWZKe8hvDB2ofLZ%2FZEcWsh%2BhyIuyO5Bxs2iZIE4nRv7NWAb0EO8AC%2FWPxjYAWuOEX2MSXZVgPxzmRL3xKz3ScGpx6p6QnOx4mDIFqO0w6Q4fEhO5IzwxlSwyD2FYHzwAW%2BAZ4fEsf74gCumykwNHskLM7taQxLYjjIyy8MUtraGhTWdkfhkFJqtvuVl%2F9l2ZquDfEyrH8B0W06nnpH3JtIyRGpH1iJ6SfxDIHjRXHJmdQjLpfHeN54gnfFx4W9QRnovx%2FN20aXZeTD2J84hn3%2BqoF2Tqr14VqTPUCIcP%2B5%2Fly4qC%2BUL3sYxSvNj1NwsVYPsWdMUfomsdkYm3Tj0nbV0N1wRKwFe1MgKACDIBdMAhPE%2FwicwNWxll8Ag40w%2BFfhibJkGHmutjYeQ8gVlaN%2BjO51nDysa9TwNUFMqaGbKdRJZFfOJSp6mkRKsv0rRIpEVWjAvyFkxNOEpwvcAVPfEe%2Bl8ojeNTx3nXLBcWRrYGxSRjDEk0VlpxYrbe1ZmaQ5xuT0u3r%2B2qe5j0J5uytiZPGsRL2Jm32AldpxPUNJ3jmmsN4x62z1cXrbedXBQf2yvIFCeZrtyicZZG2U2nrrBJzYorI2EXLrvTfCSB43s41PKEvbZDEfQby6L4JTj%2FfIwam%2B4%2BwucBu%2BDgNK05Nle1rSt9HvR%2FKPC4U6LTfvUIaip1mjIa8fPzykii23h2eanT57zQ7fsyYH5QjywwlooAUcAdOh5QumgTHx6aAO7%2FL52eaQNEShrxfhL6albEDmfhGflrsT4tps8gTHNOJbeDeBlt0WJWDHSgxs6cW6lQqyg1FpD5ZVDfhn1HYFF1y4Eiaqa18pQf3zzYMBhcanlBjYfgWNayAf%2FASOgklu8bmgD7hADrk4cRlOL7NSOewEcbqSmaivT33QuFdHXj5sdvjlN5yMDrAECmdgDWG2L8P%2BAKLs9ZLZ7dJda%2BB4Xl84t7QvnKfvpXJv9obz2KgK8dXyqISyV0sXGZ0U47hOA%2FAiigbEMECJxC9aoKp86re5O5prxOlHkcksutSQJzxZRlPZmrOKhsQBF5zEZKybUC0vVjG8PqOnhOq46qyDTDnj5gZBriWCk4DvXrudQnXQmnXblebhAC2cCB6zIbM4PYgGl0elPSgIf3iFEA21aLdHYLHUQuVkpgi02SxFdrG862Y8ymYGMvXDzUmiX8DS5vKZyZlGmsSgQqfLub5RyLNS4zfDiZc9Edzh%2FtCE%2BX8j9k%2FqWB071rcZyMImne1SLkL4GRw4UPHMV3jjwEYpPG5uW5fAEot0aTSJnsGAwHJi2nvF1Y5OIqWziVCQd5NT7t6Q8guOSpgS%2Fa1dSRn8JGGaCD3BPXDyQRG4Bqh
u8XrgAp0yy8DMSvvyVXDgJcJTcr1wQ2BvFKf65jqhvmxXUuDpGBlRvV36XvGjQzLi8KAKT2lYOnmxQPGorURSV0NhyTIuIyqOmKTMhQ%2BieEsgOgpc4KBbfDM4B3SIgFljvfHF6cef7qpyLBXAiQcXvg5l3Iunp%2FWv4dH6qFziO%2BL9PbrimQ9RY6MQphEfGUpOmma7KkGzuS8sPUFnCtIYcKCaI9EXo4HlQLgGrBjbiK5EqMj2AKWt9QWcIFMtnVvQVDQV9lXJJqdPVtUQpbh6gCI2Ov1nvZts7yYdsnvRgxiWFOtNJcOMVLn1vgptVi6qrNiFOfEjHCDB3J%2BHDLqUB77YgQGwX%2Fb1eYna3hGKdlqJKIyiE4nSbV8VFgxmxR4b5mVkkeUhMgs5YTi4ja2XZ009xJRHdkfwMi%2BfocaancuO7h%2FMlcLOa0V%2FSw6Dq47CumRQAKhgbOP8t%2BMTjuxjJGhXCY6XpmDDFqWlVYbQ1aDJ5Cptdw4oLbf3Ck%2BdWkVP0LpH7s9XLPXI%2FQX8ws%2Bj2In63IcRvOOo%2BTTjiN%2BlssfRsanW%2B3REVKoavBOAPTXABW4AL7e4NygHdpAKBscmlDh9Jysp4wxbnUNna3L3xBvyE1jyrGIkUHaqQMuxhHElV6oj1picvgL1QEuS5PyZTEaivqh5vUCKJqOuIgPFGESns8kyFk7%2FDxyima3cYxi%2FYOQCj%2F%2B9Ms2Ll%2Bhn4FmKnl7JkGXQGDKDAz9rUGL1TIlBpuJr9Be2JjK6qPzyDg495UxXYF7JY1qKimw9jWjF0iV6DRIqE%2B%2FeWG0J2ofmZTk0mLYVd4GLiFCOoKR0Cg727tWq981InYynvCuKW43aXgEjofVbxIqrm0VL76zlH3gQzWP3R3Bv9oXxclrlO7VVtgBRpSP4hMFWJ8BrUSBCJXC07l40X4jWuvtc42ofNCxtlX2JH6bdeojXgTh5TxOBKEyY5wvBE%2BACh8BtOPNPkApjoxi5h%2B%2FFMQQNpWvZaMH7MKFu5Ax8HoCQdmGkJrtnOiLHwD3uS5y8%2F2xTSDrE%2F4PT1yqtt6vGe8ldMBVMEPd6KwqiYECHDlfbvzphcWP%2BJiZuL5swoWQYlS%2Br7Yu5mNUiGD2retxBi9fl6RDGn4Ti9B1oyYy%2BMP5G87D%2FCpRlvdnuy0PY6RC8BzTA40NXqckQ9TaOUDywkYsudxJzPgyDoAWn%2BB6nEFbaVxxC6UXjJiuDkW9TWq7uRBOJocky9iMfUhGpv%2FdQuVVIuGjYqACbXf8aa%2BPeYNIHZsM7l4s5gAQuUAzRUoT51hnH3EWofXf2vkD5HJJ33vwE%2FaEWp36GHr6GpMaH4AAPuqM5eabH%2FhfG9zcCz4nN6cPinuAw6IHwtvyB%2FdO1toZciBaPh25U0ducR2PI3Zl7mokyLWKkSnEDOg1x5fCsJE9EKhH7HwFNhWMGMS7%2BqxyYsbHHRUDUH4I%2FAheQY7wujJNnFUH4KdCju83riuQeHU9WEqNzjsJFuF%2FdTDAZ%2FK7%2F1WaAU%2BAWymT59pVMT4g2AxcwNa0XEBDdBDpAPvgDIH73R25teeuAF5ime2Ul0OUIiG4GpSAEJeYW9wDTf43wfwHgHLKJoPznkwAAAABJRU5ErkJggg%3D%3D)](https://doi.org/10.6084/m9.figshare.25816864.v1)
 [![pytest CI](https://github.com/duartegroup/mlp-train/actions/workflows/pytest.yml/badge.svg?event=push)](https://github.com/duartegroup/mlp-train/actions/workflows/pytest.yml)
 [![codecov](https://codecov.io/gh/duartegroup/mlp-train/branch/main/graph/badge.svg)](https://codecov.io/gh/duartegroup/mlp-train)
 [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
 [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
 [![License](https://img.shields.io/badge/License-MIT%202.0-blue.svg)](https://opensource.org/licenses/mit)
-[![GitHub issues](https://img.shields.io/github/issues/duartegroup/mlp-train.svg)](https://github.com/duartegroup/mlp-train/issues)
 
 # mlp-train
 General machine learning potentials (MLP) training for molecular systems in gas phase and solution
@@ -16,7 +16,7 @@ Available models:
 
 ## Install
 
-Each model is installed into individual conda environment:
+Each model is installed into an individual conda environment:
 
 ```
 # Install GAP

diff --git a/environment.yml b/environment.yml
@@ -13,7 +13,7 @@ dependencies:
   - cython
   - dscribe=2.0
   - matplotlib-base
-  - numpy
+  - numpy<2
   - pytest=8
   - pytest-cov=5
   - py-plumed

diff --git a/environment_ace.yml b/environment_ace.yml
@@ -13,7 +13,7 @@ dependencies:
   - cython
   - dscribe=2.0
   - matplotlib-base
-  - numpy
+  - numpy<2
   - pytest=8
   - pytest-cov=5
   - py-plumed

diff --git a/environment_mace.yml b/environment_mace.yml
@@ -21,10 +21,11 @@ dependencies:
   - scipy
   - xtb
   - scikit-learn
-  - openmm
-  - openmm-torch
+  - openmm=8.1.2
+  - openmm-torch=1.4
   - nnpops
+  - openmm-ml=1.2
+  - git
   - pip:
-    - openmmml@git+https://github.com/openmm/openmm-ml.git@main
     - ase@git+https://gitlab.com/ase/ase.git@f2615a6e9a # For PLUMED
-    - mace-torch
+    - mace-torch==0.3.6
diff --git a/install_ace.sh b/install_ace.sh
@@ -16,8 +16,6 @@ if ! which julia; then
 fi
 
 source create_conda_environment.sh
-# NOTE: `conda activate` does not work in scripts, we use `conda run` below.
-# https://stackoverflow.com/a/72395091
 
 echo "* Adding required registries and packages to Julia *"
 echo "using Pkg
@@ -30,8 +28,10 @@ Pkg.add(\"IJulia\")
 Pkg.add(\"ASE\")" > add_julia_pkgs.jl
 julia add_julia_pkgs.jl
 
+# NOTE: `conda activate` does not work in scripts, need to use `conda run`, see:
+# https://stackoverflow.com/a/72395091
 echo "* Setting up Python-Julia integration *"
-conda run -n ${CONDA_ENV_NAME} python -c "import julia; julia.install()"
+$CONDA_EXE run -n ${CONDA_ENV_NAME} python -c "import julia; julia.install()"
 
 echo "* Pointing PyCall to the version of Python in the new env *"
 

diff --git a/mlptrain/__init__.py b/mlptrain/__init__.py
@@ -20,7 +20,7 @@
     PlumedCustomCV,
 )
 
-__version__ = '1.0.0a0'
+__version__ = '1.0.0b0'
 
 __all__ = [
     'Configuration',

diff --git a/mlptrain/config.py b/mlptrain/config.py
@@ -2,16 +2,23 @@
 
 
 class _ConfigClass:
-    """mlptrain configuration"""
+    """
+    MLP training configurations
+
+    This class contains default parameters for electronic structure computations and training of available MLPs.
+    Default keywords for electronic structure computations are None to avoid accidentally running the wrong level of theory.
+    The desired level can be specified by, e.g.
+    ```
+    from mlptrain.config import Config
+
+    Config.orca_keywords = ['PBE', 'def2-SVP', 'EnGrad']
+    Config.gaussian_keywords = ['PBEPBE', 'Def2SVP', 'Force(NoStep)', 'integral=ultrafinegrid']
+    ```
+    """
 
     n_cores = 4
-    _orca_keywords = ['PBE', 'def2-SVP', 'EnGrad']
-    _gaussian_keywords = [
-        'PBEPBE',
-        'Def2SVP',
-        'Force(NoStep)',
-        'integral=ultrafinegrid',
-    ]
+    _orca_keywords = None
+    _gaussian_keywords = None
 
     # Default parameters for a GAP potential
     gap_default_params = {
@@ -87,7 +94,7 @@ def gaussian_keywords(self):
 
     @gaussian_keywords.setter
     def gaussian_keywords(self, value):
-        """ORCA keywords must be gradient"""
+        """Gaussian keywords must be gradient"""
         self._gaussian_keywords = value
 
 

diff --git a/mlptrain/configurations/calculate.py b/mlptrain/configurations/calculate.py
@@ -87,7 +87,7 @@ def _method_and_keywords(
 def _orca_keywords() -> 'autode.wrappers.keywords.Keywords':
     """Keywords e.g. functional and basis set to use for an ORCA calculation"""
 
-    if Config.orca_keywords is None:
+    if len(Config.orca_keywords) == 0:
         raise ValueError(
             'For ORCA training GTConfig.orca_keywords must be'
             ' set. e.g.\nmlt.Config.orca_keywords '
@@ -101,7 +101,7 @@ def _gaussian_keywords() -> 'autode.wrappers.keywords.Keywords':
     """Keywords e.g. functional and basis set to use for a Gaussian
     calculation, either Gaussian09 or Gaussian16"""
 
-    if Config.gaussian_keywords is None:
+    if len(Config.gaussian_keywords) == 0:
         raise ValueError(
             'To train with Gaussian QM calculations '
             'mlt.Config.gaussian_keywords must be set.'

diff --git a/mlptrain/potentials/ace/ace.py b/mlptrain/potentials/ace/ace.py
@@ -40,22 +40,34 @@ def _train(self) -> None:
         p = Popen(
             [shutil.which('julia'), f'{self.name}.jl'],
             shell=False,
+            encoding='utf-8',
             stdout=PIPE,
             stderr=PIPE,
         )
         out, err = p.communicate(timeout=None)
 
+        filename_ace_out = 'ACE_output.out'
+
+        with open(filename_ace_out, 'a') as f:
+            f.write(f'ACE training output:\n{out}')
+            if err:
+                f.write(f'ACE training error:\n{err}')
+
         delta_time = time() - start_time
         logger.info(f'ACE training ran in {delta_time / 60:.1f} m')
 
         if any(
             (
                 delta_time < 0.01,
-                b'SYSTEM ABORT' in err,
+                'SYSTEM ABORT' in err,
+                p.returncode != 0,
                 not os.path.exists(f'{self.name}.json'),
             )
         ):
-            raise RuntimeError(f'ACE train errored with:\n{err.decode()}\n')
+            raise RuntimeError(
+                f'ACE train errored with a return code:\n{p.returncode}\n'
+                f'and error:\n{err}\n'
+            )
 
         for filename in (f'{self.name}_data.xyz', f'{self.name}.jl'):
             os.remove(filename)
@@ -246,7 +258,7 @@ def _print_input(self, filename: str, **kwargs) -> None:
             '             asmerrs=true, weights=weights)\n'
             'save_dict(save_name,'
             '           Dict("IP" => write_dict(IP), "info" => lsqinfo))\n'
-            'rmse_table(lsqinfo["errors"])\n'
+            '#rmse_table(lsqinfo["errors"])\n'
             'println("The L2 norm of the fit is ", round(norm(lsqinfo["c"]), digits=2))\n',
             file=inp_file,
         )

diff --git a/mlptrain/potentials/mace/mace.py b/mlptrain/potentials/mace/mace.py
@@ -2,6 +2,7 @@
 import mlptrain
 import argparse
 import os
+import gc
 import ast
 import time
 import shutil
@@ -120,6 +121,10 @@ def _train(self, n_cores: Optional[int] = None) -> None:
         self._reset_train_objs()
 
         os.remove(f'{self.name}_data.xyz')
+
+        gc.collect()
+        torch.cuda.empty_cache()
+
         return None
 
     @property
@@ -205,17 +210,14 @@ def _print_error_table(self) -> None:
 
         logging.info('Generating error table')
 
-        all_collections = [
-            ('train', self.train_configs),
-            ('valid', self.valid_configs),
-        ]
+        all_collections = {
+            'training': self.train_loader,
+            'validation': self.valid_loader,
+        }
 
         table = create_error_table(
             table_type=Config.mace_params['error_table'],
-            all_collections=all_collections,
-            z_table=self.z_table,
-            r_max=Config.mace_params['r_max'],
-            valid_batch_size=self.valid_batch_size,
+            all_data_loaders=all_collections,
             model=self.model,
             loss_fn=self.loss_fn,
             output_args=self.output_args,