Add pre commit (duartegroup#66)

Added pre-commit hooks and Ruff linter to check the files before committing. This PR also includes changes suggested by Ruff, fixed either automatically or manually. Ruff formatter will be added in the follow-up PR. * Add pre-commit configuration * Automatic and manual fix of errors found by linter * Add pre-commit to dependencies * Update README.md
juraskov · Oct 30, 2023 · 41fbb2c · 41fbb2c
1 parent be29fa9
commit 41fbb2c
Show file tree

Hide file tree

Showing 38 changed files with 142 additions and 71 deletions.
diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml
@@ -0,0 +1,18 @@
+name: "Lint"
+
+on:
+  push:
+  pull_request:
+
+jobs:
+  pre-commit:
+    env:
+      SKIP: 'no-commit-to-branch'
+    runs-on: ubuntu-latest
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Run pre-commit
+        uses: pre-commit/action@v3.0.0
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -0,0 +1,21 @@
+default_language_version:
+    # all hooks should run with python 3.6+
+    python: python3
+repos:
+  - repo: https://github.com/pre-commit/pre-commit-hooks
+    rev: v4.5.0
+    hooks:
+      - id: no-commit-to-branch
+      - id: check-executables-have-shebangs
+      - id: check-shebang-scripts-are-executable
+      - id: check-added-large-files
+        args: ['--maxkb=500', '--enforce-all']
+        exclude: mlptrain/sampling/tests/data.zip
+      - id: check-yaml
+
+  - repo: https://github.com/astral-sh/ruff-pre-commit
+    # Ruff version.
+    rev: v0.1.3
+    hooks:
+      - id: ruff
+        args: [--show-source, --fix]
diff --git a/README.md b/README.md
@@ -1,6 +1,8 @@
 [![Test with pytest](https://github.com/duartegroup/mlp-train/actions/workflows/pytest.yml/badge.svg?event=push)](https://github.com/duartegroup/mlp-train/actions/workflows/pytest.yml)
 [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit)
 [![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json)](https://github.com/astral-sh/ruff)
+[![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/mit)
+[![GitHub issues](https://img.shields.io/github/issues/duartegroup/mlp-train.svg)](https://github.com/duartegroup/mlp-train/issues)
 
 # mlp-train
 General machine learning potentials (MLP) training for molecular systems in gas phase and solution
@@ -11,8 +13,7 @@ Available models:
 - MACE
 
 
-***
-### Install
+## Install
 
 Each model is installed into individual conda environment:
 
@@ -27,12 +28,21 @@ Each model is installed into individual conda environment:
 ./install_mace.sh 
 ```
 
-
 ### Notes
 
 - Units are: distance (Å), energy (eV), force (eV Å$`^{-1}`$), time (fs)
 
-## Citation
+## For developers
+
+We are happy to accept pull requests from users. Please first fork the mlp-train repository. We use `pre-commit`, `Ruff` and `pytest` to check the code. Your PR needs to pass these checks before it is accepted. `Pre-commit` is installed as one of the dependencies. To use it in your repository, run the following command in the mlp-train folder:
+
+```
+pre-commit install 
+```
+
+`Pre-commit` will then run automatically at each commit and will take care of installation and running of `Ruff`.
+
+## Citations
 
 If _mlptrain_ is used in a publication please consider citing the [paper](https://doi.org/10.1039/D2CP02978B):
 
@@ -47,3 +57,8 @@ If _mlptrain_ is used in a publication please consider citing the [paper](https:
   journal = {Phys. Chem. Chem. Phys.}
 }
 ```
+
+## Contact
+
+For bugs or implementation requests, please use [GitHub Issues](https://github.com/duartegroup/mlp-train/issues).
+
diff --git a/create_conda_environment.sh b/create_conda_environment.sh
@@ -1,4 +1,3 @@
-#!/bin/bash
 # NOTE: This script should not be called on its own,
 # but should be sourced from other install scripts such as install_ace.sh
 set -euo pipefail

diff --git a/environment.yml b/environment.yml
@@ -6,6 +6,7 @@ channels:
 dependencies:
   - python=3.9
   - pip
+  - pre-commit
   - ase
   - autode=1.1
   - coloredlogs

diff --git a/environment_ace.yml b/environment_ace.yml
@@ -6,6 +6,7 @@ channels:
 dependencies:
   - python=3.9
   - pip
+  - pre-commit
   - ase
   - autode=1.1
   - coloredlogs

diff --git a/environment_mace.yml b/environment_mace.yml
@@ -7,6 +7,7 @@ channels:
 dependencies:
   - python=3.9
   - pip
+  - pre-commit
   - ase
   - autode=1.1
   - coloredlogs

diff --git a/examples/DA_paper/1d_fes/fes.py b/examples/DA_paper/1d_fes/fes.py
@@ -1,4 +1,3 @@
-import sys
 import mlptrain as mlt
 import numpy as np
 from mlptrain.box import Box

diff --git a/examples/DA_paper/2D_pes/pes.py b/examples/DA_paper/2D_pes/pes.py
@@ -187,9 +187,6 @@ def get_energy(self, calc):
     def get_free_energy(self, calc):
         return None
 
-    def get_free_energy(self, calc):
-        return None
-
     def get_enthalpy(self, calc):
         return None
 
@@ -263,7 +260,7 @@ def optimise_with_fix_solute(solute, configuration, fmax, mlp, constraint = True
     from ase.optimize import BFGS
     from ase.io.trajectory import Trajectory as ASETrajectory
 
-    assert configuration.box != None, 'configuration must have box'
+    assert configuration.box is not None, 'configuration must have box'
     logger.info('Optimise the configuration with fixed solute (solute coords should at the first in configuration coords) by MLP')
 
     n_cores = kwargs['n_cores'] if 'n_cores' in kwargs else min(Config.n_cores, 8)

diff --git a/examples/DA_paper/training/explicit/endo_ace_ex.py b/examples/DA_paper/training/explicit/endo_ace_ex.py
@@ -1,10 +1,9 @@
 import mlptrain as mlt
 import numpy as np
-from autode.atoms import Atoms, Atom
+from autode.atoms import Atom
 from mlptrain.log import logger
 from mlptrain.box import Box
 from mlptrain.training.selection import MaxAtomicEnvDistance
-import random
 
 mlt.Config.n_cores = 10
 mlt.Config.orca_keywords = ['wB97M-D3BJ', 'def2-TZVP','def2/J', 'RIJCOSX','EnGrad']
@@ -30,7 +29,6 @@ def add_water(solute, n = 2):
        n: number of water molecules to add"""
     from ase import Atoms
     from ase.calculators.tip3p import rOH, angleHOH
-    from ase.io import read , write
 
     # water molecule
     x = angleHOH * np.pi / 180 / 2
@@ -46,7 +44,7 @@ def add_water(solute, n = 2):
     water0.rotate(180, 'x')
     water0.rotate(180, 'z')
 
-    assert solute.box != None, 'configuration must have box'
+    assert solute.box is not None, 'configuration must have box'
     sol = solute.ase_atoms
     sol.center()
     sys = sol.copy()
@@ -104,13 +102,13 @@ def solvation(solute_config, solvent_config, apm, radius, enforce = True):
     """function to generate solvated system by adding the solute at the center of box,
        then remove the overlapped solvent molecules
        adapted from https://doi.org/10.1002/qua.26343
-       solute: mlt.Configuration() solute.box != None
-       solvent: mlt.Configuration() solvent.box != None
+       solute: mlt.Configuration() solute.box is not None
+       solvent: mlt.Configuration() solvent.box is not None
        aps: number of atoms per solvent molecule
        radius: cutout radius around each solute atom
        enforce: True / False Wrap solvent regardless of previous solvent PBC choices"""
-    assert solute_config.box != None, 'configuration must have box'
-    assert solvent_config.box != None, 'configuration must have box'
+    assert solute_config.box is not None, 'configuration must have box'
+    assert solvent_config.box is not None, 'configuration must have box'
 
     solute = solute_config.ase_atoms
     solvent = solvent_config.ase_atoms
@@ -221,7 +219,7 @@ def generate_init_configs(n, bulk_water = True, TS = True):
 
     # TS bounded with two water molecules at carbonyl group to form hydrogen bond
     else:
-        assert TS == True, 'cannot generate initial configuration'
+        assert TS is True, 'cannot generate initial configuration'
         for i in range(n):
             TS_with_water = add_water(solute=TS, n=2)
             init_configs.append(TS_with_water)

diff --git a/examples/DA_paper/uphill/generate_rs.py b/examples/DA_paper/uphill/generate_rs.py
@@ -4,6 +4,8 @@
 from mlptrain.log import logger
 from mlptrain.config import Config
 from mlptrain.md import  _convert_ase_traj
+from ase.md.velocitydistribution import MaxwellBoltzmannDistribution
+from numpy.random import RandomState
 import numpy as np
 
 mlt.Config.n_cores = 10
@@ -28,13 +30,13 @@ def from_ase_to_autode(atoms):
 
 def solvation(solute_config, solvent_config, apm, radius, enforce = True):
     """same function applied in training an MLP for reaction in explicit water
-       solute: mlt.Configuration() solute.box != None
-       solvent: mlt.Configuration() solvent.box != None
+       solute: mlt.Configuration() solute.box is not None
+       solvent: mlt.Configuration() solvent.box is not None
        aps: number of atoms per solvent molecule
        radius: cutout radius around each solute atom
        enforce: True / False Wrap solvent regardless of previous solvent PBC choices"""
-    assert solute_config.box != None, 'configuration must have box'
-    assert solvent_config.box != None, 'configuration must have box'
+    assert solute_config.box is not None, 'configuration must have box'
+    assert solvent_config.box is not None, 'configuration must have box'
 
     solute = solute_config.ase_atoms
     solvent = solvent_config.ase_atoms
@@ -135,7 +137,7 @@ def mlpmd_fix_solute(solute, configuration, mlp, temp, dt, interval, n_steps, **
     from ase.io.trajectory import Trajectory as ASETrajectory
     from ase.md.langevin import Langevin
     from ase import units as ase_units
-    assert configuration.box != None, 'configuration must have box'
+    assert configuration.box is not None, 'configuration must have box'
 
     logger.info('Run MLP MD with fixed solute (solute coords should at the first in configuration coords) by MLP')
 
@@ -174,7 +176,7 @@ def optimize_sys(configuration, mlp, **kwargs):
     # applied MLP to optimised geometry with BFGS method
     from ase.io.trajectory import Trajectory as ASETrajectory
     from ase.optimize import BFGS
-    assert configuration.box != None, 'configuration must have box'
+    assert configuration.box is not None, 'configuration must have box'
 
     logger.info('Optimise the configuratoin with fixed solute (solute coords should at the first in configuration coords) by MLP')
 
@@ -266,6 +268,10 @@ def baised_md(configuration, mlp, temp, dt, interval, bias, **kwargs):
                                      rng=RandomState())
 
     traj = ASETrajectory("tmp.traj", 'w', ase_atoms)
+    energies = []
+
+    def append_energy(_atoms=ase_atoms):
+        energies.append(_atoms.get_potential_energy())
 
     if temp > 0:                                         # Default Langevin NVT
         dyn = Langevin(ase_atoms, dt * ase_units.fs,
@@ -312,7 +318,7 @@ def generate_rs(TS, solution, mlp, box_size):
     for i, species in enumerate(reactants):
         bias = mlt.Bias(zeta_func=mlt.AverageDistance((1,12), (6,11)), kappa=0.5, reference=ref[i])
         traj = baised_md(configuration=species,
-                              mlp=endo,
+                              mlp=mlp,
                               temp=300,
                               dt=0.5,
                               interval=20,

diff --git a/examples/DA_paper/uphill/uphill.py b/examples/DA_paper/uphill/uphill.py
@@ -1,14 +1,9 @@
 import mlptrain as mlt
 import numpy as np
-import matplotlib.pyplot as plt
 from mlptrain.log import logger
-import random
 from mlptrain.box import Box
 from scipy.spatial import distance_matrix
 import os
-import math
-from ase.constraints import Hookean
-from ase.geometry import find_mic
 from generate_rs import generate_rs
 
 mlt.Config.n_cores = 10
@@ -116,7 +111,7 @@ def get_cavity_volume(atoms = ase_atoms):
 
     dyn.attach(append_energy, interval=interval)
     dyn.attach(get_reaction_coord,interval=interval)
-    dyn.attach(get_cavity_volumn,interval=interval)
+    dyn.attach(get_cavity_volume,interval=interval)
     dyn.attach(traj.write, interval=interval)
 
     logger.info(f'Running {n_steps:.0f} steps with a timestep of {dt} fs')
@@ -133,20 +128,10 @@ def get_cavity_volume(atoms = ase_atoms):
 
 
 def traj_study(configs,  ml_potential,  init_md_time_fs = 500, max_time_fs = 3000):     
-    num_config = len(configs)
-
-    C2_C7_recrossing_list = []
-    C4_C6_recrossing_list = []
-
-    C2_C7_product_list = []
-    C4_C6_product_list = []
-
+
     C2_C7_initial_list = []
     C4_C6_initial_list = []
 
-    time_sep = []
-    intermediate_time = []
-
     for k in range(500):
         config =configs[k]
         logger.info(f'start trajectory study for {k} th configuration')
@@ -175,7 +160,7 @@ def traj_study(configs,  ml_potential,  init_md_time_fs = 500, max_time_fs = 300
                                                                 temp=300,
                                                                 dt=0.5,
                                                                 interval=2,
-                                                                fs=md_time_f)
+                                                                fs=md_time_fs_f)
             ending = 0
             for (i, j) in zip (C2_C7_list, C4_C6_list):
                 logger.info(f'C2-C7 and C4-C6 bond lengths are {(i,j)}')

diff --git a/mlptrain/__init__.py b/mlptrain/__init__.py
@@ -28,6 +28,7 @@
            'Box',
            'Bias',
            'PlumedBias',
+           'PlumedCalculator', 
            'UmbrellaSampling',
            'Metadynamics',
            'AverageDistance',

diff --git a/mlptrain/config.py b/mlptrain/config.py
@@ -1,4 +1,4 @@
-from autode.wrappers.keywords import *
+from autode.wrappers.keywords import GradientKeywords
 
 
 class _ConfigClass:

diff --git a/mlptrain/configurations/calculate.py b/mlptrain/configurations/calculate.py
@@ -1,3 +1,5 @@
+import mlptrain
+import autode
 from typing import Tuple
 from mlptrain.log import logger
 from mlptrain.utils import work_in_tmp_dir

diff --git a/mlptrain/configurations/configuration.py b/mlptrain/configurations/configuration.py
@@ -1,8 +1,11 @@
+import mlptrain
+import ase
 import numpy as np
 from typing import Optional, Union, List
 from copy import deepcopy
-from autode.atoms import AtomCollection, Atoms, Atom
-from ase.atoms import Atoms
+from autode.atoms import AtomCollection, Atom
+import autode.atoms
+import ase.atoms
 from mlptrain.log import logger
 from mlptrain.energy import Energy
 from mlptrain.forces import Forces
@@ -14,7 +17,7 @@ class Configuration(AtomCollection):
     """Configuration of atoms"""
 
     def __init__(self,
-                 atoms:  Union[Atoms, List[Atom], None] = None,
+                 atoms:  Union[autode.atoms.Atoms, List[Atom], None] = None,
                  charge: int = 0,
                  mult:   int = 1,
                  box:    Optional[Box] = None
@@ -55,7 +58,7 @@ def ase_atoms(self) -> 'ase.atoms.Atoms':
         Returns:
             (ase.atoms.Atoms): ASE atoms
         """
-        _atoms = Atoms(symbols=[atom.label for atom in self.atoms],
+        _atoms = ase.atoms.Atoms(symbols=[atom.label for atom in self.atoms],
                        positions=self.coordinates,
                        pbc=self.box is not None)
 

diff --git a/mlptrain/configurations/configuration_set.py b/mlptrain/configurations/configuration_set.py
@@ -1,3 +1,4 @@
+import mlptrain
 import os
 import numpy as np
 from time import time
@@ -447,12 +448,12 @@ def plumed_coordinates(self) -> Optional[np.ndarray]:
                 n_cvs_set.add(len(config.plumed_coordinates))
 
         if len(n_cvs_set) == 0:
-            logger.info(f'PLUMED coordinates not defined - returning None')
+            logger.info('PLUMED coordinates not defined - returning None')
             return None
 
         elif len(n_cvs_set) != 1:
-            logger.info(f'Number of CVs differ between configurations - '
-                        f'returning None')
+            logger.info('Number of CVs differ between configurations - '
+                        'returning None')
             return None
 
         n_cvs = n_cvs_set.pop()

diff --git a/mlptrain/configurations/plotting.py b/mlptrain/configurations/plotting.py
@@ -1,3 +1,4 @@
+import mlptrain
 import numpy as np
 import matplotlib as mpl
 import matplotlib.pyplot as plt