Bugfixes to custom kernels, doc updates, changelog updates (#102)
* Update docs and changelog

1. Replace all occurrences of `restore_db` with `restart_db`. A note
   is added to the documentation to reflect this change.
2. Update changelog for breaking changes of `restart_db` and
   `make_trainvalidtest_split`.

* Fix typos and rephrase the note

* Check CUDA before calling torch.cuda.get_device_capability: only run the triton check if CUDA is available (a sketch of this guard follows the commit message).

* Update settings- and documentation-related items; fix custom kernel handling

* Update changelog; revert ipynb figure changes

---------

Co-authored-by: Xinyang Li <lix@lanl.gov>
Co-authored-by: Nicholas Lubbers <nlubbers@lanl.gov>
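
The third bullet above describes guarding the triton capability check behind a CUDA availability check. A minimal sketch of that kind of guard, not the commit's actual implementation (the capability threshold is a placeholder)::

    import warnings

    import torch

    def triton_capability_ok():
        """Return True only when a CUDA device is present and its capability can be read."""
        if not torch.cuda.is_available():
            # Without a CUDA device, torch.cuda.get_device_capability() would raise,
            # so skip the triton check entirely.
            return False
        major, minor = torch.cuda.get_device_capability()
        if major < 7:  # placeholder threshold, not hippynn's actual policy
            warnings.warn(f"Device capability {major}.{minor} may not be usable with triton.")
            return False
        return True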
3 people authored Sep 13, 2024
1 parent 110b831 commit 144c160
Showing 17 changed files with 355 additions and 203 deletions.
16 changes: 14 additions & 2 deletions CHANGELOG.rst
@@ -3,8 +3,16 @@
Breaking changes:
-----------------

- set_e0_values has been renamed hierarchical_energy_initialization. The old name is
still provided but deprecated, and will be removed.
- ``set_e0_values`` has been renamed to ``hierarchical_energy_initialization``.
The old name is still provided but deprecated, and will be removed.
- The argument ``restore_db`` has been renamed to ``restart_db``. The affected
functions are ``load_checkpoint``, ``load_checkpoint_from_cwd``, and
``restore_checkpoint``.
- ``database.make_trainvalidtest_split`` now only takes keyword arguments to
  avoid confusion. Use ``make_trainvalidtest_split(test_size=a, valid_size=b)``
  instead of ``make_trainvalidtest_split(a, b)`` (see the sketch below).
- Invalid custom kernel specifications are now errors rather than warnings.
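
The ``restart_db`` and ``make_trainvalidtest_split`` entries above are call-site updates; a minimal sketch of the new spellings (the import path, the placeholder sizes, and the wrapper function are assumptions for illustration)::

    from hippynn.experiment.serialization import load_checkpoint_from_cwd  # import path assumed

    def update_call_sites(database):
        """Illustrative only: the renamed keyword and the keyword-only split."""
        # restore_db -> restart_db (also applies to load_checkpoint and restore_checkpoint).
        check = load_checkpoint_from_cwd(map_location="cpu", restart_db=False)

        # Positional sizes are now an error; pass test_size and valid_size by keyword.
        database.make_trainvalidtest_split(test_size=0.1, valid_size=0.1)
        return check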


New Features:
-------------
@@ -22,6 +30,7 @@ New Features:
- Added tool to drastically simplify creating ensemble models. The ensemblized graphs
  are compatible with molecular dynamics codes such as ASE and LAMMPS.
- Added the ability to weight different systems/atoms/bonds in a loss function.
- Added new function to reload library settings.


Improvements:
@@ -35,6 +44,9 @@ Improvements:
using a library setting.
- Provide tunable regularization of HIP-NN-TS with an epsilon parameter, and
set the default to use a better value for epsilon.
- Improved detection of valid custom kernel implementations (see the sketch below).
- Improved computational efficiency of HIP-NN-TS network.
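
The custom kernel entries here and under breaking changes concern hippynn's accelerated kernels (numba, cupy, and triton appear among the mocked imports further down). A hedged illustration of selecting an implementation explicitly; the ``USE_CUSTOM_KERNELS`` setting name and the ``set_custom_kernels`` helper are assumptions to verify against the installed version's documentation::

    # Shell route, assuming the USE_CUSTOM_KERNELS setting exists:
    #   HIPPYNN_USE_CUSTOM_KERNELS=triton python train.py

    # Python route, assuming hippynn.custom_kernels.set_custom_kernels is available:
    import hippynn.custom_kernels

    try:
        hippynn.custom_kernels.set_custom_kernels("triton")  # or "numba", "cupy", True, False
    except Exception as err:
        # As of this release, an invalid specification raises instead of warning.
        print(f"Custom kernel selection failed: {err}")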



Bug Fixes:
9 changes: 5 additions & 4 deletions docs/source/conf.py
@@ -23,6 +23,7 @@

# The full version, including alpha/beta/rc tags
import hippynn

release = hippynn.__version__

# -- General configuration ---------------------------------------------------
@@ -31,7 +32,6 @@
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = ["sphinx.ext.autodoc", "sphinx_rtd_theme", "sphinx.ext.viewcode"]
add_module_names = False

# Add any paths that contain templates here, relative to this directory.
templates_path = ["_templates"]
@@ -45,12 +45,13 @@
"no-show-inheritance": True,
"special-members": "__init__",
}
autodoc_member_order = 'bysource'
autodoc_member_order = "bysource"


# The following are highly optional, so we mock them for doc purposes.
autodoc_mock_imports = ["pyanitools", "seqm", "schnetpack", "cupy", "lammps", "numba", "triton", "pytorch_lightning", 'triton', 'scipy']

# TODO: Can we programmatically get these from our list of optional dependencies?
autodoc_mock_imports = ["ase", "h5py", "seqm", "schnetpack", "cupy", "lammps", "numba", "triton", "pytorch_lightning", 'scipy']
add_module_names = False

# -- Options for HTML output -------------------------------------------------

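
The TODO in the hunk above asks whether the mocked modules could be derived from the declared optional dependencies. A hedged sketch of one way to do that for an installed distribution, using the standard library plus ``packaging`` (whether the resulting names match hippynn's extras is not checked here)::

    from importlib.metadata import requires

    from packaging.requirements import Requirement

    def optional_dependency_names(dist_name="hippynn"):
        """Collect requirement names that only apply under an 'extra' marker."""
        names = set()
        for spec in requires(dist_name) or []:
            req = Requirement(spec)
            # Requirements guarded by a marker mentioning 'extra' are optional.
            if req.marker is not None and "extra" in str(req.marker):
                names.add(req.name)
        return sorted(names)

    # Hypothetical use in conf.py:
    # autodoc_mock_imports = optional_dependency_names()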
2 changes: 1 addition & 1 deletion docs/source/examples/mliap_unified.rst
@@ -11,7 +11,7 @@ species atomic symbols (whose order must agree with the order of the training hy

Example::

bundle = load_checkpoint_from_cwd(map_location="cpu", restore_db=False)
bundle = load_checkpoint_from_cwd(map_location="cpu", restart_db=False)
model = bundle["training_modules"].model
energy_node = model.node_from_name("HEnergy")
unified = MLIAPInterface(energy_node, ["Al"], model_device=torch.device("cuda"))
8 changes: 8 additions & 0 deletions docs/source/examples/restarting.rst
@@ -43,6 +43,14 @@ or to use the default filenames and load from the current directory::
check = load_checkpoint_from_cwd()
train_model(**check, callbacks=None, batch_callbacks=None)

.. note::
    In release 0.0.4, the ``restore_db`` argument was renamed to ``restart_db``
    for internal consistency. In scripts using `hippynn > 0.0.3`, replace
    ``restore_db`` with ``restart_db``; the affected functions are
    ``load_checkpoint``, ``load_checkpoint_from_cwd``, and ``restore_checkpoint``.
    If `hippynn <= 0.0.3` is in use, keep the original ``restore_db`` keyword.
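
For scripts that must run against both older and newer hippynn releases, a minimal sketch of picking the keyword from the installed version (the import path and the use of ``packaging`` are assumptions)::

    import hippynn
    from hippynn.experiment.serialization import load_checkpoint_from_cwd  # import path assumed
    from packaging.version import Version

    # restore_db was renamed to restart_db in release 0.0.4.
    db_kwarg = "restart_db" if Version(hippynn.__version__) > Version("0.0.3") else "restore_db"
    check = load_checkpoint_from_cwd(map_location="cpu", **{db_kwarg: False})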

If all you want to do is use a previously trained model, here is how to load the model only::

from hippynn.experiment.serialization import load_model_from_cwd
1 change: 1 addition & 0 deletions docs/source/user_guide/settings.rst
@@ -11,6 +11,7 @@ There are four possible sources for settings.
3. A file specified by the environment variable `HIPPYNN_LOCAL_RC_FILE`
which is treated the same as the user rc file.
4. Environment variables prefixed by ``HIPPYNN_``, e.g. ``HIPPYNN_DEFAULT_PLOT_FILETYPE``.
5. Arguments passed to :func:`hippynn.reload_settings` (see the sketch below).

These sources are checked in order, so that values in later sources overwrite values
found in earlier sources.
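
A hedged sketch of source 5; ``hippynn.reload_settings`` is new in this release and its exact signature is not shown here, so the keyword-argument form below is an assumption to check against the settings reference::

    import hippynn

    # Re-read settings from the rc files and HIPPYNN_-prefixed environment variables.
    hippynn.reload_settings()

    # Assumed keyword form: override a named setting directly;
    # DEFAULT_PLOT_FILETYPE corresponds to HIPPYNN_DEFAULT_PLOT_FILETYPE above.
    hippynn.reload_settings(DEFAULT_PLOT_FILETYPE=".pdf")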
2 changes: 1 addition & 1 deletion examples/lammps/hippynn_lammps_example.ipynb
@@ -38,7 +38,7 @@
"\n",
"try:\n",
" with active_directory(\"./TEST_INP_MODEL\", create=False):\n",
" bundle = load_checkpoint_from_cwd(map_location='cpu',restore_db=False)\n",
" bundle = load_checkpoint_from_cwd(map_location='cpu',restart_db=False)\n",
"except FileNotFoundError:\n",
" raise FileNotFoundError(\"Model not found, run lammps_example.py first!\")\n",
"\n",
17 changes: 15 additions & 2 deletions hippynn/__init__.py
@@ -2,17 +2,21 @@
The hippynn python package.
.. autodata:: settings
:no-value:
"""

from . import _version
__version__ = _version.get_versions()['version']

# Configuration settings
from ._settings_setup import settings
from ._settings_setup import settings, reload_settings

# Pytorch modules
from . import layers
from . import networks # wait this one is different from the other one.
from . import networks

# Graph abstractions
from . import graphs
@@ -40,3 +44,12 @@

from . import tools
from .tools import active_directory, log_terminal

# The order is adjusted to put functions after objects in the documentation.
_dir = dir()
_lowerdir = [x for x in _dir if x[0].lower() == x[0]]
_upperdir = [x for x in _dir if x[0].upper() == x[0]]
__all__ = _lowerdir + _upperdir
del _dir, _lowerdir, _upperdir

__all__ = [x for x in __all__ if not x.startswith("_")]