From 144c160ea64122ae1666fe95de9ecd9380c2e5d8 Mon Sep 17 00:00:00 2001 From: "Michael G. Taylor" <119455260+mgt16-LANL@users.noreply.github.com> Date: Fri, 13 Sep 2024 14:24:52 -0600 Subject: [PATCH] Bugfixes to custom kernels, doc updates, changelog updates (#102) * Update docs and changelog 1. Replace all occurrences of `restore_db` with `restart_db`. A note is added to the documentation to reflect this change. 2. Update changelog for breaking changes of `restart_db` and `make_trainvalidtest_split`. * Fix typos and rephrase the note * Check cuda - torch.cuda.get_device_capability - Only run triton check if cuda is available. * update a lot of settings and doc related things. fix custom kernel handling * update changelog, revert ipynb figure changes --------- Co-authored-by: Xinyang Li Co-authored-by: Nicholas Lubbers --- CHANGELOG.rst | 16 +- docs/source/conf.py | 9 +- docs/source/examples/mliap_unified.rst | 2 +- docs/source/examples/restarting.rst | 8 + docs/source/user_guide/settings.rst | 1 + examples/lammps/hippynn_lammps_example.ipynb | 2 +- hippynn/__init__.py | 17 +- hippynn/_settings_setup.py | 218 ++++++++++++------- hippynn/custom_kernels/__init__.py | 155 +++++++++---- hippynn/databases/database.py | 2 +- hippynn/experiment/device.py | 2 - hippynn/experiment/serialization.py | 1 - hippynn/layers/pairs/dispatch.py | 9 +- hippynn/molecular_dynamics/__init__.py | 7 +- hippynn/molecular_dynamics/md.py | 84 +++---- pyproject.toml | 2 +- tests/progress_settings.py | 23 ++ 17 files changed, 355 insertions(+), 203 deletions(-) create mode 100644 tests/progress_settings.py diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 6842e0d5..f9d40038 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -3,8 +3,16 @@ Breaking changes: ----------------- -- set_e0_values has been renamed hierarchical_energy_initialization. The old name is - still provided but deprecated, and will be removed. +- ``set_e0_values`` has been renamed to ``hierarchical_energy_initialization``. + The old name is still provided but deprecated, and will be removed. +- The argument ``restore_db`` has been renamed to ``restart_db``. The affected + functions are ``load_checkpoint``, ``load_checkpoint_from_cwd``, and + ``restore_checkpoint``. +- ``database.make_trainvalidtest_split`` now only takes keyword arguments to + avoid confusion. Use ``make_trainvalidtest_split(test_size=a, valid_size=b)`` + instead of ``make_trainvalidtest_split(a, b)``. +- Invalid custom kernel specifications are now errors rather than warnings. + New Features: ------------- @@ -22,6 +30,7 @@ New Features: - Added tool to drastically simplify creating ensemble models. The ensemblized graphs are compatible with molecular dynamics codes such ASE and LAMMPS. - Added the ability to weight different systems/atoms/bonds in a loss function. +- Added new function to reload library settings. Improvements: @@ -35,6 +44,9 @@ Improvements: using a library setting. - Provide tunable regularization of HIP-NN-TS with an epsilon parameter, and set the default to use a better value for epsilon. +- Improved detection of valid custom kernel implementations. +- Improved computational efficiency of HIP-NN-TS network.
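For readers upgrading across these breaking changes, a minimal migration sketch (the checkpoint call mirrors the docs updated below; the ``database`` object and split fractions are illustrative, not taken from this patch)::

    # Old style (hippynn <= 0.0.3):
    #   bundle = load_checkpoint_from_cwd(map_location="cpu", restore_db=False)
    #   database.make_trainvalidtest_split(0.1, 0.1)

    # New style: renamed keyword, and keyword-only split sizes.
    bundle = load_checkpoint_from_cwd(map_location="cpu", restart_db=False)
    database.make_trainvalidtest_split(test_size=0.1, valid_size=0.1)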
+ Bug Fixes: diff --git a/docs/source/conf.py b/docs/source/conf.py index 8e707bdf..945fc9e7 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -23,6 +23,7 @@ # The full version, including alpha/beta/rc tags import hippynn + release = hippynn.__version__ # -- General configuration --------------------------------------------------- @@ -31,7 +32,6 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = ["sphinx.ext.autodoc", "sphinx_rtd_theme", "sphinx.ext.viewcode"] -add_module_names = False # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] @@ -45,12 +45,13 @@ "no-show-inheritance": True, "special-members": "__init__", } -autodoc_member_order = 'bysource' +autodoc_member_order = "bysource" # The following are highly optional, so we mock them for doc purposes. -autodoc_mock_imports = ["pyanitools", "seqm", "schnetpack", "cupy", "lammps", "numba", "triton", "pytorch_lightning", 'triton', 'scipy'] - +# TODO: Can we programmatically get these from our list of optional dependencies? +autodoc_mock_imports = ["ase", "h5py", "seqm", "schnetpack", "cupy", "lammps", "numba", "triton", "pytorch_lightning", "scipy"] +add_module_names = False # -- Options for HTML output ------------------------------------------------- diff --git a/docs/source/examples/mliap_unified.rst b/docs/source/examples/mliap_unified.rst index 4d627ee6..065608cb 100644 --- a/docs/source/examples/mliap_unified.rst +++ b/docs/source/examples/mliap_unified.rst @@ -11,7 +11,7 @@ species atomic symbols (whose order must agree with the order of the training hy Example:: - bundle = load_checkpoint_from_cwd(map_location="cpu", restore_db=False) + bundle = load_checkpoint_from_cwd(map_location="cpu", restart_db=False) model = bundle["training_modules"].model energy_node = model.node_from_name("HEnergy") unified = MLIAPInterface(energy_node, ["Al"], model_device=torch.device("cuda")) diff --git a/docs/source/examples/restarting.rst b/docs/source/examples/restarting.rst index 75d14aa3..18a00949 100644 --- a/docs/source/examples/restarting.rst +++ b/docs/source/examples/restarting.rst @@ -43,6 +43,14 @@ or to use the default filenames and load from the current directory:: check = load_checkpoint_from_cwd() train_model(**check, callbacks=None, batch_callbacks=None) +.. note:: + In release 0.0.4, the ``restore_db`` argument has been renamed to + ``restart_db`` for internal consistency. In all scripts using + `hippynn > 0.0.3`, ``restore_db`` should be replaced with ``restart_db``. The affected + functions are ``load_checkpoint``, ``load_checkpoint_from_cwd``, and + ``restore_checkpoint``. If `hippynn <= 0.0.3` is used, please keep the + original ``restore_db`` keyword. + If all you want to do is use a previously trained model, here is how to load the model only:: from hippynn.experiment.serialization import load_model_from_cwd diff --git a/docs/source/user_guide/settings.rst b/docs/source/user_guide/settings.rst index a4c8fcef..a7837531 100644 --- a/docs/source/user_guide/settings.rst +++ b/docs/source/user_guide/settings.rst @@ -11,6 +11,7 @@ There are four possible sources for settings. 3. A file specified by the environment variable `HIPPYNN_LOCAL_RC_FILE` which is treated the same as the user rc file. 4. Environment variables prefixed by ``HIPPYNN_``, e.g. ``HIPPYNN_DEFAULT_PLOT_FILETYPE``. +5. Arguments passed to :func:`hippynn.reload_settings`.
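A minimal usage sketch of this new highest-priority settings source, using only names introduced by this patch (the setting values shown are illustrative)::

    import hippynn

    # Keyword arguments override rc files and environment variables.
    hippynn.reload_settings(PROGRESS="none", DEFAULT_PLOT_FILETYPE=".png")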
These three sources are checked in order, so that values in later sources overwrite values found in earlier sources. diff --git a/examples/lammps/hippynn_lammps_example.ipynb b/examples/lammps/hippynn_lammps_example.ipynb index f42f31a3..12ed548f 100644 --- a/examples/lammps/hippynn_lammps_example.ipynb +++ b/examples/lammps/hippynn_lammps_example.ipynb @@ -38,7 +38,7 @@ "\n", "try:\n", " with active_directory(\"./TEST_INP_MODEL\", create=False):\n", - " bundle = load_checkpoint_from_cwd(map_location='cpu',restore_db=False)\n", + " bundle = load_checkpoint_from_cwd(map_location='cpu',restart_db=False)\n", "except FileNotFoundError:\n", " raise FileNotFoundError(\"Model not found, run lammps_example.py first!\")\n", "\n", diff --git a/hippynn/__init__.py b/hippynn/__init__.py index bf59fa7d..3b451ac9 100644 --- a/hippynn/__init__.py +++ b/hippynn/__init__.py @@ -2,17 +2,21 @@ The hippynn python package. +.. autodata:: settings + :no-value: + + """ from . import _version __version__ = _version.get_versions()['version'] # Configuration settings -from ._settings_setup import settings +from ._settings_setup import settings, reload_settings # Pytorch modules from . import layers -from . import networks # wait this one is different from the other one. +from . import networks # Graph abstractions from . import graphs @@ -40,3 +44,12 @@ from . import tools from .tools import active_directory, log_terminal + +# The order is adjusted to put functions after objects in the documentation. +_dir = dir() +_lowerdir = [x for x in _dir if x[0].lower() == x[0]] +_upperdir = [x for x in _dir if x[0].upper() == x[0]] +__all__ = _lowerdir + _upperdir +del _dir, _lowerdir, _upperdir + +__all__ = [x for x in __all__ if not x.startswith("_")] diff --git a/hippynn/_settings_setup.py b/hippynn/_settings_setup.py index c72eb94f..675fae91 100644 --- a/hippynn/_settings_setup.py +++ b/hippynn/_settings_setup.py @@ -10,46 +10,79 @@ import warnings import os import configparser +from typing import Union from distutils.util import strtobool from types import SimpleNamespace from functools import partial +# Significant strings +SECTION_NAME = "GLOBALS" +SETTING_PREFIX = "HIPPYNN_" +LOCAL_RC_FILE_KEY = "LOCAL_RC_FILE" -try: - from tqdm.contrib import tqdm_auto +# Globals +DEFAULT_PROGRESS = None # this gets set to a tqdm object if possible +TQDM_PROGRESS = None # the best progress bar from tqdm, if available. - TQDM_PROGRESS = tqdm_auto -except ImportError: +def setup_tqdm(): + global TQDM_PROGRESS + global DEFAULT_PROGRESS try: - from tqdm import tqdm + from tqdm.contrib import tqdm_auto - TQDM_PROGRESS = tqdm + TQDM_PROGRESS = tqdm_auto except ImportError: - TQDM_PROGRESS = None - -if TQDM_PROGRESS is not None: - DEFAULT_PROGRESS = partial(TQDM_PROGRESS, mininterval=1.0, leave=False) -else: - DEFAULT_PROGRESS = None -### Progress handlers - -def progress_handler(prog_str): - if prog_str == "tqdm": - return DEFAULT_PROGRESS - elif prog_str.lower() == "none": - return None - else: try: - prog_float = float(prog_str) - return partial(TQDM_PROGRESS, mininterval=prog_float, leave=False) - except: - pass - warnings.warn(f"Unrecognized progress setting: '{prog_str}'. Setting to none.") + from tqdm import tqdm + + TQDM_PROGRESS = tqdm + except ImportError: + TQDM_PROGRESS = None + + if TQDM_PROGRESS is not None: + DEFAULT_PROGRESS = partial(TQDM_PROGRESS, mininterval=1.0, leave=False) + else: + DEFAULT_PROGRESS = None + +# Setting handlers: Take an input str or other value and return the appropriate value. 
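For reference, the handler contract implemented below: each handler takes the raw value from a configuration source and returns the normalized setting, warning or erroring on bad input. A sketch of the inputs ``progress_handler`` accepts, based on the implementation that follows::

    progress_handler("tqdm")   # default tqdm wrapper (mininterval=1.0)
    progress_handler("none")   # None; progress bars disabled
    progress_handler(False)    # bools are mapped to "tqdm"/"none"
    progress_handler(0.25)     # tqdm wrapper with mininterval=0.25
    progress_handler("oops")   # raises ValueError via float("oops")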
+ +def progress_handler(prog_setting: Union[str, float, bool, None]): + """ + Function for handling the progress bar settings. + + :param prog_setting: "tqdm", "none", a boolean, None, or a float giving the tqdm update interval in seconds. + :return: a progress bar constructor, or None if progress bars are disabled. + """ + if TQDM_PROGRESS is None: + setup_tqdm() + + if prog_setting in (True, False, None): + prog_setting = { + True: "tqdm", + False: "none", + None: "none", + }[prog_setting] + + if isinstance(prog_setting, str): + prog_setting = prog_setting.lower() + if prog_setting == "tqdm": + return DEFAULT_PROGRESS + elif prog_setting == "none": + return None + + prog_setting = float(prog_setting) # Trigger error if not floatable. + + return partial(TQDM_PROGRESS, mininterval=prog_setting, leave=False) + def kernel_handler(kernel_string): + """ + Normalize a custom kernel setting string. + + :param kernel_string: the requested custom kernel setting. + :return: the normalized kernel specification (a bool or an implementation name). + """ + kernel_string = kernel_string.lower() kernel = { @@ -61,76 +94,107 @@ def kernel_handler(kernel_string): }.get(kernel_string, kernel_string) if kernel not in [True, False, "auto", "triton", "cupy", "numba"]: - warnings.warn(f"Unrecognized custom kernel option: {kernel_string}. Setting custom kernels to 'auto'") - kernel = "auto" + warnings.warn(f"Unexpected custom kernel setting: {kernel_string}.", stacklevel=3) return kernel -# keys: defaults, types, and handlers -default_settings = { - "PROGRESS": (DEFAULT_PROGRESS, progress_handler), +def bool_or_strtobool(key: Union[bool, str]): + if isinstance(key, bool): + return key + else: + return strtobool(key) + + +# keys: defaults, types, and handlers. +DEFAULT_SETTINGS = { + "PROGRESS": ('tqdm', progress_handler), "DEFAULT_PLOT_FILETYPE": (".pdf", str), - "TRANSPARENT_PLOT": (False, strtobool), - "DEBUG_LOSS_BROADCAST": (False, strtobool), - "DEBUG_GRAPH_EXECUTION": (False, strtobool), - "DEBUG_NODE_CREATION": (False, strtobool), - "DEBUG_AUTOINDEXING": (False, strtobool), + "TRANSPARENT_PLOT": (False, bool_or_strtobool), + "DEBUG_LOSS_BROADCAST": (False, bool_or_strtobool), + "DEBUG_GRAPH_EXECUTION": (False, bool_or_strtobool), + "DEBUG_NODE_CREATION": (False, bool_or_strtobool), + "DEBUG_AUTOINDEXING": (False, bool_or_strtobool), "USE_CUSTOM_KERNELS": ("auto", kernel_handler), - "WARN_LOW_DISTANCES": (True, strtobool), - "TIMEPLOT_AUTOSCALING": (True, strtobool), + "WARN_LOW_DISTANCES": (True, bool_or_strtobool), + "TIMEPLOT_AUTOSCALING": (True, bool_or_strtobool), "PYTORCH_GPU_MEM_FRAC": (1.0, float), } -settings = SimpleNamespace(**{k: default for k, (default, handler) in default_settings.items()}) +INITIAL_SETTINGS = {k: handler(default) for k, (default, handler) in DEFAULT_SETTINGS.items()} + +settings = SimpleNamespace(**INITIAL_SETTINGS) settings.__doc__ = """ Values for the current hippynn settings. See :doc:`/user_guide/settings` for a description. """ -config_sources = {} # Dictionary of configuration variable sources mapping to dictionary of configuration. -# We add to this dictionary in order of application -SECTION_NAME = "GLOBALS" +def reload_settings(**kwargs): + """ + Attempt to reload the hippynn library settings.
-rc_name = os.path.expanduser("~/.hippynnrc") -if os.path.exists(rc_name) and os.path.isfile(rc_name): - config = configparser.ConfigParser(inline_comment_prefixes="#") - config.read(rc_name) - if SECTION_NAME not in config: - warnings.warn(f"Config file {rc_name} does not contain a {SECTION_NAME} section and will be ignored!") - else: - config_sources["~/.hippynnrc"] = config[SECTION_NAME] + Settings sources are, in order from least to greatest priority: + - Default values + - The file `~/.hippynnrc`, a standard Python config file which contains + variables under the section name [GLOBALS]. + - A file specified by the environment variable `HIPPYNN_LOCAL_RC_FILE` + which is treated the same as the user rc file. + - Environment variables prefixed by ``HIPPYNN_``, e.g. ``HIPPYNN_DEFAULT_PLOT_FILETYPE``. + - Keyword arguments passed to this function. -SETTING_PREFIX = "HIPPYNN_" -hippynn_environment_variables = { - k.replace(SETTING_PREFIX, ""): v for k, v in os.environ.items() if k.startswith(SETTING_PREFIX) -} -LOCAL_RC_FILE_KEY = "LOCAL_RC_FILE" + :param kwargs: explicit settings to change. + + :return: the updated settings namespace. + """ + # Developer note: this function modifies the module-scope `settings` directly. -if LOCAL_RC_FILE_KEY in hippynn_environment_variables: - local_rc_fname = hippynn_environment_variables.pop(LOCAL_RC_FILE_KEY) - if os.path.exists(local_rc_fname) and os.path.isfile(local_rc_fname): - local_config = configparser.ConfigParser() - local_config.read(local_rc_fname) - if SECTION_NAME not in local_config: - warnings.warn(f"Config file {local_rc_fname} does not contain a {SECTION_NAME} section and will be ignored!") + config_sources = {} # Dictionary of configuration variable sources mapping to dictionary of configuration. + # We add to this dictionary in order of application + + rc_name = os.path.expanduser("~/.hippynnrc") + if os.path.exists(rc_name) and os.path.isfile(rc_name): + config = configparser.ConfigParser(inline_comment_prefixes="#") + config.read(rc_name) + if SECTION_NAME not in config: + warnings.warn(f"Config file {rc_name} does not contain a {SECTION_NAME} section and will be ignored!") else: - config_sources[LOCAL_RC_FILE_KEY] = local_config[SECTION_NAME] - else: - warnings.warn(f"Local configuration file {local_rc_fname} not found.") - -config_sources["environment variables"] = hippynn_environment_variables - -for sname, source in config_sources.items(): - for key, value in source.items(): - key = key.upper() - if key in default_settings: - default, handler = default_settings[key] - try: - setattr(settings, key, handler(value)) - except Exception as ee: - raise ValueError(f"Value {value} for setting {key} is invalid") from ee + config_sources["~/.hippynnrc"] = config[SECTION_NAME] + + hippynn_environment_variables = { + k.replace(SETTING_PREFIX, ""): v for k, v in os.environ.items() if k.startswith(SETTING_PREFIX) + } + + if LOCAL_RC_FILE_KEY in hippynn_environment_variables: + local_rc_fname = hippynn_environment_variables.pop(LOCAL_RC_FILE_KEY) + if os.path.exists(local_rc_fname) and os.path.isfile(local_rc_fname): + local_config = configparser.ConfigParser() + local_config.read(local_rc_fname) + if SECTION_NAME not in local_config: + warnings.warn(f"Config file {local_rc_fname} does not contain a {SECTION_NAME} section and will be ignored!") + else: + config_sources[LOCAL_RC_FILE_KEY] = local_config[SECTION_NAME] else: - warnings.warn(f"Configuration source {sname} contains invalid variables ({key}).
They will not be used.") + warnings.warn(f"Local configuration file {local_rc_fname} not found.") + + config_sources["environment variables"] = hippynn_environment_variables + config_sources["kwargs"] = kwargs.copy() + + for sname, source in config_sources.items(): + for key, value in source.items(): + key = key.upper() + if key in DEFAULT_SETTINGS: + default, handler = DEFAULT_SETTINGS[key] + try: + setattr(settings, key, handler(value)) + except Exception as ee: + raise ValueError(f"Value {value} for setting {key} is invalid") from ee + else: + warnings.warn(f"Configuration source {sname} contains invalid variables ({key}). These will be ignored.") + + return settings + + +reload_settings() + diff --git a/hippynn/custom_kernels/__init__.py b/hippynn/custom_kernels/__init__.py index 8ef8953b..ab9bdcce 100644 --- a/hippynn/custom_kernels/__init__.py +++ b/hippynn/custom_kernels/__init__.py @@ -1,64 +1,90 @@ """ Custom Kernels for hip-nn interaction sum. -This module provides implementations in pytorch, numba, and cupy. +This module provides implementations in pytorch, numba, cupy, and triton. Pytorch implementations take extra memory, but launch faster than numba kernels. - Numba kernels use far less memory, but do come with some launching overhead on GPUs. - Cupy kernels only work on the GPU, but are faster than numba. Cupy kernels require numba for CPU operations. +Triton custom kernels only work on the GPU, and are generally faster than cupy. +Triton kernels revert to numba or pytorch, as available, on module import. + +On import, this module attempts to set the custom kernels as specified by the +user in hippynn.settings. + +.. py:data:: CUSTOM_KERNELS_AVAILABLE + :type: list[str] + + The set of custom kernels available, based on currently installed packages and hardware. + +.. py:data:: CUSTOM_KERNELS_ACTIVE + :type: str + + The currently active implementation of custom kernels. + """ import warnings from typing import Union - +import torch from .. import settings from . import autograd_wrapper, env_pytorch -CUSTOM_KERNELS_AVAILABLE = [] -try: - import numba - CUSTOM_KERNELS_AVAILABLE.append("numba") -except ImportError: +class CustomKernelError(Exception): pass -try: - import cupy - if "numba" not in CUSTOM_KERNELS_AVAILABLE: - warnings.warn("Cupy was found, but numba was not. Cupy custom kernels not available.") - else: - CUSTOM_KERNELS_AVAILABLE.append("cupy") -except ImportError: - pass +def populate_custom_kernel_availability(): + """ + Check available imports and populate the list of available custom kernels. -try: - import triton - import torch - device_capability = torch.cuda.get_device_capability() - if device_capability[0] > 6: - CUSTOM_KERNELS_AVAILABLE.append("triton") - else: - warnings.warn( - f"Triton found but not supported by GPU's compute capability: {device_capability}" - ) -except ImportError: - pass + This function changes the global variable custom_kernels.CUSTOM_KERNELS_AVAILABLE - -except ImportError: - pass + :return: the list of available custom kernel implementations. + """ -if not CUSTOM_KERNELS_AVAILABLE: - warnings.warn( - "Triton, cupy and numba are not available: Custom kernels will be disabled and performance maybe be degraded.") + # The check order for kernels is numba, cupy, triton.
+ global CUSTOM_KERNELS_AVAILABLE -CUSTOM_KERNELS_ACTIVE = False + CUSTOM_KERNELS_AVAILABLE = [] -envsum, sensesum, featsum = None, None, None + try: + import numba + + CUSTOM_KERNELS_AVAILABLE.append("numba") + except ImportError: + pass + + if torch.cuda.is_available(): + try: + import cupy + + if "numba" not in CUSTOM_KERNELS_AVAILABLE: + warnings.warn("Cupy was found, but numba was not. Cupy custom kernels not available.") + else: + CUSTOM_KERNELS_AVAILABLE.append("cupy") + except ImportError: + pass + try: + import triton + if torch.cuda.is_available(): + device_capability = torch.cuda.get_device_capability() + if device_capability[0] > 6: + CUSTOM_KERNELS_AVAILABLE.append("triton") + else: + warnings.warn( + f"Triton found but not supported by GPU's compute capability: {device_capability}" + ) + except ImportError: + pass + + + if not CUSTOM_KERNELS_AVAILABLE: + warnings.warn( + "Triton, cupy and numba are not available: Custom kernels will be disabled and performance may be degraded.") + return CUSTOM_KERNELS_AVAILABLE def _check_numba(): import numba.cuda @@ -86,12 +112,18 @@ def _check_cupy(): if not cupy.cuda.is_available(): if torch.cuda.is_available(): warnings.warn("cupy.cuda.is_available() returned False: Custom kernels will fail on GPU tensors.") - + return def set_custom_kernels(active: Union[bool, str] = True): """ Activate or deactivate custom kernels for interaction. + This function changes the global variables: + - custom_kernels.envsum + - custom_kernels.sensesum + - custom_kernels.featsum + - custom_kernels.CUSTOM_KERNELS_ACTIVE + :param active: If true, set custom kernels to the best available. If False, turn them off and default to pytorch. If "triton", "numba" or "cupy", use those implementations explicitly. If "auto", use best available. :return: None @@ -101,17 +133,20 @@ def set_custom_kernels(active: Union[bool, str] = True): if isinstance(active, str): active = active.lower() - if active not in [True, False, "triton", "numba", "cupy", "pytorch", "auto"]: - raise ValueError(f"Unrecognized custom kernel implementation: {active}") + if active not in _POSSIBLE_CUSTOM_KERNELS: + raise CustomKernelError(f"Unrecognized custom kernel implementation: {active}") - active_map = {"auto": True, "pytorch": False} if not CUSTOM_KERNELS_AVAILABLE: - if active == "auto" or active == "pytorch": + if active in ("auto", "pytorch"): # These are equivalent to "false" when custom kernels are not available. active = False elif active: - raise RuntimeError( - "Triton, numba and cupy were not found. Custom kernels are not available, but they were required by library settings.") + # The user explicitly set a custom kernel implementation or just True. + raise CustomKernelError( + "Triton, numba and cupy were not found. " + + f"Custom kernels are not available, but they were required by library setting: {active}") else: + # If custom kernels are available, then "auto" and "pytorch" revert to bool values. + active_map = {"auto": True, "pytorch": False} active = active_map.get(active, active) # Handle fallback to pytorch kernels. @@ -124,7 +159,7 @@ def set_custom_kernels(active: Union[bool, str] = True): # Select custom kernel implementation if not CUSTOM_KERNELS_AVAILABLE: - raise RuntimeError("Numba was not found. Custom kernels are not available.") + raise CustomKernelError("Numba was not found. 
Custom kernels are not available.") if active is True: if "triton" in CUSTOM_KERNELS_AVAILABLE: @@ -135,18 +170,20 @@ def set_custom_kernels(active: Union[bool, str] = True): active = "numba" if active not in CUSTOM_KERNELS_AVAILABLE: - raise RuntimeError(f"Unavailable custom kernel implementation: {active}") + raise CustomKernelError(f"Unavailable custom kernel implementation: {active}") if active == "triton": from .env_triton import envsum as triton_envsum, sensesum as triton_sensesum, featsum as triton_featsum envsum, sensesum, featsum = autograd_wrapper.wrap_envops(triton_envsum, triton_sensesum, triton_featsum) + elif active == "cupy": _check_numba() _check_cupy() from .env_cupy import cupy_envsum, cupy_featsum, cupy_sensesum envsum, sensesum, featsum = autograd_wrapper.wrap_envops(cupy_envsum, cupy_sensesum, cupy_featsum) + elif active == "numba": _check_numba() from .env_numba import new_envsum, new_featsum, new_sensesum @@ -157,11 +194,33 @@ def set_custom_kernels(active: Union[bool, str] = True): # We shouldn't get here except possibly mid-development, but just in case: # if you add a custom kernel implementation remember to add to this # dispatch block. - raise ValueError(f"Unknown Implementation: {active}") + raise CustomKernelError(f"Unknown Implementation: {active}") CUSTOM_KERNELS_ACTIVE = active + return +CUSTOM_KERNELS_AVAILABLE = [] + +_POSSIBLE_CUSTOM_KERNELS = [True, False, "triton", "numba", "cupy", "pytorch", "auto"] try_custom_kernels = settings.USE_CUSTOM_KERNELS + +CUSTOM_KERNELS_ACTIVE = None + +envsum, sensesum, featsum = None, None, None + -set_custom_kernels(try_custom_kernels) +try: + populate_custom_kernel_availability() + set_custom_kernels(try_custom_kernels) +except CustomKernelError: + raise +except Exception as ee: + warnings.warn(f"Custom kernels are disabled due to an unexpected error:\n\t{ee}", stacklevel=2) + del ee + + envsum = env_pytorch.envsum + sensesum = env_pytorch.sensesum + featsum = env_pytorch.featsum + CUSTOM_KERNELS_ACTIVE = False + del try_custom_kernels diff --git a/hippynn/databases/database.py b/hippynn/databases/database.py index 15bdd93b..a21301a6 100644 --- a/hippynn/databases/database.py +++ b/hippynn/databases/database.py @@ -347,7 +347,7 @@ def make_automatic_splits(self, split_prefix=None, dry_run=False): it fails pretty strictly. :param split_prefix: None, use default. - If otherwise, use this prefix to determine what arrays are masks. + If otherwise, use this prefix to determine what arrays are masks. :param dry_run: Only validate that existing split masks are correct; don't perform splitting. :return: """ diff --git a/hippynn/experiment/device.py b/hippynn/experiment/device.py index b648ba65..9f144dcc 100644 --- a/hippynn/experiment/device.py +++ b/hippynn/experiment/device.py @@ -22,9 +22,7 @@ def set_devices( Evaluation loss is performed on CPU. :param model: current model on CPU - :type model: GraphModule :param loss: current loss module on CPU - :type loss: GraphModule :param evaluator: evaluator :type evaluator: Evaluator :param optimizer: optimizer with state dictionary on CPU diff --git a/hippynn/experiment/serialization.py b/hippynn/experiment/serialization.py index 57f9574c..90ddf1aa 100644 --- a/hippynn/experiment/serialization.py +++ b/hippynn/experiment/serialization.py @@ -193,7 +193,6 @@ def load_model_from_cwd(map_location=None, model_device=None, **kwargs) -> Graph :param model_device: automatically handle device mapping.
Defaults to None, defaults to None :type model_device: Union[int, str, torch.device], optional :return: model with reloaded parameters - :rtype: GraphModule """ mapped, model_device = check_mapping_devices(map_location, model_device) kwargs["map_location"] = mapped diff --git a/hippynn/layers/pairs/dispatch.py b/hippynn/layers/pairs/dispatch.py index b14a8b92..853efa05 100644 --- a/hippynn/layers/pairs/dispatch.py +++ b/hippynn/layers/pairs/dispatch.py @@ -4,7 +4,6 @@ from itertools import product import numpy as np -from scipy.spatial import KDTree import torch from .open import PairMemory @@ -137,11 +136,13 @@ def neighbor_list_torch(cutoff: float, coords, cell): return pf, ps, pi def neighbor_list_kdtree(cutoff, coords, cell): - ''' + """ Use KD Tree implementation from scipy.spatial to find pairs under periodic boundary conditions with an orthorhombic cell. - ''' - + """ + # Dev note: Imports are cached, this will only be slow once. + from scipy.spatial import KDTree + # Verify that cell is orthorhombic cell_prod = cell @ cell.T if torch.count_nonzero(cell_prod - torch.diag(torch.diag(cell_prod))): diff --git a/hippynn/molecular_dynamics/__init__.py b/hippynn/molecular_dynamics/__init__.py index 622cb6ce..0807e466 100644 --- a/hippynn/molecular_dynamics/__init__.py +++ b/hippynn/molecular_dynamics/__init__.py @@ -1,7 +1,10 @@ """ -Molecular dynamics driver with great flexibility and customizability regarding which quantities which are evolved -and what algorithms are used to evolve them. Calls a hippynn `Predictor` on current state during each MD step. + +Molecular dynamics driver with great flexibility and customizability regarding which quantities are evolved +and what algorithms are used to evolve them. Calls a hippynn `Predictor` on the current state during each MD step. 
+ """ + from .md import MolecularDynamics, Variable, NullUpdater, VelocityVerlet, LangevinDynamics diff --git a/hippynn/molecular_dynamics/md.py b/hippynn/molecular_dynamics/md.py index 1375fd77..58bca979 100644 --- a/hippynn/molecular_dynamics/md.py +++ b/hippynn/molecular_dynamics/md.py @@ -1,4 +1,5 @@ from __future__ import annotations +from typing import Optional from functools import singledispatchmethod from copy import copy @@ -25,26 +26,20 @@ def __init__( data: dict[str, torch.Tensor], model_input_map: dict[str, str] = dict(), updater: VariableUpdater = None, - device: torch.device = None, - dtype: torch.dtype = None, + device: Optional[torch.device] = None, + dtype: Optional[torch.dtype] = None, ) -> None: """ :param name: name for variable - :type name: str :param data: dictionary of tracked data in the form `value_name: value` - :type data: dict[str, torch.Tensor] :param model_input_map: dictionary of correspondences between data tracked by Variable and inputs to the HIP-NN model in the form `hipnn-db_name: variable-data-key`, defaults to dict() - :type model_input_map: dict[str, str], optional :param updater: object which will update the data of the Variable over the course of the MD simulation, defaults to None - :type updater: VariableUpdater, optional :param device: device on which to keep data, defaults to None - :type device: torch.device, optional :param dtype: dtype for float type data, defaults to None - :type dtype: torch.dtype, optional - """ + """ self.name = name self.data = data self.model_input_map = model_input_map @@ -164,22 +159,19 @@ def variable(self, variable): ) self._variable = variable - def pre_step(self, dt): + def pre_step(self, dt: float): """Updates to variables performed during each step of MD simulation before HIPNN model evaluation :param dt: timestep - :type dt: float """ pass - def post_step(self, dt, model_outputs): + def post_step(self, dt: float, model_outputs: dict): """Updates to variables performed during each step of MD simulation after HIPNN model evaluation :param dt: timestep - :type dt: float :param model_outputs: dictionary of HIPNN model outputs - :type model_outputs: dict - """ + """ pass @@ -210,25 +202,21 @@ def __init__( """ :param force_db_name: key which will correspond to the force on the corresponding Variable in the HIPNN model output dictionary - :type force_db_name: str :param units_force: amount of eV equal to one in the units used for force output of HIPNN model (eg. 
if force output in kcal, units_force = ase.units.kcal = 2.6114e22 since 2.6114e22 kcal = 1 eV), by default ase.units.eV = 1, defaults to ase.units.eV - :type units_force: float, optional :param units_acc: amount of Ang/fs^2 equal to one in the units used for acceleration in the corresponding Variable, by default units.Ang/(1.0 ** 2) = 1, defaults to ase.units.Ang/(1.0**2) - :type units_acc: float, optional - """ + """ self.force_key = force_db_name self.force_factor = units_force / units_acc - def pre_step(self, dt): + def pre_step(self, dt: float): """Updates to variables performed during each step of MD simulation before HIPNN model evaluation :param dt: timestep - :type dt: float - """ + """ self.variable.data["velocity"] = self.variable.data["velocity"] + 0.5 * dt * self.variable.data["acceleration"] self.variable.data["position"] = self.variable.data["position"] + self.variable.data["velocity"] * dt try: @@ -236,14 +224,12 @@ def pre_step(self, dt): except KeyError: pass - def post_step(self, dt, model_outputs): + def post_step(self, dt: float, model_outputs: dict): """Updates to variables performed during each step of MD simulation after HIPNN model evaluation :param dt: timestep - :type dt: float :param model_outputs: dictionary of HIPNN model outputs - :type model_outputs: dict - """ + """ self.variable.data["force"] = model_outputs[self.force_key].to(self.variable.device) if len(self.variable.data["force"].shape) == len(self.variable.data["mass"].shape): self.variable.data["acceleration"] = self.variable.data["force"].detach() / self.variable.data["mass"] * self.force_factor @@ -266,29 +252,23 @@ def __init__( force_db_name: str, temperature: float, frix: float, - units_force=ase.units.eV, - units_acc=ase.units.Ang / (1.0**2), - seed: int = None, + units_force: float = ase.units.eV, + units_acc: float = ase.units.Ang / (1.0**2), + seed: Optional[int] = None, ): """ :param force_db_name: key which will correspond to the force on the corresponding Variable in the HIPNN model output dictionary - :type force_db_name: str :param temperature: temperature for Langevin algorithm - :type temperature: float :param frix: friction coefficient for Langevin algorithm - :type frix: float :param units_force: amount of eV equal to one in the units used for force output of HIPNN model (eg. 
if force output in kcal, units_force = ase.units.kcal = 2.6114e22 since 2.6114e22 kcal = 1 eV), by default ase.units.eV = 1, defaults to ase.units.eV - :type units_force: float, optional :param units_acc: amount of Ang/fs^2 equal to one in the units used for acceleration in the corresponding Variable, by default units.Ang/(1.0 ** 2) = 1, defaults to ase.units.Ang/(1.0**2) - :type units_acc: float, optional :param seed: used to set seed for reproducibility, defaults to None - :type seed: int, optional - """ + """ self.force_key = force_db_name self.force_factor = units_force / units_acc @@ -299,12 +279,11 @@ def __init__( if seed is not None: torch.manual_seed(seed) - def pre_step(self, dt): + def pre_step(self, dt: float): """Updates to variables performed during each step of MD simulation before HIPNN model evaluation :param dt: timestep - :type dt: float - """ + """ self.variable.data["position"] = self.variable.data["position"] + self.variable.data["velocity"] * dt self.variable.data["unwrapped_position"] = self.variable.data["unwrapped_position"] + self.variable.data["velocity"] * dt except KeyError: self.variable.data["unwrapped_position"] = copy(self.variable.data["position"]) - def post_step(self, dt, model_outputs): - """Updates to variables performed during each step of MD simulation after HIPNN model evaluation + def post_step(self, dt: float, model_outputs: dict): + """ + Updates to variables performed during each step of MD simulation after HIPNN model evaluation :param dt: timestep - :type dt: float :param model_outputs: dictionary of HIPNN model outputs - :type model_outputs: dict - """ + """ self.variable.data["force"] = model_outputs[self.force_key].to(self.variable.device) @@ -348,19 +326,15 @@ def __init__( self, variables: list[Variable], model: Predictor, - device: torch.device = None, - dtype: torch.dtype = None, + device: Optional[torch.device] = None, + dtype: Optional[torch.dtype] = None, ): """ :param variables: list of Variable objects which will be tracked during simulation - :type variables: list[Variable] :param model: HIPNN Predictor - :type model: Predictor :param device: device to move variables and model to, defaults to None - :type device: torch.device, optional :param dtype: dtype to convert all float type variable data and model parameters to, defaults to None - :type dtype: torch.dtype, optional - """ + """ self.variables = variables self.model = model @@ -484,19 +458,15 @@ def _update_data(self, model_outputs: dict): self._data[f"output_{key}"].append(value.cpu().detach()[0]) except KeyError: self._data[f"output_{key}"] = [value.cpu().detach()[0]] - - def run(self, dt: float, n_steps: int, record_every: int = None): + def run(self, dt: float, n_steps: int, record_every: Optional[int] = None): """Run `n_steps` of MD algorithm.
:param dt: timestep - :type dt: float :param n_steps: number of steps to execute - :type n_steps: int :param record_every: frequency at which to store the data at a step in memory, record_every = 1 means every step will be stored, defaults to None - :type record_every: int, optional - """ + """ for i in progress_bar(range(n_steps)): model_outputs = self._step(dt) diff --git a/pyproject.toml b/pyproject.toml index c95a1da9..b2173b6d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -27,7 +27,6 @@ dependencies=[ docs=[ "sphinx", "sphinx_rtd_theme", - "ase", ] full=[ "ase", @@ -37,4 +36,5 @@ full=[ "graphviz", "h5py", "lightning", + "scipy", ] \ No newline at end of file diff --git a/tests/progress_settings.py b/tests/progress_settings.py new file mode 100644 index 00000000..71bb8686 --- /dev/null +++ b/tests/progress_settings.py @@ -0,0 +1,23 @@ + +import hippynn + + +def trigger_progress(): + print(hippynn.settings.PROGRESS) + for _ in hippynn.tools.progress_bar(range(30_000_000)): + _ = _-1 + +hippynn.reload_settings(PROGRESS=None) +trigger_progress() + +hippynn.reload_settings(PROGRESS=True) +trigger_progress() + +hippynn.reload_settings(PROGRESS=False) +trigger_progress() + +hippynn.reload_settings(PROGRESS="tqdm") +trigger_progress() + +hippynn.reload_settings(PROGRESS=0.01) +trigger_progress()
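By analogy with the progress-settings test above, a hypothetical smoke test for the custom kernel machinery (not included in this patch; it uses only names introduced in `hippynn/custom_kernels/__init__.py` above)::

    from hippynn import custom_kernels

    # Cycle through each available implementation, plus the pytorch fallback.
    for impl in custom_kernels.CUSTOM_KERNELS_AVAILABLE + ["pytorch"]:
        custom_kernels.set_custom_kernels(impl)
        print(impl, "->", custom_kernels.CUSTOM_KERNELS_ACTIVE)

    # Invalid specifications now raise CustomKernelError rather than warn.
    try:
        custom_kernels.set_custom_kernels("not-a-kernel")
    except custom_kernels.CustomKernelError as err:
        print("Rejected as expected:", err)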