From 761c9deb4a7b56ab09f88f4b7b33bb6a66dee071 Mon Sep 17 00:00:00 2001 From: amkrajewski Date: Wed, 8 Nov 2023 18:36:36 -0500 Subject: [PATCH 01/59] (core) added a small (6) prototype library based on OQMD to act as a quick starting point for random solid solutions --- pysipfenn/misc/prototypeLibrary.yaml | 84 ++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) create mode 100644 pysipfenn/misc/prototypeLibrary.yaml diff --git a/pysipfenn/misc/prototypeLibrary.yaml b/pysipfenn/misc/prototypeLibrary.yaml new file mode 100644 index 0000000..abba8e7 --- /dev/null +++ b/pysipfenn/misc/prototypeLibrary.yaml @@ -0,0 +1,84 @@ +- name: FCC + origin: https://www.oqmd.org/materials/prototype/A1_Cu + POSCAR: | + A1_Cu + 1.0 + 0.00000 1.80750 1.80750 + 1.80750 0.00000 1.80750 + 1.80750 1.80750 0.00000 + Cu + 1 + Direct + 0.00000 0.00000 0.00000 + +- name: BCC + origin: https://www.oqmd.org/materials/prototype/A2_W + POSCAR: | + W + 1.0 + -1.58250 1.58250 1.58250 + 1.58250 -1.58250 1.58250 + 1.58250 1.58250 -1.58250 + W + 1 + Direct + 0.00000 0.00000 0.00000 + +- name: HCP + origin: https://www.oqmd.org/materials/prototype/A3_Mg + POSCAR: | + Mg + 1.0 + 3.20900 0.00000 0.00000 + -1.60450 2.77907 0.00000 + 0.00000 0.00000 5.21100 + Mg + 2 + Direct + 0.33333 0.66667 0.25000 + 0.66667 0.33333 0.75000 + +- name: Diamond + origin: https://www.oqmd.org/materials/prototype/C(cF8) + POSCAR: | + Si + 1.0 + 0.000000 2.732954 2.732954 + 2.732954 0.000000 2.732954 + 2.732954 2.732954 0.000000 + Si + 2 + Direct + 0.500000 0.500000 0.500000 + 0.750000 0.750000 0.750000 + +- name: DHCP + origin: https://www.oqmd.org/materials/prototype/La + POSCAR: | + Nd + 1.0 + 3.68789 0.00000 0.00000 + -1.84394 3.19380 0.00000 + 0.00000 0.00000 11.88128 + Nd + 4 + Direct + 0.00000 0.00000 0.00000 + 0.33333 0.66667 0.25000 + 0.00000 0.00000 0.50000 + 0.66667 0.33333 0.75000 + +- name: Sn_A5 + origin: https://www.oqmd.org/materials/prototype/A5_Sn + POSCAR: | + Sn + 1.0 + -2.91550 2.91550 1.59100 + 
2.91550 -2.91550 1.59100 + 2.91550 2.91550 -1.59100 + Sn + 2 + Direct + 0.00000 0.00000 0.00000 + 0.75000 0.25000 0.50000 + From c6362085d02afe59ae4707c791ee48500a290732 Mon Sep 17 00:00:00 2001 From: amkrajewski Date: Wed, 8 Nov 2023 19:36:25 -0500 Subject: [PATCH 02/59] - bump pymatgen to >=2023.05.31 for compressed structure handling PR3003 and torch to >=2.0 for future compatibility --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 9ab77fb..26a22a1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,8 +28,8 @@ readme = "README.md" requires-python = ">=3.9" dependencies = [ - "pymatgen>=2023.2.22", - "torch>=1.11.0", + "pymatgen>=2023.05.31", + "torch>=2.0", "onnx2torch>=1.5.2", "onnx>=1.13.0", "numpy>=1.22.0", From ebc4411d3f2f9fa3b74442c5272269961afdb31c Mon Sep 17 00:00:00 2001 From: amkrajewski Date: Wed, 8 Nov 2023 19:37:34 -0500 Subject: [PATCH 03/59] - bump coreml tools to 7.0 for torch 2.0 compatibility --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 26a22a1..951d6eb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ dependencies = [ [project.optional-dependencies] dev = [ - "coremltools>=6.3", + "coremltools>=7.0", "onnxconverter_common>=1.13.0", "onnxsim==0.4.33" ] From 8ee9f5f20ef47682b2ebad4bfd4417b668b62037 Mon Sep 17 00:00:00 2001 From: amkrajewski Date: Wed, 8 Nov 2023 20:06:48 -0500 Subject: [PATCH 04/59] - bump pymatgen to v2023.10.3 which explicitly supports modern numpy, per my issue #3348; also bump our numpy requirement --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 951d6eb..053c85a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -28,11 +28,11 @@ readme = "README.md" requires-python = ">=3.9" dependencies = [ - "pymatgen>=2023.05.31", + "pymatgen>=2023.10.3", "torch>=2.0", 
"onnx2torch>=1.5.2", "onnx>=1.13.0", - "numpy>=1.22.0", + "numpy>=1.25.0", "tqdm>=4.65.0", "natsort>=8.3.0", "pymongo>=4.2", From 6e4f2f337dce0b372ecf9d03117f73b5153e9c35 Mon Sep 17 00:00:00 2001 From: amkrajewski Date: Wed, 8 Nov 2023 20:07:17 -0500 Subject: [PATCH 05/59] - bump onnxconverter slightly for minor bugfixes --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 053c85a..f36c781 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ dependencies = [ [project.optional-dependencies] dev = [ "coremltools>=7.0", - "onnxconverter_common>=1.13.0", + "onnxconverter_common>=1.14.0", "onnxsim==0.4.33" ] From d7dba5fe34eb5313545a295fe39c8cebb28d0536 Mon Sep 17 00:00:00 2001 From: amkrajewski Date: Wed, 8 Nov 2023 20:07:27 -0500 Subject: [PATCH 06/59] - bump pymongo to >=4.4 for explicit Python 3.11 and MongoDB 7.0 support --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f36c781..f9f3263 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,7 +35,7 @@ dependencies = [ "numpy>=1.25.0", "tqdm>=4.65.0", "natsort>=8.3.0", - "pymongo>=4.2", + "pymongo>=4.4", "pySmartDL>=1.3.4", "dnspython", ] From ba502d164973420d6cde69477b7eafc98c38b994 Mon Sep 17 00:00:00 2001 From: amkrajewski Date: Wed, 15 Nov 2023 09:03:36 -0500 Subject: [PATCH 07/59] - add __init__ to misc directory with prototype library --- pysipfenn/misc/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 pysipfenn/misc/__init__.py diff --git a/pysipfenn/misc/__init__.py b/pysipfenn/misc/__init__.py new file mode 100644 index 0000000..e69de29 From f67c5d1b111a0a035d35c6ad24ec327f3468c609 Mon Sep 17 00:00:00 2001 From: amkrajewski Date: Wed, 15 Nov 2023 09:08:33 -0500 Subject: [PATCH 08/59] - add a function to parse the prototype library --- pysipfenn/core/pysipfenn.py | 26 +++++++++++++++++++++++++- 1 file changed, 25 
insertions(+), 1 deletion(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index 5e06d95..a5bb48f 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -1,5 +1,6 @@ # General Imports import os +import yaml import natsort from pySmartDL import SmartDL @@ -12,7 +13,6 @@ from time import perf_counter from importlib import resources - import torch import onnx2torch import onnx @@ -80,6 +80,9 @@ def __init__(self, else: print(f'Skipping model loading (autoLoad=False)') + self.prototypeLibrary = {} + self.parsePrototypeLibrary(verbose=verbose) + self.toRun = [] self.descriptorData = [] self.predictions = [] @@ -121,6 +124,27 @@ def updateModelAvailability(self) -> None: print('\u292B ' + netName) self.network_list_available = detectedNets + def parsePrototypeLibrary(self, verbose=False) -> None: + """Parses the prototype library YAML file in the `misc` directory, interprets them into pymatgen Structure + objects, and stores them in the prototypeLibrary dict attribute of the Calculator object. + + Args: + verbose: If True, prints the number of prototypes loaded. + + Returns: + None + """ + with resources.files('pysipfenn.misc').joinpath('prototypeLibrary.yaml').open('r') as f: + prototypes = yaml.safe_load(f) + for prototype in prototypes: + self.prototypeLibrary.update({ + prototype['name']: { + 'structure': Structure.from_str(prototype['POSCAR'], fmt='poscar') + } + }) + if verbose: + print(f'Loaded {len(self.prototypeLibrary)} prototype structures from the prototype library.') + def downloadModels(self, network: str = 'all') -> None: """Downloads ONNX models. By default, all available models are downloaded. If a model is already available on disk, it is skipped. 
If a specific network is given, only that network is downloaded possibly overwriting From 040401110b57b714ea7ae06b043814b349b8bb8d Mon Sep 17 00:00:00 2001 From: amkrajewski Date: Wed, 15 Nov 2023 09:13:09 -0500 Subject: [PATCH 09/59] - added ability to parse custom prototype library files --- pysipfenn/core/pysipfenn.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index a5bb48f..9c38b56 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -124,18 +124,24 @@ def updateModelAvailability(self) -> None: print('\u292B ' + netName) self.network_list_available = detectedNets - def parsePrototypeLibrary(self, verbose=False) -> None: + def parsePrototypeLibrary(self, customPath: str = "default", verbose: bool = False) -> None: """Parses the prototype library YAML file in the `misc` directory, interprets them into pymatgen Structure objects, and stores them in the prototypeLibrary dict attribute of the Calculator object. Args: - verbose: If True, prints the number of prototypes loaded. + customPath: Path to the prototype library YAML file. Defaults to magic string 'default', which loads the + default prototype library included in the package in the `misc` directory. + verbose: If True, it prints the number of prototypes loaded. 
Returns: None """ - with resources.files('pysipfenn.misc').joinpath('prototypeLibrary.yaml').open('r') as f: - prototypes = yaml.safe_load(f) + if customPath == 'default': + with resources.files('pysipfenn.misc').joinpath('prototypeLibrary.yaml').open('r') as f: + prototypes = yaml.safe_load(f) + else: + with open(customPath, 'r') as f: + prototypes = yaml.safe_load(f) for prototype in prototypes: self.prototypeLibrary.update({ prototype['name']: { @@ -143,7 +149,7 @@ def parsePrototypeLibrary(self, verbose=False) -> None: } }) if verbose: - print(f'Loaded {len(self.prototypeLibrary)} prototype structures from the prototype library.') + print(f'{len(self.prototypeLibrary)} prototype structures present into the prototype library.') def downloadModels(self, network: str = 'all') -> None: """Downloads ONNX models. By default, all available models are downloaded. If a model is already available From 2d86e2874b01edb02a0ed441b519d012563e2d20 Mon Sep 17 00:00:00 2001 From: amkrajewski Date: Wed, 15 Nov 2023 12:11:32 -0500 Subject: [PATCH 10/59] - remove unnecessary spaces in POSCARs in YAML prototype library --- pysipfenn/misc/prototypeLibrary.yaml | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pysipfenn/misc/prototypeLibrary.yaml b/pysipfenn/misc/prototypeLibrary.yaml index abba8e7..2057190 100644 --- a/pysipfenn/misc/prototypeLibrary.yaml +++ b/pysipfenn/misc/prototypeLibrary.yaml @@ -10,7 +10,6 @@ 1 Direct 0.00000 0.00000 0.00000 - - name: BCC origin: https://www.oqmd.org/materials/prototype/A2_W POSCAR: | @@ -23,7 +22,6 @@ 1 Direct 0.00000 0.00000 0.00000 - - name: HCP origin: https://www.oqmd.org/materials/prototype/A3_Mg POSCAR: | @@ -37,7 +35,6 @@ Direct 0.33333 0.66667 0.25000 0.66667 0.33333 0.75000 - - name: Diamond origin: https://www.oqmd.org/materials/prototype/C(cF8) POSCAR: | @@ -51,7 +48,6 @@ Direct 0.500000 0.500000 0.500000 0.750000 0.750000 0.750000 - - name: DHCP origin: https://www.oqmd.org/materials/prototype/La POSCAR: | @@ 
-67,7 +63,6 @@ 0.33333 0.66667 0.25000 0.00000 0.00000 0.50000 0.66667 0.33333 0.75000 - - name: Sn_A5 origin: https://www.oqmd.org/materials/prototype/A5_Sn POSCAR: | @@ -80,5 +75,4 @@ 2 Direct 0.00000 0.00000 0.00000 - 0.75000 0.25000 0.50000 - + 0.75000 0.25000 0.50000 \ No newline at end of file From e573802518e599353c42edd7065c40b757d9f6a1 Mon Sep 17 00:00:00 2001 From: amkrajewski Date: Wed, 15 Nov 2023 12:15:25 -0500 Subject: [PATCH 11/59] - restructured imports to core --- pysipfenn/core/pysipfenn.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index 9c38b56..0d37830 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -1,18 +1,22 @@ -# General Imports +# Standard Library Imports import os -import yaml - -import natsort -from pySmartDL import SmartDL import csv -import numpy as np -from pymatgen.core import Structure import json +from time import perf_counter +from typing import List, Union, Dict +from importlib import resources + +# Helper Imports from tqdm import tqdm from tqdm.contrib.concurrent import process_map -from time import perf_counter +import natsort +from pySmartDL import SmartDL -from importlib import resources +# Scientific Computing Imports +import numpy as np +from pymatgen.core import Structure, Composition + +# Machine Learning Imports import torch import onnx2torch import onnx From b2d41febb5b5ea8fb3c68f5f8fb1c9465d890676 Mon Sep 17 00:00:00 2001 From: amkrajewski Date: Wed, 15 Nov 2023 12:16:17 -0500 Subject: [PATCH 12/59] - added printCustomLibrary option for library parsing function --- pysipfenn/core/pysipfenn.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index 0d37830..ce18469 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -128,7 +128,10 @@ def updateModelAvailability(self) -> None: 
print('\u292B ' + netName) self.network_list_available = detectedNets - def parsePrototypeLibrary(self, customPath: str = "default", verbose: bool = False) -> None: + def parsePrototypeLibrary(self, + customPath: str = "default", + verbose: bool = False, + printCustomLibrary: bool = False) -> None: """Parses the prototype library YAML file in the `misc` directory, interprets them into pymatgen Structure objects, and stores them in the prototypeLibrary dict attribute of the Calculator object. @@ -145,7 +148,10 @@ def parsePrototypeLibrary(self, customPath: str = "default", verbose: bool = Fal prototypes = yaml.safe_load(f) else: with open(customPath, 'r') as f: - prototypes = yaml.safe_load(f) + prototypes = yaml_safeLoader.load(f) + if printCustomLibrary: + for prototype in prototypes: + print(f'{prototype["name"]}:\n{prototype["POSCAR"]}') for prototype in prototypes: self.prototypeLibrary.update({ prototype['name']: { From 3f2069e8c51f11aed0e3d7d335f840d3369d5ae5 Mon Sep 17 00:00:00 2001 From: amkrajewski Date: Wed, 15 Nov 2023 12:17:15 -0500 Subject: [PATCH 13/59] - switched from PyYAML to the better-maintained ruamel.yaml --- pysipfenn/core/pysipfenn.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index ce18469..f3c662a 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -21,7 +21,12 @@ import onnx2torch import onnx -from typing import List, Union, Dict +# YAML Handling Imports and Configuration +from ruamel.yaml import YAML +from ruamel.yaml.scalarstring import LiteralScalarString +yaml_safeLoader=YAML(typ='safe') +yaml_customDumper=YAML() +yaml_customDumper.top_level_colon_align = True # Descriptor Generators from pysipfenn.descriptorDefinitions import Ward2017, KS2022, KS2022_dilute @@ -145,7 +150,7 @@ def parsePrototypeLibrary(self, """ if customPath == 'default': with resources.files('pysipfenn.misc').joinpath('prototypeLibrary.yaml').open('r') 
as f: - prototypes = yaml.safe_load(f) + prototypes = yaml_safeLoader.load(f) else: with open(customPath, 'r') as f: prototypes = yaml_safeLoader.load(f) From eb92debe943cb1ed850a859562c519901efb1009 Mon Sep 17 00:00:00 2001 From: amkrajewski Date: Wed, 15 Nov 2023 12:18:13 -0500 Subject: [PATCH 14/59] - added origin and POSCAR to the prototype dictionary values to enable YAML round-tripping --- pysipfenn/core/pysipfenn.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index f3c662a..00ff7c9 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -160,7 +160,9 @@ def parsePrototypeLibrary(self, for prototype in prototypes: self.prototypeLibrary.update({ prototype['name']: { - 'structure': Structure.from_str(prototype['POSCAR'], fmt='poscar') + 'POSCAR': prototype['POSCAR'], + 'structure': Structure.from_str(prototype['POSCAR'], fmt='poscar'), + 'origin': prototype['origin'] } }) if verbose: From bd096eae68959ab5f5a664c2af8b9453767163be Mon Sep 17 00:00:00 2001 From: amkrajewski Date: Wed, 15 Nov 2023 12:19:22 -0500 Subject: [PATCH 15/59] - added function to append (persist) the custom prototype library to the default one stored within pySIPFENN --- pysipfenn/core/pysipfenn.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index 00ff7c9..b4f894f 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -168,6 +168,36 @@ def parsePrototypeLibrary(self, if verbose: print(f'{len(self.prototypeLibrary)} prototype structures present into the prototype library.') + def appendPrototypeLibrary(self, customPath: str) -> None: + """Parses a custom prototype library YAML file and appends it into the internal prototypeLibrary of the + pySIPFENN package. 
They will be persisted for future use and, by default, they will be loaded automatically + when instantiating the Calculator object. + + Args: + customPath: Path to the prototype library YAML file to be appended to the internal prototypeLibrary of the + pySIPFENN package. + + Returns: + None + """ + self.parsePrototypeLibrary(customPath=customPath, printCustomLibrary=True) + print(f'Now, {len(self.prototypeLibrary)} prototype structures are present into the prototype library. ' + f'Persisting them for future use.') + with resources.files('pysipfenn.misc').joinpath('prototypeLibrary.yaml').open('w+') as f: + # Restructutre the prototype library back to original format of a list of dictionaries + print(self.prototypeLibrary) + prototypeList = [ + {'name': key, + 'origin': value['origin'], + 'POSCAR': LiteralScalarString(str(value['POSCAR'])) + } + for key, value in self.prototypeLibrary.items()] + print(prototypeList) + # Persist the prototype library + yaml_customDumper.dump(prototypeList, f) + print(f'Updated prototype library persisted to {f.name}') + + def downloadModels(self, network: str = 'all') -> None: """Downloads ONNX models. By default, all available models are downloaded. If a model is already available on disk, it is skipped. 
If a specific network is given, only that network is downloaded possibly overwriting From 695d28693140a1bc5348c312a87bb5454aed0268 Mon Sep 17 00:00:00 2001 From: amkrajewski Date: Wed, 15 Nov 2023 14:53:26 -0500 Subject: [PATCH 16/59] - moved YAML parsers initialization to functions as needed --- pysipfenn/core/pysipfenn.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index b4f894f..d36e0c1 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -24,9 +24,6 @@ # YAML Handling Imports and Configuration from ruamel.yaml import YAML from ruamel.yaml.scalarstring import LiteralScalarString -yaml_safeLoader=YAML(typ='safe') -yaml_customDumper=YAML() -yaml_customDumper.top_level_colon_align = True # Descriptor Generators from pysipfenn.descriptorDefinitions import Ward2017, KS2022, KS2022_dilute @@ -148,6 +145,8 @@ def parsePrototypeLibrary(self, Returns: None """ + yaml_safeLoader = YAML(typ='safe') + if customPath == 'default': with resources.files('pysipfenn.misc').joinpath('prototypeLibrary.yaml').open('r') as f: prototypes = yaml_safeLoader.load(f) @@ -180,6 +179,9 @@ def appendPrototypeLibrary(self, customPath: str) -> None: Returns: None """ + yaml_customDumper = YAML() + yaml_customDumper.top_level_colon_align = True + self.parsePrototypeLibrary(customPath=customPath, printCustomLibrary=True) print(f'Now, {len(self.prototypeLibrary)} prototype structures are present into the prototype library. 
' f'Persisting them for future use.') From e33b716e948610d30a6e7fea3f0f62de6e9d6e58 Mon Sep 17 00:00:00 2001 From: amkrajewski Date: Wed, 15 Nov 2023 14:54:08 -0500 Subject: [PATCH 17/59] - added ruamel YAML parser to requirements --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index f9f3263..a7d6581 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,6 +38,7 @@ dependencies = [ "pymongo>=4.4", "pySmartDL>=1.3.4", "dnspython", + "ruamel.yaml" ] [project.optional-dependencies] From f503dc01bf78fedf6a9df80c4aae8e2627bfb51d Mon Sep 17 00:00:00 2001 From: amkrajewski Date: Mon, 29 Jan 2024 21:37:13 -0500 Subject: [PATCH 18/59] - added name appending function to the Model exporters --- pysipfenn/core/modelExporters.py | 42 +++++++++++++++++++++----------- 1 file changed, 28 insertions(+), 14 deletions(-) diff --git a/pysipfenn/core/modelExporters.py b/pysipfenn/core/modelExporters.py index 1d0bd86..68dbacb 100644 --- a/pysipfenn/core/modelExporters.py +++ b/pysipfenn/core/modelExporters.py @@ -124,11 +124,13 @@ def toFP16All(self): self.toFP16(model) print('***** Done converting all models to FP16! *****') - def export(self, model: str): + def export(self, model: str, append: str = '') -> None: """Export a loaded model to ONNX format. Args: model: The name of the model to export (must be loaded in the Calculator). + append: A string to append to the exported model name after the model name, simplification marker, and + FP16 marker. Useful for adding a version number or other information to the exported model name. 
Returns: None @@ -141,14 +143,18 @@ def export(self, model: str): name += '_simplified' if self.fp16Dict[model]: name += '_fp16' + if append: + name += f'_{append}' name += '.onnx' onnx.save(loadedModel, name) print(f'--> Exported as {name}', flush=True) - def exportAll(self): - """Export all loaded models to ONNX format with the export function.""" + def exportAll(self, append: str = '') -> None: + """Export all loaded models to ONNX format with the export function. `append` can be passed to the export + function. + """ for model in tqdm(self.calculator.loadedModels): - self.export(model) + self.export(model, append=append) print('***** Done exporting all models! *****') @@ -167,7 +173,7 @@ def __init__(self, calculator: Calculator): assert len(self.calculator.loadedModels) > 0, 'No models loaded in calculator. Nothing to export.' print(f'Initialized TorchExporter with models: {list(self.calculator.loadedModels.keys())}') - def export(self, model: str): + def export(self, model: str, append: str = '') -> None: """Export a loaded model to PyTorch PT format. Models are exported in eval mode (no dropout) and saved in the current working directory. @@ -175,6 +181,8 @@ def export(self, model: str): model: The name of the model to export (must be loaded in the Calculator) and it must have a descriptor (Ward2017 or KS2022) defined in the calculator.models dictionary created when the Calculator was initialized. + append: A string to append to the exported model name after the model name. Useful for adding a version + number or other information to the exported model name. 
Returns: None @@ -200,14 +208,16 @@ def export(self, model: str): tracedModel = torch.jit.trace(loadedModel, inputs_tracer) - name = f"{model}.pt" + name = f"{model}{f'_{append}' if append else ''}.pt" tracedModel.save(name) print(f'--> Exported as {name}', flush=True) - def exportAll(self): - """Export all loaded models to PyTorch PT format with the export function.""" + def exportAll(self, append: str = '') -> None: + """Exports all loaded models to PyTorch PT format with the export function. `append` can be passed to the export + function + """ for model in tqdm(self.calculator.loadedModels): - self.export(model) + self.export(model, append=append) print('***** Done exporting all models! *****') @@ -227,7 +237,7 @@ def __init__(self, calculator: Calculator): assert len(self.calculator.loadedModels)>0, 'No models loaded in calculator. Nothing to export.' print(f'Initialized CoreMLExporter with models: {list(self.calculator.loadedModels.keys())}') - def export(self, model: str): + def export(self, model: str, append: str = '') -> None: """Export a loaded model to CoreML format. Models will be saved as {model}.mlpackage in the current working directory. Models will be annotated with the feature vector name (Ward2017 or KS2022) and the output will be named "property". The latter behavior will be adjusted in the future when model output name and unit will be @@ -237,6 +247,8 @@ def export(self, model: str): model: The name of the model to export (must be loaded in the Calculator) and it must have a descriptor (Ward2017 or KS2022) defined in the calculator.models dictionary created when the Calculator was initialized. + append: A string to append to the exported model name after the model name. Useful for adding a version + number or other information to the exported model name. 
Returns: None @@ -270,12 +282,14 @@ def export(self, model: str): inputs=inputs_converter, outputs=[ct.TensorType(name='property')] ) - name = f"{model}.mlpackage" + name = f"{model}{f'_{append}' if append else ''}.mlpackage" coreml_model.save(name) print(f'--> Exported as {name}', flush=True) - def exportAll(self): - """Export all loaded models to CoreML format with the export function.""" + def exportAll(self, append: str = '') -> None: + """Export all loaded models to CoreML format with the export function. `append` can be passed to the export + function. + """ for model in tqdm(self.calculator.loadedModels): - self.export(model) + self.export(model, append=append) print('***** Done exporting all models! *****') From 50c4b9c5937dee0ffc73f7aea2a6953f9e9fdaf8 Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Sun, 11 Feb 2024 19:38:54 -0500 Subject: [PATCH 19/59] - improved documentation of the prototype library handling functions --- pysipfenn/core/pysipfenn.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index d36e0c1..73cd9a4 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -134,13 +134,18 @@ def parsePrototypeLibrary(self, customPath: str = "default", verbose: bool = False, printCustomLibrary: bool = False) -> None: - """Parses the prototype library YAML file in the `misc` directory, interprets them into pymatgen Structure - objects, and stores them in the prototypeLibrary dict attribute of the Calculator object. + """Parses the prototype library YAML file in the `misc` directory, interprets them into pymatgen `Structure` + objects, and stores them in the `self.prototypeLibrary` dict attribute of the Calculator object. You can use it + also to temporarily append a custom prototype library (by providing a path) which will live as long as the + Calculator. 
For permanent changes, use `appendPrototypeLibrary()`. Args: - customPath: Path to the prototype library YAML file. Defaults to magic string 'default', which loads the + customPath: Path to the prototype library YAML file. Defaults to the magic string 'default', which loads the default prototype library included in the package in the `misc` directory. - verbose: If True, it prints the number of prototypes loaded. + verbose: If True, it prints the number of prototypes loaded. Defaults to False, but note that `Calculator` + class automatically initializes with verbose=True. + printCustomLibrary: If True, it prints the name and POSCAR of each prototype being added to the prototype + library. Defaults to False. Returns: None @@ -168,9 +173,9 @@ def parsePrototypeLibrary(self, print(f'{len(self.prototypeLibrary)} prototype structures present into the prototype library.') def appendPrototypeLibrary(self, customPath: str) -> None: - """Parses a custom prototype library YAML file and appends it into the internal prototypeLibrary of the - pySIPFENN package. They will be persisted for future use and, by default, they will be loaded automatically - when instantiating the Calculator object. + """Parses a custom prototype library YAML file and permanently appends it into the internal prototypeLibrary + of the pySIPFENN package. They will be persisted for future use and, by default, they will be loaded + automatically when instantiating the Calculator object, similar to your custom models. Args: customPath: Path to the prototype library YAML file to be appended to the internal prototypeLibrary of the From f7a7b11dcd080a742c7790561a50148a9259bb9a Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Sun, 11 Feb 2024 20:43:25 -0500 Subject: [PATCH 20/59] - added custom test case for prototype library construction --- .../tests/testCaseFiles/prototypeLibrary-custom.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 pysipfenn/tests/testCaseFiles/prototypeLibrary-custom.yaml diff --git a/pysipfenn/tests/testCaseFiles/prototypeLibrary-custom.yaml b/pysipfenn/tests/testCaseFiles/prototypeLibrary-custom.yaml new file mode 100644 index 0000000..dc1a894 --- /dev/null +++ b/pysipfenn/tests/testCaseFiles/prototypeLibrary-custom.yaml @@ -0,0 +1,12 @@ +- name: NicePhase + origin: https://somecustomsource.org + POSCAR: | + A1_U + 1.0 + 0.00000 1.80750 1.80750 + 1.80750 0.00000 1.80750 + 1.80750 1.80750 0.00000 + U + 1 + Direct + 0.00000 0.00000 0.00000 \ No newline at end of file From a8f0c95c0de77db91c2979ea57e380624b71ec40 Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Sun, 11 Feb 2024 21:00:36 -0500 Subject: [PATCH 21/59] - added tests for default prototype library parsing --- pysipfenn/tests/test_Core_prototypeLibrary.py | 48 +++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 pysipfenn/tests/test_Core_prototypeLibrary.py diff --git a/pysipfenn/tests/test_Core_prototypeLibrary.py b/pysipfenn/tests/test_Core_prototypeLibrary.py new file mode 100644 index 0000000..58c5b55 --- /dev/null +++ b/pysipfenn/tests/test_Core_prototypeLibrary.py @@ -0,0 +1,48 @@ +import unittest +from pymatgen.core import Structure +from importlib import resources +import shutil +import pysipfenn +import pytest +import os + +class TestPL(unittest.TestCase): + """Tests correct loading of the prototype library (used, e.g., for random solid solution generation).""" + + def setUp(self) -> None: + """Load the prototype library.""" + self.c = pysipfenn.Calculator(autoLoad=False) + + def test_autoload(self): + """Test that the default 
prototype library is loaded.""" + self.assertTrue(self.c.prototypeLibrary is not None) + self.assertTrue(len(self.c.prototypeLibrary) > 0) + + def test_defaultPresent(self): + """Test that the loaded prototype library was correctly parsed.""" + for prototype in ["FCC", "BCC", "HCP", "Diamond", "DHCP", "Sn_A5"]: + with self.subTest(msg=prototype): + self.assertTrue(prototype in self.c.prototypeLibrary) + + def test_correctContentFCC(self): + """Test that the FCC prototype was correctly parsed.""" + fcc = self.c.prototypeLibrary["FCC"] + self.assertEqual(fcc["origin"], "https://www.oqmd.org/materials/prototype/A1_Cu") + self.assertEqual( + fcc["POSCAR"], + ('A1_Cu\n' + '1.0\n' + ' 0.00000 1.80750 1.80750\n' + ' 1.80750 0.00000 1.80750\n' + ' 1.80750 1.80750 0.00000\n' + 'Cu\n' + '1\n' + 'Direct\n' + ' 0.00000 0.00000 0.00000\n')) + with self.subTest(msg="Is a pymatgen Structure"): + self.assertTrue(isinstance(fcc["structure"], Structure)) + with self.subTest(msg="Is valid pymatgen Structure"): + self.assertTrue(fcc["structure"].is_valid()) + with self.subTest(msg="Has correct formula"): + self.assertEqual(fcc["structure"].formula, "Cu1") + From 05c259506ee3596d1bee976111848b5c44b23f46 Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Sun, 11 Feb 2024 21:31:29 -0500 Subject: [PATCH 22/59] - moved prototypeLibrary-overwriting to a satellite function for conciseness and easier access without calculator present --- pysipfenn/core/pysipfenn.py | 39 ++++++++++++++++++++++--------------- 1 file changed, 23 insertions(+), 16 deletions(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index 73cd9a4..9a259ff 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -184,25 +184,11 @@ def appendPrototypeLibrary(self, customPath: str) -> None: Returns: None """ - yaml_customDumper = YAML() - yaml_customDumper.top_level_colon_align = True - self.parsePrototypeLibrary(customPath=customPath, printCustomLibrary=True) + self.parsePrototypeLibrary(customPath=customPath, printCustomLibrary=True, verbose=True) print(f'Now, {len(self.prototypeLibrary)} prototype structures are present into the prototype library. ' f'Persisting them for future use.') - with resources.files('pysipfenn.misc').joinpath('prototypeLibrary.yaml').open('w+') as f: - # Restructutre the prototype library back to original format of a list of dictionaries - print(self.prototypeLibrary) - prototypeList = [ - {'name': key, - 'origin': value['origin'], - 'POSCAR': LiteralScalarString(str(value['POSCAR'])) - } - for key, value in self.prototypeLibrary.items()] - print(prototypeList) - # Persist the prototype library - yaml_customDumper.dump(prototypeList, f) - print(f'Updated prototype library persisted to {f.name}') + overwritePrototypeLibrary(self.prototypeLibrary) def downloadModels(self, network: str = 'all') -> None: @@ -801,3 +787,24 @@ def ward2ks2022(ward2017: np.ndarray) -> np.ndarray: ), axis=-1, dtype=np.float32) return ks2022 + +def overwritePrototypeLibrary(prototypeLibrary: dict) -> None: + """Destructively overwrites the prototype library with a custom one. 
Used by the `appendPrototypeLibrary` function + to persist its changes. The other main use it to restore the default one to the original state based on a backup + made earlier (see tests for an example).""" + yaml_customDumper = YAML() + yaml_customDumper.top_level_colon_align = True + + with resources.files('pysipfenn.misc').joinpath('prototypeLibrary.yaml').open('w+') as f: + # Restructutre the prototype library back to original format of a list of dictionaries + print(prototypeLibrary) + prototypeList = [ + {'name': key, + 'origin': value['origin'], + 'POSCAR': LiteralScalarString(str(value['POSCAR'])) + } + for key, value in prototypeLibrary.items()] + print(prototypeList) + # Persist the prototype library + yaml_customDumper.dump(prototypeList, f) + print(f'Updated prototype library persisted to {f.name}') \ No newline at end of file From d99492f34928bbabd534373b02ec7a18cff481a6 Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Sun, 11 Feb 2024 21:32:13 -0500 Subject: [PATCH 23/59] - finished the prototype library tests (100% coverage now!) 
--- pysipfenn/misc/prototypeLibrary.yaml | 2 +- pysipfenn/tests/test_Core_prototypeLibrary.py | 50 +++++++++++++++++++ 2 files changed, 51 insertions(+), 1 deletion(-) diff --git a/pysipfenn/misc/prototypeLibrary.yaml b/pysipfenn/misc/prototypeLibrary.yaml index 2057190..0b29eff 100644 --- a/pysipfenn/misc/prototypeLibrary.yaml +++ b/pysipfenn/misc/prototypeLibrary.yaml @@ -75,4 +75,4 @@ 2 Direct 0.00000 0.00000 0.00000 - 0.75000 0.25000 0.50000 \ No newline at end of file + 0.75000 0.25000 0.50000 diff --git a/pysipfenn/tests/test_Core_prototypeLibrary.py b/pysipfenn/tests/test_Core_prototypeLibrary.py index 58c5b55..2d7f598 100644 --- a/pysipfenn/tests/test_Core_prototypeLibrary.py +++ b/pysipfenn/tests/test_Core_prototypeLibrary.py @@ -46,3 +46,53 @@ def test_correctContentFCC(self): with self.subTest(msg="Has correct formula"): self.assertEqual(fcc["structure"].formula, "Cu1") + def test_customPrototypeLoad(self): + """Test that a custom prototype can be loaded. Then test that a custom prototype can be appended to the default + library and stay there.""" + + with resources.files('pysipfenn').joinpath('tests/testCaseFiles/prototypeLibrary-custom.yaml') as f: + self.c.parsePrototypeLibrary(customPath=f, verbose=True, printCustomLibrary=True) + + with self.subTest(msg="Custom prototype present with correct parse"): + self.assertTrue("NicePhase" in self.c.prototypeLibrary) + self.assertEqual(self.c.prototypeLibrary["NicePhase"]["origin"], "https://somecustomsource.org") + + with self.subTest(msg="Nice phase is a valid pymatgen Structure"): + self.assertTrue(isinstance(self.c.prototypeLibrary["NicePhase"]["structure"], Structure)) + self.assertTrue(self.c.prototypeLibrary["NicePhase"]["structure"].is_valid()) + self.assertEqual(self.c.prototypeLibrary["NicePhase"]["structure"].formula, "U1") + + with self.subTest(msg="FCC prototype still present"): + self.assertTrue("FCC" in self.c.prototypeLibrary) + + with self.subTest(msg="Test that it does not affect the default 
prototype library"): + otherC = pysipfenn.Calculator(autoLoad=False) + self.assertTrue("NicePhase" not in otherC.prototypeLibrary) + + # Create a backup of the default library + self.c = pysipfenn.Calculator(autoLoad=False) + backup = self.c.prototypeLibrary.copy() + + with resources.files('pysipfenn').joinpath('tests/testCaseFiles/prototypeLibrary-custom.yaml') as f: + self.c.appendPrototypeLibrary(customPath=f) + + with self.subTest(msg="Custom prototype present and valid in a different Calculator instance"): + otherC = pysipfenn.Calculator(autoLoad=False) + self.assertTrue("NicePhase" in otherC.prototypeLibrary) + self.assertEqual(otherC.prototypeLibrary["NicePhase"]["origin"], "https://somecustomsource.org") + self.assertTrue(isinstance(otherC.prototypeLibrary["NicePhase"]["structure"], Structure)) + self.assertTrue(otherC.prototypeLibrary["NicePhase"]["structure"].is_valid()) + self.assertEqual(otherC.prototypeLibrary["NicePhase"]["structure"].formula, "U1") + + with self.subTest(msg="FCC/BCC/HCP prototype still present in a different Calculator instance"): + self.assertTrue("FCC" in otherC.prototypeLibrary) + self.assertTrue("BCC" in otherC.prototypeLibrary) + self.assertTrue("HCP" in otherC.prototypeLibrary) + + with self.subTest(msg="Restore the original prototype library"): + pysipfenn.overwritePrototypeLibrary(backup) + + + + + From 1ee15e202358b391b1ff12e4c551850200794341 Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Sun, 11 Feb 2024 21:46:45 -0500 Subject: [PATCH 24/59] - added a set of assertions to the prototypeLibrary parsing; some style improvements --- pysipfenn/core/pysipfenn.py | 33 ++++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index 9a259ff..bee93ad 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -97,7 +97,7 @@ def __init__(self, print(f'********* pySIPFENN Successfully Initialized **********') def __str__(self): - '''Prints the status of the Calculator object.''' + """Prints the status of the `Calculator` object.""" printOut = f'pySIPFENN Calculator Object. Version: {__version__}\n' printOut += f'Models are located in:\n{resources.files("pysipfenn.modelsSIPFENN")}\n{"-" * 80}\n' printOut += f'Loaded Networks: {list(self.loadedModels.keys())}\n' @@ -115,10 +115,8 @@ def __str__(self): return printOut def updateModelAvailability(self) -> None: - """ - Updates availability of models based on the pysipfenn.modelsSIPFENN directory contents. Works only for - current ONNX model definitions. - """ + """Updates availability of models based on the pysipfenn.modelsSIPFENN directory contents. Works only for + current ONNX model definitions.""" with resources.files('pysipfenn.modelsSIPFENN') as p: all_files = os.listdir(p) detectedNets = [] @@ -145,7 +143,7 @@ def parsePrototypeLibrary(self, verbose: If True, it prints the number of prototypes loaded. Defaults to False, but note that `Calculator` class automatically initializes with verbose=True. printCustomLibrary: If True, it prints the name and POSCAR of each prototype being added to the prototype - library. Defaults to False. + library. Has no effect if customPath is 'default'. Defaults to False. Returns: None @@ -162,10 +160,18 @@ class automatically initializes with verbose=True. 
for prototype in prototypes: print(f'{prototype["name"]}:\n{prototype["POSCAR"]}') for prototype in prototypes: + assert isinstance(prototype['name'], str), 'Prototype name must be a string.' + assert isinstance(prototype['POSCAR'], str), 'Prototype POSCAR must be a string.' + assert isinstance(prototype['origin'], str), 'Prototype origin must be a string.' + struct = Structure.from_str(prototype['POSCAR'], fmt='poscar') + assert struct.is_valid(), f'Invalid structure for prototype {prototype["name"]}' + assert struct.is_ordered, f'Unordered structure for prototype {prototype["name"]}. Make sure that the ' \ + f'POSCAR file is in the direct format and that no prior randomization has ' \ + f'been applied to the structure occupancies.' self.prototypeLibrary.update({ prototype['name']: { 'POSCAR': prototype['POSCAR'], - 'structure': Structure.from_str(prototype['POSCAR'], fmt='poscar'), + 'structure': struct, 'origin': prototype['origin'] } }) @@ -193,8 +199,8 @@ def appendPrototypeLibrary(self, customPath: str) -> None: def downloadModels(self, network: str = 'all') -> None: """Downloads ONNX models. By default, all available models are downloaded. If a model is already available - on disk, it is skipped. If a specific network is given, only that network is downloaded possibly overwriting - the existing one. If the networks name is not recognized message is printed. + on disk, it is skipped. If a specific network is given, only that network is downloaded, possibly overwriting + the existing one. If the network name is not recognized, the message will be printed. Args: network: Name of the network to download. Defaults to 'all'. 
@@ -799,10 +805,11 @@ def overwritePrototypeLibrary(prototypeLibrary: dict) -> None: # Restructutre the prototype library back to original format of a list of dictionaries print(prototypeLibrary) prototypeList = [ - {'name': key, - 'origin': value['origin'], - 'POSCAR': LiteralScalarString(str(value['POSCAR'])) - } + { + 'name': key, + 'origin': value['origin'], + 'POSCAR': LiteralScalarString(str(value['POSCAR'])) + } for key, value in prototypeLibrary.items()] print(prototypeList) # Persist the prototype library From f5446ed8c856f25e24784d7ded48e2bf314cfd4d Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Sun, 11 Feb 2024 22:11:07 -0500 Subject: [PATCH 25/59] - improved type hinting throughout `KS2022_dilute.py` --- .../descriptorDefinitions/KS2022_dilute.py | 59 ++++++++++++------- 1 file changed, 38 insertions(+), 21 deletions(-) diff --git a/pysipfenn/descriptorDefinitions/KS2022_dilute.py b/pysipfenn/descriptorDefinitions/KS2022_dilute.py index dc160be..cb23ec7 100644 --- a/pysipfenn/descriptorDefinitions/KS2022_dilute.py +++ b/pysipfenn/descriptorDefinitions/KS2022_dilute.py @@ -4,18 +4,13 @@ import time import numpy as np import os -from pymatgen.core import Structure, Element +from pymatgen.core import Structure, Element, PeriodicSite from pymatgen.analysis.local_env import VoronoiNN from pymatgen.symmetry.analyzer import SpacegroupAnalyzer import json from tqdm import tqdm from collections import Counter - -citations = [ - 'Adam M. Krajewski, Jonathan W. 
Siegel, Jinchao Xu, Zi-Kui Liu, Extensible Structure-Informed Prediction of ' - 'Formation Energy with improved accuracy and usability employing neural networks, Computational ' - 'Materials Science, Volume 208, 2022, 111254' - ] +from typing import List periodic_table_size = 112 attribute_matrix = np.loadtxt(os.path.join(os.path.dirname(__file__), 'Magpie_element_properties.csv'), delimiter=',') @@ -27,7 +22,11 @@ # A prototype function which computes a weighted average over neighbors, # weighted by the area of the voronoi cell between them. -def local_env_function(local_env, site, struct): +def local_env_function( + local_env: dict, + site: PeriodicSite, + struct: Structure, +) -> List[np.ndarray]: local_attributes = np.zeros(attribute_matrix.shape[1]) for key, value in site.species.get_el_amt_dict().items(): local_attributes += value * attribute_matrix[Element(key).Z - 1, :] @@ -68,7 +67,7 @@ def local_env_function(local_env, site, struct): elemental_properties_attributes[1]] -def findDilute(struct): +def findDilute(struct: Structure) -> int: spoList = struct.species_and_occu spCount = dict(Counter(spoList)) spDilute = [spoList.index(sp) for sp in spCount if spCount[sp] == 1] @@ -80,7 +79,12 @@ def findDilute(struct): raise RuntimeError -def generate_voronoi_attributes(struct, baseStruct='pure', local_funct=local_env_function): +def generate_voronoi_attributes( + struct: Structure, + baseStruct: str = 'pure', + local_funct=local_env_function +) -> (np.ndarray, np.ndarray): + local_generator = LocalAttributeGenerator(struct, local_funct) # Generate a base structure of pure elemental solid or take one as input @@ -99,7 +103,7 @@ def generate_voronoi_attributes(struct, baseStruct='pure', local_funct=local_env baseStruct = struct.copy() for sp in set(baseStruct.species): baseStruct.replace_species({sp: 'A'}) - # Find position of the 1 dilute atom and calculate output for it + # Find the position of the 1 dilute atom and calculate output for it diluteSite = 
findDilute(struct) else: raise TypeError @@ -145,17 +149,20 @@ def generate_voronoi_attributes(struct, baseStruct='pure', local_funct=local_env # A wrapper class which contains an instance of an NN generator (the default is a VoronoiNN), a structure, and # a function which computes the local environment attributes. class LocalAttributeGenerator: - def __init__(self, struct, local_env_func, - nn_generator=VoronoiNN(compute_adj_neighbors=False, extra_nn_info=False)): + def __init__( + self, + struct: Structure, + local_env_func, + nn_generator: VoronoiNN = VoronoiNN(compute_adj_neighbors=False, extra_nn_info=False)): self.generator = nn_generator self.struct = struct self.function = local_env_func - def generate_local_attributes(self, n): + def generate_local_attributes(self, n: int): local_env = self.generator.get_voronoi_polyhedra(self.struct, n) return self.function(local_env, self.struct[n], self.struct) - def generate_local_attributes_diluteSite(self, n): + def generate_local_attributes_diluteSite(self, n: int): local_env = self.generator.get_voronoi_polyhedra(self.struct, n) local_env_result = self.function(local_env, self.struct[n], self.struct) @@ -172,7 +179,10 @@ def generate_local_attributes_diluteSite(self, n): # Calculates the attributes corresponding to the most common elements. 
-def magpie_mode(attribute_properties, axis=0): +def magpie_mode( + attribute_properties, + axis: int = 0 +) -> np.ndarray: scores = np.unique(np.ravel(attribute_properties[:, 0])) # get all unique atomic numbers max_occurrence = 0 top_elements = [] @@ -191,7 +201,10 @@ def magpie_mode(attribute_properties, axis=0): return output / len(top_elements) -def generate_descriptor(struct: Structure, baseStruct='pure'): +def generate_descriptor( + struct: Structure, + baseStruct='pure' +) -> np.ndarray: diff_properties, attribute_properties = generate_voronoi_attributes(struct, baseStruct=baseStruct) properties = np.concatenate( (np.stack( @@ -250,11 +263,15 @@ def generate_descriptor(struct: Structure, baseStruct='pure'): return properties -def cite(): - return citation +def cite() -> List[str]: + return [ + 'Adam M. Krajewski, Jonathan W. Siegel, Jinchao Xu, Zi-Kui Liu, Extensible Structure-Informed Prediction of ' + 'Formation Energy with improved accuracy and usability employing neural networks, Computational ' + 'Materials Science, Volume 208, 2022, 111254' + ] -def profile(test='JVASP-10001', nRuns=10): +def profile(test='JVASP-10001', nRuns=10) -> None: if test == 'diluteNiAlloy': print( f'KS2022 profiling/testing task will calculate a descriptor for a dilute Ni alloy {nRuns} times in series.') @@ -270,7 +287,7 @@ def profile(test='JVASP-10001', nRuns=10): print('Done!') -def profileParallel(test='JVASP-10001', nRuns=1000): +def profileParallel(test='JVASP-10001', nRuns=1000) -> None: from tqdm.contrib.concurrent import process_map if test == 'diluteNiAlloy': print( From c6122b5e5ccfe9cc2ba04254476123539c6c826c Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Sun, 11 Feb 2024 22:40:17 -0500 Subject: [PATCH 26/59] - fix error message in `KS2022_dilute.py` --- pysipfenn/descriptorDefinitions/KS2022_dilute.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pysipfenn/descriptorDefinitions/KS2022_dilute.py b/pysipfenn/descriptorDefinitions/KS2022_dilute.py index cb23ec7..0404b55 100644 --- a/pysipfenn/descriptorDefinitions/KS2022_dilute.py +++ b/pysipfenn/descriptorDefinitions/KS2022_dilute.py @@ -75,7 +75,9 @@ def findDilute(struct: Structure) -> int: return spDilute[0] else: print( - 'Custom dilute structure descriptor calculation is defined only one dilute species in a single element matrix') + 'The automated dilute structure descriptor calculation is defined only for cases where there is exactly ONE' + ' dilute species, which exists in a SINGLE component matrix. If you are using a multi-component system, ' + 'please provide a base `Structure` object manually.') raise RuntimeError From edb489d32ac759836948b456af08eff8d65a292e Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Sun, 11 Feb 2024 22:54:43 -0500 Subject: [PATCH 27/59] - fix parallel mode of featurization high-level API in core for `KS2022_dilute` --- pysipfenn/core/pysipfenn.py | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index bee93ad..d905b60 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -303,7 +303,7 @@ def calculate_KS2022_dilute(self, structList: List[Structure], baseStruct: Union[str, List[Structure]] = 'pure', mode: str = 'serial', - max_workers: int = 8) -> list: + max_workers: int = 8) -> List[np.ndarray]: """Calculates KS2022 descriptors for a list of dilute structures (either based on pure elements and on custom base structures, e.g. 
TCP endmember configurations) that contain a single alloying atom. Speed increases are substantial compared to the KS2022 descriptor, which is more general and can be used on any structure. The @@ -323,7 +323,7 @@ def calculate_KS2022_dilute(self, max_workers: Number of workers to use in parallel mode. Defaults to 8. Returns: - List of KS2022 descriptor (feature vector) for each structure. + List of KS2022 descriptor (feature vector) np.ndarray for each structure. """ if baseStruct == 'pure' or isinstance(baseStruct, Structure): @@ -333,8 +333,9 @@ def calculate_KS2022_dilute(self, self.descriptorData = descList return descList elif mode == 'parallel': - descList = process_map(KS2022_dilute.generate_descriptor(baseStruct=baseStruct), - structList, + pairedInput = list(zip(structList, [baseStruct] * len(structList))) + descList = process_map(wrapper_KS2022_dilute_generate_descriptor, + pairedInput, max_workers=max_workers) print('Done!') self.descriptorData = descList @@ -347,11 +348,15 @@ def calculate_KS2022_dilute(self, self.descriptorData = descList return descList elif mode == 'parallel': - descList = process_map(KS2022_dilute.generate_descriptor, - structList, baseStruct, max_workers=max_workers) + pairedInput = list(zip(structList, baseStruct)) + descList = process_map(wrapper_KS2022_dilute_generate_descriptor, + pairedInput, max_workers=max_workers) print('Done!') self.descriptorData = descList return descList + else: + raise ValueError('`baseStruct` must be (1) `pure`, (2) `Structure` or a list of them.') + def loadModels(self, network: str = 'all') -> None: """ @@ -770,7 +775,7 @@ def writeDescriptorsToCSV(self, descriptor: str, file: str = 'descriptorData.csv f.write(f'{i},{",".join(str(v) for v in dd)}\n') i += 1 - +# UTILS def ward2ks2022(ward2017: np.ndarray) -> np.ndarray: """Converts a Ward 2017 descriptor to a KS2022 descriptor (which is its subset). 
@@ -814,4 +819,9 @@ def overwritePrototypeLibrary(prototypeLibrary: dict) -> None: print(prototypeList) # Persist the prototype library yaml_customDumper.dump(prototypeList, f) - print(f'Updated prototype library persisted to {f.name}') \ No newline at end of file + print(f'Updated prototype library persisted to {f.name}') + +# WRAPPERS + +def wrapper_KS2022_dilute_generate_descriptor(args): + return KS2022_dilute.generate_descriptor(*args) \ No newline at end of file From b04ea968c1299370017e439e26b685b9ffafe5c2 Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Sun, 11 Feb 2024 22:56:12 -0500 Subject: [PATCH 28/59] - implemented improved tests for the parallel mode of featurization high-level API in pysipfenn core for `KS2022_dilute` --- pysipfenn/tests/test_pysipfenn.py | 58 +++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/pysipfenn/tests/test_pysipfenn.py b/pysipfenn/tests/test_pysipfenn.py index f204da9..b143533 100644 --- a/pysipfenn/tests/test_pysipfenn.py +++ b/pysipfenn/tests/test_pysipfenn.py @@ -170,6 +170,64 @@ def test_descriptorCalculate_KS2022_parallel(self): descList = self.c.calculate_KS2022(structList=testStructures, mode='parallel', max_workers=4) self.assertEqual(len(descList), len(testStructures)) + def test_descriptorCalculate_KS2022_dilute_serial(self): + """Test succesful execution of the descriptorCalculate() method with KS2022_dilute in series based on an Al + prototype loaded from the default prototype library. 
A separate test for calculation accuracy is done in + test_KS2022.py""" + diluteStruct = self.c.prototypeLibrary['FCC']['structure'].copy() + diluteStruct.make_supercell([2, 2, 2]) + diluteStruct.replace(0, 'Fe') + testStructures = [diluteStruct.copy()]*2 + descList = self.c.calculate_KS2022_dilute(structList=testStructures, mode='serial') + self.assertEqual(len(descList), len(testStructures), "Not all structures were processed.") + for desc in descList: + self.assertListEqual( + desc.tolist(), + descList[0].tolist(), + "All descriptors should be equal for the same structure are the same." + ) + + + def test_descriptorCalculate_KS2022_dilute_parallel(self): + """Test succesful execution of the descriptorCalculate() method with KS2022_dilute in parallel based on an Al + prototype loaded from the default prototype library. A separate test for calculation accuracy is done in + test_KS2022.py""" + with self.subTest(msg="Constructing dilute structures"): + diluteStruct = self.c.prototypeLibrary['FCC']['structure'].copy() + diluteStruct.make_supercell([2, 2, 2]) + testStructures = [] + for i in range(8): + tempStruct = diluteStruct.copy() + tempStruct.replace(i, 'Fe') + testStructures.append(tempStruct) + + with self.subTest(msg="Running parallel calculation with default 'pure' base structure"): + descList = self.c.calculate_KS2022_dilute(structList=testStructures, mode='parallel', max_workers=4) + self.assertEqual(len(descList), len(testStructures), "Not all structures were processed.") + + with self.subTest(msg="All descriptors should be equal for the same structure as sites are equivalent"): + temp0 = descList[0].tolist() + for desc in descList: + temp1 = desc.tolist() + for t0, t1 in zip(temp0, temp1): + self.assertAlmostEqual(t0, t1, places=6) + + with self.subTest(msg="Running parallel calculation with defined base structures"): + baseStructs = [diluteStruct.copy()]*8 + descList = self.c.calculate_KS2022_dilute( + structList=testStructures, + 
baseStruct=baseStructs, + mode='parallel', + max_workers=4) + self.assertEqual(len(descList), len(testStructures), "Not all structures were processed.") + + with self.subTest(msg="All descriptors should be equal for the same structure as sites are equivalent"): + for desc in descList: + temp1 = desc.tolist() + for t0, t1 in zip(temp0, temp1): + self.assertAlmostEqual(t0, t1, places=6) + + def test_RunModels_Errors(self): '''Test that the runModels() and runModels_dilute() methods raise errors correctly when it is called with no models to run or with a descriptor handling that has not been implemented. From e502c3a24a5a2285fa529e10c357134fe5c97c18 Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 09:56:18 -0500 Subject: [PATCH 29/59] (RSS) started implementation of the random solid solution calculation API --- pysipfenn/core/pysipfenn.py | 71 ++++++++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index d905b60..84c0a2b 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -26,7 +26,9 @@ from ruamel.yaml.scalarstring import LiteralScalarString # Descriptor Generators -from pysipfenn.descriptorDefinitions import Ward2017, KS2022, KS2022_dilute +from pysipfenn.descriptorDefinitions import ( + Ward2017, KS2022, KS2022_dilute, KS2022_randomSolutions +) # - add new ones here if extending the code @@ -357,6 +359,73 @@ def calculate_KS2022_dilute(self, else: raise ValueError('`baseStruct` must be (1) `pure`, (2) `Structure` or a list of them.') + def calculate_KS2022_randomSolutions( + self, + baseStruct: Union[str, List[str], Structure, List[Structure], List[Union[Composition, str]]], + compList: Union[str, List[str], Composition, List[Composition], List[Union[Composition, str]]], + minimumSitesPerExpansion: int = 50, + featureConvergenceCriterion: float = 0.005, + 
compositionConvergenceCriterion: float = 0.01, + minimumElementOccurrences: int = 10, + plotParameters: bool = False, + printProgress: bool = False, + mode: str = 'serial', + max_workers: int = 8) -> List[np.ndarray]: + """Calculates KS2022 descriptors corresponding to random solid solutions occupying base structure / lattice + sites for a list of compositions through method described in `descriptorDefinitions.KS2022_randomSolutions` + submodule. The results are stored in the descriptorData attribute. The function returns the list of descriptors + in numpy format as well. + + Args: + baseStruct: The base structure to generate a random solid solution (RSS). It does _not_ need to be a simple + Bravis lattice, such as BCC lattice, but can be any `Structure` object or a list of them, if you need to + define them on per-case basis. In addition to `Structure` objects, you can use "magic" strings + corresponding to one of the structures in the library you can find under `pysipfenn.misc` directory or + loaded under `self.prototypeLibrary` attribute. The magic strings include, but are not limited to: + 'BCC', 'FCC', 'HCP', 'DHCP', 'Diamond', and so on. You can invoke them by their name, e.g. `BCC`, or + by passing `self.prototypeLibrary['BCC']['structure']` directly. If you pass a list to `baseStruct`, + you are allowed to mix-and-match `Structure` objects and magic strings. + compList: The composition to populate the supercell with until KS2022 descriptor converges. You can use + pymatgen's `Composition` objects or strings of valid chemical formulas (symbol - atomic fraction pairs), + like 'Fe0.5Ni0.3Cr0.2', 'Fe50 Ni30 Cr20', or 'Fe5 Ni3 Cr2'. You can either pass a single entity, in + which case it will be used for all structures (use to run the same composition for different base + structures), or a list of entities, in which case pairs will be used in the order of the list. 
If you + pass a list to `compList`, you are allowed to mix-and-match `Composition` objects and composition + strings. + minimumSitesPerExpansion: The minimum number of sites that the base structure will be expanded to (doubling + dimension-by-dimension) before it is used as expansion step/batch in each iteration of adding local + chemical environment information to the global ensemble. + The optimal value will depend on the number of species and their relative fractions in the composition. + Generally, low values (<20ish) will result in a slower convergence, as some extreme local chemical + environments will have strong influence on the global ensemble, and too high values (>150ish) will + result in a needlessly slow computation for not-complex compositions, as at least two iterations will + be processed. The default value is 50 and works well for simple cases. + featureConvergenceCriterion: The maximum difference between any feature belonging to the current iteration + (statistics based on the global ensemble of local chemical environments) and the previous iteration + (before last expansion) expressed as a fraction of the maximum value of each feature found in the OQMD + database at the time of SIPFENN creation (see `KS2022_randomSolutions/maxFeaturesInOQMD` array). + The default value is 0.01, corresponding to 1% of the maximum value. + compositionConvergenceCriterion: The maximum average difference between any element fraction belonging to + the current composition (net of all expansions) and the target composition (`comp`). The default value + is 0.01, corresponding to 1% deviation, which interpretation will depend on the number of elements + in the composition. + minimumElementOccurrences: The minimum number of times all elements must occur in the composition before it + is considered converged. This setting prevents the algorithm from converging before very dilute elements + like C in low-carbon steel, have had a chance to occur. The default value is 10. 
+ plotParameters: If True, the convergence history will be plotted using plotly. The default value is False, + but tracking them is recommended and will be accesiible in the `metas` attribute of the Calculator under + the key `RSS_convergence`. + printProgress: If True, the progress will be printed to the console. The default value is False. + mode: Mode of calculation. Options are `serial` (default) and `parallel`. + max_workers: Number of workers to use in parallel mode. Defaults to 8. + + Returns: + A list of `numpy.ndarrays` containing the KS2022 descriptor, just like the ordinary `KS2022`. **Please note + the stochastic nature of this algorithm**. The result will likely vary slightly between runs and parameters, + so if convergence is critical, verify it with a test matrix of `minimumSitesPerExpansion`, + `featureConvergenceCriterion`, and `compositionConvergenceCriterion` values. + """ + def loadModels(self, network: str = 'all') -> None: """ From 3634ec26736f7a58efa6c76872f87dc75b609a11 Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 10:06:38 -0500 Subject: [PATCH 30/59] (QoL) moved benchmarking from Linux to much faster Mac M1 for quicker results --- .github/workflows/benchmarks.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index ba5f858..ffaac4a 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -7,9 +7,8 @@ jobs: strategy: matrix: python-version: ["3.9", "3.10", "3.11"] - platform: [ubuntu-latest] fail-fast: false - runs-on: ${{ matrix.platform }} + runs-on: macos-14 steps: - uses: actions/checkout@v3 From 2fd446529523826698b0435a3a212451d29da640 Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 10:08:47 -0500 Subject: [PATCH 31/59] (QoL) moved full tests from Intel Mac, which was the only one offering 12+GB of RAM, to the current Linux Ubuntu 22, now offering 16GB. Ideally, Mac M1 would be used, but they are only 7GB. --- .github/workflows/fullTest.yml | 6 +++--- .github/workflows/weeklyTesting.yml | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/fullTest.yml b/.github/workflows/fullTest.yml index 4d89a35..8a07961 100644 --- a/.github/workflows/fullTest.yml +++ b/.github/workflows/fullTest.yml @@ -4,7 +4,7 @@ on: [push, pull_request] jobs: testPython309: - runs-on: macos-latest + runs-on: ubuntu-22.04 strategy: fail-fast: false steps: @@ -37,7 +37,7 @@ jobs: testPython310: needs: testPython309 - runs-on: macos-latest + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v3 @@ -64,7 +64,7 @@ jobs: testPython311: needs: testPython310 - runs-on: macos-latest + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/weeklyTesting.yml b/.github/workflows/weeklyTesting.yml index 52bc6db..8a6a2f0 100644 --- a/.github/workflows/weeklyTesting.yml +++ b/.github/workflows/weeklyTesting.yml @@ -34,7 +34,7 @@ jobs: testFullPython310: needs: coreTests - runs-on: macos-latest + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v3 From 279115829eb94d80ca91823d6f75edb06369e48a Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 10:10:24 -0500 Subject: [PATCH 32/59] (QoL) added test duration recording to the workflow and removed unnecessary coverage recording --- .github/workflows/fullTest.yml | 10 +++++----- .github/workflows/partialTest.yml | 4 ++-- .github/workflows/weeklyTesting.yml | 4 ++-- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/.github/workflows/fullTest.yml b/.github/workflows/fullTest.yml index 8a07961..7efa818 100644 --- a/.github/workflows/fullTest.yml +++ b/.github/workflows/fullTest.yml @@ -28,7 +28,7 @@ jobs: run: python -c "import pysipfenn; c = pysipfenn.Calculator(); c.downloadModels(); c.loadModels();" - name: Test with pytest - run: pytest --cov=pysipfenn --cov-report=xml + run: pytest --durations=0 --cov=pysipfenn --cov-report=xml env: MODELS_FETCHED: true @@ -52,13 +52,13 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install --upgrade setuptools - python -m pip install wheel flask pytest pytest-cov + python -m pip install wheel flask pytest python -m pip install -e ".[dev]" - name: Download Models run: python -c "import pysipfenn; c = pysipfenn.Calculator(); c.downloadModels(); c.loadModels();" - name: Test with pytest - run: pytest --cov=pysipfenn --cov-report=xml + run: pytest --durations=0 env: MODELS_FETCHED: true @@ -79,12 +79,12 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install --upgrade setuptools - python -m pip install wheel flask pytest pytest-cov + python -m pip install wheel flask pytest python -m pip install -e ".[dev]" - name: Download Models run: python -c "import pysipfenn; c = pysipfenn.Calculator(); c.downloadModels(); c.loadModels();" - name: Test with pytest - run: pytest --cov=pysipfenn --cov-report=xml + run: pytest --durations=0 env: MODELS_FETCHED: true diff --git a/.github/workflows/partialTest.yml b/.github/workflows/partialTest.yml index 09e7d1b..c880efe 100644 --- 
a/.github/workflows/partialTest.yml +++ b/.github/workflows/partialTest.yml @@ -24,8 +24,8 @@ jobs: run: | python -m pip install --upgrade pip python -m pip install --upgrade setuptools - python -m pip install wheel flask pytest pytest-cov + python -m pip install wheel flask pytest python -m pip install -e . - name: Test with pytest run: | - pytest --cov=pysipfenn --cov-report=xml + pytest --durations=0 diff --git a/.github/workflows/weeklyTesting.yml b/.github/workflows/weeklyTesting.yml index 8a6a2f0..f846814 100644 --- a/.github/workflows/weeklyTesting.yml +++ b/.github/workflows/weeklyTesting.yml @@ -30,7 +30,7 @@ jobs: python -m pip install -e . - name: Test with pytest run: | - pytest + pytest --durations=0 testFullPython310: needs: coreTests @@ -55,7 +55,7 @@ jobs: run: python -c "import pysipfenn; c = pysipfenn.Calculator(); c.downloadModels(); c.loadModels();" - name: Test with pytest - run: pytest + run: pytest --durations=0 env: MODELS_FETCHED: true From 6ab0975b88a2c1c66ffd60ed23c794efdfd7bb68 Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 10:10:51 -0500 Subject: [PATCH 33/59] (QoL) switched PyPI-publishing action to use Mac M1 for faster builds --- .github/workflows/publishPyPI.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/publishPyPI.yml b/.github/workflows/publishPyPI.yml index c76b9f3..9018bbc 100644 --- a/.github/workflows/publishPyPI.yml +++ b/.github/workflows/publishPyPI.yml @@ -7,7 +7,7 @@ on: jobs: deploy: - runs-on: ubuntu-latest + runs-on: macos-14 steps: - uses: actions/checkout@v3 From 79ab63ee12cf8b4a7c8d893349bfd3db650bbab8 Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 10:59:57 -0500 Subject: [PATCH 34/59] (QoL) moved benchmarks back to Linux to allow for older Python 3.9 and moved full tests back to Mac, as the memory limit seems to still be a problem on Linux --- .github/workflows/benchmarks.yml | 2 +- .github/workflows/fullTest.yml | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index ffaac4a..3c5889d 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -8,7 +8,7 @@ jobs: matrix: python-version: ["3.9", "3.10", "3.11"] fail-fast: false - runs-on: macos-14 + runs-on: ubuntu-22.04 steps: - uses: actions/checkout@v3 diff --git a/.github/workflows/fullTest.yml b/.github/workflows/fullTest.yml index 7efa818..b756d49 100644 --- a/.github/workflows/fullTest.yml +++ b/.github/workflows/fullTest.yml @@ -4,7 +4,7 @@ on: [push, pull_request] jobs: testPython309: - runs-on: ubuntu-22.04 + runs-on: macos-12 strategy: fail-fast: false steps: @@ -37,7 +37,7 @@ jobs: testPython310: needs: testPython309 - runs-on: ubuntu-22.04 + runs-on: macos-12 steps: - uses: actions/checkout@v3 @@ -64,7 +64,7 @@ jobs: testPython311: needs: testPython310 - runs-on: ubuntu-22.04 + runs-on: macos-12 steps: - uses: actions/checkout@v3 From 2bdfb4b807feb29ce322ee891bc996ebe896d9c6 Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 12:35:31 -0500 Subject: [PATCH 35/59] (RSS) switched the RSS profiling task to use prototype library in place of the hard-coded values; style improvements --- .../KS2022_randomSolutions.py | 77 ++++++++----------- 1 file changed, 32 insertions(+), 45 deletions(-) diff --git a/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py b/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py index dc173ea..d445d76 100755 --- a/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py +++ b/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py @@ -5,17 +5,12 @@ import os from pymatgen.core import Structure, Element, Composition from pymatgen.analysis.local_env import VoronoiNN -import json from collections import Counter from typing import List, Union, Tuple import random from importlib import resources - -citations = [ - 'Adam M. Krajewski, Jonathan W. Siegel, Jinchao Xu, Zi-Kui Liu, Extensible Structure-Informed Prediction of ' - 'Formation Energy with improved accuracy and usability employing neural networks, Computational ' - 'Materials Science, Volume 208, 2022, 111254' -] +from tqdm.contrib.concurrent import process_map +import pysipfenn periodic_table_size = 112 attribute_matrix = np.loadtxt(os.path.join(os.path.dirname(__file__), 'Magpie_element_properties.csv'), delimiter=',') @@ -194,7 +189,7 @@ def generate_descriptor(struct: Structure, descriptor. The default value is False. Returns: By default, a numpy array containing the KS2022 descriptor. Please note the stochastic nature of the - algorithm and that the result may vary slightly between runs and parameters. If returnMeta is True, + algorithm, and that the result may vary slightly between runs and parameters. If returnMeta is True, a tuple containing the descriptor and a dictionary containing the convergence history will be returned. 
""" @@ -327,13 +322,6 @@ def generate_descriptor(struct: Structure, f'{"(init)":^21} | ' f'{minOccupationCount:^4}') - if returnMeta: - metaData = {'diffHistory': diffHistory, - 'propHistory': propHistory, - 'finalAtomsN': attribute_properties.shape[0], - 'finalCompositionDistance': compositionDistance - } - if plotParameters: import plotly.express as px import pandas as pd @@ -359,7 +347,12 @@ def generate_descriptor(struct: Structure, assert properties.shape == (256,) assert isinstance(properties, np.ndarray) if returnMeta: - return properties, metaData + return properties, { + 'diffHistory': diffHistory, + 'propHistory': propHistory, + 'finalAtomsN': attribute_properties.shape[0], + 'finalCompositionDistance': compositionDistance + } else: return properties else: @@ -368,7 +361,11 @@ def generate_descriptor(struct: Structure, def cite() -> List[str]: """Citation/s for the descriptor.""" - return citations + return [ + 'Adam M. Krajewski, Jonathan W. Siegel, Jinchao Xu, Zi-Kui Liu, Extensible Structure-Informed Prediction of ' + 'Formation Energy with improved accuracy and usability employing neural networks, Computational ' + 'Materials Science, Volume 208, 2022, 111254' + ] def onlyStructural(descriptor: np.ndarray) -> np.ndarray: @@ -380,8 +377,8 @@ def onlyStructural(descriptor: np.ndarray) -> np.ndarray: Returns: A 103-length numpy array of the structure-dependent part of the KS2022 descriptor. Useful in cases where the descriptor is used as a fingerprint to compare polymorphs of the same compound. - """ + assert isinstance(descriptor, np.ndarray) assert descriptor.shape == (256,) descriptorSplit = np.split(descriptor, [68, 73, 93, 98, 113]) @@ -418,54 +415,44 @@ def profile(test: str = 'FCC', the descriptor and a dictionary containing the convergence history, or None. In either case, the descriptor will be persisted in `f'TestResult_KS2022_randomSolution_{test}_{nIterations}iter.csv'` file. 
""" + c = pysipfenn.Calculator(autoLoad=False) - if test == 'FCC': - print( - f'KS2022 Random Solid Solution profiling/testing task will calculate a descriptor for a random FCC alloy.') - matStr = '{"@module": "pymatgen.core.structure", "@class": "Structure", "charge": 0, "lattice": {"matrix": [[3.475145865948011, 0.0, 2.1279131306516942e-16], [5.588460777961125e-16, 3.475145865948011, 2.1279131306516942e-16], [0.0, 0.0, 3.475145865948011]], "pbc": [true, true, true], "a": 3.475145865948011, "b": 3.475145865948011, "c": 3.475145865948011, "alpha": 90.0, "beta": 90.0, "gamma": 90.0, "volume": 41.968081364279875}, "sites": [{"species": [{"element": "Ni", "occu": 1}], "abc": [0.0, 0.0, 0.0], "xyz": [0.0, 0.0, 0.0], "properties": {}, "label": "Ni"}, {"species": [{"element": "Ni", "occu": 1}], "abc": [0.0, 0.5, 0.5], "xyz": [2.7942303889805623e-16, 1.7375729329740055, 1.7375729329740055], "properties": {}, "label": "Ni"}, {"species": [{"element": "Ni", "occu": 1}], "abc": [0.5, 0.0, 0.5], "xyz": [1.7375729329740055, 0.0, 1.7375729329740055], "properties": {}, "label": "Ni"}, {"species": [{"element": "Ni", "occu": 1}], "abc": [0.5, 0.5, 0.0], "xyz": [1.7375729329740057, 1.7375729329740055, 2.1279131306516942e-16], "properties": {}, "label": "Ni"}]}' - elif test == 'BCC': - print('KS2022 Random Solution profiling/testing task will calculate the descriptor for a random BCC alloy.') - matStr = '{"@module": "pymatgen.core.structure", "@class": "Structure", "charge": 0, "lattice": {"matrix": [[2.863035498949916, 0.0, 1.75310362981713e-16], [4.60411223268961e-16, 2.863035498949916, 1.75310362981713e-16], [0.0, 0.0, 2.863035498949916]], "pbc": [true, true, true], "a": 2.863035498949916, "b": 2.863035498949916, "c": 2.863035498949916, "alpha": 90.0, "beta": 90.0, "gamma": 90.0, "volume": 23.468222587900303}, "sites": [{"species": [{"element": "Fe", "occu": 1}], "abc": [0.0, 0.0, 0.0], "xyz": [0.0, 0.0, 0.0], "properties": {}, "label": "Fe"}, {"species": [{"element": "Fe", 
"occu": 1}], "abc": [0.5, 0.5, 0.5], "xyz": [1.4315177494749582, 1.431517749474958, 1.4315177494749582], "properties": {}, "label": "Fe"}]}' - elif test == 'HCP': - print('KS2022 Random Solution profiling/testing task will calculate the descriptor for a random HCP alloy.') - matStr = '{"@module": "pymatgen.core.structure", "@class": "Structure", "charge": 0, "lattice": {"matrix": [[1.4678659615336875, -2.54241842407729, 0.0], [1.4678659615336875, 2.54241842407729, 0.0], [0.0, 0.0, 4.64085615]], "pbc": [true, true, true], "a": 2.9357319230673746, "b": 2.9357319230673746, "c": 4.64085615, "alpha": 90.0, "beta": 90.0, "gamma": 120.00000000000001, "volume": 34.6386956150451}, "sites": [{"species": [{"element": "Ti", "occu": 1}], "abc": [0.3333333333333333, 0.6666666666666666, 0.25], "xyz": [1.4678659615336875, 0.8474728080257632, 1.1602140375], "properties": {}, "label": "Ti"}, {"species": [{"element": "Ti", "occu": 1}], "abc": [0.6666666666666667, 0.33333333333333337, 0.75], "xyz": [1.4678659615336878, -0.8474728080257634, 3.4806421125], "properties": {}, "label": "Ti"}]}' - else: + try: + s = c.prototypeLibrary[test]['structure'] + except KeyError: raise NotImplementedError(f'Unrecognized test name: {test}') if nIterations == 1: - s = Structure.from_dict(json.loads(matStr)) d, meta = generate_descriptor(s, comp, plotParameters=plotParameters, returnMeta=True) print(f"Got meta with :{meta.keys()} keys") elif nIterations > 1: print(f'Running {nIterations} iterations in parallel...') - s = Structure.from_dict(json.loads(matStr)) - from tqdm.contrib.concurrent import process_map d = process_map(generate_descriptor, [s for _ in range(nIterations)], [comp for _ in range(nIterations)], chunksize=1, max_workers=8) else: - d = None - - if d is None: print('No descriptors generated.') return None + + name = f'TestResult_KS2022_randomSolution_{test}_{nIterations}iter.csv' + if nIterations == 1: + with open(name, 'w+') as f: + f.writelines([f'{v}\n' for v in d]) + if 
returnDescriptorAndMeta: + return d, meta else: - name = f'TestResult_KS2022_randomSolution_{test}_{nIterations}iter.csv' - if nIterations == 1: - with open(name, 'w+') as f: - f.writelines([f'{v}\n' for v in d]) - if returnDescriptorAndMeta: - return d, meta - else: - with open(name, 'w+') as f: - f.writelines([f'{",".join([str(v) for v in di])}\n' for di in d]) - return None + with open(name, 'w+') as f: + f.writelines([f'{",".join([str(v) for v in di])}\n' for di in d]) + return None print('Done!') if __name__ == "__main__": + print('You are running the KS2022_randomSolutions.py file directly. It is intended to be used as a module. ' + 'A profiling task will now commence, going over several cases. This will take a while.') + profile(test='FCC', plotParameters=True) profile(test='BCC', plotParameters=True) profile(test='HCP', plotParameters=True) From 1a8de5c81b29eb3075355773d654577d35fbb1d3 Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 13:01:22 -0500 Subject: [PATCH 36/59] (RSS) modified the convergence criterion to apply to _two_ past iterations of the expansion to limit occasional (1/50) flukes where the two initial iterations would be similar --- .../KS2022_randomSolutions.py | 39 +++++++++++-------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py b/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py index d445d76..d741f33 100755 --- a/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py +++ b/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py @@ -156,30 +156,29 @@ def generate_descriptor(struct: Structure, printProgress: bool = True, returnMeta: bool = False, ) -> Union[np.ndarray, Tuple[np.ndarray, dict]]: - """Main functionality. Generates the KS2022 descriptor for a given composition randomly distributed on a given - structure until the convergence criteria are met. 
The descriptor is KS2022 which is compatible with all KS2022 - models and approaches values that would be reached by infinite supercell size. + """**Main functionality.** Generates the KS2022 descriptor for a given composition randomly distributed on a given + structure until the convergence criteria are met. The descriptor is **KS2022** which is compatible with all KS2022 + models. It approaches values that would be reached by infinite supercell size. Args: - struct: A pymatgen Structure object that will be used as the basis for the structure to be generated. It can + struct: A pymatgen `Structure` object that will be used as the basis for the structure to be generated. It can be occupied by any species without affecting the result since all will be replaced by the composition. - comp: A pymatgen Composition object that will be randomly distributed on the structure within accuracy - determined by the compositionConvergenceCriterion. + comp: A pymatgen `Composition` object that will be randomly distributed on the structure within accuracy + determined by the `compositionConvergenceCriterion`. minimumSitesPerExpansion: The minimum number of sites that the base structure will be expanded to (doubling - dimension-by-dimension) before it will be used as expansion step in each iteration adding local chemical + dimension-by-dimension) before it is used as an expansion step in each iteration adding local chemical environment information to the global pool. Optimal value will depend on the number of species and their relative fractions in the composition. Generally, low values will result in slower convergence (<20ish) and too high values (>150ish) will result in slower computation. The default value is 50. 
- featureConvergenceCriterion: The maximum difference between any feature belonging to the current iteration - (statistics based on the - global ensemble of local chemical environments) and the previous iteration (before last expansion) - expressed as a fraction of the maximum value of each feature found in the OQMD database at the time of - SIPFENN creation (see maxFeaturesInOQMD array). The default value is 0.01, corresponding to 1% of the - maximum value. + featureConvergenceCriterion: **The maximum difference between any feature belonging to the current iteration + (statistics based on the global ensemble of local chemical environments) and the previous two iterations + (before the last expansion, and the one before that)** expressed as a fraction of the maximum value of each + structure-dependent KS2022 feature found in the OQMD database at the time of SIPFENN creation + (see `maxFeaturesInOQMD` array). The default value is 0.005, corresponding to 0.5% of the maximum value. compositionConvergenceCriterion: The maximum average difference between any element fraction belonging in the - current composition (all expansions) and the the target composition (comp). The default value is 0.01, - corresponding to deviation depending on the number of elements in the composition. + current composition (superposition of all expansions) and the target composition (comp). The default value + is 0.01, corresponding to deviation depending on the number of elements in the composition. minimumElementOccurrences: The minimum number of times all elements must occur in the composition before it is considered converged. This is to prevent the algorithm from converging before very dilute elements have had a chance to occur. The default value is 10. 
@@ -305,11 +304,16 @@ def generate_descriptor(struct: Structure, propHistory.append(properties) # Calculate the difference between the current step and the previous step and divide it by maximum value of # each feature found in OQMD to normalize the difference. - if len(propHistory) > 1: + if len(propHistory) > 2: + # Current iteration diff diff = np.subtract(properties, propHistory[-2]) diff /= maxFeaturesInOQMD diffHistory.append(diff) - maxDiff = np.max(np.abs(diff)) + # Calculate the additional diff to one level older iteration + diff2 = np.subtract(properties, propHistory[-3]) + diff2 /= maxFeaturesInOQMD + # Calculate the maximum difference across both differences + maxDiff = max(np.concatenate((diff, diff2), axis=0)) if printProgress: print(f'{attribute_properties.shape[0]:^6} | ' f'{compositionDistance: 18.6f} | ' @@ -321,6 +325,7 @@ def generate_descriptor(struct: Structure, f'{compositionDistance: 18.6f} | ' f'{"(init)":^21} | ' f'{minOccupationCount:^4}') + # ^^^ End of the long while-loop above if plotParameters: import plotly.express as px From 7b8ae27baf77e093ee147ef58aa694c78bfd5206 Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 13:21:04 -0500 Subject: [PATCH 37/59] (RSS) `KS2022_randomSolutions` code improvements --- .../KS2022_randomSolutions.py | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py b/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py index d741f33..ae5d1ab 100755 --- a/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py +++ b/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py @@ -427,9 +427,15 @@ def profile(test: str = 'FCC', except KeyError: raise NotImplementedError(f'Unrecognized test name: {test}') + name = f'TestResult_KS2022_randomSolution_{test}_{nIterations}iter.csv' + if nIterations == 1: d, meta = generate_descriptor(s, comp, plotParameters=plotParameters, returnMeta=True) print(f"Got meta with :{meta.keys()} keys") + with open(name, 'w+') as f: + f.writelines([f'{v}\n' for v in d]) + if returnDescriptorAndMeta: + return d, meta elif nIterations > 1: print(f'Running {nIterations} iterations in parallel...') d = process_map(generate_descriptor, @@ -437,20 +443,13 @@ def profile(test: str = 'FCC', [comp for _ in range(nIterations)], chunksize=1, max_workers=8) + with open(name, 'w+') as f: + f.writelines([f'{",".join([str(v) for v in di])}\n' for di in d]) + return None else: print('No descriptors generated.') return None - name = f'TestResult_KS2022_randomSolution_{test}_{nIterations}iter.csv' - if nIterations == 1: - with open(name, 'w+') as f: - f.writelines([f'{v}\n' for v in d]) - if returnDescriptorAndMeta: - return d, meta - else: - with open(name, 'w+') as f: - f.writelines([f'{",".join([str(v) for v in di])}\n' for di in d]) - return None print('Done!') From 0eee0c1a5c9ec79109d77f4979bb95df9302e6a5 Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 13:22:22 -0500 Subject: [PATCH 38/59] (RSS) tests for `KS2022_randomSolutions` now have tighter convergence limits thanks to the two-iteration convergence requirement; added new reference data --- ...KS2022_randomSolution_valueRangesMeans.csv | 512 +++++++++--------- pysipfenn/tests/test_KS2022_randomSolution.py | 5 +- 2 files changed, 260 insertions(+), 257 deletions(-) diff --git a/pysipfenn/tests/testCaseFiles/TestFile_DescriptorData_KS2022_randomSolution_valueRangesMeans.csv b/pysipfenn/tests/testCaseFiles/TestFile_DescriptorData_KS2022_randomSolution_valueRangesMeans.csv index 731f1a8..e357428 100644 --- a/pysipfenn/tests/testCaseFiles/TestFile_DescriptorData_KS2022_randomSolution_valueRangesMeans.csv +++ b/pysipfenn/tests/testCaseFiles/TestFile_DescriptorData_KS2022_randomSolution_valueRangesMeans.csv @@ -1,256 +1,256 @@ -0 , 11.956922 -2.25653E-13 , 8.41582E-14 -0 , 11.956922 -0 , 11.956922 -2.59662E-14 , 9.997E-15 -0 , 1 -0 , 1 -0 , 0.051981524 -6.1973E-16 , 5.05368E-16 -0 , 0.051981524 -0 , 0.051981524 -1.96799E-15 , 7.81255E-15 -0 , 0.68017477 -0.495706 , 7.156059838 -0.3536611 , 1.836693654 -1.4406951 , 1.630815723 -2.075388 , 13.82523968 -3.031089 , 12.19442395 -0.69352 , 9.54204688 -0.3396263 , 2.305430422 -2.7079263 , 3.0541716 -4.030012 , 20.01459632 -5.103964 , 16.96042466 -1.143624 , 15.94405229 -0.7997267 , 4.173909624 -3.4152157 , 3.301960464 -5.195869 , 30.99760564 -7.014918 , 27.69564488 -38.30625 , 497.9362926 -21.44297 , 118.9397736 -172.6905 , 138.1111104 -225.01376 , 1032.616791 -239.11011 , 894.505668 -0.2121525 , 2.71884916 -0.08371147 , 0.689176477 -0.7313326 , 0.811622682 -1.045017 , 5.978859842 -1.335655 , 5.167237158 -0.03960428 , 0.461833102 -0.03373618 , 0.160188756 -0.037334874 , 0.012693857 -0.0969989 , 0.980139291 -0.11200464 , 0.967445437 -0.6193517 , 6.76711774 -0.2724491 , 1.903122842 -2.47034754 , 1.669776075 -2.633975 , 15.9932682 -3.321367 , 
14.32349224 -0.01000747 , 0.1416168 -0.005979603 , 0.028839132 -0.05060023 , 0.03627259 -0.05269384 , 0.257864171 -0.08299397 , 0.221591581 -0.06750154 , 0.378515474 -0.02223407 , 0.20529251 -0 , 0 -0.03733486 , 0.999253303 -0.03733486 , 0.999253303 -0.03960428 , 0.461833102 -0.03373618 , 0.160188756 -0.037334874 , 0.012693857 -0.0969989 , 0.980139291 -0.11200464 , 0.967445437 -0.3310651 , 4.107159046 -0.1388009 , 0.981995232 -1.18667435 , 1.292237272 -1.685956 , 8.195215332 -2.0219698 , 6.902978044 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0.2633564 , 3.46139504 -0.1110069 , 0.736185641 -1.1866744 , 1.123709131 -1.1266513 , 6.61388276 -1.8883546 , 5.490173664 -0.06750154 , 0.378515474 -0.02223407 , 0.20529251 -0 , 0 -0.03733486 , 0.999253303 -0.03733486 , 0.999253303 -0.1980214 , 2.30916551 -0.1686809 , 0.800943778 -0.18667437 , 0.063469286 -0.4849944 , 4.900696344 -0.5600233 , 4.83722702 -0.1624366 , 2.07243773 -0.0728083 , 0.543533358 -0.53733489 , 0.345320251 -0.6866742 , 4.489935366 -0.8360138 , 4.144615076 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0.1622579 , 1.797459108 -0.076864 , 0.505663534 -0.53001155 , 0.406009489 -0.7536616 , 3.975001402 -0.8356548 , 3.568991906 -0.2124889 , 2.836383928 -0.1711379 , 0.832906893 -0.43589822 , 0.428110908 -0.734869 , 5.705615302 -0.7972693 , 5.277504378 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0.0927644 , 0.76869418 -0.03608947 , 0.334015947 -0.115505196 , 0.023239168 -0.1501998 , 2.055008426 -0.2079524 , 2.031769284 -0 , 6 -0.02524308 , 0.461492413 -0.03814195 , 0.387214791 -0.0483238 , 0.36416448 -0.05011593 , 0.362282451 -0.0504089 , 0.362087858 -0.73686 , 21.80250736 -0 , 16 -0.3910186 , 6.37080091 -0 , 29 -0 , 13 -0 , 13 -0.783653 , 63.04611016 -0 , 24 -0.3591037 , 7.44639268 -0 , 73 -0 , 49 -0 , 73 -1.650208 , 46.64339502 -0 , 36.56446 -0.892993 , 14.23025184 -0 , 63.546 -0 , 26.981539 -0 , 26.981539 -44.0357 , 1466.598146 -0 , 1246.53 -15.55008 , 413.511442 -0 , 2180 -0 , 933.47 -0 , 933.47 -0.226071 , 10.31995622 -0 , 7 
-0.1153323 , 2.112603636 -0 , 13 -0 , 6 -0 , 13 -0.0504262 , 3.637919518 -0 , 1 -0.02872285 , 0.461688638 -0 , 4 -0 , 3 -0 , 3 -0.64773 , 127.1209695 -0 , 18 -0.448271 , 5.511034156 -0 , 139 -0 , 121 -0 , 121 -0.0126491 , 1.754152112 -0 , 0.3 -0.00336262 , 0.128383431 -0 , 1.91 -0 , 1.61 -0 , 1.61 -0.0513139 , 1.744924402 -0 , 1 -0.05056679 , 0.379712166 -0 , 2 -0 , 1 -0 , 2 -0.05042613 , 0.362080494 -0 , 1 -0.02872285 , 0.461688638 -0 , 1 -0 , 0 -0 , 1 -0.4154825 , 4.59214641 -0 , 10 -0.2052717 , 3.323552678 -0 , 10 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0.3334517 , 6.699151302 -0 , 8 -0.1331742 , 2.856055348 -0 , 11 -0 , 3 -0 , 3 -0.05131393 , 0.255075604 -0 , 1 -0.05056679 , 0.379712166 -0 , 1 -0 , 0 -0 , 0 -0.2521306 , 1.81040247 -0 , 5 -0.1436144 , 2.308443166 -0 , 5 -0 , 0 -0 , 5 -0.1974431 , 1.787048632 -0 , 5 -0.0906735 , 1.748041772 -0 , 5 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0.147626 , 3.85252671 -0 , 5 -0.1222616 , 1.416195596 -0 , 6 -0 , 1 -0 , 5 -0.291387 , 12.79934667 -0 , 6.235 -0.1661144 , 2.663854348 -0 , 16.48 -0 , 10.245 -0 , 16.48 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0 , 0 -0.10456043 , 0.543091895 -0 , 2.1106627 -0.0729257 , 0.669993732 -0 , 2.1106627 -0 , 0 -0 , 0 -0.01790073 , 0.260508935 -0.010140542 , 0.054071563 -0.0278215 , 0.685419501 -0 , 0 -0 , 0.022248764 -0.000333759 , 0.008531074 \ No newline at end of file +0,11.956922 +5.4845E-13,1.78364E-13 +0,11.956922 +0,11.956922 +4.93266E-14,1.40795E-14 +0,1 +0,1 +0,0.051981524 +3.28969E-15,8.93664E-16 +0,0.051981524 +0,0.051981524 +5.68129E-14,1.81415E-14 +0,0.68017477 +0.3870897,7.12652049 +0.2899097,1.837693244 +1.4187251,1.503641655 +1.969989,13.9414315 +2.687034,12.43778996 +0.611908,9.53060188 +0.2614777,2.284466884 +2.618969,3.066401706 +3.448019,20.47419864 +4.07503,17.40779674 +0.875219,15.87736112 +0.634954,4.17719946 +3.4312229,3.09477411 +3.950918,31.22195464 +5.636962,28.1271806 +31.39746,497.090631 +16.30954,117.7722932 +125.92536,131.2679296 
+191.5083,1057.475323 +236.76546,926.2073952 +0.1834395,2.717744856 +0.0802421,0.681151868 +0.73133256,0.840033091 +0.977671,6.09192841 +1.1420163,5.251895358 +0.03386527,0.459122396 +0.02649208,0.16089159 +0.07466975,0.006720277 +0.0969989,0.981932798 +0.1343337,0.975212524 +0.5560754,6.78144089 +0.2390648,1.877612342 +1.47802996,1.58645044 +2.037694,16.22205616 +2.342977,14.63560574 +0.00590287,0.141375248 +0.003675785,0.028447453 +0.036507761,0.035212654 +0.03926406,0.259923003 +0.06024842,0.224710346 +0.04256493,0.380936394 +0.01227298,0.204455063 +0,0 +0,1 +0,1 +0.03386527,0.459122396 +0.02649208,0.16089159 +0.07466975,0.006720277 +0.0969989,0.981932798 +0.1343337,0.975212524 +0.20239,4.09579141 +0.16311316,0.977560412 +0.95498273,1.226774915 +1.5223296,8.267676606 +1.8360136,7.040901604 +0,0 +0,0 +0,0 +0,0 +0,0 +0.139879,3.453541334 +0.11973184,0.729494118 +0.94765933,1.074820572 +1.0593047,6.670559944 +1.6336153,5.59573941 +0.04256493,0.380936394 +0.01227298,0.204455063 +0,0 +0,1 +0,1 +0.1693264,2.295611978 +0.1324603,0.804457947 +0.37334874,0.033601387 +0.4849944,4.909663886 +0.6716685,4.876062508 +0.1462688,2.07578746 +0.07372314,0.531491133 +0.56002307,0.326988845 +0.798679,4.565558512 +0.992677,4.238569714 +0,0 +0,0 +0,0 +0,0 +0,0 +0.1143462,1.799108396 +0.0441769,0.500102094 +0.4030011,0.366938569 +0.6193281,4.01516943 +0.7536616,3.64823086 +0.187165,2.822845694 +0.1357858,0.834206766 +0.57286467,0.38234843 +0.6499481,5.727070804 +0.7513733,5.344722386 +0,0 +0,0 +0,0 +0,0 +0,0 +0.08364035,0.773743065 +0.02746088,0.331854182 +0.115505196,0.019976304 +0.1378528,2.057713226 +0.1600819,2.03773695 +0,6 +0.02158918,0.459222344 +0.03288508,0.383836727 +0.04213888,0.359973431 +0.04385403,0.35796432 +0.04415008,0.357751596 +0.577753,21.8641378 +0,16 +0.401033,6.339413866 +0,29 +0,13 +0,13 +0.966626,62.98378654 +0,24 +0.408654,7.424505706 +0,73 +0,49 +0,73 +1.280416,46.77879226 +0,36.56446 +0.895185,14.15848442 +0,63.546 +0,26.981539 +0,26.981539 
+51.3505,1469.79143 +0,1246.53 +18.80364,412.3796886 +0,2180 +0,933.47 +0,933.47 +0.284296,10.30351869 +0,7 +0.1146178,2.107594864 +0,13 +0,6 +0,13 +0.0441693,3.642256634 +0,1 +0.02584112,0.4593225 +0,4 +0,3 +0,3 +0.65975,127.1667527 +0,18 +0.378223,5.520421104 +0,139 +0,121 +0,121 +0.0115318,1.755211672 +0,0.3 +0.00296068,0.128206865 +0,1.91 +0,1.61 +0,1.61 +0.039237,1.743257468 +0,1 +0.03804427,0.381459904 +0,2 +0,1 +0,2 +0.04416913,0.357743385 +0,1 +0.02584112,0.4593225 +0,1 +0,0 +0,1 +0.2941426,4.625083994 +0,10 +0.2393346,3.307764096 +0,10 +0,0 +0,0 +0,0 +0,0 +0,0 +0,0 +0,0 +0,0 +0.2614398,6.726084862 +0,8 +0.1340166,2.85048759 +0,11 +0,3 +0,3 +0.03923698,0.256742528 +0,1 +0.03804427,0.381459904 +0,1 +0,0 +0,0 +0.2208457,1.788716936 +0,5 +0.1292055,2.296612498 +0,5 +0,0 +0,5 +0.2137315,1.79748213 +0,5 +0.0796188,1.748064026 +0,5 +0,0 +0,0 +0,0 +0,0 +0,0 +0,0 +0,0 +0,0 +0.1629317,3.842941588 +0,5 +0.1175019,1.419926944 +0,6 +0,1 +0,5 +0.2446755,12.77501871 +0,6.235 +0.1560111,2.649703098 +0,16.48 +0,10.245 +0,16.48 +0,0 +0,0 +0,0 +0,0 +0,0 +0,0 +0.0674975,0.547386102 +0,2.1106627 +0.05785012,0.672396997 +0,2.1106627 +0,0 +0,0 +0.013614,0.259207257 +0.008346618,0.053205072 +0.02080855,0.687587671 +0,0 +0,0.022248764 +0.000330044,0.008516198 \ No newline at end of file diff --git a/pysipfenn/tests/test_KS2022_randomSolution.py b/pysipfenn/tests/test_KS2022_randomSolution.py index 45ac2a5..4290863 100644 --- a/pysipfenn/tests/test_KS2022_randomSolution.py +++ b/pysipfenn/tests/test_KS2022_randomSolution.py @@ -29,7 +29,10 @@ def test_results(self): feature converges to near exactly the mean value with near-zero range (e.g. coordination number in BCC in case of ideal lattice positions). 
''' - testValues, meta = KS2022_randomSolutions.profile(test='BCC', returnDescriptorAndMeta=True, plotParameters=True) + testValues, meta = KS2022_randomSolutions.profile( + test='BCC', + returnDescriptorAndMeta=True, + plotParameters=True) for testValue, descriptorRange, descriptorMean, label in zip( testValues, From 3ddb64c1b7cf8c86eb52ef2d6498e96a30490141 Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 14:33:25 -0500 Subject: [PATCH 39/59] (QoL) `Calculator` printout improvements --- pysipfenn/core/pysipfenn.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index 84c0a2b..e7889ec 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -101,7 +101,8 @@ def __init__(self, def __str__(self): """Prints the status of the `Calculator` object.""" printOut = f'pySIPFENN Calculator Object. Version: {__version__}\n' - printOut += f'Models are located in:\n{resources.files("pysipfenn.modelsSIPFENN")}\n{"-" * 80}\n' + printOut += f'Models are located in:\n {resources.files("pysipfenn.modelsSIPFENN")}\n' + printOut += f'Auxiliary files (incl. structure prototypes):\n {resources.files("pysipfenn.misc")}\n{"-" * 80}\n' printOut += f'Loaded Networks: {list(self.loadedModels.keys())}\n' if len(self.inputFiles) > 0: printOut += f'Last files selected as input: {len(self.inputFiles)}\n' @@ -125,9 +126,9 @@ def updateModelAvailability(self) -> None: for net, netName in zip(self.network_list, self.network_list_names): if all_files.__contains__(net + '.onnx'): detectedNets.append(net) - print('\u2714 ' + netName) + print('✔ ' + netName) else: - print('\u292B ' + netName) + print('⨯ ' + netName) self.network_list_available = detectedNets def parsePrototypeLibrary(self, From 23bb36ec6f80896e907310b986b9e11c85031158 Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 18:29:26 -0500 Subject: [PATCH 40/59] (core) improved verbosity handling --- pysipfenn/core/pysipfenn.py | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index e7889ec..3daca5d 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -45,7 +45,11 @@ class Calculator: structure-informed descriptors (feature vectors) and predicting properties using models that utilize them. Args: - autoLoad: Automatically load all available models. Default: True. + autoLoad: Automatically load all available ML models based on the `models.json` file. This _will_ require + significant memory and time if they are available, so for featurization and other non-model-requiring + tasks, it is recommended to set this to False. Defaults to True. + verbose: Print initialization messages and several other non-critical messages during runtime procedures. + Defaults to True. 
Attributes: models: Dictionary with all model information based on the models.json file in the modelsSIPFENN @@ -67,6 +71,7 @@ def __init__(self, verbose: bool = True): if verbose: print('********* Initializing pySIPFENN Calculator **********') + self.verbose = verbose # dictionary with all model information with resources.files('pysipfenn.modelsSIPFENN').joinpath('models.json').open('r') as f: if verbose: @@ -94,6 +99,9 @@ def __init__(self, self.toRun = [] self.descriptorData = [] self.predictions = [] + self.metas = { + 'RSS': [] + } self.inputFiles = [] if verbose: print(f'********* pySIPFENN Successfully Initialized **********') @@ -264,12 +272,12 @@ def calculate_Ward2017(self, """ if mode == 'serial': descList = [Ward2017.generate_descriptor(s) for s in tqdm(structList)] - print('Done!') + if self.verbose: print('Done!') self.descriptorData = descList return descList elif mode == 'parallel': descList = process_map(Ward2017.generate_descriptor, structList, max_workers=max_workers) - print('Done!') + if self.verbose: print('Done!') self.descriptorData = descList return descList @@ -293,12 +301,12 @@ def calculate_KS2022(self, """ if mode == 'serial': descList = [KS2022.generate_descriptor(s) for s in tqdm(structList)] - print('Done!') + if self.verbose: print('Done!') self.descriptorData = descList return descList elif mode == 'parallel': descList = process_map(KS2022.generate_descriptor, structList, max_workers=max_workers) - print('Done!') + if self.verbose: print('Done!') self.descriptorData = descList return descList @@ -332,7 +340,8 @@ def calculate_KS2022_dilute(self, if baseStruct == 'pure' or isinstance(baseStruct, Structure): if mode == 'serial': descList = [KS2022_dilute.generate_descriptor(s, baseStruct=baseStruct) for s in tqdm(structList)] - print('Done!') + if self.verbose: + print('Done!') self.descriptorData = descList return descList elif mode == 'parallel': @@ -340,21 +349,24 @@ def calculate_KS2022_dilute(self, descList = 
process_map(wrapper_KS2022_dilute_generate_descriptor, pairedInput, max_workers=max_workers) - print('Done!') + if self.verbose: + print('Done!') self.descriptorData = descList return descList elif isinstance(baseStruct, List) and len(baseStruct) == len(structList): if mode == 'serial': descList = [KS2022_dilute.generate_descriptor(s, bs) for s, bs in tqdm(zip(structList, baseStruct))] - print('Done!') + if self.verbose: + print('Done!') self.descriptorData = descList return descList elif mode == 'parallel': pairedInput = list(zip(structList, baseStruct)) descList = process_map(wrapper_KS2022_dilute_generate_descriptor, pairedInput, max_workers=max_workers) - print('Done!') + if self.verbose: + print('Done!') self.descriptorData = descList return descList else: From 24a0f49da828dd7cc1a346048ddfd739f7be3548 Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 18:33:36 -0500 Subject: [PATCH 41/59] (core/RSS) implemented `calculate_KS2022_randomSolutions` high-level API with many input type handlers --- pysipfenn/core/pysipfenn.py | 107 +++++++++++++++++++++++++++++++++--- 1 file changed, 100 insertions(+), 7 deletions(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index 3daca5d..2867b37 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -374,7 +374,7 @@ def calculate_KS2022_dilute(self, def calculate_KS2022_randomSolutions( self, - baseStruct: Union[str, List[str], Structure, List[Structure], List[Union[Composition, str]]], + baseStructList: Union[str, Structure, List[str], List[Structure], List[Union[Composition, str]]], compList: Union[str, List[str], Composition, List[Composition], List[Union[Composition, str]]], minimumSitesPerExpansion: int = 50, featureConvergenceCriterion: float = 0.005, @@ -390,7 +390,7 @@ def calculate_KS2022_randomSolutions( in numpy format as well. 
Args: - baseStruct: The base structure to generate a random solid solution (RSS). It does _not_ need to be a simple + baseStructList: The base structure to generate a random solid solution (RSS). It does _not_ need to be a simple Bravis lattice, such as BCC lattice, but can be any `Structure` object or a list of them, if you need to define them on per-case basis. In addition to `Structure` objects, you can use "magic" strings corresponding to one of the structures in the library you can find under `pysipfenn.misc` directory or @@ -426,19 +426,91 @@ def calculate_KS2022_randomSolutions( is considered converged. This setting prevents the algorithm from converging before very dilute elements like C in low-carbon steel, have had a chance to occur. The default value is 10. plotParameters: If True, the convergence history will be plotted using plotly. The default value is False, - but tracking them is recommended and will be accesiible in the `metas` attribute of the Calculator under - the key `RSS_convergence`. + but tracking them is recommended and will be accessible in the `metas` attribute of the Calculator under + the key `RSS`. printProgress: If True, the progress will be printed to the console. The default value is False. mode: Mode of calculation. Options are `serial` (default) and `parallel`. max_workers: Number of workers to use in parallel mode. Defaults to 8. Returns: - A list of `numpy.ndarrays` containing the KS2022 descriptor, just like the ordinary `KS2022`. **Please note + A list of `numpy.ndarray`s containing the KS2022 descriptor, just like the ordinary `KS2022`. **Please note the stochastic nature of this algorithm**. The result will likely vary slightly between runs and parameters, so if convergence is critical, verify it with a test matrix of `minimumSitesPerExpansion`, `featureConvergenceCriterion`, and `compositionConvergenceCriterion` values. 
""" + # LIST-LIST: Assert that if both baseStruct and compList are lists, they have the same length + if isinstance(baseStructList, list) and isinstance(compList, list): + assert len(baseStructList) == len(compList), \ + 'baseStruct and compList must have the same length if both are lists. If you want to use the same ' \ + 'entity for all calculations, do not wrap it.' + + # STRING / STRUCT handling and extension + if isinstance(baseStructList, str) or isinstance(baseStructList, Structure): + baseStructList = [baseStructList] + if isinstance(compList, list) and len(compList) > 1: + baseStructList = baseStructList * len(compList) + else: + assert isinstance(baseStructList, list), 'baseStruct must be a list if it is not a string or Structure.' + + if isinstance(compList, str) or isinstance(compList, Composition): + compList = [compList] + if isinstance(baseStructList, list) and len(baseStructList) > 1: + compList = compList * len(baseStructList) + else: + assert isinstance(compList, list), 'compList must be a list if it is not a string or Composition.' + + # LISTS of STRING / STRUCT + for i in range(len(baseStructList)): + assert isinstance(baseStructList[i], (str, Structure)), \ + 'baseStruct must be a list of strings or Structure objects.' + if isinstance(baseStructList[i], str): + baseStructList[i] = string2prototype(self, baseStructList[i]) + + for i in range(len(compList)): + assert isinstance(compList[i], (str, Composition)), \ + 'compList must be a list of strings or Composition objects.' + if isinstance(compList[i], str): + c = Composition(compList[i]) + assert c.valid, f'Unrecognized composition string: {compList}. Please provide a valid composition ' \ + f'string, e.g. "Fe0.5Ni0.3Cr0.2", "Fe50 Ni30 Cr20", or "Fe5 Ni3 Cr2".' + compList[i] = c + + assert len(baseStructList) == len(compList), 'baseStruct and compList must have the same length at this point.' 
+ pairedInputAndSettings, descList, metaList = [], [], [] + + for i in range(len(baseStructList)): + pairedInputAndSettings.append( + (baseStructList[i], + compList[i], + minimumSitesPerExpansion, + featureConvergenceCriterion, + compositionConvergenceCriterion, + minimumElementOccurrences, + plotParameters, + printProgress, + True)) + + if mode == 'serial': + for base, comp, *settings in tqdm(pairedInputAndSettings): + desc, meta = KS2022_randomSolutions.generate_descriptor(base, comp, *settings) + descList.append(desc) + metaList.append(meta) + + elif mode == 'parallel': + print(pairedInputAndSettings) + descList, metaList = zip(*process_map( + wrapper_KS2022_randomSolutions_generate_descriptor, + pairedInputAndSettings, + max_workers=max_workers + )) + else: + raise ValueError('Incorrect calculation mode selected. Must be either `serial` or `parallel`.') + if self.verbose: + print('Done!') + self.descriptorData = descList + self.metas['RSS'] = metaList + return descList def loadModels(self, network: str = 'all') -> None: """ @@ -903,7 +975,28 @@ def overwritePrototypeLibrary(prototypeLibrary: dict) -> None: yaml_customDumper.dump(prototypeList, f) print(f'Updated prototype library persisted to {f.name}') -# WRAPPERS +# HELPERS +def string2prototype(c: Calculator, prototype: str) -> Structure: + """Converts a prototype string to a pymatgen Structure object. + + Args: + c: Calculator object with the `prototypeLibrary`. + prototype: Prototype string. + + Returns: + Structure object. + """ + assert isinstance(prototype, str), 'Prototype string must be a string.' + assert prototype in c.prototypeLibrary, \ + f'Unrecognized magic string for baseStruct: {prototype}. Please use one of the recognized magic ' \ + f'strings: {list(c.prototypeLibrary.keys())} or provide a Structure object.' 
+ s: Structure = c.prototypeLibrary[prototype]['structure'] + assert s.is_valid(), f'Invalid structure: {s}' + return s +# WRAPPERS def wrapper_KS2022_dilute_generate_descriptor(args): - return KS2022_dilute.generate_descriptor(*args) \ No newline at end of file + return KS2022_dilute.generate_descriptor(*args) + +def wrapper_KS2022_randomSolutions_generate_descriptor(args): + return KS2022_randomSolutions.generate_descriptor(*args) \ No newline at end of file From d492ae3c57a9d1d1641be98d42bfa7728d985dfd Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 18:34:18 -0500 Subject: [PATCH 42/59] (RSS) added `'finalComposition'` to the metadata return and added some assertions --- .../KS2022_randomSolutions.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py b/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py index ae5d1ab..5e1c494 100755 --- a/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py +++ b/pysipfenn/descriptorDefinitions/KS2022_randomSolutions.py @@ -210,14 +210,22 @@ def generate_descriptor(struct: Structure, propHistory = [] diffHistory = [] allOccupations = [] - maxDiff = 1 - compositionDistance = 0 + maxDiff = 5 + compositionDistance = 1 minOccupationCount = 0 - properties = None + properties: np.ndarray = None + currentComposition: Composition = None if printProgress: print(f'#Atoms | Comp. Distance AVG | Convergence Crit. 
MAX | Occupation Count MIN') + if maxDiff < featureConvergenceCriterion: + raise AssertionError('Invalid convergence criteria (maxDiff < featureConvergenceCriterion).') + if compositionDistance < compositionConvergenceCriterion: + raise AssertionError('Invalid convergence criteria (compositionDistance > compositionConvergenceCriterion).') + if minOccupationCount > minimumElementOccurrences: + raise AssertionError('Invalid convergence criteria (minOccupationCount > minimumElementOccurrences).') + while maxDiff > featureConvergenceCriterion \ or compositionDistance > compositionConvergenceCriterion \ or minOccupationCount < minimumElementOccurrences: @@ -356,7 +364,8 @@ def generate_descriptor(struct: Structure, 'diffHistory': diffHistory, 'propHistory': propHistory, 'finalAtomsN': attribute_properties.shape[0], - 'finalCompositionDistance': compositionDistance + 'finalCompositionDistance': compositionDistance, + 'finalComposition': currentComposition.fractional_composition } else: return properties From 455a8b5a0d9499b9b20ebc8b0b7e252e004ac1a2 Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 18:34:39 -0500 Subject: [PATCH 43/59] (RSS) added testing for `'finalComposition'` presence --- ..._KS2022_randomSolution.py => test_KS2022_randomSolutions.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename pysipfenn/tests/{test_KS2022_randomSolution.py => test_KS2022_randomSolutions.py} (98%) diff --git a/pysipfenn/tests/test_KS2022_randomSolution.py b/pysipfenn/tests/test_KS2022_randomSolutions.py similarity index 98% rename from pysipfenn/tests/test_KS2022_randomSolution.py rename to pysipfenn/tests/test_KS2022_randomSolutions.py index 4290863..648c708 100644 --- a/pysipfenn/tests/test_KS2022_randomSolution.py +++ b/pysipfenn/tests/test_KS2022_randomSolutions.py @@ -43,7 +43,7 @@ def test_results(self): self.assertGreaterEqual(testValue, (0.98*descriptorMean)-descriptorRange-1e-6) self.assertLessEqual(testValue, (1.02*descriptorMean)+descriptorRange+1e-6) - for field in ['diffHistory', 'propHistory', 'finalAtomsN', 'finalCompositionDistance']: + for field in ['diffHistory', 'propHistory', 'finalAtomsN', 'finalCompositionDistance', 'finalComposition']: with self.subTest(msg=f'{field} present in meta'): self.assertIn(field, meta) From c6d823b11a6027f743e9047ab70ad4cb39a2a5ca Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 18:36:12 -0500 Subject: [PATCH 44/59] (RSS) adding tests for `calculate_KS2022_randomSolutions` with matrix of input types: serial pair --- pysipfenn/tests/test_pysipfenn.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/pysipfenn/tests/test_pysipfenn.py b/pysipfenn/tests/test_pysipfenn.py index b143533..acaeee2 100644 --- a/pysipfenn/tests/test_pysipfenn.py +++ b/pysipfenn/tests/test_pysipfenn.py @@ -227,7 +227,6 @@ def test_descriptorCalculate_KS2022_dilute_parallel(self): for t0, t1 in zip(temp0, temp1): self.assertAlmostEqual(t0, t1, places=6) - def test_RunModels_Errors(self): '''Test that the runModels() and runModels_dilute() methods raise errors correctly when it is called with no models to run or with a descriptor handling that has not been implemented. @@ -293,5 +292,34 @@ def test_CalculatorPrint(self): self.assertIn('Loaded Networks', printOut) +class TestCoreRSS(unittest.TestCase): + """Test the high-level API functionality of the Calculator object in regard to random solution structures (RSS). It + does not test the accuracy, just all runtime modes and known physicality of the results (e.g., FCC should have + coordination number of `12`). + + Note: + The execution of the descriptorCalculate() method with KS2022_randomSolution is done under coarse settings + (for speed reasons) and should not be used for any accuracy tests. A separate testing for calculation accuracy + against consistency and reference values is done in `test_KS2022_randomSolutions.py`. 
+ """ + def setUp(self): + self.c = pysipfenn.Calculator() + self.assertIsNotNone(self.c) + + def test_descriptorCalculate_KS2022_randomSolution_serial_pair(self): + """Test successful execution of a composition-structure pair in series""" + + with self.subTest(msg="Running single composition-structure pair"): + d1 = self.c.calculate_KS2022_randomSolutions( + 'BCC', + 'FeNi', + minimumSitesPerExpansion=16, + featureConvergenceCriterion=0.02, + compositionConvergenceCriterion=0.05, + mode='serial') + self.assertEqual(len(d1), 1, "Only one composition-structure pair should be processed.") + self.assertEqual(len(d1[0]), 256, "All 256 KS2022 features should be obtained.") + + if __name__ == '__main__': unittest.main() From 3f14836e309b75a729680c7ca73ca2c96279362e Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 18:36:49 -0500 Subject: [PATCH 45/59] (RSS) adding tests for `calculate_KS2022_randomSolutions` with matrix of input types: serial multiple compositions --- pysipfenn/tests/test_pysipfenn.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/pysipfenn/tests/test_pysipfenn.py b/pysipfenn/tests/test_pysipfenn.py index acaeee2..527abc0 100644 --- a/pysipfenn/tests/test_pysipfenn.py +++ b/pysipfenn/tests/test_pysipfenn.py @@ -320,6 +320,29 @@ def test_descriptorCalculate_KS2022_randomSolution_serial_pair(self): self.assertEqual(len(d1), 1, "Only one composition-structure pair should be processed.") self.assertEqual(len(d1[0]), 256, "All 256 KS2022 features should be obtained.") + def test_descriptorCalculate_KS2022_randomSolution_serial_multiple(self): + """Test successful execution (in series) of multiple compositions occupying the same FCC lattice.""" + with self.subTest(msg="Running multiple compositions occupying the same FCC lattice"): + d2 = self.c.calculate_KS2022_randomSolutions( + 'FCC', + ['FeNi', 'CrNi'], + minimumSitesPerExpansion=16, + 
featureConvergenceCriterion=0.02, + compositionConvergenceCriterion=0.05, + mode='serial') + self.assertEqual(len(d2), 2, "Two composition-structure pairs should be processed.") + self.assertEqual(len(d2[0]), 256, "All 256 KS2022 features should be obtained.") + self.assertEqual(len(d2[1]), 256, "All 256 KS2022 features should be obtained.") + self.assertAlmostEqual( + float(d2[0][0]), + float(d2[1][0]) + , places=6, msg="Coordination number (KS2022[0]) should be the same (12) for both compositions.") + self.assertNotAlmostEquals( + float(d2[0][13]), + float(d2[1][13]) + , places=6, msg="mean_NeighDiff_shell1_Number (KS2022[13]) should be different (1.0vs2.0)." + ) + if __name__ == '__main__': unittest.main() From 9f157d2bb5d27a885494dc1b2b81812d8e3e81a7 Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 18:37:17 -0500 Subject: [PATCH 46/59] (RSS) adding tests for `calculate_KS2022_randomSolutions` with matrix of input types: parallel pair --- pysipfenn/tests/test_pysipfenn.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pysipfenn/tests/test_pysipfenn.py b/pysipfenn/tests/test_pysipfenn.py index 527abc0..839c359 100644 --- a/pysipfenn/tests/test_pysipfenn.py +++ b/pysipfenn/tests/test_pysipfenn.py @@ -343,6 +343,19 @@ def test_descriptorCalculate_KS2022_randomSolution_serial_multiple(self): , places=6, msg="mean_NeighDiff_shell1_Number (KS2022[13]) should be different (1.0vs2.0)." ) + def test_descriptorCalculate_KS2022_randomSolution_parallel_pair(self): + """Test successful execution of a composition-structure pair in parallel mode. 
Just for the input passing + validation.""" + + with self.subTest(msg="Running single composition-structure pair"): + d1 = self.c.calculate_KS2022_randomSolutions( + 'BCC', + 'FeNi', + mode='parallel', + max_workers=1) + self.assertEqual(len(d1), 1, "Only one composition-structure pair should be processed.") + self.assertEqual(len(d1[0]), 256, "All 256 KS2022 features should be obtained.") + if __name__ == '__main__': unittest.main() From ebf3aeef70d9f020204b3b46a79b7ce5d7ac1ddb Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 18:40:04 -0500 Subject: [PATCH 47/59] (RSS) adding tests for `calculate_KS2022_randomSolutions` with matrix of input types: parallel two lists --- pysipfenn/tests/test_pysipfenn.py | 33 ++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/pysipfenn/tests/test_pysipfenn.py b/pysipfenn/tests/test_pysipfenn.py index 839c359..01f12ce 100644 --- a/pysipfenn/tests/test_pysipfenn.py +++ b/pysipfenn/tests/test_pysipfenn.py @@ -6,7 +6,7 @@ from importlib import resources from natsort import natsorted -from pymatgen.core import Structure +from pymatgen.core import Structure, Composition IN_GITHUB_ACTIONS = os.getenv("GITHUB_ACTIONS") == "true" and os.getenv("MODELS_FETCHED") != "true" @@ -356,6 +356,37 @@ def test_descriptorCalculate_KS2022_randomSolution_parallel_pair(self): self.assertEqual(len(d1), 1, "Only one composition-structure pair should be processed.") self.assertEqual(len(d1[0]), 256, "All 256 KS2022 features should be obtained.") + def test_descriptorCalculate_KS2022_randomSolution_parallel_multiple(self): + """Test successful execution of manu composition-structure pairs given in ordered lists of input.""" + myBCC = self.c.prototypeLibrary['BCC']['structure'] + + with self.subTest(msg="Running multiple compositions occupying multiple prototypes"): + d2 = self.c.calculate_KS2022_randomSolutions( + ['FCC', myBCC, 'BCC', 'HCP'], + 
['WMo', Composition('WMo'), 'FeNi', 'CrNi'], + mode='parallel', + max_workers=4) + self.assertEqual(len(d2), 4, "Four composition-structure pairs should be processed.") + for i in range(4): + self.assertEqual(len(d2[i]), 256, "All 256 KS2022 features should be obtained.") + self.assertNotAlmostEquals( + float(d2[0][0]), + float(d2[1][0]), + places=6, msg="Coordination number (KS2022[0]) should be different for BCC and FCC.") + self.assertAlmostEqual( + float(d2[1][0]), + float(d2[2][0]), + places=6, msg="Coordination number (KS2022[0]) should be the same for both BCCs.") + + with self.subTest(msg='Verify that the metadata was correctly recorded.'): + assert len(self.c.metas['RSS']) == 4, "There should be 4 metadata records." + for meta in self.c.metas['RSS']: + self.assertIn('diffHistory', meta) + self.assertIn('propHistory', meta) + self.assertIn('finalAtomsN', meta) + self.assertIn('finalCompositionDistance', meta) + self.assertIn('finalComposition', meta) + if __name__ == '__main__': unittest.main() From 549371fc2affbdff20afc307791ee3b10aa75e57 Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 19:12:49 -0500 Subject: [PATCH 48/59] (core) manu docstring updates --- pysipfenn/core/pysipfenn.py | 268 +++++++++++++++++++----------------- 1 file changed, 140 insertions(+), 128 deletions(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index 2867b37..2c7d5b5 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -39,20 +39,19 @@ class Calculator: - """ - pySIPFENN Calculator automatically initializes all functionalities including identification and loading - of all available models defined statically in models.json file. It exposes methods for calculating predefined - structure-informed descriptors (feature vectors) and predicting properties using models that utilize them. 
+ """pySIPFENN Calculator automatically initializes all functionalities including identification and loading + of all available models defined statically in the ``models.json`` file. It exposes methods for calculating predefined + structure-informed descriptors (feature vectors) and predicting properties using models that utilize them. Args: - autoLoad: Automatically load all available ML models based on the `models.json` file. This _will_ require + autoLoad: Automatically load all available ML models based on the ``models.json`` file. This `will` require significant memory and time if they are available, so for featurization and other non-model-requiring tasks, it is recommended to set this to False. Defaults to True. verbose: Print initialization messages and several other non-critical messages during runtime procedures. Defaults to True. Attributes: - models: Dictionary with all model information based on the models.json file in the modelsSIPFENN + models: Dictionary with all model information based on the ``models.json`` file in the modelsSIPFENN directory. The keys are the network names and the values are dictionaries with the model information. loadedModels: Dictionary with all loaded models. The keys are the network names and the values are the loaded pytorch models. @@ -69,6 +68,7 @@ class Calculator: def __init__(self, autoLoad: bool = True, verbose: bool = True): + """Initializes the pySIPFENN Calculator object.""" if verbose: print('********* Initializing pySIPFENN Calculator **********') self.verbose = verbose @@ -107,7 +107,7 @@ def __init__(self, print(f'********* pySIPFENN Successfully Initialized **********') def __str__(self): - """Prints the status of the `Calculator` object.""" + """Prints the status of the ``Calculator`` object.""" printOut = f'pySIPFENN Calculator Object. Version: {__version__}\n' printOut += f'Models are located in:\n {resources.files("pysipfenn.modelsSIPFENN")}\n' printOut += f'Auxiliary files (incl. 
structure prototypes):\n {resources.files("pysipfenn.misc")}\n{"-" * 80}\n' @@ -143,18 +143,18 @@ def parsePrototypeLibrary(self, customPath: str = "default", verbose: bool = False, printCustomLibrary: bool = False) -> None: - """Parses the prototype library YAML file in the `misc` directory, interprets them into pymatgen `Structure` - objects, and stores them in the `self.prototypeLibrary` dict attribute of the Calculator object. You can use it + """Parses the prototype library YAML file in the ``misc`` directory, interprets them into pymatgen ``Structure`` + objects, and stores them in the ``self.prototypeLibrary`` dict attribute of the ``Calculator`` object. You can use it also to temporarily append a custom prototype library (by providing a path) which will live as long as the - Calculator. For permanent changes, use `appendPrototypeLibrary()`. + Calculator. For permanent changes, use ``appendPrototypeLibrary()``. Args: - customPath: Path to the prototype library YAML file. Defaults to the magic string 'default', which loads the + customPath: Path to the prototype library YAML file. Defaults to the magic string ``"default"``, which loads the default prototype library included in the package in the `misc` directory. - verbose: If True, it prints the number of prototypes loaded. Defaults to False, but note that `Calculator` - class automatically initializes with verbose=True. + verbose: If True, it prints the number of prototypes loaded. Defaults to False, but note that ``Calculator`` + class automatically initializes with ``verbose=True``. printCustomLibrary: If True, it prints the name and POSCAR of each prototype being added to the prototype - library. Has no effect if customPath is 'default'. Defaults to False. + library. Has no effect if customPath is ``'default'``. Defaults to False. Returns: None @@ -214,7 +214,7 @@ def downloadModels(self, network: str = 'all') -> None: the existing one. If the network name is not recognized, the message will be printed. 
Args: - network: Name of the network to download. Defaults to 'all'. + network: Name of the network to download. Defaults to ``'all'``. """ with resources.files('pysipfenn.modelsSIPFENN') as modelPath: @@ -256,18 +256,19 @@ def calculate_Ward2017(self, structList: List[Structure], mode: str = 'serial', max_workers: int = 4) -> list: - """Calculates Ward2017 descriptors for a list of structures. The calculation can be done in serial or parallel + """Calculates `Ward2017` descriptors for a list of structures. The calculation can be done in serial or parallel mode. In parallel mode, the number of workers can be specified. The results are stored in the descriptorData attribute. The function returns the list of descriptors as well. Args: structList: List of structures to calculate descriptors for. The structures must be initialized with the pymatgen Structure class. - mode: Mode of calculation. Defaults to 'serial'. Options are 'serial' and 'parallel'. - max_workers: Number of workers to use in parallel mode. Defaults to 4. + mode: Mode of calculation. Defaults to 'serial'. Options are ``'serial'`` and ``'parallel'``. + max_workers: Number of workers to use in parallel mode. Defaults to ``4``. If ``None``, the number of workers + will be set to the number of available CPU cores. If set to ``0``, 1 worker will be used. Returns: - List of Ward2017 descriptor (feature vector) for each structure. + List of ``Ward2017`` descriptor (feature vector) for each structure. """ if mode == 'serial': @@ -291,9 +292,10 @@ def calculate_KS2022(self, Args: structList: List of structures to calculate descriptors for. The structures must be - initialized with the pymatgen Structure class. - mode: Mode of calculation. Defaults to 'serial'. Options are 'serial' and 'parallel'. - max_workers: Number of workers to use in parallel mode. Defaults to 8. + initialized with the pymatgen ``Structure`` class. + mode: Mode of calculation. Defaults to 'serial'. 
Options are ``'serial'`` and ``'parallel'``. + max_workers: Number of workers to use in parallel mode. Defaults to ``8``. If ``None``, the number of workers + will be set to the number of available CPU cores. If set to ``0``, 1 worker will be used. Returns: List of KS2022 descriptor (feature vector) for each structure. @@ -326,15 +328,16 @@ def calculate_KS2022_dilute(self, dilute structures (either based on pure elements and on custom base structures, e.g. TCP endmember configurations) that contain a single alloying atom. The structures must be initialized with the pymatgen Structure class. - baseStruct: Non-diluted references for the dilute structures. Defaults to 'pure', which assumes that the structures + baseStruct: Non-diluted references for the dilute structures. Defaults to ``'pure'``, which assumes that the structures are based on pure elements and generates references automatically. Alternatively, a list of structures can be provided, which can be either pure elements or custom base structures (e.g. TCP endmember configurations). - mode: Mode of calculation. Defaults to 'serial'. Options are 'serial' and 'parallel'. - max_workers: Number of workers to use in parallel mode. Defaults to 8. + mode: Mode of calculation. Defaults to 'serial'. Options are ``'serial'`` and ``'parallel'``. + max_workers: Number of workers to use in parallel mode. Defaults to ``8``. If ``None``, the number of workers + will be set to the number of available CPU cores. If set to ``0``, 1 worker will be used. Returns: - List of KS2022 descriptor (feature vector) np.ndarray for each structure. + List of KS2022 descriptor (feature vector) ``np.ndarray`` for each structure. 
""" if baseStruct == 'pure' or isinstance(baseStruct, Structure): @@ -385,21 +388,21 @@ def calculate_KS2022_randomSolutions( mode: str = 'serial', max_workers: int = 8) -> List[np.ndarray]: """Calculates KS2022 descriptors corresponding to random solid solutions occupying base structure / lattice - sites for a list of compositions through method described in `descriptorDefinitions.KS2022_randomSolutions` + sites for a list of compositions through method described in ``descriptorDefinitions.KS2022_randomSolutions`` submodule. The results are stored in the descriptorData attribute. The function returns the list of descriptors in numpy format as well. Args: baseStructList: The base structure to generate a random solid solution (RSS). It does _not_ need to be a simple - Bravis lattice, such as BCC lattice, but can be any `Structure` object or a list of them, if you need to + Bravis lattice, such as BCC lattice, but can be any ``Structure`` object or a list of them, if you need to define them on per-case basis. In addition to `Structure` objects, you can use "magic" strings corresponding to one of the structures in the library you can find under `pysipfenn.misc` directory or loaded under `self.prototypeLibrary` attribute. The magic strings include, but are not limited to: 'BCC', 'FCC', 'HCP', 'DHCP', 'Diamond', and so on. You can invoke them by their name, e.g. `BCC`, or - by passing `self.prototypeLibrary['BCC']['structure']` directly. If you pass a list to `baseStruct`, - you are allowed to mix-and-match `Structure` objects and magic strings. + by passing ``self.prototypeLibrary['BCC']['structure']`` directly. If you pass a list to ``baseStruct``, + you are allowed to mix-and-match ``Structure`` objects and magic strings. compList: The composition to populate the supercell with until KS2022 descriptor converges. 
You can use - pymatgen's `Composition` objects or strings of valid chemical formulas (symbol - atomic fraction pairs), + pymatgen's ``Composition`` objects or strings of valid chemical formulas (symbol - atomic fraction pairs), like 'Fe0.5Ni0.3Cr0.2', 'Fe50 Ni30 Cr20', or 'Fe5 Ni3 Cr2'. You can either pass a single entity, in which case it will be used for all structures (use to run the same composition for different base structures), or a list of entities, in which case pairs will be used in the order of the list. If you @@ -416,27 +419,27 @@ def calculate_KS2022_randomSolutions( featureConvergenceCriterion: The maximum difference between any feature belonging to the current iteration (statistics based on the global ensemble of local chemical environments) and the previous iteration (before last expansion) expressed as a fraction of the maximum value of each feature found in the OQMD - database at the time of SIPFENN creation (see `KS2022_randomSolutions/maxFeaturesInOQMD` array). - The default value is 0.01, corresponding to 1% of the maximum value. + database at the time of SIPFENN creation (see ``KS2022_randomSolutions/maxFeaturesInOQMD`` array). + The default value is ``0.01``, corresponding to 1% of the maximum value. compositionConvergenceCriterion: The maximum average difference between any element fraction belonging to the current composition (net of all expansions) and the target composition (`comp`). The default value - is 0.01, corresponding to 1% deviation, which interpretation will depend on the number of elements + is ``0.01``, corresponding to 1% deviation, which interpretation will depend on the number of elements in the composition. minimumElementOccurrences: The minimum number of times all elements must occur in the composition before it is considered converged. This setting prevents the algorithm from converging before very dilute elements - like C in low-carbon steel, have had a chance to occur. The default value is 10. 
- plotParameters: If True, the convergence history will be plotted using plotly. The default value is False, + like C in low-carbon steel, have had a chance to occur. The default value is ``10``. + plotParameters: If True, the convergence history will be plotted using plotly. The default value is ``False``, but tracking them is recommended and will be accessible in the `metas` attribute of the Calculator under - the key `RSS`. + the key ``'RSS'``. printProgress: If True, the progress will be printed to the console. The default value is False. mode: Mode of calculation. Options are `serial` (default) and `parallel`. - max_workers: Number of workers to use in parallel mode. Defaults to 8. + max_workers: Number of workers to use in parallel mode. Defaults to ``8``. Returns: - A list of `numpy.ndarray`s containing the KS2022 descriptor, just like the ordinary `KS2022`. **Please note + A list of ``numpy.ndarray``s containing the KS2022 descriptor, just like the ordinary ``KS2022``. **Please note the stochastic nature of this algorithm**. The result will likely vary slightly between runs and parameters, - so if convergence is critical, verify it with a test matrix of `minimumSitesPerExpansion`, - `featureConvergenceCriterion`, and `compositionConvergenceCriterion` values. + so if convergence is critical, verify it with a test matrix of ``minimumSitesPerExpansion``, + ``featureConvergenceCriterion``, and ``compositionConvergenceCriterion`` values. """ # LIST-LIST: Assert that if both baseStruct and compList are lists, they have the same length if isinstance(baseStructList, list) and isinstance(compList, list): @@ -513,20 +516,26 @@ def calculate_KS2022_randomSolutions( return descList def loadModels(self, network: str = 'all') -> None: - """ - Load model/models into memory of the Calculator class. The models are loaded from the modelsSIPFENN directory inside - the package. It's location can be seen by calling print() on the Calculator. 
The models are stored in the - loadedModels attribute as a dictionary with the network string as key and the PyTorch model as value. - - Note: - This function only works with models that are stored in the modelsSIPFENN directory inside the package, - are in ONNX format, and have corresponding entries in models.json. For all others, you will need to use - loadModelCustom(). - - Args: - network: Default is 'all', which loads all models detected as available. Alternatively, a specific model - can be loaded by its corresponding key in models.json. E.g. 'SIPFENN_Krajewski2020_NN9' or - 'SIPFENN_Krajewski2022_NN30'. The key is the same as the network argument in downloadModels(). + """Load model/models into memory of the ``Calculator`` class. The models are loaded from the ``modelsSIPFENN`` directory + inside the package. Its location can be seen by calling ``print()`` on the ``Calculator``. The models are stored in the + loadedModels attribute as a dictionary with the network string as key and the PyTorch model as value. + + Note: + This function only works with models that are stored in the ``modelsSIPFENN`` directory inside the package, + are in ONNX format, and have corresponding entries in ``models.json``. For all others, you will need to use + ``loadModelCustom()``. + + Args: + network: Default is ``'all'``, which loads all models detected as available. Alternatively, a specific model + can be loaded by its corresponding key in models.json. E.g. ``'SIPFENN_Krajewski2020_NN9'`` or + ``'SIPFENN_Krajewski2022_NN30'``. The key is the same as the network argument in downloadModels(). + + Raises: + ValueError: If the network name is not recognized or if the model is not available in the modelsSIPFENN + directory. + + Returns: + None. It updates the loadedModels attribute of the Calculator class. 
""" with resources.files('pysipfenn.modelsSIPFENN') as modelPath: if network == 'all': @@ -546,20 +555,19 @@ def loadModels(self, network: str = 'all') -> None: 'to download the models. Currently available models are: ', self.network_list_available) def loadModelCustom(self, networkName: str, modelName: str, descriptor: str, modelDirectory: str = '.') -> None: - """ - Load a custom ONNX model from a custom directory specified by the user. The primary use case for this - function is to load models that are not included in the package and cannot be placed in the package - directory because of write permissions (e.g. on restrictive HPC systems) or storage allocations. - - Args: - modelDirectory: Directory where the model is located. Defaults to the current directory. - networkName: Name of the network. This is the name used to refer to the ONNX network. It has to be - unique, not contain any spaces, and correspond to the name of the ONNX file (excluding the .onnx - extension). - modelName: Name of the model. This is the name that will be displayed in the model selection menu. It - can be any string desired. - descriptor: Descriptor/feature vector used by the model. pySIPFENN currently supports the following - descriptors: KS2022, and Ward2017. + """Load a custom ONNX model from a custom directory specified by the user. The primary use case for this + function is to load models that are not included in the package and cannot be placed in the package + directory because of write permissions (e.g. on restrictive HPC systems) or storage allocations. + + Args: + modelDirectory: Directory where the model is located. Defaults to the current directory. + networkName: Name of the network. This is the name used to refer to the ONNX network. It has to be + unique, not contain any spaces, and correspond to the name of the ONNX file (excluding the ``.onnx`` + extension). + modelName: Name of the model. This is the name that will be displayed in the model selection menu. 
It + can be any string desired. + descriptor: Descriptor/feature vector used by the model. pySIPFENN currently supports the following + descriptors: ``'KS2022'``, and ``'Ward2017'``. """ self.loadedModels.update({ @@ -579,22 +587,25 @@ def makePredictions(self, models: Dict[str, torch.nn.Module], toRun: List[str], dataInList: List[Union[List[float], np.array]]) -> List[list]: - """Makes predictions using PyTorch networks listed in toRun and provided in models dictionary. + """Makes predictions using PyTorch networks listed in toRun and provided in models dictionary. Shared among all + "predict" functions. Args: - models: Dictionary of models to use. Keys are network names and values are PyTorch models. - toRun: List of networks to run. Must be a subset of models.keys(). + models: Dictionary of models to use. Keys are network names and values are PyTorch models loaded from ONNX + with ``loadModels()`` / ``loadModelCustom()`` or manually (fairly simple!). + toRun: List of networks to run. It must be a subset of ``models.keys()``. dataInList: List of data to make predictions for. Each element of the list should be a descriptor accepted by all networks in toRun. Can be a list of lists of floats or a list of numpy arrays. Returns: - List of predictions. Each element of the list is a list of predictions for all ran network. The order of the - predictions is the same as the order of the networks in toRun. + List of predictions. Each element of the list is a list of predictions for all run networks. The order of the + predictions is the same as the order of the networks in ``toRun``. """ dataOuts = [] print('Making predictions...') # Run for each network dataIn = torch.from_numpy(np.array(dataInList)).float() + assert set(toRun).issubset(set(models.keys())), 'Some networks to run are not available in the models.' 
for net in toRun: t0 = perf_counter() model = models[net] @@ -615,11 +626,11 @@ def makePredictions(self, def findCompatibleModels(self, descriptor: str) -> List[str]: """Finds all models compatible with a given descriptor based on the descriptor definitions loaded from the - models.json file. + ``models.json`` file. Args: descriptor: Descriptor to use. Must be one of the available descriptors. See pysipfenn.descriptorDefinitions - to see available modules or add yours. Available default descriptors are: 'Ward2017', 'KS2022'. + to see available modules or add yours. Available default descriptors are: ``'Ward2017'``, ``'KS2022'``. Returns: List of compatible models. @@ -642,12 +653,12 @@ def runModels(self, Args: descriptor: Descriptor to use. Must be one of the available descriptors. See pysipfenn.descriptorDefinitions - to see available modules or add yours. Available default descriptors are: 'Ward2017', 'KS2022'. + to see available modules or add yours. Available default descriptors are: ``'Ward2017'``, ``'KS2022'``. structList: List of pymatgen Structure objects to run the models on. - mode: Computation mode. 'serial' or 'parallel'. Default is 'serial'. Parallel mode is not recommended for + mode: Computation mode. ``'serial'`` or ``'parallel'``. Default is ``'serial'``. Parallel mode is not recommended for small datasets. - max_workers: Number of workers to use in parallel mode. Default is 4. Ignored in serial mode. If set to - None, will use all available cores. If set to 0, will use 1 core. + max_workers: Number of workers to use in parallel mode. Default is ``4``. Ignored in serial mode. If set to + ``None``, will use all available cores. If set to ``0``, will use ``1`` core. Returns: List of predictions. Each element of the list is a list of predictions for all ran networks. 
The @@ -693,33 +704,33 @@ def runModels_dilute(self, mode: str = 'serial', max_workers: int = 4) -> List[list]: """Runs all loaded models on a list of Structures using specified descriptor. A critical difference - from runModels() is that this function will call dilute-specific featurizer, e.g. KS2022_dilute when KS2022 is + from runModels() is that this function will call dilute-specific featurizer, e.g. ``KS2022_dilute`` when ``'KS2022'`` is provided as input, which can only be used on dilute structures (both based on pure elements and on custom base structures, e.g. TCP endmember configurations) that contain a single alloying atom. Speed increases are substantial compared to the KS2022 descriptor, which is more general and can be used on any structure. - Supports serial and parallel modes in the same way as runModels(). + Supports serial and parallel modes in the same way as ``runModels()``. Args: descriptor: Descriptor to use for predictions. Must be one of the descriptors which support the dilute - structures (i.e. *_dilute). See pysipfenn.descriptorDefinitions to see available modules or add yours - here. Available default dilute descriptors are now: 'KS2022'. The 'KS2022' can also be called from + structures (i.e. `*_dilute`). See pysipfenn.descriptorDefinitions to see available modules or add yours + here. Available default dilute descriptors are now: ``'KS2022'``. The ``'KS2022'`` can also be called from runModels() function, but is not recommended for dilute alloys, as it negates the speed increase of the dilute structure featurizer. - structList: List of pymatgen Structure objects to run the models on. Must be dilute structures as described + structList: List of pymatgen ``Structure`` objects to run the models on. Must be dilute structures as described above. baseStruct: Non-diluted references for the dilute structures. Defaults to 'pure', which assumes that the structures are based on pure elements and generates references automatically. 
Alternatively, a list of structures can be provided, which can be either pure elements or custom base structures (e.g. TCP endmember configurations). - mode: Computation mode. 'serial' or 'parallel'. Default is 'serial'. Parallel mode is not recommended for + mode: Computation mode. ``'serial'`` or ``'parallel'``. Default is ``'serial'``. Parallel mode is not recommended for small datasets. - max_workers: Number of workers to use in parallel mode. Default is 4. Ignored in serial mode. If set to - None, will use all available cores. If set to 0, will use 1 core. + max_workers: Number of workers to use in parallel mode. Default is ``4``. Ignored in serial mode. If set to + ``None``, will use all available cores. If set to ``0``, will use ``1`` core. Returns: - List of predictions. Each element of the list is a list of predictions for all ran networks. The + List of predictions. Each element of the list is a list of predictions for all run networks. The order of the predictions is the same as the order of the input structures. The order of the networks - is the same as the order of the networks in self.network_list_available. If a network is not available, + is the same as the order of the networks in ``network_list_available``. If a network is not available, it will not be included in the list. If a network is not compatible with the selected descriptor, it will not be included in the list. """ @@ -752,7 +763,7 @@ def runModels_dilute(self, def get_resultDicts(self) -> List[dict]: """Returns a list of dictionaries with the predictions for each network. The keys of the dictionaries are the names of the networks. The order of the dictionaries is the same as the order of the input structures passed - through runModels() functions. + through ``runModels()`` functions. Returns: List of dictionaries with the predictions. @@ -763,9 +774,9 @@ def get_resultDictsWithNames(self) -> List[dict]: """Returns a list of dictionaries with the predictions for each network. 
The keys of the dictionaries are the names of the networks and the names of the input structures. The order of the dictionaries is the same as the order of the input structures passed through runModels() functions. Note that this function requires - self.inputFiles to be set, which is done automatically when using runFromDirectory() or - runFromDirectory_dilute() but not when using runModels() or runModels_dilute(), as the input structures are - passed directly to the function and names have to be provided separately by assigning them to self.inputFiles. + self.inputFiles to be set, which is done automatically when using ``runFromDirectory()`` or + ``runFromDirectory_dilute()`` but not when using ``runModels()`` or ``runModels_dilute()``, as the input structures are + passed directly to the function and names have to be provided separately by assigning them to ``self.inputFiles``. Returns: List of dictionaries with the predictions. @@ -784,30 +795,30 @@ def runFromDirectory(self, max_workers: int = 4 ) -> List[list]: """Runs all loaded models on a list of Structures it automatically imports from a specified directory. The - directory must contain only atomic structures in formats such as 'poscar', 'cif', 'json', 'mcsqs', etc., + directory must contain only atomic structures in formats such as ``'poscar'``, ``'cif'``, ``'json'``, ``'mcsqs'``, etc., or a mix of these. The structures are automatically sorted using natsort library, so the order of the structures in the directory, as defined by the operating system, is not important. Natural sorting, - for example, will sort the structures in the following order: '1-Fe', '2-Al', '10-xx', '11-xx', '20-xx', - '21-xx', '11111-xx', etc. This is useful when the structures are named using a numbering system. The order of + for example, will sort the structures in the following order: ``'1-Fe'``, ``'2-Al'``, ``'10-xx'``, ``'11-xx'``, ``'20-xx'``, + ``'21-xx'``, ``'11111-xx'``, etc. 
This is useful when the structures are named using a numbering system. The order of the predictions is the same as the order of the input structures. The order of the networks in a prediction - is the same as the order of the networks in self.network_list_available. If a network is not available, + is the same as the order of the networks in ``self.network_list_available``. If a network is not available, it will not be included in the list. Args: directory: Directory containing the structures to run the models on. The directory must contain only atomic - structures in formats such as 'poscar', 'cif', 'json', 'mcsqs', etc., or a mix of these. The structures + structures in formats such as ``'poscar'``, ``'cif'``, ``'json'``, ``'mcsqs'``, etc., or a mix of these. The structures are automatically sorted as described above. - descriptor: Descriptor to use. Must be one of the available descriptors. See pysipgenn.descriptorDefinitions + descriptor: Descriptor to use. Must be one of the available descriptors. See ``pysipfenn.descriptorDefinitions`` for a list of available descriptors. - mode: Computation mode. 'serial' or 'parallel'. Default is 'serial'. Parallel mode is not recommended for + mode: Computation mode. ``'serial'`` or ``'parallel'``. Default is ``'serial'``. Parallel mode is not recommended for small datasets. - max_workers: Number of workers to use in parallel mode. Default is 4. Ignored in serial mode. If set to - None, will use all available cores. If set to 0, will use 1 core. + max_workers: Number of workers to use in parallel mode. Default is ``4``. Ignored in serial mode. If set to + ``None``, will use all available cores. If set to ``0``, will use 1 core. Returns: - List of predictions. Each element of the list is a list of predictions for all ran networks. The order of + List of predictions. Each element of the list is a list of predictions for all run networks. The order of the predictions is the same as the order of the input structures. 
The order of the networks is the same as - the order of the networks in self.network_list_available. If a network is not available, it will not be + the order of the networks in ``self.network_list_available``. If a network is not available, it will not be included in the list. """ @@ -841,21 +852,21 @@ def runFromDirectory_dilute(self, structures in formats such as 'poscar', 'cif', 'json', 'mcsqs', etc., or a mix of these. The structures are automatically sorted as described above. The structures must be dilute structures, i.e. they must contain only one alloying element. - descriptor: Descriptor to use. Must be one of the available descriptors. See pysipgenn.descriptorDefinitions + descriptor: Descriptor to use. Must be one of the available descriptors. See ``pysipfenn.descriptorDefinitions`` for a list of available descriptors. - baseStruct: Non-diluted references for the dilute structures. Defaults to 'pure', which assumes that the + baseStruct: Non-diluted references for the dilute structures. Defaults to ``'pure'``, which assumes that the structures are based on pure elements and generates references automatically. Alternatively, a list of structures can be provided, which can be either pure elements or custom base structures (e.g. TCP endmember configurations). mode: Computation mode. 'serial' or 'parallel'. Default is 'serial'. Parallel mode is not recommended for small datasets. - max_workers: Number of workers to use in parallel mode. Default is 8. Ignored in serial mode. If set to - None, will use all available cores. If set to 0, will use 1 core. + max_workers: Number of workers to use in parallel mode. Default is ``8``. Ignored in serial mode. If set to + ``None``, will use all available cores. If set to ``0``, will use 1 core. Returns: - List of predictions. Each element of the list is a list of predictions for all ran networks. The order of + List of predictions. Each element of the list is a list of predictions for all run networks. 
The order of the predictions is the same as the order of the input structures. The order of the networks is the same as - the order of the networks in self.network_list_available. If a network is not available, it will not be + the order of the networks in ``self.network_list_available``. If a network is not available, it will not be included in the list. """ @@ -871,14 +882,14 @@ def runFromDirectory_dilute(self, print('Done!') def writeResultsToCSV(self, file: str) -> None: - """Writes the results to a CSV file. The first column is the name of the structure. If the self.inputFiles - attribute is populated automatically by runFromDirectory() or set manually, the names of the structures will - be used. Otherwise, the names will be '1', '2', '3', etc. The remaining columns are the predictions for each - network. The order of the columns is the same as the order of the networks in self.network_list_available. + """Writes the results to a CSV file. The first column is the name of the structure. If the ``self.inputFiles`` + attribute is populated automatically by ``runFromDirectory()`` or set manually, the names of the structures will + be used. Otherwise, the names will be ``'1'``, ``'2'``, ``'3'``, etc. The remaining columns are the predictions for each + network. The order of the columns is the same as the order of the networks in ``self.network_list_available``. Args: file: Name of the file to write the results to. If the file already exists, it will be overwritten. If the - file does not exist, it will be created. The file must have a '.csv' extension to be recognized + file does not exist, it will be created. The file must have a ``'.csv'`` extension to be recognized correctly. """ @@ -898,15 +909,16 @@ def writeResultsToCSV(self, file: str) -> None: def writeDescriptorsToCSV(self, descriptor: str, file: str = 'descriptorData.csv') -> None: """Writes the descriptor data to a CSV file. The first column is the name of the structure. 
If the self.inputFiles attribute is populated automatically by runFromDirectory() or set manually, the names of the - structures will be used. Otherwise, the names will be '1', '2', '3', etc. The remaining columns are the + structures will be used. Otherwise, the names will be ``'1'``, ``'2'``, ``'3'``, etc. The remaining columns are the descriptor values. The order of the columns is the same as the order of the labels in the descriptor definition file. Args: - descriptor: Descriptor to use. Must be one of the available descriptors. See pysipgenn.descriptorDefinitions - for a list of available descriptors, such as 'KS2022' and 'Ward2017'. + descriptor: Descriptor to use. Must be one of the available descriptors. See ``pysipfenn.descriptorDefinitions`` + for a list of available descriptors, such as ``'KS2022'`` and ``'Ward2017'``. It provides the labels for the + descriptor values. file: Name of the file to write the results to. If the file already exists, it will be overwritten. If the - file does not exist, it will be created. The file must have a '.csv' extension to be recognized + file does not exist, it will be created. The file must have a ``'.csv'`` extension to be recognized correctly. """ @@ -931,13 +943,13 @@ def writeDescriptorsToCSV(self, descriptor: str, file: str = 'descriptorData.csv # UTILS def ward2ks2022(ward2017: np.ndarray) -> np.ndarray: - """Converts a Ward 2017 descriptor to a KS2022 descriptor (which is its subset). + """Converts a ``Ward2017`` descriptor to a ``KS2022`` descriptor (which is its subset). Args: - ward2017: Ward2017 descriptor. Must be a 1D NumPy array of length 271. + ward2017: ``Ward2017`` descriptor. Must be a 1D ``np.ndarray`` of length ``271``. Returns: - KS2022 descriptor array. + ``KS2022`` descriptor array. 
""" assert isinstance(ward2017, np.ndarray) @@ -954,14 +966,14 @@ def ward2ks2022(ward2017: np.ndarray) -> np.ndarray: return ks2022 def overwritePrototypeLibrary(prototypeLibrary: dict) -> None: - """Destructively overwrites the prototype library with a custom one. Used by the `appendPrototypeLibrary` function + """Destructively overwrites the prototype library with a custom one. Used by the ``appendPrototypeLibrary`` function to persist its changes. The other main use it to restore the default one to the original state based on a backup made earlier (see tests for an example).""" yaml_customDumper = YAML() yaml_customDumper.top_level_colon_align = True with resources.files('pysipfenn.misc').joinpath('prototypeLibrary.yaml').open('w+') as f: - # Restructutre the prototype library back to original format of a list of dictionaries + # Restructure the prototype library back to the original format of a list of dictionaries print(prototypeLibrary) prototypeList = [ { @@ -977,14 +989,14 @@ def overwritePrototypeLibrary(prototypeLibrary: dict) -> None: # HELPERS def string2prototype(c: Calculator, prototype: str) -> Structure: - """Converts a prototype string to a pymatgen Structure object. + """Converts a prototype string to a pymatgen ``Structure`` object. Args: - c: Calculator object with the `prototypeLibrary`. + c: ``Calculator`` object with the ``prototypeLibrary``. prototype: Prototype string. Returns: - Structure object. + ``Structure`` object. """ assert isinstance(prototype, str), 'Prototype string must be a string.' assert prototype in c.prototypeLibrary, \ From 4bfe6920acc68bd3719365a0164bd11fd346bdec Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 19:15:00 -0500 Subject: [PATCH 49/59] (QoL) removed duration testing from core tests as they did not play well with Windows (will be added back in 15.1 for Linux and MacOS); still present elsewhere --- .github/workflows/partialTest.yml | 2 +- pysipfenn/tests/test_pysipfenn.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/partialTest.yml b/.github/workflows/partialTest.yml index c880efe..9b0a66f 100644 --- a/.github/workflows/partialTest.yml +++ b/.github/workflows/partialTest.yml @@ -28,4 +28,4 @@ jobs: python -m pip install -e . - name: Test with pytest run: | - pytest --durations=0 + pytest diff --git a/pysipfenn/tests/test_pysipfenn.py b/pysipfenn/tests/test_pysipfenn.py index 01f12ce..7ea34ef 100644 --- a/pysipfenn/tests/test_pysipfenn.py +++ b/pysipfenn/tests/test_pysipfenn.py @@ -386,7 +386,7 @@ def test_descriptorCalculate_KS2022_randomSolution_parallel_multiple(self): self.assertIn('finalAtomsN', meta) self.assertIn('finalCompositionDistance', meta) self.assertIn('finalComposition', meta) - + if __name__ == '__main__': unittest.main() From 78f19db0732d7f6e670f01350adcbcadf492f8d0 Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 19:36:55 -0500 Subject: [PATCH 50/59] (core) many core docstring updates --- pysipfenn/core/pysipfenn.py | 144 ++++++++++++++++++------------------ 1 file changed, 73 insertions(+), 71 deletions(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index 2c7d5b5..cac928e 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -43,26 +43,26 @@ class Calculator: of all available models defined statically in the ``models.json`` file. It exposes methods for calculating predefined structure-informed descriptors (feature vectors) and predicting properties using models that utilize them. 
- Args: - autoLoad: Automatically load all available ML models based on the ``models.json`` file. This `will` require - significant memory and time if they are available, so for featurization and other non-model-requiring - tasks, it is recommended to set this to False. Defaults to True. - verbose: Print initialization messages and several other non-critical messages during runtime procedures. - Defaults to True. - - Attributes: - models: Dictionary with all model information based on the ``models.json`` file in the modelsSIPFENN - directory. The keys are the network names and the values are dictionaries with the model information. - loadedModels: Dictionary with all loaded models. The keys are the network names and the values - are the loaded pytorch models. - descriptorData: List of all descriptor data created during the last predictions run. The order - of the list corresponds to the order of atomic structures given to models as input. The order of the - list of descriptor data for each structure corresponds to the order of networks in the toRun list. - predictions: List of all predictions created during the last predictions run. The order of the - list corresponds to the order of atomic structures given to models as input. The order of the list - of predictions for each structure corresponds to the order of networks in the toRun list. - inputFiles: List of all input file names used during the last predictions run. The order of the list - corresponds to the order of atomic structures given to models as input. + Args: + autoLoad: Automatically load all available ML models based on the ``models.json`` file. This `will` require + significant memory and time if they are available, so for featurization and other non-model-requiring + tasks, it is recommended to set this to ``False``. Defaults to ``True``. + verbose: Print initialization messages and several other non-critical messages during runtime procedures. + Defaults to True. 
+ + Attributes: + models: Dictionary with all model information based on the ``models.json`` file in the modelsSIPFENN + directory. The keys are the network names and the values are dictionaries with the model information. + loadedModels: Dictionary with all loaded models. The keys are the network names and the values + are the loaded pytorch models. + descriptorData: List of all descriptor data created during the last predictions run. The order + of the list corresponds to the order of atomic structures given to models as input. The order of the + list of descriptor data for each structure corresponds to the order of networks in the toRun list. + predictions: List of all predictions created during the last predictions run. The order of the + list corresponds to the order of atomic structures given to models as input. The order of the list + of predictions for each structure corresponds to the order of networks in the toRun list. + inputFiles: List of all input file names used during the last predictions run. The order of the list + corresponds to the order of atomic structures given to models as input. """ def __init__(self, @@ -146,15 +146,15 @@ def parsePrototypeLibrary(self, """Parses the prototype library YAML file in the ``misc`` directory, interprets them into pymatgen ``Structure`` objects, and stores them in the ``self.prototypeLibrary`` dict attribute of the ``Calculator`` object. You can use it also to temporarily append a custom prototype library (by providing a path) which will live as long as the - Calculator. For permanent changes, use ``appendPrototypeLibrary()``. + ``Calculator``. For permanent changes, use ``appendPrototypeLibrary()``. Args: customPath: Path to the prototype library YAML file. Defaults to the magic string ``"default"``, which loads the - default prototype library included in the package in the `misc` directory. - verbose: If True, it prints the number of prototypes loaded. 
Defaults to False, but note that ``Calculator`` + default prototype library included in the package in the ``misc`` directory. + verbose: If True, it prints the number of prototypes loaded. Defaults to ``False``, but note that ``Calculator`` class automatically initializes with ``verbose=True``. printCustomLibrary: If True, it prints the name and POSCAR of each prototype being added to the prototype - library. Has no effect if customPath is ``'default'``. Defaults to False. + library. Has no effect if ``customPath`` is ``'default'``. Defaults to ``False``. Returns: None @@ -192,11 +192,11 @@ class automatically initializes with ``verbose=True``. def appendPrototypeLibrary(self, customPath: str) -> None: """Parses a custom prototype library YAML file and permanently appends it into the internal prototypeLibrary of the pySIPFENN package. They will be persisted for future use and, by default, they will be loaded - automatically when instantiating the Calculator object, similar to your custom models. + automatically when instantiating the ``Calculator`` object, similar to your custom models. Args: - customPath: Path to the prototype library YAML file to be appended to the internal prototypeLibrary of the - pySIPFENN package. + customPath: Path to the prototype library YAML file to be appended to the internal ``self.prototypeLibrary`` + of the ``Calculator`` object. Returns: None @@ -210,8 +210,8 @@ def appendPrototypeLibrary(self, customPath: str) -> None: def downloadModels(self, network: str = 'all') -> None: """Downloads ONNX models. By default, all available models are downloaded. If a model is already available - on disk, it is skipped. If a specific network is given, only that network is downloaded, possibly overwriting - the existing one. If the network name is not recognized, the message will be printed. + on disk, it is skipped. If a specific ``network`` is given, only that network is downloaded, possibly overwriting + the existing one. 
If the ``network`` name is not recognized, the message will be printed. Args: network: Name of the network to download. Defaults to ``'all'``. @@ -256,13 +256,13 @@ def calculate_Ward2017(self, structList: List[Structure], mode: str = 'serial', max_workers: int = 4) -> list: - """Calculates `Ward2017` descriptors for a list of structures. The calculation can be done in serial or parallel - mode. In parallel mode, the number of workers can be specified. The results are stored in the descriptorData + """Calculates ``Ward2017`` descriptors for a list of structures. The calculation can be done in serial or parallel + mode. In parallel mode, the number of workers can be specified. The results are stored in the ``self.descriptorData`` attribute. The function returns the list of descriptors as well. Args: structList: List of structures to calculate descriptors for. The structures must be - initialized with the pymatgen Structure class. + initialized with the pymatgen ``Structure`` class. mode: Mode of calculation. Defaults to 'serial'. Options are ``'serial'`` and ``'parallel'``. max_workers: Number of workers to use in parallel mode. Defaults to ``4``. If ``None``, the number of workers will be set to the number of available CPU cores. If set to ``0``, 1 worker will be used. @@ -286,7 +286,7 @@ def calculate_KS2022(self, structList: List[Structure], mode: str = 'serial', max_workers: int = 8) -> list: - """Calculates KS2022 descriptors for a list of structures. The calculation can be done in serial or parallel + """Calculates ``KS2022`` descriptors for a list of structures. The calculation can be done in serial or parallel mode. In parallel mode, the number of workers can be specified. The results are stored in the descriptorData attribute. The function returns the list of descriptors as well. @@ -298,7 +298,7 @@ def calculate_KS2022(self, will be set to the number of available CPU cores. If set to ``0``, 1 worker will be used. 
Returns: - List of KS2022 descriptor (feature vector) for each structure. + List of ``KS2022`` descriptor (feature vector) for each structure. """ if mode == 'serial': @@ -317,27 +317,27 @@ def calculate_KS2022_dilute(self, baseStruct: Union[str, List[Structure]] = 'pure', mode: str = 'serial', max_workers: int = 8) -> List[np.ndarray]: - """Calculates KS2022 descriptors for a list of dilute structures (either based on pure elements and on custom + """Calculates ``KS2022`` descriptors for a list of dilute structures (either based on pure elements and on custom base structures, e.g. TCP endmember configurations) that contain a single alloying atom. Speed increases are - substantial compared to the KS2022 descriptor, which is more general and can be used on any structure. The + substantial compared to the ``KS2022`` descriptor, which is more general and can be used on any structure. The calculation can be done in serial or parallel mode. In parallel mode, the number of workers can be specified. - The results are stored in the descriptorData attribute. The function returns the list of descriptors as well. + The results are stored in the ``self.descriptorData`` attribute. The function returns the list of descriptors as well. Args: structList: List of structures to calculate descriptors for. The structures must be dilute structures (either based on pure elements and on custom base structures, e.g. TCP endmember configurations) that contain a single alloying atom. The structures must be initialized with the - pymatgen Structure class. + pymatgen ``Structure`` class. baseStruct: Non-diluted references for the dilute structures. Defaults to ``'pure'``, which assumes that the structures are based on pure elements and generates references automatically. Alternatively, a list of structures can be provided, which can be either pure elements or custom base structures (e.g. TCP endmember configurations). - mode: Mode of calculation. Defaults to 'serial'. 
Options are ``'serial'`` and ``'parallel'``. + mode: Mode of calculation. Defaults to ``'serial'``. Options are ``'serial'`` and ``'parallel'``. max_workers: Number of workers to use in parallel mode. Defaults to ``8``. If ``None``, the number of workers will be set to the number of available CPU cores. If set to ``0``, 1 worker will be used. Returns: - List of KS2022 descriptor (feature vector) ``np.ndarray`` for each structure. + List of ``KS2022`` descriptor (feature vector) ``np.ndarray`` for each structure. """ if baseStruct == 'pure' or isinstance(baseStruct, Structure): @@ -387,7 +387,7 @@ def calculate_KS2022_randomSolutions( printProgress: bool = False, mode: str = 'serial', max_workers: int = 8) -> List[np.ndarray]: - """Calculates KS2022 descriptors corresponding to random solid solutions occupying base structure / lattice + """Calculates ``KS2022`` descriptors corresponding to random solid solutions occupying base structure / lattice sites for a list of compositions through method described in ``descriptorDefinitions.KS2022_randomSolutions`` submodule. The results are stored in the descriptorData attribute. The function returns the list of descriptors in numpy format as well. @@ -396,17 +396,17 @@ def calculate_KS2022_randomSolutions( baseStructList: The base structure to generate a random solid solution (RSS). It does _not_ need to be a simple Bravis lattice, such as BCC lattice, but can be any ``Structure`` object or a list of them, if you need to define them on per-case basis. In addition to `Structure` objects, you can use "magic" strings - corresponding to one of the structures in the library you can find under `pysipfenn.misc` directory or - loaded under `self.prototypeLibrary` attribute. The magic strings include, but are not limited to: - 'BCC', 'FCC', 'HCP', 'DHCP', 'Diamond', and so on. You can invoke them by their name, e.g. 
`BCC`, or + corresponding to one of the structures in the library you can find under ``pysipfenn.misc`` directory or + loaded under ``self.prototypeLibrary`` attribute. The magic strings include, but are not limited to: + ``'BCC'``, ``'FCC'``, ``'HCP'``, ``'DHCP'``, ``'Diamond'``, and so on. You can invoke them by their name, e.g. ``BCC``, or by passing ``self.prototypeLibrary['BCC']['structure']`` directly. If you pass a list to ``baseStruct``, you are allowed to mix-and-match ``Structure`` objects and magic strings. compList: The composition to populate the supercell with until KS2022 descriptor converges. You can use pymatgen's ``Composition`` objects or strings of valid chemical formulas (symbol - atomic fraction pairs), - like 'Fe0.5Ni0.3Cr0.2', 'Fe50 Ni30 Cr20', or 'Fe5 Ni3 Cr2'. You can either pass a single entity, in + like ``'Fe0.5Ni0.3Cr0.2'``, ``'Fe50 Ni30 Cr20'``, or ``'Fe5 Ni3 Cr2'``. You can either pass a single entity, in which case it will be used for all structures (use to run the same composition for different base structures), or a list of entities, in which case pairs will be used in the order of the list. If you - pass a list to `compList`, you are allowed to mix-and-match `Composition` objects and composition + pass a list to ``compList``, you are allowed to mix-and-match ``Composition`` objects and composition strings. minimumSitesPerExpansion: The minimum number of sites that the base structure will be expanded to (doubling dimension-by-dimension) before it is used as expansion step/batch in each iteration of adding local @@ -415,14 +415,14 @@ def calculate_KS2022_randomSolutions( Generally, low values (<20ish) will result in a slower convergence, as some extreme local chemical environments will have strong influence on the global ensemble, and too high values (>150ish) will result in a needlessly slow computation for not-complex compositions, as at least two iterations will - be processed. 
The default value is 50 and works well for simple cases. + be processed. The default value is ``50`` and works well for simple cases. featureConvergenceCriterion: The maximum difference between any feature belonging to the current iteration (statistics based on the global ensemble of local chemical environments) and the previous iteration (before last expansion) expressed as a fraction of the maximum value of each feature found in the OQMD - database at the time of SIPFENN creation (see ``KS2022_randomSolutions/maxFeaturesInOQMD`` array). + database at the time of SIPFENN creation (see ``KS2022_randomSolutions.maxFeaturesInOQMD`` array). The default value is ``0.01``, corresponding to 1% of the maximum value. compositionConvergenceCriterion: The maximum average difference between any element fraction belonging to - the current composition (net of all expansions) and the target composition (`comp`). The default value + the current composition (net of all expansions) and the target composition (``comp``). The default value is ``0.01``, corresponding to 1% deviation, which interpretation will depend on the number of elements in the composition. minimumElementOccurrences: The minimum number of times all elements must occur in the composition before it @@ -432,11 +432,11 @@ def calculate_KS2022_randomSolutions( but tracking them is recommended and will be accessible in the `metas` attribute of the Calculator under the key ``'RSS'``. printProgress: If True, the progress will be printed to the console. The default value is False. - mode: Mode of calculation. Options are `serial` (default) and `parallel`. + mode: Mode of calculation. Options are ``serial`` (default) and ``parallel``. max_workers: Number of workers to use in parallel mode. Defaults to ``8``. Returns: - A list of ``numpy.ndarray``s containing the KS2022 descriptor, just like the ordinary ``KS2022``. 
**Please note + A list of ``numpy.ndarray``s containing the ``KS2022`` descriptor, just like the ordinary ``KS2022``. **Please note the stochastic nature of this algorithm**. The result will likely vary slightly between runs and parameters, so if convergence is critical, verify it with a test matrix of ``minimumSitesPerExpansion``, ``featureConvergenceCriterion``, and ``compositionConvergenceCriterion`` values. @@ -518,7 +518,7 @@ def calculate_KS2022_randomSolutions( def loadModels(self, network: str = 'all') -> None: """Load model/models into memory of the ``Calculator`` class. The models are loaded from the ``modelsSIPFENN`` directory inside the package. Its location can be seen by calling ``print()`` on the ``Calculator``. The models are stored in the - loadedModels attribute as a dictionary with the network string as key and the PyTorch model as value. + ``self.loadedModels`` attribute as a dictionary with the network string as key and the PyTorch model as value. Note: This function only works with models that are stored in the ``modelsSIPFENN`` directory inside the package, @@ -528,10 +528,10 @@ def loadModels(self, network: str = 'all') -> None: Args: network: Default is ``'all'``, which loads all models detected as available. Alternatively, a specific model can be loaded by its corresponding key in models.json. E.g. ``'SIPFENN_Krajewski2020_NN9'`` or - ``'SIPFENN_Krajewski2022_NN30'``. The key is the same as the network argument in downloadModels(). + ``'SIPFENN_Krajewski2022_NN30'``. The key is the same as the network argument in ``downloadModels()``. Raises: - ValueError: If the network name is not recognized or if the model is not available in the modelsSIPFENN + ValueError: If the network name is not recognized or if the model is not available in the ``modelsSIPFENN`` directory. Returns: @@ -595,7 +595,7 @@ def makePredictions(self, with ``loadModels()`` / ``loadModelCustom()`` or manually (fairly simple!). toRun: List of networks to run. 
It must be a subset of ``models.keys()``. dataInList: List of data to make predictions for. Each element of the list should be a descriptor accepted - by all networks in toRun. Can be a list of lists of floats or a list of numpy arrays. + by all networks in toRun. Can be a list of lists of floats or a list of numpy ``nd.array``s. Returns: List of predictions. Each element of the list is a list of predictions for all run networks. The order of the @@ -629,11 +629,11 @@ def findCompatibleModels(self, descriptor: str) -> List[str]: ``models.json`` file. Args: - descriptor: Descriptor to use. Must be one of the available descriptors. See pysipfenn.descriptorDefinitions + descriptor: Descriptor to use. Must be one of the available descriptors. See ``pysipfenn.descriptorDefinitions`` to see available modules or add yours. Available default descriptors are: ``'Ward2017'``, ``'KS2022'``. Returns: - List of compatible models. + List of strings corresponding to compatible models. """ compatibleList = [] @@ -652,7 +652,7 @@ def runModels(self, featurization of structures (90-99+% of computational intensity) and models are then run in series. Args: - descriptor: Descriptor to use. Must be one of the available descriptors. See pysipfenn.descriptorDefinitions + descriptor: Descriptor to use. Must be one of the available descriptors. See ``pysipfenn.descriptorDefinitions`` to see available modules or add yours. Available default descriptors are: ``'Ward2017'``, ``'KS2022'``. structList: List of pymatgen Structure objects to run the models on. mode: Computation mode. ``'serial'`` or ``'parallel'``. Default is ``'serial'``. Parallel mode is not recommended for @@ -663,7 +663,7 @@ def runModels(self, Returns: List of predictions. Each element of the list is a list of predictions for all ran networks. The order of the predictions is the same as the order of the input structures. The order of the networks is - the same as the order of the networks in self.network_list_available. 
If a network is not available, it + the same as the order of the networks in ``self.network_list_available``. If a network is not available, it will not be included in the list. If a network is not compatible with the selected descriptor, it will not be included in the list. """ @@ -712,9 +712,9 @@ def runModels_dilute(self, Args: descriptor: Descriptor to use for predictions. Must be one of the descriptors which support the dilute - structures (i.e. `*_dilute`). See pysipfenn.descriptorDefinitions to see available modules or add yours + structures (i.e. `*_dilute`). See ``pysipfenn.descriptorDefinitions`` to see available modules or add yours here. Available default dilute descriptors are now: ``'KS2022'``. The ``'KS2022'`` can also be called from - runModels() function, but is not recommended for dilute alloys, as it negates the speed increase of the + ``runModels()`` function, but is not recommended for dilute alloys, as it negates the speed increase of the dilute structure featurizer. structList: List of pymatgen ``Structure`` objects to run the models on. Must be dilute structures as described above. @@ -730,7 +730,7 @@ def runModels_dilute(self, Returns: List of predictions. Each element of the list is a list of predictions for all run networks. The order of the predictions is the same as the order of the input structures. The order of the networks - is the same as the order of the networks in ``network_list_available``. If a network is not available, + is the same as the order of the networks in ``self.network_list_available``. If a network is not available, it will not be included in the list. If a network is not compatible with the selected descriptor, it will not be included in the list. """ @@ -773,8 +773,8 @@ def get_resultDicts(self) -> List[dict]: def get_resultDictsWithNames(self) -> List[dict]: """Returns a list of dictionaries with the predictions for each network. 
The keys of the dictionaries are the names of the networks and the names of the input structures. The order of the dictionaries is the same as the - order of the input structures passed through runModels() functions. Note that this function requires - self.inputFiles to be set, which is done automatically when using ``runFromDirectory()`` or + order of the input structures passed through ``runModels()`` functions. Note that this function requires + ``self.inputFiles`` to be set, which is done automatically when using ``runFromDirectory()`` or ``runFromDirectory_dilute()`` but not when using ``runModels()`` or ``runModels_dilute()``, as the input structures are passed directly to the function and names have to be provided separately by assigning them to ``self.inputFiles``. @@ -838,18 +838,18 @@ def runFromDirectory_dilute(self, mode: str = 'serial', max_workers: int = 8) -> None: """Runs all loaded models on a list of dilute Structures it automatically imports from a specified directory. - The directory must contain only atomic structures in formats such as 'poscar', 'cif', 'json', 'mcsqs', etc., + The directory must contain only atomic structures in formats such as ``'poscar'``, ``'cif'``, ``'json'``, ``'mcsqs'``, etc., or a mix of these. The structures are automatically sorted using natsort library, so the order of the structures in the directory, as defined by the operating system, is not important. Natural sorting, - for example, will sort the structures in the following order: '1-Fe', '2-Al', '10-xx', '11-xx', '20-xx', - '21-xx', '11111-xx', etc. This is useful when the structures are named using a numbering system. The order of + for example, will sort the structures in the following order: ``'1-Fe'``, ``'2-Al'``, ``'10-xx'``, ``'11-xx'``, ``'20-xx'``, + ``'21-xx'``, ``'11111-xx'``, etc. This is useful when the structures are named using a numbering system. The order of the predictions is the same as the order of the input structures. 
The order of the networks in a prediction is the same as the order of the networks in self.network_list_available. If a network is not available, it will not be included in the list. Args: directory: Directory containing the structures to run the models on. The directory must contain only atomic - structures in formats such as 'poscar', 'cif', 'json', 'mcsqs', etc., or a mix of these. The structures + structures in formats such as ``'poscar'``, ``'cif'``, ``'json'``, ``'mcsqs'``, etc., or a mix of these. The structures are automatically sorted as described above. The structures must be dilute structures, i.e. they must contain only one alloying element. descriptor: Descriptor to use. Must be one of the available descriptors. See ``pysipfenn.descriptorDefinitions`` @@ -858,7 +858,7 @@ def runFromDirectory_dilute(self, structures are based on pure elements and generates references automatically. Alternatively, a list of structures can be provided, which can be either pure elements or custom base structures (e.g. TCP endmember configurations). - mode: Computation mode. 'serial' or 'parallel'. Default is 'serial'. Parallel mode is not recommended for + mode: Computation mode. ``'serial'`` or ``'parallel'``. Default is ``'serial'``. Parallel mode is not recommended for small datasets. max_workers: Number of workers to use in parallel mode. Default is ``8``. Ignored in serial mode. If set to ``None``, will use all available cores. If set to ``0``, will use 1 core. @@ -908,7 +908,7 @@ def writeResultsToCSV(self, file: str) -> None: def writeDescriptorsToCSV(self, descriptor: str, file: str = 'descriptorData.csv') -> None: """Writes the descriptor data to a CSV file. The first column is the name of the structure. If the - self.inputFiles attribute is populated automatically by runFromDirectory() or set manually, the names of the + ``self.inputFiles`` attribute is populated automatically by runFromDirectory() or set manually, the names of the structures will be used. 
Otherwise, the names will be ``'1'``, ``'2'``, ``'3'``, etc. The remaining columns are the descriptor values. The order of the columns is the same as the order of the labels in the descriptor definition file. @@ -966,7 +966,7 @@ def ward2ks2022(ward2017: np.ndarray) -> np.ndarray: return ks2022 def overwritePrototypeLibrary(prototypeLibrary: dict) -> None: - """Destructively overwrites the prototype library with a custom one. Used by the ``appendPrototypeLibrary`` function + """Destructively overwrites the prototype library with a custom one. Used by the ``appendPrototypeLibrary()`` function to persist its changes. The other main use it to restore the default one to the original state based on a backup made earlier (see tests for an example).""" yaml_customDumper = YAML() @@ -1008,7 +1008,9 @@ def string2prototype(c: Calculator, prototype: str) -> Structure: # WRAPPERS def wrapper_KS2022_dilute_generate_descriptor(args): + """Wraps the ``KS2022_dilute.generate_descriptor`` function for parallel processing.""" return KS2022_dilute.generate_descriptor(*args) def wrapper_KS2022_randomSolutions_generate_descriptor(args): + """Wraps the ``KS2022_randomSolutions.generate_descriptor`` function for parallel processing.""" return KS2022_randomSolutions.generate_descriptor(*args) \ No newline at end of file From abdd9bcf76c8a1504ab84a6ebeeab4ca780c69c2 Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 19:45:24 -0500 Subject: [PATCH 51/59] (core) many core docstring updates --- pysipfenn/core/modelExporters.py | 58 ++++++++++++++++---------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/pysipfenn/core/modelExporters.py b/pysipfenn/core/modelExporters.py index 68dbacb..c1feb10 100644 --- a/pysipfenn/core/modelExporters.py +++ b/pysipfenn/core/modelExporters.py @@ -15,22 +15,22 @@ class ONNXExporter: """Export models to the ONNX format (what they ship in by default) to allow (1) exporting modified pySIPFENN models, - (2) simplify the models using ONNX optimizer, and (3) convert them to FP16 precision, cutting the size in half. + (2) simplify the models using ONNX optimizer, and (3) convert them to `FP16` precision, cutting the size in half. Args: - calculator: A calculator object with loaded models that has loaded PyTorch models (happens automatically - when the autoLoad argument is kept to its default value of True when initializing the Calculator). During the + calculator: A ``Calculator`` object with loaded models that has loaded PyTorch models (happens automatically + when the ``autoLoad`` argument is kept to its default value of ``True`` when initializing the Calculator). During the initialization, the loaded PyTorch models are converted back to ONNX (in memory) to be then either adjusted or persisted to disk. Attributes: - calculator: A calculator object with ONNX loaded models. - simplifiedDict: A dictionary of models that have been simplified. - fp16Dict: A dictionary of models that have been converted to FP16. + calculator: A Calculator object with ONNX loaded models. + simplifiedDict: A boolean dictionary of models that have been simplified. + fp16Dict: A boolean dictionary of models that have been converted to FP16. 
""" def __init__(self, calculator: Calculator): - """Initialize the ONNXExporter using a calculator object.""" + """Initialize the ``ONNXExporter`` using a calculator object.""" self.simplifiedDict = {model: False for model in calculator.loadedModels.keys()} self.fp16Dict = {model: False for model in calculator.loadedModels.keys()} self.calculator = calculator @@ -80,7 +80,7 @@ def simplify(self, model: str) -> None: """Simplify a loaded model using the ONNX optimizer. Args: - model: The name of the model to simplify (must be loaded in the Calculator). + model: The name of the model to simplify (must be loaded in the ``Calculator``). Returns: None @@ -100,11 +100,11 @@ def simplifyAll(self): self.simplify(model) print('***** Done simplifying all models! *****') - def toFP16(self, model: str): + def toFP16(self, model: str) -> None: """Convert a loaded model to FP16 precision. Args: - model: The name of the model to convert to FP16 (must be loaded in the Calculator). + model: The name of the model to convert to FP16 (must be loaded in the ``Calculator``). Returns: None @@ -125,10 +125,10 @@ def toFP16All(self): print('***** Done converting all models to FP16! *****') def export(self, model: str, append: str = '') -> None: - """Export a loaded model to ONNX format. + """Export a loaded model to ``ONNX``format. Args: - model: The name of the model to export (must be loaded in the Calculator). + model: The name of the model to export (must be loaded in the ``Calculator``). append: A string to append to the exported model name after the model name, simplification marker, and FP16 marker. Useful for adding a version number or other information to the exported model name. @@ -150,8 +150,8 @@ def export(self, model: str, append: str = '') -> None: print(f'--> Exported as {name}', flush=True) def exportAll(self, append: str = '') -> None: - """Export all loaded models to ONNX format with the export function. `append` can be passed to the export - function. 
+ """Export all loaded models to ``ONNX`` format with the export function. ``append`` string can be passed to the export + function to append to the exported model name. """ for model in tqdm(self.calculator.loadedModels): self.export(model, append=append) @@ -159,13 +159,13 @@ def exportAll(self, append: str = '') -> None: class TorchExporter: - """Export models to the PyTorch PT format to allow for easy loading and inference in PyTorch in other projects. + """Export models to the ``PyTorch PT`` format to allow for easy loading and inference in PyTorch in other projects. Args: - calculator: A calculator object with loaded models. + calculator: A ``Calculator`` object with loaded models. Attributes: - calculator: A calculator object with loaded models. + calculator: A ``Calculator`` object with loaded models. """ def __init__(self, calculator: Calculator): """Initialize the TorchExporter with a calculator object that has loaded models.""" @@ -174,12 +174,12 @@ def __init__(self, calculator: Calculator): print(f'Initialized TorchExporter with models: {list(self.calculator.loadedModels.keys())}') def export(self, model: str, append: str = '') -> None: - """Export a loaded model to PyTorch PT format. Models are exported in eval mode (no dropout) and saved in the + """Export a loaded model to ``PyTorch PT`` format. Models are exported in eval mode (no dropout) and saved in the current working directory. Args: - model: The name of the model to export (must be loaded in the Calculator) and it must have a descriptor - (Ward2017 or KS2022) defined in the calculator.models dictionary created when the Calculator was + model: The name of the model to export (must be loaded in the ``Calculator``) and it must have a descriptor + (``Ward2017`` or ``KS2022``) defined in the ``Calculator.models`` dictionary created when the ``Calculator`` was initialized. append: A string to append to the exported model name after the model name. 
Useful for adding a version number or other information to the exported model name. @@ -222,15 +222,15 @@ def exportAll(self, append: str = '') -> None: class CoreMLExporter: - """Export models to the CoreML format to allow for easy loading and inference in CoreML in other projects, + """Export models to the ``CoreML`` format to allow for easy loading and inference in ``CoreML`` in other projects, particularly valuable for Apple devices, as pySIPFENN models can be run using the Neural Engine accelerator with minimal power consumption and neat optimizations. Args: - calculator: A calculator object with loaded models. + calculator: A ``Calculator`` object with loaded models. Attributes: - calculator: A calculator object with loaded models. + calculator: A ``Calculator`` object with loaded models. """ def __init__(self, calculator: Calculator): self.calculator = calculator @@ -238,14 +238,14 @@ def __init__(self, calculator: Calculator): print(f'Initialized CoreMLExporter with models: {list(self.calculator.loadedModels.keys())}') def export(self, model: str, append: str = '') -> None: - """Export a loaded model to CoreML format. Models will be saved as {model}.mlpackage in the current working - directory. Models will be annotated with the feature vector name (Ward2017 or KS2022) and the output will be + """Export a loaded model to ``CoreML`` format. Models will be saved as ``{model}.mlpackage`` in the current working + directory. Models will be annotated with the feature vector name (``Ward2017`` or ``KS2022``) and the output will be named "property". The latter behavior will be adjusted in the future when model output name and unit will be added to the model JSON metadata. 
Args: - model: The name of the model to export (must be loaded in the Calculator) and it must have a descriptor - (Ward2017 or KS2022) defined in the calculator.models dictionary created when the Calculator was + model: The name of the model to export (must be loaded in the ``Calculator``) and it must have a descriptor + (``Ward2017`` or ``KS2022``) defined in the ``calculator.models`` dictionary created when the ``Calculator`` was initialized. append: A string to append to the exported model name after the model name. Useful for adding a version number or other information to the exported model name. @@ -287,8 +287,8 @@ def export(self, model: str, append: str = '') -> None: print(f'--> Exported as {name}', flush=True) def exportAll(self, append: str = '') -> None: - """Export all loaded models to CoreML format with the export function. `append` can be passed to the export - function. + """Export all loaded models to ``CoreML`` format with the export function. ``append`` can be passed to the export + function to append to all exported model names. """ for model in tqdm(self.calculator.loadedModels): self.export(model, append=append) From ddfc833399e9ab36a611f76b101ac6e698cb65c1 Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 21:56:41 -0500 Subject: [PATCH 52/59] (core) reorganized the core order to follow API levels --- pysipfenn/core/pysipfenn.py | 267 ++++++++++++++++++------------------ 1 file changed, 137 insertions(+), 130 deletions(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index cac928e..f3e4bb0 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -37,7 +37,7 @@ ["Jonathan Siegel", "jwsiegel@tamu.edu"]] __name__ = 'pysipfenn' - +# ********************************* CALCULATION HIGH-LEVEL ENVIRONMENT ********************************* class Calculator: """pySIPFENN Calculator automatically initializes all functionalities including identification and loading of all available models defined statically in the ``models.json`` file. It exposes methods for calculating predefined @@ -125,20 +125,7 @@ def __str__(self): printOut += f' {len(self.predictions[0])} predictions/structure\n' return printOut - def updateModelAvailability(self) -> None: - """Updates availability of models based on the pysipfenn.modelsSIPFENN directory contents. 
Works only for - current ONNX model definitions.""" - with resources.files('pysipfenn.modelsSIPFENN') as p: - all_files = os.listdir(p) - detectedNets = [] - for net, netName in zip(self.network_list, self.network_list_names): - if all_files.__contains__(net + '.onnx'): - detectedNets.append(net) - print('✔ ' + netName) - else: - print('⨯ ' + netName) - self.network_list_available = detectedNets - + # ********************************* PROTOTYPE HANDLING ********************************* def parsePrototypeLibrary(self, customPath: str = "default", verbose: bool = False, @@ -207,6 +194,20 @@ def appendPrototypeLibrary(self, customPath: str) -> None: f'Persisting them for future use.') overwritePrototypeLibrary(self.prototypeLibrary) + # ********************************* MODEL HANDLING ********************************* + def updateModelAvailability(self) -> None: + """Updates availability of models based on the pysipfenn.modelsSIPFENN directory contents. Works only for + current ONNX model definitions.""" + with resources.files('pysipfenn.modelsSIPFENN') as p: + all_files = os.listdir(p) + detectedNets = [] + for net, netName in zip(self.network_list, self.network_list_names): + if all_files.__contains__(net + '.onnx'): + detectedNets.append(net) + print('✔ ' + netName) + else: + print('⨯ ' + netName) + self.network_list_available = detectedNets def downloadModels(self, network: str = 'all') -> None: """Downloads ONNX models. By default, all available models are downloaded. If a model is already available @@ -252,6 +253,93 @@ def downloadModels(self, network: str = 'all') -> None: print('Network name not recognized') self.updateModelAvailability() + def loadModels(self, network: str = 'all') -> None: + """Load model/models into memory of the ``Calculator`` class. The models are loaded from the ``modelsSIPFENN`` directory + inside the package. Its location can be seen by calling ``print()`` on the ``Calculator``. 
The models are stored in the + ``self.loadedModels`` attribute as a dictionary with the network string as key and the PyTorch model as value. + + Note: + This function only works with models that are stored in the ``modelsSIPFENN`` directory inside the package, + are in ONNX format, and have corresponding entries in ``models.json``. For all others, you will need to use + ``loadModelCustom()``. + + Args: + network: Default is ``'all'``, which loads all models detected as available. Alternatively, a specific model + can be loaded by its corresponding key in models.json. E.g. ``'SIPFENN_Krajewski2020_NN9'`` or + ``'SIPFENN_Krajewski2022_NN30'``. The key is the same as the network argument in ``downloadModels()``. + + Raises: + ValueError: If the network name is not recognized or if the model is not available in the ``modelsSIPFENN`` + directory. + + Returns: + None. It updates the loadedModels attribute of the Calculatorclass. + """ + with resources.files('pysipfenn.modelsSIPFENN') as modelPath: + if network == 'all': + print('Loading models:') + for net in tqdm(self.network_list_available): + self.loadedModels.update({ + net: onnx2torch.convert(onnx.load(f'{modelPath}/{net}.onnx')).float() + }) + elif network in self.network_list_available: + print('Loading model: ', network) + self.loadedModels.update({ + network: onnx2torch.convert(onnx.load(f'{modelPath}/{network}.onnx')).float() + }) + else: + raise ValueError( + 'Network not available. Please check the network name for typos or run downloadModels() ' + 'to download the models. Currently available models are: ', self.network_list_available) + + def loadModelCustom(self, networkName: str, modelName: str, descriptor: str, modelDirectory: str = '.') -> None: + """Load a custom ONNX model from a custom directory specified by the user. The primary use case for this + function is to load models that are not included in the package and cannot be placed in the package + directory because of write permissions (e.g. 
on restrictive HPC systems) or storage allocations. + + Args: + modelDirectory: Directory where the model is located. Defaults to the current directory. + networkName: Name of the network. This is the name used to refer to the ONNX network. It has to be + unique, not contain any spaces, and correspond to the name of the ONNX file (excluding the ``.onnx`` + extension). + modelName: Name of the model. This is the name that will be displayed in the model selection menu. It + can be any string desired. + descriptor: Descriptor/feature vector used by the model. pySIPFENN currently supports the following + descriptors: ``'KS2022'``, and ``'Ward2017'``. + """ + + self.loadedModels.update({ + networkName: onnx2torch.convert(onnx.load(f'{modelDirectory}/{networkName}.onnx')).float() + }) + self.models.update({ + networkName: { + 'name': modelName, + 'descriptor': descriptor + }}) + self.network_list.append(networkName) + self.network_list_names.append(modelName) + self.network_list_available.append(networkName) + print(f'Loaded model {modelName} ({networkName}) from {modelDirectory}') + + def findCompatibleModels(self, descriptor: str) -> List[str]: + """Finds all models compatible with a given descriptor based on the descriptor definitions loaded from the + ``models.json`` file. + + Args: + descriptor: Descriptor to use. Must be one of the available descriptors. See ``pysipfenn.descriptorDefinitions`` + to see available modules or add yours. Available default descriptors are: ``'Ward2017'``, ``'KS2022'``. + + Returns: + List of strings corresponding to compatible models. 
+ """ + + compatibleList = [] + for net in self.models: + if descriptor in self.models[net]['descriptor']: + compatibleList.append(net) + return compatibleList + + # ******************************* DESCRIPTOR HANDLING (MID-LEVEL API) ******************************* def calculate_Ward2017(self, structList: List[Structure], mode: str = 'serial', @@ -515,74 +603,7 @@ def calculate_KS2022_randomSolutions( self.metas['RSS'] = metaList return descList - def loadModels(self, network: str = 'all') -> None: - """Load model/models into memory of the ``Calculator`` class. The models are loaded from the ``modelsSIPFENN`` directory - inside the package. Its location can be seen by calling ``print()`` on the ``Calculator``. The models are stored in the - ``self.loadedModels`` attribute as a dictionary with the network string as key and the PyTorch model as value. - - Note: - This function only works with models that are stored in the ``modelsSIPFENN`` directory inside the package, - are in ONNX format, and have corresponding entries in ``models.json``. For all others, you will need to use - ``loadModelCustom()``. - - Args: - network: Default is ``'all'``, which loads all models detected as available. Alternatively, a specific model - can be loaded by its corresponding key in models.json. E.g. ``'SIPFENN_Krajewski2020_NN9'`` or - ``'SIPFENN_Krajewski2022_NN30'``. The key is the same as the network argument in ``downloadModels()``. - - Raises: - ValueError: If the network name is not recognized or if the model is not available in the ``modelsSIPFENN`` - directory. - - Returns: - None. It updates the loadedModels attribute of the Calculatorclass. 
- """ - with resources.files('pysipfenn.modelsSIPFENN') as modelPath: - if network == 'all': - print('Loading models:') - for net in tqdm(self.network_list_available): - self.loadedModels.update({ - net: onnx2torch.convert(onnx.load(f'{modelPath}/{net}.onnx')).float() - }) - elif network in self.network_list_available: - print('Loading model: ', network) - self.loadedModels.update({ - network: onnx2torch.convert(onnx.load(f'{modelPath}/{network}.onnx')).float() - }) - else: - raise ValueError( - 'Network not available. Please check the network name for typos or run downloadModels() ' - 'to download the models. Currently available models are: ', self.network_list_available) - - def loadModelCustom(self, networkName: str, modelName: str, descriptor: str, modelDirectory: str = '.') -> None: - """Load a custom ONNX model from a custom directory specified by the user. The primary use case for this - function is to load models that are not included in the package and cannot be placed in the package - directory because of write permissions (e.g. on restrictive HPC systems) or storage allocations. - - Args: - modelDirectory: Directory where the model is located. Defaults to the current directory. - networkName: Name of the network. This is the name used to refer to the ONNX network. It has to be - unique, not contain any spaces, and correspond to the name of the ONNX file (excluding the ``.onnx`` - extension). - modelName: Name of the model. This is the name that will be displayed in the model selection menu. It - can be any string desired. - descriptor: Descriptor/feature vector used by the model. pySIPFENN currently supports the following - descriptors: ``'KS2022'``, and ``'Ward2017'``. 
- """ - - self.loadedModels.update({ - networkName: onnx2torch.convert(onnx.load(f'{modelDirectory}/{networkName}.onnx')).float() - }) - self.models.update({ - networkName: { - 'name': modelName, - 'descriptor': descriptor - }}) - self.network_list.append(networkName) - self.network_list_names.append(modelName) - self.network_list_available.append(networkName) - print(f'Loaded model {modelName} ({networkName}) from {modelDirectory}') - + # ******************************* PREDICTION RUNNERS (MID-LEVEL API) ******************************* def makePredictions(self, models: Dict[str, torch.nn.Module], toRun: List[str], @@ -624,24 +645,7 @@ def makePredictions(self, self.predictions = dataOuts return dataOuts - def findCompatibleModels(self, descriptor: str) -> List[str]: - """Finds all models compatible with a given descriptor based on the descriptor definitions loaded from the - ``models.json`` file. - - Args: - descriptor: Descriptor to use. Must be one of the available descriptors. See ``pysipfenn.descriptorDefinitions`` - to see available modules or add yours. Available default descriptors are: ``'Ward2017'``, ``'KS2022'``. - - Returns: - List of strings corresponding to compatible models. - """ - - compatibleList = [] - for net in self.models: - if descriptor in self.models[net]['descriptor']: - compatibleList.append(net) - return compatibleList - + # ******************************* TOP-LEVEL API ******************************* def runModels(self, descriptor: str, structList: List[Structure], @@ -760,34 +764,6 @@ def runModels_dilute(self, return self.predictions - def get_resultDicts(self) -> List[dict]: - """Returns a list of dictionaries with the predictions for each network. The keys of the dictionaries are the - names of the networks. The order of the dictionaries is the same as the order of the input structures passed - through ``runModels()`` functions. - - Returns: - List of dictionaries with the predictions. 
- """ - return [dict(zip(self.toRun, pred)) for pred in self.predictions] - - def get_resultDictsWithNames(self) -> List[dict]: - """Returns a list of dictionaries with the predictions for each network. The keys of the dictionaries are the - names of the networks and the names of the input structures. The order of the dictionaries is the same as the - order of the input structures passed through ``runModels()`` functions. Note that this function requires - ``self.inputFiles`` to be set, which is done automatically when using ``runFromDirectory()`` or - ``runFromDirectory_dilute()`` but not when using ``runModels()`` or ``runModels_dilute()``, as the input structures are - passed directly to the function and names have to be provided separately by assigning them to ``self.inputFiles``. - - Returns: - List of dictionaries with the predictions. - """ - assert self.inputFiles is not [] - assert len(self.inputFiles) == len(self.predictions) - return [ - dict(zip(['name'] + self.toRun, [name] + pred)) - for name, pred in - zip(self.inputFiles, self.predictions)] - def runFromDirectory(self, directory: str, descriptor: str, @@ -881,6 +857,37 @@ def runFromDirectory_dilute(self, max_workers=max_workers) print('Done!') + + # ******************************* POST-PROCESSING ******************************* + def get_resultDicts(self) -> List[dict]: + """Returns a list of dictionaries with the predictions for each network. The keys of the dictionaries are the + names of the networks. The order of the dictionaries is the same as the order of the input structures passed + through ``runModels()`` functions. + + Returns: + List of dictionaries with the predictions. + """ + return [dict(zip(self.toRun, pred)) for pred in self.predictions] + + def get_resultDictsWithNames(self) -> List[dict]: + """Returns a list of dictionaries with the predictions for each network. The keys of the dictionaries are the + names of the networks and the names of the input structures. 
The order of the dictionaries is the same as the + order of the input structures passed through ``runModels()`` functions. Note that this function requires + ``self.inputFiles`` to be set, which is done automatically when using ``runFromDirectory()`` or + ``runFromDirectory_dilute()`` but not when using ``runModels()`` or ``runModels_dilute()``, as the input structures are + passed directly to the function and names have to be provided separately by assigning them to ``self.inputFiles``. + + Returns: + List of dictionaries with the predictions. + """ + assert self.inputFiles is not [] + assert len(self.inputFiles) == len(self.predictions) + return [ + dict(zip(['name'] + self.toRun, [name] + pred)) + for name, pred in + zip(self.inputFiles, self.predictions)] + + def writeResultsToCSV(self, file: str) -> None: """Writes the results to a CSV file. The first column is the name of the structure. If the ``self.inputFiles`` attribute is populated automatically by ``runFromDirectory()`` or set manually, the names of the structures will @@ -941,7 +948,7 @@ def writeDescriptorsToCSV(self, descriptor: str, file: str = 'descriptorData.csv f.write(f'{i},{",".join(str(v) for v in dd)}\n') i += 1 -# UTILS +# ************************ SATELLITE FUNCTIONS ************************ def ward2ks2022(ward2017: np.ndarray) -> np.ndarray: """Converts a ``Ward2017`` descriptor to a ``KS2022`` descriptor (which is its subset). From be02a78f5e2a5b6ad53bd970e71249bf66cfe4e8 Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 22:01:38 -0500 Subject: [PATCH 53/59] (RSS) minor replacement of a deprecated function --- pysipfenn/tests/test_KS2022_randomSolutions.py | 2 +- pysipfenn/tests/test_pysipfenn.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pysipfenn/tests/test_KS2022_randomSolutions.py b/pysipfenn/tests/test_KS2022_randomSolutions.py index 648c708..c5c0b9d 100644 --- a/pysipfenn/tests/test_KS2022_randomSolutions.py +++ b/pysipfenn/tests/test_KS2022_randomSolutions.py @@ -63,7 +63,7 @@ class TestKS2022RandomSolutionProfiling(unittest.TestCase): structures. ''' def test_serialInParallel(self): - '''Tests profiling a set of structures with perallel task execution.''' + '''Tests profiling a set of structures with parallel task execution.''' process_map(KS2022_randomSolutions.profile, ['BCC', 'FCC', 'HCP'], max_workers=3) diff --git a/pysipfenn/tests/test_pysipfenn.py b/pysipfenn/tests/test_pysipfenn.py index 7ea34ef..5ce996e 100644 --- a/pysipfenn/tests/test_pysipfenn.py +++ b/pysipfenn/tests/test_pysipfenn.py @@ -337,7 +337,7 @@ def test_descriptorCalculate_KS2022_randomSolution_serial_multiple(self): float(d2[0][0]), float(d2[1][0]) , places=6, msg="Coordination number (KS2022[0]) should be the same (12) for both compositions.") - self.assertNotAlmostEquals( + self.assertNotAlmostEqual( float(d2[0][13]), float(d2[1][13]) , places=6, msg="mean_NeighDiff_shell1_Number (KS2022[13]) should be different (1.0vs2.0)." 
@@ -369,7 +369,7 @@ def test_descriptorCalculate_KS2022_randomSolution_parallel_multiple(self): self.assertEqual(len(d2), 4, "Four composition-structure pairs should be processed.") for i in range(4): self.assertEqual(len(d2[i]), 256, "All 256 KS2022 features should be obtained.") - self.assertNotAlmostEquals( + self.assertNotAlmostEqual( float(d2[0][0]), float(d2[1][0]), places=6, msg="Coordination number (KS2022[0]) should be different for BCC and FCC.") From 84656ab11745b7522d5220fd9863492503ef8a0a Mon Sep 17 00:00:00 2001 From: "Adam M. Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 22:33:56 -0500 Subject: [PATCH 54/59] (RSS) added top-level `runModels_randomSolutions` --- pysipfenn/core/pysipfenn.py | 90 ++++++++++++++++++++++++++++++++++--- 1 file changed, 84 insertions(+), 6 deletions(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index f3e4bb0..3fe553d 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -764,12 +764,90 @@ def runModels_dilute(self, return self.predictions - def runFromDirectory(self, - directory: str, - descriptor: str, - mode: str = 'serial', - max_workers: int = 4 - ) -> List[list]: + def runModels_randomSolutions( + self, + descriptor: str, + baseStructList: Union[str, Structure, List[str], List[Structure], List[Union[Composition, str]]], + compList: Union[str, List[str], Composition, List[Composition], List[Union[Composition, str]]], + minimumSitesPerExpansion: int = 50, + featureConvergenceCriterion: float = 0.005, + compositionConvergenceCriterion: float = 0.01, + minimumElementOccurrences: int = 10, + plotParameters: bool = False, + printProgress: bool = False, + mode: str = 'serial', + max_workers: int = 8, + ) -> List[List[float]]: + """A top-level convenience wrapper for the ``calculate_KS2022_randomSolutions`` function. 
It passes all the + arguments to that function directly (except for ``descriptor`` and uses its result to run all applicable models. + The result is a list of predictions for all run networks. + + Args: + descriptor: Descriptor to use for predictions. Must be one of the descriptors which support the random + solid solution structures (i.e. `*_randomSolutions`). See ``pysipfenn.descriptorDefinitions`` to see + available modules or add yours here. As of v0.15.0, the only available descriptor is + ``'KS2022'`` through its ``KS2022_randomSolutions`` submodule. + baseStructList: See ``calculate_KS2022_randomSolutions`` for details. You can mix-and-match ``Structure`` + objects and magic strings, either individually (to use the same entity for all calculations) or in a + list. + compList: See ``calculate_KS2022_randomSolutions`` for details. You can mix-and-match ``Composition`` + objects and composition strings, either individually (to use the same entity for all calculations) + or in a list. + minimumSitesPerExpansion: See ``calculate_KS2022_randomSolutions``. + featureConvergenceCriterion: See ``calculate_KS2022_randomSolutions``. + compositionConvergenceCriterion: See ``calculate_KS2022_randomSolutions``. + minimumElementOccurrences: See ``calculate_KS2022_randomSolutions``. + plotParameters: See ``calculate_KS2022_randomSolutions``. + printProgress: See ``calculate_KS2022_randomSolutions``. + mode: Computation mode. ``'serial'`` or ``'parallel'``. Default is ``'serial'``. Parallel mode is not + recommended for small datasets. + + Returns: + List of predictions. They will correspond to the order of the networks in ``self.toRun`` established by the + ``findCompatibleModels()`` function. If a network is not available, it will not be included in the list. + """ + + self.toRun = list(set(self.findCompatibleModels(descriptor)).intersection(set(self.network_list_available))) + if len(self.toRun) == 0: + print('The list of models to run is empty. 
This may be caused by selecting a descriptor not ' + 'defined/available, or if the selected descriptor does not correspond to any available network. ' + 'Check spelling and invoke the downloadModels() function if you are using base models.') + raise AssertionError + else: + print(f'Running {self.toRun} models') + + print('Calculating descriptors...') + if descriptor == 'KS2022': + self.descriptorData = self.calculate_KS2022_randomSolutions( + baseStructList=baseStructList, + compList=compList, + minimumSitesPerExpansion=minimumSitesPerExpansion, + featureConvergenceCriterion=featureConvergenceCriterion, + compositionConvergenceCriterion=compositionConvergenceCriterion, + minimumElementOccurrences=minimumElementOccurrences, + plotParameters=plotParameters, + printProgress=printProgress, + mode=mode, + max_workers=max_workers + ) + else: + print('Descriptor handing not implemented. Check spelling.') + raise AssertionError + + self.makePredictions( + models=self.loadedModels, + toRun=self.toRun, + dataInList=self.descriptorData) + + return self.predictions + + def runFromDirectory( + self, + directory: str, + descriptor: str, + mode: str = 'serial', + max_workers: int = 4 + ) -> List[list]: """Runs all loaded models on a list of Structures it automatically imports from a specified directory. The directory must contain only atomic structures in formats such as ``'poscar'``, ``'cif'``, ``'json'``, ``'mcsqs'``, etc., or a mix of these. The structures are automatically sorted using natsort library, so the order of the From f736867bce297ae16b7272df700201b664b874bb Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 22:34:23 -0500 Subject: [PATCH 55/59] (core) style improvements --- pysipfenn/core/pysipfenn.py | 129 ++++++++++++++++++------------ pysipfenn/tests/test_pysipfenn.py | 4 +- 2 files changed, 82 insertions(+), 51 deletions(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index 3fe553d..d4f3974 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -292,7 +292,13 @@ def loadModels(self, network: str = 'all') -> None: 'Network not available. Please check the network name for typos or run downloadModels() ' 'to download the models. Currently available models are: ', self.network_list_available) - def loadModelCustom(self, networkName: str, modelName: str, descriptor: str, modelDirectory: str = '.') -> None: + def loadModelCustom( + self, + networkName: str, + modelName: str, + descriptor: str, + modelDirectory: str = '.' + ) -> None: """Load a custom ONNX model from a custom directory specified by the user. The primary use case for this function is to load models that are not included in the package and cannot be placed in the package directory because of write permissions (e.g. on restrictive HPC systems) or storage allocations. @@ -340,10 +346,12 @@ def findCompatibleModels(self, descriptor: str) -> List[str]: return compatibleList # ******************************* DESCRIPTOR HANDLING (MID-LEVEL API) ******************************* - def calculate_Ward2017(self, - structList: List[Structure], - mode: str = 'serial', - max_workers: int = 4) -> list: + def calculate_Ward2017( + self, + structList: List[Structure], + mode: str = 'serial', + max_workers: int = 4 + ) -> list: """Calculates ``Ward2017`` descriptors for a list of structures. The calculation can be done in serial or parallel mode. In parallel mode, the number of workers can be specified. The results are stored in the ``self.descriptorData`` attribute. 
The function returns the list of descriptors as well. @@ -370,10 +378,12 @@ def calculate_Ward2017(self, self.descriptorData = descList return descList - def calculate_KS2022(self, - structList: List[Structure], - mode: str = 'serial', - max_workers: int = 8) -> list: + def calculate_KS2022( + self, + structList: List[Structure], + mode: str = 'serial', + max_workers: int = 8 + ) -> list: """Calculates ``KS2022`` descriptors for a list of structures. The calculation can be done in serial or parallel mode. In parallel mode, the number of workers can be specified. The results are stored in the descriptorData attribute. The function returns the list of descriptors as well. @@ -400,11 +410,13 @@ def calculate_KS2022(self, self.descriptorData = descList return descList - def calculate_KS2022_dilute(self, - structList: List[Structure], - baseStruct: Union[str, List[Structure]] = 'pure', - mode: str = 'serial', - max_workers: int = 8) -> List[np.ndarray]: + def calculate_KS2022_dilute( + self, + structList: List[Structure], + baseStruct: Union[str, List[Structure]] = 'pure', + mode: str = 'serial', + max_workers: int = 8 + ) -> List[np.ndarray]: """Calculates ``KS2022`` descriptors for a list of dilute structures (either based on pure elements and on custom base structures, e.g. TCP endmember configurations) that contain a single alloying atom. Speed increases are substantial compared to the ``KS2022`` descriptor, which is more general and can be used on any structure. The @@ -474,7 +486,8 @@ def calculate_KS2022_randomSolutions( plotParameters: bool = False, printProgress: bool = False, mode: str = 'serial', - max_workers: int = 8) -> List[np.ndarray]: + max_workers: int = 8 + ) -> List[np.ndarray]: """Calculates ``KS2022`` descriptors corresponding to random solid solutions occupying base structure / lattice sites for a list of compositions through method described in ``descriptorDefinitions.KS2022_randomSolutions`` submodule. 
The results are stored in the descriptorData attribute. The function returns the list of descriptors @@ -604,10 +617,12 @@ def calculate_KS2022_randomSolutions( return descList # ******************************* PREDICTION RUNNERS (MID-LEVEL API) ******************************* - def makePredictions(self, - models: Dict[str, torch.nn.Module], - toRun: List[str], - dataInList: List[Union[List[float], np.array]]) -> List[list]: + def makePredictions( + self, + models: Dict[str, torch.nn.Module], + toRun: List[str], + dataInList: List[Union[List[float], np.array]] + ) -> List[list]: """Makes predictions using PyTorch networks listed in toRun and provided in models dictionary. Shared among all "predict" functions. @@ -623,7 +638,8 @@ def makePredictions(self, predictions is the same as the order of the networks in ``toRun``. """ dataOuts = [] - print('Making predictions...') + if self.verbose: + print('Making predictions...') # Run for each network dataIn = torch.from_numpy(np.array(dataInList)).float() assert set(toRun).issubset(set(models.keys())), 'Some networks to run are not available in the models.' 
@@ -637,8 +653,9 @@ def makePredictions(self, tempOut = model(dataIn) t1 = perf_counter() dataOuts.append(tempOut.cpu().detach().numpy()) - print(f'Prediction rate: {round(len(tempOut) / (t1 - t0), 1)} pred/s') - print(f'Obtained {len(tempOut)} predictions from: {net}') + if self.verbose: + print(f'Prediction rate: {round(len(tempOut) / (t1 - t0), 1)} pred/s') + print(f'Obtained {len(tempOut)} predictions from: {net}') # Transpose and round the predictions dataOuts = np.array(dataOuts).T.tolist()[0] @@ -646,11 +663,13 @@ def makePredictions(self, return dataOuts # ******************************* TOP-LEVEL API ******************************* - def runModels(self, - descriptor: str, - structList: List[Structure], - mode: str = 'serial', - max_workers: int = 4) -> List[list]: + def runModels( + self, + descriptor: str, + structList: List[Structure], + mode: str = 'serial', + max_workers: int = 4 + ) -> List[List[float]]: """Runs all loaded models on a list of Structures using specified descriptor. Supports serial and parallel computation modes. If parallel is selected, max_workers determines number of processes handling the featurization of structures (90-99+% of computational intensity) and models are then run in series. @@ -684,29 +703,37 @@ def runModels(self, print('Calculating descriptors...') if descriptor == 'Ward2017': - self.descriptorData = self.calculate_Ward2017(structList=structList, - mode=mode, - max_workers=max_workers) + self.descriptorData = self.calculate_Ward2017( + structList=structList, + mode=mode, + max_workers=max_workers + ) elif descriptor == 'KS2022': - self.descriptorData = self.calculate_KS2022(structList=structList, - mode=mode, - max_workers=max_workers) + self.descriptorData = self.calculate_KS2022( + structList=structList, + mode=mode, + max_workers=max_workers + ) else: print('Descriptor handing not implemented. 
Check spelling.') raise AssertionError - self.predictions = self.makePredictions(models=self.loadedModels, - toRun=self.toRun, - dataInList=self.descriptorData) + self.makePredictions( + models=self.loadedModels, + toRun=self.toRun, + dataInList=self.descriptorData + ) return self.predictions - def runModels_dilute(self, - descriptor: str, - structList: List[Structure], - baseStruct: Union[str, List[Structure]] = 'pure', - mode: str = 'serial', - max_workers: int = 4) -> List[list]: + def runModels_dilute( + self, + descriptor: str, + structList: List[Structure], + baseStruct: Union[str, List[Structure]] = 'pure', + mode: str = 'serial', + max_workers: int = 4 + ) -> List[List[float]]: """Runs all loaded models on a list of Structures using specified descriptor. A critical difference from runModels() is that this function will call dilute-specific featurizer, e.g. ``KS2022_dilute`` when ``'KS2022'`` is provided as input, which can only be used on dilute structures (both based on pure elements and on custom base @@ -750,17 +777,21 @@ def runModels_dilute(self, print('Calculating descriptors...') if descriptor == 'KS2022': - self.descriptorData = self.calculate_KS2022_dilute(structList=structList, - baseStruct=baseStruct, - mode=mode, - max_workers=max_workers) + self.descriptorData = self.calculate_KS2022_dilute( + structList=structList, + baseStruct=baseStruct, + mode=mode, + max_workers=max_workers + ) else: print('Descriptor handing not implemented. 
Check spelling.') raise AssertionError - self.predictions = self.makePredictions(models=self.loadedModels, - toRun=self.toRun, - dataInList=self.descriptorData) + self.makePredictions( + models=self.loadedModels, + toRun=self.toRun, + dataInList=self.descriptorData + ) return self.predictions diff --git a/pysipfenn/tests/test_pysipfenn.py b/pysipfenn/tests/test_pysipfenn.py index 5ce996e..220681d 100644 --- a/pysipfenn/tests/test_pysipfenn.py +++ b/pysipfenn/tests/test_pysipfenn.py @@ -64,7 +64,7 @@ def testFromPOSCAR_Ward2017(self): print(testFileDir) self.c.runFromDirectory(testFileDir, 'Ward2017') else: - print('Did not detect any Ward2017 models to run') + raise ValueError('Did not detect any Ward2017 models to run') @pytest.mark.skipif(IN_GITHUB_ACTIONS, reason="Test depends on the ONNX network files") def testFromPOSCAR_KS2022(self): @@ -79,7 +79,7 @@ def testFromPOSCAR_KS2022(self): print(testFileDir) self.c.runFromDirectory(testFileDir, 'KS2022') else: - print('Did not detect any KS2022 models to run') + raise ValueError('Did not detect any KS2022 models to run') with self.subTest(msg='Test Calculator printout after predictions'): printOut = str(self.c) From 873161cfcd290b90684fe62e192398135b6f4c8a Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 22:34:37 -0500 Subject: [PATCH 56/59] (RSS) added test for `runModels_randomSolutions` --- pysipfenn/tests/test_pysipfenn.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/pysipfenn/tests/test_pysipfenn.py b/pysipfenn/tests/test_pysipfenn.py index 220681d..071f4a0 100644 --- a/pysipfenn/tests/test_pysipfenn.py +++ b/pysipfenn/tests/test_pysipfenn.py @@ -128,7 +128,26 @@ def testFromStructure_KS2022_dilute(self): self.assertEqual(val1, val2) else: - print('Did not detect any KS2022 models to run') + raise ValueError('Did not detect any KS2022 models to run') + + #@pytest.mark.skipif(IN_GITHUB_ACTIONS, reason="Test depends on the ONNX network files") + def testFromPrototypes_KS2022_randomSolution(self): + """Quick runtime test of the top level API for random solution structures. It does not test the accuracy, as + that is delegated elsewhere.""" + + self.c.updateModelAvailability() + toRun = list(set(self.c.findCompatibleModels('KS2022')).intersection(set(self.c.network_list_available))) + if toRun: + preds = self.c.runModels_randomSolutions( + descriptor='KS2022', + baseStructList='FCC', + compList='AuCu', + compositionConvergenceCriterion=0.05, + featureConvergenceCriterion=0.02, + minimumSitesPerExpansion=8, + mode='serial') + else: + raise ValueError('Did not detect any KS2022 models to run') def test_descriptorCalculate_Ward2017_serial(self): '''Test succesful execution of the descriptorCalculate() method with Ward2017 in series. A separate test for From 11d0457786b5f360818f46a3f94f0d7260bb267e Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 22:39:25 -0500 Subject: [PATCH 57/59] (RSS) slightly looser tolerance for reference comparison of results from random solid solution testing; in most cases 2% was fine but probability dictates it sometimes will fail in min/max extremes, so now it's 5% --- pysipfenn/tests/test_KS2022_randomSolutions.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pysipfenn/tests/test_KS2022_randomSolutions.py b/pysipfenn/tests/test_KS2022_randomSolutions.py index c5c0b9d..96ad218 100644 --- a/pysipfenn/tests/test_KS2022_randomSolutions.py +++ b/pysipfenn/tests/test_KS2022_randomSolutions.py @@ -40,8 +40,8 @@ def test_results(self): self.descriptorMeanList, self.labels): with self.subTest(msg=f'{label} in BCC alloy'): - self.assertGreaterEqual(testValue, (0.98*descriptorMean)-descriptorRange-1e-6) - self.assertLessEqual(testValue, (1.02*descriptorMean)+descriptorRange+1e-6) + self.assertGreaterEqual(testValue, (0.95*descriptorMean)-descriptorRange-1e-4) + self.assertLessEqual(testValue, (1.05*descriptorMean)+descriptorRange+1e-4) for field in ['diffHistory', 'propHistory', 'finalAtomsN', 'finalCompositionDistance', 'finalComposition']: with self.subTest(msg=f'{field} present in meta'): From 56a70aaaee4baa5e675adf99f8eedbe0cba9fe94 Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 23:14:09 -0500 Subject: [PATCH 58/59] (core) fix two small typos in the core tests --- pysipfenn/tests/test_pysipfenn.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pysipfenn/tests/test_pysipfenn.py b/pysipfenn/tests/test_pysipfenn.py index 071f4a0..29e86cd 100644 --- a/pysipfenn/tests/test_pysipfenn.py +++ b/pysipfenn/tests/test_pysipfenn.py @@ -130,7 +130,7 @@ def testFromStructure_KS2022_dilute(self): else: raise ValueError('Did not detect any KS2022 models to run') - #@pytest.mark.skipif(IN_GITHUB_ACTIONS, reason="Test depends on the ONNX network files") + @pytest.mark.skipif(IN_GITHUB_ACTIONS, reason="Test depends on the ONNX network files") def testFromPrototypes_KS2022_randomSolution(self): """Quick runtime test of the top level API for random solution structures. It does not test the accuracy, as that is delegated elsewhere.""" @@ -307,7 +307,7 @@ def test_CalculatorPrint(self): ''' printOut = str(self.c) self.assertIn('pySIPFENN Calculator Object', printOut) - self.assertIn('Models are located in', printOut) + self.assertIn('Models are located', printOut) self.assertIn('Loaded Networks', printOut) From e2c9e9290a6789eab194c7cbc90021b6d0f8a679 Mon Sep 17 00:00:00 2001 From: "Adam M. 
Krajewski" <54290107+amkrajewski@users.noreply.github.com> Date: Mon, 12 Feb 2024 23:35:07 -0500 Subject: [PATCH 59/59] (core) try to address Windows stdout encoding problems by forcing utf-8 --- pysipfenn/core/pysipfenn.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/pysipfenn/core/pysipfenn.py b/pysipfenn/core/pysipfenn.py index d4f3974..55ae7e0 100644 --- a/pysipfenn/core/pysipfenn.py +++ b/pysipfenn/core/pysipfenn.py @@ -1,10 +1,13 @@ # Standard Library Imports -import os import csv import json from time import perf_counter from typing import List, Union, Dict from importlib import resources +import os +import io +import sys +sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') # Helper Imports from tqdm import tqdm