update pre-commit hooks and apply new black format

janosh · Feb 4, 2023 · 938bbe2 · 938bbe2
1 parent 70d04d4
commit 938bbe2
Show file tree

Hide file tree

Showing 8 changed files with 16 additions and 21 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -7,23 +7,23 @@ default_install_hook_types: [pre-commit, commit-msg]
 
 repos:
   - repo: https://github.com/PyCQA/isort
-    rev: 5.10.1
+    rev: 5.12.0
     hooks:
       - id: isort
 
   - repo: https://github.com/psf/black
-    rev: 22.8.0
+    rev: 23.1.0
     hooks:
       - id: black-jupyter
 
   - repo: https://github.com/PyCQA/flake8
-    rev: 5.0.4
+    rev: 6.0.0
     hooks:
       - id: flake8
         additional_dependencies: [flake8-bugbear]
 
   - repo: https://github.com/asottile/pyupgrade
-    rev: v2.38.2
+    rev: v3.3.1
     hooks:
       - id: pyupgrade
         args: [--py39-plus]
@@ -34,7 +34,7 @@ repos:
       - id: format-ipy-cells
 
   - repo: https://github.com/pre-commit/pre-commit-hooks
-    rev: v4.3.0
+    rev: v4.4.0
     hooks:
       - id: check-case-conflict
       - id: check-symlinks
@@ -45,13 +45,13 @@ repos:
       - id: trailing-whitespace
 
   - repo: https://github.com/codespell-project/codespell
-    rev: v2.2.1
+    rev: v2.2.2
     hooks:
       - id: codespell
         stages: [commit, commit-msg]
         exclude_types: [json, csv]
 
   - repo: https://github.com/PyCQA/autoflake
-    rev: v1.6.1
+    rev: v2.0.1
     hooks:
       - id: autoflake
diff --git a/dft/fetch_cod_structs.py b/dft/fetch_cod_structs.py
@@ -81,7 +81,6 @@
 
 # %%
 for struct_path in structure_paths:
-
     path = dirname(struct_path)
     if isfile(f"{path}/INCAR"):
         continue

diff --git a/notebooks/multitask/cross_val.py b/notebooks/multitask/cross_val.py
@@ -96,9 +96,7 @@ def forward(self, x):
     metrics = {key: [] for key in metrics}
 
     for epoch in range(model.epoch, total_epochs):
-
         for samples, truth in DataLoader(train_set, batch_size=32, shuffle=True):
-
             optim.zero_grad()
             preds = model(samples)
 

diff --git a/notebooks/multitask/ensemble.py b/notebooks/multitask/ensemble.py
@@ -81,9 +81,7 @@ def forward(self, x):
     metrics = {key: [] for key in metrics}
 
     for epoch in range(model.epoch, total_epochs):
-
         for samples, targets in train_loader:
-
             optim.zero_grad()
             preds = model(samples)
 

diff --git a/readme.md b/readme.md
@@ -1,14 +1,17 @@
-# Data-Driven Risk-Conscious Thermoelectric Materials Discovery
+<h1 align='center'>Data-Driven Risk-Conscious<br />Thermoelectric Materials Discovery</h1>
+
+<h4 align='center'>
 
-[![License](https://img.shields.io/github/license/janosh/thermo?label=License)](/license)
-[![GitHub Repo Size](https://img.shields.io/github/repo-size/janosh/thermo?label=Repo+Size)](https://github.com/janosh/thermo/graphs/contributors)
 [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/janosh/thermo/main.svg)](https://results.pre-commit.ci/latest/github/janosh/thermo/main)
+[![This project supports Python 3.8+](https://img.shields.io/badge/Python-3.8+-blue.svg?logo=python&logoColor=white)](https://python.org/downloads)
+[![GitHub Repo Size](https://img.shields.io/github/repo-size/janosh/thermo?label=Repo+Size)](https://github.com/janosh/thermo/graphs/contributors)
+</h4>
 
 ## Project description
 
-The aim is to discover high figure of merit ($zT > 1$) and sustainable (lead-free and rare earth-free) bulk thermoelectrics using machine learning-guided experimentation. The key advance is going beyond 'big data' which in this domain is unattainable for the foreseeable future since both first principles calculations and experimental synthesis and characterization of bulk thermoelectrics are costly and low throughput. Instead, we move towards so-called 'optimal data' by developing novel algorithms that optimize thermoelectric performance ($zT$) with minimal number of expensive calculations and experiments.
+The aim is to discover high-figure-of-merit ($zT > 1$) and sustainable (lead-free and rare earth-free) bulk thermoelectrics using machine learning-guided experimentation. The key advance is going beyond 'big data' which in this domain is unattainable for the foreseeable future since both first-principles calculations and experimental synthesis and characterization of bulk thermoelectrics are costly and low throughput. Instead, we move towards so-called 'optimal data' by developing novel algorithms that optimize thermoelectric performance ($zT$) with a minimal number of expensive calculations and experiments.
 
-To date there has been no statistically robust approach to simultaneously incorporate experimental and model error into machine learning models in a search space with high opportunity cost and high latency (i.e. large time between prediction and validation).
+To date, there has been no statistically robust approach to simultaneously incorporate experimental and model error into machine learning models in a search space with high opportunity cost and high latency (i.e. a long time between prediction and validation).
 
 Consequently, searches have been unable to effectively guide experimentalists in the selection of exploring or exploiting new materials when the validation step is inherently low throughput and resource-intensive, as is the case for synthesizing new bulk functional materials like thermoelectrics. This project aims to implement a holistic pipeline to discover novel thermoelectrics: ML models predict the $zT$ of a large database of structures as well as their own uncertainty for each prediction. Candidate structures are then selected, based on maximizing $zT$ subject to a tolerable level of uncertainty, to proceed to the next stage where expensive experimental synthesis and characterization of high-$zT$ candidates are guided by Bayesian optimization and active machine learning.
 

diff --git a/thermo/bnn/torch_dropout.py b/thermo/bnn/torch_dropout.py
@@ -45,7 +45,6 @@ def denorm_X(self, tensor, is_std=False):
 
 class GaultoisData(Normalized):
     def __init__(self, test_size=0.1, train=True, target_cols=None):
-
         features, targets = load_gaultois(target_cols=target_cols)
         targets, features = dropna(targets, features)
 
@@ -154,7 +153,6 @@ def fit(
         print(cols)
 
         for epoch in range(self.epochs, epochs):
-
             targets, outputs = [], []
 
             for samples, target in loader:

diff --git a/thermo/rf.py b/thermo/rf.py
@@ -29,7 +29,7 @@ def get_params(self, _deep: bool = True) -> dict:
         which when trying to inspect instances of this class would throw a
         RuntimeError complaining that "scikit-learn estimators should always specify
         their parameters in the signature of their __init__ (no varargs).
-        Constructor (self, *args, **kwargs) doesn't  follow this convention.".
+        Constructor (self, *args, **kwargs) doesn't follow this convention.".
         sklearn enforces this to be able to read and set the parameter names
         in meta algorithms like pipeline and grid search which we don't need.
         """

diff --git a/thermo/utils/__init__.py b/thermo/utils/__init__.py
@@ -69,7 +69,6 @@ def cross_val_predict(splitter, features, targets, predict_fn):
     for train_idx, test_idx in tqdm(
         splitter.split(features), desc=f"{splitter.n_splits}-fold CV"
     ):
-
         X_train, X_test = features.iloc[train_idx], features.iloc[test_idx]
         y_train, y_test = targets.iloc[train_idx], targets.iloc[test_idx]