Commit

Pyproject (#25)

* migrate setup.cfg and requirements.txt to pyproject.toml

* replace flake8 isort autoflake pre-commit hooks with ruff

* add ruff config to pyproject.toml

* fix ruff errors
janosh authored Feb 24, 2023
1 parent 938bbe2 commit a4240b4
Showing 25 changed files with 89 additions and 110 deletions.
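
For orientation (a sketch, not instructions from the commit itself): with dependencies and tool settings consolidated in pyproject.toml, the local workflow would look roughly like

    pip install -e .            # resolve dependencies from [project] in pyproject.toml (assumes the project table also declares a name/version and a build backend)
    pre-commit install          # set up the hooks from .pre-commit-config.yaml
    pre-commit run --all-files  # ruff now covers what flake8, isort, autoflake and pyupgrade did
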
25 changes: 5 additions & 20 deletions .pre-commit-config.yaml
@@ -6,28 +6,17 @@ default_stages: [commit]
 default_install_hook_types: [pre-commit, commit-msg]

 repos:
-  - repo: https://github.com/PyCQA/isort
-    rev: 5.12.0
+  - repo: https://github.com/charliermarsh/ruff-pre-commit
+    rev: v0.0.252
     hooks:
-      - id: isort
+      - id: ruff
+        args: [--fix, --ignore, D]

   - repo: https://github.com/psf/black
     rev: 23.1.0
     hooks:
       - id: black-jupyter

-  - repo: https://github.com/PyCQA/flake8
-    rev: 6.0.0
-    hooks:
-      - id: flake8
-        additional_dependencies: [flake8-bugbear]
-
-  - repo: https://github.com/asottile/pyupgrade
-    rev: v3.3.1
-    hooks:
-      - id: pyupgrade
-        args: [--py39-plus]
-
   - repo: https://github.com/janosh/format-ipy-cells
     rev: v0.1.10
     hooks:
@@ -50,8 +39,4 @@ repos:
       - id: codespell
         stages: [commit, commit-msg]
         exclude_types: [json, csv]
-
-  - repo: https://github.com/PyCQA/autoflake
-    rev: v2.0.1
-    hooks:
-      - id: autoflake
+        args: [--ignore-words-list, 'hist,ihs,te,hte,interruptable']
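
Usage sketch (assuming pre-commit and the pinned ruff release are installed): the consolidated hook can be exercised on its own with

    pre-commit run ruff --all-files   # applies the --fix and --ignore D args configured above
    ruff . --fix --ignore D           # roughly the direct CLI equivalent for ruff v0.0.252; newer releases use the `ruff check` subcommand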
3 changes: 1 addition & 2 deletions notebooks/boston_housing_hmc.py
@@ -1,5 +1,4 @@
-"""
-This notebook essentially runs an end-to-end test comparing RF vs MAP NN vs HMC
+"""This notebook essentially runs an end-to-end test comparing RF vs MAP NN vs HMC
 NN performance on the simple Boston housing dataset.
 """

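These docstring edits, repeated across the notebook files below, are the "fix ruff errors" part of the commit: under the pydocstyle rules selected in pyproject.toml (google convention), a multi-line docstring summary should start on the line with the opening quotes (D212) and end with a period (D415); the blank lines deleted after function docstrings further down address D202. A minimal compliant form, for illustration only:

    def fit_model(X, y):
        """One-line summary ending in a period.

        Optional longer description after a blank line.
        """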
3 changes: 1 addition & 2 deletions notebooks/data/feature_manifolds.py
@@ -1,5 +1,4 @@
-"""
-This notebook plots the Magpie feature space for the Gaultois database with
+"""This notebook plots the Magpie feature space for the Gaultois database with
 several dimensional reduction algorithms (t-SNE, UMAP, PCA) to check for clustering.
 """

3 changes: 1 addition & 2 deletions notebooks/data/gaultois_stats/gaultois_stats.py
@@ -1,5 +1,4 @@
-"""
-This notebook plots the prevalence of different chemical elements in the Gaultois
+"""This notebook plots the prevalence of different chemical elements in the Gaultois
 database in a histogram and onto the periodic table. It also plots histogram for
 the four target columns in the Gaultois database: rho, seebeck, kappa, zT.
 """
3 changes: 1 addition & 2 deletions notebooks/dropout.py
@@ -1,5 +1,4 @@
-"""
-This notebook evaluates the accuracy and uncertainty estimates of dropout neural
+"""This notebook evaluates the accuracy and uncertainty estimates of dropout neural
 networks (DNN) trained with Magpie features on predicting electrical resistivity
 (rho), Seebeck coefficient (S), thermal conductivity (kappa) and thermoelectric
 figure of merit (zT).
3 changes: 1 addition & 2 deletions notebooks/hmc.py
@@ -1,5 +1,4 @@
-"""
-This notebook compares the performance of neural networks trained with maximum a
+"""This notebook compares the performance of neural networks trained with maximum a
 posteriori (MAP) (i.e. maximum likelihood regularized by a prior) and
 Hamiltonian Monte Carlo (HMC).
 """
2 changes: 1 addition & 1 deletion notebooks/leaderboard/cv.py
@@ -1,4 +1,4 @@
-"""Cross-validated benchmarks"""
+"""Cross-validated benchmarks."""


 # %%
3 changes: 1 addition & 2 deletions notebooks/leaderboard/mnf_vs_rf/mnf_vs_rf.py
@@ -1,5 +1,4 @@
-"""
-Benchmarking MNF vs RF (vs. dropout) using Magpie and AMM features
+"""Benchmarking MNF vs RF (vs. dropout) using Magpie and AMM features.

 This notebook compares performance of Multiplicative Normalizing Flow (MNF)
 against random forest (RF) (and dropout), testing first Magpie, then a
22 changes: 11 additions & 11 deletions notebooks/multitask/cross_val.py
@@ -96,27 +96,27 @@ def forward(self, x):
 metrics = {key: [] for key in metrics}

 for epoch in range(model.epoch, total_epochs):
-    for samples, truth in DataLoader(train_set, batch_size=32, shuffle=True):
+    for samples, targets in DataLoader(train_set, batch_size=32, shuffle=True):
         optim.zero_grad()
         preds = model(samples)

-        loss = loss_fn(preds, truth)
+        loss = loss_fn(preds, targets)

         loss.backward()
         optim.step()

         metrics["loss"] += [loss]
         if n_tasks > 1:
-            for name, y_hat, y in zip(short_names, preds.T, truth.T):
+            for name, y_hat, y in zip(short_names, preds.T, targets.T):
                 metrics[f"loss_{name}"] += [loss_fn(y_hat, y)]

         preds = test_set.denorm(preds)
-        truth = test_set.denorm(truth)
+        targets = test_set.denorm(targets)

-        MAE = (preds - truth).abs().mean()
+        MAE = (preds - targets).abs().mean()
         metrics["MAE"] += [MAE]

-        RMSE = (preds - truth).pow(2).mean().sqrt()
+        RMSE = (preds - targets).pow(2).mean().sqrt()
         metrics["RMSE"] += [RMSE]

     if epoch % report_every == 0:
@@ -126,23 +126,23 @@ def forward(self, x):
             f"{sum(val) / len(val):<10.3f}" for val in metrics.values() if val
         )
         print(report)
-        metrics = {key: [] for key in metrics.keys()}
+        metrics = {key: [] for key in metrics}

     model.epoch += 1

 with torch.no_grad():
     preds = model(test_set.X)

 preds = test_set.denorm(preds)
-truth = test_set.denorm(test_set.y)
+targets = test_set.denorm(test_set.y)

 test_preds += [preds]
-test_targets += [truth]
+test_targets += [targets]

-mae = (preds - truth).abs().mean(0)
+mae = (preds - targets).abs().mean(0)
 test_mae += [mae]

-rmse = (preds - truth).pow(2).mean(0).sqrt()
+rmse = (preds - targets).pow(2).mean(0).sqrt()
 test_rmse += [rmse]

 print(f"\ntest set: avg. MAE = {mae.mean():.3f}, avg. RMSE = {rmse.mean():.3f}")
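Two kinds of change in this hunk: the `truth` → `targets` rename, which appears to be purely for readability, and the `metrics.keys()` → `metrics` simplification, which follows from the flake8-simplify (`SIM`) rules selected in pyproject.toml: iterating a dict already yields its keys, so `.keys()` is redundant. A standalone illustration, not taken from the repo:

    metrics = {"loss": [], "MAE": [], "RMSE": []}
    assert list(metrics) == list(metrics.keys())  # a dict iterates over its keys
    metrics = {key: [] for key in metrics}        # reset each metric list without calling .keys()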
2 changes: 1 addition & 1 deletion notebooks/multitask/ensemble.py
@@ -111,7 +111,7 @@ def forward(self, x):
             f"{sum(val) / len(val):<10.3f}" for val in metrics.values() if val
         )
         print(report)
-        metrics = {key: [] for key in metrics.keys()}
+        metrics = {key: [] for key in metrics}

     model.epoch += 1

3 changes: 1 addition & 2 deletions notebooks/random_forest.py
@@ -1,5 +1,4 @@
-"""
-This notebook evaluates the accuracy and uncertainty estimates of random forest
+"""This notebook evaluates the accuracy and uncertainty estimates of random forest
 (RF) trained with Magpie features on predicting electrical resistivity (rho),
 Seebeck coefficient (S), thermal conductivity (kappa) and thermoelectric figure
 of merit (zT).
3 changes: 1 addition & 2 deletions notebooks/relaxation_time.py
@@ -1,5 +1,4 @@
-"""
-This notebook fits a function to the relaxation time of GeSe and extrapolates it
+"""This notebook fits a function to the relaxation time of GeSe and extrapolates it
 to experimentally non-measured temperatures.
 """

3 changes: 1 addition & 2 deletions notebooks/screen/dft.py
@@ -1,5 +1,4 @@
-"""
-This notebook plots DFT results for thermoelectric properties of several
+"""This notebook plots DFT results for thermoelectric properties of several
 candidate materials identified via random forest regression and portfolio-like
 risk management. See src/notsbooks/screen/random_forest.py for details.
 """
5 changes: 2 additions & 3 deletions notebooks/screen/mnf_magpie/mnf_magpie_screen.py
@@ -1,6 +1,5 @@
-"""
-This notebook screens synthesizable materials from ICSD and COD
-for viable thermoelectrics
+"""This notebook screens synthesizable materials from ICSD and COD
+for viable thermoelectrics.
 """


3 changes: 1 addition & 2 deletions notebooks/screen/random_forest_magpie/random_forest.py
@@ -1,5 +1,4 @@
-"""
-This notebook screens a combined list of synthesizable materials from ICSD and
+"""This notebook screens a combined list of synthesizable materials from ICSD and
 COD databases for promising thermoelectric candidates using random forest
 regression.
 """
52 changes: 52 additions & 0 deletions pyproject.toml
@@ -0,0 +1,52 @@
[project]
dependencies = [
"automatminer",
"gurobipy",
"matminer",
"matplotlib",
"ml-matrics",
"numpy",
"pandas",
"scikit-learn",
"scikit-optimize",
"scipy",
"seaborn",
"tensorflow",
"tensorflow-probability",
"torch",
"tqdm",
"umap-learn",
]

[tool.codespell]
ignore-words-list = "hist,ihs,te,hte,interruptable"

[tool.ruff]
target-version = "py38"
select = [
"B", # flake8-bugbear
"D", # pydocstyle
"E", # pycodestyle
"F", # pyflakes
"I", # isort
"PLE", # pylint error
"PLW", # pylint warning
"PYI", # flakes8-pyi
"Q", # flake8-quotes
"SIM", # flake8-simplify
"TID", # tidy imports
"UP", # pyupgrade
"W", # pycodestyle
"YTT", # flake8-2020
]
ignore = [
"D100", # Missing docstring in public module
"D104", # Missing docstring in public package
"D205", # 1 blank line required between summary line and description
"SIM105", # Use contextlib.suppress(FileNotFoundError) instead of try-except-pass
"SIM115", # Use context handler for opening files
"E731", # Do not assign a lambda expression, use a def
"PLW2901", # Outer for loop variable overwritten by inner assignment target
]
pydocstyle.convention = "google"
isort.lines-after-imports = 2
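
ruff reads the [tool.ruff] table automatically, so the pre-commit hook and direct invocations share one configuration ([tool.codespell] plays the same role for codespell releases with TOML support). A usage sketch, with flag spellings per the pinned v0.0.252 (current releases use the `ruff check` subcommand):

    ruff . --fix       # lint and autofix with the select/ignore sets above
    ruff . --select D  # e.g. surface the docstring rules that the pre-commit hook skips via --ignore D
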
16 changes: 0 additions & 16 deletions requirements.txt

This file was deleted.

25 changes: 0 additions & 25 deletions setup.cfg

This file was deleted.

2 changes: 1 addition & 1 deletion thermo/bnn/hmc.py
@@ -203,7 +203,7 @@ def ess(chains, **kwargs):


 def r_hat(tensors):
-    """TFP docs: http://tiny.cc/5bq6tz"""
+    """TFP docs: http://tiny.cc/5bq6tz."""
     return [tfp.mcmc.diagnostic.potential_scale_reduction(t) for t in tensors]


1 change: 0 additions & 1 deletion thermo/bnn/map.py
@@ -46,7 +46,6 @@ def map_predict(weight_prior, bias_prior, X_train, y_train, X_test, y_test):
         bias_prior (tfp.distribution): Prior probability for the biases
         [X/y_train/test] (np.arrays): Train and test sets
     """
-
     log_prob_tracers = (
         bnn.tracer_factory(X_train, y_train),
         bnn.tracer_factory(X_test, y_test),
5 changes: 2 additions & 3 deletions thermo/bnn/tf_dropout.py
@@ -73,12 +73,11 @@ def __init__(

 @timed
 def predict(model, X_test, n_preds=100):
-    """
-    perform n_preds Monte Carlo predictions (i.e. with dropout)
+    """Perform n_preds Monte Carlo predictions (i.e. with dropout)
     save and return predictive mean and total uncertainty
     model: pre-trained Keras model
     X_test: features tensor
-    n_preds: number of predictions (with dropout)
+    n_preds: number of predictions (with dropout).
     """
     if model.uncertainty == "aleatoric":
         y_pred, y_log_var = tf.squeeze(model.predict(X_test))
5 changes: 2 additions & 3 deletions thermo/bnn/torch_dropout.py
@@ -71,8 +71,8 @@ def robust_l2_loss(targets, preds, log_stds):


 class TorchDropoutModel(nn.Sequential):
-    """
-    Constructs a dropout network with aleatoric and/or epistemic uncertainty estimation.
+    """Constructs a dropout network with aleatoric and/or epistemic uncertainty
+    estimation.
     """

     def __init__(
@@ -122,7 +122,6 @@ def __init__(
     @torch.no_grad()
     def write_metrics(self, targets, output, denorm, prefix):
         """After an epoch, save evaluation metrics to a dict."""
-
         output, targets = torch.cat(output), torch.cat(targets)
         loss = self.loss_fn(targets, output)

2 changes: 0 additions & 2 deletions thermo/data/transform.py
@@ -32,7 +32,6 @@ def train_test_split(*dfs, test_size: float = 0.1, train=None):
     """Returns training set, test set or both set (split according to test_size)
     depending on train being True, False or None.
     """
-
     test_index = dfs[0].sample(frac=test_size, random_state=0).index
     mask = dfs[0].index.isin(test_index)

@@ -64,7 +63,6 @@ def normalize(df, mean=None, std=None):
     """If mean and std are None, normalize array/dataframe columns to have
     zero mean and unit std. Else use mean and std as provided for normalization.
     """
-
     if mean is None:
         mean = df.mean(0)
     if std is None:
2 changes: 1 addition & 1 deletion thermo/rf.py
@@ -14,7 +14,7 @@

 class RandomForestRegressor(RFR):
     """Adapted from scikit-optimize.
-    https://github.com/scikit-optimize/scikit-optimize/blob/master/skopt/learning/forest.py
+    https://github.com/scikit-optimize/scikit-optimize/blob/master/skopt/learning/forest.py.

     Uncertainty estimation: get_var() computes var(y|X_test) as described in sec. 4.3.2
     of https://arxiv.org/abs/1211.0906.
3 changes: 1 addition & 2 deletions thermo/utils/decorators.py
@@ -38,8 +38,7 @@ def timed_func(*args, **kwargs):


 def squeeze(func: Callable) -> Callable:
-    """unpacks single-entry lists from the decorated function's return value"""
-
+    """Unpack single-entry lists from the decorated function's return value."""
     isiter = lambda x: isinstance(x, (list, tuple))

     @wraps(func)
