diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d5d1ac6..5eec3f7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,28 +6,17 @@ default_stages: [commit] default_install_hook_types: [pre-commit, commit-msg] repos: - - repo: https://github.com/PyCQA/isort - rev: 5.12.0 + - repo: https://github.com/charliermarsh/ruff-pre-commit + rev: v0.0.252 hooks: - - id: isort + - id: ruff + args: [--fix, --ignore, D] - repo: https://github.com/psf/black rev: 23.1.0 hooks: - id: black-jupyter - - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 - hooks: - - id: flake8 - additional_dependencies: [flake8-bugbear] - - - repo: https://github.com/asottile/pyupgrade - rev: v3.3.1 - hooks: - - id: pyupgrade - args: [--py39-plus] - - repo: https://github.com/janosh/format-ipy-cells rev: v0.1.10 hooks: @@ -50,8 +39,4 @@ repos: - id: codespell stages: [commit, commit-msg] exclude_types: [json, csv] - - - repo: https://github.com/PyCQA/autoflake - rev: v2.0.1 - hooks: - - id: autoflake + args: [--ignore-words-list, 'hist,ihs,te,hte,interruptable'] diff --git a/notebooks/boston_housing_hmc.py b/notebooks/boston_housing_hmc.py index 196a0db..b93dcda 100644 --- a/notebooks/boston_housing_hmc.py +++ b/notebooks/boston_housing_hmc.py @@ -1,5 +1,4 @@ -""" -This notebook essentially runs an end-to-end test comparing RF vs MAP NN vs HMC +"""This notebook essentially runs an end-to-end test comparing RF vs MAP NN vs HMC NN performance on the simple Boston housing dataset. """ diff --git a/notebooks/data/feature_manifolds.py b/notebooks/data/feature_manifolds.py index 879c6f1..563c2ac 100644 --- a/notebooks/data/feature_manifolds.py +++ b/notebooks/data/feature_manifolds.py @@ -1,5 +1,4 @@ -""" -This notebook plots the Magpie feature space for the Gaultois database with +"""This notebook plots the Magpie feature space for the Gaultois database with several dimensional reduction algorithms (t-SNE, UMAP, PCA) to check for clustering. """ diff --git a/notebooks/data/gaultois_stats/gaultois_stats.py b/notebooks/data/gaultois_stats/gaultois_stats.py index 3f22df9..043d010 100644 --- a/notebooks/data/gaultois_stats/gaultois_stats.py +++ b/notebooks/data/gaultois_stats/gaultois_stats.py @@ -1,5 +1,4 @@ -""" -This notebook plots the prevalence of different chemical elements in the Gaultois +"""This notebook plots the prevalence of different chemical elements in the Gaultois database in a histogram and onto the periodic table. It also plots histogram for the four target columns in the Gaultois database: rho, seebeck, kappa, zT. """ diff --git a/notebooks/dropout.py b/notebooks/dropout.py index 6e1639f..c985017 100644 --- a/notebooks/dropout.py +++ b/notebooks/dropout.py @@ -1,5 +1,4 @@ -""" -This notebook evaluates the accuracy and uncertainty estimates of dropout neural +"""This notebook evaluates the accuracy and uncertainty estimates of dropout neural networks (DNN) trained with Magpie features on predicting electrical resistivity (rho), Seebeck coefficient (S), thermal conductivity (kappa) and thermoelectric figure of merit (zT). diff --git a/notebooks/hmc.py b/notebooks/hmc.py index abd284b..fc4f046 100644 --- a/notebooks/hmc.py +++ b/notebooks/hmc.py @@ -1,5 +1,4 @@ -""" -This notebook compares the performance of neural networks trained with maximum a +"""This notebook compares the performance of neural networks trained with maximum a posteriori (MAP) (i.e. maximum likelihood regularized by a prior) and Hamiltonian Monte Carlo (HMC). """ diff --git a/notebooks/leaderboard/cv.py b/notebooks/leaderboard/cv.py index 42500f7..1415c40 100644 --- a/notebooks/leaderboard/cv.py +++ b/notebooks/leaderboard/cv.py @@ -1,4 +1,4 @@ -"""Cross-validated benchmarks""" +"""Cross-validated benchmarks.""" # %% diff --git a/notebooks/leaderboard/mnf_vs_rf/mnf_vs_rf.py b/notebooks/leaderboard/mnf_vs_rf/mnf_vs_rf.py index da44afc..c59ca9f 100644 --- a/notebooks/leaderboard/mnf_vs_rf/mnf_vs_rf.py +++ b/notebooks/leaderboard/mnf_vs_rf/mnf_vs_rf.py @@ -1,5 +1,4 @@ -""" -Benchmarking MNF vs RF (vs. dropout) using Magpie and AMM features +"""Benchmarking MNF vs RF (vs. dropout) using Magpie and AMM features. This notebook compares performance of Multiplicative Normalizing Flow (MNF) against random forest (RF) (and dropout), testing first Magpie, then a diff --git a/notebooks/multitask/cross_val.py b/notebooks/multitask/cross_val.py index f999bf2..982c454 100644 --- a/notebooks/multitask/cross_val.py +++ b/notebooks/multitask/cross_val.py @@ -96,27 +96,27 @@ def forward(self, x): metrics = {key: [] for key in metrics} for epoch in range(model.epoch, total_epochs): - for samples, truth in DataLoader(train_set, batch_size=32, shuffle=True): + for samples, targets in DataLoader(train_set, batch_size=32, shuffle=True): optim.zero_grad() preds = model(samples) - loss = loss_fn(preds, truth) + loss = loss_fn(preds, targets) loss.backward() optim.step() metrics["loss"] += [loss] if n_tasks > 1: - for name, y_hat, y in zip(short_names, preds.T, truth.T): + for name, y_hat, y in zip(short_names, preds.T, targets.T): metrics[f"loss_{name}"] += [loss_fn(y_hat, y)] preds = test_set.denorm(preds) - truth = test_set.denorm(truth) + targets = test_set.denorm(targets) - MAE = (preds - truth).abs().mean() + MAE = (preds - targets).abs().mean() metrics["MAE"] += [MAE] - RMSE = (preds - truth).pow(2).mean().sqrt() + RMSE = (preds - targets).pow(2).mean().sqrt() metrics["RMSE"] += [RMSE] if epoch % report_every == 0: @@ -126,7 +126,7 @@ def forward(self, x): f"{sum(val) / len(val):<10.3f}" for val in metrics.values() if val ) print(report) - metrics = {key: [] for key in metrics.keys()} + metrics = {key: [] for key in metrics} model.epoch += 1 @@ -134,15 +134,15 @@ def forward(self, x): preds = model(test_set.X) preds = test_set.denorm(preds) - truth = test_set.denorm(test_set.y) + targets = test_set.denorm(test_set.y) test_preds += [preds] - test_targets += [truth] + test_targets += [targets] - mae = (preds - truth).abs().mean(0) + mae = (preds - targets).abs().mean(0) test_mae += [mae] - rmse = (preds - truth).pow(2).mean(0).sqrt() + rmse = (preds - targets).pow(2).mean(0).sqrt() test_rmse += [rmse] print(f"\ntest set: avg. MAE = {mae.mean():.3f}, avg. RMSE = {rmse.mean():.3f}") diff --git a/notebooks/multitask/ensemble.py b/notebooks/multitask/ensemble.py index edd8dac..353f4a3 100644 --- a/notebooks/multitask/ensemble.py +++ b/notebooks/multitask/ensemble.py @@ -111,7 +111,7 @@ def forward(self, x): f"{sum(val) / len(val):<10.3f}" for val in metrics.values() if val ) print(report) - metrics = {key: [] for key in metrics.keys()} + metrics = {key: [] for key in metrics} model.epoch += 1 diff --git a/notebooks/random_forest.py b/notebooks/random_forest.py index 1f7a9a7..2fef306 100644 --- a/notebooks/random_forest.py +++ b/notebooks/random_forest.py @@ -1,5 +1,4 @@ -""" -This notebook evaluates the accuracy and uncertainty estimates of random forest +"""This notebook evaluates the accuracy and uncertainty estimates of random forest (RF) trained with Magpie features on predicting electrical resistivity (rho), Seebeck coefficient (S), thermal conductivity (kappa) and thermoelectric figure of merit (zT). diff --git a/notebooks/relaxation_time.py b/notebooks/relaxation_time.py index 0f8198c..7c3a541 100644 --- a/notebooks/relaxation_time.py +++ b/notebooks/relaxation_time.py @@ -1,5 +1,4 @@ -""" -This notebook fits a function to the relaxation time of GeSe and extrapolates it +"""This notebook fits a function to the relaxation time of GeSe and extrapolates it to experimentally non-measured temperatures. """ diff --git a/notebooks/screen/dft.py b/notebooks/screen/dft.py index bdeade8..2d62e31 100644 --- a/notebooks/screen/dft.py +++ b/notebooks/screen/dft.py @@ -1,5 +1,4 @@ -""" -This notebook plots DFT results for thermoelectric properties of several +"""This notebook plots DFT results for thermoelectric properties of several candidate materials identified via random forest regression and portfolio-like risk management. See src/notsbooks/screen/random_forest.py for details. """ diff --git a/notebooks/screen/mnf_magpie/mnf_magpie_screen.py b/notebooks/screen/mnf_magpie/mnf_magpie_screen.py index 6d0667d..9077023 100644 --- a/notebooks/screen/mnf_magpie/mnf_magpie_screen.py +++ b/notebooks/screen/mnf_magpie/mnf_magpie_screen.py @@ -1,6 +1,5 @@ -""" -This notebook screens synthesizable materials from ICSD and COD -for viable thermoelectrics +"""This notebook screens synthesizable materials from ICSD and COD +for viable thermoelectrics. """ diff --git a/notebooks/screen/random_forest_magpie/random_forest.py b/notebooks/screen/random_forest_magpie/random_forest.py index 9473c5a..d6cd494 100644 --- a/notebooks/screen/random_forest_magpie/random_forest.py +++ b/notebooks/screen/random_forest_magpie/random_forest.py @@ -1,5 +1,4 @@ -""" -This notebook screens a combined list of synthesizable materials from ICSD and +"""This notebook screens a combined list of synthesizable materials from ICSD and COD databases for promising thermoelectric candidates using random forest regression. """ diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..035a5ab --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,52 @@ +[project] +dependencies = [ + "automatminer", + "gurobipy", + "matminer", + "matplotlib", + "ml-matrics", + "numpy", + "pandas", + "scikit-learn", + "scikit-optimize", + "scipy", + "seaborn", + "tensorflow", + "tensorflow-probability", + "torch", + "tqdm", + "umap-learn", +] + +[tool.codespell] +ignore-words-list = "hist,ihs,te,hte,interruptable" + +[tool.ruff] +target-version = "py38" +select = [ + "B", # flake8-bugbear + "D", # pydocstyle + "E", # pycodestyle + "F", # pyflakes + "I", # isort + "PLE", # pylint error + "PLW", # pylint warning + "PYI", # flakes8-pyi + "Q", # flake8-quotes + "SIM", # flake8-simplify + "TID", # tidy imports + "UP", # pyupgrade + "W", # pycodestyle + "YTT", # flake8-2020 +] +ignore = [ + "D100", # Missing docstring in public module + "D104", # Missing docstring in public package + "D205", # 1 blank line required between summary line and description + "SIM105", # Use contextlib.suppress(FileNotFoundError) instead of try-except-pass + "SIM115", # Use context handler for opening files + "E731", # Do not assign a lambda expression, use a def + "PLW2901", # Outer for loop variable overwritten by inner assignment target +] +pydocstyle.convention = "google" +isort.lines-after-imports = 2 diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index a06a965..0000000 --- a/requirements.txt +++ /dev/null @@ -1,16 +0,0 @@ -automatminer -gurobipy -matminer -matplotlib -ml-matrics -numpy -pandas -scikit-learn -scikit-optimize -scipy -seaborn -tensorflow -tensorflow-probability -torch -tqdm -umap-learn diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 82cc052..0000000 --- a/setup.cfg +++ /dev/null @@ -1,25 +0,0 @@ -[flake8] -# Use black's default line length. -max-line-length = 88 -# E731: do not assign a lambda expression, use a def -# E203: whitespace before ':' -# W503: line break before binary operator -ignore = E203, E731, W503 -max-complexity = 12 -per-file-ignores = - # F401: imported but unused - __init__.py: F401 - -[isort] -profile = black -lines_after_imports = 2 - -[codespell] -ignore-words-list = hist,ihs,te,hte,interruptable - -[autoflake] -in-place = true -remove-unused-variables = true -remove-all-unused-imports = true -expand-star-imports = true -ignore-init-module-imports = true diff --git a/thermo/bnn/hmc.py b/thermo/bnn/hmc.py index 18c6c46..f6a6ba2 100644 --- a/thermo/bnn/hmc.py +++ b/thermo/bnn/hmc.py @@ -203,7 +203,7 @@ def ess(chains, **kwargs): def r_hat(tensors): - """TFP docs: http://tiny.cc/5bq6tz""" + """TFP docs: http://tiny.cc/5bq6tz.""" return [tfp.mcmc.diagnostic.potential_scale_reduction(t) for t in tensors] diff --git a/thermo/bnn/map.py b/thermo/bnn/map.py index 9607bad..c4c538c 100644 --- a/thermo/bnn/map.py +++ b/thermo/bnn/map.py @@ -46,7 +46,6 @@ def map_predict(weight_prior, bias_prior, X_train, y_train, X_test, y_test): bias_prior (tfp.distribution): Prior probability for the biases [X/y_train/test] (np.arrays): Train and test sets """ - log_prob_tracers = ( bnn.tracer_factory(X_train, y_train), bnn.tracer_factory(X_test, y_test), diff --git a/thermo/bnn/tf_dropout.py b/thermo/bnn/tf_dropout.py index 24a8bed..a5dbcda 100644 --- a/thermo/bnn/tf_dropout.py +++ b/thermo/bnn/tf_dropout.py @@ -73,12 +73,11 @@ def __init__( @timed def predict(model, X_test, n_preds=100): - """ - perform n_preds Monte Carlo predictions (i.e. with dropout) + """Perform n_preds Monte Carlo predictions (i.e. with dropout) save and return predictive mean and total uncertainty model: pre-trained Keras model X_test: features tensor - n_preds: number of predictions (with dropout) + n_preds: number of predictions (with dropout). """ if model.uncertainty == "aleatoric": y_pred, y_log_var = tf.squeeze(model.predict(X_test)) diff --git a/thermo/bnn/torch_dropout.py b/thermo/bnn/torch_dropout.py index dcb0282..64728b5 100644 --- a/thermo/bnn/torch_dropout.py +++ b/thermo/bnn/torch_dropout.py @@ -71,8 +71,8 @@ def robust_l2_loss(targets, preds, log_stds): class TorchDropoutModel(nn.Sequential): - """ - Constructs a dropout network with aleatoric and/or epistemic uncertainty estimation. + """Constructs a dropout network with aleatoric and/or epistemic uncertainty + estimation. """ def __init__( @@ -122,7 +122,6 @@ def __init__( @torch.no_grad() def write_metrics(self, targets, output, denorm, prefix): """After an epoch, save evaluation metrics to a dict.""" - output, targets = torch.cat(output), torch.cat(targets) loss = self.loss_fn(targets, output) diff --git a/thermo/data/transform.py b/thermo/data/transform.py index a70f5b3..8168d91 100644 --- a/thermo/data/transform.py +++ b/thermo/data/transform.py @@ -32,7 +32,6 @@ def train_test_split(*dfs, test_size: float = 0.1, train=None): """Returns training set, test set or both set (split according to test_size) depending on train being True, False or None. """ - test_index = dfs[0].sample(frac=test_size, random_state=0).index mask = dfs[0].index.isin(test_index) @@ -64,7 +63,6 @@ def normalize(df, mean=None, std=None): """If mean and std are None, normalize array/dataframe columns to have zero mean and unit std. Else use mean and std as provided for normalization. """ - if mean is None: mean = df.mean(0) if std is None: diff --git a/thermo/rf.py b/thermo/rf.py index f70be77..d31e50a 100644 --- a/thermo/rf.py +++ b/thermo/rf.py @@ -14,7 +14,7 @@ class RandomForestRegressor(RFR): """Adapted from scikit-optimize. - https://github.com/scikit-optimize/scikit-optimize/blob/master/skopt/learning/forest.py + https://github.com/scikit-optimize/scikit-optimize/blob/master/skopt/learning/forest.py. Uncertainty estimation: get_var() computes var(y|X_test) as described in sec. 4.3.2 of https://arxiv.org/abs/1211.0906. diff --git a/thermo/utils/decorators.py b/thermo/utils/decorators.py index 987ac76..1423a59 100644 --- a/thermo/utils/decorators.py +++ b/thermo/utils/decorators.py @@ -38,8 +38,7 @@ def timed_func(*args, **kwargs): def squeeze(func: Callable) -> Callable: - """unpacks single-entry lists from the decorated function's return value""" - + """Unpack single-entry lists from the decorated function's return value.""" isiter = lambda x: isinstance(x, (list, tuple)) @wraps(func)