Commit

Pyproject (#25)

* migrate setup.cfg and requirements.txt to pyproject.toml

* replace flake8 isort autoflake pre-commit hooks with ruff

* add ruff config to pyproject.toml

* fix ruff errors
janosh authored Feb 24, 2023
1 parent 938bbe2 commit a4240b4
Showing 25 changed files with 89 additions and 110 deletions.
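
For orientation (a sketch, not instructions from the commit itself): with dependencies and tool settings consolidated in pyproject.toml, the local workflow would look roughly like

    pip install -e .            # resolve dependencies from [project] in pyproject.toml (assumes the project table also declares a name/version and a build backend)
    pre-commit install          # set up the hooks from .pre-commit-config.yaml
    pre-commit run --all-files  # ruff now covers what flake8, isort, autoflake and pyupgrade did
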
25 changes: 5 additions & 20 deletions .pre-commit-config.yaml
@@ -6,28 +6,17 @@ default_stages: [commit]
 default_install_hook_types: [pre-commit, commit-msg]

 repos:
-  - repo: https://github.com/PyCQA/isort
-    rev: 5.12.0
+  - repo: https://github.com/charliermarsh/ruff-pre-commit
+    rev: v0.0.252
     hooks:
-      - id: isort
+      - id: ruff
+        args: [--fix, --ignore, D]

   - repo: https://github.com/psf/black
     rev: 23.1.0
     hooks:
       - id: black-jupyter

-  - repo: https://github.com/PyCQA/flake8
-    rev: 6.0.0
-    hooks:
-      - id: flake8
-        additional_dependencies: [flake8-bugbear]
-
-  - repo: https://github.com/asottile/pyupgrade
-    rev: v3.3.1
-    hooks:
-      - id: pyupgrade
-        args: [--py39-plus]
-
   - repo: https://github.com/janosh/format-ipy-cells
     rev: v0.1.10
     hooks:
@@ -50,8 +39,4 @@ repos:
       - id: codespell
         stages: [commit, commit-msg]
         exclude_types: [json, csv]
-
-  - repo: https://github.com/PyCQA/autoflake
-    rev: v2.0.1
-    hooks:
-      - id: autoflake
+        args: [--ignore-words-list, 'hist,ihs,te,hte,interruptable']
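
Usage sketch (assuming pre-commit and the pinned ruff release are installed): the consolidated hook can be exercised on its own with

    pre-commit run ruff --all-files   # applies the --fix and --ignore D args configured above
    ruff . --fix --ignore D           # roughly the direct CLI equivalent for ruff v0.0.252; newer releases use the `ruff check` subcommand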
3 changes: 1 addition & 2 deletions notebooks/boston_housing_hmc.py
@@ -1,5 +1,4 @@
-"""
-This notebook essentially runs an end-to-end test comparing RF vs MAP NN vs HMC
+"""This notebook essentially runs an end-to-end test comparing RF vs MAP NN vs HMC
 NN performance on the simple Boston housing dataset.
 """

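These docstring edits, repeated across the notebook files below, are the "fix ruff errors" part of the commit: under the pydocstyle rules selected in pyproject.toml (google convention), a multi-line docstring summary should start on the line with the opening quotes (D212) and end with a period (D415); the blank lines deleted after function docstrings further down address D202. A minimal compliant form, for illustration only:

    def fit_model(X, y):
        """One-line summary ending in a period.

        Optional longer description after a blank line.
        """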
3 changes: 1 addition & 2 deletions notebooks/data/feature_manifolds.py
@@ -1,5 +1,4 @@
-"""
-This notebook plots the Magpie feature space for the Gaultois database with
+"""This notebook plots the Magpie feature space for the Gaultois database with
 several dimensional reduction algorithms (t-SNE, UMAP, PCA) to check for clustering.
 """

3 changes: 1 addition & 2 deletions notebooks/data/gaultois_stats/gaultois_stats.py
@@ -1,5 +1,4 @@
-"""
-This notebook plots the prevalence of different chemical elements in the Gaultois
+"""This notebook plots the prevalence of different chemical elements in the Gaultois
 database in a histogram and onto the periodic table. It also plots histogram for
 the four target columns in the Gaultois database: rho, seebeck, kappa, zT.
 """
3 changes: 1 addition & 2 deletions notebooks/dropout.py
@@ -1,5 +1,4 @@
-"""
-This notebook evaluates the accuracy and uncertainty estimates of dropout neural
+"""This notebook evaluates the accuracy and uncertainty estimates of dropout neural
 networks (DNN) trained with Magpie features on predicting electrical resistivity
 (rho), Seebeck coefficient (S), thermal conductivity (kappa) and thermoelectric
 figure of merit (zT).
3 changes: 1 addition & 2 deletions notebooks/hmc.py
@@ -1,5 +1,4 @@
-"""
-This notebook compares the performance of neural networks trained with maximum a
+"""This notebook compares the performance of neural networks trained with maximum a
 posteriori (MAP) (i.e. maximum likelihood regularized by a prior) and
 Hamiltonian Monte Carlo (HMC).
 """
2 changes: 1 addition & 1 deletion notebooks/leaderboard/cv.py
@@ -1,4 +1,4 @@
-"""Cross-validated benchmarks"""
+"""Cross-validated benchmarks."""


 # %%
3 changes: 1 addition & 2 deletions notebooks/leaderboard/mnf_vs_rf/mnf_vs_rf.py
@@ -1,5 +1,4 @@
-"""
-Benchmarking MNF vs RF (vs. dropout) using Magpie and AMM features
+"""Benchmarking MNF vs RF (vs. dropout) using Magpie and AMM features.

 This notebook compares performance of Multiplicative Normalizing Flow (MNF)
 against random forest (RF) (and dropout), testing first Magpie, then a
22 changes: 11 additions & 11 deletions notebooks/multitask/cross_val.py
@@ -96,27 +96,27 @@ def forward(self, x):
 metrics = {key: [] for key in metrics}

 for epoch in range(model.epoch, total_epochs):
-    for samples, truth in DataLoader(train_set, batch_size=32, shuffle=True):
+    for samples, targets in DataLoader(train_set, batch_size=32, shuffle=True):
         optim.zero_grad()
         preds = model(samples)

-        loss = loss_fn(preds, truth)
+        loss = loss_fn(preds, targets)

         loss.backward()
         optim.step()

         metrics["loss"] += [loss]
         if n_tasks > 1:
-            for name, y_hat, y in zip(short_names, preds.T, truth.T):
+            for name, y_hat, y in zip(short_names, preds.T, targets.T):
                 metrics[f"loss_{name}"] += [loss_fn(y_hat, y)]

         preds = test_set.denorm(preds)
-        truth = test_set.denorm(truth)
+        targets = test_set.denorm(targets)

-        MAE = (preds - truth).abs().mean()
+        MAE = (preds - targets).abs().mean()
         metrics["MAE"] += [MAE]

-        RMSE = (preds - truth).pow(2).mean().sqrt()
+        RMSE = (preds - targets).pow(2).mean().sqrt()
         metrics["RMSE"] += [RMSE]

     if epoch % report_every == 0:
@@ -126,23 +126,23 @@ def forward(self, x):
             f"{sum(val) / len(val):<10.3f}" for val in metrics.values() if val
         )
         print(report)
-        metrics = {key: [] for key in metrics.keys()}
+        metrics = {key: [] for key in metrics}

     model.epoch += 1

 with torch.no_grad():
     preds = model(test_set.X)

 preds = test_set.denorm(preds)
-truth = test_set.denorm(test_set.y)
+targets = test_set.denorm(test_set.y)

 test_preds += [preds]
-test_targets += [truth]
+test_targets += [targets]

-mae = (preds - truth).abs().mean(0)
+mae = (preds - targets).abs().mean(0)
 test_mae += [mae]

-rmse = (preds - truth).pow(2).mean(0).sqrt()
+rmse = (preds - targets).pow(2).mean(0).sqrt()
 test_rmse += [rmse]

 print(f"\ntest set: avg. MAE = {mae.mean():.3f}, avg. RMSE = {rmse.mean():.3f}")
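Two kinds of change in this hunk: the `truth` → `targets` rename, which appears to be purely for readability, and the `metrics.keys()` → `metrics` simplification, which follows from the flake8-simplify (`SIM`) rules selected in pyproject.toml: iterating a dict already yields its keys, so `.keys()` is redundant. A standalone illustration, not taken from the repo:

    metrics = {"loss": [], "MAE": [], "RMSE": []}
    assert list(metrics) == list(metrics.keys())  # a dict iterates over its keys
    metrics = {key: [] for key in metrics}        # reset each metric list without calling .keys()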
2 changes: 1 addition & 1 deletion notebooks/multitask/ensemble.py
@@ -111,7 +111,7 @@ def forward(self, x):
             f"{sum(val) / len(val):<10.3f}" for val in metrics.values() if val
         )
         print(report)
-        metrics = {key: [] for key in metrics.keys()}
+        metrics = {key: [] for key in metrics}

     model.epoch += 1

3 changes: 1 addition & 2 deletions notebooks/random_forest.py
@@ -1,5 +1,4 @@
-"""
-This notebook evaluates the accuracy and uncertainty estimates of random forest
+"""This notebook evaluates the accuracy and uncertainty estimates of random forest
 (RF) trained with Magpie features on predicting electrical resistivity (rho),
 Seebeck coefficient (S), thermal conductivity (kappa) and thermoelectric figure
 of merit (zT).
3 changes: 1 addition & 2 deletions notebooks/relaxation_time.py
@@ -1,5 +1,4 @@
-"""
-This notebook fits a function to the relaxation time of GeSe and extrapolates it
+"""This notebook fits a function to the relaxation time of GeSe and extrapolates it
 to experimentally non-measured temperatures.
 """

3 changes: 1 addition & 2 deletions notebooks/screen/dft.py
@@ -1,5 +1,4 @@
-"""
-This notebook plots DFT results for thermoelectric properties of several
+"""This notebook plots DFT results for thermoelectric properties of several
 candidate materials identified via random forest regression and portfolio-like
 risk management. See src/notsbooks/screen/random_forest.py for details.
 """
5 changes: 2 additions & 3 deletions notebooks/screen/mnf_magpie/mnf_magpie_screen.py
@@ -1,6 +1,5 @@
-"""
-This notebook screens synthesizable materials from ICSD and COD
-for viable thermoelectrics
+"""This notebook screens synthesizable materials from ICSD and COD
+for viable thermoelectrics.
 """


3 changes: 1 addition & 2 deletions notebooks/screen/random_forest_magpie/random_forest.py
@@ -1,5 +1,4 @@
-"""
-This notebook screens a combined list of synthesizable materials from ICSD and
+"""This notebook screens a combined list of synthesizable materials from ICSD and
 COD databases for promising thermoelectric candidates using random forest
 regression.
 """
52 changes: 52 additions & 0 deletions pyproject.toml
@@ -0,0 +1,52 @@
[project]
dependencies = [
"automatminer",
"gurobipy",
"matminer",
"matplotlib",
"ml-matrics",
"numpy",
"pandas",
"scikit-learn",
"scikit-optimize",
"scipy",
"seaborn",
"tensorflow",
"tensorflow-probability",
"torch",
"tqdm",
"umap-learn",
]

[tool.codespell]
ignore-words-list = "hist,ihs,te,hte,interruptable"

[tool.ruff]
target-version = "py38"
select = [
"B", # flake8-bugbear
"D", # pydocstyle
"E", # pycodestyle
"F", # pyflakes
"I", # isort
"PLE", # pylint error
"PLW", # pylint warning
"PYI", # flakes8-pyi
"Q", # flake8-quotes
"SIM", # flake8-simplify
"TID", # tidy imports
"UP", # pyupgrade
"W", # pycodestyle
"YTT", # flake8-2020
]
ignore = [
"D100", # Missing docstring in public module
"D104", # Missing docstring in public package
"D205", # 1 blank line required between summary line and description
"SIM105", # Use contextlib.suppress(FileNotFoundError) instead of try-except-pass
"SIM115", # Use context handler for opening files
"E731", # Do not assign a lambda expression, use a def
"PLW2901", # Outer for loop variable overwritten by inner assignment target
]
pydocstyle.convention = "google"
isort.lines-after-imports = 2
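
ruff reads the [tool.ruff] table automatically, so the pre-commit hook and direct invocations share one configuration ([tool.codespell] plays the same role for codespell releases with TOML support). A usage sketch, with flag spellings per the pinned v0.0.252 (current releases use the `ruff check` subcommand):

    ruff . --fix       # lint and autofix with the select/ignore sets above
    ruff . --select D  # e.g. surface the docstring rules that the pre-commit hook skips via --ignore D
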
16 changes: 0 additions & 16 deletions requirements.txt

This file was deleted.

25 changes: 0 additions & 25 deletions setup.cfg

This file was deleted.

2 changes: 1 addition & 1 deletion thermo/bnn/hmc.py
@@ -203,7 +203,7 @@ def ess(chains, **kwargs):


 def r_hat(tensors):
-    """TFP docs: http://tiny.cc/5bq6tz"""
+    """TFP docs: http://tiny.cc/5bq6tz."""
     return [tfp.mcmc.diagnostic.potential_scale_reduction(t) for t in tensors]


1 change: 0 additions & 1 deletion thermo/bnn/map.py
@@ -46,7 +46,6 @@ def map_predict(weight_prior, bias_prior, X_train, y_train, X_test, y_test):
         bias_prior (tfp.distribution): Prior probability for the biases
         [X/y_train/test] (np.arrays): Train and test sets
     """
-
     log_prob_tracers = (
         bnn.tracer_factory(X_train, y_train),
         bnn.tracer_factory(X_test, y_test),
5 changes: 2 additions & 3 deletions thermo/bnn/tf_dropout.py
@@ -73,12 +73,11 @@ def __init__(

 @timed
 def predict(model, X_test, n_preds=100):
-    """
-    perform n_preds Monte Carlo predictions (i.e. with dropout)
+    """Perform n_preds Monte Carlo predictions (i.e. with dropout)
     save and return predictive mean and total uncertainty
     model: pre-trained Keras model
     X_test: features tensor
-    n_preds: number of predictions (with dropout)
+    n_preds: number of predictions (with dropout).
     """
     if model.uncertainty == "aleatoric":
         y_pred, y_log_var = tf.squeeze(model.predict(X_test))
5 changes: 2 additions & 3 deletions thermo/bnn/torch_dropout.py
@@ -71,8 +71,8 @@ def robust_l2_loss(targets, preds, log_stds):


 class TorchDropoutModel(nn.Sequential):
-    """
-    Constructs a dropout network with aleatoric and/or epistemic uncertainty estimation.
+    """Constructs a dropout network with aleatoric and/or epistemic uncertainty
+    estimation.
     """

     def __init__(
@@ -122,7 +122,6 @@ def __init__(
     @torch.no_grad()
     def write_metrics(self, targets, output, denorm, prefix):
         """After an epoch, save evaluation metrics to a dict."""
-
         output, targets = torch.cat(output), torch.cat(targets)
         loss = self.loss_fn(targets, output)

2 changes: 0 additions & 2 deletions thermo/data/transform.py
@@ -32,7 +32,6 @@ def train_test_split(*dfs, test_size: float = 0.1, train=None):
     """Returns training set, test set or both set (split according to test_size)
     depending on train being True, False or None.
     """
-
     test_index = dfs[0].sample(frac=test_size, random_state=0).index
     mask = dfs[0].index.isin(test_index)

@@ -64,7 +63,6 @@ def normalize(df, mean=None, std=None):
     """If mean and std are None, normalize array/dataframe columns to have
     zero mean and unit std. Else use mean and std as provided for normalization.
     """
-
     if mean is None:
         mean = df.mean(0)
     if std is None:
2 changes: 1 addition & 1 deletion thermo/rf.py
@@ -14,7 +14,7 @@

 class RandomForestRegressor(RFR):
     """Adapted from scikit-optimize.
-    https://github.com/scikit-optimize/scikit-optimize/blob/master/skopt/learning/forest.py
+    https://github.com/scikit-optimize/scikit-optimize/blob/master/skopt/learning/forest.py.

     Uncertainty estimation: get_var() computes var(y|X_test) as described in sec. 4.3.2
     of https://arxiv.org/abs/1211.0906.
3 changes: 1 addition & 2 deletions thermo/utils/decorators.py
@@ -38,8 +38,7 @@ def timed_func(*args, **kwargs):


 def squeeze(func: Callable) -> Callable:
-    """unpacks single-entry lists from the decorated function's return value"""
-
+    """Unpack single-entry lists from the decorated function's return value."""
     isiter = lambda x: isinstance(x, (list, tuple))

     @wraps(func)
