Skip to content

Commit

Permalink
Merge pull request #31 from MolecularAI/3.1.3
Browse files Browse the repository at this point in the history
Init 3.1.3
  • Loading branch information
lewismervin1 committed Sep 16, 2024
2 parents 649c4d2 + 5a14311 commit 229c0b6
Show file tree
Hide file tree
Showing 17 changed files with 296 additions and 144 deletions.
8 changes: 4 additions & 4 deletions docs/sphinx-source/algorithms.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@ Lasso


KNeighborsClassifier
#####
####################
.. autoclass:: optunaz.config.optconfig.KNeighborsClassifier
:members:


KNeighborsRegressor
#####
###################
.. autoclass:: optunaz.config.optconfig.KNeighborsRegressor
:members:

Expand Down Expand Up @@ -103,8 +103,8 @@ ChemPropHyperoptRegressor
:members:


ChemPropRegressorPretrained
#########################
ChemPropRegressorPretrained
###########################
.. autoclass:: optunaz.config.optconfig.ChemPropRegressorPretrained
:members:

Expand Down
2 changes: 1 addition & 1 deletion docs/sphinx-source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
author = 'MAI'

# The full version, including alpha/beta/rc tags
release = '3.1.2'
release = '3.1.3'


# -- General configuration ---------------------------------------------------
Expand Down
44 changes: 40 additions & 4 deletions docs/sphinx-source/descriptors.rst
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ ECFP_counts


PathFP
###########
######
.. autoclass:: optunaz.descriptors.PathFP
:members:

Expand All @@ -37,13 +37,13 @@ UnscaledPhyschemDescriptors


UnscaledJazzyDescriptors
###########################
########################
.. autoclass:: optunaz.descriptors.UnscaledJazzyDescriptors
:members:


UnscaledZScalesDescriptors
###########
##########################
.. autoclass:: optunaz.descriptors.UnscaledZScalesDescriptors
:members:

Expand All @@ -67,7 +67,7 @@ PrecomputedDescriptorFromFile


ZScales
###########
#######
.. autoclass:: optunaz.descriptors.ZScalesDescriptors
:members:

Expand All @@ -94,3 +94,39 @@ CompositeDescriptor
###################
.. autoclass:: optunaz.descriptors.CompositeDescriptor
:members:


AmorProtDescriptors
###################
.. autoclass:: optunaz.descriptors.AmorProtDescriptors
:members:


PathFP
######
.. autoclass:: optunaz.descriptors.PathFP
:members:


UnscaledMAPC
############
.. autoclass:: optunaz.descriptors.UnscaledMAPC
:members:


UnscaledZScalesDescriptors
##########################
.. autoclass:: optunaz.descriptors.UnscaledZScalesDescriptors
:members:


MAPC
####
.. autoclass:: optunaz.descriptors.MAPC
:members:


ZScalesDescriptors
##################
.. autoclass:: optunaz.descriptors.ZScalesDescriptors
:members:
7 changes: 7 additions & 0 deletions docs/sphinx-source/transform.rst
Original file line number Diff line number Diff line change
Expand Up @@ -24,3 +24,10 @@ ZScales
#######
.. autoclass:: optunaz.utils.preprocessing.transform.ZScales
:members:


AmorProt
########
.. autoclass:: optunaz.utils.preprocessing.transform.AmorProt
:members:

2 changes: 1 addition & 1 deletion optunaz/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import os

__version__ = "3.1.2"
__version__ = "3.1.3"

os.environ["TQDM_DISABLE"] = "1"
27 changes: 21 additions & 6 deletions optunaz/algorithms/chem_prop.py
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,21 @@ def __exit__(self, *args):
del self._stringio
sys.stdout = self._stdout

class CaptureStdOutErr(list):
    """Context manager that captures both stdout and stderr as lines.

    On entry, ``sys.stdout`` and ``sys.stderr`` are replaced with in-memory
    ``StringIO`` buffers. On exit, the captured stdout lines followed by the
    captured stderr lines are appended to this list, and the original streams
    are put back. Exceptions raised inside the ``with`` body are not
    suppressed.
    """

    def __enter__(self):
        # Remember the real streams so __exit__ can restore them.
        self._stdout = sys.stdout
        self._stderr = sys.stderr
        sys.stdout = self._stringio = StringIO()
        sys.stderr = self._stringioerr = StringIO()
        return self

    def __exit__(self, *args):
        try:
            # stdout lines first, then stderr lines (same order as before).
            self.extend(self._stringio.getvalue().splitlines())
            self.extend(self._stringioerr.getvalue().splitlines())
        finally:
            # Restore the real streams even if collecting the output fails,
            # so an error here can never leave the process muted.
            sys.stdout = self._stdout
            sys.stderr = self._stderr
            # Drop the buffers so the captured text is not kept alive twice.
            del self._stringio
            del self._stringioerr

def save_model_memory(model_dir):
tarblob = io.BytesIO()
Expand Down Expand Up @@ -316,7 +331,7 @@ def fit(self, X, y):
).to_csv(x_aux_path.name, index=False)
# arguments += ["--features_path", f"{x_aux_path.name}"] TODO: allow features once ChemProp is updated

with CaptureStdOut() as _:
with CaptureStdOutErr() as _:
args = QSARtunaTrainArgs().parse_args(arguments)
chemprop.train.cross_validate(
args=args, train_func=chemprop.train.run_training
Expand Down Expand Up @@ -365,7 +380,7 @@ def predict_proba(self, X):
else:
X = np.array(X).reshape(len(X), 1)

with CaptureStdOut() as _:
with CaptureStdOutErr() as _:
args = QSARtunaPredictArgs().parse_args(arguments)
model_objects = chemprop.train.load_model(args=args)
preds = np.array(
Expand Down Expand Up @@ -444,7 +459,7 @@ def predict_uncert(self, X):
else:
X = np.array(X[:, 0].reshape(len(X), 1))

with CaptureStdOut() as _:
with CaptureStdOutErr() as _:
args = QSARtunaPredictArgs().parse_args(arguments)
if uncertainty_method == "dropout":
model_objects = list(chemprop.train.load_model(args=args))
Expand Down Expand Up @@ -505,7 +520,7 @@ def interpret(self, X, prop_delta=0.75):
X = np.array(X[:, 0].reshape(len(X), 1))
X = pd.DataFrame(X, columns=["smiles"])
X.to_csv(data_path.name, index=False)
with CaptureStdOut() as _:
with CaptureStdOutErr() as _:
args = chemprop.args.InterpretArgs().parse_args(intrprt_args)
with CaptureStdOut() as intrprt:
interpret(args=args)
Expand Down Expand Up @@ -565,7 +580,7 @@ def chemprop_fingerprint(self, X, fingerprint_type="MPN"):
]
# if self.x_aux_ is not None:
# fprnt_args += ["--features_path", f"{x_aux_path.name}"] TODO: allow features once ChemProp is updated
with CaptureStdOut() as _:
with CaptureStdOutErr() as _:
args = chemprop.args.FingerprintArgs().parse_args(fprnt_args)
try:
fps = chemprop.train.molecule_fingerprint.molecule_fingerprint(
Expand Down Expand Up @@ -861,7 +876,7 @@ def fit(self, X, y):
arguments += ["--frzn_ffn_layers", "1"]
if self.frzn == "mpnn_last_ffn":
arguments += ["--frzn_ffn_layers", f"{self.ffn_num_layers - 1}"]
with CaptureStdOut() as _:
with CaptureStdOutErr() as _:
args = QSARtunaTrainArgs().parse_args(arguments)
chemprop.train.cross_validate(
args=args, train_func=chemprop.train.run_training
Expand Down
17 changes: 11 additions & 6 deletions optunaz/algorithms/chem_prop_hyperopt.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,16 +37,21 @@
chemprop.interpret.MoleculeDataLoader = MoleculeDataLoader


class CaptureStdOut(list):
class CaptureStdOutErr(list):
    """Context manager that captures both stdout and stderr as lines.

    On entry, ``sys.stdout`` and ``sys.stderr`` are replaced with in-memory
    ``StringIO`` buffers. On exit, the captured stdout lines followed by the
    captured stderr lines are appended to this list, and the original streams
    are put back. Exceptions raised inside the ``with`` body are not
    suppressed.
    """

    def __enter__(self):
        # Remember the real streams so __exit__ can restore them.
        self._stdout = sys.stdout
        self._stderr = sys.stderr
        sys.stdout = self._stringio = StringIO()
        sys.stderr = self._stringioerr = StringIO()
        return self

    def __exit__(self, *args):
        try:
            # stdout lines first, then stderr lines (same order as before).
            self.extend(self._stringio.getvalue().splitlines())
            self.extend(self._stringioerr.getvalue().splitlines())
        finally:
            # Restore the real streams even if collecting the output fails,
            # so an error here can never leave the process muted.
            sys.stdout = self._stdout
            sys.stderr = self._stderr
            # Drop the buffers so the captured text is not kept alive twice.
            del self._stringio
            del self._stringioerr


def save_model_memory(model_dir):
Expand Down Expand Up @@ -294,7 +299,7 @@ def fit(self, X, y):
).to_csv(x_aux_path.name, index=False)
# arguments += ["--features_path", f"{x_aux_path.name}"] TODO: allow features once ChemProp is updated

with CaptureStdOut() as _:
with CaptureStdOutErr() as _:
if self.num_iters > 1:
with tempfile.NamedTemporaryFile(
delete=True, mode="w+"
Expand Down Expand Up @@ -375,7 +380,7 @@ def predict_proba(self, X):
else:
X = np.array(X[:, 0].reshape(len(X), 1))

with CaptureStdOut() as _:
with CaptureStdOutErr() as _:
args = chemprop.args.PredictArgs().parse_args(arguments)
model_objects = chemprop.train.load_model(args=args)
preds = np.array(
Expand Down Expand Up @@ -450,7 +455,7 @@ def predict_uncert(self, X):
else:
X = np.array(X[:, 0].reshape(len(X), 1))

with CaptureStdOut() as _:
with CaptureStdOutErr() as _:
args = chemprop.args.PredictArgs().parse_args(arguments)
if uncertainty_method == "dropout":
model_objects = list(chemprop.train.load_model(args=args))
Expand Down Expand Up @@ -512,7 +517,7 @@ def interpret(self, X, prop_delta=0.75):
X = pd.DataFrame(X, columns=["smiles"])
X.to_csv(data_path.name, index=False)
args = chemprop.args.InterpretArgs().parse_args(intrprt_args)
with CaptureStdOut() as intrprt:
with CaptureStdOutErr() as intrprt:
interpret(args=args)
intrprt = [
line.split(",")
Expand Down Expand Up @@ -560,7 +565,7 @@ def chemprop_fingerprint(self, X, fingerprint_type="MPN"):
# if self.x_aux_ is not None:
# fprnt_args += ["--features_path", f"{x_aux_path.name}"] TODO: allow features once ChemProp is updated
# load_model returns pred&train arguments, object models & tasks info - but we only need TrainArgs here
with CaptureStdOut() as _:
with CaptureStdOutErr() as _:
args = chemprop.args.FingerprintArgs().parse_args(fprnt_args)
_, trainargs, _, _, _, _ = chemprop.train.load_model(args=args)
if fingerprint_type == "MPN":
Expand Down
40 changes: 33 additions & 7 deletions optunaz/builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,27 @@ def build(
estimator = buildconfig.algorithm.estimator()
if merge_train_and_test_data:
train_smiles, train_y, train_aux = buildconfig.data.get_merged_sets()
test_smiles, test_y, test_aux, test_X = None, None, None, None
else:
train_smiles, train_y, train_aux, _, _, _ = buildconfig.data.get_sets()
(
train_smiles,
train_y,
train_aux,
test_smiles,
test_y,
test_aux,
) = buildconfig.data.get_sets()
if test_smiles is not None and len(test_smiles) > 0:
test_X, failed_idx = descriptor_from_config(
test_smiles, buildconfig.descriptor, cache=cache
)
test_y, test_smiles, test_aux = remove_failed_idx(
failed_idx, test_y, test_smiles, test_aux
)
if test_aux is not None:
test_X = np.hstack((test_X, test_aux))
else:
test_X = None

train_X, failed_idx = descriptor_from_config(
train_smiles, buildconfig.descriptor, cache=cache
Expand All @@ -35,13 +54,20 @@ def build(
estimator.X_ = train_X
estimator.y_ = train_y
estimator.aux_ = train_aux
estimator.test_smiles_ = test_smiles
estimator.test_X_ = test_X
estimator.test_y_ = test_y
estimator.test_aux_ = test_aux

if merge_train_and_test_data:
train_scores = get_merged_train_score(estimator, buildconfig, cache=cache)
test_scores = None
else:
if (
not merge_train_and_test_data
and test_smiles is not None
and len(test_smiles) > 0
):
train_scores, test_scores = get_train_test_scores(
estimator, buildconfig, cache=cache
estimator, buildconfig, train_X, train_y, test_X, test_y
)

else:
train_scores = get_merged_train_score(estimator, buildconfig, train_X, train_y)
test_scores = None
return estimator, train_scores, test_scores
12 changes: 6 additions & 6 deletions optunaz/config/buildconfig.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,8 @@ def estimator(self) -> BaseEstimator:
class AdaBoostClassifier(Algorithm):
@dataclass
class AdaBoostClassifierParameters:
n_estimators: int = field(metadata=schema(min=1))
learning_rate: float = field(metadata=schema(min=0.0001))
n_estimators: int = field(default=1, metadata=schema(min=1))
learning_rate: float = field(default=0.1, metadata=schema(min=0.0001))

name: Literal["AdaBoostClassifier"]
parameters: AdaBoostClassifierParameters
Expand Down Expand Up @@ -116,7 +116,7 @@ class LogisticRegression(Algorithm):
@dataclass
class LogisticRegressionParameters:
solver: str
C: float = field(metadata=schema(min=0.001, max=1000))
C: float = field(default=1.0, metadata=schema(min=0.001, max=1000))

name: Literal["LogisticRegression"]
parameters: LogisticRegressionParameters
Expand All @@ -137,7 +137,7 @@ def estimator(self):
class PLSRegression(Algorithm):
@dataclass
class PLSParameters:
n_components: int = field(metadata=schema(min=1))
n_components: int = field(default=2, metadata=schema(min=1))

name: Literal["PLSRegression"]
parameters: PLSParameters
Expand All @@ -152,9 +152,9 @@ def estimator(self):
class RandomForestClassifier(Algorithm):
@dataclass
class RandomForestParameters:
max_depth: int = field(metadata=schema(min=1))
n_estimators: int = field(metadata=schema(min=1))
max_features: str
max_depth: int = field(default=None, metadata=schema(min=1))
n_estimators: int = field(default=100, metadata=schema(min=1))

name: Literal["RandomForestClassifier"]
parameters: RandomForestParameters
Expand Down
1 change: 0 additions & 1 deletion optunaz/descriptors.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@

from rdkit.ML.Descriptors.MoleculeDescriptors import MolecularDescriptorCalculator
from jazzy.api import molecular_vector_from_smiles
from jazzy.exception import JazzyError
from sklearn import preprocessing
from joblib import Parallel, delayed, effective_n_jobs
from optunaz.config import NameParameterDataclass
Expand Down
Loading

0 comments on commit 229c0b6

Please sign in to comment.