From 5c51f593356e802cc92921fb5ef04df7845aa6bb Mon Sep 17 00:00:00 2001 From: Amanda Paulson Date: Fri, 22 Nov 2024 17:34:31 -0800 Subject: [PATCH 1/2] change the check to dataset not response col --- atomsci/ddm/pipeline/compare_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atomsci/ddm/pipeline/compare_models.py b/atomsci/ddm/pipeline/compare_models.py index 3e95c88e..a3cac1d1 100644 --- a/atomsci/ddm/pipeline/compare_models.py +++ b/atomsci/ddm/pipeline/compare_models.py @@ -1871,7 +1871,7 @@ def get_multitask_perf_from_files_new(result_dir, pred_type='regression', datase pred=models[['model_uuid','response_cols']].join(pred) # check for > 1 dataset - if len(set(models.response_cols.astype(str)))>1: + if len(set(models.dataset_key.astype(str)))>1: raise Exception (f"Warning: you cannot export multitask model performances for more than one dataset at a time. Please provide the dataset_key as an additional parameter. Your {pred_type} options are: {list(set(models.dataset_key))}.") num_model_tasks=models.num_model_tasks.iloc[0] From a4a7cc735d424e239b1d57962a2234e37de4e84b Mon Sep 17 00:00:00 2001 From: Amanda Paulson Date: Fri, 22 Nov 2024 17:35:15 -0800 Subject: [PATCH 2/2] add the weights calculation to minimal dataset for predictions and metrics on MT datasets --- atomsci/ddm/pipeline/model_datasets.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/atomsci/ddm/pipeline/model_datasets.py b/atomsci/ddm/pipeline/model_datasets.py index 87bbffd9..3e59d70d 100644 --- a/atomsci/ddm/pipeline/model_datasets.py +++ b/atomsci/ddm/pipeline/model_datasets.py @@ -821,6 +821,8 @@ def get_featurized_data(self, dset_df, is_featurized=False): self.vals = np.zeros((nrows,ncols)) self.attr = pd.DataFrame({params.smiles_col: dset_df[params.smiles_col].values}, index=dset_df[params.id_col]) + if params.model_type != "hybrid": + self.vals, weights = feat.make_weights(self.vals, is_class=params.prediction_type=='classification') self.log.warning("Done") else: self.log.warning("Featurizing data...") @@ -828,7 +830,7 @@ def get_featurized_data(self, dset_df, is_featurized=False): params, self.contains_responses) self.log.warning("Done") self.n_features = self.featurization.get_feature_count() - self.dataset = NumpyDataset(features, self.vals, ids=ids) + self.dataset = NumpyDataset(features, self.vals, ids=ids, w=weights) # **************************************************************************************** def save_featurized_data(self, featurized_dset_df):