From 5c51f593356e802cc92921fb5ef04df7845aa6bb Mon Sep 17 00:00:00 2001
From: Amanda Paulson <amanda.paulson@ucsf.edu>
Date: Fri, 22 Nov 2024 17:34:31 -0800
Subject: [PATCH 1/2] check dataset_key instead of response_cols for multiple datasets

---
 atomsci/ddm/pipeline/compare_models.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/atomsci/ddm/pipeline/compare_models.py b/atomsci/ddm/pipeline/compare_models.py
index 3e95c88e..a3cac1d1 100644
--- a/atomsci/ddm/pipeline/compare_models.py
+++ b/atomsci/ddm/pipeline/compare_models.py
@@ -1871,7 +1871,7 @@ def get_multitask_perf_from_files_new(result_dir, pred_type='regression', datase
         pred=models[['model_uuid','response_cols']].join(pred)
 
         # check for > 1 dataset
-        if len(set(models.response_cols.astype(str)))>1:
+        if len(set(models.dataset_key.astype(str)))>1:
             raise Exception (f"Warning: you cannot export multitask model performances for more than one dataset at a time. Please provide the dataset_key as an additional parameter. Your {pred_type} options are: {list(set(models.dataset_key))}.")
 
         num_model_tasks=models.num_model_tasks.iloc[0]

From a4a7cc735d424e239b1d57962a2234e37de4e84b Mon Sep 17 00:00:00 2001
From: Amanda Paulson <amanda.paulson@ucsf.edu>
Date: Fri, 22 Nov 2024 17:35:15 -0800
Subject: [PATCH 2/2] add the weights calculation to minimal dataset for
 predictions and metrics on multitask datasets

---
 atomsci/ddm/pipeline/model_datasets.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/atomsci/ddm/pipeline/model_datasets.py b/atomsci/ddm/pipeline/model_datasets.py
index 87bbffd9..3e59d70d 100644
--- a/atomsci/ddm/pipeline/model_datasets.py
+++ b/atomsci/ddm/pipeline/model_datasets.py
@@ -821,6 +821,8 @@ def get_featurized_data(self, dset_df, is_featurized=False):
                 self.vals = np.zeros((nrows,ncols))
             self.attr = pd.DataFrame({params.smiles_col: dset_df[params.smiles_col].values},
                                  index=dset_df[params.id_col])
+            if params.model_type != "hybrid":
+                self.vals, weights = feat.make_weights(self.vals, is_class=params.prediction_type=='classification')
             self.log.warning("Done")
         else:
             self.log.warning("Featurizing data...")
@@ -828,7 +830,7 @@ def get_featurized_data(self, dset_df, is_featurized=False):
                                                                                     params, self.contains_responses)
             self.log.warning("Done")
         self.n_features = self.featurization.get_feature_count()
-        self.dataset = NumpyDataset(features, self.vals, ids=ids)
+        self.dataset = NumpyDataset(features, self.vals, ids=ids, w=weights)
 
     # ****************************************************************************************
     def save_featurized_data(self, featurized_dset_df):