From 54e011955e9e4093fa680b8ee0797d5c3d1fffbb Mon Sep 17 00:00:00 2001
From: gbrunin
Date: Mon, 21 Aug 2023 11:47:27 +0200
Subject: [PATCH] Possibility to tune random_state in feature selection.
 Useful when segfaults appear with very small datasets (testing).

---
 modnet/preprocessing.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/modnet/preprocessing.py b/modnet/preprocessing.py
index f7690d0..6160b23 100644
--- a/modnet/preprocessing.py
+++ b/modnet/preprocessing.py
@@ -797,6 +797,7 @@ def feature_selection(
         drop_thr: float = 0.2,
         n_jobs: int = None,
         ignore_names: Optional[List] = [],
+        random_state: int = None,
     ):
         """Compute the mutual information between features and targets,
         then apply relevance-redundancy rankings to choose the top `n`
@@ -859,7 +860,11 @@ def feature_selection(
         else:
             df = self.df_featurized.copy()
         self.cross_nmi, self.feature_entropy = get_cross_nmi(
-            df, return_entropy=True, drop_thr=drop_thr, n_jobs=n_jobs
+            df,
+            return_entropy=True,
+            drop_thr=drop_thr,
+            n_jobs=n_jobs,
+            random_state=random_state,
         )

         if self.cross_nmi.isna().sum().sum() > 0:
@@ -889,6 +894,7 @@ def feature_selection(
             df,
             df_target,
             task_type,
+            random_state=random_state,
         )[name]

         LOG.info("Computing optimal features...")