diff --git a/.gitignore b/.gitignore
index c15e39e..7f6c528 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,4 +6,6 @@ env
*.pkl
*.ipynb
__pycache__
-.DS_Store
\ No newline at end of file
+.DS_Store
+tool_test_output.html
+tool_test_output.json
\ No newline at end of file
diff --git a/Dockerfile b/Dockerfile
index 68be808..987b676 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -9,7 +9,7 @@ RUN apt-get update && \
# Install Python packages
RUN pip install -U pip && \
- pip install --no-cache-dir --no-compile pycaret[models]==${VERSION} && \
+ pip install --no-cache-dir --no-compile pycaret[analysis,models]==${VERSION} && \
pip install --no-cache-dir --no-compile explainerdashboard
# Clean up unnecessary packages
diff --git a/tools/base_model_trainer.py b/tools/base_model_trainer.py
index 98f4009..d408905 100644
--- a/tools/base_model_trainer.py
+++ b/tools/base_model_trainer.py
@@ -2,19 +2,31 @@
import logging
import os
+from feature_importance import FeatureImportanceAnalyzer
+
import pandas as pd
+from utils import get_html_closing, get_html_template
+
logging.basicConfig(level=logging.DEBUG)
LOG = logging.getLogger(__name__)
class BaseModelTrainer:
- def __init__(self, input_file, target_col, output_dir, **kwargs):
+ def __init__(
+ self,
+ input_file,
+ target_col,
+ output_dir,
+ task_type,
+ **kwargs
+ ):
self.exp = None # This will be set in the subclass
self.input_file = input_file
self.target_col = target_col
self.output_dir = output_dir
+ self.task_type = task_type
self.data = None
self.target = None
self.best_model = None
@@ -29,9 +41,21 @@ def __init__(self, input_file, target_col, output_dir, **kwargs):
def load_data(self):
+ """Load the input table, coerce it to numeric and handle missing values.
+
+ Sets ``self.data`` and ``self.target`` (the name of the column at the
+ 1-based position ``self.target_col``).
+ """
LOG.info(f"Loading data from {self.input_file}")
self.data = pd.read_csv(self.input_file, sep=None, engine='python')
+ # errors='coerce' replaces every value that cannot be parsed as a number
+ # with NaN.
+ # NOTE(review): this is applied to all columns, the target included -
+ # confirm that class labels are always numeric.
+ self.data = self.data.apply(pd.to_numeric, errors='coerce')
names = self.data.columns.to_list()
+ # NOTE(review): the target name is taken before the '.' -> '_' rename at
+ # the end of this method; a target name containing '.' would then differ
+ # from the renamed column - confirm.
self.target = names[int(self.target_col)-1]
- self.data = self.data.fillna(self.data.median(numeric_only=True))
+ # `missing_value_strategy` is only consulted when the attribute exists:
+ # 'mean'/'median' fill NaNs with the per-column statistic, 'drop' removes
+ # rows containing NaN, and any other value leaves the data unchanged.
+ if hasattr(self, 'missing_value_strategy'):
+ if self.missing_value_strategy == 'mean':
+ self.data = self.data.fillna(
+ self.data.mean(numeric_only=True))
+ elif self.missing_value_strategy == 'median':
+ self.data = self.data.fillna(
+ self.data.median(numeric_only=True))
+ elif self.missing_value_strategy == 'drop':
+ self.data = self.data.dropna()
+ else:
+ # Default strategy if not specified
+ self.data = self.data.fillna(self.data.median(numeric_only=True))
self.data.columns = self.data.columns.str.replace('.', '_')
def setup_pycaret(self):
@@ -116,113 +140,71 @@ def save_html_report(self):
setup_params_table = pd.DataFrame(
list(filtered_setup_params.items()),
columns=['Parameter', 'Value'])
- # Save model summary
+
best_model_params = pd.DataFrame(
self.best_model.get_params().items(),
columns=['Parameter', 'Value'])
best_model_params.to_csv(
os.path.join(self.output_dir, 'best_model.csv'),
index=False)
-
- # Save comparison results
self.results.to_csv(os.path.join(
self.output_dir, "comparison_results.csv"))
- # Read and encode plot images
plots_html = ""
for plot_name, plot_path in self.plots.items():
encoded_image = self.encode_image_to_base64(plot_path)
plots_html += f"""
-
PyCaret Model Training Report
+ {get_html_template()}
+
PyCaret Model Training Report
+
+
+ Setup & Best Model
+
+ Best Model Plots
+
+ Feature Importance
+
+
Setup Parameters
Parameter | Value |
- {setup_params_table.to_html(index=False,
- header=False, classes='table')}
+ {setup_params_table.to_html(
+ index=False, header=False, classes='table')}
Best Model: {model_name}
Parameter | Value |
- {best_model_params.to_html(index=False,
- header=False, classes='table')}
+ {best_model_params.to_html(
+ index=False, header=False, classes='table')}
Comparison Results
- {self.results.to_html(index=False,
- classes='table')}
+ {self.results.to_html(index=False, classes='table')}
-
Plots
+
+
+
Best Model Plots
{plots_html}
-
-
+
+ {feature_importance_html}
+
+ {get_html_closing()}
"""
with open(os.path.join(
diff --git a/tools/feature_importance.py b/tools/feature_importance.py
new file mode 100644
index 0000000..95e2e07
--- /dev/null
+++ b/tools/feature_importance.py
@@ -0,0 +1,175 @@
+import base64
+import logging
+import os
+
+import matplotlib.pyplot as plt
+
+import pandas as pd
+
+from pycaret.classification import ClassificationExperiment
+from pycaret.regression import RegressionExperiment
+
+logging.basicConfig(level=logging.DEBUG)
+LOG = logging.getLogger(__name__)
+
+
+class FeatureImportanceAnalyzer:
+    def __init__(
+            self,
+            task_type,
+            output_dir,
+            data_path=None,
+            data=None,
+            target_col=None):
+        """Prepare the dataset and the PyCaret experiment for the analysis.
+
+        Args:
+            task_type: ``'classification'`` selects a
+                ``ClassificationExperiment``; any other value selects a
+                ``RegressionExperiment``.
+            output_dir: Directory in which the plot images are written.
+            data_path: Path of a delimited text file; read only when
+                ``data`` is None.
+            data: Already loaded DataFrame. When given it is used as-is
+                (no column renaming, no imputation).
+            target_col: 1-based position of the target column. Needed in
+                practice despite the ``None`` default, because it is
+                converted with ``int()``.
+
+        Raises:
+            ValueError: If neither ``data`` nor ``data_path`` is provided.
+        """
+        if data is not None:
+            self.data = data
+            LOG.info("Data loaded from memory")
+        elif data_path is not None:
+            self.data = pd.read_csv(data_path, sep=None, engine='python')
+            # regex=False: the dot must be replaced literally, independent
+            # of the default `regex` value of the installed pandas version.
+            self.data.columns = self.data.columns.str.replace(
+                '.', '_', regex=False)
+            self.data = self.data.fillna(self.data.median(numeric_only=True))
+        else:
+            raise ValueError("Either 'data' or 'data_path' must be provided")
+        # Stored regardless of how the data was supplied, so the attribute
+        # always exists on the instance.
+        self.target_col = target_col
+        self.task_type = task_type
+        self.target = self.data.columns[int(target_col) - 1]
+        if task_type == 'classification':
+            self.exp = ClassificationExperiment()
+        else:
+            self.exp = RegressionExperiment()
+        # Maps a plot name to the path of the image saved for it.
+        self.plots = {}
+        self.output_dir = output_dir
+
+    def setup_pycaret(self):
+        """Run ``setup`` on the experiment with the loaded data.
+
+        The experiment is configured with the target column name, a fixed
+        session id (123), HTML output enabled and both experiment logging
+        and system logging switched off.
+        """
+        LOG.info("Initializing PyCaret")
+        target_name = self.target
+        LOG.info(self.task_type)
+        LOG.info(self.exp)
+        self.exp.setup(
+            self.data,
+            target=target_name,
+            session_id=123,
+            html=True,
+            log_experiment=False,
+            system_log=False,
+        )
+
+    def save_coefficients(self):
+        """Train the ``'lr'`` model and return its coefficients as HTML.
+
+        Returns:
+            str: An HTML table with one row per non-target column of
+            ``self.data`` and the coefficient fitted for it.
+        """
+        model = self.exp.create_model('lr')
+        coefficients = model.coef_
+        # A classifier stores one coefficient row per class (2-D), while a
+        # single-target regressor stores a flat vector (1-D). Indexing the
+        # flat vector with [0] would yield one scalar that pandas then
+        # repeats for every feature, so only 2-D arrays are reduced.
+        # For multi-class models this keeps the first class row only.
+        if getattr(coefficients, 'ndim', 1) > 1:
+            coefficients = coefficients[0]
+        # NOTE(review): feature names come from the raw input columns; this
+        # assumes the PyCaret pipeline neither adds nor removes columns.
+        coef_df = pd.DataFrame({
+            'Feature': self.data.columns.drop(self.target),
+            'Coefficient': coefficients
+        })
+        return coef_df.to_html(index=False)
+
+    def save_tree_importance(self):
+        """Train the ``'rf'`` model and save its feature-importance chart.
+
+        The horizontal bar chart is written to ``tree_importance.png`` in
+        ``self.output_dir`` and its path is stored in ``self.plots`` under
+        the key ``'tree_importance'``.
+        """
+        forest = self.exp.create_model('rf')
+        ranking = pd.DataFrame({
+            'Feature': self.data.columns.drop(self.target),
+            'Importance': forest.feature_importances_,
+        })
+        ranking = ranking.sort_values(by='Importance', ascending=False)
+
+        figure, axes = plt.subplots(figsize=(10, 6))
+        axes.barh(ranking['Feature'], ranking['Importance'])
+        axes.set_xlabel('Importance')
+        axes.set_title('Feature Importance (Random Forest)')
+
+        image_path = os.path.join(self.output_dir, 'tree_importance.png')
+        figure.savefig(image_path)
+        plt.close(figure)
+        self.plots['tree_importance'] = image_path
+
+    def save_shap_values(self):
+        """Train the ``'lightgbm'`` model and save its SHAP summary plot.
+
+        SHAP values are computed for all non-target columns of
+        ``self.data``. The plot is written to ``shap_summary.png`` in
+        ``self.output_dir`` and its path is stored in ``self.plots`` under
+        the key ``'shap_summary'``.
+        """
+        booster = self.exp.create_model('lightgbm')
+        # Imported here rather than at module level, after the model has
+        # been created.
+        import shap
+        explainer = shap.Explainer(booster)
+        features = self.data.drop(columns=[self.target])
+        contributions = explainer.shap_values(features)
+        shap.summary_plot(
+            contributions,
+            self.data.drop(columns=[self.target]),
+            show=False)
+        plt.title('Shap (LightGBM)')
+        image_path = os.path.join(self.output_dir, 'shap_summary.png')
+        plt.savefig(image_path)
+        plt.close()
+        self.plots['shap_summary'] = image_path
+
+    def generate_feature_importance(self):
+        """Run the three analyses and return the coefficient table HTML.
+
+        The coefficient table is built first, then the tree-importance and
+        SHAP plots are saved, in that order.
+        """
+        coefficient_table = self.save_coefficients()
+        for save_plot in (self.save_tree_importance, self.save_shap_values):
+            save_plot()
+        return coefficient_table
+
+    def encode_image_to_base64(self, img_path):
+        """Return the content of ``img_path`` as a base64 text string."""
+        with open(img_path, 'rb') as handle:
+            raw_bytes = handle.read()
+        encoded = base64.b64encode(raw_bytes)
+        return encoded.decode('utf-8')
+
+    def generate_html_report(self, coef_html):
+        """Build the feature-importance HTML fragment.
+
+        Args:
+            coef_html: HTML table of model coefficients, inserted as-is.
+
+        Returns:
+            str: The fragment containing the coefficient table followed by
+            one section per entry of ``self.plots``.
+        """
+        LOG.info("Generating HTML report")
+
+        # Read and encode plot images
+        # The description below is built from adjacent string literals,
+        # which Python joins without a separator, so each piece carries its
+        # own trailing space.
+        # NOTE(review): every plot section gets the "trained Random Forest"
+        # heading, including the SHAP one - confirm whether that is intended.
+        plots_html = ""
+        for plot_name, plot_path in self.plots.items():
+            encoded_image = self.encode_image_to_base64(plot_path)
+            plots_html += f"""


Feature importance analysis from a
+ trained Random Forest
+
{'Use gini impurity for '
+ 'calculating feature importance for classification '
+ 'and Variance Reduction for regression'
+ if plot_name == 'tree_importance'
+ else 'SHAP Summary from a trained lightgbm'}
+
+
+ """
+
+        # Generate HTML content with tabs
+        html_content = f"""

PyCaret Feature Importance Report
+
+

Coefficients (based on a trained
+ {'Logistic Regression'
+ if self.task_type == 'classification'
+ else 'Linear Regression'} Model)
+
{coef_html}
+
+ {plots_html}
+ """
+
+        return html_content
+
+    def run(self):
+        """Execute the whole analysis and return the HTML fragment.
+
+        Sets up the experiment, produces the coefficient table and the
+        plots, then renders them into the report fragment.
+        """
+        LOG.info("Running feature importance analysis")
+        self.setup_pycaret()
+        report = self.generate_html_report(
+            self.generate_feature_importance())
+        LOG.info("Feature importance analysis completed")
+        return report
+
+
+if __name__ == "__main__":
+    # Command-line entry point: run the analysis on a dataset file.
+    import argparse
+    parser = argparse.ArgumentParser(description="Feature Importance Analysis")
+    parser.add_argument(
+        "--data_path", type=str, help="Path to the dataset")
+    parser.add_argument(
+        "--target_col", type=int,
+        help="Index of the target column (1-based)")
+    parser.add_argument(
+        "--task_type", type=str,
+        choices=["classification", "regression"],
+        help="Task type: classification or regression")
+    parser.add_argument(
+        "--output_dir",
+        type=str,
+        help="Directory to save the outputs")
+    args = parser.parse_args()
+
+    # Pass by keyword: the constructor's positional order is
+    # (task_type, output_dir, data_path, data, target_col), so positional
+    # passing in CLI order would bind the path to `task_type`.
+    analyzer = FeatureImportanceAnalyzer(
+        task_type=args.task_type,
+        output_dir=args.output_dir,
+        data_path=args.data_path,
+        target_col=args.target_col)
+    analyzer.run()
diff --git a/tools/pycaret_classification.py b/tools/pycaret_classification.py
index 0ef935e..d994015 100644
--- a/tools/pycaret_classification.py
+++ b/tools/pycaret_classification.py
@@ -10,8 +10,15 @@
class ClassificationModelTrainer(BaseModelTrainer):
- def __init__(self, input_file, target_col, output_dir, **kwargs):
- super().__init__(input_file, target_col, output_dir, **kwargs)
+ def __init__(
+ self,
+ input_file,
+ target_col,
+ output_dir,
+ task_type,
+ **kwargs):
+ """Initialise the base trainer and attach a classification experiment.
+
+ All arguments, including ``task_type`` and any extra keyword
+ arguments, are forwarded unchanged to the base class constructor.
+ """
+ super().__init__(
+ input_file, target_col, output_dir, task_type, **kwargs)
self.exp = ClassificationExperiment()
def save_dashboard(self):
diff --git a/tools/pycaret_regression.py b/tools/pycaret_regression.py
index 91d5b7a..2f1f80e 100644
--- a/tools/pycaret_regression.py
+++ b/tools/pycaret_regression.py
@@ -10,8 +10,15 @@
class RegressionModelTrainer(BaseModelTrainer):
- def __init__(self, input_file, target_col, output_dir, **kwargs):
- super().__init__(input_file, target_col, output_dir, **kwargs)
+ def __init__(
+ self,
+ input_file,
+ target_col,
+ output_dir,
+ task_type,
+ **kwargs):
+ """Initialise the base trainer and attach a regression experiment.
+
+ All arguments, including ``task_type`` and any extra keyword
+ arguments, are forwarded unchanged to the base class constructor.
+ """
+ super().__init__(
+ input_file, target_col, output_dir, task_type, **kwargs)
self.exp = RegressionExperiment()
def save_dashboard(self):
diff --git a/tools/pycaret_train.py b/tools/pycaret_train.py
index 1f15c8f..534a997 100644
--- a/tools/pycaret_train.py
+++ b/tools/pycaret_train.py
@@ -85,13 +85,17 @@ def main():
trainer = ClassificationModelTrainer(
args.input_file,
args.target_col,
- args.output_dir, **model_kwargs)
+ args.output_dir,
+ args.model_type,
+ **model_kwargs)
elif args.model_type == "regression":
if "fix_imbalance" in model_kwargs:
del model_kwargs["fix_imbalance"]
trainer = RegressionModelTrainer(
args.input_file,
- args.target_col, args.output_dir,
+ args.target_col,
+ args.output_dir,
+ args.model_type,
**model_kwargs)
else:
LOG.error("Invalid model type. Please choose \
diff --git a/tools/pycaret_train.xml b/tools/pycaret_train.xml
index 2a06c11..06ed7bb 100644
--- a/tools/pycaret_train.xml
+++ b/tools/pycaret_train.xml
@@ -1,12 +1,12 @@
- Compare different machine learning models on a dataset using PyCaret.
+ Compare different machine learning models on a dataset using PyCaret. Do feature analysis using LR, Random Forest and LightGBM.
pycaret_macros.xml
@@ -152,10 +155,12 @@
-
+
-
+
-
+
This tool uses PyCaret to train and evaluate machine learning models.
- Ensure that the Conda environment specified in the requirements is correctly set up.
\ No newline at end of file
diff --git a/tools/test-data/auto-mpg.csv b/tools/test-data/auto-mpg.tsv
similarity index 100%
rename from tools/test-data/auto-mpg.csv
rename to tools/test-data/auto-mpg.tsv
diff --git a/tools/test-data/expected_best_model_classification.csv b/tools/test-data/expected_best_model_classification.csv
new file mode 100644
index 0000000..81152e5
--- /dev/null
+++ b/tools/test-data/expected_best_model_classification.csv
@@ -0,0 +1,20 @@
+Parameter,Value
+boosting_type,gbdt
+class_weight,
+colsample_bytree,1.0
+importance_type,split
+learning_rate,0.1
+max_depth,-1
+min_child_samples,20
+min_child_weight,0.001
+min_split_gain,0.0
+n_estimators,100
+n_jobs,-1
+num_leaves,31
+objective,
+random_state,123
+reg_alpha,0.0
+reg_lambda,0.0
+subsample,1.0
+subsample_for_bin,200000
+subsample_freq,0
diff --git a/tools/test-data/expected_best_model_regression.csv b/tools/test-data/expected_best_model_regression.csv
new file mode 100644
index 0000000..81152e5
--- /dev/null
+++ b/tools/test-data/expected_best_model_regression.csv
@@ -0,0 +1,20 @@
+Parameter,Value
+boosting_type,gbdt
+class_weight,
+colsample_bytree,1.0
+importance_type,split
+learning_rate,0.1
+max_depth,-1
+min_child_samples,20
+min_child_weight,0.001
+min_split_gain,0.0
+n_estimators,100
+n_jobs,-1
+num_leaves,31
+objective,
+random_state,123
+reg_alpha,0.0
+reg_lambda,0.0
+subsample,1.0
+subsample_for_bin,200000
+subsample_freq,0
diff --git a/tools/test-data/expected_comparison_result_classification.html b/tools/test-data/expected_comparison_result_classification.html
index 9f17520..da98cd0 100644
--- a/tools/test-data/expected_comparison_result_classification.html
+++ b/tools/test-data/expected_comparison_result_classification.html
@@ -1,62 +1,98 @@
-
-
-
-
-
-
PyCaret Model Training Report
-
-
-
-
-
PyCaret Model Training Report
+
+
+
+
Model Training Report
+
+
+
+
+
+
PyCaret Model Training Report
+
+
+ Setup & Best Model
+
+ Best Model Plots
+
+ Feature Importance
+
+
Setup Parameters
Parameter | Value |
@@ -183,7 +219,29 @@ Comparison Results
0.7074 |
0.4668 |
0.4969 |
- 0.042 |
+ 0.531 |
+
+
+ Extreme Gradient Boosting |
+ 0.735 |
+ 0.7000 |
+ 0.7500 |
+ 0.6433 |
+ 0.6717 |
+ 0.4289 |
+ 0.4523 |
+ 0.201 |
+
+
+ Logistic Regression |
+ 0.730 |
+ 0.7667 |
+ 0.6667 |
+ 0.5933 |
+ 0.6150 |
+ 0.4013 |
+ 0.4167 |
+ 0.080 |
Quadratic Discriminant Analysis |
@@ -194,18 +252,18 @@ Comparison Results
0.5933 |
0.4514 |
0.4929 |
- 0.027 |
+ 0.063 |
- Logistic Regression |
+ CatBoost Classifier |
0.730 |
- 0.7667 |
- 0.6667 |
- 0.5933 |
- 0.6150 |
- 0.4013 |
- 0.4167 |
- 0.025 |
+ 0.7333 |
+ 0.7500 |
+ 0.6600 |
+ 0.6783 |
+ 0.4293 |
+ 0.4521 |
+ 7.149 |
Gradient Boosting Classifier |
@@ -216,7 +274,7 @@ Comparison Results
0.6900 |
0.4117 |
0.4546 |
- 0.157 |
+ 0.295 |
Random Forest Classifier |
@@ -227,7 +285,7 @@ Comparison Results
0.6383 |
0.3783 |
0.4058 |
- 0.159 |
+ 0.392 |
Linear Discriminant Analysis |
@@ -238,7 +296,7 @@ Comparison Results
0.6717 |
0.3690 |
0.4135 |
- 0.029 |
+ 0.071 |
K Neighbors Classifier |
@@ -249,7 +307,7 @@ Comparison Results
0.6655 |
0.3634 |
0.3779 |
- 0.027 |
+ 0.101 |
Decision Tree Classifier |
@@ -260,7 +318,7 @@ Comparison Results
0.6574 |
0.3523 |
0.3854 |
- 0.025 |
+ 0.083 |
Naive Bayes |
@@ -271,7 +329,7 @@ Comparison Results
0.5917 |
0.3244 |
0.3333 |
- 0.029 |
+ 0.117 |
Ridge Classifier |
@@ -282,7 +340,7 @@ Comparison Results
0.6017 |
0.3320 |
0.3500 |
- 0.032 |
+ 0.062 |
Extra Trees Classifier |
@@ -293,7 +351,7 @@ Comparison Results
0.5805 |
0.2650 |
0.2816 |
- 0.111 |
+ 0.323 |
Ada Boost Classifier |
@@ -304,7 +362,7 @@ Comparison Results
0.5933 |
0.2697 |
0.3121 |
- 0.163 |
+ 0.276 |
SVM - Linear Kernel |
@@ -315,7 +373,7 @@ Comparison Results
0.4717 |
0.1306 |
0.1647 |
- 0.029 |
+ 0.063 |
Dummy Classifier |
@@ -326,84 +384,199 @@ Comparison Results
0.0000 |
0.0000 |
0.0000 |
- 0.033 |
+ 0.074 |
-
Plots
+
+
+
Best Model Plots
Auc
-
+
Confusion_matrix
-
+
Threshold
-
+
Pr
-
+
Error
-
+
Class_report
-
+
Learning
-
+
Calibration
-
+
Vc
-
+
Dimension
-
+
Manifold
-
+
Rfe
-
+
Feature
-
+
Feature_all
-
+
+
+
+
+
+
+
PyCaret Feature Importance Report
+
+
+
Coefficients (based on a trained
+ Logistic Regression Model)
+
+
+
+ Feature |
+ Coefficient |
+
+
+
+
+ SCGB2A2 |
+ -0.588679 |
+
+
+ FDCSP |
+ 0.772756 |
+
+
+ MUCL1 |
+ 0.168434 |
+
+
+ PIP |
+ 0.557828 |
+
+
+ TFF1 |
+ -0.989886 |
+
+
+ SCGB1D1 |
+ -0.740511 |
+
+
+ SCGB1D2 |
+ -0.727969 |
+
+
+ CALML5 |
+ 0.392070 |
+
+
+ AGR2 |
+ -0.716675 |
+
+
+ CPB1 |
+ 0.059255 |
+
+
+
+
+
+
+
Feature importance analysis from a
+ trained Random Forest
+
Use gini impurity forcalculating feature importance for classificationand Variance Reduction for regression
+
+
+
+
+
Feature importance analysis from a
+ trained Random Forest
+
SHAP Summary from a trained lightgbm
+
+
-
-
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tools/test-data/expected_comparison_result_regression.html b/tools/test-data/expected_comparison_result_regression.html
index 7e78624..a3c38bd 100644
--- a/tools/test-data/expected_comparison_result_regression.html
+++ b/tools/test-data/expected_comparison_result_regression.html
@@ -1,63 +1,98 @@
-
-
-
-
-
-
PyCaret Model Training Report
-
-
-
-
-
PyCaret Model Training Report
+
+
+
+
Model Training Report
+
+
+
+
+
+
PyCaret Model Training Report
+
+
+ Setup & Best Model
+
+ Best Model Plots
+
+ Feature Importance
+
+
Setup Parameters
Parameter | Value |
@@ -74,31 +109,87 @@ Setup Parameters
-
Best Model: CatBoostRegressor
+
Best Model: LGBMRegressor
Parameter | Value |
- loss_function |
- RMSE |
+ boosting_type |
+ gbdt |
+
+
+ class_weight |
+ None |
+
+
+ colsample_bytree |
+ 1.0 |
+
+
+ importance_type |
+ split |
+
+
+ learning_rate |
+ 0.1 |
+
+
+ max_depth |
+ -1 |
+
+
+ min_child_samples |
+ 20 |
+
+
+ min_child_weight |
+ 0.001 |
+
+
+ min_split_gain |
+ 0.0 |
+
+
+ n_estimators |
+ 100 |
- border_count |
- 254 |
+ n_jobs |
+ -1 |
- verbose |
- False |
+ num_leaves |
+ 31 |
- task_type |
- CPU |
+ objective |
+ None |
random_state |
123 |
+
+ reg_alpha |
+ 0.0 |
+
+
+ reg_lambda |
+ 0.0 |
+
+
+ subsample |
+ 1.0 |
+
+
+ subsample_for_bin |
+ 200000 |
+
+
+ subsample_freq |
+ 0 |
+
@@ -118,175 +209,175 @@
Comparison Results
+
+ Light Gradient Boosting Machine |
+ 1.9064 |
+ 6.9812 |
+ 2.5932 |
+ 0.8852 |
+ 0.1012 |
+ 0.0832 |
+ 0.041 |
+
CatBoost Regressor |
- 2.0857 |
- 8.2406 |
- 2.8434 |
- 0.8616 |
- 0.1146 |
- 0.0932 |
- 8.470 |
+ 1.8984 |
+ 7.1071 |
+ 2.6242 |
+ 0.8818 |
+ 0.1017 |
+ 0.0827 |
+ 5.248 |
Extra Trees Regressor |
- 2.1028 |
- 8.2773 |
- 2.8615 |
- 0.8590 |
- 0.1122 |
- 0.0922 |
- 3.755 |
+ 1.8770 |
+ 7.1233 |
+ 2.6496 |
+ 0.8796 |
+ 0.1001 |
+ 0.0805 |
+ 0.169 |
- Light Gradient Boosting Machine |
- 2.1336 |
- 8.5588 |
- 2.9093 |
- 0.8578 |
- 0.1138 |
- 0.0930 |
- 1.483 |
+ Random Forest Regressor |
+ 1.9666 |
+ 7.6187 |
+ 2.7155 |
+ 0.8738 |
+ 0.1051 |
+ 0.0861 |
+ 0.284 |
- Random Forest Regressor |
- 2.2340 |
- 9.3259 |
- 3.0304 |
- 0.8438 |
- 0.1189 |
- 0.0984 |
- 5.435 |
+ Extreme Gradient Boosting |
+ 2.0811 |
+ 8.7754 |
+ 2.8962 |
+ 0.8576 |
+ 0.1087 |
+ 0.0896 |
+ 0.094 |
Gradient Boosting Regressor |
- 2.1911 |
- 9.4807 |
- 3.0328 |
- 0.8430 |
- 0.1178 |
- 0.0955 |
- 0.874 |
+ 2.0406 |
+ 8.9163 |
+ 2.9113 |
+ 0.8538 |
+ 0.1104 |
+ 0.0878 |
+ 0.130 |
- Extreme Gradient Boosting |
- 2.3047 |
- 10.3017 |
- 3.1834 |
- 0.8299 |
- 0.1219 |
- 0.1009 |
- 0.822 |
+ AdaBoost Regressor |
+ 2.2673 |
+ 10.1260 |
+ 3.1351 |
+ 0.8339 |
+ 0.1207 |
+ 0.1000 |
+ 0.119 |
- Elastic Net |
- 2.5336 |
- 11.3393 |
- 3.3316 |
- 0.8147 |
- 0.1398 |
- 0.1145 |
- 0.238 |
+ Ridge Regression |
+ 2.5315 |
+ 11.4346 |
+ 3.3421 |
+ 0.8120 |
+ 0.1461 |
+ 0.1173 |
+ 0.024 |
- Bayesian Ridge |
- 2.5427 |
- 11.5743 |
- 3.3639 |
+ Linear Regression |
+ 2.5325 |
+ 11.4367 |
+ 3.3424 |
0.8119 |
- 0.1401 |
- 0.1154 |
- 3.023 |
+ 0.1460 |
+ 0.1173 |
+ 0.029 |
- Lasso Least Angle Regression |
- 2.5705 |
- 11.6280 |
- 3.3736 |
- 0.8099 |
- 0.1422 |
- 0.1163 |
- 0.551 |
-
-
- Lasso Regression |
- 2.5706 |
- 11.6280 |
- 3.3736 |
- 0.8099 |
- 0.1422 |
- 0.1163 |
- 0.204 |
+ Bayesian Ridge |
+ 2.5238 |
+ 11.4695 |
+ 3.3477 |
+ 0.8113 |
+ 0.1480 |
+ 0.1173 |
+ 0.023 |
- Ridge Regression |
- 2.5765 |
- 11.7974 |
- 3.3966 |
- 0.8082 |
- 0.1418 |
- 0.1172 |
- 0.258 |
+ Least Angle Regression |
+ 2.6531 |
+ 12.2959 |
+ 3.4615 |
+ 0.7986 |
+ 0.1506 |
+ 0.1224 |
+ 0.025 |
- Linear Regression |
- 2.5809 |
- 11.8270 |
- 3.4009 |
- 0.8077 |
- 0.1420 |
- 0.1174 |
- 0.270 |
+ Elastic Net |
+ 2.6266 |
+ 12.3303 |
+ 3.4751 |
+ 0.7971 |
+ 0.1543 |
+ 0.1217 |
+ 0.024 |
- Least Angle Regression |
- 2.6615 |
- 12.3747 |
- 3.4723 |
- 0.7994 |
- 0.1458 |
- 0.1210 |
- 0.247 |
+ Huber Regressor |
+ 2.4866 |
+ 12.4597 |
+ 3.4839 |
+ 0.7967 |
+ 0.1387 |
+ 0.1104 |
+ 0.053 |
- AdaBoost Regressor |
+ Lasso Least Angle Regression |
2.6444 |
- 12.6052 |
- 3.5382 |
- 0.7885 |
- 0.1393 |
- 0.1175 |
- 1.196 |
+ 12.4441 |
+ 3.4920 |
+ 0.7954 |
+ 0.1547 |
+ 0.1224 |
+ 0.023 |
- Huber Regressor |
- 2.5126 |
- 13.0411 |
- 3.5739 |
- 0.7862 |
- 0.1355 |
- 0.1084 |
- 3.817 |
+ Lasso Regression |
+ 2.6446 |
+ 12.4444 |
+ 3.4921 |
+ 0.7954 |
+ 0.1547 |
+ 0.1224 |
+ 0.026 |
Decision Tree Regressor |
- 2.8325 |
- 15.5690 |
- 3.8947 |
- 0.7369 |
- 0.1522 |
- 0.1233 |
- 0.845 |
+ 2.7032 |
+ 13.3008 |
+ 3.6071 |
+ 0.7724 |
+ 0.1419 |
+ 0.1185 |
+ 0.030 |
K Neighbors Regressor |
- 3.2820 |
- 19.4715 |
- 4.3865 |
- 0.6744 |
- 0.1637 |
- 0.1389 |
- 0.783 |
+ 3.1884 |
+ 18.3559 |
+ 4.2627 |
+ 0.6902 |
+ 0.1604 |
+ 0.1351 |
+ 0.030 |
Orthogonal Matching Pursuit |
@@ -296,7 +387,7 @@ Comparison Results
0.6686 |
0.1755 |
0.1479 |
- 0.997 |
+ 0.021 |
Dummy Regressor |
@@ -306,78 +397,167 @@ Comparison Results
-0.0687 |
0.3355 |
0.3285 |
- 0.273 |
+ 0.030 |
Passive Aggressive Regressor |
- 11.7360 |
- 200.7733 |
- 13.3007 |
- -2.6451 |
- 0.6835 |
- 0.5873 |
- 0.644 |
+ 10.6628 |
+ 178.4730 |
+ 12.2226 |
+ -2.3674 |
+ 0.6393 |
+ 0.5210 |
+ 0.024 |
-
Plots
+
+
+
Best Model Plots
Residuals
-
+
Error
-
+
Cooks
-
+
Learning
-
+
Vc
-
+
Manifold
-
+
Rfe
-
+
Feature
-
+
Feature_all
+
+
+
+
+
+
+
PyCaret Feature Importance Report
+
+
+
Coefficients (based on a trained
+ Linear Regression Model)
+
+
+
+ Feature |
+ Coefficient |
+
+
+
+
+ Cylinders |
+ -0.414454 |
+
+
+ Displacement |
+ -0.414454 |
+
+
+ Horsepower |
+ -0.414454 |
+
+
+ Weight |
+ -0.414454 |
+
+
+ Acceleration |
+ -0.414454 |
+
+
+ ModelYear |
+ -0.414454 |
+
+
+ Origin |
+ -0.414454 |
+
+
+
+
+
+
+
Feature importance analysis from a
+ trained Random Forest
+
Use gini impurity forcalculating feature importance for classificationand Variance Reduction for regression
+
+
+
+
+
Feature importance analysis from a
+ trained Random Forest
+
SHAP Summary from a trained lightgbm
+ " alt="shap_summary">
+
-
-
+
+
+
+
+
+
\ No newline at end of file
diff --git a/tools/test-data/expected_dashboard_classification.html b/tools/test-data/expected_dashboard_classification.html
index 8e9497b..c89879c 100644
--- a/tools/test-data/expected_dashboard_classification.html
+++ b/tools/test-data/expected_dashboard_classification.html
@@ -58,7 +58,7 @@
@@ -179,7 +179,7 @@
@@ -213,7 +213,7 @@