Merge pull request #91 from tlapusan/support_other_tree_models_#83
Support other tree models. Fixes #83
parrt authored Jul 11, 2020
2 parents b38b3cc + e40c03e commit 5a122cf
Showing 40 changed files with 122,696 additions and 95,443 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -102,3 +102,4 @@ venv.bak/
 
 # mypy
 .mypy_cache/
+.idea/
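
Note: the substantive change below, in dtreeviz/interpretation.py, replaces direct access to sklearn's tree_model.tree_ internals with a library-agnostic ShadowDecTree wrapper, which is what lets the interpretation code support other tree models. Judging only from the calls made in this diff, the wrapper's interface presumably covers at least the following. This is a hypothetical sketch; the real definition lives in dtreeviz/models/shadow_decision_tree.py, which is not shown on this page.

    from abc import ABC, abstractmethod
    from typing import List, Tuple

    import numpy as np


    class ShadowDecTree(ABC):
        """Library-agnostic adapter around a fitted decision tree (sketch only)."""

        feature_names: List[str]  # column names, used to label split ranges

        @abstractmethod
        def get_features(self) -> np.ndarray:
            """Feature index used for the split at each node id."""

        @abstractmethod
        def get_thresholds(self) -> np.ndarray:
            """Split threshold value at each node id."""

        @abstractmethod
        def predict(self, x) -> Tuple[object, List]:
            """Return (prediction, decision-path nodes); each node exposes .id."""

        @abstractmethod
        def get_feature_path_importance(self, node_list) -> np.ndarray:
            """Impurity-based importance restricted to the nodes of one path."""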
109 changes: 47 additions & 62 deletions dtreeviz/interpretation.py
@@ -2,73 +2,77 @@
 Prediction path interpretation for decision tree models.
 At the moment it contains a "plain English" implementation, but others can be added in the future.
 """
+import matplotlib.pyplot as plt
 import numpy as np
 import pandas
-from sklearn import tree
-import matplotlib.pyplot as plt
 
 from dtreeviz.colors import adjust_colors
+from dtreeviz.models.shadow_decision_tree import ShadowDecTree
 
 
-def explain_prediction_plain_english(tree_model: (tree.DecisionTreeClassifier, tree.DecisionTreeRegressor),
-                                     X: (pandas.core.series.Series, np.ndarray),
-                                     feature_names):
+def explain_prediction_plain_english(shadow_tree: ShadowDecTree,
+                                     x: (pandas.core.series.Series, np.ndarray)):
     """
     Explains the prediction path using the feature value ranges.
     A possible output of this method could be:
-        1.5 <= Pclass(3.0)
-        3.5 <= Age(29.7) < 44.5
-        7.91 <= Fare(8.05) < 54.25
-        0.5 <= Sex_label(1.0)
-        Cabin_label(-1.0) < 3.5
-        0.5 <= Embarked_label(2.0)
+        1.5 <= Pclass
+        3.5 <= Age < 44.5
+        7.91 <= Fare < 54.25
+        0.5 <= Sex_label
+        Cabin_label < 3.5
+        0.5 <= Embarked_label
     Output explanation:
     The model chose to make this prediction because the instance's Pclass feature value is greater than or
     equal to 1.5, Age is between 3.5 and 44.5, Fare is between 7.91 and 54.25, and so on.
-    :param tree_model: tree used to make the prediction
-    :param X: instance example to make the prediction for
-    :param feature_names: feature name list
+    :param shadow_tree: tree used to make the prediction
+    :param x: instance example to make the prediction for
    :return: str
        Prediction path explanation in plain English.
    """
 
-    node_feature_index = tree_model.tree_.feature
-    node_threshold = tree_model.tree_.threshold
+    node_feature_index = shadow_tree.get_features()
+    feature_names = shadow_tree.feature_names
+    node_threshold = shadow_tree.get_thresholds()
+    prediction_value, decision_node_path = shadow_tree.predict(x)
 
-    node_indicator = tree_model.decision_path([X])
-    decision_node_path = node_indicator.indices[node_indicator.indptr[0]:
-                                                node_indicator.indptr[1]]
-    feature_min_range = {}
-    feature_max_range = {}
-    for i, node_id in enumerate(decision_node_path):
+    feature_smaller_values = {}
+    feature_bigger_values = {}
+    for i, node in enumerate(decision_node_path):
         if i == len(decision_node_path) - 1:
             break  # stop at the leaf node
+        node_id = node.id
 
         feature_name = feature_names[node_feature_index[node_id]]
-        feature_value = X[node_feature_index[node_id]]
+        feature_value = x[node_feature_index[node_id]]
        feature_split_value = round(node_threshold[node_id], 2)
 
-        if feature_min_range.get(feature_name, feature_value) >= feature_split_value:
-            feature_min_range[feature_name] = feature_split_value
-        elif feature_max_range.get(feature_name, feature_value) < feature_split_value:
-            feature_max_range[feature_name] = feature_split_value
+        if feature_split_value <= feature_value:
+            if feature_smaller_values.get(feature_name) is None:
+                feature_smaller_values[feature_name] = []
+            feature_smaller_values[feature_name].append(feature_split_value)
+        elif feature_split_value > feature_value:
+            if feature_bigger_values.get(feature_name) is None:
+                feature_bigger_values[feature_name] = []
+            feature_bigger_values[feature_name].append(feature_split_value)
 
     for feature_name in feature_names:
         feature_range = ""
-        if feature_name in feature_min_range:
-            feature_range = f"{feature_min_range[feature_name]} <= {feature_name}"
-        if feature_name in feature_max_range:
+        if feature_name in feature_smaller_values:
+            feature_range = f"{max(feature_smaller_values[feature_name])} <= {feature_name}"
+        if feature_name in feature_bigger_values:
            if feature_range == "":
-                feature_range = f"{feature_name} < {feature_max_range[feature_name]}"
+                feature_range = f"{feature_name} < {min(feature_bigger_values[feature_name])}"
            else:
-                feature_range += f" < {feature_max_range[feature_name]}"
+                feature_range += f" < {min(feature_bigger_values[feature_name])}"
 
        if feature_range != "":
            print(feature_range)
 
 
-def explain_prediction_sklearn_default(tree_model, X, features,
+def explain_prediction_sklearn_default(shadow_tree: ShadowDecTree,
+                                       x: (pandas.core.series.Series, np.ndarray),
                                        figsize: tuple = (10, 5),
                                        colors: dict = None,
                                        fontsize: int = 14,
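
Note: the bookkeeping above works because every split the instance passes on the greater-or-equal side is a candidate lower bound for that feature, and every split it passes on the less-than side is a candidate upper bound; the tightest range is therefore max(lower bounds) <= feature < min(upper bounds). A minimal standalone sketch of that logic, with made-up path values:

    # Toy reproduction of the range merging in explain_prediction_plain_english.
    # Each tuple: (feature name, split threshold, instance value) at one decision node.
    path = [("Age", 3.5, 29.7), ("Age", 44.5, 29.7),
            ("Fare", 7.91, 8.05), ("Fare", 54.25, 8.05)]

    smaller, bigger = {}, {}  # candidate lower / upper bounds per feature
    for name, split, value in path:
        if split <= value:
            smaller.setdefault(name, []).append(split)  # instance went right: lower bound
        else:
            bigger.setdefault(name, []).append(split)   # instance went left: upper bound

    for name in ("Age", "Fare"):
        lo = f"{max(smaller[name])} <= " if name in smaller else ""
        hi = f" < {min(bigger[name])}" if name in bigger else ""
        print(f"{lo}{name}{hi}")  # prints "3.5 <= Age < 44.5" and "7.91 <= Fare < 54.25"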
@@ -81,10 +85,8 @@ def explain_prediction_sklearn_default(tree_model, X, features,
     their number of categories.
     For more details, you can read this article: https://explained.ai/rf-importance/index.html
-    :param tree_model: tree used to make the prediction
-    :param X: instance example to make the prediction for
-    :param features: list
-        Feature name list
+    :param shadow_tree: tree used to make the prediction
+    :param x: instance example to make the prediction for
    :param figsize: tuple of int, optional
        The plot size
    :param colors: dict, optional
@@ -93,15 +95,18 @@ def explain_prediction_sklearn_default(tree_model, X, features,
        Plot labels fontsize
    :param fontname: str, optional
        Plot labels font name
+    :param grid: bool
+        True if we want to display the grid lines on the visualization
    :return:
        Prediction feature importance plot
    """
 
-    node_indicator = tree_model.decision_path([X])
-    decision_node_path = node_indicator.indices[node_indicator.indptr[0]:
-                                                node_indicator.indptr[1]]
-    feature_path_importance = _get_feature_path_importance_sklearn(tree_model, decision_node_path)
-    return _get_feature_path_importance_sklearn_plot(features, feature_path_importance, figsize, colors, fontsize,
+    prediction_value, decision_node_path = shadow_tree.predict(x)
+    decision_node_path = [node.id for node in decision_node_path]
+
+    feature_path_importance = shadow_tree.get_feature_path_importance(decision_node_path)
+    return _get_feature_path_importance_sklearn_plot(shadow_tree.feature_names, feature_path_importance, figsize,
+                                                     colors, fontsize,
                                                      fontname,
                                                      grid)
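
Note: the removed lines above used sklearn's decision_path directly; it returns a CSR indicator matrix, and slicing indices with indptr is the standard way to pull one sample's node ids out of it. That detail is exactly what shadow_tree.predict(x) now hides behind the wrapper. A runnable sklearn-only sketch of the old extraction, on a toy model:

    from sklearn.datasets import load_iris
    from sklearn.tree import DecisionTreeClassifier

    iris = load_iris()
    tree_model = DecisionTreeClassifier(max_depth=3).fit(iris.data, iris.target)

    X = iris.data[0]                                # a single instance
    node_indicator = tree_model.decision_path([X])  # CSR matrix of shape (1, n_nodes)
    decision_node_path = node_indicator.indices[node_indicator.indptr[0]:
                                                node_indicator.indptr[1]]
    print(decision_node_path)                       # node ids from root to leaf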

@@ -130,26 +135,6 @@ def _get_feature_path_importance_sklearn_plot(features, feature_path_importance,
    return ax
 
 
-def _get_feature_path_importance_sklearn(tree_model, node_list):
-    gini_importance = np.zeros(tree_model.tree_.n_features)
-    for node in node_list:
-        if tree_model.tree_.children_left[node] != -1:
-            node_left = tree_model.tree_.children_left[node]
-            node_right = tree_model.tree_.children_right[node]
-
-            gini_importance[tree_model.tree_.feature[node]] += tree_model.tree_.weighted_n_node_samples[node] * \
-                                                               tree_model.tree_.impurity[node] \
-                                                               - tree_model.tree_.weighted_n_node_samples[node_left] * \
-                                                               tree_model.tree_.impurity[node_left] \
-                                                               - tree_model.tree_.weighted_n_node_samples[node_right] * \
-                                                               tree_model.tree_.impurity[node_right]
-    normalizer = np.sum(gini_importance)
-    if normalizer > 0.0:
-        gini_importance /= normalizer
-
-    return gini_importance
-
-
 def get_prediction_explainer(explanation_type: str):
    """Factory method responsible to return a prediction path implementation based on argument 'explanation_type'
Empty file added dtreeviz/models/__init__.py
(The remaining 37 changed files are not shown here.)
