jmquintana79
4/20/2018 - 6:16 AM

decision tree graph

  • Decision tree graph for data analysis.
  • Installation: run `sudo pip3 install graphviz` and `sudo apt-get install graphviz`. Installing only the Python library (with pip) is not enough; the system Graphviz package is also required.
# Fit a decision tree on the iris data set and render it as a PNG image.
# Intended for a Jupyter notebook: the final expression displays the image.
from io import StringIO  # sklearn.externals.six was removed from scikit-learn

import pandas as pd
import pydotplus
import sklearn.datasets as datasets
from IPython.display import Image
from sklearn.tree import DecisionTreeClassifier
from sklearn.tree import export_graphviz

# load the data: features as a DataFrame, target as a 1D array
iris = datasets.load_iris()
df = pd.DataFrame(iris.data, columns=iris.feature_names)
y = iris.target

# fit the classifier on the full data set
dtree = DecisionTreeClassifier()
dtree.fit(df, y)

# export the fitted tree as DOT text into an in-memory buffer
dot_data = StringIO()
export_graphviz(dtree, out_file=dot_data,
                filled=True, rounded=True,
                special_characters=True)

# convert the DOT text into a graph and display it as a PNG
graph = pydotplus.graph_from_dot_data(dot_data.getvalue())
Image(graph.create_png())
## plot decision tree analysis of any features vs target
def analysis_tree(clf:'estimator',X:'array',y:'array',lx:list,ly:list)->'graph':
    """
    Plot decision tree analysis of any features vs target.

    The estimator is fitted on (X, y), exported to Graphviz DOT text and
    wrapped in a graphviz.Source object.

    clf -- Decision Tree estimator without being fitted (regressor or classificator).
    X -- numpy array 2D of features.
    y -- numpy array 1D of target.
    lx -- list of feature names (passed to export_graphviz as feature_names).
    ly -- list of target name (passed to export_graphviz as class_names;
          for a classifier it should hold one name per class, in ascending
          order of the class labels).
    return -- graph (it is automatically ploted in Jupyter Notebook)
    """
    # imported here so the function does not rely on a module-level
    # `tree` import that may only happen after this definition
    import graphviz
    from sklearn.tree import export_graphviz

    # fit the estimator
    clf = clf.fit(X, y)
    # build the DOT description, labelling nodes with the names given by the caller
    dot_data = export_graphviz(clf, out_file=None,
                               feature_names=lx,
                               class_names=ly,
                               filled=True, rounded=True,
                               special_characters=True)
    # return
    return graphviz.Source(dot_data)

# define the inputs from the iris data loaded above
# (X, lx and ly were previously used here without being defined)
X = iris.data
lx = list(iris.feature_names)
ly = ['target']
# define estimator
from sklearn import tree
clf = tree.DecisionTreeRegressor(max_depth=3)
# plot tree
analysis_tree(clf, X, y, lx, ly)