8/8/2017 - 5:28 PM

Decision Tree Sci-Kit Learn

Decision Tree Sci-Kit Learn

import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
from sklearn.grid_search import GridSearchCV
from sklearn.cross_validation import train_test_split
import warnings
get_ipython().magic(u'matplotlib inline')

# Load the dataset as a comma-separated table with no header row
# (header=None makes pandas label the columns 0, 1, 2, ...).
# NOTE(review): the file path / URL argument is missing from this call --
# pandas.read_csv needs it as its first argument, so this raises a TypeError
# as written. TODO: restore the original data source.
balance_data = pd.read_csv(
                           sep= ',', header= None)

# Features are columns 1..4 of the table; the target label is column 0.
X, Y = balance_data.values[:, 1:5], balance_data.values[:, 0]

# Hold out 30% of the rows for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=100,
)

# Run decision tree - create instance of class, fit, score, predict.
# Gini-impurity tree limited to depth 3 with at least 5 samples per leaf;
# random_state is fixed so repeated runs build the same tree.
clf_gini = DecisionTreeClassifier(criterion="gini", random_state=100,
                                  max_depth=3, min_samples_leaf=5)

# The estimator must be fitted before score()/predict() can be called.
clf_gini.fit(X_train, y_train)

# Mean accuracy on the rows the tree was trained on.
train_accuracy = clf_gini.score(X_train, y_train)
print("Train accuracy: {:.3f}".format(train_accuracy))

predicted = clf_gini.predict(X_test)

# Score from the predicted values: accuracy is the fraction of test rows
# whose predicted label equals the true label. This is the same number that
# clf_gini.score(X_test, y_test) returns.
test_accuracy = np.mean(predicted == y_test)
print("Test accuracy: {:.3f}".format(test_accuracy))