vcpreneur
9/9/2018 - 4:49 PM

Decision Tree Classifier

# Import statements 
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
import pandas as pd
import numpy as np

# Load the headerless CSV file and turn the resulting table into a NumPy array.
data = np.asarray(pd.read_csv('data.csv', header=None))

# Columns 0 and 1 are the features (X); column 2 holds the labels (y).
X, y = data[:, :2], data[:, 2]

# Build a decision tree with default hyperparameters and train it on the
# whole dataset. fit() returns the fitted estimator, so it can be chained.
model = DecisionTreeClassifier().fit(X, y)

# Predict labels for the same samples the model was trained on.
y_pred = model.predict(X)

# Fraction of predictions that match the true labels. Because X was also the
# training data, this is the training accuracy, not a held-out estimate.
acc = accuracy_score(y, y_pred)

Hyperparameters
- max_depth: The maximum number of levels in the tree.
- min_samples_leaf: The minimum number of samples allowed in a leaf.
- min_samples_split: The minimum number of samples required to split an internal node.
- max_features: The number of features to consider when looking for the best split.

# Constrained tree: at most 7 levels deep, and every leaf must keep at
# least 10 samples.
model = DecisionTreeClassifier(
    max_depth=7,
    min_samples_leaf=10,
)