alexanderholt
10/12/2017 - 11:02 PM

train/test split

from sklearn import linear_model
from sklearn.model_selection import train_test_split
# Columns of `data` used as predictors; edit this list to try other variables.
feature_columns = ['LSTAT', 'RM', 'NOX', 'PTRATIO']
X = data[feature_columns]
y = target

# Hold out half of the rows for testing (test_size=0.3 would hold out 30%).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5)

# Report the shapes of the training pair first, then the test pair.
for features, response in ((X_train, y_train), (X_test, y_test)):
    print(features.shape, response.shape)

# Fit a linear regression on the training rows, then predict the held-out rows.
lm = linear_model.LinearRegression()
model = lm.fit(X_train, y_train)
predictions = model.predict(X_test)

print(model)

# Scatter the held-out targets against the model's predictions for them.
plt.scatter(y_test, predictions)
plt.show()

# Score the fitted model on the held-out rows.
held_out_score = model.score(X_test, y_test)
print("Score:", held_out_score)