# stavgrossfeld
# 9/8/2016 - 5:48 AM
#
# logistic regression.py

# Predict the evergreen label from the numeric features and alchemy_category.

import numpy as np
import patsy
from sklearn import cross_validation
from sklearn.linear_model import LogisticRegression


# Patsy formula: model `label` from the numeric page features plus the
# categorical `alchemy_category` (wrapped in C(...) so patsy treats it as a
# categorical term).  The trailing "- 1" removes patsy's implicit intercept
# column from the design matrix.
# The pieces below concatenate to exactly the same string as before.
formula = (
    'label ~ alchemy_category_score + C(alchemy_category) + avglinksize+ commonlinkratio_1'
    '+ commonlinkratio_2 + commonlinkratio_3 + commonlinkratio_4 + compression_ratio + embed_ratio + frameTagRatio'
    '+ html_ratio + image_ratio + parametrizedLinkRatio + spelling_errors_ratio - 1'
)

# `su` is not defined in this file; it must already exist when this runs.
# Presumably a pandas DataFrame containing every column named in the formula
# -- TODO confirm against wherever the data is loaded.
y_mat, x_mat = patsy.dmatrices(formula, data=su)

# Hold out 33% of the rows for evaluation.
# NOTE: no random_state is passed, so the split (and therefore the printed
# score) changes from run to run.
# NOTE: sklearn.cross_validation was removed in scikit-learn 0.20; on newer
# versions the same function lives at sklearn.model_selection.train_test_split.
x_train, x_test, y_train, y_test = cross_validation.train_test_split(
    x_mat, y_mat, test_size=.33)

# The response comes back from dmatrices as a single-column 2-D matrix;
# flatten it at each call site so scikit-learn receives a 1-D target, while
# y_train / y_test themselves keep their original shape for any later code.
logreg = LogisticRegression()
logreg = logreg.fit(x_train, np.ravel(y_train))

y_pred = logreg.predict(x_test)

# Mean accuracy on the held-out rows.  Written as a single-argument call so
# it prints the same thing on Python 2 and Python 3.
print(logreg.score(x_test, np.ravel(y_test)))