# Evergreen prediction from the numeric features and alchemy_category
from sklearn.linear_model import LogisticRegression
from sklearn import cross_validation
import patsy
# Fit a logistic-regression classifier for `label` on the numeric page
# features plus the categorical `alchemy_category`, then report accuracy on a
# held-out 33% split.
#
# NOTE(review): `sklearn.cross_validation` is deprecated in newer scikit-learn
# releases in favour of `sklearn.model_selection`; confirm the installed
# version before upgrading.

# Patsy formula. C(...) marks alchemy_category as categorical; the trailing
# "- 1" removes patsy's own intercept column from the design matrix.
# Adjacent string literals are used instead of backslash continuations inside
# a single literal, which break silently on trailing whitespace. The resulting
# string is unchanged.
formula = (
    'label ~ alchemy_category_score + C(alchemy_category) + avglinksize+ commonlinkratio_1'
    '+ commonlinkratio_2 + commonlinkratio_3 + commonlinkratio_4 + compression_ratio + embed_ratio + frameTagRatio'
    '+ html_ratio + image_ratio + parametrizedLinkRatio + spelling_errors_ratio - 1'
)

# Build the response (y_mat) and predictor (x_mat) design matrices from `su`.
y_mat, x_mat = patsy.dmatrices(formula, data=su)

# Hold out a third of the rows for evaluation. No random_state is given, so
# the split (and therefore the printed score) varies from run to run.
x_train, x_test, y_train, y_test = cross_validation.train_test_split(
    x_mat, y_mat, test_size=0.33
)

# The response comes back from patsy as a matrix; flatten it to the 1-D label
# vector the estimator expects.
logreg = LogisticRegression()
logreg.fit(x_train, np.ravel(y_train))

# Predictions on the held-out rows, kept available for later inspection.
y_pred = logreg.predict(x_test)

# Mean accuracy on the held-out rows. y_test is flattened the same way as
# y_train, and the parenthesised print works on both Python 2 and Python 3.
print(logreg.score(x_test, np.ravel(y_test)))