# example_model.py
#!/usr/bin/env python
"""
Example classifier for the prediction-challenge image data using a
scikit-learn multi-layer perceptron (MLPClassifier).

To get started, install the required packages:
    pip install pandas numpy scikit-learn matplotlib
"""
import pandas as pd
import numpy as np
from sklearn import metrics, preprocessing, linear_model, neural_network
import matplotlib.pyplot as plt
# Pull all three arrays out while the archive is open: the object returned by
# np.load for an .npz file reads its members lazily and is closed when the
# `with` block exits.
with np.load('prediction-challenge-01-data.npz') as fh:
    data_x = fh['data_x']
    data_y = fh['data_y']
    test_x = fh['test_x']

# TRAINING DATA: INPUT (x) AND OUTPUT (y)
# Axes of the input arrays:
#   1st index: image serial number
#   2nd index: color channel
#   3rd/4th index: pixel position (the stored entry is the pixel value)
print(data_x.shape, data_x.dtype)
print(data_y.shape, data_y.dtype)

# TEST DATA: INPUT (x) ONLY
print(test_x.shape, test_x.dtype)

# Quick visual sanity check: show the first channel of the first training
# image, using its label as the plot title.
plt.imshow(data_x[0, 0])
plt.title(data_y[0])
plt.show()
# Flatten every image into one feature vector. scikit-learn estimators expect
# 2-D input of shape (n_samples, n_features), whereas the loaded arrays carry
# additional channel/pixel axes. For input that is already 2-D this reshape
# is a no-op.
train_features = data_x.reshape(len(data_x), -1)
test_features = test_x.reshape(len(test_x), -1)

# This is your model that will learn to predict
model = neural_network.MLPClassifier()

print("Training...")
# Your model is trained on the (flattened) training data
model.fit(train_features, data_y)

print("Predicting...")
# Your trained model is now used to make predictions on the test data.
# predict() would only return hard class labels (a 1-D array), so the
# per-class probabilities are taken from predict_proba(), whose columns are
# ordered like model.classes_.
y_prediction = model.predict_proba(test_features)

# We are just interested in the probability that the target is 1.
# NOTE(review): column 1 is the probability of model.classes_[1]; that is
# label 1 only if the labels are 0/1 -- confirm against data_y.
results = y_prediction[:, 1]
results_df = pd.DataFrame(data={'probability': results})

# One row per test sample: the flattened input features followed by the
# predicted probability (DataFrame needs 2-D data, hence the flattened array).
joined = pd.DataFrame(test_features).join(results_df)

print("Writing predictions to predictions.csv")
# Save the predictions out to a CSV file
joined.to_csv("predictions.csv", index=False)