jccantre
10/1/2019 - 6:08 PM

Create the Training Script

%%writefile $script_folder/train.py

import argparse
import os
import numpy as np

from sklearn.svm import SVC
from sklearn.externals import joblib
import pickle

from azureml.core import Run

# Command-line interface: the caller may pass the data-folder mount point
# (data files from the datastore) and a regularization rate (default 0.01).
# NOTE(review): the visible training code fits SVC() with default settings
# and reads neither option -- confirm whether they should be wired in.
parser = argparse.ArgumentParser()
parser.add_argument(
    '--data-folder',
    dest='data_folder',
    type=str,
    help='data folder mounting point',
)
parser.add_argument(
    '--regularization',
    dest='reg',
    type=float,
    default=0.01,
    help='regularization rate',
)
args = parser.parse_args()

# Training samples, one row per person: [height, weight, shoe size].
# NOTE(review): the source comment read "width"; the values look like body
# weight -- confirm.
X = [
    [181, 80, 44],
    [177, 70, 43],
    [160, 60, 38],
    [154, 54, 37],
    [166, 65, 40],
    [190, 90, 47],
    [175, 64, 39],
    [177, 70, 40],
    [159, 55, 37],
    [171, 75, 42],
    [181, 85, 43],
]

# Class label for each row of X, in the same order.
Y = [
    'male', 'male', 'female', 'female', 'male', 'male',
    'female', 'female', 'female', 'male', 'male',
]

# Fit a support vector classifier with default settings; the result of
# fit() is bound to clf, exactly as the two-step form did.
clf = SVC().fit(X, Y)

# Sanity output: one ad-hoc prediction, then the score computed on the very
# samples the model was trained on (not a held-out set).
sample = [[190, 70, 43]]
print('Predicted value:', clf.predict(sample))
print('Accuracy', clf.score(X, Y))

# Round-trip the fitted classifier through pickle as a smoke test: write it
# to disk, read it back, and check that the restored copy can still predict.
model_path = 'fwrk.pkl'

# Log the file name that is actually written, taken from the same variable
# used to open the file so the message and the path cannot drift apart.
print('Export the model to', model_path)
# `with` closes the handle even if pickling raises.
with open(model_path, 'wb') as f:
    pickle.dump(clf, f)

print('Import the model from', model_path)
# Only unpickle files this script produced itself: pickle.load can execute
# arbitrary code when fed untrusted data.
with open(model_path, 'rb') as f2:
    clf2 = pickle.load(f2)

# Predict with the restored model on a sample that is not in the training data.
X_new = [[154, 54, 35]]
print('New Sample:', X_new)
print('Predicted class:', clf2.predict(X_new))

# Persist the trained classifier under ./outputs (created if missing).
# Per the original note, a file saved in the outputs folder is automatically
# uploaded into the experiment record.
os.makedirs('outputs', exist_ok=True)
joblib.dump(clf, 'outputs/fwrk.pkl')