lorinfields
9/13/2018 - 10:31 PM

pickle

# https://stackabuse.com/scikit-learn-save-and-restore-models/

import pickle

# make an example object to pickle
some_obj = {'x':[4,2,1.5,1], 'y':[32,[101],17], 'foo':True, 'spam':False}
# To save a pickle, use pickle.dump.
# A common convention is to name pickle files *.pickle, but any filename works.
# Make sure to open the file in 'wb' mode (write binary): pickle writes binary data,
# so text mode ('w') will fail or produce a corrupted file.
with open('mypickle.pickle', 'wb') as f:
    pickle.dump(some_obj, f)
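
# To restore it later, open the file in 'rb' mode and call pickle.load.
# A minimal sketch of the round trip, reusing the 'mypickle.pickle' file written above:
with open('mypickle.pickle', 'rb') as f:
    restored_obj = pickle.load(f)

print(restored_obj == some_obj)  # True -- the dict round-trips intact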

# Template: download a pickled file and load it into a DataFrame (placeholders in caps).
import pandas as pd

!wget -nc URLPICKLEFILE
data = pd.read_pickle('FILENAME.pkl')
df = pd.DataFrame(data)

# Working example: download a pickle from Dropbox and load it into a DataFrame.
# wget keeps the '?dl=0' suffix in the saved filename, so read_pickle must use that name.
!wget -nc https://www.dropbox.com/s/ezl4oj16vkeu21l/supreme_genders_data.pkl?dl=0
data = pd.read_pickle('supreme_genders_data.pkl?dl=0')
df = pd.DataFrame(data)
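
# pandas can also write the pickle itself; a minimal sketch of the round trip using
# DataFrame.to_pickle / pd.read_pickle (the 'df_copy.pkl' filename is just an example):
df.to_pickle('df_copy.pkl')
df_copy = pd.read_pickle('df_copy.pkl')
print(df_copy.equals(df))  # True if the DataFrame round-trips unchanged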

# Joblib Module
# The Joblib library is intended as a replacement for Pickle for objects containing
# large data, such as fitted scikit-learn models that hold big NumPy arrays.
from sklearn.externals import joblib
# Note: newer scikit-learn versions removed sklearn.externals.joblib; use `import joblib` there.

# Save to file in the current working directory
joblib_file = "joblib_model.pkl"  
joblib.dump(model, joblib_file)  # 'model' is a fitted scikit-learn estimator trained earlier

# Load from file
joblib_model = joblib.load(joblib_file)

# Calculate the accuracy and predictions
score = joblib_model.score(Xtest, Ytest)  
print("Test score: {0:.2f} %".format(100 * score))  
Ypredict = joblib_model.predict(Xtest)
# As the example shows, Joblib offers a slightly simpler workflow than Pickle:
# Pickle requires a file object to be passed as an argument, while Joblib accepts
# both file objects and string filenames. If your model contains large arrays of
# data, each array is stored in a separate file, but the save and restore
# procedure stays the same. Joblib also supports different compression methods,
# such as 'zlib', 'gzip', and 'bz2', at different compression levels.
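
# A minimal sketch of two points from the paragraph above (the filenames are just
# illustrative placeholders): joblib.dump accepts either an open file object or a
# string filename, and the compress argument takes a (method, level) tuple.
with open("joblib_model_fileobj.pkl", "wb") as f:
    joblib.dump(joblib_model, f)  # passing a file object

joblib.dump(joblib_model, "joblib_model_compressed.pkl.gz",
            compress=('gzip', 3))  # string filename, gzip compression at level 3
compressed_model = joblib.load("joblib_model_compressed.pkl.gz")  # load auto-detects compression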