# https://stackabuse.com/scikit-learn-save-and-restore-models/
import pickle

# Make an example object to pickle.
some_obj = {'x':[4,2,1.5,1], 'y':[32,[101],17], 'foo':True, 'spam':False}

# To save a pickle, use pickle.dump.
# A convention is to name pickle files *.pickle, but you can name it whatever you want.
# Make sure to open the file in 'wb' mode (write binary). This is more cross-platform
# friendly than 'w' mode (write text) which might not work on Windows, etc.
with open('mypickle.pickle', 'wb') as f:
    # BUG FIX: this call must be indented inside the `with` block —
    # at column 0 it is an IndentationError (and would run after the
    # file is closed even if dedented deliberately).
    pickle.dump(some_obj, f)
!wget -nc URLPICKLEFILE
data = pd.read_pickle('FILENAMW.pkl')
df = pd.DataFrame(data)
# this works to unpack the thing...
# Concrete example of the template above: download a shared pickle from
# Dropbox and load it. `!wget` is IPython/Jupyter shell magic (notebook-only).
# NOTE(review): `?dl=0` normally serves Dropbox's HTML preview page, not the
# raw file — `?dl=1` forces a direct download. The author says this works;
# verify the downloaded file is actually a pickle before relying on it.
# NOTE(review): `pd` (pandas) must already be imported elsewhere in the notebook.
!wget -nc https://www.dropbox.com/s/ezl4oj16vkeu21l/supreme_genders_data.pkl?dl=0
# wget saves under the literal name including the query string, hence the
# odd-looking filename passed to read_pickle here.
data = pd.read_pickle('supreme_genders_data.pkl?dl=0')
df = pd.DataFrame(data)
# Joblib Module
# The Joblib library is intended to be a replacement for Pickle, for objects
# containing large data (e.g. fitted scikit-learn models with big numpy arrays).
# BUG FIX: `from sklearn.externals import joblib` was deprecated in
# scikit-learn 0.21 and removed in 0.23 — joblib is now its own package.
import joblib

# Save to file in the current working directory.
# NOTE(review): `model` is assumed to be a fitted estimator defined in an
# earlier notebook cell — not visible in this file.
joblib_file = "joblib_model.pkl"
joblib.dump(model, joblib_file)

# Load from file
joblib_model = joblib.load(joblib_file)

# Calculate the accuracy and predictions.
# NOTE(review): `Xtest` / `Ytest` are assumed to come from an earlier cell.
score = joblib_model.score(Xtest, Ytest)
print("Test score: {0:.2f} %".format(100 * score))
# BUG FIX: original referenced undefined `pickle_model` (copy/paste from the
# pickle section of the tutorial); the loaded model here is `joblib_model`.
Ypredict = joblib_model.predict(Xtest)

# As seen from the example, the Joblib library offers a bit simpler
# workflow compared to Pickle. While Pickle requires a file object
# to be passed as an argument, Joblib works with both file objects
# and string filenames. In case your model contains large arrays of
# data, each array will be stored in a separate file, but the save
# and restore procedure will remain the same. Joblib also allows
# different compression methods, such as 'zlib', 'gzip', 'bz2', and
# different levels of compression.