Rescale to between 0, 1
# Rescale data (between 0 and 1)
from pandas import read_csv
from numpy import set_printoptions
from sklearn.preprocessing import MinMaxScaler
filename = 'pima-indians-diabetes.data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(filename, names=names)
array = dataframe.values
# separate array into input and output components
X = array[:,0:8]
Y = array[:,8]
scaler = MinMaxScaler(feature_range=(0, 1))
rescaledX = scaler.fit_transform(X)
# summarize transformed data
set_printoptions(precision=3)
[[ 0.353 0.744 0.59 0.354 0. 0.501 0.234 0.483]
[ 0.059 0.427 0.541 0.293 0. 0.396 0.117 0.167]
[ 0.471 0.92 0.525 0. 0. 0.347 0.254 0.183]
[ 0.059 0.447 0.541 0.232 0.111 0.419 0.038 0. ]
[ 0. 0.688 0.328 0.354 0.199 0.642 0.944 0.2 ]]
x(new) = (x - mean) / stdev.
# Standardize data (0 mean, 1 stdev)
from sklearn.preprocessing import StandardScaler
from pandas import read_csv
from numpy import set_printoptions
filename = 'pima-indians-diabetes.data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(filename, names=names)
array = dataframe.values
# separate array into input and output components
X = array[:,0:8]
Y = array[:,8]
scaler = StandardScaler().fit(X)
rescaledX = scaler.transform(X)
# summarize transformed data
set_printoptions(precision=3)
print(rescaledX[0:5,:])
[[ 0.64 0.848 0.15 0.907 -0.693 0.204 0.468 1.426]
[-0.845 -1.123 -0.161 0.531 -0.693 -0.684 -0.365 -0.191]
[ 1.234 1.944 -0.264 -1.288 -0.693 -1.103 0.604 -0.106]
[-0.845 -0.998 -0.161 0.155 0.123 -0.494 -0.921 -1.042]
[-1.142 0.504 -1.505 0.907 0.766 1.41 5.485 -0.02 ]]
Useful when data have a lot of zeroes (result value between 0, 1)
# Normalize data (length of 1)
from sklearn.preprocessing import Normalizer
from pandas import read_csv
from numpy import set_printoptions
filename = 'pima-indians-diabetes.data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(filename, names=names)
array = dataframe.values
# separate array into input and output components
X = array[:,0:8]
Y = array[:,8]
scaler = Normalizer().fit(X)
normalizedX = scaler.transform(X)
# summarize transformed data
set_printoptions(precision=3)
print(normalizedX[0:5,:])
[[ 0.034 0.828 0.403 0.196 0. 0.188 0.004 0.28 ]
[ 0.008 0.716 0.556 0.244 0. 0.224 0.003 0.261]
[ 0.04 0.924 0.323 0. 0. 0.118 0.003 0.162]
[ 0.007 0.588 0.436 0.152 0.622 0.186 0.001 0.139]
[ 0. 0.596 0.174 0.152 0.731 0.188 0.01 0.144]]
Only 0 and 1
# Normalize data (length of 1)
from sklearn.preprocessing import Normalizer
from pandas import read_csv
from numpy import set_printoptions
filename = 'pima-indians-diabetes.data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
dataframe = read_csv(filename, names=names)
array = dataframe.values
# separate array into input and output components
X = array[:,0:8]
Y = array[:,8]
scaler = Normalizer().fit(X)
normalizedX = scaler.transform(X)
# summarize transformed data
set_printoptions(precision=3)
print(normalizedX[0:5,:])
[[ 0.034 0.828 0.403 0.196 0. 0.188 0.004 0.28 ]
[ 0.008 0.716 0.556 0.244 0. 0.224 0.003 0.261]
[ 0.04 0.924 0.323 0. 0. 0.118 0.003 0.162]
[ 0.007 0.588 0.436 0.152 0.622 0.186 0.001 0.139]
[ 0. 0.596 0.174 0.152 0.731 0.188 0.01 0.144]]