Normalization is applied to the input data so that the contours of the cost function become rounder, which lets gradient descent converge faster.
# This didn't improve the model, possibly because the data mixed
# one-hot encoded columns with numerical columns.
from sklearn.preprocessing import StandardScaler
# Fit a StandardScaler on all columns of df; `std` keeps the fitted scaler
# so the same transform can be reused later.
std = StandardScaler().fit(df)
# Rebuild df from the scaled array. Only column labels are passed, so the
# new frame gets a default integer index rather than df's previous index.
# NOTE(review): the labels come from x_df, not df — confirm both frames have
# the same columns in the same order.
df = pd.DataFrame(data=std.transform(df), columns=x_df.columns)