# Dask Distributed Setup
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from dask.distributed import Client
# Connect this session to the Dask scheduler reachable at host
# "dask-scheduler", port 8786.
client = Client(address="dask-scheduler:8786")
import dask.dataframe as dd
# Load the tab-separated file as a Dask DataFrame.
dfd = dd.read_csv(
    "/tmp/header.tsv",
    sep="\t",
    na_values=[" "],  # a single-space field is treated as missing
    parse_dates=[0],  # parse the first column as datetimes
    # NOTE(review): pandas documents this flag as deprecated since 2.0 --
    # confirm against the installed pandas version before relying on it.
    infer_datetime_format=True,
    # Per the Dask docs, `sample` is the number of bytes read up front to
    # infer column dtypes; this asks for roughly 100 MB.
    sample=100000000,
)