canimus
4/12/2018 - 1:10 PM

Dask Distributed Setup

Dask Distributed Setup

%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from dask.distributed import Client
# Connect this notebook session to the Dask scheduler listening at
# dask-scheduler:8786.
client = Client(address="dask-scheduler:8786")


import dask.dataframe as dd
# Build a Dask DataFrame from the tab-separated file at /tmp/header.tsv.
dfd = dd.read_csv(
    "/tmp/header.tsv",
    sep="\t",
    # A field holding a single space is treated as a missing value.
    na_values=[" "],
    # Column 0 is parsed as a datetime.
    parse_dates=[0],
    # NOTE(review): deprecated in pandas 2.x -- confirm the pandas version
    # in use before relying on or removing this flag.
    infer_datetime_format=True,
    # Number of bytes sampled up front when inferring column dtypes (~100 MB).
    sample=100000000,
)