# I/O between CSV files and pandas DataFrames
import pandas
from datetime import datetime
## CSV to PANDAS
# basic: semicolon-separated file, restricted to 'col1' and 'col2',
# with position 0 used as the index
path = 'path_file_input.csv'
data_df = pandas.read_csv(
    path,
    sep=";",
    index_col=0,
    usecols=['col1', 'col2'],
)
# reading japanese characters (file encoded as Shift-JIS)
# read_csv must be qualified: only the `pandas` module name is imported here.
data_df = pandas.read_csv(path, encoding="SHIFT-JIS")
# formatting the type of columns: force "column1" to str and "column2" to float
path = 'path_file_input.csv'
data_df = pandas.read_csv(
    path,
    sep=";",
    dtype={
        "column1": str,
        "column2": float,
    },
)
# parse a single datetime column
def dateparse(x):
    """Parse a timestamp string formatted as '%Y-%m-%d %H:%M:%S'.

    Uses the standard-library ``datetime`` class imported at the top of the
    file; the ``pd.datetime`` alias is no longer available in current pandas.

    Args:
        x: Text such as ``'2021-03-04 05:06:07'``.

    Returns:
        The corresponding naive ``datetime.datetime``.

    Raises:
        ValueError: If ``x`` does not match the expected format.
    """
    return datetime.strptime(x, '%Y-%m-%d %H:%M:%S')
# NOTE: `path_input` is a placeholder; it must hold the CSV path before this runs.
# `date_parser` is deprecated in recent pandas releases, so the column is read
# as text and converted explicitly, using the same format as dateparse().
data_df = pandas.read_csv(path_input, dtype={'datetime': str})
data_df['datetime'] = pandas.to_datetime(data_df['datetime'], format='%Y-%m-%d %H:%M:%S')
# parse date time in different columns before upload
path = 'path_file_input.csv'


def parse(x1, x2):
    """Combine a '%Y%m%d' date string and a '%H%M' time string into a datetime.

    Row-wise equivalent of the vectorised conversion performed below.
    """
    return datetime.strptime('%s %s' % (x1, x2), '%Y%m%d %H%M')


# Nested `parse_dates` lists and `date_parser` are deprecated in recent pandas
# releases, so both parts are read as text (keeping leading zeros such as
# '0930') and merged explicitly.
data_df = pandas.read_csv(
    path,
    sep=";",
    dtype={'column_DATE': str, 'column_TIME': str},
)
# The two source columns are removed and the combined timestamp becomes the
# index, mirroring the former `parse_dates=[[...]]` + `index_col=0` result.
data_df.index = pandas.DatetimeIndex(
    pandas.to_datetime(
        data_df.pop('column_DATE') + ' ' + data_df.pop('column_TIME'),
        format='%Y%m%d %H%M',
    ),
    name='column_DATE_column_TIME',
)
# parse dates after upload data
# DF is an already loaded DataFrame whose 'date' column holds text such as
# '05Jan2021' (day, abbreviated month name, four-digit year).
# `pandas` is used because the `pd` alias is not imported at this point.
DF['date'] = pandas.to_datetime(DF['date'], format='%d%b%Y')
## LOAD MULTIPLE FILES
def loader(path):
    """Read one CSV file into a DataFrame with the default ``read_csv`` options.

    Args:
        path: Anything ``pandas.read_csv`` accepts (file path or file-like object).

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    return pandas.read_csv(path)
# `l_paths` is a placeholder for an iterable of CSV paths (define it beforehand).
# Each file is read by loader() and the resulting frames are stacked in order.
df = pandas.concat(map(loader, l_paths))
## PANDAS to CSV
# basic: DF is a pandas DataFrame; it is written semicolon-separated
# and without its index
path = 'path_file_output.csv'
DF.to_csv(
    path,
    sep=";",
    index=False,
)
## EXCEL (.xlsx) to PANDAS
import pandas as pd
# `path_input` is a placeholder for the .xlsx path (define it beforehand).
xl = pd.ExcelFile(path_input)
# Names of all sheets found in the workbook.
lsheet_names = xl.sheet_names
# Preview the first rows of the first sheet (the result is only displayed in
# an interactive session; it is not stored).
xl.parse(sheet_name=lsheet_names[0]).head()
import pandas as pd
import feather
# target feather file
pingInfoFilePath = "./serverpings.ftr"

# sample data: one entry per server, column by column
pingInfo = {
    "servername": [
        "svr_et_1", "svr_et_2", "svr_wt_1", "svr_wt_2",
        "svr_nr_1", "svr_nr_2", "svr_st_1", "svr_st_2",
    ],
    "lastping": [
        "12.20.15.122", "12.20.11.395", "12.20.12.836", "12.20.16.769",
        "12.20.17.193", "12.20.18.416", "11.59.55.913", "12.20.14.811",
    ],
    "roundtriptime": [300, 400, 0, 200, 100, 500, 350, 0],
    "status": ["PASS", "PASS", "FAIL", "PASS", "PASS", "PASS", "PASS", "FAIL"],
}

# pandas DataFrame creation (one column per dictionary key)
dataFrame = pd.DataFrame(pingInfo)
# write the DataFrame to the feather file
dataFrame.to_feather(pingInfoFilePath)

# read the feather file back (no column subset requested, threaded reading on)
readFrame = pd.read_feather(
    pingInfoFilePath,
    columns=None,
    use_threads=True,
)
# pandas 0.21 introduces new functions for Parquet:
df = pd.read_parquet('example_file.parquet', engine='pyarrow')
# or, reading the same file with the alternative engine
# (the file name previously contained a typo: 'example_ifle.parquet')
df = pd.read_parquet('example_file.parquet', engine='fastparquet')