jmquintana79
11/19/2015 - 2:52 AM

create pandas dataframe

create pandas dataframe

import numpy as np
import pandas as pd
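# Optional faster alternative: Pandas on Ray (the project later became Modin: `import modin.pandas as pd`).
# NOTE: the import below rebinds `pd`, replacing plain pandas; keep only one of the two imports.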
import ray.dataframe as pd

## ONE NUMPY ARRAY TO PANDAS DATAFRAME

# raw data: a list of rows
ldata = [
    [1, 2],
    [7, 3],
]
# nested list -> numpy array
npdata = np.asarray(ldata)
# numpy array -> DataFrame, with explicit row and column labels
DF = pd.DataFrame(
    data=npdata,
    index=["row1", "row2"],
    columns=["col1", "col2"],
)

# inspect the dtype of every column
DF.dtypes


## TWO NUMPY ARRAYS TO PANDAS DATAFRAME

# one array per column
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])
# the keyword names become the column names, the arrays become the column values
DF = pd.DataFrame(data=dict(x=x, y=y))



# SET INDEX
# Returns a new frame indexed by columns 'A' and 'B'; df itself is left untouched.
indexed_df = df.set_index(keys=['A', 'B'])

# SET FORMAT (each statement overwrites df.index with the converted labels)
df.index = df.index.astype(dtype=str)    # str or other format
df.index = pd.to_datetime(arg=df.index)  # datetime format

# RENAME INDEX (in place: the index object is modified and nothing is returned)
DF.index.rename(name=name, inplace=True)

# GET LIST OF INDEX OF DATAFRAME (index labels as a plain Python list)
lindex = DF.index.to_list()

# RESET INDEX (and drop if it is required)
# drop=False keeps the old index as a regular column; use drop=True to discard it.
DF = DF.reset_index(drop=False, inplace=False)

# Display the head (first 5 rows) of the DataFrame
DF.head(5)
# Display the tail (last 5 rows) of the DataFrame
DF.tail(5)

# DROP COLUMN OF DATAFRAME PANDAS
# By label, in place: df itself loses the column.
df.drop(columns='column_name', inplace=True)
# By position: returns a new frame without the selected columns; df is not modified.
df.drop(columns=df.columns[[0, 1, 3]])  # by index


# RENAME COLUMNS OF DATAFRAME PANDAS
# Each old label maps to its own new label; mapping two columns to the same
# name would leave the frame with duplicate column labels.
df.rename(columns={'old1': 'new1', 'old2': 'new2'}, inplace=True)

# Changing column labels.
# Direct assignment replaces all labels at once, so the list needs one entry per column.
df.columns = ['water_year','rain_octsep', 'outflow_octsep',
              'rain_decfeb', 'outflow_decfeb', 'rain_junaug', 'outflow_junaug']

# Create CATEGORICAL object from the values in `array`
pd.Categorical(values=array)


# Create CONTINGENCY TABLE from 2 arrays
# array1 supplies the row groups, array2 the column groups;
# margins=True appends the row/column totals.
CONTINGENCE_TABLE = pd.crosstab(index=array1, columns=array2, margins=True)

# Create array of consecutive dates
# datetime must be in scope: the range bounds below are built with datetime(...).
from datetime import datetime

pd.date_range(start=datetime(2017,1,1), end=datetime(2017,1,10), freq='D')                 # format: Pandas Timestamp
pd.date_range(start=datetime(2017,1,1), end=datetime(2017,1,10), freq='D').to_pydatetime() # format: Python Datetime

"""
Possible frequencies:
Y: yearly  (year end; deprecated since pandas 2.2, where it is spelled 'YE')
M: monthly (month end; deprecated since pandas 2.2, where it is spelled 'ME')
W: weekly
D: daily
H: hourly  (deprecated since pandas 2.2, where it is spelled 'h')
'30min': each 30 minutes (and so on)
"""