[pandas timeseries] #timeserie #financial #frequency #timestamp #resample #filter
import pandas as pd
import numpy as np
# Reproducible sample frame: 2000 rows spaced 1 millisecond apart,
# starting at 2000-01-01 00:00:00.
rng = np.random.RandomState(0)
nvals = 2000
# 'ms' is the millisecond alias; the single-letter 'L' spelling is
# deprecated in recent pandas releases.
index = pd.date_range('1/1/2000', periods=nvals, freq='ms')
df = pd.DataFrame({'seq': range(nvals)}, index=index)
df['rv1'] = rng.randn(nvals)
########################
# filter on timestamp
########################
# Window bounds used by both filters below. The window is (s, e]:
# rows stamped exactly s are excluded, rows stamped exactly e are kept.
s = pd.Timestamp('2000-01-01 00:00:00.001')
e = pd.Timestamp('2000-01-01 00:00:00.004')
#### filter by timestamp in index
by_index = df.loc[(df.index > s) & (df.index <= e)]
# by_index holds the rows stamped .002, .003 and .004 (seq 2, 3, 4)
#### filter by timestamp in column
# Name the index 'its' and move it into a regular column; the frame is
# left with a default integer index and columns its, seq, rv1.
df = df.rename_axis('its').reset_index()
by_column = df.loc[(df['its'] > s) & (df['its'] <= e)]
# by_column holds the same three rows, now selected through the 'its' column
#######################
# ts resampling
#######################
### Downsampling
# Sample data: 15 rows, one per minute, seq = 0..14.
nvals = 15
# 'min' is the minute alias; the single-letter 'T' spelling is deprecated
# in recent pandas releases.
index = pd.date_range('2019-01-01', periods=nvals, freq='min')  # freq is minutes
series = pd.Series(range(nvals), index=index)
df = series.to_frame('seq')
# ts label is at start of aggregation period
left_labeled = df.resample('5min').sum()
# the first line, with ts 2019-01-01 00:00:00, holds the
# summed values between 00:00:00 and 00:04:00
# output
#                      seq
# 2019-01-01 00:00:00   10
# 2019-01-01 00:05:00   35
# 2019-01-01 00:10:00   60
## ts label is at the end of aggregation period
right_labeled = df.resample('5min', label='right').sum()
# sums values on 00:00, 00:01, 00:02, 00:03, 00:04 but assigns ts label 00:05
#                      seq
# 2019-01-01 00:05:00   10
# 2019-01-01 00:10:00   35
# 2019-01-01 00:15:00   60
## bins closed on the right edge, labelled with that right edge
right_closed = df.resample('5min', label='right', closed='right').sum()
# sums values on 00:01, 00:02, 00:03, 00:04, 00:05 and assigns ts label 00:05.
# Because each bin now excludes its left edge, the very first sample (00:00)
# falls into its own leading bin labelled 00:00.
#                      seq
# 2019-01-01 00:00:00    0
# 2019-01-01 00:05:00   15
# 2019-01-01 00:10:00   40
# 2019-01-01 00:15:00   50
#------------------------------------------------
# resampling columns with different agg funcs
#------------------------------------------------
## see pandas resample docs
# Sample data with the two columns aggregated below: 3000 rows spaced
# 1 millisecond apart, i.e. exactly three full seconds of data.
# (This rebinds nvals, index and df, as the earlier sections do.)
nvals = 3000
index = pd.date_range('2019-01-01', periods=nvals, freq='ms')
df = pd.DataFrame(
    {'v1': range(nvals), 'v2': np.arange(nvals, dtype=float)},
    index=index,
)
# One row per second: v1 is summed, v2 is averaged. Each column is
# resampled on its own so it can use its own aggregation function.
# 's' is the second alias; the upper-case 'S' spelling is deprecated in
# recent pandas releases.
df_secs = pd.DataFrame({
    'v1_sum': df['v1'].resample('s').sum(),
    'v2_avg': df['v2'].resample('s').mean(),
})
# output
#                      v1_sum  v2_avg
# 2019-01-01 00:00:00  499500   499.5
# 2019-01-01 00:00:01 1499500  1499.5
# 2019-01-01 00:00:02 2499500  2499.5