# slzdevsnp
# 8/15/2019 - 5:13 PM

# [pandas timeseries] #timeserie #financial #frequency #timestamp #resample #filter

# [pandas timeseries] #timeserie #financial #frequency #timestamp #resample #filter

import pandas as pd 
import numpy as np 

# Sample data: 2000 rows, one per millisecond, starting 2000-01-01 00:00:00.
rng = np.random.RandomState(0)  # fixed seed so the random column is reproducible
nvals = 2000
# 'ms' is the millisecond alias; the legacy 'L' spelling is deprecated in
# recent pandas releases.
index = pd.date_range('1/1/2000', periods=nvals, freq='ms')

df = pd.DataFrame({'seq': range(nvals), 'rv1': rng.randn(nvals)}, index=index)


########################
#  filter on  timestamp
########################

# Window bounds shared by both filters below. The window is half-open,
# (s, e]: strictly after `s`, up to and including `e`.
s = pd.Timestamp('2000-01-01 00:00:00.001')
e = pd.Timestamp('2000-01-01 00:00:00.004')

#### filter by timestamp in index
df.loc[(df.index > s) & (df.index <= e)]


### filter by timestamp in column
# reset_index() moves the (unnamed) DatetimeIndex into a column called
# 'index'; rename that column to 'its' and filter on it with the same window.
df = df.reset_index().rename(columns={'index': 'its'})
df.loc[(df['its'] > s) & (df['its'] <= e)]
  
#######################
# ts  resampling
#######################
### Downsampling

# Sample data: 15 rows, one per minute (00:00 .. 00:14), seq = 0..14.
nvals = 15
# 'min' is the minute alias; the legacy 'T' spelling is deprecated in recent
# pandas releases.
index = pd.date_range('2019-01-01', periods=nvals, freq='min')
df = pd.DataFrame({'seq': range(nvals)}, index=index)

# ts label is at the start of the aggregation period (default for '5min':
# label='left', closed='left')
df.resample('5min').sum()

# the first row, labelled 2019-01-01 00:00:00, holds the sum of the values
# between 00:00:00 and 00:04:00 (inclusive)

# output
#                     seq
#2019-01-01 00:00:00   10
#2019-01-01 00:05:00   35
#2019-01-01 00:10:00   60

##  ts label is at the end of the aggregation period

df.resample('5min', label='right').sum()
# sums values on 00:00, 00:01 00:02 00:03 00:04 but assigns ts label 00:05
#                     seq
#2019-01-01 00:05:00   10
#2019-01-01 00:10:00   35
#2019-01-01 00:15:00   60

df.resample('5min', label='right', closed='right').sum()
# sums values on 00:01 00:02 00:03 00:04 00:05 and assigns ts label 00:05
# the bins are now (start, end], so the 00:00 sample falls into an extra
# leading bin (23:55, 00:00] labelled 00:00
#                     seq
#2019-01-01 00:00:00    0
#2019-01-01 00:05:00   15
#2019-01-01 00:10:00   40
#2019-01-01 00:15:00   50
  
  
#------------------------------------------------
#  resampling columns with different agg funcs
#------------------------------------------------
## see pandas resample docs
# Sample input: 3 seconds of millisecond ticks carrying the two value columns
# aggregated below (the 'seq'-only frame used for the downsampling examples
# has no 'v1'/'v2' columns, so indexing it with them raises KeyError).
n_ticks = 3000
df_ms = pd.DataFrame(
    {'v1': range(n_ticks), 'v2': range(n_ticks)},
    index=pd.date_range('2019-01-01', periods=n_ticks, freq='ms'),
)

# One pass over the 1-second bins with a different aggregation per column:
# 'v1' is summed, 'v2' is averaged; the result columns are then renamed to
# say which aggregation produced them.
df_secs = (
    df_ms.resample('1s')
    .agg({'v1': 'sum', 'v2': 'mean'})
    .rename(columns={'v1': 'v1_sum', 'v2': 'v2_avg'})
)