In [None]:
import pandas as pd
import numpy as np
from datetime import datetime
import re

# datetime

In [None]:
# Current local date and time as a naive datetime (no tz argument is passed).
datetime.now()

In [None]:
# Keep one timestamp so the cells below all format the same instant.
agora = datetime.now()

In [None]:
# Individual calendar fields, displayed as a tuple.
agora.day, agora.month, agora.year

In [None]:
# str() on a datetime gives its default text form.
str(agora)

In [None]:
# Same text form, stored; it is built from the full timestamp, not just the date.
hoje = str(agora)

In [None]:
# Day / month / four-digit year.
agora.strftime('%d/%m/%Y')

In [None]:
# %B is the full month name (locale-dependent).
agora.strftime('%d/%B/%Y')

In [None]:
# %b is the abbreviated month name, %y the two-digit year.
agora.strftime('%d/%b/%y')

In [None]:
# Month and year only.
agora.strftime('%m/%Y')

In [None]:
# ISO-style date string used as parsing input below.
dia = '2021-11-11'

In [None]:
# strptime is the inverse of strftime: text plus an explicit format -> datetime.
datetime.strptime(dia, '%Y-%m-%d')

In [None]:
from dateutil.parser import parse

In [None]:
# dateutil infers the layout on its own; no format string is supplied.
parse('2021-01-13')

In [None]:
# dayfirst=True tells the parser that the leading field is the day (day/month/year input).
parse('20/03/2019', dayfirst=True)

# Séries temporais

In [None]:
# Six irregularly spaced days of January 2020, used as the index of the first series.
datas = [datetime(2020, 1, dia_do_mes) for dia_do_mes in (2, 5, 7, 8, 10, 12)]

In [None]:
# Random values indexed by the datetime list; pandas builds a DatetimeIndex from it.
ts = pd.Series(np.random.randn(6), index=datas)

In [None]:
ts

In [None]:
ts.index

In [None]:
# Arithmetic aligns on the index: dates missing from ts[::2] come out as NaN.
ts + ts[::2]

In [None]:
ts.index.dtype

In [None]:
# Pull a single label out of the index and display it.
s = ts.index[0]
s

In [None]:
# A label taken from the index selects the matching value.
ts[s]

In [None]:
x = ts.index[3]
ts[x]

In [None]:
# Date-like strings are accepted as labels too, in several spellings.
ts['2020-01-10']

In [None]:
ts['1/10/2020']

In [None]:
ts['20200107']

In [None]:
# 1000 consecutive daily observations starting on 2000-01-01.
longer_ts = pd.Series(np.random.randn(1000),
                     index=pd.date_range('1/1/2000', periods=1000))

In [None]:
# Partial-string indexing: a year string selects every row in that year.
longer_ts['2001']

In [None]:
dois_mil_um = longer_ts['2001']

In [None]:
dois_mil_um.mean()

In [None]:
dois_mil_um.describe()

In [None]:
# The index spans only 1000 days from 2000-01-01, so 2002 is partially covered.
dois_mil_dois = longer_ts['2002']

In [None]:
dois_mil_um.plot()


In [None]:
dois_mil_dois.plot(color='g')

In [None]:
# A year-month string narrows the selection to a single month.
maio_dois_mil = longer_ts['2000-05']

In [None]:
maio_dois_mil.plot(color='r')

In [None]:
# Load the payments table; the path is relative to the notebook's working directory.
df = pd.read_csv('pagamentos.csv')

In [None]:
# Symbols stripped from the raw 'valor' text before numeric conversion.
# NOTE(review): the empty-string entry adds nothing to the character class - confirm whether a character was lost here.
chars_to_remove = ['.', '-', '#', '', 'R', '$']
# Escape the symbols so each one is matched literally inside the [...] class.
escaped_chars = re.escape(''.join(chars_to_remove))
regular_expression = '[{}]'.format(escaped_chars)

df['valor'] = df['valor'].str.replace(pat=regular_expression, repl='', regex=True)

In [None]:
# Decimal comma -> dot. regex=False makes the literal replacement explicit, so it does
# not depend on the pandas version's default for `regex`. Surrounding whitespace is
# trimmed in the same pass.
valor_texto = df['valor'].str.replace(',', '.', regex=False).str.strip()
# Cells left blank by the symbol clean-up (empty or whitespace-only) are missing values.
# Mapping all of them to NaN keeps the float cast from raising on '' or on runs of
# spaces; previously only a cell holding exactly one space was handled.
df['valor'] = valor_texto.replace('', np.nan).astype('float64')

In [None]:
df.columns

In [None]:
# Parse the payment-date text into datetime values.
# NOTE(review): no format/dayfirst is passed, so the layout is inferred - confirm it matches the CSV's date format.
df['data_pagamento'] = pd.to_datetime(df['data_pagamento'])

In [None]:
# Index by payment date so rows can be selected with date strings.
# Re-running this cell on its own fails: the column is gone once it has become the index.
df = df.set_index('data_pagamento')

In [None]:
df.index

In [None]:
# One slice per calendar year, 2010 through 2019, selected by year string on the date index.
(ano2010, ano2011, ano2012, ano2013, ano2014,
 ano2015, ano2016, ano2017, ano2018, ano2019) = [
    df.loc[str(ano)] for ano in range(2010, 2020)
]

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Mean of the 'valor' column over the 2010 rows.
ano2010['valor'].mean()

In [None]:
# A datetime object works as a slice bound; here everything from 2011-01-11 onward.
longer_ts[datetime(2011, 1, 11):]

In [None]:
ts

In [None]:
# Range selection with string bounds; neither bound is itself one of the dates in ts.
ts['1/6/2020':'1/11/2020']

In [None]:
# Drop every observation after 2020-01-09.
ts.truncate(after='1/9/2020')

## Índices duplicados

In [None]:
# Index in which the date 2000-01-02 appears three times.
dates = pd.DatetimeIndex(['1/1/2000','1/2/2000','1/2/2000','1/2/2000','1/3/2000'])

In [None]:
dup_ts = pd.Series(np.arange(5), index=dates)

In [None]:
dup_ts

In [None]:
# False here, because of the repeated date.
dup_ts.index.is_unique

In [None]:
# Selecting the duplicated label returns all matching rows, not a single value.
dup_ts['1/2/2000']

In [None]:
# level=0 groups on the index itself, i.e. one group per distinct timestamp.
g = dup_ts.groupby(level=0)

In [None]:
g.mean()

In [None]:
g.count()

In [None]:
# Materialise the groups as (timestamp, sub-series) pairs for inspection.
# Stored under its own name: `g` stays the GroupBy object, so the aggregation
# cells that call g.mean() / g.count() can still be re-run afterwards.
grupos = list(g)
grupos

# Intervalos, frequências e deslocamentos

In [None]:
# resample() on its own only builds a resampler object; nothing is aggregated until a method is called on it.
ts.resample('D')

In [None]:
# Start and end given: daily frequency by default, both endpoints included.
pd.date_range('2012-04-01', '2012-06-01')

In [None]:
# Start plus a number of periods.
pd.date_range(start='2012-04-01', periods=20)

In [None]:
# End plus a number of periods: the range is counted back from the end date.
pd.date_range(end='2012-04-01', periods=20)

In [None]:
# NOTE(review): recent pandas releases are said to rename the 'BM'/'BQ' aliases ('BME'/'BQE') - confirm against the installed version.
pd.date_range('2021-01-01', '2021-12-31', freq='BM') # Last business day of each month

In [None]:
pd.date_range('2021-01-01', '2021-12-31', freq='BQ') # Business quarter end

In [None]:
pd.date_range('2021-01-01', '2021-12-31', freq='W-MON') # Mondays of 2021

In [None]:
# The time of day of the start timestamp is carried over to every generated date.
pd.date_range('2021-01-01 12:56:31', periods=5)

In [None]:
pd.date_range('2021-01-01 12:56:31', periods=5, normalize=True) # Normalizes to midnight

In [None]:
from pandas.tseries.offsets import Hour, Minute

In [None]:
# Offset objects are what the frequency strings stand for; with no argument the multiple is 1.
hour = Hour()

In [None]:
hour

In [None]:
# An integer argument sets the multiple of the base offset.
four_hours = Hour(4)

In [None]:
four_hours

In [None]:
# The same four-hour step, written as a frequency string.
pd.date_range('2000-01-01', '2000-01-03 23:59', freq='4h')

In [None]:
# Offsets can be added together.
Hour(2) + Minute(30)

In [None]:
# Compound frequency strings are accepted as well.
pd.date_range('2000-01-01', '2000-01-03 23:59', freq='2h25min')

In [None]:
pd.date_range('2020-01-01', '2020-12-31', freq='WOM-3FRI') # 3rd Friday of each month

In [None]:
# Four month-end observations starting in January 2000.
ts = pd.Series(np.random.randn(4),
              index=pd.date_range('1/1/2000', periods=4, freq='M'))

In [None]:
ts

In [None]:
# Without freq, shift moves the values and leaves the index alone; the vacated slots become NaN.
ts.shift(2)

In [None]:
ts.shift(-2)

In [None]:
# With freq, the index is moved instead and no value is lost.
ts.shift(2, freq='M')

# Fuso horário

In [None]:
import pytz

In [None]:
# Last five entries of pytz's list of common zone names.
pytz.common_timezones[-5:]

In [None]:
# Look a zone up by name.
pytz.timezone('America/New_York')

In [None]:
pytz.timezone('America/Sao_Paulo')

In [None]:
rng = pd.date_range('3/9/2012 9:30', periods=6, freq='D')

In [None]:
ts = pd.Series(np.random.randn(len(rng)), index=rng)

In [None]:
ts

In [None]:
# No tz was given above, so the index is naive; print() is used because a bare None shows no cell output.
print(ts.index.tz)

In [None]:
# A range can be created time-zone aware from the start.
pd.date_range('3/9/2012 9:30', periods=10, freq='D', tz='UTC')

In [None]:
# tz_localize attaches a zone to naive timestamps without changing the wall-clock values.
ts_utc = ts.tz_localize('UTC')

In [None]:
ts_utc

In [None]:
ts_utc.index

In [None]:
# tz_convert re-expresses the same instants in another zone.
ts_utc.tz_convert('America/New_York')

In [None]:
ts_utc.tz_convert('America/Sao_Paulo')

In [None]:
ts_utc.tz_convert('Europe/Paris')

In [None]:
ts_utc.tz_convert('Asia/Shanghai')

In [None]:
ts

In [None]:
# Two overlapping slices of the same data, expressed in different zones.
ts1 = ts[:4].tz_localize('Europe/London')
ts2 = ts1[2:].tz_convert('Europe/Moscow')

In [None]:
# Combining series from different zones gives a result indexed in UTC.
result = ts1 + ts2

In [None]:
result.index

# Períodos

In [None]:
p = pd.Period(2009, freq='A-DEC') # Covers the whole year

In [None]:
p

In [None]:
# Adding or subtracting an integer moves the period by that many units of its frequency.
p + 5

In [None]:
p - 2

In [None]:
# Difference between two periods of the same frequency, counted in units of that frequency.
pd.Period('2014', freq='A-DEC') - p

In [None]:
# Monthly periods from January through June 2000.
rng = pd.period_range('2000-01-01', '2000-06-30', freq='M')

In [None]:
rng

In [None]:
# A PeriodIndex can serve as a Series index.
pd.Series(np.random.randn(6), index=rng)

In [None]:
# Quarter labels given as text.
values = ['2001Q3', '2002Q2', '2003Q1']

In [None]:
index = pd.PeriodIndex(values, freq='Q-DEC')

In [None]:
index

In [None]:
p = pd.Period('2007', freq='A-DEC')

In [None]:
# Converting to a finer frequency: `how` picks the first or the last sub-period.
p.asfreq('M', how='start')

In [None]:
p.asfreq('M', how='end')

In [None]:
rng = pd.period_range('2006', '2009', freq='A-DEC')

In [None]:
ts = pd.Series(np.random.randn(len(rng)), index=rng)

In [None]:
ts

In [None]:
# The same conversion applied to every period in the index.
ts.asfreq('M', how='start')

In [None]:
# 'B' is business-day frequency: the last business day of each year.
ts.asfreq('B', how='end')

# Reamostragem

In [None]:
# 100 daily observations starting on 2009-01-01.
rng = pd.date_range('2009-01-01', periods=100, freq='D')

In [None]:
ts = pd.Series(np.random.randn(len(rng)), index=rng)
ts

In [None]:
# Downsample to monthly means; the result is labelled with month-end timestamps.
ts.resample('M').mean()

In [None]:
# kind='period' labels the same means with monthly periods instead of timestamps.
# NOTE(review): the `kind` argument is reportedly deprecated in recent pandas - confirm against the installed version.
ts.resample('M', kind='period').mean()

In [None]:
# Empty two-column frame that the next cells fill in.
# This rebinds `df`: the payments table loaded earlier is no longer reachable under that name.
df = pd.DataFrame(columns=['Valor', 'Data'])
                  

In [None]:
# 90 random values scaled by 2.
df['Valor'] = np.random.randn(90) * 2

In [None]:
# One calendar day per row, starting on 2021-01-01.
df['Data'] = pd.date_range('2021-01-01', periods=90)

In [None]:
df

In [None]:
# The date column becomes the index, which is what resample() operates on below.
df = df.set_index('Data')

In [None]:
# Monthly aggregates, labelled by period.
df.resample('M', kind='period').sum()

In [None]:
df.resample('M', kind='period').max()

In [None]:
df.resample('M', kind='period').min()

In [None]:
df.resample('M', kind='period').mean()

In [None]:
# Twelve one-minute timestamps starting at midnight on 2010-01-01.
# 'min' is the spelling the '5min' resamples in this notebook already use, and it is
# accepted by both older and current pandas, unlike the single-letter 'T' alias.
rng = pd.date_range('2010-01-01', periods=12, freq='min')

In [None]:
ts = pd.Series(np.arange(12), index=rng)
ts

In [None]:
# closed='right': each five-minute bin includes its right edge and excludes its left edge.
ts.resample('5min', closed='right').sum()

In [None]:
# Defaults for comparison with the cell above (no closed/label arguments).
ts.resample('5min').sum()

In [None]:
# label='right' additionally names each bin by its right edge.
ts.resample('5min', closed='right', label='right').sum()

In [None]:
# Open / high / low / close of each bin in one call.
oxi = ts.resample('5min').ohlc()
oxi

In [None]:
# Two weekly (Wednesday-anchored) rows of random data for four columns.
frame = pd.DataFrame(np.random.randn(2, 4),
                    index=pd.date_range('1/1/2000', periods=2, freq='W-WED'),
                    columns=['Colorado', 'Ohio', 'Texas', 'New York'])

In [None]:
frame

In [None]:
# Upsample to daily without filling: the days in between hold NaN.
df_daily = frame.resample('D').asfreq()

In [None]:
df_daily

In [None]:
# Forward-fill carries each observed row into the following days.
frame.resample('D').ffill()

In [None]:
# Re-anchor the weekly data on Thursdays, forward-filling from the prior observation.
frame.resample('W-THU').ffill()

# Janela móvel

In [None]:
# The first CSV column becomes the index and is parsed as dates.
close_px_all = pd.read_csv('stock_px.csv',
                          parse_dates=True, index_col=0)

In [None]:
close_px = close_px_all[['AAPL', 'MSFT', 'XOM']]

In [None]:
# Conform to business-day frequency, forward-filling any business day that has no row.
close_px = close_px.resample('B').ffill()

In [None]:
close_px_all.head(3)

In [None]:
close_px.head(3)

In [None]:
# Price with its 250-observation rolling mean drawn on top.
close_px.AAPL.plot()
close_px.AAPL.rolling(250).mean().plot()

In [None]:
close_px.MSFT.plot()
close_px.MSFT.rolling(250).mean().plot()

In [None]:
close_px.XOM.plot()
close_px.XOM.rolling(250).mean().plot()

In [None]:
# min_periods=10 lets the window produce values once 10 observations are available.
# NOTE(review): "appl" looks like a typo for the AAPL ticker; the name is used by later cells as written.
appl_std250 = close_px.AAPL.rolling(250, min_periods=10).std()

In [None]:
appl_std250[5:12]

In [None]:
appl_std250.plot()

In [None]:
# Expanding window: at each point, the mean of everything seen so far.
ex_mean = appl_std250.expanding().mean()

In [None]:
close_px.AAPL.plot()
ex_mean.plot()
appl_std250.plot()

In [None]:
# rolling() on the DataFrame applies the window to every column; the y axis is logarithmic.
close_px.rolling(60).mean().plot(logy=True)

In [None]:
# A time-offset string gives a window defined by elapsed time (20 days) rather than a row count.
close_px.rolling('20D').mean()

In [None]:
close_px.rolling('20D').mean().plot()

In [None]:
# AAPL prices for 2006 through 2007.
aapl_px = close_px.AAPL['2006':'2007']
# NOTE(review): both names say 60, but the window/span actually used is 30.
ma60 = aapl_px.rolling(30, min_periods=20).mean()
ewma60 = aapl_px.ewm(span=30).mean()

In [None]:
# Simple versus exponentially weighted moving average in one figure.
ma60.plot(style='k--', label='Simple MA')
ewma60.plot(style='r-', label='EW MA')
plt.legend()

In [None]:
# MSFT prices for 2006 through 2007 with a simple and an exponentially weighted
# moving average (window/span of 30), drawn on one explicitly held Axes.
msft_px = close_px.MSFT['2006':'2007']
ma60 = msft_px.rolling(30, min_periods=20).mean()
ewma60 = msft_px.ewm(span=30).mean()
ax = ma60.plot(style='k--', label='Simple MA')
ewma60.plot(ax=ax, style='r-', label='EW MA')
ax.legend()
ax.set_title('Microsoft')

In [None]:
# Percent changes of the SPX column and of the three selected stocks.
spx_px = close_px_all['SPX']
spx_rets = spx_px.pct_change()
returns = close_px.pct_change()

In [None]:
# Rolling 125-observation correlation of AAPL returns with the SPX returns (at least 100 observations required).
corr = returns.AAPL.rolling(125, min_periods=100).corr(spx_rets)

In [None]:
corr.plot(label='Correlação')
plt.legend()

In [None]:
# Same computation for every column at once; this rebinds `corr` to the multi-column result.
corr = returns.rolling(125, min_periods=100).corr(spx_rets)

In [None]:
corr.plot()
plt.legend()
plt.title('Correlação entre preços e S&P 500')

In [None]:
from scipy.stats import percentileofscore

In [None]:
def score_at_2percent(x):
    """Return the percentile rank of the value 0.02 within the sample ``x``.

    Parameters
    ----------
    x : array-like
        Sample of values (one rolling window when used with ``rolling().apply``).

    Returns
    -------
    float
        Result of ``scipy.stats.percentileofscore(x, 0.02)``, a value
        between 0 and 100.
    """
    return percentileofscore(x, 0.02)

In [None]:
# Percentile rank of a 2% return within each trailing 250-observation window of AAPL returns.
# raw=True hands every window to the function as a NumPy array instead of building a
# Series per window; the function only needs the values, so the result is the same
# while the per-window overhead is avoided.
result = returns.AAPL.rolling(250).apply(score_at_2percent, raw=True)
result.plot()
plt.title('Score at 2%')