In [1]:
import pandas as pd

In [2]:
import numpy as np

In [3]:
import matplotlib.pyplot as plt
%matplotlib inline

# 1 Criando Séries

In [4]:
# Build a Series from a plain list; the np.nan entry makes pandas store the values as float64.
values = [1, 3, 4, np.nan, 6, 8]
s = pd.Series(values)
s

0    1.0
1    3.0
2    4.0
3    NaN
4    6.0
5    8.0
dtype: float64

# 2 Criando DataFrame

In [5]:
# Build a daily DatetimeIndex with 6 consecutive dates starting on 2018-01-01.
dates = pd.date_range(start='2018-01-01', periods=6, freq='D')
dates


DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06'],
              dtype='datetime64[ns]', freq='D')

In [6]:
# Build the DataFrame: a 6x4 block of random samples from the standard normal
# distribution (mean 0, standard deviation 1), indexed by `dates`, columns A-D.
# Seeding the global NumPy generator makes the values identical on every
# Restart & Run All; the saved outputs came from an unseeded run, so re-execute
# the notebook once to refresh them.
np.random.seed(42)
df = pd.DataFrame(np.random.randn(6, 4), index=dates, columns=list('ABCD'))
df

Unnamed: 0,A,B,C,D
2018-01-01,-0.056055,1.417708,-0.381697,0.598963
2018-01-02,-0.679637,-0.392138,-0.160904,0.342329
2018-01-03,-0.039107,-1.070029,1.266126,0.285716
2018-01-04,1.288924,-2.344338,0.456453,-0.539733
2018-01-05,0.306672,-1.095638,0.503843,-1.026805
2018-01-06,-0.232047,-2.408368,0.605077,0.526685


# 2.1 Criando DataFrame passando o dicionário de dados (dict)

In [7]:
# Build a DataFrame from a dict: each key becomes a column, and scalar values
# are repeated to match the length of the array-like entries (4 rows here).
columns_by_name = {
    'A': 1,
    'B': pd.Timestamp('20180102'),
    'C': pd.Series(1, index=list(range(4)), dtype='float32'),
    'D': np.array([3] * 4, dtype='int32'),
    'E': pd.Categorical(['test', 'train', 'test', 'train']),
    'F': 'foo',
}
df2 = pd.DataFrame(columns_by_name)

In [8]:
# Display the DataFrame built from the dict in the previous cell.
df2

Unnamed: 0,A,B,C,D,E,F
0,1,2018-01-02,1.0,3,test,foo
1,1,2018-01-02,1.0,3,train,foo
2,1,2018-01-02,1.0,3,test,foo
3,1,2018-01-02,1.0,3,train,foo


In [9]:
# The same Timestamp used for column B of df2, shown on its own.
pd.Timestamp('20180102')

Timestamp('2018-01-02 00:00:00')

In [10]:
# A scalar repeated over a 4-element index as float32
# (same construction as column C of df2, here with the value 2).
pd.Series(2, index=list(range(4)), dtype='float32')

0    2.0
1    2.0
2    2.0
3    2.0
dtype: float32

In [11]:
# A list repeated with `*` turned into an int32 array
# (same construction as column D of df2, here with 2 elements).
np.array([3] * 2, dtype='int32')

array([3, 3])

In [12]:
# Check the dtype of each column of the DataFrame we created
df2.dtypes

A             int64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

# 3 Visualizando Dados

In [13]:
df.head() # Shows the first 5 rows

Unnamed: 0,A,B,C,D
2018-01-01,-0.056055,1.417708,-0.381697,0.598963
2018-01-02,-0.679637,-0.392138,-0.160904,0.342329
2018-01-03,-0.039107,-1.070029,1.266126,0.285716
2018-01-04,1.288924,-2.344338,0.456453,-0.539733
2018-01-05,0.306672,-1.095638,0.503843,-1.026805


In [14]:
df.tail() # Shows the last 5 rows

Unnamed: 0,A,B,C,D
2018-01-02,-0.679637,-0.392138,-0.160904,0.342329
2018-01-03,-0.039107,-1.070029,1.266126,0.285716
2018-01-04,1.288924,-2.344338,0.456453,-0.539733
2018-01-05,0.306672,-1.095638,0.503843,-1.026805
2018-01-06,-0.232047,-2.408368,0.605077,0.526685


In [15]:
df.head(3) # the number of leading rows to show can be passed as an argument

Unnamed: 0,A,B,C,D
2018-01-01,-0.056055,1.417708,-0.381697,0.598963
2018-01-02,-0.679637,-0.392138,-0.160904,0.342329
2018-01-03,-0.039107,-1.070029,1.266126,0.285716


In [16]:
df.tail(3)  # the number of trailing rows to show can be passed as an argument

Unnamed: 0,A,B,C,D
2018-01-04,1.288924,-2.344338,0.456453,-0.539733
2018-01-05,0.306672,-1.095638,0.503843,-1.026805
2018-01-06,-0.232047,-2.408368,0.605077,0.526685


In [17]:
# Show the row index of the DataFrame
df.index

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06'],
              dtype='datetime64[ns]', freq='D')

In [18]:
# Show the column labels of the DataFrame
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

In [19]:
# Show the underlying values of the DataFrame as a NumPy array
df.values

array([[-0.05605489,  1.41770836, -0.38169712,  0.59896295],
       [-0.67963727, -0.39213773, -0.16090378,  0.34232856],
       [-0.03910734, -1.07002888,  1.26612556,  0.28571591],
       [ 1.28892448, -2.34433785,  0.45645277, -0.53973304],
       [ 0.30667245, -1.09563833,  0.50384307, -1.02680457],
       [-0.23204708, -2.40836849,  0.60507674,  0.5266846 ]])

In [20]:
# Show a statistical summary of each column (count, mean, std, min, quartiles, max)
df.describe()

Unnamed: 0,A,B,C,D
count,6.0,6.0,6.0,6.0
mean,0.098125,-0.982134,0.381483,0.031192
std,0.666037,1.415464,0.588185,0.659512
min,-0.679637,-2.408368,-0.381697,-1.026805
25%,-0.188049,-2.032163,-0.006565,-0.333371
50%,-0.047581,-1.082834,0.480148,0.314022
75%,0.220228,-0.561611,0.579768,0.480596
max,1.288924,1.417708,1.266126,0.598963


In [21]:
df.T # Transpose: rows become columns and columns become rows

Unnamed: 0,2018-01-01 00:00:00,2018-01-02 00:00:00,2018-01-03 00:00:00,2018-01-04 00:00:00,2018-01-05 00:00:00,2018-01-06 00:00:00
A,-0.056055,-0.679637,-0.039107,1.288924,0.306672,-0.232047
B,1.417708,-0.392138,-1.070029,-2.344338,-1.095638,-2.408368
C,-0.381697,-0.160904,1.266126,0.456453,0.503843,0.605077
D,0.598963,0.342329,0.285716,-0.539733,-1.026805,0.526685


In [22]:
df.sort_index(axis=1, ascending=False) # Sorts the column labels in descending order

Unnamed: 0,D,C,B,A
2018-01-01,0.598963,-0.381697,1.417708,-0.056055
2018-01-02,0.342329,-0.160904,-0.392138,-0.679637
2018-01-03,0.285716,1.266126,-1.070029,-0.039107
2018-01-04,-0.539733,0.456453,-2.344338,1.288924
2018-01-05,-1.026805,0.503843,-1.095638,0.306672
2018-01-06,0.526685,0.605077,-2.408368,-0.232047


In [23]:
df.sort_values(by='B') # Sorts the rows by the values of column B, ascending

Unnamed: 0,A,B,C,D
2018-01-06,-0.232047,-2.408368,0.605077,0.526685
2018-01-04,1.288924,-2.344338,0.456453,-0.539733
2018-01-05,0.306672,-1.095638,0.503843,-1.026805
2018-01-03,-0.039107,-1.070029,1.266126,0.285716
2018-01-02,-0.679637,-0.392138,-0.160904,0.342329
2018-01-01,-0.056055,1.417708,-0.381697,0.598963


In [24]:
df.sort_values(by='B', ascending=False) # Sorts the rows by the values of column B, descending

Unnamed: 0,A,B,C,D
2018-01-01,-0.056055,1.417708,-0.381697,0.598963
2018-01-02,-0.679637,-0.392138,-0.160904,0.342329
2018-01-03,-0.039107,-1.070029,1.266126,0.285716
2018-01-05,0.306672,-1.095638,0.503843,-1.026805
2018-01-04,1.288924,-2.344338,0.456453,-0.539733
2018-01-06,-0.232047,-2.408368,0.605077,0.526685


# 4 Seleção de Dados

Métodos .at, .iat, .loc e .iloc (o antigo .ix foi descontinuado no pandas 0.20 e removido no pandas 1.0; use .loc ou .iloc no lugar)

In [26]:
df['A'] # Selects column "A" (the result is a Series)

2018-01-01   -0.056055
2018-01-02   -0.679637
2018-01-03   -0.039107
2018-01-04    1.288924
2018-01-05    0.306672
2018-01-06   -0.232047
Freq: D, Name: A, dtype: float64

In [27]:
df.A # Attribute access is an alternative form, but it is not recommended

2018-01-01   -0.056055
2018-01-02   -0.679637
2018-01-03   -0.039107
2018-01-04    1.288924
2018-01-05    0.306672
2018-01-06   -0.232047
Freq: D, Name: A, dtype: float64

In [31]:
df[0:2] # slicing with [] selects rows: here the first 2 rows

Unnamed: 0,A,B,C,D
2018-01-01,-0.056055,1.417708,-0.381697,0.598963
2018-01-02,-0.679637,-0.392138,-0.160904,0.342329


In [35]:
df['20180101':'20180103'] # Rows can also be sliced by index label; both endpoints are included

Unnamed: 0,A,B,C,D
2018-01-01,-0.056055,1.417708,-0.381697,0.598963
2018-01-02,-0.679637,-0.392138,-0.160904,0.342329
2018-01-03,-0.039107,-1.070029,1.266126,0.285716


# 4.1 Seleção utilizando label

In [37]:
# Recap of the first rows before the label-based selections that follow.
df.head()

Unnamed: 0,A,B,C,D
2018-01-01,-0.056055,1.417708,-0.381697,0.598963
2018-01-02,-0.679637,-0.392138,-0.160904,0.342329
2018-01-03,-0.039107,-1.070029,1.266126,0.285716
2018-01-04,1.288924,-2.344338,0.456453,-0.539733
2018-01-05,0.306672,-1.095638,0.503843,-1.026805


In [39]:
df.loc[dates[0]] # Selects every column of the first row, by its label (dates[0], position 0)

A   -0.056055
B    1.417708
C   -0.381697
D    0.598963
Name: 2018-01-01 00:00:00, dtype: float64

# 4.2 Seleção Múltipla utilizando label

In [41]:
df.loc[:, ['A', 'B']] # Selects all rows of columns A and B

Unnamed: 0,A,B
2018-01-01,-0.056055,1.417708
2018-01-02,-0.679637,-0.392138
2018-01-03,-0.039107,-1.070029
2018-01-04,1.288924,-2.344338
2018-01-05,0.306672,-1.095638
2018-01-06,-0.232047,-2.408368


In [43]:
df.loc['20180101':'20180103', ['A', 'C']] # a label range for the rows can be combined with a list of columns

Unnamed: 0,A,C
2018-01-01,-0.056055,-0.381697
2018-01-02,-0.679637,-0.160904
2018-01-03,-0.039107,1.266126


In [45]:
df.loc['20180102', ['A', 'D']]  # selects one row by its label and returns the values of the listed columns

A   -0.679637
D    0.342329
Name: 2018-01-02 00:00:00, dtype: float64

In [47]:
df.loc[dates[0], 'A'] # selects a single value: one row label and one column label

-0.056054889006374724

In [48]:
df.at[dates[0], 'A'] # same result as the previous cell (In [47]), using .at for scalar access

-0.056054889006374724

# 4.3 Seleção por Posição

In [50]:
# Recap of the first rows before the position-based selections that follow.
df.head()

Unnamed: 0,A,B,C,D
2018-01-01,-0.056055,1.417708,-0.381697,0.598963
2018-01-02,-0.679637,-0.392138,-0.160904,0.342329
2018-01-03,-0.039107,-1.070029,1.266126,0.285716
2018-01-04,1.288924,-2.344338,0.456453,-0.539733
2018-01-05,0.306672,-1.095638,0.503843,-1.026805


In [52]:
# Selects the row at position 3 (positions start at zero: 0, 1, 2, 3, ... n)
# position 3 -> 2018-01-04: 1.288924, -2.344338, 0.456453, -0.539733
df.iloc[3] 

A    1.288924
B   -2.344338
C    0.456453
D   -0.539733
Name: 2018-01-04 00:00:00, dtype: float64

In [59]:
df.iloc[3:5, 0:2] # Selects rows at positions 3 and 4 and columns at positions 0 and 1 (the end of each slice is excluded)

Unnamed: 0,A,B
2018-01-04,1.288924,-2.344338
2018-01-05,0.306672,-1.095638


In [60]:
# Selection with explicit lists of positions
df.iloc[[1,2,4], [0,2]]  # rows at positions 1, 2 and 4; columns at positions 0 and 2

Unnamed: 0,A,C
2018-01-02,-0.679637,-0.160904
2018-01-03,-0.039107,1.266126
2018-01-05,0.306672,0.503843


In [62]:
# The ":" wildcard selects everything, from position 0 to N
df.iloc[1:3, :] # selects the rows at positions 1 and 2 and, with ":", all columns

Unnamed: 0,A,B,C,D
2018-01-02,-0.679637,-0.392138,-0.160904,0.342329
2018-01-03,-0.039107,-1.070029,1.266126,0.285716


In [66]:
# The ":" wildcard from In [62] works for the rows as well
df.iloc[:, 1:3] # selects all rows of the columns at positions 1 and 2 (columns B and C)

Unnamed: 0,B,C
2018-01-01,1.417708,-0.381697
2018-01-02,-0.392138,-0.160904
2018-01-03,-1.070029,1.266126
2018-01-04,-2.344338,0.456453
2018-01-05,-1.095638,0.503843
2018-01-06,-2.408368,0.605077


In [67]:
# Using explicit positions to get a single value (row position 1, column position 1)
df.iloc[1,1]

-0.39213773010835662

# 4.4 Seleção por indexação booleana (boolean)

In [69]:
# Recap of the first rows before the boolean selections that follow.
df.head()

Unnamed: 0,A,B,C,D
2018-01-01,-0.056055,1.417708,-0.381697,0.598963
2018-01-02,-0.679637,-0.392138,-0.160904,0.342329
2018-01-03,-0.039107,-1.070029,1.266126,0.285716
2018-01-04,1.288924,-2.344338,0.456453,-0.539733
2018-01-05,0.306672,-1.095638,0.503843,-1.026805


In [73]:
# Note that the original table contains negative values.
# To keep only the rows whose value in column A is greater than zero, filter with a boolean mask:
df[df['A'] > 0] # equivalent to df[df.A > 0]

Unnamed: 0,A,B,C,D
2018-01-04,1.288924,-2.344338,0.456453,-0.539733
2018-01-05,0.306672,-1.095638,0.503843,-1.026805


In [74]:
# The condition can also be tested on every cell, instead of on a single column as in the previous example.
df[df > 0] # Note that cells where the condition is false are replaced by NaN

Unnamed: 0,A,B,C,D
2018-01-01,,1.417708,,0.598963
2018-01-02,,,,0.342329
2018-01-03,,,1.266126,0.285716
2018-01-04,1.288924,,0.456453,
2018-01-05,0.306672,,0.503843,
2018-01-06,,,0.605077,0.526685


# 4.4.1 Utilizando o método isin()

In [75]:
# Copies df into df2. This rebinds the name df2, replacing the dict-built frame from section 2.1.
df2 = df.copy()

In [76]:
# Confirm that the copy holds the same rows as df.
df2.head()

Unnamed: 0,A,B,C,D
2018-01-01,-0.056055,1.417708,-0.381697,0.598963
2018-01-02,-0.679637,-0.392138,-0.160904,0.342329
2018-01-03,-0.039107,-1.070029,1.266126,0.285716
2018-01-04,1.288924,-2.344338,0.456453,-0.539733
2018-01-05,0.306672,-1.095638,0.503843,-1.026805


In [77]:
# Add a new column "E" holding text values (object dtype), one per row
df2['E'] = ['one', 'one', 'two', 'three', 'four', 'three']
df2

Unnamed: 0,A,B,C,D,E
2018-01-01,-0.056055,1.417708,-0.381697,0.598963,one
2018-01-02,-0.679637,-0.392138,-0.160904,0.342329,one
2018-01-03,-0.039107,-1.070029,1.266126,0.285716,two
2018-01-04,1.288924,-2.344338,0.456453,-0.539733,three
2018-01-05,0.306672,-1.095638,0.503843,-1.026805,four
2018-01-06,-0.232047,-2.408368,0.605077,0.526685,three


In [80]:
df2[df2['E'].isin(['two', 'four'])] # keeps only the rows whose value in column E is one of the listed values

Unnamed: 0,A,B,C,D,E
2018-01-03,-0.039107,-1.070029,1.266126,0.285716,two
2018-01-05,0.306672,-1.095638,0.503843,-1.026805,four


# 5 Setando Dados (Setting)

In [82]:
# Build a new Series, indexed by 6 daily dates starting on 2018-01-02,
# that will be used to fill a new column.
s1_index = pd.date_range('20180102', periods=6)
s1 = pd.Series([1, 2, 3, 4, 5, 6], index=s1_index)
s1

2018-01-02    1
2018-01-03    2
2018-01-04    3
2018-01-05    4
2018-01-06    5
2018-01-07    6
Freq: D, dtype: int64

In [83]:
df['F'] = s1 # assigns the Series s1 as the new column F; values are matched by index label

In [85]:
# Note that F is NaN in the first row: s1 has no entry for 2018-01-01,
# and its 2018-01-07 entry has no matching row in df.
df

Unnamed: 0,A,B,C,D,F
2018-01-01,-0.056055,1.417708,-0.381697,0.598963,
2018-01-02,-0.679637,-0.392138,-0.160904,0.342329,1.0
2018-01-03,-0.039107,-1.070029,1.266126,0.285716,2.0
2018-01-04,1.288924,-2.344338,0.456453,-0.539733,3.0
2018-01-05,0.306672,-1.095638,0.503843,-1.026805,4.0
2018-01-06,-0.232047,-2.408368,0.605077,0.526685,5.0


In [87]:
# Set zero in column A of the first row, selected by label with .at
df.at[dates[0],'A'] = 0

In [89]:
# Likewise, using .iat (position-based): set zero at row position 0, column position 1 (B)
df.iat[0,1] = 0

In [91]:
# Set the value 5 on every row of column D
# NOTE(review): the saved output shows D as integers; newer pandas releases may write
# through .loc in place and keep the column as float (5.0) - confirm on the installed version.
df.loc[:, 'D'] = np.array([5] * len(df))

In [98]:
df # all the values set in the previous cells are now in the DataFrame

Unnamed: 0,A,B,C,D,F
2018-01-01,0.0,0.0,-0.381697,5,
2018-01-02,-0.679637,-0.392138,-0.160904,5,1.0
2018-01-03,-0.039107,-1.070029,1.266126,5,2.0
2018-01-04,1.288924,-2.344338,0.456453,5,3.0
2018-01-05,0.306672,-1.095638,0.503843,5,4.0
2018-01-06,-0.232047,-2.408368,0.605077,5,5.0


In [99]:
df2 = df.copy() # copy df so the next assignment does not modify the original

In [100]:
# Values can be set conditionally: here every value greater than zero has its sign flipped
df2[df2 > 0] = -df2
df2 # every value that was > 0 is now negative

Unnamed: 0,A,B,C,D,F
2018-01-01,0.0,0.0,-0.381697,-5,
2018-01-02,-0.679637,-0.392138,-0.160904,-5,-1.0
2018-01-03,-0.039107,-1.070029,-1.266126,-5,-2.0
2018-01-04,-1.288924,-2.344338,-0.456453,-5,-3.0
2018-01-05,-0.306672,-1.095638,-0.503843,-5,-4.0
2018-01-06,-0.232047,-2.408368,-0.605077,-5,-5.0


# 6 Tratando dados faltantes (Missing Data)

Ps.:  NaN (acrónimo em inglês para Not a Number) 

In [101]:
# Reindex df to its first 4 dates and add a new column E;
# positions with no existing data (all of E) come out as NaN.
df1 = df.reindex(index=dates[0:4], columns=list(df.columns) + ['E'])

In [102]:
# Display the reindexed frame.
df1

Unnamed: 0,A,B,C,D,F,E
2018-01-01,0.0,0.0,-0.381697,5,,
2018-01-02,-0.679637,-0.392138,-0.160904,5,1.0,
2018-01-03,-0.039107,-1.070029,1.266126,5,2.0,
2018-01-04,1.288924,-2.344338,0.456453,5,3.0,


In [104]:
# Inspect column E for the first two dates (label slices include both endpoints).
df1.loc[dates[0]:dates[1], 'E']

2018-01-01   NaN
2018-01-02   NaN
Freq: D, Name: E, dtype: float64

In [106]:
df1.loc[dates[0]:dates[1], 'E'] = 1 # sets the value 1 in the first two rows of column E

In [107]:
# Display df1 after filling the first two rows of column E.
df1

Unnamed: 0,A,B,C,D,F,E
2018-01-01,0.0,0.0,-0.381697,5,,1.0
2018-01-02,-0.679637,-0.392138,-0.160904,5,1.0,1.0
2018-01-03,-0.039107,-1.070029,1.266126,5,2.0,
2018-01-04,1.288924,-2.344338,0.456453,5,3.0,


In [109]:
# Drop every row that has at least one missing value
df1.dropna(how='any') # only the row at position 1 (2018-01-02) remains

Unnamed: 0,A,B,C,D,F,E
2018-01-02,-0.679637,-0.392138,-0.160904,5,1.0,1.0


In [110]:
# Note that the rows were not actually removed from df1: dropna returned a new frame.
# To keep the result, assign it to a name (or pass inplace=True).
df1

Unnamed: 0,A,B,C,D,F,E
2018-01-01,0.0,0.0,-0.381697,5,,1.0
2018-01-02,-0.679637,-0.392138,-0.160904,5,1.0,1.0
2018-01-03,-0.039107,-1.070029,1.266126,5,2.0,
2018-01-04,1.288924,-2.344338,0.456453,5,3.0,


In [111]:
# Missing values can be filled with another value, regardless of row or column
df1.fillna(value=5) # replaces every NaN with 5

Unnamed: 0,A,B,C,D,F,E
2018-01-01,0.0,0.0,-0.381697,5,5.0,1.0
2018-01-02,-0.679637,-0.392138,-0.160904,5,1.0,1.0
2018-01-03,-0.039107,-1.070029,1.266126,5,2.0,5.0
2018-01-04,1.288924,-2.344338,0.456453,5,3.0,5.0


In [112]:
df1 # without inplace=True (or assigning the result) df1 keeps its original values

Unnamed: 0,A,B,C,D,F,E
2018-01-01,0.0,0.0,-0.381697,5,,1.0
2018-01-02,-0.679637,-0.392138,-0.160904,5,1.0,1.0
2018-01-03,-0.039107,-1.070029,1.266126,5,2.0,
2018-01-04,1.288924,-2.344338,0.456453,5,3.0,


In [114]:
# To find out which cells hold missing values (NaN), use:
pd.isna(df1)

Unnamed: 0,A,B,C,D,F,E
2018-01-01,False,False,False,False,True,False
2018-01-02,False,False,False,False,False,False
2018-01-03,False,False,False,False,False,True
2018-01-04,False,False,False,False,False,True


# 7 Operações

# 7.1 Operações Estatísticas

Obs.: as operações, em geral, desconsideram os dados faltantes (NaN).

In [116]:
# Display df before computing statistics on it.
df

Unnamed: 0,A,B,C,D,F
2018-01-01,0.0,0.0,-0.381697,5,
2018-01-02,-0.679637,-0.392138,-0.160904,5,1.0
2018-01-03,-0.039107,-1.070029,1.266126,5,2.0
2018-01-04,1.288924,-2.344338,0.456453,5,3.0
2018-01-05,0.306672,-1.095638,0.503843,5,4.0
2018-01-06,-0.232047,-2.408368,0.605077,5,5.0


In [117]:
df.mean() # arithmetic mean of each column

A    0.107468
B   -1.218419
C    0.381483
D    5.000000
F    3.000000
dtype: float64

In [123]:
# Same operation along the other axis: the mean of each row.
df.mean(axis=1)

2018-01-01    1.154576
2018-01-02    0.953464
2018-01-03    1.431398
2018-01-04    1.480208
2018-01-05    1.742975
2018-01-06    1.592932
Freq: D, dtype: float64

In [126]:
# shift(2) moves the values 2 positions down the index: the first 2 slots become NaN
# and the last 2 values (6 and 8) fall off the end
s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2) 
s

2018-01-01    NaN
2018-01-02    NaN
2018-01-03    1.0
2018-01-04    3.0
2018-01-05    5.0
2018-01-06    NaN
Freq: D, dtype: float64

In [128]:
# Display df again for comparison with the shifted Series.
df

Unnamed: 0,A,B,C,D,F
2018-01-01,0.0,0.0,-0.381697,5,
2018-01-02,-0.679637,-0.392138,-0.160904,5,1.0
2018-01-03,-0.039107,-1.070029,1.266126,5,2.0
2018-01-04,1.288924,-2.344338,0.456453,5,3.0
2018-01-05,0.306672,-1.095638,0.503843,5,4.0
2018-01-06,-0.232047,-2.408368,0.605077,5,5.0


In [132]:
# Turns the index into a regular column and returns a DataFrame; s itself is not modified.
s.reset_index()

Unnamed: 0,index,0
0,2018-01-01,
1,2018-01-02,
2,2018-01-03,1.0
3,2018-01-04,3.0
4,2018-01-05,5.0
5,2018-01-06,


In [134]:
# s is a pandas Series.
type(s)

pandas.core.series.Series

In [135]:
# df is a pandas DataFrame.
type(df)

pandas.core.frame.DataFrame

In [137]:
# s shares the same DatetimeIndex as df, which is what the subtraction aligns on.
s.index

DatetimeIndex(['2018-01-01', '2018-01-02', '2018-01-03', '2018-01-04',
               '2018-01-05', '2018-01-06'],
              dtype='datetime64[ns]', freq='D')

In [136]:
# sub() subtracts the Series s from every column of df, matching rows by index label (axis='index');
# rows where s is NaN come out as NaN in the result
df.sub(s, axis='index')

Unnamed: 0,A,B,C,D,F
2018-01-01,,,,,
2018-01-02,,,,,
2018-01-03,-1.039107,-2.070029,0.266126,4.0,1.0
2018-01-04,-1.711076,-5.344338,-2.543547,2.0,0.0
2018-01-05,-4.693328,-6.095638,-4.496157,0.0,-1.0
2018-01-06,,,,,
