In [1]:
# importação das bibliotecas
import pandas as pd
import numpy as np

In [2]:
# Fix the seed of NumPy's global random generator so that the "random" values
# drawn below are the same sequence on every run (reproducible outputs).
np.random.seed(seed=101)

In [3]:
# Build a 5x4 DataFrame of standard-normal random values, with string labels
# for both axes: rows A..E and columns W..Z.
df = pd.DataFrame(
    data=np.random.randn(5, 4),
    index=list('ABCDE'),
    columns=list('WXYZ'),
)

In [4]:
# Display the freshly generated frame.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [5]:
# Select column W with bracket notation. A single column comes back as a
# Series (a DataFrame is a collection of Series sharing one index).
df['W']

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [6]:
# Confirm the point above: a single selected column is a pandas Series.
type(df['W'])

pandas.core.series.Series

In [7]:
# For comparison, check the type of the whole frame.
type(df)

pandas.core.frame.DataFrame

In [8]:
# Passing a list of column names selects several columns at once.
df[['W', 'Z']]

Unnamed: 0,W,Z
A,2.70685,0.503826
B,0.651118,0.605965
C,-2.018168,-0.589001
D,0.188695,0.955057
E,0.190794,0.683509


In [9]:
# Attribute-style access returns the same column as df['W']. Shown only as an
# example of the notation: it is easy to confuse with accessing a method or
# attribute of the object, so bracket notation is the clearer choice.
df.W

A    2.706850
B    0.651118
C   -2.018168
D    0.188695
E    0.190794
Name: W, dtype: float64

In [1]:
# Create a new column 'new' holding the row-by-row sum of columns W and X.
# NOTE(review): the saved output of this cell is a NameError and its execution
# count is 1, i.e. it was last run on a fresh kernel before `df` existed. Run
# the notebook top to bottom (Restart & Run All) to refresh the outputs.
df['new'] = df['W'] + df['X']

NameError: name 'df' is not defined

In [11]:
# Display the frame to show the newly added column.
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.334983
B,0.651118,-0.319318,-0.848077,0.605965,0.3318
C,-2.018168,0.740122,0.528813,-0.589001,-1.278046
D,0.188695,-0.758872,-0.933237,0.955057,-0.570177
E,0.190794,1.978757,2.605967,0.683509,2.169552


In [15]:
# A Series can be removed from the frame with drop(). `axis` tells pandas
# whether the label refers to a row or a column; axis=1 means column.
# This call only returns a new frame without 'new'; `df` itself is unchanged.
df.drop('new', axis=1)

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [16]:
# The column was not deleted: drop() returned a modified copy and left `df`
# untouched, so 'new' is still here.
df

Unnamed: 0,W,X,Y,Z,new
A,2.70685,0.628133,0.907969,0.503826,3.334983
B,0.651118,-0.319318,-0.848077,0.605965,0.3318
C,-2.018168,0.740122,0.528813,-0.589001,-1.278046
D,0.188695,-0.758872,-0.933237,0.955057,-0.570177
E,0.190794,1.978757,2.605967,0.683509,2.169552


In [17]:
# drop() returns a new frame and leaves the original untouched, so the result
# must be bound back to `df` for the removal to persist. Rebinding is preferred
# over `inplace=True` (which would also work): it keeps method chaining possible
# and makes the state change explicit in the cell.
# errors='ignore' keeps the cell re-runnable: executing it a second time is a
# no-op instead of raising KeyError because 'new' is already gone.
df = df.drop('new', axis=1, errors='ignore')

In [18]:
# Display the frame again: the 'new' column is now gone.
df

Unnamed: 0,W,X,Y,Z
A,2.70685,0.628133,0.907969,0.503826
B,0.651118,-0.319318,-0.848077,0.605965
C,-2.018168,0.740122,0.528813,-0.589001
D,0.188695,-0.758872,-0.933237,0.955057
E,0.190794,1.978757,2.605967,0.683509


In [19]:
# .loc is the label-based accessor; it avoids the ambiguities that plain []
# can have. A single row label returns that row as a Series.
df.loc['A']

W    2.706850
X    0.628133
Y    0.907969
Z    0.503826
Name: A, dtype: float64

In [20]:
# Another label-based lookup: rows A and B, restricted to columns X, Y and Z.
df.loc[['A', 'B'], ['X', 'Y', 'Z']]

Unnamed: 0,X,Y,Z
A,0.628133,0.907969,0.503826
B,-0.319318,-0.848077,0.605965


In [21]:
# .iloc selects strictly by integer position, never by label: even if a column
# were named 2, using 2 with iloc would still address the column at position 2.
# Here: rows at positions 1 to 3 and every column from position 2 onward.
df.iloc[1:4, 2:]

Unnamed: 0,Y,Z
B,-0.848077,0.605965
C,0.528813,-0.589001
D,-0.933237,0.955057
