In [1]:
# Uncomment the lines below to install the notebook's dependencies if they are
# missing. NOTE(review): inside Jupyter, `%pip install ...` targets the kernel's
# own environment, which `!pip` does not guarantee.
#!pip install numpy
#!pip install pandas

In [2]:
import numpy as np
import pandas as pd

# Criando uma Series

In [3]:
# Build a Series from a plain Python list. np.nan marks the missing entry, and
# because NaN is a float the whole Series is stored as float64.
series = pd.Series(data=[7, 4, 2, np.nan, 9])
series

0    7.0
1    4.0
2    2.0
3    NaN
4    9.0
dtype: float64

In [4]:
# Confirm the class of the object stored in `series`.
type(series)

pandas.core.series.Series

# Trabalhando com datas

In [5]:
# Six consecutive daily timestamps starting on 2020-01-01. `freq` also accepts
# other frequencies (months, years, minutes, seconds, etc.).
datas = pd.date_range(start='20200101', periods=6, freq="D")

# A DatetimeIndex is iterable: each element is printed as a full timestamp.
for dia in datas:
    print(dia)

2020-01-01 00:00:00
2020-01-02 00:00:00
2020-01-03 00:00:00
2020-01-04 00:00:00
2020-01-05 00:00:00
2020-01-06 00:00:00


> Usando o range de datas como parâmetro do loop

### Visualizar os detalhes da função

In [6]:
# Uncomment to show the docstring of pd.date_range (IPython `?` help syntax).
#? pd.date_range

## Gerando conjunto de dados

#### Usando o método `randn` para gerar uma matriz numérica:

In [7]:
# Draw a 6x4 array of samples from the standard normal distribution.
# No seed is set, so the values change on every run.
np.random.randn(6,4)

array([[ 1.14594706,  0.26175311, -2.53156203,  0.58787142],
       [-0.48723515, -1.15930032, -1.9632528 ,  0.42495505],
       [-0.04739785, -0.07946723, -0.7615292 , -0.30515587],
       [ 1.09967237,  1.31441032,  1.14969254, -0.19909384],
       [-1.609332  , -0.33654399, -0.29149602, -1.53027988],
       [-0.79148736,  0.49690798, -0.30897041, -0.04995429]])

> 6 linhas por 4 colunas de números aleatórios. Ao usar a função randn(), os números são gerados seguindo a distribuição normal padrão, com média 0 e desvio padrão 1.

#### Vamos usar o DatetimeIndex "datas", que acabamos de criar, como index do dataframe.

In [8]:
# Display the DatetimeIndex that will serve as the DataFrame's row index.
datas

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06'],
              dtype='datetime64[ns]', freq='D')

#### Vamos criar uma lista para ser o label das colunas

In [9]:
# list() over a string yields one element per character: the four column labels.
list("ABCD")

['A', 'B', 'C', 'D']

### Agora vamos juntar tudo isso e criar um dataframe

In [11]:
# Assemble the pieces shown above: a 6x4 block of standard-normal draws, the
# dates in `datas` as row labels and the letters A-D as column labels.
df = pd.DataFrame(
    data=np.random.randn(6, 4),
    index=datas,
    columns=["A", "B", "C", "D"],
)
df

Unnamed: 0,A,B,C,D
2020-01-01,-0.664804,-1.284975,-0.519772,-1.275043
2020-01-02,0.196606,0.186127,0.1285,0.223497
2020-01-03,-1.209663,0.179505,1.694016,0.131675
2020-01-04,-0.23264,-1.394878,0.483417,0.606075
2020-01-05,0.104152,1.410409,-1.669846,1.019045
2020-01-06,0.997285,0.69955,0.498329,-0.913677


In [12]:
# Confirm the class of the object stored in `df`.
type(df)

pandas.core.frame.DataFrame

### Outra forma de gerar um DataFrame
#### Gerando a partir de um dicionário (dict)

In [13]:
# Build a 4-row DataFrame from a dict, one entry per column. The scalar entries
# ("A", "B", "F") are broadcast to the length of the array-like entries
# ("C", "D", "E"), and each column keeps its own dtype.
df2 = pd.DataFrame(
    dict(
        A=7,
        B=pd.Timestamp("20200101"),
        C=pd.Series(1, index=list(range(4)), dtype="float32"),
        D=np.full(4, 3, dtype="int32"),
        E=pd.Categorical(["test", "train"] * 2),
        F="Python",
    )
)
df2

Unnamed: 0,A,B,C,D,E,F
0,7,2020-01-01,1.0,3,test,Python
1,7,2020-01-01,1.0,3,train,Python
2,7,2020-01-01,1.0,3,test,Python
3,7,2020-01-01,1.0,3,train,Python


### Verificando os tipos de dados em cada coluna do dataframe

In [14]:
# Per-column dtypes of df2, returned as a Series indexed by column name.
df2.dtypes

A             int64
B    datetime64[ns]
C           float32
D             int32
E          category
F            object
dtype: object

### Visualizando dados com pandas
#### O comando head() mostra o "cabeçalho" (as primeiras linhas) do dataframe e, por padrão, retorna 5 linhas
##### Porém também é possível visualizar as últimas linhas do dataframe, usando tail():

In [16]:
# Last 2 rows of df; called without an argument, tail() returns the last 5.
df.tail(2)

Unnamed: 0,A,B,C,D
2020-01-05,0.104152,1.410409,-1.669846,1.019045
2020-01-06,0.997285,0.69955,0.498329,-0.913677


### Visualizando os índices de dados e seus valores

In [17]:
# Row labels of df (here, a DatetimeIndex).
df.index

DatetimeIndex(['2020-01-01', '2020-01-02', '2020-01-03', '2020-01-04',
               '2020-01-05', '2020-01-06'],
              dtype='datetime64[ns]', freq='D')

In [19]:
# The frame's data as a NumPy array, without row or column labels.
# The pandas documentation recommends DataFrame.to_numpy() for new code.
df.values

array([[-0.66480356, -1.28497537, -0.51977166, -1.27504258],
       [ 0.19660635,  0.18612711,  0.12849995,  0.22349717],
       [-1.20966329,  0.17950469,  1.69401564,  0.13167473],
       [-0.23263966, -1.39487754,  0.48341705,  0.6060747 ],
       [ 0.10415198,  1.4104092 , -1.66984591,  1.01904496],
       [ 0.99728491,  0.69954984,  0.49832915, -0.91367681]])

In [20]:
# Column labels of df, returned as an Index object.
df.columns

Index(['A', 'B', 'C', 'D'], dtype='object')

> Os atributos index e columns retornam objetos Index com os rótulos, e values retorna um array NumPy com os dados

### Transpor (rotacionar) um dataframe

In [24]:
# Transpose of df: row labels become column labels and vice versa.
df.T

Unnamed: 0,2020-01-01,2020-01-02,2020-01-03,2020-01-04,2020-01-05,2020-01-06
A,-0.664804,0.196606,-1.209663,-0.23264,0.104152,0.997285
B,-1.284975,0.186127,0.179505,-1.394878,1.410409,0.69955
C,-0.519772,0.1285,1.694016,0.483417,-1.669846,0.498329
D,-1.275043,0.223497,0.131675,0.606075,1.019045,-0.913677


> Linhas viram colunas e vice-versa