# Exercicios de Pandas (Favtutor)

### Link: https://favtutor.com/blogs/pandas-exercises-python

In [1]:
# importações

import re

import pandas as pd

# Exercicio 1 - Converter lista para Series

In [2]:
# Build a pandas Series from a plain Python list; the default index is 0..n-1.
nomes = ["Gabriela", "Antônio", "João", "Maria"]
nomes_df = pd.Series(data=nomes)
print(nomes_df)
# The last expression of the cell displays the class of the object.
type(nomes_df)

0    Gabriela
1     Antônio
2        João
3       Maria
dtype: object


pandas.core.series.Series

# Exercicio 2 - Converter lista para Series com indexação customizada

In [3]:
# Series with a custom index: the labels are the odd numbers 1, 3, 5, ...
# (one label per grade in the list).
notas = [6.0, 7.0, 5.0, 9.0]
notas_df = pd.Series(data=notas, index=list(range(1, len(notas) * 2, 2)))
notas_df

1    6.0
3    7.0
5    5.0
7    9.0
dtype: float64

# Exercicio 3 - Geração de Series com datas

In [4]:
# pd.date_range returns a DatetimeIndex (not a Series) with one entry per day
# from the start date to the end date, both included.
datas = pd.date_range("1st May, 2021", "12th May, 2021")
datas

DatetimeIndex(['2021-05-01', '2021-05-02', '2021-05-03', '2021-05-04',
               '2021-05-05', '2021-05-06', '2021-05-07', '2021-05-08',
               '2021-05-09', '2021-05-10', '2021-05-11', '2021-05-12'],
              dtype='datetime64[ns]', freq='D')

# Exercicio 4 - Implementar uma função para todo e qualquer elemento de uma Series

In [5]:
def quadrado(x):
    """Return ``x`` raised to the power of two."""
    return pow(x, 2)

# Series.apply calls the given function on every element and returns the results.
teste = [1, 2, 3, 4, 5]
teste_s = pd.Series(data=teste)
teste_s.apply(func=quadrado)

0     1
1     4
2     9
3    16
4    25
dtype: int64

In [6]:
# Same element-wise squaring, this time passing an anonymous function to apply.
teste = [1, 2, 3, 4, 5]
teste_s = pd.Series(data=teste)
teste_s.apply(lambda valor: valor ** 2)

0     1
1     4
2     9
3    16
4    25
dtype: int64

In [7]:
# apply also works on text: capitalise the first letter of every fruit name.
frutas = ["morango", "abacaxi", "uva", "kiwi", "melão"]
frutas_s = pd.Series(data=frutas)
frutas_s.apply(str.capitalize)

0    Morango
1    Abacaxi
2        Uva
3       Kiwi
4      Melão
dtype: object

# Exercicio 5 - Dicionário para um DataFrame

In [8]:
# Each dictionary key becomes a column name and each list supplies that
# column's values, row by row.
pessoas = {
    "Nome": ["Adriano", "Gabriela", "Hugo"],
    "Idade": [18, 20, 23],
    "Matriculado": [0, 1, 1],
}

pessoas_df = pd.DataFrame(data=pessoas)
pessoas_df

Unnamed: 0,Nome,Idade,Matriculado
0,Adriano,18,0
1,Gabriela,20,1
2,Hugo,23,1


# Exercicio 6 - Lista 2D em DataFrame

In [9]:
# A 2D list: every inner list is one row of the future DataFrame.
lista2d = [
    ["Lucas", 17, 1],
    ["Matheus", 19, 1],
    ["Estela", 23, 0],
]

# The rows carry no labels of their own, so the column names are given explicitly.
lista2d_df = pd.DataFrame(data=lista2d, columns=["Nome", "Idade", "Matriculado"])
lista2d_df

Unnamed: 0,Nome,Idade,Matriculado
0,Lucas,17,1
1,Matheus,19,1
2,Estela,23,0


# Exercicio 7 - Ler CSV para DataFrame

In [13]:
# Read the movies CSV directly from its raw GitHub URL into a DataFrame,
# then preview the first five rows.
dados = pd.read_csv(
    "https://raw.githubusercontent.com/alura-cursos/introducao-a-data-science/master/aula4.2/movies.csv"
)
dados.head(5)

Unnamed: 0,movieId,title,genres
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,2,Jumanji (1995),Adventure|Children|Fantasy
2,3,Grumpier Old Men (1995),Comedy|Romance
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,5,Father of the Bride Part II (1995),Comedy


# Exercicio 8 - Customização de Index em DataFrame

In [15]:
# Use an existing column ("movieId") as the index. The result is a new
# DataFrame that is only displayed here; `dados` itself is not reassigned.
dados.set_index(keys="movieId")

Unnamed: 0_level_0,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy
...,...,...
193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy
193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy
193585,Flint (2017),Drama
193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation


# Exercicio 9 - Ordenando DataFrame pelo Index

In [18]:
# sort_index orders the rows by their index labels; with "title" as the index
# that means alphabetical order. Only the first 10 rows are shown.
(
    dados
    .set_index("title")
    .sort_index()
    .head(10)
)

Unnamed: 0_level_0,movieId,genres
title,Unnamed: 1_level_1,Unnamed: 2_level_1
'71 (2014),117867,Action|Drama|Thriller|War
'Hellboy': The Seeds of Creation (2004),97757,Action|Adventure|Comedy|Documentary|Fantasy
'Round Midnight (1986),26564,Drama|Musical
'Salem's Lot (2004),27751,Drama|Horror|Mystery|Thriller
'Til There Was You (1997),779,Drama|Romance
'Tis the Season for Love (2015),149380,Romance
"'burbs, The (1989)",2072,Comedy
'night Mother (1986),3112,Drama
(500) Days of Summer (2009),69757,Comedy|Drama|Romance
*batteries not included (1987),8169,Children|Comedy|Fantasy|Sci-Fi


In [19]:
# Same idea with a numeric index: rows come out in ascending movieId order.
(
    dados
    .set_index("movieId")
    .sort_index()
    .head(10)
)

Unnamed: 0_level_0,title,genres
movieId,Unnamed: 1_level_1,Unnamed: 2_level_1
1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
2,Jumanji (1995),Adventure|Children|Fantasy
3,Grumpier Old Men (1995),Comedy|Romance
4,Waiting to Exhale (1995),Comedy|Drama|Romance
5,Father of the Bride Part II (1995),Comedy
6,Heat (1995),Action|Crime|Thriller
7,Sabrina (1995),Comedy|Romance
8,Tom and Huck (1995),Adventure|Children
9,Sudden Death (1995),Action
10,GoldenEye (1995),Action|Adventure|Thriller


# Exercicio 10 - Ordenar DataFrame por múltiplas colunas

In [34]:
# criando nova coluna "year" no dataset

def separa_ano(coluna):
    """Extract the release year from each movie title.

    Titles are expected to end with the year in parentheses, e.g.
    ``"Toy Story (1995)"``. Whitespace after the closing parenthesis is
    tolerated, so ``"Foo (2006) "`` still yields ``"2006"``.

    Args:
        coluna: Iterable of titles (for example a DataFrame column).

    Returns:
        A list with one entry per item, in the same order: the four-digit
        year as a string, or ``None`` when the item is not a string or does
        not end with a ``(YYYY)`` suffix.
    """
    # Anchored at the end of the title so that numbers elsewhere in the name
    # (e.g. "(500) Days of Summer (2009)") are never mistaken for the year.
    padrao_ano = re.compile(r"\(([0-9]{4})\)\s*$")

    anos = []
    for titulo in coluna:
        achado = padrao_ano.search(titulo) if isinstance(titulo, str) else None
        anos.append(achado.group(1) if achado else None)
    return anos

# Derive the years from the titles and store them as a new "year" column,
# then preview the first five rows to check the result.
anos = separa_ano(coluna=dados["title"])
dados["year"] = anos
dados.head(5)

Unnamed: 0,movieId,title,genres,year
0,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy,1995
1,2,Jumanji (1995),Adventure|Children|Fantasy,1995
2,3,Grumpier Old Men (1995),Comedy|Romance,1995
3,4,Waiting to Exhale (1995),Comedy|Drama|Romance,1995
4,5,Father of the Bride Part II (1995),Comedy,1995


In [35]:
# Sort by several columns: "movieId" is the primary key and "year" only
# breaks ties between equal movieIds. The last 10 rows are shown.
(
    dados
    .sort_values(by=["movieId", "year"])
    .tail(10)
)

Unnamed: 0,movieId,title,genres,year
9732,193565,Gintama: The Movie (2010),Action|Animation|Comedy|Sci-Fi,2010
9733,193567,anohana: The Flower We Saw That Day - The Movi...,Animation|Drama,2013
9734,193571,Silver Spoon (2014),Comedy|Drama,2014
9735,193573,Love Live! The School Idol Movie (2015),Animation,2015
9736,193579,Jon Stewart Has Left the Building (2015),Documentary,2015
9737,193581,Black Butler: Book of the Atlantic (2017),Action|Animation|Comedy|Fantasy,2017
9738,193583,No Game No Life: Zero (2017),Animation|Comedy|Fantasy,2017
9739,193585,Flint (2017),Drama,2017
9740,193587,Bungo Stray Dogs: Dead Apple (2018),Action|Animation,2018
9741,193609,Andrew Dice Clay: Dice Rules (1991),Comedy,1991


# Exercicio 11 - Resetando o index de um DataFrame

In [42]:
# Build a DataFrame indexed by the "year" column.
# Note: set_index only relabels the rows; it does not sort them.
dados_ordenados_por_ano = dados.set_index(keys="year")
dados_ordenados_por_ano.head(5)

Unnamed: 0_level_0,movieId,title,genres
year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1995,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1995,2,Jumanji (1995),Adventure|Children|Fantasy
1995,3,Grumpier Old Men (1995),Comedy|Romance
1995,4,Waiting to Exhale (1995),Comedy|Drama|Romance
1995,5,Father of the Bride Part II (1995),Comedy


In [44]:
# Undo the custom index: reset_index moves "year" back into a regular column
# and restores the default integer index.
# The result is reassigned instead of using inplace=True, which keeps the
# operation explicit and chainable.
dados_ordenados_por_ano = dados_ordenados_por_ano.reset_index()
dados_ordenados_por_ano.head()

Unnamed: 0,year,movieId,title,genres
0,1995,1,Toy Story (1995),Adventure|Animation|Children|Comedy|Fantasy
1,1995,2,Jumanji (1995),Adventure|Children|Fantasy
2,1995,3,Grumpier Old Men (1995),Comedy|Romance
3,1995,4,Waiting to Exhale (1995),Comedy|Drama|Romance
4,1995,5,Father of the Bride Part II (1995),Comedy


# Exercicio 12 - Selecionar uma coluna do DataFrame

In [45]:
# Placeholder cell: it only prints a greeting and does not select a column yet.
# TODO: implement the exercise, e.g. by displaying dados["title"].
mensagem = "Hello World"
print(mensagem)

Hello World
