In [None]:
import numpy as np
import pandas as pd

In [None]:
# Seed NumPy's global RNG so that "Restart Kernel -> Run All" reproduces the
# same random sample data in this cell and in every later np.random call.
np.random.seed(42)

# Sample data: one row per observation, keyed by state ('Estado') and city
# ('Cidade'), with one random value for each of the years 2019 and 2020.
df = pd.DataFrame({
    'Estado': ['RS', 'RS', 'SC', 'PR', 'RS', 'SC', 'SC', 'PR', 'RS'],
    'Cidade': ['Porto Alegre', 'Pelotas', 'Florianópolis', 'Curitiba',
               'Porto Alegre', 'Joinville', 'Florianópolis', 'Curitiba',
               'Porto Alegre'],
    '2019': np.random.randn(9),
    '2020': np.random.randn(9),
})

In [None]:
# Display the whole frame; it has only nine rows, so no .head() is needed.
df

### GroupBy

In [None]:
# Group the '2019' Series using the 'Estado' column as the key.
# Displaying the variable shows the GroupBy object itself, not data.
grouped = df['2019'].groupby(df['Estado'])
grouped

In [None]:
# Mean of '2019' within each Estado.
grouped.mean()

In [None]:
# Same grouping key, applied to the '2020' column.
grouped2 = df['2020'].groupby(df['Estado'])
grouped2.mean()

In [None]:
# Group '2020' by city instead of state.
grouped3 = df['2020'].groupby(df['Cidade'])

In [None]:
grouped3.mean()

In [None]:
# Group '2019' by city.
grouped4 = df['2019'].groupby(df['Cidade'])
grouped4.mean()

In [None]:
# Passing a list of keys groups by every (Estado, Cidade) combination;
# the result is a Series with a two-level index.
means = df['2019'].groupby([df['Estado'], df['Cidade']]).mean()

In [None]:
means

In [None]:
# Move the inner index level (Cidade) into columns; combinations that do
# not occur in the data show up as NaN.
means.unstack()

In [None]:
# Two-key grouping again, this time for '2020'.
means2 = df['2020'].groupby([df['Estado'], df['Cidade']]).mean()
means2

In [None]:
df.groupby('Estado').mean()


In [None]:
df.groupby('Cidade').mean()

In [None]:
df.groupby(['Estado', 'Cidade']).mean()

In [None]:
df.groupby(['Estado', 'Cidade']).size()

### Iterar por grupos

In [None]:
# Iterating a GroupBy yields (group key, sub-frame) pairs.
for name, group in df.groupby('Estado'):
    print(name)
    print(group)

In [None]:
# With several grouping keys, the group key is a tuple that can be unpacked.
for (k1, k2), group in df.groupby(['Estado', 'Cidade']):
    print(k1, k2)
    print(group)

In [None]:
# Materialize the groups as a dict: Estado value -> sub-frame with its rows.
pieces = dict(list(df.groupby('Estado')))

In [None]:
pieces['SC']

In [None]:
pieces['PR']

In [None]:
pieces['RS']

In [None]:
df.dtypes

In [None]:
grouped = df.groupby(df.dtypes, axis=1)

In [None]:
for dtype, group in grouped:
    print(dtype)
    print(group)

### Seleção de colunas

In [None]:
# Indexing a DataFrame GroupBy with one column name selects that column
# for aggregation; displaying it shows the resulting GroupBy object.
x = df.groupby('Estado')['2020']
x

In [None]:
# Indexing with a list of names (double brackets) keeps a DataFrame result.
df.groupby(['Estado','Cidade'])[['2020']].mean()

In [None]:
# Same list-style selection, kept in a variable for the next cell.
s_grouped = df.groupby(['Estado','Cidade'])[['2019']]
s_grouped

In [None]:
s_grouped.mean()

### Dicts e Series

In [None]:
people = pd.DataFrame(np.random.randn(5, 5),
                     columns=['Jan','Fev','Mar','Abr','Mai'],
                     index=['João','Maria','Ana','Paulo','Bruna'])

In [None]:
people

In [None]:
mapping = {'Jan': 'red','Fev': 'green','Mar': 'red','Abr': 'green','Mai': 'red', 'Jun': 'green'}

In [None]:
by_column = people.groupby(mapping, axis=1)

In [None]:
by_column.sum()

In [None]:
map_series = pd.Series(mapping)

In [None]:
map_series

In [None]:
people.groupby(map_series, axis=1).count(
)

### Agrupar com funções

In [None]:
# A function used as the key is called on each index label (the names);
# its return value is the group key. len groups the names by their length.
people.groupby(len).sum()

In [None]:
# min of a string is its smallest character, which becomes the group key.
people.groupby(min).sum()

In [None]:
# max of a string is its largest character, which becomes the group key.
people.groupby(max).sum()

In [None]:
df = df.unstack()

In [None]:
df = pd.DataFrame(df)

In [None]:
df.groupby(len).min()

### Agrupar por níveis de índice

In [None]:
columns = pd.MultiIndex.from_arrays([['US','US','US','JP','JP'],
                                    [1, 3, 5, 1, 3]],
                                   names=['country', 'region'])

In [None]:
df = pd.DataFrame(np.random.randn(4, 5), columns=columns)

In [None]:
df

In [None]:
df.groupby(level='country', axis=1).count()

In [None]:
df.groupby(level='region', axis=1).sum()

### Agregar dados

In [None]:
# Rebuild the state/city sample frame (fresh random values) for this section.
df = pd.DataFrame({'Estado': ['RS','RS',"SC",'PR',"RS",'SC','SC','PR','RS'],
                  'Cidade': ['Porto Alegre', 'Pelotas', 'Florianópolis', 'Curitiba', 'Porto Alegre', 'Joinville', 'Florianópolis', 'Curitiba', 'Porto Alegre'],
                  '2019': np.random.randn(9),
                  '2020': np.random.randn(9)})

In [None]:
# One GroupBy object, reused by every aggregation cell below.
grouped = df.groupby('Estado')

In [None]:
# 90th percentile of '2019' within each Estado.
grouped['2019'].quantile(0.9)

In [None]:
grouped['2019'].median()

In [None]:
# First quartile.
grouped['2019'].quantile(0.25)

In [None]:
# Third quartile.
grouped['2019'].quantile(0.75)

In [None]:
# First value of each group.
grouped['2019'].first()

In [None]:
# Last value of each group.
grouped['2019'].last()

In [None]:
# Standard deviation per group, for each year.
grouped['2020'].std()

In [None]:
grouped['2019'].std()

In [None]:
# Variance per group.
grouped['2019'].var()

In [None]:
# Product of the values in each group.
grouped['2019'].prod()

In [None]:
def myfunc(arr):
    """Return the spread of ``arr``: its maximum minus its minimum.

    ``arr`` only needs to provide ``max()`` and ``min()`` methods whose
    results can be subtracted (e.g. a pandas Series or a NumPy array).
    """
    highest = arr.max()
    lowest = arr.min()
    return highest - lowest

In [None]:
grouped.agg(myfunc) #usando uma função criada por mim

In [None]:
grouped.describe()

In [None]:
# Load the tips data; the relative path means 'tips.csv' must be in the
# notebook's working directory.
tips = pd.read_csv('tips.csv')

In [None]:
# Tip as a fraction of the total bill.
tips['tip_pct'] = tips['tip'] / tips['total_bill']

In [None]:
# First six rows.
tips[:6]

In [None]:
# Rebinds `grouped`: from here on it is the tips data grouped by day and smoker.
grouped = tips.groupby(['day', 'smoker'])

In [None]:
grouped_pct = grouped['tip_pct']

In [None]:
# A built-in aggregation can be requested by its name as a string.
grouped_pct.agg('mean')

In [None]:
# A list of functions/names gives one result column per aggregation;
# myfunc is the max-minus-min helper defined earlier in the notebook.
grouped_pct.agg(['mean', 'std', myfunc, 'min', 'max'])

In [None]:
# (label, function) tuples: the first element names the result column.
grouped_pct.agg([('Média', 'mean'), ('Desvio Padrão', 'std')])
In [None]:
functions = ['count','mean','max']
result = grouped['tip_pct', 'total_bill'].agg(functions)

In [None]:
result

In [None]:
result['tip_pct']

In [None]:
grouped.agg({'tip': np.max, 'size': 'sum'})

In [None]:
grouped.agg({'sex': 'count', 'total_bill': 'median'})

In [None]:
tips.groupby(['day', 'smoker'], as_index=False).mean()

In [None]:
# Group the state/city sample frame, keeping 'Estado' as a regular column.
g = df.groupby('Estado', as_index=False)

In [None]:
# head() on a GroupBy returns the first rows of each group.
g.head()

### Apply