# 💿 Pandas DataFrame Aggregation

#### » Import the Pandas library, usually abbreviated as `pd`

In [1]:
import pandas as pd

#### » Create a DataFrame from a dictionary

In [2]:
# Small example frame: the group labels "a", "b", "c" each occur twice,
# alongside two integer columns to aggregate.
df = pd.DataFrame(
    {
        "groups": ["a", "b", "c"] * 2,
        "var1": [1, 5, 2, 8, 0, 4],
        "var2": [4, 3, 6, 8, 3, 2],
    }
)
df  # bare trailing expression -> rendered as the cell output

Unnamed: 0,groups,var1,var2
0,a,1,4
1,b,5,3
2,c,2,6
3,a,8,8
4,b,0,3
5,c,4,2


#### » Create a GroupBy object based on a specific column

In [3]:
# Grouping alone yields a DataFrameGroupBy object, not a result table;
# an aggregation has to be applied to it to obtain values.
df.groupby(by="groups")

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x12e5f1a90>

#### » Aggregate using multiple functions (min, median and max)

In [4]:
# A list of function names is applied to every non-key column, so the result
# has two column levels: the source column and the function applied to it.
(
    df
    .groupby(by="groups")
    .aggregate(["min", "median", "max"])
)

Unnamed: 0_level_0,var1,var1,var1,var2,var2,var2
Unnamed: 0_level_1,min,median,max,min,median,max
groups,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2
a,1,4.5,8,4,6.0,8
b,0,2.5,5,3,3.0,3
c,2,3.0,4,2,4.0,6


#### » Aggregate with different functions per column

In [5]:
# A dict maps each column name to the function used to aggregate it.
(
    df
    .groupby(by="groups")
    .aggregate({"var1": "min", "var2": "max"})
)

Unnamed: 0_level_0,var1,var2
groups,Unnamed: 1_level_1,Unnamed: 2_level_1
a,1,8
b,0,3
c,2,6


#### » Aggregate with custom lambda function on a specific column

In [6]:
# Custom aggregation: the range (max - min) of "var1" within each group.
# The lambda receives each group's values as a Series, so use the Series
# methods rather than Python's built-in max()/min(): they are vectorized and
# skip NaN by default, whereas the built-ins loop element by element and give
# order-dependent results when NaN is present.
df.groupby("groups")["var1"].agg(lambda x: x.max() - x.min())

groups
a    7
b    5
c    2
Name: var1, dtype: int64

#### » Name the result of an aggregation

In [7]:
# Named aggregation: each keyword is the output column name, and its value is
# a (source column, function) pair.
(
    df
    .groupby(by="groups")
    .aggregate(
        var1_min=("var1", "min"),
        var2_max=("var2", "max"),
    )
)

Unnamed: 0_level_0,var1_min,var2_max
groups,Unnamed: 1_level_1,Unnamed: 2_level_1
a,1,8
b,0,3
c,2,6
