# 📦 Pandas DataFrame Grouping

#### » Import the Pandas library, usually abbreviated as `pd`

In [1]:
import pandas as pd

#### » Create a DataFrame with a dictionary

In [2]:
# Six-row demo frame: a label column cycling through "a", "b", "c"
# and an integer value column to aggregate per label.
df = pd.DataFrame(
    {
        "groups": list("abcabc"),
        "data": [1, 5, 6, 8, 3, 10],
    }
)
df

Unnamed: 0,groups,data
0,a,1
1,b,5
2,c,6
3,a,8
4,b,3
5,c,10


#### » Create a GroupBy object based on a specific column

In [3]:
# Group the rows by the "groups" column; the cell displays the resulting
# DataFrameGroupBy object itself, with no aggregation applied yet.
df.groupby(by="groups")

<pandas.core.groupby.generic.DataFrameGroupBy object at 0x135135a90>

#### » Access a specific group from the grouped object

In [4]:
# Pull out only the rows whose "groups" value is "a".
(
    df
    .groupby(by="groups")
    .get_group("a")
)

Unnamed: 0,groups,data
0,a,1
3,a,8


#### » Calculate the mean of every non-key column for each group

In [5]:
# Average the remaining column(s) within each group; the group labels
# become the index of the resulting DataFrame.
(
    df
    .groupby(by="groups")
    .mean()
)

Unnamed: 0_level_0,data
groups,Unnamed: 1_level_1
a,4.5
b,4.0
c,8.0


#### » Calculate the sum of each group

In [6]:
# Add up the values within each group (this is a sum, not a row count).
(
    df
    .groupby(by="groups")
    .sum()
)

Unnamed: 0_level_0,data
groups,Unnamed: 1_level_1
a,9
b,8
c,16


#### » Create a GroupBy object on a specific column and select a single column

In [7]:
# Group by "groups", then narrow to the "data" column; the cell displays
# the resulting SeriesGroupBy object.
df.groupby(by="groups")["data"]

<pandas.core.groupby.generic.SeriesGroupBy object at 0x135135940>

#### » Calculate the mean of a specific column for each group

In [8]:
# Single-bracket selection: the per-group mean comes back as a Series
# named "data", indexed by group label.
(
    df
    .groupby(by="groups")["data"]
    .mean()
)

groups
a    4.5
b    4.0
c    8.0
Name: data, dtype: float64

In [9]:
# Double-bracket selection (a list of column names): the per-group mean
# comes back as a one-column DataFrame instead of a Series.
(
    df
    .groupby(by="groups")[["data"]]
    .mean()
)

Unnamed: 0_level_0,data
groups,Unnamed: 1_level_1
a,4.5
b,4.0
c,8.0


#### » Calculate the mean of each group and sort the averages in descending order

In [10]:
# Per-group mean of "data", ordered from the largest average to the smallest.
(
    df
    .groupby(by="groups")["data"]
    .mean()
    .sort_values(ascending=False)
)

groups
c    8.0
a    4.5
b    4.0
Name: data, dtype: float64

#### » Calculate the mean of each group and sort the averages in ascending order

In [11]:
# Per-group mean of "data", ordered from the smallest average to the largest
# (sort_values sorts ascending by default).
(
    df
    .groupby(by="groups")["data"]
    .mean()
    .sort_values()
)

groups
b    4.0
a    4.5
c    8.0
Name: data, dtype: float64

#### » Get detailed statistics (count, mean, std, min, etc.) for each group of a column

In [12]:
# Summary statistics of "data" for every group: one row per group,
# one column per statistic (count, mean, std, min, quartiles, max).
(
    df
    .groupby(by="groups")["data"]
    .describe()
)

Unnamed: 0_level_0,count,mean,std,min,25%,50%,75%,max
groups,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
a,2.0,4.5,4.949747,1.0,2.75,4.5,6.25,8.0
b,2.0,4.0,1.414214,3.0,3.5,4.0,4.5,5.0
c,2.0,8.0,2.828427,6.0,7.0,8.0,9.0,10.0


#### » Transpose the result of group-wise describe for better readability

In [13]:
# Same summary statistics, flipped so that each group is a column and
# each statistic is a row.
(
    df
    .groupby(by="groups")["data"]
    .describe()
    .transpose()
)

groups,a,b,c
count,2.0,2.0,2.0
mean,4.5,4.0,8.0
std,4.949747,1.414214,2.828427
min,1.0,3.0,6.0
25%,2.75,3.5,7.0
50%,4.5,4.0,8.0
75%,6.25,4.5,9.0
max,8.0,5.0,10.0
