In [1]:
import pandas as pd
import numpy as np

# Small in-memory gradebook: 26 students, one row each.
# "Math" and "English" are integer scores, "Home" is a 0/1 indicator and
# "Gender" is "M" or "F". One column per line keeps the lists easy to compare.
df = pd.DataFrame({
    "StudentID": [9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30],
    "Math": [22, 66, 31, 51, 71, 91, 56, 32, 52, 73, 92, 98, 93, 44, 77, 69, 56, 31, 53, 78, 93, 56, 77, 33, 56, 27],
    "English": [39, 49, 55, 77, 52, 86, 41, 77, 73, 51, 86, 82, 92, 23, 93, 44, 78, 97, 87, 89, 39, 43, 88, 78, 42, 55],
    "Home": [1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0],
    "Gender": ["M", "F", "F", "M", "M", "F", "M", "F", "F", "M", "F", "M", "M", "F", "F", "M", "F", "M", "M", "F", "F",
               "F", "M", "F", "M", "M"],
})

# Preview the first ten rows only; head(100) on a 26-row frame would dump the
# whole table instead of a sample.
df.head(10)

Unnamed: 0,StudentID,Math,English,Home,Gender
0,9,22,39,1,M
1,11,66,49,1,F
2,13,31,55,0,F
3,15,51,77,0,M
4,17,71,52,1,M
5,19,91,86,1,F
6,21,56,41,0,M
7,23,32,77,1,F
8,25,52,73,0,F
9,27,73,51,1,M


# Split

In [2]:
# Keep only the grouping key and the two score columns.
df_sub = df.loc[:, ["Gender", "Math", "English"]]

# "Split" step: partition the rows by gender; nothing is aggregated yet.
grouped = df_sub.groupby(by="Gender")

# The group labels found in the data.
grouped.groups.keys()

dict_keys(['F', 'M'])

# Apply and Combine

In [3]:
# Lowest score per subject within each gender group.
by_gender = df_sub.groupby("Gender")
by_gender.min()

Unnamed: 0_level_0,Math,English
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
F,31,23
M,22,39


In [4]:
# Highest score per subject within each gender group.
by_gender = df_sub.groupby("Gender")
by_gender.max()

Unnamed: 0_level_0,Math,English
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
F,93,93
M,98,97


In [5]:
# Number of non-null entries per column in each gender group.
by_gender = df_sub.groupby("Gender")
by_gender.count()

Unnamed: 0_level_0,Math,English
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
F,13,13
M,13,13


In [6]:
# Row count per gender group, returned as one Series rather than per column.
by_gender = df_sub.groupby("Gender")
by_gender.size()

Gender
F    13
M    13
dtype: int64

In [7]:
# Average score per subject within each gender group.
by_gender = df_sub.groupby("Gender")
by_gender.mean()

Unnamed: 0_level_0,Math,English
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
F,61.615385,66.846154
M,59.769231,65.153846


In [8]:
# Middle score per subject within each gender group.
by_gender = df_sub.groupby("Gender")
by_gender.median()

Unnamed: 0_level_0,Math,English
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
F,56.0,77.0
M,56.0,55.0


In [9]:
# Total of the scores per subject within each gender group.
by_gender = df_sub.groupby("Gender")
by_gender.sum()

Unnamed: 0_level_0,Math,English
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
F,801,869
M,777,847


# Apply several aggregation methods

In [10]:
# Passing a list of reducers yields one sub-column per (score column, statistic).
extremes = ["min", "max"]
df_sub.groupby("Gender").agg(extremes)

Unnamed: 0_level_0,Math,Math,English,English
Unnamed: 0_level_1,min,max,min,max
Gender,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
F,31,93,23,93
M,22,98,39,97


# Apply a custom aggregation method

In [11]:
def difference(g):
    """Return the spread of ``g``: its largest value minus its smallest.

    When used with ``groupby(...).agg``, ``g`` is a Series holding one
    column of one group.
    """
    lowest, highest = g.min(), g.max()
    return highest - lowest

# A plain Python callable works as a reducer: it is called once per column per group.
by_gender = df_sub.groupby("Gender")
by_gender.agg(difference)

Unnamed: 0_level_0,Math,English
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
F,62,70
M,76,58


In [12]:
# A dict picks a different reducer per column: the custom range function for
# Math and the built-in standard deviation for English.
per_column_reducers = {"Math": difference, "English": "std"}
df_sub.groupby("Gender").agg(per_column_reducers)

Unnamed: 0_level_0,Math,English
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
F,62,22.404784
M,76,22.146656


In [18]:
# Same pair of statistics for both subjects, spelled as a column -> reducers mapping.
moments = ["mean", "std"]
df_sub.groupby("Gender").agg({subject: moments for subject in ("Math", "English")})

Unnamed: 0_level_0,Math,Math,English,English
Unnamed: 0_level_1,mean,std,mean,std
Gender,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
F,61.615385,23.085127,66.846154,22.404784
M,59.769231,23.678942,65.153846,22.146656


# Grouping by multiple columns

In [13]:
# NOTE: this rebinds df_sub with an extra "Home" column, so re-running the
# earlier single-key cells afterwards would also aggregate "Home".
df_sub = df.loc[:, ["Gender", "Home", "Math", "English"]]

# Two keys produce one result row per (Gender, Home) combination.
group_keys = ["Gender", "Home"]
df_sub.groupby(group_keys).mean()

Unnamed: 0_level_0,Unnamed: 1_level_0,Math,English
Gender,Home,Unnamed: 2_level_1,Unnamed: 3_level_1
F,0,52.0,57.4
F,1,67.625,72.75
M,0,61.571429,67.714286
M,1,57.666667,62.166667


In [14]:
# Keep the two-key mean table so its index structure can be inspected.
by_gender_home = df_sub.groupby(["Gender", "Home"])
grouped_df = by_gender_home.mean()

# info() reports the index type alongside the column dtypes.
grouped_df.info()

<class 'pandas.core.frame.DataFrame'>
MultiIndex: 4 entries, ('F', 0) to ('M', 1)
Data columns (total 2 columns):
 #   Column   Non-Null Count  Dtype  
---  ------   --------------  -----  
 0   Math     4 non-null      float64
 1   English  4 non-null      float64
dtypes: float64(2)
memory usage: 212.0+ bytes


In [15]:
# Single .loc lookup: row label is the (Gender, Home) tuple, column is "Math".
# This replaces chained indexing (frame[col][row]) with one explicit access.
grouped_df.loc[("F", 1), "Math"]

67.625

In [16]:
# Single .loc lookup: row label is the (Gender, Home) tuple, column is "English".
# This replaces chained indexing (frame[col][row]) with one explicit access.
grouped_df.loc[("M", 0), "English"]

67.71428571428571

In [17]:
# Turn the (Gender, Home) index levels back into ordinary columns;
# drop=False (the default) keeps their values instead of discarding them.
grouped_df.reset_index(drop=False)

Unnamed: 0,Gender,Home,Math,English
0,F,0,52.0,57.4
1,F,1,67.625,72.75
2,M,0,61.571429,67.714286
3,M,1,57.666667,62.166667
