# Grouping Data in Pandas DataFrames

## 1. Import Libraries and Dependencies

In [2]:
# Import necessary libraries and dependencies
import pandas as pd
from pathlib import Path
%matplotlib inline

## 2. Set the Path to the CSV File

In [4]:
# Use the pathlib library to set the path to the CSV file.
# The path is relative, so it is resolved against the current working
# directory at the moment the file is opened.
csvpath = Path("../Resources/people_cleansed.csv")

## 3. Read the CSV into a Pandas DataFrame

In [5]:
# Load the CSV located at `csvpath` into a DataFrame, then emit the frame as
# the cell output (the notebook abbreviates long frames with a `...` row).
people_df = pd.read_csv(filepath_or_buffer=csvpath)
people_df

Unnamed: 0,Person_ID,Last_Name,First_Name,Gender,University,Occupation,Salary,Email,Age
0,1,Lenormand,Keriann,Female,Aurora University,Nurse Practicioner,58135.0,klenormand0@businessinsider.com,27
1,2,Rupke,Huntley,Male,Osaka University of Economics,Project Manager,96053.0,hrupke1@reuters.com,22
2,3,Dalgarnowch,Gorden,Male,Ludong University,Environmental Tech,59196.0,gdalgarnowch2@microsoft.com,40
3,4,Unnamed,Cullie,Male,Université des Sciences et de la Technologie d...,Legal Assistant,88493.0,cputten3@nymag.com,62
4,5,Strangman,Ariel,Female,Boise State University,Project Manager,89073.0,astrangman4@bravesites.com,47
...,...,...,...,...,...,...,...,...,...
968,996,Crumpton,Meta,Female,ECAM - Institut Supérieur Industriel,Registered Nurse,57060.0,mcrumptonrn@qq.com,52
969,997,Gilford,Gunar,Male,Smolny University,Marketing Manager,76109.0,ggilfordro@yandex.ru,32
970,998,Gurling,Lucretia,Female,Institut Teknologi Telkom,Software Engineer III,92115.0,lgurlingrp@de.vu,48
971,999,Yang,Andrew,Male,Rutgers University School of Business,Curriculum Engineer,60000.0,ayang@codedrills.com,53


## 4. Group DataFrame by `Occupation` and Perform `count` Aggregation

In [6]:
# For each distinct `Occupation`, count the non-null entries in every other column
(
    people_df
    .groupby(by="Occupation")
    .count()
)

Unnamed: 0_level_0,Person_ID,Last_Name,First_Name,Gender,University,Salary,Email,Age
Occupation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Account Coordinator,6,6,6,6,6,6,6,6
Account Executive,4,4,4,4,4,4,4,4
Account Representative I,1,1,1,1,1,1,1,1
Account Representative II,5,5,5,5,5,5,5,5
Account Representative III,4,4,4,4,4,4,4,4
...,...,...,...,...,...,...,...,...
Web Designer III,1,1,1,1,1,1,1,1
Web Developer I,2,2,2,2,2,2,2,2
Web Developer II,1,1,1,1,1,1,1,1
Web Developer III,3,3,3,3,3,3,3,3


## 5. Group DataFrame by `Occupation` and Calculate Average Salary and Age

In [7]:
# Calculate the average of the numeric columns for each Occupation.
# `numeric_only=True` is needed on pandas >= 2.0: the default became False
# there, and the text columns (Last_Name, Email, ...) would make `.mean()`
# raise a TypeError. Person_ID is numeric as well, so it is averaged
# alongside Salary and Age.
people_df.groupby("Occupation").mean(numeric_only=True)

Unnamed: 0_level_0,Person_ID,Salary,Age
Occupation,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Account Coordinator,327.50,91525.000000,52.666667
Account Executive,140.00,79902.750000,47.750000
Account Representative I,774.00,75871.000000,63.000000
Account Representative II,505.40,72313.400000,41.600000
Account Representative III,446.75,91499.750000,39.500000
...,...,...,...
Web Designer III,577.00,78667.000000,30.000000
Web Developer I,748.00,99438.500000,46.000000
Web Developer II,801.00,75447.000000,58.000000
Web Developer III,302.00,87014.666667,41.333333


## 6. Group By Multiple Columns and Calculate Average Salary and Age

In [10]:
# Group by more than one column: every (Occupation, Gender) pair becomes one
# row of the result, indexed by both keys.
# `numeric_only=True` is needed on pandas >= 2.0: the default became False
# there, and the remaining text columns (Last_Name, Email, ...) would make
# `.mean()` raise a TypeError. Person_ID is numeric as well, so it is
# averaged alongside Salary and Age.
people_df.groupby(["Occupation", "Gender"]).mean(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,Person_ID,Salary,Age
Occupation,Gender,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Account Coordinator,Female,343.000000,98514.000000,47.500000
Account Coordinator,Male,319.750000,88030.500000,55.250000
Account Executive,Female,169.666667,82720.666667,52.666667
Account Executive,Male,51.000000,71449.000000,33.000000
Account Representative I,Male,774.000000,75871.000000,63.000000
...,...,...,...,...
Web Developer II,Male,801.000000,75447.000000,58.000000
Web Developer III,Female,159.000000,101919.000000,41.000000
Web Developer III,Male,373.500000,79562.500000,41.500000
Web Developer IV,Female,521.000000,76349.000000,42.500000
