### Imports

In [2]:
import pandas as pd
import numpy as np

# Load the dog roster used by every cell below; the file is expected to sit
# next to the notebook.
DOGS_CSV = "dogs_formatted.csv"
dogs = pd.read_csv(DOGS_CSV)
print(dogs)

     name             breed  color  height_cm  weight_kg date_of_birth
0   Bella         Chihuahua  Brown         18          2    2018-02-05
1   Amigo          Labrador  Black         59         35    2016-08-12
2  Trevis       St. Bernard  Brown         77         73    2019-07-24
3   Golin             Husky  White         55         30    2015-06-18
4    Lucy          Labrador  White         51         26    2020-04-29
5     Max  Golden Retriever  Brown         49         21    2014-01-20
6    Otto            Poodle  Brown         42         20    2013-06-27
7    Rexo   German Shepherd  Brown         54         24    2018-05-21


## Groupby() method

In [2]:
# Average weight (kg) of the dogs within each coat color.
(
    dogs
    .groupby("color")["weight_kg"]
    .mean()
)

color
Black    35.0
Brown    28.0
White    28.0
Name: weight_kg, dtype: float64

## pivot_table() method

Mean of the column

In [3]:
# By default a pivot table aggregates each group with the mean, so this gives
# the same numbers as the groupby above.
dogs.pivot_table(
    values="weight_kg",
    index="color",
)

Unnamed: 0_level_0,weight_kg
color,Unnamed: 1_level_1
Black,35
Brown,28
White,28


Median of the column

In [4]:
# To use a statistic other than the default mean, pass it via `aggfunc`.
# The string alias "median" is used rather than the NumPy callable: recent
# pandas versions emit a FutureWarning when np.median is passed here, because
# the callable will no longer be silently mapped to the built-in group median.
dogs.pivot_table(values="weight_kg", index="color", aggfunc="median")

Unnamed: 0_level_0,weight_kg
color,Unnamed: 1_level_1
Black,35
Brown,21
White,28


Mean and median of the column

In [6]:
# A list in `aggfunc` computes several statistics at once; each one becomes a
# top-level column group named after the statistic. String aliases are used
# instead of np.mean / np.median, which recent pandas versions deprecate as
# aggregation callables (FutureWarning).
dogs.pivot_table(
    values="weight_kg",
    index="color",
    aggfunc=["mean", "median"],
)

Unnamed: 0_level_0,mean,median
Unnamed: 0_level_1,weight_kg,weight_kg
color,Unnamed: 1_level_2,Unnamed: 2_level_2
Black,35,35
Brown,28,21
White,28,28


## Pivot on two variables

In [8]:
# Mean weight for every (color, breed) pair: colors become the rows, breeds
# the columns. Combinations with no dogs show up as NaN.
dogs.pivot_table(
    values="weight_kg",
    index="color",
    columns="breed",
)

breed,Chihuahua,German Shepherd,Golden Retriever,Husky,Labrador,Poodle,St. Bernard
color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Black,,,,,35.0,,
Brown,2.0,24.0,21.0,,,20.0,73.0
White,,,,30.0,26.0,,


## Filling missing values in pivot tables

In [9]:
# Same table as above, but empty (color, breed) combinations are shown as 0
# instead of NaN.
dogs.pivot_table(
    values="weight_kg",
    index="color",
    columns="breed",
    fill_value=0,
)

breed,Chihuahua,German Shepherd,Golden Retriever,Husky,Labrador,Poodle,St. Bernard
color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
Black,0,0,0,0,35,0,0
Brown,2,24,21,0,0,20,73
White,0,0,0,30,26,0,0


## Summarizing with pivot tables (margins)

In [10]:
# margins=True appends an "All" row and an "All" column holding the mean of
# each column / row. Those means ignore the missing combinations: the zeros
# inserted by fill_value are not counted.
dogs.pivot_table(
    values="weight_kg",
    index="color",
    columns="breed",
    fill_value=0,
    margins=True,
)

breed,Chihuahua,German Shepherd,Golden Retriever,Husky,Labrador,Poodle,St. Bernard,All
color,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Black,0,0,0,0,35.0,0,0,35.0
Brown,2,24,21,0,0.0,20,73,28.0
White,0,0,0,30,26.0,0,0,28.0
All,2,24,21,30,30.5,20,73,28.875


## Pivoting the dog pack

In [4]:
# Mean height (cm) per breed (rows) and color (columns); combinations with no
# dogs are filled with 0.
dogs_height_by_breed_vs_color = dogs.pivot_table(
    values="height_cm",
    index="breed",
    columns="color",
    fill_value=0,
)
print(dogs_height_by_breed_vs_color)

color             Black  Brown  White
breed                                
Chihuahua             0     18      0
German Shepherd       0     54      0
Golden Retriever      0     49      0
Husky                 0      0     55
Labrador             59      0     51
Poodle                0     42      0
St. Bernard           0     77      0


In [5]:
# Label-based row slice: unlike positional slicing, both end labels are
# included. All columns are kept.
dogs_height_by_breed_vs_color.loc["German Shepherd":"Labrador", :]

color,Black,Brown,White
breed,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
German Shepherd,0,54,0
Golden Retriever,0,49,0
Husky,0,0,55
Labrador,59,0,51


Axis argument

In [6]:
# Mean down the rows (axis 0 == "index"), i.e. one value per color column.
# Note: the zeros inserted by fill_value are real values here, so they are
# averaged in and pull each mean down.
dogs_height_by_breed_vs_color.mean(axis=0)

color
Black     8.428571
Brown    34.285714
White    15.142857
dtype: float64

In [7]:
# Mean across the columns (axis 1 == "columns"), i.e. one value per breed row.
# As above, the zero-filled cells are included in each average.
dogs_height_by_breed_vs_color.mean(axis=1)

breed
Chihuahua            6.000000
German Shepherd     18.000000
Golden Retriever    16.333333
Husky               18.333333
Labrador            36.666667
Poodle              14.000000
St. Bernard         25.666667
dtype: float64