In [1]:
# Import packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Load the country dataset from the CSV file (path is relative to the
# working directory) and show it as this cell's output
country = pd.read_csv(filepath_or_buffer='country_complete.csv')
country

Unnamed: 0,Country,Continent,Years,Internet access,Emissions range,Fertility,Emissions,Internet
0,Afghanistan,Asia,3.8,Low,Low,4.33,0.254,16.8
1,Albania,Europe,10.0,Moderate,Low,1.71,1.590,65.4
2,Algeria,Africa,8.0,Low,Moderate,2.64,3.690,49.0
3,Angola,Africa,5.1,Low,Low,5.55,1.120,29.0
4,Argentina,Americas,9.9,High,Moderate,2.26,4.410,77.7
...,...,...,...,...,...,...,...,...
146,Uruguay,Americas,8.7,High,Moderate,1.97,2.010,80.7
147,Uzbekistan,Asia,11.5,Moderate,Moderate,2.23,2.810,55.2
148,Vietnam,Asia,8.2,Moderate,Moderate,1.95,2.160,69.8
149,Zambia,Africa,7.0,Low,Low,4.87,0.302,14.3


In [2]:
# Convert 'Internet access' to a categorical column. No category list is
# given, so pandas sorts the categories lexically -- which is why the pivot
# columns come out in alphabetical order here.
country['Internet access'] = country['Internet access'].astype('category')

# Count the countries in each continent / internet-access combination;
# np.size returns the number of entries in each group.
# observed=False is spelled out because the column grouper is categorical and
# relying on the implicit default of `observed` is deprecated in recent
# pandas releases (the default is slated to change).
country.pivot_table(
    values='Years',
    index='Continent',
    columns='Internet access',
    aggfunc=np.size,
    observed=False,
)

Internet access,High,Low,Moderate,Very high
Continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Africa,1.0,36.0,8.0,
Americas,8.0,7.0,10.0,1.0
Asia,10.0,13.0,9.0,8.0
Europe,26.0,,3.0,7.0
Oceania,2.0,1.0,1.0,


In [3]:
# cat.reorder_categories rearranges the category order without changing the
# values; here the levels are put in low-to-high order.
# The list must contain exactly the existing categories.
country['Internet access'] = country['Internet access'].cat.reorder_categories(
    ['Low', 'Moderate', 'High', 'Very high']
)

# Display the number of countries in a pivot table of continent and
# internet access; the columns now follow the category order set above.
# observed=False is spelled out because the column grouper is categorical and
# relying on the implicit default of `observed` is deprecated in recent
# pandas releases (the default is slated to change).
country.pivot_table(
    values='Years',
    index='Continent',
    columns='Internet access',
    aggfunc=np.size,
    observed=False,
)

Internet access,Low,Moderate,High,Very high
Continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Africa,36.0,8.0,1.0,
Americas,7.0,10.0,8.0,1.0
Asia,13.0,9.0,10.0,8.0
Europe,,3.0,26.0,7.0
Oceania,1.0,1.0,2.0,


In [4]:
# List the countries in the Americas whose internet access level is 'Low'
# (the pivot table of counts reports 7 of them)
country.loc[
    country['Continent'].eq('Americas')
    & country['Internet access'].eq('Low')
]

Unnamed: 0,Country,Continent,Years,Internet access,Emissions range,Fertility,Emissions,Internet
14,Belize,Americas,10.5,Low,Low,2.44,1.5,49.8
17,Bolivia,Americas,8.9,Low,Low,2.8,1.96,44.3
43,El Salvador,Americas,6.9,Low,Low,2.04,1.11,43.8
56,Guatemala,Americas,6.5,Low,Low,2.87,1.07,41.5
59,Haiti,Americas,5.3,Low,Low,2.82,0.27,32.5
60,Honduras,Americas,6.5,Low,Low,2.39,1.04,36.0
101,Nicaragua,Americas,6.7,Low,Low,2.14,0.862,37.6


In [5]:
# Display the average number of years of schooling for each
# continent / internet-access combination.
# The aggregation is named by the string 'mean' rather than passing np.mean:
# pandas maps np.mean to its own groupby mean anyway, and passing the NumPy
# callable is deprecated in recent pandas releases.
# observed=False is spelled out because the column grouper is categorical and
# relying on the implicit default of `observed` is deprecated as well.
country.pivot_table(
    values='Years',
    index='Continent',
    columns='Internet access',
    aggfunc='mean',
    observed=False,
)

Internet access,Low,Moderate,High,Very high
Continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Africa,4.836111,7.475,9.5,
Americas,7.328571,9.22,9.7375,13.3
Asia,6.346154,9.566667,10.53,10.1
Europe,,11.033333,11.588462,12.828571
Oceania,7.9,10.8,12.7,


In [6]:
# Display the average of each numeric feature by continent.
# numeric_only=True restricts the mean to the numeric columns explicitly:
# the frame also holds text/categorical columns (e.g. Country), and newer
# pandas versions raise a TypeError instead of silently dropping them.

country.groupby(by=["Continent"]).mean(numeric_only=True)

  country.groupby(by=["Continent"]).mean()


Unnamed: 0_level_0,Years,Fertility,Emissions,Internet
Continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Africa,5.408889,4.090889,1.127953,31.057778
Americas,9.026923,2.161154,3.309308,62.411538
Asia,8.8675,2.244,7.43135,63.8825
Europe,11.783333,1.621944,6.716944,81.738889
Oceania,11.025,2.46,6.81275,68.725


In [7]:
# Display the median of each numeric feature by continent.
# numeric_only=True restricts the median to the numeric columns explicitly:
# the frame also holds text/categorical columns (e.g. Country), and newer
# pandas versions raise a TypeError instead of silently dropping them.

country.groupby(by=["Continent"]).median(numeric_only=True)

  country.groupby(by=["Continent"]).median()


Unnamed: 0_level_0,Years,Fertility,Emissions,Internet
Continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Africa,5.2,4.41,0.434,25.0
Americas,8.7,2.13,2.26,63.4
Asia,9.35,2.02,3.925,67.15
Europe,12.05,1.605,5.905,81.3
Oceania,11.65,2.22,4.875,78.25


In [8]:
# Count how many countries (rows) the dataset holds per continent

country.groupby("Continent").size()

Continent
Africa      45
Americas    26
Asia        40
Europe      36
Oceania      4
dtype: int64

In [9]:
# Summary statistics (count, mean, std, min, quartiles, max) of every
# numeric feature, computed separately for each continent

country.groupby("Continent").describe()

Unnamed: 0_level_0,Years,Years,Years,Years,Years,Years,Years,Years,Fertility,Fertility,...,Emissions,Emissions,Internet,Internet,Internet,Internet,Internet,Internet,Internet,Internet
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,...,75%,max,count,mean,std,min,25%,50%,75%,max
Continent,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
Africa,45.0,5.408889,2.147496,1.5,3.6,5.2,6.8,10.1,45.0,4.090889,...,1.16,8.1,45.0,31.057778,19.035893,4.1,15.8,25.0,43.0,70.1
Americas,26.0,9.026923,1.981829,5.3,7.925,8.7,10.125,13.5,26.0,2.161154,...,3.0225,16.6,26.0,62.411538,17.080523,32.5,50.8,63.4,74.475,94.6
Asia,40.0,8.8675,2.688474,3.1,7.175,9.35,10.825,13.0,40.0,2.244,...,8.945,38.0,40.0,63.8825,26.208081,15.3,43.9,67.15,84.675,99.7
Europe,36.0,11.783333,1.199405,9.2,11.25,12.05,12.425,14.2,36.0,1.621944,...,8.3625,15.9,36.0,81.738889,9.654704,62.6,74.625,81.3,88.975,99.0
Oceania,4.0,11.025,2.27358,7.9,10.075,11.65,12.6,12.9,4.0,2.46,...,9.73,16.9,4.0,68.725,27.771853,29.4,59.025,78.25,87.95,89.0


In [10]:
# Display a subset of the per-continent statistics using .agg() with named
# aggregations: each keyword becomes an output column computed from the
# 'Emissions' column with the given aggregation.
# All three aggregations are named by string for consistency; passing
# np.mean as a callable is deprecated in recent pandas releases, and the
# string "mean" yields the same result.

country.groupby("Continent").agg(
    min_Emissions=pd.NamedAgg(column="Emissions", aggfunc="min"),
    max_Emissions=pd.NamedAgg(column="Emissions", aggfunc="max"),
    mean_Emissions=pd.NamedAgg(column="Emissions", aggfunc="mean"),
)

Unnamed: 0_level_0,min_Emissions,max_Emissions,mean_Emissions
Continent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Africa,0.0467,8.1,1.127953
Americas,0.27,16.6,3.309308
Asia,0.254,38.0,7.43135
Europe,1.59,15.9,6.716944
Oceania,0.601,16.9,6.81275
