In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os

### EDA for World Econcomic Outlook (WEO) Dataset

#### _Reading the dataset_

In [2]:
df = pd.read_csv("WEOOct2020all.csv", encoding='cp1252')
df.head(100)

Unnamed: 0,WEO Country Code,ISO,WEO Subject Code,Country,Subject Descriptor,Subject Notes,Units,Scale,Country/Series-specific Notes,1980,...,2017,2018,2019,2020,2021,2022,2023,2024,2025,Estimates Start After
0,512,AFG,NGDP_R,Afghanistan,"Gross domestic product, constant prices",Expressed in billions of national currency uni...,National currency,Billions,Source: National Statistics Office Latest actu...,,...,1255.29,1270.22,1319.90,1253.91,1304.06,1363.06,1424.40,1481.50,1540.94,2019.0
1,512,AFG,NGDP_RPCH,Afghanistan,"Gross domestic product, constant prices",Annual percentages of constant price GDP are y...,Percent change,,"See notes for: Gross domestic product, consta...",,...,2.647,1.189,3.912,-5,4,4.524,4.5,4.009,4.012,2019.0
2,512,AFG,NGDP,Afghanistan,"Gross domestic product, current prices",Expressed in billions of national currency uni...,National currency,Billions,Source: National Statistics Office Latest actu...,,...,1285.46,1327.69,1469.60,1465.92,1597.74,1741.83,1893.02,2047.67,2215.01,2019.0
3,512,AFG,NGDPD,Afghanistan,"Gross domestic product, current prices",Values are based upon GDP in national currency...,U.S. dollars,Billions,"See notes for: Gross domestic product, curren...",,...,18.91,18.401,18.876,19.006,19.692,20.829,22.022,23.169,24.372,2019.0
4,512,AFG,PPPGDP,Afghanistan,"Gross domestic product, current prices",These data form the basis for the country weig...,Purchasing power parity; international dollars,Billions,"See notes for: Gross domestic product, curren...",,...,74.712,77.416,81.88,78.884,83.852,89.205,94.908,100.6,106.685,2019.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,612,DZA,NGDP_D,Algeria,"Gross domestic product, deflator",The GDP deflator is derived by dividing curren...,Index,,"See notes for: Gross domestic product, consta...",6.259,...,252.228,271.285,268.378,256.715,273.083,286.532,301.946,319.268,339.461,2019.0
96,612,DZA,NGDPRPC,Algeria,"Gross domestic product per capita, constant pr...",GDP is expressed in constant national currency...,National currency,Units,"See notes for: Gross domestic product, consta...",139096.11,...,176522.01,175390.59,173349.36,160907.12,163058.90,164397.78,163593.03,162714.82,161822.73,2019.0
97,612,DZA,NGDPRPPPPC,Algeria,"Gross domestic product per capita, constant pr...",GDP is expressed in constant international dol...,Purchasing power parity; 2017 international do...,Units,"See notes for: Gross domestic product, consta...",9029.29,...,11458.76,11385.31,11252.81,10445.13,10584.81,10671.72,10619.48,10562.48,10504.57,2019.0
98,612,DZA,NGDPPC,Algeria,"Gross domestic product per capita, current prices",GDP is expressed in current national currency ...,National currency,Units,"See notes for: Gross domestic product, curren...",8705.67,...,445238.61,475809.10,465232.08,413072.84,445285.33,471053.02,493962.94,519495.82,549325.50,2019.0


#### _Check for the size of the df_

In [3]:
df.shape

(8777, 56)

#### _Check for missing values_

In [4]:
df.isnull().sum()

WEO Country Code                    1
ISO                                 2
WEO Subject Code                    2
Country                             2
Subject Descriptor                  2
Subject Notes                     197
Units                               2
Scale                            4877
Country/Series-specific Notes    1188
1980                             4898
1981                             4769
1982                             4728
1983                             4686
1984                             4661
1985                             4585
1986                             4549
1987                             4528
1988                             4439
1989                             4378
1990                             3889
1991                             3732
1992                             3349
1993                             3156
1994                             3029
1995                             2673
1996                             2530
1997        

#### _Since the missing values of the years columns are very large, we will drop just the rows whose WEO Subject Code are missing_

In [5]:
condition = (df['WEO Subject Code'].isnull() == True)
# condition
df = df[~condition]
df.shape

(8775, 56)

#### _Dropping some columns that are not being used (only interested in the last decade's information)_

In [6]:
cols_to_drop = ['Subject Descriptor', 'Subject Notes', 'Units', 'Scale', 'Country/Series-specific Notes', 'Estimates Start After',
               '1980', '1981', '1982', '1983', '1984', '1985', '1986', '1987', '1988', '1989', '1990', '1991', '1992', '1993','1994',
               '1995', '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003', '2004', '2005', '2006', '2007', '2008', '2009', '2010',
               '2011', '2012','2024', '2025']
df.drop(columns=cols_to_drop, inplace=True)
df

Unnamed: 0,WEO Country Code,ISO,WEO Subject Code,Country,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,512,AFG,NGDP_R,Afghanistan,1154.18,1185.31,1197.01,1222.92,1255.29,1270.22,1319.90,1253.91,1304.06,1363.06,1424.40
1,512,AFG,NGDP_RPCH,Afghanistan,5.683,2.697,0.988,2.164,2.647,1.189,3.912,-5,4,4.524,4.5
2,512,AFG,NGDP,Afghanistan,1116.83,1183.04,1226.57,1222.92,1285.46,1327.69,1469.60,1465.92,1597.74,1741.83,1893.02
3,512,AFG,NGDPD,Afghanistan,20.17,20.635,20.22,17.994,18.91,18.401,18.876,19.006,19.692,20.829,22.022
4,512,AFG,PPPGDP,Afghanistan,63.784,69.444,72.056,70.098,74.712,77.416,81.88,78.884,83.852,89.205,94.908
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8770,698,ZWE,GGXWDG,Zimbabwe,7.36,7.854,8.346,11.269,14.505,15.859,17.224,18.512,19.73,20.862,21.46
8771,698,ZWE,GGXWDG_NGDP,Zimbabwe,38.553,40.287,41.806,54.164,52.866,37.343,10.806,2.358,2.226,2.202,2.148
8772,698,ZWE,NGDP_FY,Zimbabwe,19.091,19.496,19.963,20.806,27.438,42.468,159.391,785.156,886.282,947.567,999.093
8773,698,ZWE,BCA,Zimbabwe,-2.526,-2.254,-1.521,-0.718,-0.284,-1.229,0.208,-0.505,-0.16,-0.288,-0.589


#### _Replacing missing values with 0_

In [7]:
df.fillna(0, inplace=True)
df.isnull().sum()

WEO Country Code    0
ISO                 0
WEO Subject Code    0
Country             0
2013                0
2014                0
2015                0
2016                0
2017                0
2018                0
2019                0
2020                0
2021                0
2022                0
2023                0
dtype: int64

#### _Removing the commas from the numbers and Converting the year columns data to numeric form_

In [8]:
years_country_cols = ['Country', '2013', '2014', '2015', '2016', '2017', '2018', '2019', '2020', '2021', '2022', '2023']

for col in df[years_country_cols[1:]]:
    df[col] = df[col].replace(',', '', regex=True)
    
for col in df[years_country_cols[1:]]:
    df[col] = pd.to_numeric(df[col], errors='coerce')
    
df[years_country_cols]   
# df


Unnamed: 0,Country,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
0,Afghanistan,1154.180,1185.310,1197.010,1222.920,1255.290,1270.220,1319.900,1253.910,1304.060,1363.060,1424.400
1,Afghanistan,5.683,2.697,0.988,2.164,2.647,1.189,3.912,-5.000,4.000,4.524,4.500
2,Afghanistan,1116.830,1183.040,1226.570,1222.920,1285.460,1327.690,1469.600,1465.920,1597.740,1741.830,1893.020
3,Afghanistan,20.170,20.635,20.220,17.994,18.910,18.401,18.876,19.006,19.692,20.829,22.022
4,Afghanistan,63.784,69.444,72.056,70.098,74.712,77.416,81.880,78.884,83.852,89.205,94.908
...,...,...,...,...,...,...,...,...,...,...,...,...
8770,Zimbabwe,7.360,7.854,8.346,11.269,14.505,15.859,17.224,18.512,19.730,20.862,21.460
8771,Zimbabwe,38.553,40.287,41.806,54.164,52.866,37.343,10.806,2.358,2.226,2.202,2.148
8772,Zimbabwe,19.091,19.496,19.963,20.806,27.438,42.468,159.391,785.156,886.282,947.567,999.093
8773,Zimbabwe,-2.526,-2.254,-1.521,-0.718,-0.284,-1.229,0.208,-0.505,-0.160,-0.288,-0.589


## Tasks

1. Find top 10 countries that grew "Gross domestic product per capita" the most over the last decade

#### _New df for gross domestic product per capita_

In [9]:
gdppc = df[df['WEO Subject Code'] == 'NGDPPC']
gdppc

Unnamed: 0,WEO Country Code,ISO,WEO Subject Code,Country,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023
8,512,AFG,NGDPPC,Afghanistan,35195.98,36114.49,36357.36,35287.31,36179.58,36502.09,39495.72,38521.15,41041.03,43736.42,4.646407e+04
53,914,ALB,NGDPPC,Albania,466324.61,482954.10,497901.56,511970.59,539644.58,570655.99,584876.96,548016.29,591368.93,639708.26,6.844908e+05
98,612,DZA,NGDPPC,Algeria,434705.07,440471.44,418203.99,428900.97,445238.61,475809.10,465232.08,413072.84,445285.33,471053.02,4.939629e+05
143,614,AGO,NGDPPC,Angola,524723.13,553023.40,522854.87,601736.74,714051.25,915486.64,1082787.76,1144588.30,1418836.60,1623855.25,1.778944e+06
188,311,ATG,NGDPPC,Antigua and Barbuda,36159.00,37670.37,39681.61,42001.23,42269.11,45524.58,46427.74,38228.14,39819.17,44196.01,4.794260e+04
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8558,582,VNM,NGDPPC,Vietnam,49295071.92,53830819.20,56015183.23,60223994.08,66531308.40,73245702.04,79349929.72,81863279.24,89454137.95,98613256.65,1.088655e+08
8603,487,WBG,NGDPPC,West Bank and Gaza,11264.71,11290.16,11982.80,12770.98,12256.58,12057.29,12212.83,10072.79,10607.32,11080.36,1.129957e+04
8648,474,YEM,NGDPPC,Yemen,325761.69,338289.65,346404.96,305195.16,333789.11,375733.15,398322.10,468071.53,544130.86,609181.37,6.937316e+05
8693,754,ZMB,NGDPPC,Zambia,9925.93,10626.07,11311.44,12926.84,14285.40,15890.59,16994.34,17909.10,20110.95,21747.14,2.324067e+04


#### _Calculating the percentage growth between the beginning of the decade (2014) and the end of the decade_

In [14]:
gdppc['Growth Percentage'] = ((gdppc.loc[:,'2023'] - gdppc.loc[:, '2014'])/gdppc.loc[:, '2014'])*100
top10_gdppc_countries = gdppc.sort_values(by='Growth Percentage', ascending=False).head(10)
# top10_gdppc_countries[['Country', 'Growth Percentage']]
top10_gdppc_countries

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gdppc['Growth Percentage'] = ((gdppc.loc[:,'2023'] - gdppc.loc[:, '2014'])/gdppc.loc[:, '2014'])*100


Unnamed: 0,WEO Country Code,ISO,WEO Subject Code,Country,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,2023,Growth Percentage
8738,698,ZWE,NGDPPC,Zimbabwe,1421.55,1415.03,1425.01,1462.31,1900.55,2900.49,10693.46,51691.45,57207.92,59908.93,61808.71,4268.014106
7388,732,SDN,NGDPPC,Sudan,8684.81,11818.38,13323.61,16087.4,20416.71,32444.62,46675.07,101054.0,229102.8,356720.3,455987.6,3758.291661
7118,733,SSD,NGDPPC,South Sudan,3944.83,3935.56,4450.06,13439.42,31555.41,50889.07,58294.66,52848.89,69911.51,86328.84,102332.7,2500.20734
233,213,ARG,NGDPPC,Argentina,79338.29,107315.2,138053.3,188761.0,242031.4,326843.1,477255.5,594189.9,906788.0,1295624.0,1692511.0,1477.139764
3518,429,IRN,NGDPPC,Islamic Republic of Iran,127922600.0,143495100.0,140029600.0,158126000.0,181854700.0,221576500.0,294410000.0,358328100.0,478509500.0,602037000.0,756372500.0,427.107023
2528,644,ETH,NGDPPC,Ethiopia,9969.71,12007.52,14460.31,16900.62,19780.63,23371.2,27915.34,34082.17,40059.14,47514.67,55353.95,360.994027
7433,366,SUR,NGDPPC,Suriname,30861.48,30950.71,28833.39,33853.33,41292.25,43813.88,46174.54,65087.76,105220.4,121973.7,137803.8,345.23641
8423,927,UZB,NGDPPC,Uzbekistan,4819320.0,5809697.0,6775182.0,7679910.0,9418809.0,12452220.0,15391080.0,17643180.0,20056560.0,22659590.0,25000100.0,330.316733
3203,263,HTI,NGDPPC,Haiti,35047.45,37125.3,39609.41,44392.35,50254.29,56802.9,65018.88,75462.61,93404.28,112160.5,131490.6,254.180465
8198,926,UKR,NGDPPC,Ukraine,32383.0,37112.4,46689.41,56238.89,70657.18,84809.21,95238.37,93165.84,101647.4,110946.2,121169.1,226.492116


#### _Visualizing the GDP per capita growth percentage for the decade for the top 10 countries_ 