# Getting Data from IMF World Economic Outlook Reports

In [1]:
import pandas as pd
from functools import reduce

### Reading in Data

In [2]:
data = pd.read_csv("WEO_initial.csv")

In [3]:
data.head()

Unnamed: 0,WEO Country Code,ISO,WEO Subject Code,Country,Subject Descriptor,Subject Notes,Units,Scale,Country/Series-specific Notes,1980,...,2016,2017,2018,2019,2020,2021,2022,2023,2024,Estimates Start After
0,512,AFG,NGDP_R,Afghanistan,"Gross domestic product, constant prices",Expressed in billions of national currency uni...,National currency,Billions,Source: National Statistics Office Latest actu...,,...,493.073,506.215,517.858,533.394,552.063,574.127,599.933,629.88,664.452,2017.0
1,512,AFG,NGDP_RPCH,Afghanistan,"Gross domestic product, constant prices",Annual percentages of constant price GDP are y...,Percent change,,"See notes for: Gross domestic product, consta...",,...,2.164,2.665,2.3,3.0,3.5,3.997,4.495,4.992,5.489,2017.0
2,512,AFG,NGDP,Afghanistan,"Gross domestic product, current prices",Expressed in billions of national currency uni...,National currency,Billions,Source: National Statistics Office Latest actu...,,...,1318.48,1377.54,1418.13,1488.86,1595.05,1733.54,1902.11,2096.95,2322.65,2017.0
3,512,AFG,NGDPD,Afghanistan,"Gross domestic product, current prices",Values are based upon GDP in national currency...,U.S. dollars,Billions,"See notes for: Gross domestic product, curren...",,...,19.428,20.235,19.585,19.99,20.682,21.928,23.577,25.45,27.608,2017.0
4,512,AFG,PPPGDP,Afghanistan,"Gross domestic product, current prices",These data form the basis for the country weig...,Purchasing power parity; international dollars,Billions,"See notes for: Gross domestic product, curren...",,...,66.384,69.449,72.648,76.158,80.47,85.426,91.113,97.643,105.158,2017.0


### Cleaning

In [4]:
data.drop(["Subject Descriptor","WEO Country Code", "ISO", "Subject Notes",
           "Country/Series-specific Notes", "Units", "Scale", "2017", "2018",
           "2019", "2020", "2021", "2022", "2023", "2024" , "Estimates Start After"],
          axis=1, inplace=True)

data2 = pd.melt(data, id_vars=["WEO Subject Code", "Country"], 
                  var_name="Year", value_name="Value")

data2.rename(columns = {'WEO Subject Code':'WEO_SC'}, inplace = True)

data2 = data2[["Year", "Country", "Value", "WEO_SC", ]]

data3 = data2.replace(to_replace ="Vietnam", value ="Viet Nam")
data3 = data2.replace(to_replace ="Venezuela", value ="Venezuela, Bolivarian Republic of")
data3 = data2.replace(to_replace ="Tanzania", value ="Tanzania, United Republic of")
data3 = data2.replace(to_replace ="Taiwan Province of China", value ="Taiwan, Province of China")
data3 = data2.replace(to_replace ="Syria", value ="Syrian Arab Republic")
data3 = data2.replace(to_replace ="Slovak Republic", value ="Slovakia")
data3 = data2.replace(to_replace ="São Tomé and Príncipe", value ="Sao Tome and Principe")
data3 = data2.replace(to_replace ="Russia", value ="Russian Federation")
data3 = data2.replace(to_replace ="Moldova", value ="Moldova, Republic of")
data3 = data2.replace(to_replace ="Micronesia", value ="Micronesia, Federated States of")
data3 = data2.replace(to_replace ="Macao SAR", value ="Macao")
data3 = data2.replace(to_replace ="Lao P.D.R.", value ="Lao People's Democratic Republic")
data3 = data2.replace(to_replace ="Kyrgyz Republic", value ="Kyrgyzstan")
data3 = data2.replace(to_replace ="Korea", value ="Micronesia, Federated States of")
data3 = data2.replace(to_replace ="Islamic Republic of Iran", value ="Iran, Islamic Republic of")
data3 = data2.replace(to_replace ="Hong Kong SAR", value ="Hong Kong")
data3 = data2.replace(to_replace ="Democratic Republic of the Congo", value ="Congo")
data3 = data2.replace(to_replace ="Czech Republic", value ="Czechia")
data3 = data2.replace(to_replace ="Bolivia", value ="Bolivia, Plurinational State of")

print(data3)

NGDP_R = data3[data3.WEO_SC == 'NGDP_R']
NGDP_R.drop(["WEO_SC"],axis=1, inplace=True)
NGDP_R.rename(columns = {'Value':'NGDP_R'}, inplace = True)


NGDP_RPCH = data3[data3.WEO_SC == 'NGDP_RPCH']
NGDP_RPCH.drop(["WEO_SC"],axis=1, inplace=True)
NGDP_RPCH.rename(columns = {'Value':'NGDP_RPCH'}, inplace = True)

PPPEX = data3[data3.WEO_SC == 'PPPEX']
PPPEX.drop(["WEO_SC"],axis=1, inplace=True)
PPPEX.rename(columns = {'Value':'PPPEX'}, inplace = True)

PPPSH = data3[data3.WEO_SC == 'PPPSH']
PPPSH.drop(["WEO_SC"],axis=1, inplace=True)
PPPSH.rename(columns = {'Value':'PPPSH'}, inplace = True)

FLIBOR6 = data3[data3.WEO_SC == 'FLIBOR6']
FLIBOR6.drop(["WEO_SC"],axis=1, inplace=True)
FLIBOR6.rename(columns = {'Value':'FLIBOR6'}, inplace = True)

TM_RPCH = data3[data3.WEO_SC == 'TM_RPCH']
TM_RPCH.drop(["WEO_SC"],axis=1, inplace=True)
TM_RPCH.rename(columns = {'Value':'TM_RPCH'}, inplace = True)

TX_RPCH = data3[data3.WEO_SC == 'TX_RPCH']
TX_RPCH.drop(["WEO_SC"],axis=1, inplace=True)
TX_RPCH.rename(columns = {'Value':'TX_RPCH'}, inplace = True)

LUR = data3[data3.WEO_SC == 'LUR']
LUR.drop(["WEO_SC"],axis=1, inplace=True)
LUR.rename(columns = {'Value':'LUR'}, inplace = True)

LE = data3[data3.WEO_SC == 'LE']
LE.drop(["WEO_SC"],axis=1, inplace=True)
LE.rename(columns = {'Value':'LE'}, inplace = True)

LP = data3[data3.WEO_SC == 'LP']
LP.drop(["WEO_SC"],axis=1, inplace=True)
LP.rename(columns = {'Value':'LP'}, inplace = True)

GGXONLB_NGDP = data3[data3.WEO_SC == 'GGXONLB_NGDP']
GGXONLB_NGDP.drop(["WEO_SC"],axis=1, inplace=True)
GGXONLB_NGDP.rename(columns = {'Value':'GGXONLB_NGDP'}, inplace = True)

NGDP_FY = data3[data3.WEO_SC == 'NGDP_FY']
NGDP_FY.drop(["WEO_SC"],axis=1, inplace=True)
NGDP_FY.rename(columns = {'Value':'NGDP_FY'}, inplace = True)

data_frames = [NGDP_R, NGDP_RPCH, PPPEX, PPPSH, FLIBOR6, TM_RPCH, TX_RPCH, LUR, LE, LP, GGXONLB_NGDP, NGDP_FY ]

weo_merged = reduce(lambda  left,right: pd.merge(left,right,on=['Country', 'Year'],
                                            how='outer'), data_frames)

        Year      Country   Value     WEO_SC
0       1980  Afghanistan     NaN     NGDP_R
1       1980  Afghanistan     NaN  NGDP_RPCH
2       1980  Afghanistan     NaN       NGDP
3       1980  Afghanistan     NaN      NGDPD
4       1980  Afghanistan     NaN     PPPGDP
...      ...          ...     ...        ...
323079  2016     Zimbabwe  20.806    NGDP_FY
323080  2016     Zimbabwe  -0.718        BCA
323081  2016     Zimbabwe   -3.58  BCA_NGDPD
323082  2016          NaN     NaN        NaN
323083  2016          NaN     NaN        NaN

[323084 rows x 4 columns]


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  errors=errors,


### Writing to CSV

In [5]:
weo_merged.to_csv('WEO_Clean.csv', index = False)