In [2]:
# Import libraries
import pandas as pd 
import numpy as np 
import requests 

In [3]:
# Show every column when a DataFrame is displayed (None removes the column cap).
pd.options.display.max_columns = None

# Economic Data


In [4]:
import time

# World Bank indicator codes that make up the "economic prosperity" theme.
Economic_Prosperity = [
    "NY.GDP.PCAP.CD",       # GDP per capita (current US$)
    "NY.GDP.PCAP.PP.CD",    # GDP per capita, PPP (current international $)
    "NY.GDP.MKTP.KD.ZG",    # GDP growth (annual %)
    "NY.GDP.MKTP.CD",       # GDP (current US$)
    "SL.UEM.TOTL.ZS",       # Unemployment, total (% of labor force)
    "SL.EMP.TOTL.SP.ZS",    # Employment-to-population ratio (% ages 15+)
    "FP.CPI.TOTL.ZG",       # Inflation, consumer prices (annual %)
    "NE.EXP.GNFS.ZS",       # Exports of goods & services (% of GDP)
    "NE.IMP.GNFS.ZS",       # Imports of goods & services (% of GDP)
]

# URL template: the first placeholder is the indicator code, the second the page number.
base_url = "https://api.worldbank.org/v2/countries/all/indicators/{}?format=json&per_page=1000&page={}"

MAX_ATTEMPTS_PER_PAGE = 3      # tries for a single page before giving up on the indicator
REQUEST_TIMEOUT_SECONDS = 30   # without a timeout a stalled connection blocks the cell forever
RETRY_DELAY_SECONDS = 1.0      # pause before retrying the same page
PAGE_DELAY_SECONDS = 0.3       # pause between successive pages
MIN_YEAR_EXCLUSIVE = 2000      # only observations with year > this value are kept

# Columns taken from the normalised API records, and the names they are given.
_SOURCE_COLUMNS = ["country.id", "country.value", "indicator.id", "indicator.value", "date", "value"]
_COLUMN_RENAMES = {
    "country.id": "country_id",
    "country.value": "country_value",
    "indicator.id": "indicator_id",
    "indicator.value": "indicator_name",
    "date": "year",
}


def _fetch_page(indicator_code, page):
    """Request one result page for an indicator.

    Returns:
        ``(metadata, records)`` when a JSON body was received; ``records`` is an
        empty list when the body carries no rows. ``None`` when the request
        itself failed (connection error/timeout, non-200 status, or a body
        that is not valid JSON), which the caller treats as retryable.
    """
    url = base_url.format(indicator_code, page)
    try:
        response = requests.get(url, timeout=REQUEST_TIMEOUT_SECONDS)
    except requests.RequestException as e:
        print(f"{e} Error while Loading page : {page}")
        return None

    if response.status_code != 200:
        print(f"No Data for Page : {page}")
        return None

    try:
        data = response.json()
    except ValueError as e:  # JSON decode errors are ValueError subclasses
        print(f"{e} Error while Loading page : {page}")
        return None

    # The loop expects a two-element list: [metadata, records].
    # Anything shorter is treated as "no rows" rather than as a retryable failure.
    if not isinstance(data, list) or len(data) < 2:
        return {}, []
    metadata = data[0] if isinstance(data[0], dict) else {}
    return metadata, data[1] or []


def _records_to_frame(records):
    """Flatten API records into a tidy frame restricted to years after MIN_YEAR_EXCLUSIVE."""
    frame = pd.json_normalize(records)
    frame = frame[_SOURCE_COLUMNS].rename(columns=_COLUMN_RENAMES)
    return frame[frame["year"].astype(int) > MIN_YEAR_EXCLUSIVE]


def fetch_indicator(indicator_code):
    """Download every page of one indicator and return the per-page DataFrames.

    A page that fails is retried (up to MAX_ATTEMPTS_PER_PAGE times) instead of
    being skipped, so a transient error does not silently drop its rows.
    Paging stops as soon as the page count reported in the response metadata
    is reached, so no requests are spent probing past the last page.

    Returns:
        A list of DataFrames (possibly empty). If a page still fails after all
        attempts, the pages collected so far are returned and a warning is
        printed, because the result is then incomplete.
    """
    frames = []
    page = 1
    while True:
        result = None
        for attempt in range(1, MAX_ATTEMPTS_PER_PAGE + 1):
            result = _fetch_page(indicator_code, page)
            if result is not None:
                break
            if attempt < MAX_ATTEMPTS_PER_PAGE:
                time.sleep(RETRY_DELAY_SECONDS)

        if result is None:
            print(
                f"Giving up on indicator {indicator_code} at page {page} after "
                f"{MAX_ATTEMPTS_PER_PAGE} failed attempts; collected data is incomplete"
            )
            break

        metadata, records = result
        if not records:
            print(f"No Data For Indicator {indicator_code} on page {page}")
            break

        print(f"Page : {page} fetched for indicator {indicator_code}")
        frames.append(_records_to_frame(records))

        # Stop at the last page the API reports; fall back to the current page
        # (i.e. stop) if the metadata does not say how many pages exist.
        total_pages = int(metadata.get("pages", page))
        if page >= total_pages:
            break

        page += 1
        time.sleep(PAGE_DELAY_SECONDS)

    return frames


# Maps indicator code -> combined DataFrame of all fetched pages for that indicator.
indicator_dataframe = {}
for indicator_code in Economic_Prosperity:
    print(f"Fetching Data For indicator : {indicator_code}")

    all_dfs = fetch_indicator(indicator_code)

    if all_dfs:
        combined_df = pd.concat(all_dfs, ignore_index=True)
        indicator_dataframe[indicator_code] = combined_df
        print(f"üìä Total rows collected for {indicator_code}: {len(combined_df)}")
    else:
        print(f"‚ö†Ô∏è No data collected for {indicator_code}")


Fetching Data For indicator : NY.GDP.PCAP.CD
Page : 1 fetched for indicator NY.GDP.PCAP.CD
Page : 2 fetched for indicator NY.GDP.PCAP.CD
Page : 3 fetched for indicator NY.GDP.PCAP.CD
Page : 4 fetched for indicator NY.GDP.PCAP.CD
Page : 5 fetched for indicator NY.GDP.PCAP.CD
Page : 6 fetched for indicator NY.GDP.PCAP.CD
Page : 7 fetched for indicator NY.GDP.PCAP.CD
Page : 8 fetched for indicator NY.GDP.PCAP.CD
Page : 9 fetched for indicator NY.GDP.PCAP.CD
Page : 10 fetched for indicator NY.GDP.PCAP.CD
Page : 11 fetched for indicator NY.GDP.PCAP.CD
Page : 12 fetched for indicator NY.GDP.PCAP.CD
Page : 13 fetched for indicator NY.GDP.PCAP.CD
Page : 14 fetched for indicator NY.GDP.PCAP.CD
Page : 15 fetched for indicator NY.GDP.PCAP.CD
Page : 16 fetched for indicator NY.GDP.PCAP.CD
Page : 17 fetched for indicator NY.GDP.PCAP.CD
Page : 18 fetched for indicator NY.GDP.PCAP.CD
No Data For Indicator NY.GDP.PCAP.CD on page 19
No Data For Indicator NY.GDP.PCAP.CD on page 20
üìä Total rows collec

In [30]:
# Collect, for every fetched indicator, a copy of just the columns needed for
# the long -> wide reshape; copying leaves the frames in `indicator_dataframe` untouched.
all_indicators = [
    frame[['indicator_name', "country_value", "year", "value"]].copy()
    for frame in indicator_dataframe.values()
]

In [6]:
# Shape of the distinct `country_value` entries in the GDP (current US$) frame.
indicator_dataframe['NY.GDP.MKTP.CD']['country_value'].drop_duplicates().shape

(266,)

In [31]:
# Stack the per-indicator frames into one long table.
# The list is rebuilt from `indicator_dataframe` here instead of reusing the
# existing `all_indicators` list: this cell rebinds `all_indicators` from a list
# to a DataFrame, so running it a second time would otherwise hand pd.concat a
# bare DataFrame, which it rejects. Built this way the cell can be re-run safely.
all_indicators = pd.concat(
    [
        frame[["indicator_name", "country_value", "year", "value"]]
        for frame in indicator_dataframe.values()
    ]
)

# Reshape long -> wide: one row per (country_value, year), one column per indicator_name.
# pivot_table aggregates with the mean by default, so any duplicate
# (country_value, year, indicator_name) observations would be averaged.
wide_indicators = all_indicators.pivot_table(
    index=["country_value", "year"],
    columns="indicator_name",
    values="value",
).reset_index()

In [32]:
# Display the pivoted table: one row per (country_value, year), one column per indicator.
wide_indicators

indicator_name,country_value,year,"Employment to population ratio, 15+, total (%) (modeled ILO estimate)",Exports of goods and services (% of GDP),GDP (current US$),GDP growth (annual %),GDP per capita (current US$),"GDP per capita, PPP (current international $)",Imports of goods and services (% of GDP),"Inflation, consumer prices (annual %)","Unemployment, total (% of total labor force) (modeled ILO estimate)"
0,Afghanistan,2001,42.832,,2.813572e+09,-9.431974,138.706822,747.688045,,,7.953
1,Afghanistan,2002,42.820,,3.825701e+09,28.600001,178.954088,926.507941,,,7.930
2,Afghanistan,2003,42.835,,4.520947e+09,8.832278,198.871116,966.962032,,,7.880
3,Afghanistan,2004,42.833,,5.224897e+09,1.414118,221.763654,971.633503,,,7.899
4,Afghanistan,2005,42.863,,6.203257e+09,11.229715,254.184249,1076.087353,,12.686269,7.885
...,...,...,...,...,...,...,...,...,...,...,...
6264,Zimbabwe,2020,59.180,22.293071,2.686856e+10,-7.816933,1730.453910,3510.676040,25.020311,557.201817,8.621
6265,Zimbabwe,2021,59.159,22.775238,2.724051e+10,8.468039,1724.387271,3184.784602,28.071885,98.546105,9.540
6266,Zimbabwe,2022,58.636,27.872170,3.278966e+10,6.139176,2040.546587,3560.039403,36.891437,104.705171,10.087
6267,Zimbabwe,2023,59.308,21.579400,3.523137e+10,5.336770,2156.034093,3820.359922,29.215564,,8.759


In [33]:
# List the column labels; the indicator columns are named after `indicator_name` values.
wide_indicators.columns

Index(['country_value', 'year',
       'Employment to population ratio, 15+, total (%) (modeled ILO estimate)',
       'Exports of goods and services (% of GDP)', 'GDP (current US$)',
       'GDP growth (annual %)', 'GDP per capita (current US$)',
       'GDP per capita, PPP (current international $)',
       'Imports of goods and services (% of GDP)',
       'Inflation, consumer prices (annual %)',
       'Unemployment, total (% of total labor force) (modeled ILO estimate)'],
      dtype='object', name='indicator_name')

In [34]:
# Summarise dtypes and non-null counts to see how completely each indicator is populated.
wide_indicators.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6269 entries, 0 to 6268
Data columns (total 11 columns):
 #   Column                                                                 Non-Null Count  Dtype  
---  ------                                                                 --------------  -----  
 0   country_value                                                          6269 non-null   object 
 1   year                                                                   6269 non-null   object 
 2   Employment to population ratio, 15+, total (%) (modeled ILO estimate)  5631 non-null   float64
 3   Exports of goods and services (% of GDP)                               5317 non-null   float64
 4   GDP (current US$)                                                      6161 non-null   float64
 5   GDP growth (annual %)                                                  6094 non-null   float64
 6   GDP per capita (current US$)                                           6167 non-null   f

In [35]:
# cols = [
#     'Exports of goods and services (% of GDP)',
#     'GDP (current US$)',
#     'GDP growth (annual %)',
#     'GDP per capita (current US$)',
#     'GDP per capita, PPP (current international $)',
#     'Imports of goods and services (% of GDP)',
#     'Inflation, consumer prices (annual %)'
# ]

# # Check rows where all of these columns are null
# all_null = wide_indicators[cols].isnull().all(axis=1)

# # Show rows where all are null
# wide_indicators[all_null]


In [38]:
# idx = wide_indicators[all_null].index
# wide_indicators = wide_indicators.drop(idx)

In [39]:
# Re-check row count and non-null counts.
# NOTE(review): the captured output below reports 6204 rows, whereas the earlier
# .info() call reported 6269. The only row-dropping code in between is commented
# out, so this output was presumably produced while that drop was still active;
# a fresh Restart & Run All would likely show 6269 here - confirm which is intended.
wide_indicators.info()

<class 'pandas.core.frame.DataFrame'>
Index: 6204 entries, 0 to 6268
Data columns (total 11 columns):
 #   Column                                                                 Non-Null Count  Dtype  
---  ------                                                                 --------------  -----  
 0   country_value                                                          6204 non-null   object 
 1   year                                                                   6204 non-null   object 
 2   Employment to population ratio, 15+, total (%) (modeled ILO estimate)  5566 non-null   float64
 3   Exports of goods and services (% of GDP)                               5317 non-null   float64
 4   GDP (current US$)                                                      6161 non-null   float64
 5   GDP growth (annual %)                                                  6094 non-null   float64
 6   GDP per capita (current US$)                                           6167 non-null   float6

In [40]:
# a = wide_indicators.dropna(thresh=len(wide_indicators.columns) - 4 + 1)
# a['year'].value_counts().sort_index()

In [28]:
# wide_indicators = wide_indicators.fillna(-1.0)

In [41]:
# Save the wide indicator table to "Economic_df.csv" (relative path), omitting the index column.
wide_indicators.to_csv("Economic_df.csv",index=False)