In [3]:
import pandas as pd
import numpy as np
import requests

In [4]:
pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows',None)


# Industrialization Data

In [5]:
import time
Industrialization =  [
    # Industry & Manufacturing Base
    "NV.IND.MANF.ZS",   # Manufacturing, value added (% of GDP)
    "NV.IND.TOTL.ZS",   # Industry (incl. construction), value added (% of GDP)

    # Employment
    "SL.IND.EMPL.ZS",   # Employment in industry (% of total employment)

    # Energy & Power (industrial backbone)
    "EG.USE.PCAP.KG.OE",  # Energy use (kg of oil equivalent per capita)
    "EG.USE.ELEC.KH.PC",  # Electric power consumption (kWh per capita)

    # Infrastructure & Logistics
    "IS.AIR.GOOD.MT.K1",  # Air transport, freight (million ton-km)
    "IS.SHP.GOOD.TU",     # Container port traffic (TEU: 20-foot equivalent units)

    # Environment (industrial side-effects)
    "EN.ATM.CO2E.PC",     # CO₂ emissions (metric tons per capita)
    "EN.ATM.PM25.MC.M3"   # PM2.5 air pollution, mean annual exposure (µg/m³)
]



base_url = "https://api.worldbank.org/v2/countries/all/indicators/{}?format=json&per_page=1000&page={}"
indicator_dataframe = {}
for i in Industrialization:
  print(f"Fetching Data For indicator : {i}")

  all_dfs = []
  page = 1
  fault = 1
  while True:
    url = base_url.format(i,page)
    response = requests.get(url)

    if response.status_code!=200:
      if fault>2:
        break
      else:
        print(f"No Data for Page : {page}")
        page +=1
        fault += 1
        continue
    else:

      try:
        data = response.json()
      except Exception as e:
        if fault>2:
          break
        else:
          print(f"{e} Error while Loading page : {page}")
          page += 1
          fault += 1
          continue

      if len(data)<2 or not data[1]:
        if fault>2:
          break
        else:
          print(f"No Data For Indicator {i} on page {page}")
          page += 1
          fault += 1
          continue

      print(f"Page : {page} fetched for indicator {i}")

      total_pages = data[0]['pages']
      record = data[1]

      if page>total_pages:
        print(f"Current Page : {page} | Total Page : {total_pages}")
        break
      else:
        df = pd.json_normalize(record)
        df = df[[ "country.id","country.value","indicator.id","indicator.value","date","value"]].rename(columns={
                "country.id":"country_id",
                "country.value":"country_value","indicator.id":"indicator_id","indicator.value":"indicator_name","date":"year"})

        df = df[df["year"].astype(int) > 2000]
        all_dfs.append(df)

        page += 1
        time.sleep(0.3)

  if all_dfs:

    combined_df = pd.concat(all_dfs, ignore_index=True)
    indicator_dataframe[i] = combined_df
    print(f"📊 Total rows collected for {i}: {len(combined_df)}")
  else:
    print(f"⚠️ No data collected for {i}")


Fetching Data For indicator : NV.IND.MANF.ZS
Page : 1 fetched for indicator NV.IND.MANF.ZS
Page : 2 fetched for indicator NV.IND.MANF.ZS
Page : 3 fetched for indicator NV.IND.MANF.ZS
Page : 4 fetched for indicator NV.IND.MANF.ZS
Page : 5 fetched for indicator NV.IND.MANF.ZS
Page : 6 fetched for indicator NV.IND.MANF.ZS
Page : 7 fetched for indicator NV.IND.MANF.ZS
Page : 8 fetched for indicator NV.IND.MANF.ZS
Page : 9 fetched for indicator NV.IND.MANF.ZS
Page : 10 fetched for indicator NV.IND.MANF.ZS
Page : 11 fetched for indicator NV.IND.MANF.ZS
Page : 12 fetched for indicator NV.IND.MANF.ZS
Page : 13 fetched for indicator NV.IND.MANF.ZS
Page : 14 fetched for indicator NV.IND.MANF.ZS
Page : 15 fetched for indicator NV.IND.MANF.ZS
Page : 16 fetched for indicator NV.IND.MANF.ZS
Page : 17 fetched for indicator NV.IND.MANF.ZS
Page : 18 fetched for indicator NV.IND.MANF.ZS
No Data For Indicator NV.IND.MANF.ZS on page 19
No Data For Indicator NV.IND.MANF.ZS on page 20
📊 Total rows collected

In [6]:
all_indicators = []
for key, df in indicator_dataframe.items():
    temp = df[['indicator_name',"country_value", "year", "value"]].copy()
    all_indicators.append(temp)

In [7]:
all_indicators = pd.concat(all_indicators)
wide_indicators = all_indicators.pivot_table(
    index=["country_value", "year"],
    columns="indicator_name",
    values="value"
).reset_index()
wide_indicators

indicator_name,country_value,year,"Air transport, freight (million ton-km)",Container port traffic (TEU: 20 foot equivalent units),Electric power consumption (kWh per capita),Employment in industry (% of total employment) (modeled ILO estimate),Energy use (kg of oil equivalent per capita),"Industry (including construction), value added (% of GDP)","Manufacturing, value added (% of GDP)","PM2.5 air pollution, mean annual exposure (micrograms per cubic meter)"
0,Afghanistan,2001,,,,9.833793,,,,64.597573
1,Afghanistan,2002,,,,10.013898,,23.810127,18.822752,64.416888
2,Afghanistan,2003,,,,10.340424,,22.710864,16.923866,64.176231
3,Afghanistan,2004,,,,10.808873,,26.22679,17.554006,63.826609
4,Afghanistan,2005,,,,11.227758,,26.812099,16.598212,63.319026
5,Afghanistan,2006,,,,11.63467,,28.210768,16.385537,61.514649
6,Afghanistan,2007,,,,11.860039,,26.882242,17.74731,58.083785
7,Afghanistan,2008,,,,12.418676,,26.915628,17.839116,54.191144
8,Afghanistan,2009,,,,13.583731,,21.897122,13.149877,51.001437
9,Afghanistan,2010,108.019487,,,14.85912,,21.151421,12.522577,49.679377


In [8]:
wide_indicators['year'] = wide_indicators['year'].astype('int')
wide_indicators[wide_indicators['year']<=2022].info()


<class 'pandas.core.frame.DataFrame'>
Index: 5749 entries, 0 to 6195
Data columns (total 10 columns):
 #   Column                                                                  Non-Null Count  Dtype  
---  ------                                                                  --------------  -----  
 0   country_value                                                           5749 non-null   object 
 1   year                                                                    5749 non-null   int32  
 2   Air transport, freight (million ton-km)                                 4012 non-null   float64
 3   Container port traffic (TEU: 20 foot equivalent units)                  3436 non-null   float64
 4   Electric power consumption (kWh per capita)                             4245 non-null   float64
 5   Employment in industry (% of total employment) (modeled ILO estimate)   5169 non-null   float64
 6   Energy use (kg of oil equivalent per capita)                            4373 non-null

In [9]:
wide_indicators.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6198 entries, 0 to 6197
Data columns (total 10 columns):
 #   Column                                                                  Non-Null Count  Dtype  
---  ------                                                                  --------------  -----  
 0   country_value                                                           6198 non-null   object 
 1   year                                                                    6198 non-null   int32  
 2   Air transport, freight (million ton-km)                                 4012 non-null   float64
 3   Container port traffic (TEU: 20 foot equivalent units)                  3436 non-null   float64
 4   Electric power consumption (kWh per capita)                             4290 non-null   float64
 5   Employment in industry (% of total employment) (modeled ILO estimate)   5401 non-null   float64
 6   Energy use (kg of oil equivalent per capita)                            4421 non

In [10]:
wide_indicators.to_csv('Industrialization_df.csv',index=False)