In [1]:
import pandas as pd
import numpy as np
import requests

In [2]:
pd.set_option('display.max_columns',None)
pd.set_option('display.max_rows',None)

In [7]:
import time
T_R = [
    "IT.NET.USER.ZS",     # Internet users (% of population)
    "IT.CMP.PCMP.P2",     # Personal computers (per 100 people)
    "IP.PAT.RESD",        # Patent applications, residents
    "GB.XPD.RSDV.GD.ZS",  # R&D expenditure (% of GDP)
    "SE.ADT.LITR.ZS",     # Adult literacy rate
    "SE.TER.ENRL",        # Tertiary education enrollment (% gross)
    "BX.GSR.CCIS.ZS",     # ICT service exports (BoP, current US$)
    "TX.VAL.ICTG.ZS.UN"    # ICT goods exports (% of total goods exports)
]


base_url = "https://api.worldbank.org/v2/countries/all/indicators/{}?format=json&per_page=1000&page={}"
indicator_dataframe = {}
for i in T_R:
  print(f"Fetching Data For indicator : {i}")

  all_dfs = []
  page = 1
  fault = 1
  while True:
    url = base_url.format(i,page)
    response = requests.get(url)

    if response.status_code!=200:
      if fault>2:
        break
      else:
        print(f"No Data for Page : {page}")
        page +=1
        fault += 1
        continue
    else:

      try:
        data = response.json()
      except Exception as e:
        if fault>2:
          break
        else:
          print(f"{e} Error while Loading page : {page}")
          page += 1
          fault += 1
          continue

      if len(data)<2 or not data[1]:
        if fault>2:
          break
        else:
          print(f"No Data For Indicator {i} on page {page}")
          page += 1
          fault += 1
          continue

      print(f"Page : {page} fetched for indicator {i}")

      total_pages = data[0]['pages']
      record = data[1]

      if page>total_pages:
        print(f"Current Page : {page} | Total Page : {total_pages}")
        break
      else:
        df = pd.json_normalize(record)
        df = df[[ "country.id","country.value","indicator.id","indicator.value","date","value"]].rename(columns={
                "country.id":"country_id",
                "country.value":"country_value","indicator.id":"indicator_id","indicator.value":"indicator_name","date":"year"})

        df = df[df["year"].astype(int) > 2000]
        all_dfs.append(df)

        page += 1
        time.sleep(0.3)

  if all_dfs:

    combined_df = pd.concat(all_dfs, ignore_index=True)
    indicator_dataframe[i] = combined_df
    print(f"📊 Total rows collected for {i}: {len(combined_df)}")
  else:
    print(f"⚠️ No data collected for {i}")



Fetching Data For indicator : IT.NET.USER.ZS
Page : 1 fetched for indicator IT.NET.USER.ZS
Page : 2 fetched for indicator IT.NET.USER.ZS
Page : 3 fetched for indicator IT.NET.USER.ZS
Page : 4 fetched for indicator IT.NET.USER.ZS
Page : 5 fetched for indicator IT.NET.USER.ZS
Page : 6 fetched for indicator IT.NET.USER.ZS
Page : 7 fetched for indicator IT.NET.USER.ZS
Page : 8 fetched for indicator IT.NET.USER.ZS
Page : 9 fetched for indicator IT.NET.USER.ZS
Page : 10 fetched for indicator IT.NET.USER.ZS
Page : 11 fetched for indicator IT.NET.USER.ZS
Page : 12 fetched for indicator IT.NET.USER.ZS
Page : 13 fetched for indicator IT.NET.USER.ZS
Page : 14 fetched for indicator IT.NET.USER.ZS
Page : 15 fetched for indicator IT.NET.USER.ZS
Page : 16 fetched for indicator IT.NET.USER.ZS
Page : 17 fetched for indicator IT.NET.USER.ZS
Page : 18 fetched for indicator IT.NET.USER.ZS
No Data For Indicator IT.NET.USER.ZS on page 19
No Data For Indicator IT.NET.USER.ZS on page 20
📊 Total rows collected

In [8]:
all_indicators = []
for key, df in indicator_dataframe.items():
    temp = df[['indicator_name',"country_value", "year", "value"]].copy()
    all_indicators.append(temp)

In [9]:
all_indicators = pd.concat(all_indicators)
wide_indicators = all_indicators.pivot_table(
    index=["country_value", "year"],
    columns="indicator_name",
    values="value"
).reset_index()
wide_indicators

indicator_name,country_value,year,"Enrolment in tertiary education, all programmes, both sexes (number)",ICT goods exports (% of total goods exports),"ICT service exports (% of service exports, BoP)",Individuals using the Internet (% of population),"Literacy rate, adult total (% of people ages 15 and above)","Patent applications, residents",Personal computers (per 100 people),Research and development expenditure (% of GDP)
0,Afghanistan,2001,,,,0.004723,,,,
1,Afghanistan,2002,,,,0.004561,,,,
2,Afghanistan,2003,26211.0,,,0.087891,,,,
3,Afghanistan,2004,27648.0,,,0.105809,,,,
4,Afghanistan,2005,,,,1.22415,,,0.331329,
5,Afghanistan,2006,,,,2.10712,,,0.378314,
6,Afghanistan,2007,,,,1.9,,,,
7,Afghanistan,2008,,,8.480139,1.84,,,,
8,Afghanistan,2009,95185.0,,7.005129,3.55,,,,
9,Afghanistan,2010,,,8.96741,4.0,,,,


In [10]:
wide_indicators['year'] = wide_indicators['year'].astype('int')
wide_indicators[wide_indicators['year']<=2022].info()

<class 'pandas.core.frame.DataFrame'>
Index: 5668 entries, 0 to 6080
Data columns (total 10 columns):
 #   Column                                                                Non-Null Count  Dtype  
---  ------                                                                --------------  -----  
 0   country_value                                                         5668 non-null   object 
 1   year                                                                  5668 non-null   int32  
 2   Enrolment in tertiary education, all programmes, both sexes (number)  3285 non-null   float64
 3   ICT goods exports (% of total goods exports)                          4275 non-null   float64
 4   ICT service exports (% of service exports, BoP)                       4325 non-null   float64
 5   Individuals using the Internet (% of population)                      4922 non-null   float64
 6   Literacy rate, adult total (% of people ages 15 and above)            1656 non-null   float64
 7   Pa

In [11]:
wide_indicators.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6082 entries, 0 to 6081
Data columns (total 10 columns):
 #   Column                                                                Non-Null Count  Dtype  
---  ------                                                                --------------  -----  
 0   country_value                                                         6082 non-null   object 
 1   year                                                                  6082 non-null   int32  
 2   Enrolment in tertiary education, all programmes, both sexes (number)  3285 non-null   float64
 3   ICT goods exports (% of total goods exports)                          4275 non-null   float64
 4   ICT service exports (% of service exports, BoP)                       4677 non-null   float64
 5   Individuals using the Internet (% of population)                      5218 non-null   float64
 6   Literacy rate, adult total (% of people ages 15 and above)            1781 non-null   float64
 7

In [12]:
wide_indicators.to_csv('tech&research_df.csv',index=False)