Websites Used for Scraping:
i)Worldbank:inflation rate
ii)Wikipedia:GDP , population, exchange rates 
iii)Macrotrends:populations,debt
iv)Statista: wage
v)Our World in data:food price  

In [3]:
import requests
from bs4 import BeautifulSoup
import csv

url = "https://en.wikipedia.org/wiki/List_of_countries_and_dependencies_by_population"
response = requests.get(url)
soup = BeautifulSoup(response.content, 'html.parser')
table = soup.find('table', {'class': 'wikitable'})
with open('population_data.csv', mode='w', newline='', encoding='utf-8') as file:
    writer = csv.writer(file)
    writer.writerow(['Country (or dependency)', 'Population'])
    for row in table.find_all('tr')[1:]:
        cols = row.find_all(['td', 'th'])
        if len(cols) >= 2:
            country = cols[1].get_text(strip=True)
            population = cols[2].get_text(strip=True)
            writer.writerow([country, population])
print("CSV file 'population_data.csv' has been created.")

CSV file 'population_data.csv' has been created.


In [None]:
# gdp data from wikipedia
import requests
from bs4 import BeautifulSoup
import pandas as pd
url='https://en.wikipedia.org/wiki/List_of_countries_by_past_and_projected_GDP_(nominal)'
r=requests.get(url)
soup=BeautifulSoup(r.text,'html.parser')
tables=soup.find_all('table',{'class':'wikitable'})
dfs=[pd.read_html(str(t))[0] for t in tables]
df=pd.concat(dfs)
df.to_csv('gdp_data.csv',index=False)

In [None]:
# country wise urban population
import requests
from bs4 import BeautifulSoup
import csv

url = 'https://www.macrotrends.net/global-metrics/countries/ranking/urban-population'
headers = {'User-Agent': 'Mozilla/5.0'}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find('table', id='country_ranking')
headers = [th.text.strip() for th in table.find_all('th')]
rows = []
for tr in table.find_all('tr')[1:]:
    cells = [td.text.strip() for td in tr.find_all('td')]
    if cells:
        rows.append(cells)
filename = "urban_population.csv"
with open(filename, mode='w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(headers)
    writer.writerows(rows)
print(f"Data saved to '{filename}' successfully.")

Data saved to 'urban_population.csv' successfully.


In [132]:
# country wise rural population
import requests
from bs4 import BeautifulSoup
import csv

url = 'https://www.macrotrends.net/global-metrics/countries/ranking/rural-population'
headers = {'User-Agent': 'Mozilla/5.0'}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'html.parser')
table = soup.find('table', id='country_ranking')
headers = [th.text.strip() for th in table.find_all('th')]
rows = []
for tr in table.find_all('tr')[1:]:
    cells = [td.text.strip() for td in tr.find_all('td')]
    if cells:
        rows.append(cells)
filename = "rural_population.csv"
with open(filename, mode='w', newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(headers)
    writer.writerows(rows)
print(f"Data saved to '{filename}' successfully.")

Data saved to 'rural_population.csv' successfully.


In [15]:
# debt to gdp ratio :
# (total national debt)/(gdp)*100
import requests , re
from bs4 import BeautifulSoup

url = 'https://www.macrotrends.net/global-metrics/countries/ranking/debt-to-gdp-ratio'
headers = {'User-Agent': 'Mozilla/5.0'}
response = requests.get(url, headers=headers)
soup = BeautifulSoup(response.text, 'lxml')
table = soup.find('table', id='country_ranking')
df = pd.read_html(str(table))[0]
for col in df.columns[1:]:
    df[col] = df[col].str.replace('%','').astype(float)
filename = "debt_to_gdp_ratio.csv"
df.to_csv(filename)
print(f"Data saved to '{filename}' successfully.")

Data saved to 'debt_to_gdp_ratio.csv' successfully.


  df = pd.read_html(str(table))[0]


In [None]:
# raw inflation data 
import requests

url = "http://api.worldbank.org/v2/country/all/indicator/FP.CPI.TOTL.ZG?date=2010:2024&format=json&per_page=20000"
response = requests.get(url)
if response.status_code == 200:
    data = response.json()
    print(data)
else:
    print(f"Request failed with status code {response.status_code}")


[{'page': 1, 'pages': 1, 'per_page': 20000, 'total': 3990, 'sourceid': '2', 'lastupdated': '2025-04-15'}, [{'indicator': {'id': 'FP.CPI.TOTL.ZG', 'value': 'Inflation, consumer prices (annual %)'}, 'country': {'id': 'ZH', 'value': 'Africa Eastern and Southern'}, 'countryiso3code': 'AFE', 'date': '2024', 'value': None, 'unit': '', 'obs_status': '', 'decimal': 1}, {'indicator': {'id': 'FP.CPI.TOTL.ZG', 'value': 'Inflation, consumer prices (annual %)'}, 'country': {'id': 'ZH', 'value': 'Africa Eastern and Southern'}, 'countryiso3code': 'AFE', 'date': '2023', 'value': 7.12697482131851, 'unit': '', 'obs_status': '', 'decimal': 1}, {'indicator': {'id': 'FP.CPI.TOTL.ZG', 'value': 'Inflation, consumer prices (annual %)'}, 'country': {'id': 'ZH', 'value': 'Africa Eastern and Southern'}, 'countryiso3code': 'AFE', 'date': '2022', 'value': 10.7737512242899, 'unit': '', 'obs_status': '', 'decimal': 1}, {'indicator': {'id': 'FP.CPI.TOTL.ZG', 'value': 'Inflation, consumer prices (annual %)'}, 'country

In [None]:
# converting a raw json file into a procced good json file
def process_inflation_data(data):
    raw_entries = data[1] 

    country_data = {}

    for entry in raw_entries:
        country = entry['country']['value']
        year = int(entry['date'])
        value = entry['value']

        if value is None:
            continue

        if country not in country_data:
            country_data[country] = []

        country_data[country].append((year, value))
    for country in country_data:
        country_data[country].sort()
    return country_data
process_inflation_data(data)

{'Africa Eastern and Southern': [(2010, 5.53753791645677),
  (2011, 8.97120640995788),
  (2012, 9.15870843167905),
  (2013, 5.74883133553455),
  (2014, 5.37028961008587),
  (2015, 5.24587835022454),
  (2016, 6.59650490696543),
  (2017, 6.39934341569381),
  (2018, 4.72080474200535),
  (2019, 4.64496717329569),
  (2020, 5.40516207933392),
  (2021, 7.24097765588442),
  (2022, 10.7737512242899),
  (2023, 7.12697482131851)],
 'Africa Western and Central': [(2010, 1.78484420477113),
  (2011, 4.01869907534488),
  (2012, 4.57837463515948),
  (2013, 2.43920076104127),
  (2014, 1.76843582146219),
  (2015, 2.13081738058791),
  (2016, 1.48741550755546),
  (2017, 1.72548602374723),
  (2018, 1.78404994832039),
  (2019, 1.98309228487231),
  (2020, 2.49037783931944),
  (2021, 3.74570045027359),
  (2022, 7.94925143524016),
  (2023, 5.22116801323998)],
 'Arab World': [(2010, 3.91106195534028),
  (2011, 4.7531638888563),
  (2012, 4.6118443220664),
  (2013, 3.25423910998847),
  (2014, 2.77351613126123),
 

In [6]:
import pandas as pd
records = data[1]
filtered_records = [
    entry for entry in records
    if entry['country']['id'].isalpha() 
    and entry['countryiso3code'] not in ('WLD', '') 
    and entry['country']['value'].lower() not in [
        'world', 'euro area', 'low income', 'middle income', 'high income',
        'sub-saharan africa', 'south asia', 'east asia & pacific',
        'latin america & caribbean', 'north america', 'europe & central asia',
        'middle east & north africa', 'least developed countries',
        'emerging markets and developing economies', 'g20', 'g7'
    ]
]
df = pd.DataFrame([
    {
        'Country': entry['country']['value'],
        'Year': entry['date'],
        'Inflation Rate (%)': entry['value']
    }
    for entry in filtered_records if entry['value'] is not None
])
df_wide = df.pivot(index='Country', columns='Year', values='Inflation Rate (%)')
df_wide = df_wide[sorted(df_wide.columns, key=int)]  
df_wide.to_csv('Inflation_Rate_1.csv')