In [1]:
import requests
from bs4 import BeautifulSoup
import json
import sys
import os

In [12]:
# Download the EduRank page at /geo/gh/ and parse the HTML into a BeautifulSoup tree.
# requests applies no timeout by default, so one is set to keep the cell from hanging
# indefinitely; raise_for_status() stops here on an HTTP error response instead of
# silently parsing an error page into an empty result.
page = requests.get('https://edurank.org/geo/gh/', timeout=30)
page.raise_for_status()
soup = BeautifulSoup(page.text, 'html.parser')

In [24]:
# Find all university blocks.
# find_all is the current Beautiful Soup method name (findAll is a deprecated alias) and
# matches the find_all call used for the rank elements further down.
universities = soup.find_all('div', {'class': 'block-cont pt-4 mb-4'})


def _strip_list_number(heading_text):
    """Return heading_text without a leading list number such as "1.".

    The first whitespace-separated token is dropped only when it is numeric
    (optionally followed by dots); otherwise the stripped text is returned
    unchanged so that a heading without a number keeps its first word.
    """
    cleaned = heading_text.strip()
    tokens = cleaned.split(maxsplit=1)
    if len(tokens) == 2 and tokens[0].rstrip('.').isdigit():
        return tokens[1]
    return cleaned


def _rank_text(rank_tags, index):
    """Return the part of rank_tags[index]'s text before the first 'in', or None.

    None is returned when fewer than index + 1 rank elements were found.
    """
    if index >= len(rank_tags):
        return None
    return rank_tags[index].get_text(strip=True).split('in')[0]


def _href_at(link_tags, index):
    """Return the href of link_tags[index], or None when that link is absent."""
    if index >= len(link_tags):
        return None
    return link_tags[index]["href"]


def _parse_university_block(block):
    """Extract one university record (a dict) from a single university block.

    Raises whatever the underlying lookups raise (e.g. AttributeError when the
    heading link or location element is missing); the caller decides how to
    handle a block that cannot be parsed.
    """
    # University name and URL
    name_tag = block.find("h2").find("a")

    # Rankings: first element is stored as rank_africa, second as rank_world
    ranks = block.find_all("div", class_="uni-card__rank")

    # Logo URL
    logo_tag = block.find("img")

    # Stats: each child div of the info list provides a <dt> label and <dd> value
    info_items = block.select("dl.uni-card__info-list > div")
    stats = {item.dt.text.strip(): item.dd.text.strip() for item in info_items}

    # Extra links: first is stored as the statistics link, second as the rankings link
    extra_links = block.select("a.block-cont__double-link")

    return {
        "university_name": _strip_list_number(name_tag.text),
        "university_url": name_tag["href"],
        "location": block.find("div", class_="uni-card__geo").get_text(strip=True),
        "rank_africa": _rank_text(ranks, 0),
        "rank_world": _rank_text(ranks, 1),
        "logo_url": logo_tag["src"] if logo_tag else None,
        "acceptance_rate": stats.get("Acceptance Rate"),
        "enrollment": stats.get("Enrollment"),
        "male_female_ratio": stats.get("Male:Female"),
        "founded": stats.get("Founded"),
        "statistics_link": _href_at(extra_links, 0),
        "rankings_link": _href_at(extra_links, 1),
    }


all_schools = []

for block in universities:
    try:
        all_schools.append(_parse_university_block(block))
    except Exception as e:
        # Best effort: report the block that failed and continue with the rest.
        print("Error parsing a university block:", e)

# Output
all_schools

[{'university_name': 'University of Ghana',
  'university_url': 'https://edurank.org/uni/university-of-ghana/',
  'location': 'Legon',
  'rank_africa': '#11',
  'rank_world': '#932',
  'logo_url': 'https://edurank.org/assets/img/uni-logos/university-of-ghana-logo.png',
  'acceptance_rate': '18%',
  'enrollment': '67,914',
  'male_female_ratio': '51:49',
  'founded': '1948',
  'statistics_link': 'https://edurank.org/uni/university-of-ghana/',
  'rankings_link': 'https://edurank.org/uni/university-of-ghana/rankings/'},
 {'university_name': 'Kwame Nkrumah University of Science and Technology',
  'university_url': 'https://edurank.org/uni/kwame-nkrumah-university-of-science-and-technology/',
  'location': 'Kumasi',
  'rank_africa': '#20',
  'rank_world': '#1302',
  'logo_url': 'https://edurank.org/assets/img/uni-logos/kwame-nkrumah-university-of-science-and-technology-logo.png',
  'acceptance_rate': '54%',
  'enrollment': '85,000',
  'male_female_ratio': None,
  'founded': '1951',
  'stati

In [None]:
# Replace each record's logo_url with a file name built from the university name:
# spaces become underscores and ".png" is appended.
for entry in all_schools:
    file_stem = entry['university_name'].replace(' ', '_')
    entry['logo_url'] = f"{file_stem}.png"

In [23]:
# Show the current all_schools records as this cell's output.
all_schools

[{'university_name': 'University of Ghana',
  'university_url': 'https://edurank.org/uni/university-of-ghana/',
  'location': 'Legon',
  'rank_africa': '#11inAfrica',
  'rank_world': '#932inthe World',
  'logo_url': 'University_of_Ghana.png',
  'acceptance_rate': '18%',
  'enrollment': '67,914',
  'male_female_ratio': '51:49',
  'founded': '1948',
  'statistics_link': 'https://edurank.org/uni/university-of-ghana/',
  'rankings_link': 'https://edurank.org/uni/university-of-ghana/rankings/'},
 {'university_name': 'Kwame Nkrumah University of Science and Technology',
  'university_url': 'https://edurank.org/uni/kwame-nkrumah-university-of-science-and-technology/',
  'location': 'Kumasi',
  'rank_africa': '#20inAfrica',
  'rank_world': '#1302inthe World',
  'logo_url': 'Kwame_Nkrumah_University_of_Science_and_Technology.png',
  'acceptance_rate': '54%',
  'enrollment': '85,000',
  'male_female_ratio': None,
  'founded': '1951',
  'statistics_link': 'https://edurank.org/uni/kwame-nkrumah-un

In [10]:
# Serialize the prettified soup with json and write it to edurank_gh.json.
# NOTE(review): soup.prettify() is the page's HTML as a single string, so the file ends up
# holding one JSON string literal rather than the all_schools records — confirm this is
# what is wanted.
with open('edurank_gh.json', 'w') as out_file:
    out_file.write(json.dumps(soup.prettify(), indent=4))