In [None]:
import os
import requests
import pandas as pd
import json
from concurrent.futures import ThreadPoolExecutor, as_completed

In [None]:
def _is_valid_entries_page(json_data):
    """Return True when ``json_data`` is a non-empty list of well-formed entry dicts."""
    expected_types = {
        "index": int,
        "crse_id": str,
        "crse_offer_nbr": int,
        "strm": str,
        "session_code": str,
    }
    if not isinstance(json_data, list) or len(json_data) == 0:
        return False
    return all(
        isinstance(item, dict)
        and all(isinstance(item.get(key), kind) for key, kind in expected_types.items())
        for item in json_data
    )


def dowload_entries_data_on_pages(pages, timeout=30):
    """Download class-search result pages into ``json_entries/page_<page>.json``.

    Pages whose file already exists are skipped, so the function can be
    re-run to retry only the pages that failed previously. A page is saved
    only when the server answers 200 with a non-empty JSON list in which
    every item is a dict carrying the expected typed keys. Every failure
    (network error, non-200 status, malformed payload) is reported with
    ``print`` and the loop moves on to the next page.

    Parameters
    ----------
    pages : iterable
        Page numbers to request.
    timeout : float, optional
        Seconds passed to ``requests.get`` as its ``timeout``, so that a
        stalled connection cannot block the notebook forever.
    """
    folder_path = "json_entries"
    os.makedirs(folder_path, exist_ok=True)

    for page in pages:
        file_path = os.path.join(folder_path, f"page_{page}.json")

        # The file on disk acts as the cache: never re-download a saved page.
        if os.path.exists(file_path):
            continue

        url = f"https://public.mybustudent.bu.edu/psc/BUPRD/EMPLOYEE/SA/s/WEBLIB_HCX_CM.H_CLASS_SEARCH.FieldFormula.IScript_ClassSearch?institution=BU001&term=2248&page={page}"
        try:
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as error:
            # One unreachable page must not abort the remaining downloads.
            print(f"Failed to fetch page {page}: {error}")
            continue

        if response.status_code != 200:
            print(f"Failed to fetch page {page}: {response.status_code}")
            continue

        try:
            json_data = response.json()
        except ValueError:
            # Every JSON decode error that requests can raise is a ValueError,
            # whichever JSON backend it was built against.
            print(f"Invalid JSON format for page {page}")
            continue

        if not _is_valid_entries_page(json_data):
            print(f"Invalid JSON format for page {page}")
            continue

        with open(file_path, 'w') as f:
            json.dump(json_data, f)
        print(f"Saved JSON for page {page} to {file_path}")


def combine_entries_json():
    """Merge every ``*.json`` file in ``json_entries`` into one flat list.

    Each file is expected to hold a JSON list; the lists are concatenated.
    Files are read in ascending numeric order of the number that follows the
    last underscore in their name (``page_2.json`` before ``page_10.json``),
    so the result is reproducible instead of following the arbitrary order
    in which ``os.listdir`` happens to return names.

    Returns
    -------
    list
        The concatenated items of all files.
    """
    folder_path = "json_entries"

    def numeric_order(filename):
        # "page_12.json" -> (0, 12, ...); names without a numeric suffix
        # sort after the numbered ones, alphabetically.
        stem = filename[:-len(".json")]
        number = stem.rpartition("_")[2]
        if number.isdecimal():
            return (0, int(number), filename)
        return (1, 0, filename)

    filenames = sorted(
        (name for name in os.listdir(folder_path) if name.endswith(".json")),
        key=numeric_order,
    )

    data = []
    for filename in filenames:
        with open(os.path.join(folder_path, filename), 'r') as f:
            data.extend(json.load(f))
    return data

In [None]:
def dowload_details_data_of_class_nbr(class_nbr, timeout=30):
    """Download the details of one class into ``json_details/class_nbr_<class_nbr>.json``.

    Nothing is requested when the file already exists. The payload is saved
    only when the server answers 200 with a JSON object whose
    ``"section_info"`` value is itself an object. Every failure (network
    error, non-200 status, malformed payload) is reported with ``print`` and
    the function returns without raising, so a single bad class cannot take
    down a batch of downloads.

    Parameters
    ----------
    class_nbr
        Class number; it is interpolated into both the URL and the file name.
    timeout : float, optional
        Seconds passed to ``requests.get`` as its ``timeout``, so that a
        stalled connection cannot block the caller forever.
    """
    folder_path = "json_details"
    os.makedirs(folder_path, exist_ok=True)

    file_path = os.path.join(folder_path, f"class_nbr_{class_nbr}.json")

    # The file on disk acts as the cache: never re-download saved details.
    if os.path.exists(file_path):
        return

    url = f"https://public.mybustudent.bu.edu/psc/BUPRD/EMPLOYEE/SA/s/WEBLIB_HCX_CM.H_CLASS_SEARCH.FieldFormula.IScript_ClassDetails?institution=BU001&term=2248&class_nbr={class_nbr}"
    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as error:
        print(f"Failed to fetch details of class_nbr {class_nbr}: {error}")
        return

    if response.status_code != 200:
        print(f"Failed to fetch details of class_nbr {class_nbr}: {response.status_code}")
        return

    try:
        json_data = response.json()
    except ValueError:
        # Every JSON decode error that requests can raise is a ValueError,
        # whichever JSON backend it was built against.
        print(f"Invalid JSON format for class_nbr {class_nbr}")
        return

    # A dict that contains "section_info" is necessarily non-empty, so no
    # separate length check is needed.
    if not (isinstance(json_data, dict) and isinstance(json_data.get("section_info"), dict)):
        print(f"Invalid JSON format for class_nbr {class_nbr}")
        return

    with open(file_path, 'w') as f:
        json.dump(json_data, f)
    print(f"Saved JSON for class_nbr {class_nbr} to {file_path}")
            
def dowload_details_data_of_class_nbrs(class_nbrs):
    """Download the details of many classes concurrently on a thread pool.

    Class numbers that would map to the same output file are requested only
    once (the first occurrence wins), so two worker threads never fetch and
    write the same file at the same time. Any exception raised inside a
    worker is re-raised here through ``future.result()``.

    Parameters
    ----------
    class_nbrs : iterable
        Class numbers to download; duplicates are allowed.
    """
    # Key by the text that ends up in the file name, which is what decides
    # whether two values collide on disk; dicts keep first-seen order.
    unique_class_nbrs = {}
    for class_nbr in class_nbrs:
        unique_class_nbrs.setdefault(f"{class_nbr}", class_nbr)

    with ThreadPoolExecutor() as executor:
        futures = [
            executor.submit(dowload_details_data_of_class_nbr, class_nbr)
            for class_nbr in unique_class_nbrs.values()
        ]
        for future in as_completed(futures):
            future.result()
            
def combine_details_json():
    """Collect the content of every ``*.json`` file in ``json_details`` into a list.

    Each file contributes exactly one element (its parsed JSON content).
    Files are read in ascending numeric order of the number that follows the
    last underscore in their name (``class_nbr_2.json`` before
    ``class_nbr_10.json``), so the result is reproducible instead of
    following the arbitrary order in which ``os.listdir`` returns names.

    Returns
    -------
    list
        One parsed JSON document per file.
    """
    folder_path = "json_details"

    def numeric_order(filename):
        # "class_nbr_12.json" -> (0, 12, ...); names without a numeric
        # suffix sort after the numbered ones, alphabetically.
        stem = filename[:-len(".json")]
        number = stem.rpartition("_")[2]
        if number.isdecimal():
            return (0, int(number), filename)
        return (1, 0, filename)

    filenames = sorted(
        (name for name in os.listdir(folder_path) if name.endswith(".json")),
        key=numeric_order,
    )

    data = []
    for filename in filenames:
        with open(os.path.join(folder_path, filename), 'r') as f:
            data.append(json.load(f))
    return data

In [None]:
# Request pages 0 through 134; only pages 1 to 133 are valid.
pages = list(range(135))

# Download the requested pages, then merge everything stored on disk.
dowload_entries_data_on_pages(pages)
entries = combine_entries_json()

# Persist the merged entries next to the notebook.
with open("entries.json", "w") as entries_file:
    json.dump(entries, entries_file)

In [None]:
# Reuse the in-memory ``entries`` when the entries cell has already run in
# this kernel; on a fresh kernel the name does not exist yet, so look it up
# through globals() instead of referencing it directly (which would raise
# NameError) and fall back to the entries.json file.
if globals().get("entries") is None:
    with open("entries.json", "r") as f:
        entries = json.load(f)

class_nbrs = [item["class_nbr"] for item in entries]

# Fetch the details of every class, then merge everything stored on disk.
dowload_details_data_of_class_nbrs(class_nbrs)
details = combine_details_json()

with open("details.json", "w") as f:
    json.dump(details, f)

In [None]:
# Sanity check: read back the combined details file. The path is the same
# relative "details.json" that the details cell writes, so the check works
# in any working directory, not only under /kaggle/working. ``json`` is
# already imported in the first cell of the notebook.
with open("details.json", "r") as f:
    print(json.load(f))