In [1]:
import requests
import json
import csv
import os

# --- Configuration ---
main_dir = r"C:\Users\toddw\Desktop\Python Rating Code and Files\GenAI_Rewrite"
json_path = os.path.join(main_dir, "schools.json")
csv_path = os.path.join(main_dir, "schools.csv")

# --- Step 1: Fetch data from the API ---
url = "https://api.v2.tennisreporting.com/schools"
print(f"Fetching data from {url}...")

response = requests.get(url)
response.raise_for_status()  # raises error if request failed

schools_data = response.json()

# --- Step 2: Save raw JSON to file ---
with open(json_path, "w", encoding="utf-8") as f:
    json.dump(schools_data, f, indent=2, ensure_ascii=False)
print(f"Saved JSON data to {json_path}")

# --- Step 3: Read JSON from file and transform ---
with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Filter to Oregon (OR) and extract required fields
filtered_schools = []
for s in data:
    try:
        if s.get("city", {}).get("state", {}).get("abbr") == "OR":
            filtered_schools.append({
                "id": s.get("id"),
                "name": s.get("name"),
                "city_name": s.get("city", {}).get("name")
            })
    except Exception as e:
        print(f"Skipping record due to error: {e}")

# --- Step 4: Write to CSV ---
fieldnames = ["id", "name", "city_name"]
with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(filtered_schools)

print(f"Wrote {len(filtered_schools)} Oregon schools to {csv_path}")


Fetching data from https://api.v2.tennisreporting.com/schools...
Saved JSON data to C:\Users\toddw\Desktop\Python Rating Code and Files\GenAI_Rewrite\schools.json
Skipping record due to error: 'NoneType' object has no attribute 'get'
Skipping record due to error: 'NoneType' object has no attribute 'get'
Skipping record due to error: 'NoneType' object has no attribute 'get'
Wrote 467 Oregon schools to C:\Users\toddw\Desktop\Python Rating Code and Files\GenAI_Rewrite\schools.csv


In [2]:
import requests
import json
import csv
import os
import pandas as pd

# --- Configuration ---
main_dir = r"C:\Users\toddw\Desktop\Python Rating Code and Files\GenAI_Rewrite"
json_path = os.path.join(main_dir, "schools.json")
csv_path = os.path.join(main_dir, "schools.csv")

# --- Step 1: Fetch data from API ---
url = "https://api.v2.tennisreporting.com/schools"
print(f"Fetching school data from {url}...")

response = requests.get(url)
response.raise_for_status()  # Stop if there's an HTTP error
schools_data = response.json()

# --- Step 2: Save JSON to file ---
with open(json_path, "w", encoding="utf-8") as f:
    json.dump(schools_data, f, indent=2, ensure_ascii=False)
print(f"Saved raw JSON data to {json_path}")

# --- Step 3: Read JSON and filter for Oregon schools ---
with open(json_path, "r", encoding="utf-8") as f:
    data = json.load(f)

filtered_schools = []
for s in data:
    try:
        state_abbr = s.get("city", {}).get("state", {}).get("abbr")
        if state_abbr == "OR":
            school_name = s.get("name", "").strip().upper()
            city_name = s.get("city", {}).get("name", "").strip().upper()
            filtered_schools.append({
                "SchoolID": s.get("id"),
                "School_Name": school_name,
                "City_Name": city_name
            })
    except Exception as e:
        print(f"Skipping record due to error: {e}")

# --- Step 4: Write filtered results to CSV ---
fieldnames = ["SchoolID", "School_Name", "City_Name"]
with open(csv_path, "w", newline="", encoding="utf-8") as csvfile:
    writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
    writer.writeheader()
    writer.writerows(filtered_schools)

print(f"Wrote {len(filtered_schools)} Oregon schools to {csv_path}")

# ======================================================
# === DATA QUALITY CHECKS ===============================
# ======================================================

print("\n--- Running Data Quality Checks ---")

# Load CSV into pandas for easy checks
df = pd.read_csv(csv_path)

# 1. Check for duplicate SchoolIDs
duplicates = df[df.duplicated(subset=["SchoolID"], keep=False)]
if not duplicates.empty:
    print("\n⚠️ Duplicate SchoolIDs found:")
    print(duplicates.sort_values("SchoolID"))
else:
    print("✅ No duplicate SchoolIDs found.")

# 2. Check for leading/trailing whitespace in all fields
def has_whitespace(s):
    return isinstance(s, str) and (s != s.strip())

whitespace_issues = []
for col in df.columns:
    mask = df[col].apply(has_whitespace)
    if mask.any():
        whitespace_issues.append((col, df[mask][["SchoolID", col]]))

if whitespace_issues:
    print("\n⚠️ Leading/trailing whitespace found:")
    for col, rows in whitespace_issues:
        print(f"\nColumn: {col}")
        print(rows.to_string(index=False))
else:
    print("✅ No leading or trailing whitespace found in any fields.")

print("\nData quality checks complete.")


Fetching school data from https://api.v2.tennisreporting.com/schools...
Saved raw JSON data to C:\Users\toddw\Desktop\Python Rating Code and Files\GenAI_Rewrite\schools.json
Skipping record due to error: 'NoneType' object has no attribute 'get'
Skipping record due to error: 'NoneType' object has no attribute 'get'
Skipping record due to error: 'NoneType' object has no attribute 'get'
Wrote 467 Oregon schools to C:\Users\toddw\Desktop\Python Rating Code and Files\GenAI_Rewrite\schools.csv

--- Running Data Quality Checks ---
✅ No duplicate SchoolIDs found.
✅ No leading or trailing whitespace found in any fields.

Data quality checks complete.


In [3]:
# --- Read CSV ---
df = pd.read_csv(csv_path)

# --- Print header (column names) ---
print("=== CSV HEADER ===")
print(df.columns.tolist())

# --- Print first 10 records ---
print("\n=== TOP 10 RECORDS ===")
print(df.head(10).to_string(index=False))


=== CSV HEADER ===
['SchoolID', 'School_Name', 'City_Name']

=== TOP 10 RECORDS ===
 SchoolID                       School_Name  City_Name
    74588            FALLS CITY HIGH SCHOOL FALLS CITY
    74589        OREGON SCHOOL FOR THE DEAF      SALEM
    74590       OREGON SCHOOL FOR THE BLIND      SALEM
    74596                              VALE       VALE
    74600               GERVAIS HIGH SCHOOL    GERVAIS
    74602 DOUGLAS AVENUE ALTERNATIVE SCHOOL    GERVAIS
    74605       YAMHILL CARLTON HIGH SCHOOL    YAMHILL
    74608            HARRISBURG HIGH SCHOOL HARRISBURG
    74614                           STAYTON    STAYTON
    74616    SOUTH WASCO COUNTY HIGH SCHOOL     MAUPIN
