In [458]:
# ✅ Cell 0 — Setup
# Purpose:
# - Define file paths for modern and legacy catalog files
# - Sort all files by year
# - Include college name mapper helper

import os
import re

# Base directories
catalog_dir = "/Users/buddy/Desktop/WGU-Reddit/data/WGU_catalog/catalogs-2017-2025"
tagged_dir = os.path.join(catalog_dir, "tagged")

# Modern untagged files
modern_files = [
    "catalog_june_2021.txt",
    "catalog_june_2022.txt",
    "catalog_june_2023.txt",
    "catalog_june_2024.txt",
    "catalog_june_2025.txt"
]

# Legacy tagged files
legacy_files = [
    "catalog_july_2017_tagged.txt",
    "catalog_june_2018_tagged.txt",
    "catalog_june_2019_tagged.txt",
    "catalog_june_2020_tagged.txt"
]

# Build full paths
modern_files = [os.path.join(catalog_dir, f) for f in modern_files]
legacy_files = [os.path.join(tagged_dir, f) for f in legacy_files]

# Combine and sort
all_files = modern_files + legacy_files

def extract_year(filepath):
    base = os.path.basename(filepath)
    m = re.search(r'(20\\d{2})', base)
    return int(m.group(1)) if m else 0

all_files = sorted(all_files, key=extract_year)

# ✅ College name standardizer
def map_college_name(raw):
    if raw == "Teachers College":
        return "School of Education"
    elif raw == "College of Business":
        return "School of Business"
    elif raw in ["College of Health Professions", "Leavitt School of Health"]:
        return "School of Health"
    elif raw == "College of Information Technology":
        return "School of Technology"
    else:
        return raw

In [459]:
# ✅ Cell 1 — Define Patterns (FINAL)

import re

# Tenets pattern — full valid names only
tenets_pattern = re.compile(
    r'^(School of [A-Za-z ]+|College of [A-Za-z ]+|Leavitt School of Health|Teachers College) Tenets:'
)

# Tagged college marker
college_tag_pattern = re.compile(r'^###COLLEGE:\s*(.+)')

# Certificates header
certificates_header_pattern = re.compile(r'^Certificates - Standard Paths')

# Copyright line
copyright_pattern = re.compile(r'^© Western Governors University')

# Footer for Total CUs
footer_pattern = re.compile(r'Total CUs:\s*\d+', re.IGNORECASE)

# Program title line — must start with prefix AND contain no © anywhere
title_pattern = re.compile(
    r'^(?:Bachelor|Master|B\.S\.|B\.A\.|M\.S\.|M\.A\.|MBA|Certificate:|Post-Master\'s Certificate|Endorsement)(?!.*©).*\Z',
    re.IGNORECASE
)

In [460]:
# fix_specials.py

import re

footer_pattern = re.compile(r'Total CUs:\s*\d+', re.IGNORECASE)
copyright_pattern = re.compile(r'^© Western Governors University')
title_pattern = re.compile(
    r'^(Bachelor|Master|B\.S\.|B\.A\.|M\.S\.|M\.A\.|MBA|Certificate:|Post-Master\'s Certificate|Endorsement)',
    re.IGNORECASE
)

def fix_lines_if_needed(lines, filename):
    fixed = lines.copy()

    if "2023" in filename or "2024" in filename:
        fixed = fix_health_block_23_24(fixed)
        fixed = fix_nursing_prelicensure_blocks(fixed)

    if "2022" in filename:
        fixed = add_missing_copyrights(fixed)

    if "2021" in filename:
        fixed = fix_2021_blocks(fixed)

    if "2024" in filename:
        fixed = fix_2024_blocks(fixed)

    fixed = fix_management_and_nursing(fixed)

    return fixed


def fix_health_block_23_24(lines):
    fixed = []
    inserted_header = False

    for line in lines:
        if "Bachelor of Science, Nursing" in line and not inserted_header:
            fixed.append("###COLLEGE: School of Health")
            inserted_header = True
        fixed.append(line)

    return fixed


def fix_nursing_prelicensure_blocks(lines):
    fixed = []
    block = []
    inside = False
    footer = ""
    copyright_line = ""

    for line in lines:
        if "Bachelor of Science, Nursing - Prelicensure" in line:
            if block:
                if footer:
                    block.append(footer)
                if copyright_line:
                    block.append(copyright_line)
                fixed.extend(block)
                block = []
                footer = ""
                copyright_line = ""
            inside = True

        if inside:
            if footer_pattern.search(line):
                footer = line
                continue
            if copyright_pattern.search(line):
                copyright_line = line
                continue
            if title_pattern.match(line) and "Nursing - Prelicensure" not in line:
                if footer:
                    block.append(footer)
                if copyright_line:
                    block.append(copyright_line)
                fixed.extend(block)
                block = []
                inside = False
                footer = ""
                copyright_line = ""
                fixed.append(line)
                continue
            block.append(line)
        else:
            fixed.append(line)

    if block:
        if footer:
            block.append(footer)
        if copyright_line:
            block.append(copyright_line)
        fixed.extend(block)

    return fixed


def fix_2024_blocks(lines):
    fixed = []
    se_seen = {"java": False, "csharp": False}
    med_seen = 0

    for line in lines:
        if line.strip() == "Bachelor of Science, Software Engineering":
            if not se_seen["java"]:
                line = "Bachelor of Science, Software Engineering (Java Track)"
                se_seen["java"] = True
            elif not se_seen["csharp"]:
                line = "Bachelor of Science, Software Engineering (C# Track)"
                se_seen["csharp"] = True

        if line.strip() == "Master of Education, Education Technology and Instructional Design":
            med_seen += 1
            if med_seen == 1:
                line = "Master of Education, Education Technology and Instructional Design (K-12 and Adult Learner)"
            elif med_seen == 2:
                line = "Master of Education, Education Technology and Instructional Design (Adult Learner)"
            elif med_seen == 3:
                line = "Master of Education, Education Technology and Instructional Design (K-12 Learner)"

        fixed.append(line)

    return fixed


def fix_2021_blocks(lines):
    fixed = []
    se_seen = {"java": False, "csharp": False}
    med_seen = 0

    for line in lines:
        if line.strip() == "Bachelor of Science, Software Engineering":
            if not se_seen["java"]:
                line = "Bachelor of Science, Software Engineering (Java Track)"
                se_seen["java"] = True
            elif not se_seen["csharp"]:
                line = "Bachelor of Science, Software Engineering (C# Track)"
                se_seen["csharp"] = True

        if line.strip() == "Master of Education, Education Technology and Instructional Design":
            med_seen += 1
            if med_seen == 1:
                line = "Master of Education, Education Technology and Instructional Design (K-12 and Adult Learner)"
            elif med_seen == 2:
                line = "Master of Education, Education Technology and Instructional Design (Adult Learner)"
            elif med_seen == 3:
                line = "Master of Education, Education Technology and Instructional Design (K-12 Learner)"

        fixed.append(line)

    return fixed


def fix_management_and_nursing(lines):
    fixed = []
    mgmt_seen = {"plain": False, "mkt": False, "hc": False}
    nursing_seen = {"pre": False, "rn": False}

    for line in lines:
        txt = line.strip()

        if txt == "Bachelor of Science Business Administration, Management":
            if not mgmt_seen["plain"]:
                mgmt_seen["plain"] = True
            elif not mgmt_seen["mkt"]:
                line = "Bachelor of Science Business Administration, Management (Marketing Emphasis)"
                mgmt_seen["mkt"] = True
            elif not mgmt_seen["hc"]:
                line = "Bachelor of Science Business Administration, Management (Healthcare Emphasis)"
                mgmt_seen["hc"] = True

        if txt == "Bachelor of Science, Nursing":
            if not nursing_seen["pre"]:
                line = "Bachelor of Science, Nursing (Prelicensure)"
                nursing_seen["pre"] = True
            elif not nursing_seen["rn"]:
                line = "Bachelor of Science, Nursing (RN to BSN)"
                nursing_seen["rn"] = True

        fixed.append(line)

    return fixed


def remove_cloud_from_health(lines):
    return lines


def add_missing_copyrights(lines):
    return lines


def move_misplaced_total_cus(lines):
    return lines

In [473]:
# parse_courses.py
# parse_courses.py

import re

dept_course_regex = re.compile(
    r'^([A-Z]{2,5})\s+(\d{4})\s+([A-Z]{1,4})\s*(\d{1,4}[A-Z]?)\s+(.*?)\s+(\d+)\s+\d+$'
)

def parse_courses(ccn_rows):
    courses = []
    seen = set()

    for line in ccn_rows:
        if '©' in line or len(line.split()) < 4:
            continue

        match = dept_course_regex.match(line)
        if not match:
            continue

        dept, num, prefix, code, name, cu = match.groups()
        ccn = dept
        course_code = f"{prefix}{code}"
        course_name = name.strip()
        cu = int(cu)

        key = (ccn, course_code, course_name)
        if key in seen:
            continue

        courses.append({
            "ccn": ccn,
            "course_code": course_code,
            "course_name": course_name,
            "cu": cu
        })

        seen.add(key)

    return courses

In [461]:
# ✅ Cell 3 — parse_program (updated to use parse_courses)

def parse_program(lines, i, end, is_first_copyright, debug=False):
    if debug:
        print(f"\n🔍 parse_program: starting at line {i}: '{lines[i]}'")

    trust_copyright = False

    if copyright_pattern.match(lines[i]):
        if is_first_copyright:
            trust_copyright = True
            is_first_copyright = False
            if debug:
                print(f"  ✔️ Using first copyright in block")
        elif i > 0 and footer_pattern.search(lines[i - 1]):
            trust_copyright = True
            if debug:
                print(f"  ✔️ Using copyright after Total CUs")

        if trust_copyright:
            i += 1
            while i < end and not title_pattern.match(lines[i]):
                if debug:
                    print(f"  ➜ Skipping stray line: '{lines[i]}'")
                i += 1
        else:
            if debug:
                print(f"  ❌ Skipping stray watermark")
            return None, i + 1, is_first_copyright

    if i >= end:
        if debug:
            print(f"  ⚠️ Reached end while looking for title.")
        return None, i, is_first_copyright

    title_candidate = lines[i].strip()
    if debug:
        print(f"  ➜ Title candidate: '{title_candidate}'")

    if not title_pattern.match(title_candidate):
        if debug:
            print(f"  ❌ Invalid title line: '{title_candidate}'")
        return None, i + 1, is_first_copyright

    program_title = title_candidate
    i += 1

    program_desc = []
    while i < end and not lines[i].startswith("CCN Course Number"):
        program_desc.append(lines[i])
        i += 1

    if debug:
        print(f"  ✔️ Collected description ({len(program_desc)} lines)")

    if i >= end:
        if debug:
            print(f"  ⚠️ Reached end while looking for CCN header.")
        return None, i, is_first_copyright

    if debug:
        print(f"  ✔️ Found CCN header at line {i}: '{lines[i]}'")
    i += 1

    ccn_rows = []
    cu_footer = ""
    while i < end:
        line = lines[i]
        if footer_pattern.search(line):
            cu_footer = line
            if debug:
                print(f"  ✔️ Found Total CUs footer at line {i}: '{line}'")
            i += 1
            break
        ccn_rows.append(line)
        i += 1

    if debug:
        print(f"  ✔️ Collected {len(ccn_rows)} course rows")

    # ✅ New: parse the course rows
    courses = parse_courses(ccn_rows)

    if debug:
        print(f"  ✔️ Parsed {len(courses)} valid courses")

    return {
        "title": program_title,
        "desc": " ".join(program_desc).strip(),
        "courses": courses,
        "cu_footer": cu_footer
    }, i, is_first_copyright

In [462]:
# ✅ Debug version — same parse_college, just more prints

def parse_college(lines, start, end, college_name, debug=False):
    if debug:
        print(f"\n📌 START COLLEGE: {college_name} | Lines {start} to {end}")

    i = start + 1
    desc_lines = []

    while i < end:
        line = lines[i]
        if college_name == "School of Education":
            if title_pattern.match(line):
                if debug:
                    print(f"  ➜ Found program title at {i}: '{line}' → end description")
                break
            if copyright_pattern.match(line):
                if debug:
                    print(f"  ➜ Skipping stray © at {i}: '{line}'")
                i += 1
                continue
        else:
            if copyright_pattern.match(line):
                if debug:
                    print(f"  ➜ End description at © line {i}: '{line}'")
                break
        desc_lines.append(line)
        i += 1

    college_desc = " ".join(desc_lines).strip()
    if debug:
        print(f"  ➜ College Description: '{college_desc[:60]}...'")

    programs = []
    while i < end:
        if debug:
            print(f"  ➜ Checking line {i}: '{lines[i]}'")
        result, next_i, _ = parse_program(lines, i, end, True, debug=debug)
        if result:
            result["college"] = college_name
            result["college_desc"] = college_desc
            programs.append(result)
            if debug:
                print(f"    ✔️ Parsed '{result['title']}'")
            i = next_i
            continue
        i += 1
        if i < end and tenets_pattern.match(lines[i]):
            if debug:
                print(f"  ➜ Next college detected at {i}: '{lines[i]}' → stop parsing")
            break
    if debug:
        print(f"📌 DONE COLLEGE: {college_name} | Programs found: {len(programs)}")
    return programs

In [16]:
# parse_file.py

def parse_file(filepath, debug=False):
    with open(filepath, "r", encoding="utf-8") as f:
        lines = [line.strip() for line in f]

    lines = fix_lines_if_needed(lines, filepath)

    results = []

    if "_tagged" in filepath and any(y in filepath for y in ["2017", "2018", "2019", "2020"]):
        markers = []
        for i, line in enumerate(lines):
            m = college_tag_pattern.match(line)
            if m:
                raw = m.group(1)
                name = map_college_name(raw)
                markers.append((i, name))
        markers.append((len(lines), None))

        for (start, college_name), (end, _) in zip(markers[:-1], markers[1:]):
            programs = parse_college(lines, start, end, college_name, debug=debug)
            results.extend(programs)

    else:
        markers = []
        for i, line in enumerate(lines):
            m = tenets_pattern.match(line)
            if m:
                raw = m.group(1)
                name = map_college_name(raw)
                markers.append((i, name))
        markers.append((len(lines), None))

        for (start, college_name), (end, _) in zip(markers[:-1], markers[1:]):
            programs = parse_college(lines, start, end, college_name, debug=debug)
            results.extend(programs)

    return results

In [17]:
for f in all_files:
    colleges = parse_file(f)
    print(f"\n{os.path.basename(f)}: {colleges if colleges else '⚠️ None found'}")

IOPub data rate exceeded.
The Jupyter server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--ServerApp.iopub_data_rate_limit`.

Current values:
ServerApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
ServerApp.rate_limit_window=3.0 (secs)



In [477]:
# ✅ Cell 6 — Debug legacy only: show colleges and program titles only

import os

for filepath in legacy_files:
    print(f"\n=== {os.path.basename(filepath)} ===")
    with open(filepath, "r", encoding="utf-8") as f:
        lines = [line.strip() for line in f]

    lines = fix_lines_if_needed(lines, filepath)

    markers = []
    for i, line in enumerate(lines):
        m = college_tag_pattern.match(line)
        if m:
            raw = m.group(1)
            name = map_college_name(raw)
            markers.append((i, name))

    markers.append((len(lines), None))

    for (start, college_name), (end, _) in zip(markers[:-1], markers[1:]):
        progs = parse_college(lines, start, end, college_name)
        print(f"{college_name}:")
        for p in progs:
            print(f"  - {p['title']}")


=== catalog_july_2017_tagged.txt ===
School of Business:
School of Health:
School of Technology:
School of Education:
  - Bachelor of Arts, Interdisciplinary Studies (K-8)
  - Bachelor of Arts, Mathematics (5-9)
  - Bachelor of Arts, Mathematics (5-12)
  - Bachelor of Arts, Science (5-9)
  - Bachelor of Arts, Science (5-12, Bio)
  - Bachelor of Arts, Science (5-12, Chemistry)
  - Bachelor of Arts, Science (5-12, Geo)
  - Bachelor of Arts, Science (5-12, Physics)
  - Bachelor of Arts, Special Education
  - Master of Arts in Teaching, Elementary Education (K-8)
  - Master of Arts in Teaching, English Education (5-12)
  - Master of Arts in Teaching, Mathematics (5-9)
  - Master of Arts in Teaching, Mathematics (5-12)
  - Master of Arts in Teaching, Science (5-12)
  - Master of Science, Special Education
  - Master of Science, Educational Leadership
  - Master of Arts, English Language Learning (PreK-12)
  - Master of Arts, Mathematics Education (K-6)
  - Master of Arts, Mathematics Educa

In [None]:
for f in legacy_files[:1]:
    with open(f, "r", encoding="utf-8") as file:
        lines = [line.strip() for line in file]
    lines = fix_lines_if_needed(lines, f)

    markers = []
    for idx, line in enumerate(lines):
        m = college_tag_pattern.match(line)
        if m:
            raw = m.group(1)
            name = map_college_name(raw)
            markers.append((idx, name))

    markers.append((len(lines), None))

    for (start, college_name), (end, _) in zip(markers[:-1], markers[1:]):
        parse_college(lines, start, end, college_name, debug=True)

In [478]:
# ✅ Cell 6 — Debug legacy: dump raw blocks for inspection

import os

for filepath in legacy_files:
    print(f"\n=== {os.path.basename(filepath)} ===")
    with open(filepath, "r", encoding="utf-8") as f:
        lines = [line.strip() for line in f]

    lines = fix_lines_if_needed(lines, filepath)

    markers = []
    for i, line in enumerate(lines):
        m = college_tag_pattern.match(line)
        if m:
            raw = m.group(1)
            name = map_college_name(raw)
            markers.append((i, name))

    markers.append((len(lines), None))

    for (start, college_name), (end, _) in zip(markers[:-1], markers[1:]):
        print(f"\n=== {college_name} ===")
        for i in range(start, min(start + 40, end)):
            print(f"{i}: {lines[i]}")


=== catalog_july_2017_tagged.txt ===

=== School of Business ===
2322: ###COLLEGE: School of Business
2323: Bachelor of Science, Business Management
2324: The Bachelor of Science in Business Management is a competency-based program that enables leaders and
2325: managers in organizations to earn a Bachelor of Science degree. The B.S. in Business Management is great
2326: preparation for a variety of careers in the business field. This program consists of twelve balanced areas of study,
2327: WGU competency-based assessments, and a capstone project.
2328: CCN Course Number Course Description CUs Term
2329: BUS 2301 C483 Principles of Management 4 1
2330: MATH 1010 C463 Intermediate Algebra 3 1
2331: ENGL 1010 C455 English Composition I 3 1
2332: SOCG 1010 C273 Introduction to Sociology 3 1
2333: GEOG 1311 C255 Introduction to Geography 3 2
2334: MATH 1015 C278 College Algebra 4 2
2335: ENGL 1020 C456 English Composition II 3 2
2336: MGMT 3000 C715 Organizational Behavior 3 2
2337: HRM 

In [479]:
# ✅ Debug: Check which lines match title_pattern inside legacy blocks

import os

test_file = legacy_files[0]  # Pick your 2017 file for example

print(f"\n=== Testing: {os.path.basename(test_file)} ===")

with open(test_file, "r", encoding="utf-8") as f:
    lines = [line.strip() for line in f]

lines = fix_lines_if_needed(lines, test_file)

# Find tagged colleges
markers = []
for i, line in enumerate(lines):
    m = college_tag_pattern.match(line)
    if m:
        raw = m.group(1)
        name = map_college_name(raw)
        markers.append((i, name))

markers.append((len(lines), None))

for (start, college_name), (end, _) in zip(markers[:-1], markers[1:]):
    print(f"\n=== {college_name} ===")
    for i in range(start, end):
        line = lines[i]
        if title_pattern.match(line):
            print(f"✔️ MATCH [{i}]: {line}")
        elif (
            line.lower().startswith("bachelor")
            or line.lower().startswith("master")
        ):
            print(f"❌ SHOULD MATCH [{i}]: {line}")


=== Testing: catalog_july_2017_tagged.txt ===

=== School of Business ===
✔️ MATCH [2323]: Bachelor of Science, Business Management
✔️ MATCH [2371]: Bachelor of Science, Business - Healthcare Management
✔️ MATCH [2428]: Bachelor of Science, Business - Human Resource Management
✔️ MATCH [2480]: Bachelor of Science, Business - Information Technology Management
✔️ MATCH [2530]: Bachelor of Science, Marketing Management
✔️ MATCH [2574]: Bachelor of Science, Accounting
✔️ MATCH [2622]: Master of Business Administration
✔️ MATCH [2639]: MBA, IT Management
✔️ MATCH [2658]: MBA, Healthcare Management
✔️ MATCH [2678]: Master of Science, Integrated Healthcare Management
✔️ MATCH [2700]: Master of Science, Management and Leadership
✔️ MATCH [2724]: Master of Science, Accounting

=== School of Health ===
✔️ MATCH [2755]: Bachelor of Science, Nursing (Prelicensure)
✔️ MATCH [2820]: Bachelor of Science, Nursing (RN to BSN)
✔️ MATCH [2861]: Master of Science, Nursing - Education (BSN to MSN)
✔️ MATC


catalog_june_2021.txt: ['School of Business', 'School of Education', 'School of Health', 'School of Technology']

catalog_june_2022.txt: ['School of Business', 'School of Education', 'School of Health', 'School of Technology']

catalog_june_2023.txt: ['School of Business', 'School of Education', 'School of Health', 'School of Technology']

catalog_june_2024.txt: ['School of Business', 'School of Education', 'School of Health', 'School of Technology']

catalog_june_2025.txt: ['School of Business', 'School of Education', 'School of Health', 'School of Technology']

catalog_july_2017_tagged.txt: ['School of Business', 'School of Education', 'School of Health', 'School of Technology']

catalog_june_2018_tagged.txt: ['School of Business', 'School of Education', 'School of Health', 'School of Technology']

catalog_june_2019_tagged.txt: ['School of Business', 'School of Education', 'School of Health', 'School of Technology']

catalog_june_2020_tagged.txt: ['School of Business', 'School of Ed

In [467]:
# ✅ Cell 6 — Dump only School of Health programs for ALL modern years (no filters)

import os

for filepath in modern_files:
    print(f"\n=== {os.path.basename(filepath)} ===")
    with open(filepath, "r", encoding="utf-8") as f:
        lines = [line.strip() for line in f]

    lines = fix_lines_if_needed(lines, filepath)

    # Tenets-based colleges
    markers = []
    for i, line in enumerate(lines):
        m = tenets_pattern.match(line)
        if m:
            raw = m.group(1)
            name = map_college_name(raw)
            markers.append((i, name))

    markers.append((len(lines), None))

    # For each college — only School of Health
    for (start, college_name), (end, _) in zip(markers[:-1], markers[1:]):
        if college_name != "School of Health":
            continue

        progs = parse_college(lines, start, end, college_name)
        print(f"\n{college_name}:")
        for p in progs:
            print(f"  - {p['title']}")


=== catalog_june_2021.txt ===

School of Health:
  - Bachelor of Science, Nursing (Prelicensure)
  - Bachelor of Science, Nursing (RN to BSN)
  - Bachelor of Science, Health Information Management
  - Bachelor of Science, Health Services Coordination
  - Master of Science, Nursing - Family Nurse Practitioner (BSN to MSN)
  - Master of Science, Nursing - Education (BSN to MSN)
  - Master of Science, Nursing - Leadership and Management (BSN to MSN)
  - Master of Science, Nursing - Nursing Informatics (BSN to MSN)
  - Master of Science, Nursing - Education (RN to MSN)
  - Master of Science, Nursing - Leadership and Management (RN to MSN)
  - Master of Science, Nursing - Nursing Informatics
  - Master of Health Leadership

=== catalog_june_2022.txt ===

School of Health:
  - Bachelor of Science, Nursing (Prelicensure)
  - Bachelor of Science, Nursing (RN to BSN)
  - Bachelor of Science, Health Information Management
  - Bachelor of Science, Health Services Coordination
  - Master of Scien

In [471]:
# ✅ Cell 6 — Dump ALL colleges and programs for ALL modern years (no filters)

import os

for filepath in modern_files:
    print(f"\n=== {os.path.basename(filepath)} ===")
    with open(filepath, "r", encoding="utf-8") as f:
        lines = [line.strip() for line in f]

    lines = fix_lines_if_needed(lines, filepath)

    # Tenets-based colleges
    markers = []
    for i, line in enumerate(lines):
        m = tenets_pattern.match(line)
        if m:
            raw = m.group(1)
            name = map_college_name(raw)
            markers.append((i, name))

    markers.append((len(lines), None))

    # For each college — no filters
    for (start, college_name), (end, _) in zip(markers[:-1], markers[1:]):
        progs = parse_college(lines, start, end, college_name)
        print(f"\n{college_name}:")
        for p in progs:
            print(f"  - {p['title']}")


=== catalog_june_2021.txt ===

School of Business:
  - Bachelor of Science Business Administration, Accounting
  - Bachelor of Science Business Administration, Healthcare Management
  - Bachelor of Science Business Administration, Human Resource Management
  - Bachelor of Science Business Administration, Information Technology Management
  - Bachelor of Science Business Administration, Management
  - Bachelor of Science Business Administration, Management (Marketing Emphasis)
  - Bachelor of Science Business Administration, Management (Healthcare Emphasis)
  - Bachelor of Science Business Administration, Marketing
  - Master of Business Administration
  - MBA, IT Management
  - MBA, Healthcare Management
  - Master of Science, Management and Leadership
  - Master of Science, Accounting

School of Health:
  - Bachelor of Science, Nursing (Prelicensure)
  - Bachelor of Science, Nursing (RN to BSN)
  - Bachelor of Science, Health Information Management
  - Bachelor of Science, Health Ser