# In [None]:
import re

# 3 different ways to find the course code

def find_course_code_rule_based(paragraphs, search_limit=30):
    """Find a course code by applying pattern rules in strict priority order.

    Only the first ``search_limit`` paragraphs are examined. The rules are
    tried in this order and the first one that produces a result wins:

    1. A long name from the module-level ``COURSE_CODE_MAP`` followed by
       whitespace and a course number (case-insensitive). Names are tried in
       the map's iteration order, each one against every paragraph, and the
       mapped short code is used in the result.
    2. Paragraph by paragraph, an upper-case code: either a compound code
       such as ``"CS101/MATH202"`` or a department/number pair such as
       ``"CS 101"``, ``"CS-101"`` or ``"CS/EE 120A/120B"``. When both kinds
       occur in the same paragraph, the one that starts earlier is returned.
    3. A capitalised abbreviation immediately followed by the number, e.g.
       ``"Math101"`` (this pattern accepts no separator).

    Args:
        paragraphs: Sliceable sequence of paragraph strings.
        search_limit: Number of leading paragraphs to search.

    Returns:
        The normalised code as ``"DEPT NUMBER"`` (compound codes are joined
        with ``/``), or ``None`` when no rule matches.
    """
    head = paragraphs[:search_limit]

    # Rule 1: spelled-out department names take precedence over everything.
    for long_name, short_code in COURSE_CODE_MAP.items():
        long_name_pattern = re.compile(
            r'\b' + re.escape(long_name) + r'\s+(\d{3,4}[a-zA-Z]?)\b',
            re.IGNORECASE,
        )
        for para in head:
            if match := long_name_pattern.search(para):
                return f"{short_code} {match.group(1).upper()}"

    EXCLUDE_TERMS = ['FALL', 'SPRING', 'WINTER', 'SUMMER', 'ROOM', 'TO', 'AT', 'THE']
    # The alternation is wrapped in a group so the trailing \b applies to
    # every term. Without the group only the last term is whole-word and any
    # department merely starting with "TO"/"AT" (e.g. "ATM 350") is rejected.
    exclude_lookahead = r'(?!(?:' + '|'.join(EXCLUDE_TERMS) + r')\b)'

    # A course number, optionally followed by "/<second number>".
    num_pattern_ext = r'(\d{3,4}[a-zA-Z]?(?:\s*\/\s*\d{3,4}[a-zA-Z]?)?)'

    all_caps_pattern = re.compile(
        r'\b'
        + exclude_lookahead
        + r'([A-Z]{2,4}(?:\s*\/\s*[A-Z]{2,4})*)'
        + r'\s*-?\s*'
        + num_pattern_ext
        + r'\b'
    )
    compound_pattern = re.compile(
        r'\b(([A-Z]{2,4}\d{3,4}[a-zA-Z]?)(?:\s*/\s*[A-Z]{2,4}\d{3,4}[a-zA-Z]?)+)\b'
    )
    mixed_case_pattern = re.compile(
        r'\b([A-Z][a-z]{1,3})' + num_pattern_ext + r'\b'
    )

    # Rule 2: upper-case codes, first paragraph that contains one.
    for para in head:
        candidates = []

        compound_match = compound_pattern.search(para)
        caps_match = all_caps_pattern.search(para)

        if compound_match:
            pieces = [piece.strip() for piece in compound_match.group(1).upper().split('/')]
            # Insert the space between department letters and the number.
            spaced = [
                re.sub(r'([A-Z]+)(\d)', r'\1 \2', piece, count=1)
                for piece in pieces
            ]
            candidates.append({'start': compound_match.start(), 'code': '/'.join(spaced)})

        # A caps match starting where the compound match starts is just the
        # compound code's first component, so it is not a separate candidate.
        if caps_match and (not compound_match or caps_match.start() != compound_match.start()):
            dept = re.sub(r'\s*\/\s*', '/', caps_match.group(1))
            if dept.upper() not in EXCLUDE_TERMS:
                num = re.sub(r'\s*\/\s*', '/', caps_match.group(2).upper())
                candidates.append({'start': caps_match.start(), 'code': f"{dept} {num}"})

        if candidates:
            return min(candidates, key=lambda c: c['start'])['code']

    # Rule 3: lowest priority, e.g. "Math101".
    for para in head:
        if match := mixed_case_pattern.search(para):
            dept = match.group(1).upper()
            num = re.sub(r'\s*\/\s*', '/', match.group(2).upper())
            return f"{dept} {num}"

    return None






def find_course_code_position_based_1(paragraphs, search_limit=30):
    """Find the course code that appears earliest in the first matching paragraph.

    Every pattern (compound upper-case codes, long names from the
    module-level ``COURSE_CODE_MAP``, upper-case department/number pairs and
    capitalised abbreviations glued to a number such as ``"Math101"``) is run
    over each of the first ``search_limit`` paragraphs. Within a paragraph,
    matches that lie strictly inside a longer match are discarded, and of the
    remaining ones the match with the smallest start offset is formatted and
    returned. Paragraphs without any match are skipped.

    Args:
        paragraphs: Sliceable sequence of paragraph strings.
        search_limit: Number of leading paragraphs to search.

    Returns:
        The normalised code as ``"DEPT NUMBER"`` (compound codes are joined
        with ``/``), or ``None`` when no paragraph contains a match.
    """
    EXCLUDE_TERMS = ['FALL', 'SPRING', 'WINTER', 'SUMMER', 'ROOM', 'TO', 'AT', 'THE']
    # The alternation is wrapped in a group so the trailing \b applies to
    # every term. Without the group only the last term is whole-word and any
    # department merely starting with "TO"/"AT" (e.g. "ATM 350") is rejected.
    exclude_lookahead = r'(?!(?:' + '|'.join(EXCLUDE_TERMS) + r')\b)'

    # A course number, optionally followed by "/<second number>".
    num_pattern_ext = r'(\d{3,4}[a-zA-Z]?(?:\s*\/\s*\d{3,4}[a-zA-Z]?)?)'

    # Order matters: when two surviving matches start at the same offset,
    # the one whose pattern is listed first is chosen.
    patterns = [{
        'name': 'compound',
        'pattern': re.compile(
            r'\b(([A-Z]{2,4}\d{3,4}[a-zA-Z]?)(?:\s*/\s*[A-Z]{2,4}\d{3,4}[a-zA-Z]?)+)\b'
        ),
    }]
    patterns.extend(
        {
            'name': 'long_name',
            'pattern': re.compile(
                r'\b' + re.escape(long_name) + r'\s+(\d{3,4}[a-zA-Z]?)\b',
                re.IGNORECASE,
            ),
            'short_code': short_code,
        }
        for long_name, short_code in COURSE_CODE_MAP.items()
    )
    patterns.append({
        'name': 'all_caps',
        'pattern': re.compile(
            r'\b' + exclude_lookahead
            + r'([A-Z]{2,4}(?:\s*\/\s*[A-Z]{2,4})*)'
            + r'\s*-?\s*' + num_pattern_ext + r'\b'
        ),
    })
    patterns.append({
        'name': 'mixed_case',
        'pattern': re.compile(r'\b([A-Z][a-z]{1,3})' + num_pattern_ext + r'\b'),
    })

    for para in paragraphs[:search_limit]:
        found_matches = [
            {
                'start': match.start(), 'end': match.end(),
                'pattern_name': p_info['name'], 'match_obj': match,
                'short_code': p_info.get('short_code'),
            }
            for p_info in patterns
            for match in p_info['pattern'].finditer(para)
        ]
        if not found_matches:
            continue

        # Drop matches strictly contained in a longer match (for example the
        # "CS101" piece of "CS101/MATH202"). A match never strictly contains
        # itself, so no self-comparison guard is needed.
        final_candidates = [
            m for m in found_matches
            if not any(
                other['start'] <= m['start']
                and other['end'] >= m['end']
                and (other['end'] - other['start']) > (m['end'] - m['start'])
                for other in found_matches
            )
        ]
        if not final_candidates:
            continue

        earliest = min(final_candidates, key=lambda c: c['start'])
        match = earliest['match_obj']
        name = earliest['pattern_name']

        if name == 'compound':
            pieces = [piece.strip() for piece in match.group(1).upper().split('/')]
            # Insert the space between department letters and the number.
            return '/'.join(
                re.sub(r'([A-Z]+)(\d)', r'\1 \2', piece, count=1)
                for piece in pieces
            )
        if name == 'long_name':
            return f"{earliest['short_code']} {match.group(1).upper()}"
        if name == 'all_caps':
            dept = re.sub(r'\s*\/\s*', '/', match.group(1))
            num = re.sub(r'\s*\/\s*', '/', match.group(2).upper())
            return f"{dept} {num}"
        if name == 'mixed_case':
            dept = match.group(1).upper()
            num = re.sub(r'\s*\/\s*', '/', match.group(2).upper())
            return f"{dept} {num}"

    return None






def find_course_code_position_based_2(paragraphs, search_limit=30):
    
    EXCLUDE_TERMS = ['FALL', 'SPRING', 'WINTER', 'SUMMER', 'ROOM', 'TO', 'AT', 'THE']
    exclude_pattern_str = '|'.join(EXCLUDE_TERMS)
    
    separator = r'\s*-?\s*'
    num_part = r'\d{3,4}[a-zA-Z]?'
    num_ext = f'({num_part}(?:{separator}/{separator}{num_part})?)'

    patterns_to_find = []
    
    for long_name, short_code in COURSE_CODE_MAP.items():
        patterns_to_find.append({
            'name': 'long_name', 'short_code': short_code,
            'pattern': re.compile(r'\b' + re.escape(long_name) + r'\s+(' + num_part + r')\b', re.IGNORECASE)
        })
    patterns_to_find.append({
        'name': 'compound',
        'pattern': re.compile(r'\b(([A-Z]{2,4}' + num_part + r')(?:' + separator + r'/' + separator + r'[A-Z]{2,4}' + num_part + r')+)\b')
    })
    patterns_to_find.append({
        'name': 'all_caps',
        'pattern': re.compile(r'\b(?!' + exclude_pattern_str + r'\b)([A-Z]{2,4}(?:' + separator + r'/' + separator + r'[A-Z]{2,4})*)' + separator + num_ext + r'\b')
    })
    patterns_to_find.append({
        'name': 'mixed_case',
        'pattern': re.compile(r'\b([A-Z][a-z]{1,3})' + separator + num_ext + r'\b')
    })
    found_matches = []
    for para in paragraphs[:search_limit]:
        for p_info in patterns_to_find:
            for match in p_info['pattern'].finditer(para):

                if p_info['name'] == 'all_caps':
                    dept_to_check = match.group(1).split('/')[0].strip()
                    if dept_to_check in EXCLUDE_TERMS:
                        continue
                
                found_matches.append({

                    
                    'start': match.start(), 'end': match.end(),
                    'type': p_info['name'], 'match_obj': match,
                    'short_code': p_info.get('short_code')
                })
    
    if not found_matches:
        return None