In [None]:
# The file ../.data/InstructorData/DCE.json contains instructor data for the Department of Civil Engineering (DCE).
# The same instructor may appear in multiple records with different courses/times; merge those records into a single instance.
# [
#     {
#         "name": "Bimal Ojha",
#         "email": "bimal.ojha@email.com",
#         "phone": "9855018620",
#         "th": "8",
#         "pr": "10",
#         "pro": "BO",
#         "avail": {
#             "Sunday": [
#                 "2:25-3:15"
#             ],
#             "Monday": [
#                 "2:25-3:15"
#             ],
#             "Tuesday": [
#                 "3:15-4:05"
#             ],
#             "Wednesday": [
#                 "10:15-11:05",
#                 "2:25-3:15"
#             ],
#             "Thursday": [
#                 "10:15-11:05",
#                 "3:15-4:05"
#             ],
#             "Friday": [
#                 "10:15-11:05",
#                 "2:25-3:15",
#                 "3:15-4:05",
#                 "4:05-4:55"
#             ]
#         },
#         "fulltime": true,
#         "courses": [
#             {
#                 "name": "Theory of Structure-II",
#                 "type": "theory"
#             },
#             {
#                 "name": "Theory of Structure-II",
#                 "type": "practical"
#             },
#             {
#                 "name": "Design of R.C.C Structure",
#                 "type": "practical"
#             }
#         ]
#     },

In [5]:
import json
from collections import defaultdict
import os

def _merge_instructor_group(group):
    """Collapse several records of the same instructor into one record.

    Scalar fields are taken from the first record; 'avail' and 'courses'
    are rebuilt as the union across every record in the group.
    """
    # A shallow copy is sufficient: the two nested fields are replaced
    # wholesale below, so the input records are never mutated.
    merged = group[0].copy()

    # Union the time slots per day; a set drops repeated slots.
    slots_by_day = defaultdict(set)
    for record in group:
        for day, times in record['avail'].items():
            slots_by_day[day].update(times)

    # Sets are unordered, so sort to get deterministic output
    # (plain string sort, same ordering as before).
    merged['avail'] = {day: sorted(times) for day, times in slots_by_day.items()}

    # Keep the first occurrence of each (name, type) course pair, in order.
    unique_courses = []
    seen_courses = set()
    for record in group:
        for course in record['courses']:
            course_key = (course['name'], course['type'])
            if course_key not in seen_courses:
                seen_courses.add(course_key)
                unique_courses.append(course)
    merged['courses'] = unique_courses

    return merged


def merge_instructors(file_path):
    """
    Merge duplicate instructor records from an instructor JSON file.

    Records sharing the same (name, email) pair are merged into a single
    record: availability slots are unioned per day and courses are
    de-duplicated by (name, type). Records that appear only once are passed
    through unchanged.

    The merged list is written next to the input file with "_merged"
    inserted before the file extension (e.g. DCE.json -> DCE_merged.json).
    The input file is never overwritten.

    Args:
        file_path: Path to a JSON file holding a list of instructor records.

    Returns:
        The list of merged instructor records, or None if the file is
        missing, is not valid JSON, or any other error occurs (the error is
        printed rather than raised).
    """

    # Check if file exists
    if not os.path.exists(file_path):
        print(f"Error: File not found at {file_path}")
        print(f"Current working directory: {os.getcwd()}")
        return None

    try:
        # JSON is UTF-8 by specification; do not depend on the platform's
        # locale encoding when reading it.
        with open(file_path, 'r', encoding='utf-8') as file:
            instructors = json.load(file)

        print(f"Successfully loaded {len(instructors)} instructor records")

        # Group records by the (name, email) identity key.
        instructor_groups = defaultdict(list)
        for instructor in instructors:
            key = (instructor['name'], instructor['email'])
            instructor_groups[key].append(instructor)

        merged_instructors = []
        for (name, email), group in instructor_groups.items():
            if len(group) == 1:
                # No duplicates, add as is
                merged_instructors.append(group[0])
            else:
                print(f"Merging {len(group)} instances of {name}")
                merged_instructors.append(_merge_instructor_group(group))

        # Build the output name from the extension only. A plain
        # str.replace('.json', ...) would also rewrite '.json' inside
        # directory names, and would leave the path unchanged (overwriting
        # the input) when the file does not end in '.json'.
        root, ext = os.path.splitext(file_path)
        output_path = f"{root}_merged{ext}"
        with open(output_path, 'w', encoding='utf-8') as file:
            json.dump(merged_instructors, file, indent=4)

        print(f"Original file preserved: {file_path}")
        print(f"Merged data saved to: {output_path}")
        print(f"Merged {len(instructors)} instructor records into {len(merged_instructors)} unique instructors")
        return merged_instructors

    except json.JSONDecodeError as e:
        print(f"Error: Invalid JSON format - {e}")
        return None
    except Exception as e:
        # Deliberate best-effort: callers rely on a None return rather
        # than an exception, so report the problem and bail out.
        print(f"Error: {e}")
        return None

# Run the merge against the department's instructor file.
file_path = "../.data/InstructorData/DCE.json"
merged_data = merge_instructors(file_path)

# merge_instructors returns None on failure. An empty list is a valid
# (if empty) result, so compare against None instead of relying on
# truthiness, which would report a successful empty merge as a failure.
if merged_data is not None:
    # Display summary
    print("\nMerged instructors summary:")
    for instructor in merged_data:
        print(f"- {instructor['name']}: {len(instructor['courses'])} courses")
else:
    print("Failed to merge instructors")

Successfully loaded 86 instructor records
Merging 3 instances of Dr. Indra Narayan Yadav
Merging 3 instances of Om Shant Maharjan
Merging 3 instances of Piyush Pradhan
Merging 3 instances of Krishna Bhattarai
Merging 3 instances of Shailendra Raj Khanal
Merging 3 instances of Rajesh Khadka
Merging 3 instances of Rajendra Bd. Chhetri
Merging 3 instances of Khem Raj Regmi
Merging 3 instances of Dr. Sujan Tripathi
Merging 3 instances of RPN
Merging 3 instances of Pradeep Pr. Bhatta
Merging 3 instances of Surakshya Kafle
Merging 2 instances of Nirajan Devkota
Merging 2 instances of Dr. Ram Chandra Tiwari
Merging 3 instances of Dr. Gokarna Ba. Mohatra
Merging 3 instances of Dr. Rajan Suwal
Merging 3 instances of Sukriti Suvedi
Merging 3 instances of Dr. Alin Chandra Shakya
Merging 3 instances of Bibek Babu Lamichhanie
Merging 3 instances of Prajwal Singh Thapa
Merging 3 instances of Merina Sthapit
Merging 3 instances of Bijaya Kumar Tuladhar
Merging 3 instances of Gopal Bhattarai
Merging 3 

In [None]:
# Diagnostic: show what is inside the instructor-data directory, or, if it
# cannot be found, report the working directory the relative path is
# being resolved against.
import os

data_dir = "../.data/InstructorData"
if not os.path.exists(data_dir):
    print(f"Directory {data_dir} does not exist")
    print(f"Current directory: {os.getcwd()}")
else:
    print(f"Files in {data_dir}:")
    # One entry per line, in whatever order the OS returns them.
    for file in os.listdir(data_dir):
        print(f"  - {file}")