# In [4]:
import mysql.connector
from mysql.connector import Error
from db_credentials import host, user, password
import faker

def create_connection():
    """Open a connection to the MySQL server with the imported credentials.

    Returns:
        The object returned by ``mysql.connector.connect`` on success, or
        ``None`` (after printing the error) when the attempt raises ``Error``.
    """
    credentials = {"host": host, "user": user, "password": password}
    try:
        return mysql.connector.connect(**credentials)
    except Error as e:
        print(f"Error connecting to MySQL: {e}")
    return None

# In [None]:
import random
from mysql.connector import Error

# Variables for number of records
NUM_DEPARTMENTS = 5  # NOTE(review): not referenced by any function visible in this file
NUM_COURSES = 20  # passed to populate_courses(), which does not read it
NUM_TEACHERS = 13  # number of rows populate_teachers() is asked to insert
NUM_STUDENTS_MIN = 55  # lower bound handed to random.randint() for the student count
NUM_STUDENTS_MAX = 65  # upper bound handed to random.randint() for the student count

def create_database(connection):
    """Drop and recreate the ``university`` database, then select it.

    Any existing ``university`` database is dropped first, so all of its data
    is lost. A ``mysql.connector.Error`` raised by any statement is printed,
    not propagated.

    Args:
        connection: Open MySQL connection whose cursor runs the statements.
    """
    cursor = None
    try:
        cursor = connection.cursor()
        cursor.execute("DROP DATABASE IF EXISTS university")
        cursor.execute("CREATE DATABASE university")
        # Select the new database so later statements on this connection
        # can use unqualified table names.
        cursor.execute("USE university")
        print("Database created successfully")
    except Error as e:
        print(f"Error creating database: {e}")
    finally:
        # Release the cursor on both the success and the error path.
        if cursor is not None:
            cursor.close()

def create_tables(connection):
    """Create the teachers, departments, students, courses and enrollments tables.

    The statements use unqualified table names, so a default database must
    already be selected on ``connection``. A ``mysql.connector.Error`` raised
    by any statement is printed, not propagated; tables created before the
    failing statement are left in place.

    Args:
        connection: Open MySQL connection whose cursor runs the DDL.
    """
    # Ordered so that every FOREIGN KEY target exists before the table
    # that references it is created.
    table_statements = (
        # Step 1: teachers has no foreign keys, so it comes first.
        """
        CREATE TABLE teachers (
            teacher_id INT AUTO_INCREMENT PRIMARY KEY,
            name VARCHAR(100) NOT NULL,
            age INT NOT NULL,
            salary DECIMAL(10, 2) NOT NULL,
            specialization VARCHAR(100) NOT NULL
        );
        """,
        # Step 2: departments.head_id refers to teachers.
        """
        CREATE TABLE departments (
            department_id INT AUTO_INCREMENT PRIMARY KEY,
            department_name VARCHAR(50) NOT NULL,
            budget DECIMAL(10, 2),
            head_id INT,
            FOREIGN KEY (head_id) REFERENCES teachers(teacher_id)
            ON DELETE SET NULL ON UPDATE CASCADE
        );
        """,
        # Step 3: students.department_id refers to departments.
        """
        CREATE TABLE students (
            student_id INT AUTO_INCREMENT PRIMARY KEY,
            name VARCHAR(100) NOT NULL,
            age INT,
            department_id INT,
            FOREIGN KEY (department_id) REFERENCES departments(department_id)
            ON DELETE CASCADE ON UPDATE CASCADE
        );
        """,
        # Step 4: courses refers to both departments and teachers.
        """
        CREATE TABLE courses (
            course_id INT AUTO_INCREMENT PRIMARY KEY,
            course_name VARCHAR(100) NOT NULL,
            department_id INT,
            teacher_id INT,
            FOREIGN KEY (department_id) REFERENCES departments(department_id)
            ON DELETE CASCADE ON UPDATE CASCADE,
            FOREIGN KEY (teacher_id) REFERENCES teachers(teacher_id)
            ON DELETE SET NULL ON UPDATE CASCADE
        );
        """,
        # Step 5: enrollments links students to courses.
        """
        CREATE TABLE enrollments (
            enrollment_id INT AUTO_INCREMENT PRIMARY KEY,
            student_id INT,
            course_id INT,
            grade VARCHAR(2),
            FOREIGN KEY (student_id) REFERENCES students(student_id)
            ON DELETE CASCADE ON UPDATE CASCADE,
            FOREIGN KEY (course_id) REFERENCES courses(course_id)
            ON DELETE CASCADE ON UPDATE CASCADE
        );
        """,
    )

    cursor = None
    try:
        cursor = connection.cursor()
        for statement in table_statements:
            cursor.execute(statement)
        connection.commit()
        print("Tables created successfully")
    except Error as e:
        print(f"Error creating tables: {e}")
    finally:
        # Release the cursor on both the success and the error path.
        if cursor is not None:
            cursor.close()

def populate_teachers(connection, num_teachers):
    """Insert ``num_teachers`` randomly generated rows into ``teachers``.

    Each row gets a Faker-generated name, an age in 30..60, a salary between
    50000 and 120000 rounded to two decimals, and a specialization drawn at
    random from a fixed list.

    Args:
        connection: Open MySQL connection; the inserts are committed on it.
        num_teachers: Number of rows to insert. Zero or a negative value
            inserts nothing.
    """
    specializations = [
        "Computer Science", "Mathematics", "Physics", "Biology", "History",
        "Chemistry", "English", "Philosophy", "Sociology", "Economics"
    ]

    # One Faker instance serves every row; building a new one per row only
    # repeats its setup work.
    fake = faker.Faker()
    rows = [
        (
            fake.name()[:100],  # clip to at most 100 characters
            random.randint(30, 60),
            round(random.uniform(50000, 120000), 2),
            random.choice(specializations),
        )
        for _ in range(num_teachers)
    ]

    cursor = connection.cursor()
    try:
        cursor.executemany("""
            INSERT INTO teachers (name, age, salary, specialization)
            VALUES (%s, %s, %s, %s)
        """, rows)
        connection.commit()
    finally:
        # Release the cursor even if the insert or commit fails.
        cursor.close()
    print("Teachers populated")

def get_teacher_ids(connection):
    """Return every ``teacher_id`` currently stored in ``teachers``.

    Args:
        connection: Open MySQL connection used for the query.

    Returns:
        A list of the first column of each fetched row, in the order the
        server returned them (the query has no ORDER BY).
    """
    cursor = connection.cursor()
    try:
        cursor.execute("SELECT teacher_id FROM teachers")
        return [row[0] for row in cursor.fetchall()]
    finally:
        # Release the cursor whether or not the query succeeded.
        cursor.close()

def populate_departments(connection, teacher_ids):
    """Insert the five fixed departments and assign head teachers.

    The first four departments each get a head drawn at random from
    ``teacher_ids``; the same teacher may be drawn more than once. The fifth
    department is inserted with no head.

    Args:
        connection: Open MySQL connection; the inserts are committed on it.
        teacher_ids: Sequence of existing teacher IDs to draw heads from.
            When it is empty, every department is inserted with no head.
    """
    departments = [
        ('Computer Science', 500000),
        ('Mathematics', 300000),
        ('Physics', 400000),
        ('Biology', 250000),
        ('History', 200000)
    ]

    cursor = connection.cursor()
    try:
        # Assign heads to the first 4 departments (one department will have no head initially)
        for i, (dept_name, budget) in enumerate(departments):
            # random.choice() raises IndexError on an empty sequence, so fall
            # back to "no head" when there are no teachers to pick from.
            head_id = random.choice(teacher_ids) if i < 4 and teacher_ids else None
            cursor.execute("""
                INSERT INTO departments (department_name, budget, head_id)
                VALUES (%s, %s, %s)
            """, (dept_name, budget, head_id))

        connection.commit()
    finally:
        # Release the cursor even if an insert or the commit fails.
        cursor.close()
    print("Departments populated")

def get_department_ids(connection):
    """Return every ``department_id`` currently stored in ``departments``.

    Args:
        connection: Open MySQL connection used for the query.

    Returns:
        A list of the first column of each fetched row, in the order the
        server returned them (the query has no ORDER BY).
    """
    cursor = connection.cursor()
    try:
        cursor.execute("SELECT department_id FROM departments")
        return [row[0] for row in cursor.fetchall()]
    finally:
        # Release the cursor whether or not the query succeeded.
        cursor.close()

def populate_students(connection, num_students, department_ids):
    """Insert ``num_students`` randomly generated rows into ``students``.

    Each row gets a Faker-generated name, an age in 18..25, and a department
    drawn at random from ``department_ids``.

    Args:
        connection: Open MySQL connection; the inserts are committed on it.
        num_students: Number of rows to insert. Zero or a negative value
            inserts nothing.
        department_ids: Non-empty sequence of existing department IDs.

    Raises:
        IndexError: If ``department_ids`` is empty and ``num_students`` is
            positive (raised by ``random.choice``).
    """
    # One Faker instance serves every row; building a new one per row only
    # repeats its setup work.
    fake = faker.Faker()
    rows = [
        (
            fake.name()[:100],  # clip to at most 100 characters
            random.randint(18, 25),
            random.choice(department_ids),
        )
        for _ in range(num_students)
    ]

    cursor = connection.cursor()
    try:
        cursor.executemany("""
            INSERT INTO students (name, age, department_id)
            VALUES (%s, %s, %s)
        """, rows)
        connection.commit()
    finally:
        # Release the cursor even if the insert or commit fails.
        cursor.close()
    print("Students populated")

def get_student_ids(connection):
    """Return every ``student_id`` currently stored in ``students``.

    Args:
        connection: Open MySQL connection used for the query.

    Returns:
        A list of the first column of each fetched row, in the order the
        server returned them (the query has no ORDER BY).
    """
    cursor = connection.cursor()
    try:
        cursor.execute("SELECT student_id FROM students")
        return [row[0] for row in cursor.fetchall()]
    finally:
        # Release the cursor whether or not the query succeeded.
        cursor.close()

def populate_courses(connection, department_ids, teacher_ids, num_courses):
    """Insert a fixed catalogue of courses, unevenly spread across departments.

    Course lists are paired with ``department_ids`` by position: the first ID
    receives the 'Computer Science' list, the second 'Mathematics', and so
    on. Pairing stops at the shorter of the two sequences. Each course gets a
    teacher drawn at random from ``teacher_ids``, or no teacher at all.

    Args:
        connection: Open MySQL connection; the inserts are committed on it.
        department_ids: Department IDs, expected in the same order as the
            course catalogue below (NOTE(review): nothing here checks that
            an ID really belongs to the department its courses are named
            after).
        teacher_ids: Sequence of existing teacher IDs; may be empty, in which
            case every course is inserted without a teacher.
        num_courses: Not used; the number of inserted rows is fixed by the
            catalogue. Kept so existing callers keep working.
    """
    # List of real course names categorized by department
    courses_by_department = {
        'Computer Science': ['Data Structures', 'Algorithms', 'Database Systems', 'Operating Systems', 'Artificial Intelligence'],
        'Mathematics': ['Calculus', 'Linear Algebra', 'Statistics'],
        'Physics': ['Quantum Mechanics', 'Electromagnetism', 'Classical Mechanics', 'Thermodynamics'],
        'Biology': ['Cell Biology', 'Genetics'],
        'History': []  # No courses for History as per the requirement
    }

    # Built once instead of per course; unpacking accepts any sequence type.
    # The leading None lets some courses end up without a teacher.
    teacher_choices = [None, *teacher_ids]

    cursor = connection.cursor()
    try:
        for department_id, course_list in zip(department_ids, courses_by_department.values()):
            for course_name in course_list:
                teacher_id = random.choice(teacher_choices)
                cursor.execute("""
                    INSERT INTO courses (course_name, department_id, teacher_id)
                    VALUES (%s, %s, %s)
                """, (course_name, department_id, teacher_id))

        connection.commit()
    finally:
        # Release the cursor even if an insert or the commit fails.
        cursor.close()
    print("Courses populated")

def get_course_ids(connection):
    """Return every ``course_id`` currently stored in ``courses``.

    Args:
        connection: Open MySQL connection used for the query.

    Returns:
        A list of the first column of each fetched row, in the order the
        server returned them (the query has no ORDER BY).
    """
    cursor = connection.cursor()
    try:
        cursor.execute("SELECT course_id FROM courses")
        return [row[0] for row in cursor.fetchall()]
    finally:
        # Release the cursor whether or not the query succeeded.
        cursor.close()

def populate_enrollments(connection, num_enrollments, student_ids, course_ids):
    """Insert ``num_enrollments`` random rows into ``enrollments``.

    Each row pairs a random student with a random course and a random grade
    from A, B, C, D, F. Pairs are drawn independently, so the same student
    can be enrolled in the same course more than once.

    Args:
        connection: Open MySQL connection; the inserts are committed on it.
        num_enrollments: Number of rows to insert. Zero or a negative value
            inserts nothing.
        student_ids: Non-empty sequence of existing student IDs.
        course_ids: Non-empty sequence of existing course IDs.

    Raises:
        IndexError: If ``student_ids`` or ``course_ids`` is empty and
            ``num_enrollments`` is positive (raised by ``random.choice``).
    """
    grades = ['A', 'B', 'C', 'D', 'F']

    rows = [
        (
            random.choice(student_ids),
            random.choice(course_ids),
            random.choice(grades),
        )
        for _ in range(num_enrollments)
    ]

    cursor = connection.cursor()
    try:
        # A single executemany() call replaces one execute() per row.
        cursor.executemany("""
            INSERT INTO enrollments (student_id, course_id, grade)
            VALUES (%s, %s, %s)
        """, rows)
        connection.commit()
    finally:
        # Release the cursor even if the insert or commit fails.
        cursor.close()
    print("Enrollments populated")

def main():
    """Create the university database and fill it with generated data.

    Steps run in dependency order: database, tables, teachers, departments,
    students, courses, enrollments. IDs are read back after each populate
    step so that later inserts refer to rows that exist. The connection is
    closed when the run ends, including when a step raises.
    """
    connection = create_connection()
    if connection is None:
        print("Failed to connect to MySQL.")
        return

    try:
        create_database(connection)
        create_tables(connection)

        populate_teachers(connection, NUM_TEACHERS)
        teacher_ids = get_teacher_ids(connection)

        populate_departments(connection, teacher_ids)
        department_ids = get_department_ids(connection)

        num_students = random.randint(NUM_STUDENTS_MIN, NUM_STUDENTS_MAX)
        populate_students(connection, num_students, department_ids)
        student_ids = get_student_ids(connection)

        populate_courses(connection, department_ids, teacher_ids, NUM_COURSES)
        course_ids = get_course_ids(connection)

        # Number of enrollments is 1.5x the number of students to simulate multiple enrollments per student
        num_enrollments = int(1.5 * num_students)

        populate_enrollments(connection, num_enrollments, student_ids, course_ids)
    finally:
        # Close the connection even when one of the steps above raises.
        connection.close()
    print("Database setup and population completed.")

# Run the setup only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()
