#   Create the Dummy Data

In [2]:
import csv
import random
from faker import Faker
import os
from datetime import datetime, timedelta

# Shared Faker instance; the functions below that need fake names or dates
# read this module-level name.
faker = Faker()

# Random email generator
def generate_email(first_name, last_name):
    """Build an address of the form ``first.last<NN>@gmail.com``.

    Both name parts are lower-cased and a random integer from 10 to 99 is
    appended to the local part. No uniqueness check is performed.
    """
    local_part = '.'.join((first_name.lower(), last_name.lower()))
    two_digits = random.randint(10, 99)
    return f'{local_part}{two_digits}@gmail.com'

# Generate random gender
def random_gender():
    """Return either 'MALE' or 'FEMALE', picked at random."""
    options = ('MALE', 'FEMALE')
    return random.choice(options)

# Generate training dates
def generate_training_dates():
    """Return a ``(start_date, end_date)`` pair for one training.

    The start is obtained from ``faker.date_between`` with the bounds
    2024-01-01 and 2024-10-01; the end is the start plus a random number of
    days between 10 and 60.
    """
    window_open = datetime(2024, 1, 1)
    window_close = datetime(2024, 10, 1)
    begins = faker.date_between(start_date=window_open, end_date=window_close)
    length_in_days = random.randint(10, 60)
    return begins, begins + timedelta(days=length_in_days)

# Create Users (Admins, Trainers, Employees in one file)
def create_users(num_admins=10, num_trainers=40, num_employees=1000):
    """Generate user rows for admins, trainers and employees, in that order.

    Each row is a dict with the keys ``firstName``, ``lastName``, ``email``,
    ``password``, ``role`` and ``gender``. The e-mail is derived from the
    same first/last name that is stored in the row (previously a second,
    unrelated name was drawn for the address, so the two did not match).

    Args:
        num_admins: Number of rows with role ``'ADMIN'``.
        num_trainers: Number of rows with role ``'TRAINER'``.
        num_employees: Number of rows with role ``'EMPLOYEE'``.

    Returns:
        A list of user dicts: all admins first, then trainers, then employees.
    """
    # (role, how many to create, password shared by every user of that role)
    role_plan = (
        ('ADMIN', num_admins, 'AdminPass123'),
        ('TRAINER', num_trainers, 'TrainerPass123'),
        ('EMPLOYEE', num_employees, 'EmployeePass123'),
    )

    user_data = []
    for role, count, password in role_plan:
        for _ in range(count):
            # Draw the name once so the e-mail address agrees with it.
            first_name = faker.first_name()
            last_name = faker.last_name()
            user_data.append({
                'firstName': first_name,
                'lastName': last_name,
                'email': generate_email(first_name, last_name),
                'password': password,
                'role': role,
                'gender': random_gender(),
            })

    return user_data

# Create Domains
def create_domains():
    """Return the three fixed training domains as ``{'name': ...}`` dicts."""
    domain_names = ("DATA_ENGINEERING", "MACHINE_LEARNING", "FULL_STACK")
    return [{'name': label} for label in domain_names]

# Create Trainings (with 15 trainings)
def create_trainings(domains, trainers, num_trainings=15):
    """Generate training rows, each tied to a random domain and trainer.

    Other generators in this script use the training name as the training's
    identifier, so names must not collide. The numeric suffix is therefore
    drawn without replacement instead of independently per training.

    Args:
        domains: Sequence of dicts with a ``'name'`` key.
        trainers: Sequence of dicts with ``'firstName'`` and ``'lastName'``.
        num_trainings: How many trainings to create; values below zero are
            treated as zero.

    Returns:
        A list of dicts with the keys ``name``, ``description``,
        ``startDate``, ``endDate``, ``domainId`` and ``trainerId``.

    Raises:
        IndexError: If ``domains`` or ``trainers`` is empty while at least
            one training is requested (raised by ``random.choice``).
    """
    count = max(num_trainings, 0)
    # Suffixes stay within 1-1000 unless more than 1000 trainings are
    # requested, in which case the pool grows just enough to stay unique.
    pool_size = max(1000, count)
    suffixes = random.sample(range(1, pool_size + 1), count)

    training_data = []
    for suffix in suffixes:
        domain = random.choice(domains)  # Randomly pick a domain for each training
        start_date, end_date = generate_training_dates()
        trainer = random.choice(trainers)  # Assign a random trainer to each training
        training_data.append({
            'name': f"{domain['name']} Training {suffix}",  # Suffix is unique across all trainings
            'description': f"This training focuses on {domain['name']} skills and knowledge.",
            'startDate': start_date,
            'endDate': end_date,
            'domainId': domain['name'],  # Just using the name for simplicity
            'trainerId': f"{trainer['firstName']} {trainer['lastName']}",  # Trainer assigned by name
        })
    return training_data

# Create TrainingAssignments
def create_training_assignments(employees, trainings):
    """Pair every employee with up to three distinct, randomly sampled trainings.

    Employees are identified by ``"<firstName> <lastName>"`` and trainings by
    their ``name``. Returns a flat list of ``{'employeeId', 'trainingId'}``
    dicts, grouped by employee in input order.
    """
    return [
        {
            'employeeId': f"{person['firstName']} {person['lastName']}",
            'trainingId': course['name'],
        }
        for person in employees
        # One sample per employee; capped at the number of trainings available.
        for course in random.sample(trainings, k=min(3, len(trainings)))
    ]

# Create Scores
def create_scores(employees, trainings):
    """Create random 0-100 scores for up to three trainings per employee.

    The trainings are sampled here, independently of any other sampling done
    in this script. Each row carries ``value``, ``trainingId`` (training
    name), ``employeeId`` (``"<firstName> <lastName>"``) and ``trainerId``
    (copied from the training).
    """
    scores_data = []
    for person in employees:
        graded = random.sample(trainings, k=min(3, len(trainings)))
        scores_data.extend(
            {
                'value': random.randint(0, 100),
                'trainingId': course['name'],
                'employeeId': f"{person['firstName']} {person['lastName']}",
                'trainerId': course['trainerId'],
            }
            for course in graded
        )
    return scores_data

# Function to write data to CSV
def write_to_csv(filename, data, fieldnames):
    """Write ``data`` to ``filename`` as CSV, preceded by a header row.

    Args:
        filename: Destination path. Missing parent directories are created;
            a bare file name (no directory part) is written to the current
            working directory.
        data: Iterable of dicts, one per row.
        fieldnames: Column names, in output order.

    Raises:
        ValueError: If a row contains a key that is not in ``fieldnames``
            (``csv.DictWriter`` default behaviour).
    """
    parent_dir = os.path.dirname(filename)
    # dirname is '' for a bare file name and os.makedirs('') raises
    # FileNotFoundError, so only create a directory when one is named.
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    # Explicit encoding keeps the output independent of the platform default.
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

# Generate and save data to CSV files
def generate_data():
    """Generate all dummy data sets and write them as CSV files under ``output/``.

    Users are created first; trainers and employees are filtered out of that
    list by role and used to build trainings, training assignments and scores.
    """
    users = create_users()  # admins, trainers and employees together
    employees = [person for person in users if person['role'] == 'EMPLOYEE']
    trainers = [person for person in users if person['role'] == 'TRAINER']
    domains = create_domains()
    trainings = create_trainings(domains, trainers)
    training_assignments = create_training_assignments(employees, trainings)
    scores = create_scores(employees, trainings)

    # (destination, rows, column order) for every file, in write order
    exports = (
        ('output/users.csv', users,
         ['firstName', 'lastName', 'email', 'password', 'role', 'gender']),
        ('output/domains.csv', domains, ['name']),
        ('output/trainings.csv', trainings,
         ['name', 'description', 'startDate', 'endDate', 'domainId', 'trainerId']),
        ('output/training_assignments.csv', training_assignments,
         ['employeeId', 'trainingId']),
        ('output/scores.csv', scores,
         ['value', 'trainingId', 'employeeId', 'trainerId']),
    )
    for path, rows, columns in exports:
        write_to_csv(path, rows, columns)

# Run the full generation only when this code is executed as the main module.
if __name__ == "__main__":
    generate_data()


Script for Creating Dummy Data

In [1]:
import csv
import random
from faker import Faker
import os
from datetime import datetime, timedelta

# Shared Faker instance; the functions below that need fake names or dates
# read this module-level name.
faker = Faker()

# Random email generator with uniqueness
def generate_unique_email(first_name, last_name, existing_emails):
    """Return a ``first.last<N>@gmail.com`` address not yet in ``existing_emails``.

    The numeric suffix is first drawn from 10-99. If no free address is found
    after a bounded number of tries, the suffix range is widened (100-999,
    then 1000-9999, ...) so the function still terminates when every
    two-digit suffix for this name is already taken; previously that case
    looped forever.

    Args:
        first_name: First name; lower-cased for the address.
        last_name: Last name; lower-cased for the address.
        existing_emails: Set of addresses already handed out. The returned
            address is added to it.

    Returns:
        The newly reserved e-mail address.
    """
    local_part = f'{first_name.lower()}.{last_name.lower()}'
    low, high = 10, 99
    while True:
        # Try as many random draws as the current range has values.
        for _ in range(high - low + 1):
            email = f'{local_part}{random.randint(low, high)}@gmail.com'
            if email not in existing_emails:
                existing_emails.add(email)
                return email
        # Range appears exhausted: move on to the next, ten times larger one.
        low, high = high + 1, high * 10 + 9

# Generate random gender
def random_gender():
    """Pick one of the two gender labels, 'MALE' or 'FEMALE', at random."""
    genders = ['MALE', 'FEMALE']
    picked = random.choice(genders)
    return picked

# Generate training dates
def generate_training_dates():
    """Produce the start and end date of one training as a 2-tuple.

    ``faker.date_between`` supplies the start, using 2024-01-01 and
    2024-10-01 as bounds; the end lies 10 to 60 days after the start.
    """
    earliest, latest = datetime(2024, 1, 1), datetime(2024, 10, 1)
    first_day = faker.date_between(start_date=earliest, end_date=latest)
    duration = timedelta(days=random.randint(10, 60))
    last_day = first_day + duration
    return first_day, last_day

# Generate designation based on role
def generate_designation(role):
    """Return a random job title for ``role``.

    Known roles are 'ADMIN', 'TRAINER' and 'EMPLOYEE'; any other value
    yields ``None``.
    """
    titles_by_role = (
        ('ADMIN', ['System Admin', 'IT Manager', 'HR Admin']),
        ('TRAINER', ['Lead Trainer', 'Senior Trainer', 'Training Coordinator']),
        ('EMPLOYEE', ['Software Engineer I', 'Software Engineer II', 'Software Engineer III', 'Solution Consultant', 'Solution Enabler']),
    )
    for known_role, titles in titles_by_role:
        if role == known_role:
            return random.choice(titles)
    return None

# Create Users (Admins, Trainers, Employees in one file)
def create_users(num_admins=20, num_trainers=200, num_employees=10000):
    """Generate user rows for admins, trainers and employees, in that order.

    Every row gets a 1-based incremental ``id``, a fake first/last name, an
    e-mail unique within this call, a fixed per-role plain-text password, a
    random gender and a role-dependent designation.

    Returns:
        A list of dicts with the keys ``id``, ``firstName``, ``lastName``,
        ``email``, ``password``, ``role``, ``gender`` and ``designation``.
    """
    user_data = []
    existing_emails = set()  # addresses already handed out in this call

    def add_users(count, role, password):
        # Append `count` users of one role; ids continue from the list length.
        for _ in range(count):
            first_name = faker.first_name()
            last_name = faker.last_name()
            email = generate_unique_email(first_name, last_name, existing_emails)
            user_data.append({
                'id': len(user_data) + 1,
                'firstName': first_name,
                'lastName': last_name,
                'email': email,
                'password': password,
                'role': role,
                'gender': random_gender(),
                'designation': generate_designation(role),
            })

    add_users(num_admins, 'ADMIN', 'AdminPass123')
    add_users(num_trainers, 'TRAINER', 'TrainerPass123')
    add_users(num_employees, 'EMPLOYEE', 'EmployeePass123')
    return user_data

# Create Domains
def create_domains():
    """Return the three fixed training domains with ids 1, 2 and 3."""
    domain_names = ("DATA_ENGINEERING", "MACHINE_LEARNING", "FULL_STACK")
    return [
        {'id': number, 'name': label}
        for number, label in enumerate(domain_names, start=1)
    ]

# Create Trainings (50 by default)
def create_trainings(domains, trainers, num_trainings=50):
    """Generate training rows, each tied to a random domain and trainer.

    Training names are meant to be unique. Independent random suffixes can
    repeat, so the suffixes are drawn without replacement instead.

    Args:
        domains: Sequence of dicts with ``'id'`` and ``'name'`` keys.
        trainers: Sequence of dicts with an ``'id'`` key.
        num_trainings: How many trainings to create; values below zero are
            treated as zero.

    Returns:
        A list of dicts with the keys ``id`` (1-based), ``name``,
        ``description``, ``startDate``, ``endDate``, ``domainId`` and
        ``trainerId``.

    Raises:
        IndexError: If ``domains`` or ``trainers`` is empty while at least
            one training is requested (raised by ``random.choice``).
    """
    count = max(num_trainings, 0)
    # Suffixes stay within 1-1000 unless more than 1000 trainings are
    # requested, in which case the pool grows just enough to stay unique.
    pool_size = max(1000, count)
    suffixes = random.sample(range(1, pool_size + 1), count)

    training_data = []
    for training_id, suffix in enumerate(suffixes, start=1):
        domain = random.choice(domains)  # Randomly pick a domain for each training
        start_date, end_date = generate_training_dates()
        trainer = random.choice(trainers)  # Assign a random trainer to each training
        training_data.append({
            'id': training_id,  # Incremental ID for each training
            'name': f"{domain['name']} Training {suffix}",  # Suffix is unique across all trainings
            'description': f"This training focuses on {domain['name']} skills and knowledge.",
            'startDate': start_date,
            'endDate': end_date,
            'domainId': domain['id'],  # Use ID from domains
            'trainerId': trainer['id'],  # Use ID from trainers
        })
    return training_data

# Create TrainingAssignments
def create_training_assignments(employees, trainings):
    """Assign up to three distinct, randomly sampled trainings to each employee.

    Returns a flat list of ``{'employeeId', 'trainingId'}`` dicts holding the
    numeric ``id`` values of the employee and the training.
    """
    rows = []
    for worker in employees:
        # Sample size is capped by the number of trainings available.
        picks = random.sample(trainings, k=min(3, len(trainings)))
        rows.extend(
            {'employeeId': worker['id'], 'trainingId': pick['id']}
            for pick in picks
        )
    return rows

# Create Scores
def create_scores(employees, trainings):
    """Create random 0-100 scores for up to three trainings per employee.

    The trainings are sampled here, independently of any other sampling done
    in this script. Rows hold ``value``, ``trainingId``, ``employeeId`` and
    ``trainerId`` (the trainer id copied from the training).
    """
    return [
        {
            'value': random.randint(0, 100),
            'trainingId': course['id'],
            'employeeId': worker['id'],
            'trainerId': course['trainerId'],
        }
        for worker in employees
        # One sample per employee; capped at the number of trainings available.
        for course in random.sample(trainings, k=min(3, len(trainings)))
    ]

# Function to write data to CSV
def write_to_csv(filename, data, fieldnames):
    """Write ``data`` to ``filename`` as CSV, preceded by a header row.

    Args:
        filename: Destination path. Missing parent directories are created;
            a bare file name (no directory part) is written to the current
            working directory.
        data: Iterable of dicts, one per row.
        fieldnames: Column names, in output order.

    Raises:
        ValueError: If a row contains a key that is not in ``fieldnames``
            (``csv.DictWriter`` default behaviour).
    """
    folder = os.path.dirname(filename)
    # os.makedirs('') raises FileNotFoundError, which is what a bare file
    # name would produce; skip directory creation in that case.
    if folder:
        os.makedirs(folder, exist_ok=True)

    # Explicit encoding keeps the output independent of the platform default.
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

# Generate and save data to CSV files
def generate_data():
    """Generate all dummy data sets and write them as CSV files under ``output01/``.

    Users are created first; trainers and employees are filtered out of that
    list by role and used to build trainings, training assignments and scores.
    """
    users = create_users()  # admins, trainers and employees together
    employees = [person for person in users if person['role'] == 'EMPLOYEE']
    trainers = [person for person in users if person['role'] == 'TRAINER']
    domains = create_domains()
    trainings = create_trainings(domains, trainers)
    training_assignments = create_training_assignments(employees, trainings)
    scores = create_scores(employees, trainings)

    # destination -> (rows, column order); dicts iterate in insertion order
    exports = {
        'output01/users.csv': (
            users,
            ['id', 'firstName', 'lastName', 'email', 'password', 'role', 'gender', 'designation'],
        ),
        'output01/domains.csv': (domains, ['id', 'name']),
        'output01/trainings.csv': (
            trainings,
            ['id', 'name', 'description', 'startDate', 'endDate', 'domainId', 'trainerId'],
        ),
        'output01/training_assignments.csv': (
            training_assignments,
            ['employeeId', 'trainingId'],
        ),
        'output01/scores.csv': (
            scores,
            ['value', 'trainingId', 'employeeId', 'trainerId'],
        ),
    }
    for path, (rows, columns) in exports.items():
        write_to_csv(path, rows, columns)

# Run the full generation only when this code is executed as the main module.
if __name__ == "__main__":
    generate_data()


In [1]:
import csv
import random
from faker import Faker
import os
from datetime import datetime, timedelta

# Shared Faker instance; the functions below that need fake names or dates
# read this module-level name.
faker = Faker()

# Random email generator with uniqueness
def generate_unique_email(first_name, last_name, existing_emails):
    """Return a ``first.last<N>@gmail.com`` address not yet in ``existing_emails``.

    The numeric suffix is first drawn from 10-99. If no free address is found
    after a bounded number of tries, the suffix range is widened (100-999,
    then 1000-9999, ...) so the function still terminates when every
    two-digit suffix for this name is already taken; previously that case
    looped forever.

    Args:
        first_name: First name; lower-cased for the address.
        last_name: Last name; lower-cased for the address.
        existing_emails: Set of addresses already handed out. The returned
            address is added to it.

    Returns:
        The newly reserved e-mail address.
    """
    local_part = f'{first_name.lower()}.{last_name.lower()}'
    low, high = 10, 99
    while True:
        # Try as many random draws as the current range has values.
        for _ in range(high - low + 1):
            email = f'{local_part}{random.randint(low, high)}@gmail.com'
            if email not in existing_emails:
                existing_emails.add(email)
                return email
        # Range appears exhausted: move on to the next, ten times larger one.
        low, high = high + 1, high * 10 + 9

# Generate random gender
def random_gender():
    """Return a randomly chosen gender label: 'MALE' or 'FEMALE'."""
    male, female = 'MALE', 'FEMALE'
    return random.choice((male, female))

# Generate training dates
def generate_training_dates():
    """Return ``(start_date, end_date)`` for a single training.

    The start comes from ``faker.date_between`` called with the bounds
    2024-01-01 and 2024-10-01. The end is the start shifted forward by a
    random 10 to 60 days.
    """
    bounds = {
        'start_date': datetime(2024, 1, 1),
        'end_date': datetime(2024, 10, 1),
    }
    opening_day = faker.date_between(**bounds)
    days_long = random.randint(10, 60)
    return opening_day, opening_day + timedelta(days=days_long)

# Generate designation based on role
def generate_designation(role):
    """Pick a random job title matching ``role``.

    'ADMIN', 'TRAINER' and 'EMPLOYEE' each have their own list of titles;
    for any other role the function returns ``None``.
    """
    candidates = None
    if role == 'ADMIN':
        candidates = ['System Admin', 'IT Manager', 'HR Admin']
    elif role == 'TRAINER':
        candidates = ['Lead Trainer', 'Senior Trainer', 'Training Coordinator']
    elif role == 'EMPLOYEE':
        candidates = ['Software Engineer I', 'Software Engineer II', 'Software Engineer III',
                      'Solution Consultant', 'Solution Enabler']

    if candidates is None:
        # Unknown role: nothing to choose from.
        return None
    return random.choice(candidates)

# Create Users (Admins, Trainers, Employees in one file)
def create_users(num_admins=20, num_trainers=200, num_employees=10000):
    """Build the combined user list: admins first, then trainers, then employees.

    Each user dict contains a 1-based running ``id``, fake ``firstName`` and
    ``lastName``, an ``email`` unique within this call, a fixed per-role
    ``password``, the ``role``, a random ``gender`` and a ``designation``
    chosen for that role.
    """
    # (role, number of users, password shared by that role)
    role_plan = [
        ('ADMIN', num_admins, 'AdminPass123'),
        ('TRAINER', num_trainers, 'TrainerPass123'),
        ('EMPLOYEE', num_employees, 'EmployeePass123'),
    ]

    existing_emails = set()
    user_data = []
    for role_name, how_many, role_password in role_plan:
        for _ in range(how_many):
            first_name = faker.first_name()
            last_name = faker.last_name()
            email = generate_unique_email(first_name, last_name, existing_emails)
            next_id = len(user_data) + 1
            user_data.append({
                'id': next_id,
                'firstName': first_name,
                'lastName': last_name,
                'email': email,
                'password': role_password,
                'role': role_name,
                'gender': random_gender(),
                'designation': generate_designation(role_name),
            })

    return user_data

# Create Domains
def create_domains():
    """Return the fixed list of training domains, each with ``id`` and ``name``."""
    catalogue = ((1, "DATA_ENGINEERING"), (2, "MACHINE_LEARNING"), (3, "FULL_STACK"))
    domains = []
    for domain_id, domain_name in catalogue:
        domains.append({'id': domain_id, 'name': domain_name})
    return domains

# Create Trainings
def create_trainings(domains, trainers, num_trainings=50):
    """Generate ``num_trainings`` training rows with ids starting at 1.

    Every training gets a random domain, a date range and a random trainer.
    The name is the domain name followed by ``Training`` and a random number
    from 1 to 1000; numbers are drawn independently, so names may repeat.
    """
    def build_row(training_id):
        # Draw order matters for reproducibility: domain, dates, trainer, number.
        domain = random.choice(domains)
        start_date, end_date = generate_training_dates()
        trainer = random.choice(trainers)
        return {
            'id': training_id,
            'name': f"{domain['name']} Training {random.randint(1, 1000)}",
            'description': f"This training focuses on {domain['name']} skills and knowledge.",
            'startDate': start_date,
            'endDate': end_date,
            'domainId': domain['id'],
            'trainerId': trainer['id'],
        }

    return [build_row(number) for number in range(1, num_trainings + 1)]

# Create Training Assignments
def create_training_assignments(employees, trainings):
    """Link each employee to up to three distinct, randomly sampled trainings.

    The result is a flat list of ``{'employeeId', 'trainingId'}`` dicts that
    reference the numeric ``id`` of the employee and of the training.
    """
    return [
        {'employeeId': staff['id'], 'trainingId': session['id']}
        for staff in employees
        # One sample per employee; capped at the number of trainings available.
        for session in random.sample(trainings, k=min(3, len(trainings)))
    ]

# Create Scores with Assignment Score
def create_scores_with_assignment(employees, trainings):
    """Create per-training score rows for up to three trainings per employee.

    The trainings are sampled here, independently of any other sampling done
    in this script. Each row holds the employee, training and trainer ids,
    four random 0-100 performance metrics (``codeQuality``,
    ``communication``, ``problemSolving``, ``timeManagement``) and a random
    0-100 ``assignment_score``.
    """
    metric_names = ('codeQuality', 'communication', 'problemSolving', 'timeManagement')

    scores_data = []
    for staff in employees:
        for session in random.sample(trainings, k=min(3, len(trainings))):
            # The final project score is drawn before the metrics.
            final_score = random.randint(0, 100)
            metrics = {metric: random.randint(0, 100) for metric in metric_names}

            row = {
                'employeeId': staff['id'],
                'trainingId': session['id'],
                'trainerId': session['trainerId'],
            }
            row.update(metrics)
            row['assignment_score'] = final_score
            scores_data.append(row)
    return scores_data

# Write data to CSV
def write_to_csv(filename, data, fieldnames):
    """Write ``data`` to ``filename`` as CSV, preceded by a header row.

    Args:
        filename: Destination path. Missing parent directories are created;
            a bare file name (no directory part) is written to the current
            working directory.
        data: Iterable of dicts, one per row.
        fieldnames: Column names, in output order.

    Raises:
        ValueError: If a row contains a key that is not in ``fieldnames``
            (``csv.DictWriter`` default behaviour).
    """
    directory = os.path.dirname(filename)
    # A bare file name has an empty dirname, and os.makedirs('') raises
    # FileNotFoundError; only create the directory when there is one.
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Explicit encoding keeps the output independent of the platform default.
    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(data)

# Generate and save data to CSV files
def generate_data():
    """Generate every data set and write the five CSV files under ``output01/``.

    Trainers and employees are taken from the generated user list by role and
    feed the trainings, the training assignments and the detailed scores.
    """
    user_columns = ['id', 'firstName', 'lastName', 'email', 'password', 'role', 'gender', 'designation']
    domain_columns = ['id', 'name']
    training_columns = ['id', 'name', 'description', 'startDate', 'endDate', 'domainId', 'trainerId']
    assignment_columns = ['employeeId', 'trainingId']
    score_columns = ['employeeId', 'trainingId', 'trainerId', 'codeQuality', 'communication',
                     'problemSolving', 'timeManagement', 'assignment_score']

    users = create_users()
    employees = [member for member in users if member['role'] == 'EMPLOYEE']
    trainers = [member for member in users if member['role'] == 'TRAINER']
    domains = create_domains()
    trainings = create_trainings(domains, trainers)
    training_assignments = create_training_assignments(employees, trainings)
    scores = create_scores_with_assignment(employees, trainings)

    write_to_csv('output01/users.csv', users, user_columns)
    write_to_csv('output01/domains.csv', domains, domain_columns)
    write_to_csv('output01/trainings.csv', trainings, training_columns)
    write_to_csv('output01/training_assignments.csv', training_assignments, assignment_columns)
    write_to_csv('output01/scores.csv', scores, score_columns)

# Run the full generation only when this code is executed as the main module.
if __name__ == "__main__":
    generate_data()
