In [20]:
import psycopg2
from psycopg2 import Error
import json
import os
from dotenv import load_dotenv
from faker import Faker

In [22]:
def write_to_json(data, filename):
    """Serialize ``data`` as JSON into ``filename``, replacing any existing content.

    Args:
        data: Any JSON-serializable object.
        filename: Path of the file to (over)write.
    """
    with open(filename, mode="w") as json_file:
        json.dump(data, fp=json_file)


def read_from_json(filename):
    """Load and return the JSON document stored in ``filename``.

    Args:
        filename: Path of an existing JSON file.

    Returns:
        The decoded Python object (dict, list, str, number, bool or None).
    """
    with open(filename, mode="r") as json_file:
        parsed = json.load(json_file)
    return parsed

In [21]:
# Load variables from a local .env file (if one exists) so DATABASE_URL is
# available; load_dotenv is imported at the top of the notebook and, by
# default, does not override variables that are already set.
load_dotenv()

# Connection string
conn_string = os.getenv("DATABASE_URL")

# Define both names up front so they exist even when the connection fails.
connection = None
cursor = None

try:
    if not conn_string:
        # Give a clear reason instead of psycopg2's generic "missing dsn" error.
        raise RuntimeError("DATABASE_URL is not set")

    # Connect to PostgreSQL database
    connection = psycopg2.connect(conn_string)
    print("Connected to PostgreSQL")
    cursor = connection.cursor()

except (Exception, Error) as error:
    # Best-effort connectivity check: report the problem and keep going.
    print("Error while connecting to PostgreSQL:", error)

Connected to PostgreSQL


In [23]:
def create_folder(folder_name):
    """Create ``folder_name`` (with any missing parents) unless the path already exists.

    Prints which of the two cases happened; returns nothing.
    """
    if os.path.exists(folder_name):
        print(f"Folder '{folder_name}' already exists.")
        return

    os.makedirs(folder_name)
    print(f"Folder '{folder_name}' created.")


# Output folder for the JSON files written by this notebook; later cells build
# their file paths from `folder_name`, so this must run before them.
folder_name = "data"
create_folder(folder_name)

Folder 'data' already exists.


## 1. Inserting Organization Data

In [25]:
# Organization id stamped as `org_id` on the generated locations, skill
# categories, skills and roles.
# NOTE(review): the Organization INSERT in this notebook lets the database
# return its own id, and the users cell hard-codes a different org_id —
# confirm which value the related rows are supposed to carry.
ORG_ID = "89933"

In [27]:
# Seed record(s) for the "Organization" table.
# NOTE(review): the insert cell that follows only sends name, contact_email and
# phone_number; the remaining keys are kept in the JSON file but not inserted.
organization_data_list = [
    {
        "id": ORG_ID,
        "name": "Tech Innovations Inc.",
        "contact_email": "contact@techinnovations.com",
        "phone_number": "+1-555-0123",
        "address": "123 Tech Avenue, Silicon Valley, CA",
        "industry": "Information Technology and Services",
        "founded_year": 2010,
        "number_of_employees": 250,
        "website": "www.techinnovations.com",
    }
    # Add more organization data dictionaries as needed
]

# Persist the list so the insert cell can read it back from disk
write_to_json(organization_data_list, f"{folder_name}/organization_data.json")

In [33]:
# Open a dedicated connection for this insert; it is closed again in `finally`.
connection = psycopg2.connect(conn_string)
cursor = connection.cursor()
try:

    # Read organization data from JSON file
    organization_data_list = read_from_json(f"{folder_name}/organization_data.json")

    # Define the INSERT query; the database generates the id and returns it
    insert_query = """
        INSERT INTO "Organization" (name, contact_email, phone_number)
        VALUES (%s, %s, %s)
        RETURNING id
    """

    # Iterate over organization data list and insert each entry
    for organization_data in organization_data_list:
        # Execute the INSERT query
        cursor.execute(
            insert_query,
            (
                organization_data["name"],
                organization_data["contact_email"],
                organization_data["phone_number"],
            ),
        )
        organization_id = cursor.fetchone()[
            0
        ]  # Get the ID of the inserted organization
        print(
            f"Organization '{organization_data['name']}' inserted with ID: {organization_id}"
        )

    # Commit the transaction
    connection.commit()
except (Exception, Error) as error:
    # Undo any partial inserts so the connection is not left in an aborted
    # transaction, and report what actually failed (the connection itself was
    # already established above).
    connection.rollback()
    print("Error while inserting Organization data:", error)
finally:
    # Always release the database resources, on success and on failure.
    cursor.close()
    connection.close()

Organization 'Tech Innovations Inc.' inserted with ID: 8fd51c1d-32f9-48e3-a473-cf57f29bea05


Organization 'Tech Innovations Inc.' inserted with ID: 8fd51c1d-32f9-48e3-a473-cf57f29bea05

## 2. Inserting Location

In [28]:
# Initialize Faker
# NOTE(review): no seed is set in this cell, so the generated data presumably
# differs between runs — confirm whether reproducible data is wanted.
fake = Faker()
# How many fake locations to generate
number_of_locations = 20


# Generate fake location data and write to JSON file
def generate_location_data(org_id, number_of_locations):
    """Build ``number_of_locations`` fake location records and save them as JSON.

    Each record gets a unique numeric id (as a string), the given ``org_id``,
    a street name, an optional description, and float coordinates. The list is
    written to ``<folder_name>/locations.json``; nothing is returned.
    """
    locations = []
    for _ in range(number_of_locations):
        location_id = str(fake.unique.random_number(digits=5))
        street = fake.street_name()

        # A 50% draw decides whether this location gets a description or None
        if fake.boolean(chance_of_getting_true=50):
            blurb = fake.text(max_nb_chars=200)
        else:
            blurb = None

        lat = float(fake.latitude())
        lon = float(fake.longitude())

        locations.append(
            dict(
                id=location_id,
                org_id=org_id,
                name=street,
                description=blurb,
                latitude=lat,
                longitude=lon,
            )
        )

    write_to_json(locations, f"{folder_name}/locations.json")


# Generate the fake locations for ORG_ID and write them to the locations JSON file
generate_location_data(ORG_ID, number_of_locations)

In [41]:
# Read the locations from a JSON file and insert them into the database
def insert_locations_from_json(cursor, json_file):
    """Insert every location stored in ``json_file`` into the "Location" table.

    Args:
        cursor: An open database cursor; the caller owns commit/rollback.
        json_file: Path of the JSON file holding a list of location records
            with ``org_id``, ``name``, ``description``, ``latitude`` and
            ``longitude`` keys.

    Returns:
        The number of location rows sent to the database.
    """
    # Honour the path the caller passed in (it used to be ignored in favour of
    # a hard-coded location).
    locations = read_from_json(json_file)

    # The id column is not supplied here; the INSERT leaves it to the table.
    insert_query = """
        INSERT INTO "Location" ( org_id, name, description, latitude, longitude)
        VALUES (%s, %s, %s, %s, %s);
    """

    for location in locations:
        cursor.execute(
            insert_query,
            (
                location["org_id"],
                location["name"],
                location["description"],
                location["latitude"],
                location["longitude"],
            ),
        )

    return len(locations)


# Set up database connection
connection_string = os.getenv("DATABASE_URL")
conn = psycopg2.connect(connection_string)
cursor = conn.cursor()

try:
    # Read from the JSON file and insert data into PostgreSQL
    inserted_locations = insert_locations_from_json(
        cursor, f"{folder_name}/locations.json"
    )

    # Commit the transaction
    conn.commit()
except Exception:
    # Discard partial inserts, then let the error surface in the notebook.
    conn.rollback()
    raise
finally:
    # Clean up on success and on failure
    cursor.close()
    conn.close()

# Report the number of rows actually read from the file and inserted
print(f"Inserted {inserted_locations} fake Location entries into the database.")

Inserted 20 fake Location entries into the database.


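Prerequisite for UUID ids on "Location": enable the `uuid-ossp` extension first (it provides `uuid_generate_v4()`), then set the column default.
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
ALTER TABLE "Location" ALTER COLUMN id SET DEFAULT uuid_generate_v4();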

Organization 'Tech Innovations Inc.' inserted with ID: 8fd51c1d-32f9-48e3-a473-cf57f29bea05

# 3. Inserting Skill Categories

In [31]:
from faker import Faker
import json

# Create a Faker instance
# NOTE(review): no visible cell calls a method on `faker` — confirm it is still needed.
faker = Faker()

# Organization id attached to every skill category built in this cell
org_id = ORG_ID

# Skill categories as (name, description) pairs, in the order they receive ids.
# Declared as a name -> description mapping and flattened to a list of tuples.
skill_categories_info = list(
    {
        "Senior Management": "Skills relevant to leadership and high-level management decisions.",
        "Cybersecurity": "Skills related to protecting information from unauthorized access and cyber threats.",
        "Software Engineering": "Skills in designing, developing, testing, and evaluating software systems.",
        "Project Management": "Skills required for effectively managing projects and resources.",
        "Data Science": "Skills for analyzing complex data to extract actionable insights.",
        "DevOps": "Skills for collaboration between software development and IT operations.",
        "Cloud Computing": "Skills related to the delivery of computing services over the internet.",
        "Front End Technologies": "Skills in client-side development, such as using Angular, React JS, and Vue.",
        "Server Side Technologies": "Skills in server-side development, such as Node JS, Java Spring Boot, .NET, Laravel, Adobe Cold fusion.",
        "Database Technologies": "Skills related to database management, such as SQL.",
        "Business Intelligence": "Skills in BI tools like Apache Superset, Power BI, Tableau, Looker.",
        "User Interface Design": "Skills in UI design and development, such as Wordpress, Web Designing (HTML, CSS, SCSS), Javascript (ES6).",
        "Mobile Development": "Skills in mobile app development, such as Flutter and React Native.",
        "Web Scraping": "Skills in scraping technologies like Selenium / Puppeteer.",
        "Scripting Languages": "Skills in scripting languages, such as Python.",
        "ELT / ETL Technologies": "Skills in ELT/ETL processes, such as DBT, Alteryx, Azure Synapse, Databricks (Spark), Matillion.",
        "Data Warehousing": "Skills related to data warehousing, such as Snowflake.",
        "Machine Learning": "Skills in ML technologies like opencv, scikit-learn, tensorflow, neural networks.",
        "Natural Language Processing": "Skills in NLP and sentimental analysis.",
        "Cloud Operations": "Skills in cloud operations, including Cloud Native (Containerization & Orchestration), Azure Logic Apps, Microsoft Flow, Serverless.",
    }.items()
)

# Build the SkillCategory records: ids are plain sequential integers starting
# at 1, in the order the categories are listed above.
skill_categories = [
    dict(
        id=position,
        org_id=org_id,
        name=category_name,
        description=category_description,
    )
    for position, (category_name, category_description) in enumerate(
        skill_categories_info, start=1
    )
]

# The file is saved inside the shared output folder (`folder_name`)

# Path of the skill-categories JSON file
json_filename = f"{folder_name}/skill_categories.json"

# Serialize the skill categories as indented JSON
with open(json_filename, "w") as file:
    json.dump(skill_categories, file, indent=2)

print(f"JSON data written to {json_filename}")

JSON data written to data/skill_categories.json


In [12]:
# Function to read data from a JSON file
# (redefines the `read_from_json` helper from the top of the notebook with the same behaviour)
def read_from_json(json_file_path):
    """Return the parsed contents of the JSON file at ``json_file_path``."""
    with open(json_file_path, mode="r") as handle:
        contents = json.load(handle)
    return contents


# Insert already-loaded skill-category records into the database
def insert_skill_categories_from_json(cursor, json_data):
    """Insert each record of ``json_data`` into the "SkillCategory" table.

    Args:
        cursor: An open database cursor; the caller owns commit/rollback.
        json_data: Iterable of dicts with ``id``, ``org_id``, ``name`` and
            ``description`` keys (other keys are ignored).
    """
    insert_query = """
        INSERT INTO "SkillCategory" (id, org_id, name, description)
        VALUES (%s, %s, %s, %s);
    """
    # Column order must match the placeholders in the query above
    columns = ("id", "org_id", "name", "description")

    for record in json_data:
        cursor.execute(insert_query, tuple(record[column] for column in columns))


# Load the skill categories, insert them into PostgreSQL and report the count.
# Read from the JSON file
skill_categories_json_path = f"{folder_name}/skill_categories.json"
skill_categories = read_from_json(skill_categories_json_path)

# Set up database connection using environment variables
connection_string = os.getenv("DATABASE_URL")
conn = psycopg2.connect(connection_string)
cursor = conn.cursor()

try:
    # Insert data into PostgreSQL
    insert_skill_categories_from_json(cursor, skill_categories)

    # Commit the transaction
    conn.commit()
except Exception:
    # Discard partial inserts, then let the error surface in the notebook.
    conn.rollback()
    raise
finally:
    # Clean up on success and on failure
    cursor.close()
    conn.close()

print(f"Inserted {len(skill_categories)} SkillCategory entries into the database.")

Inserted 13 SkillCategory entries into the database.


# 4. Inserting Skills

In [32]:
# Skill categories JSON file inside the shared output folder
skill_categories_file = f"{folder_name}/skill_categories.json"


# Load the skill categories and index the id stored for each category by name
with open(skill_categories_file, "r") as file:
    skill_categories_data = json.load(file)

category_id_map = dict(
    (category["name"], category["id"]) for category in skill_categories_data
)

# Skills grouped by category name: each entry is (skill name, description).
# Every category lists four skills; the category order and the order inside
# each category determine the order (and therefore the ids) of the skills.
skills_by_category = {
    "Senior Management": [
        (
            "Corporate governance",
            "Understanding and applying the best practices for corporate governance.",
        ),
        (
            "Strategic planning",
            "Developing long-term strategies for the organization's growth.",
        ),
        (
            "Visionary leadership",
            "Providing direction and inspiring the organization to achieve its vision.",
        ),
        (
            "Financial acumen",
            "Understanding and applying financial principles to drive business success.",
        ),
    ],
    "Cybersecurity": [
        (
            "Network security",
            "Protecting computer networks from intrusions and attacks.",
        ),
        (
            "Information security",
            "Ensuring the confidentiality, integrity, and availability of data.",
        ),
        (
            "Cybersecurity policies",
            "Developing and enforcing policies to protect against cyber threats.",
        ),
        (
            "Incident response",
            "Responding to and recovering from security breaches and incidents.",
        ),
    ],
    "Software Engineering": [
        (
            "System design",
            "Architecting complex software systems to meet business requirements.",
        ),
        (
            "Coding best practices",
            "Maintaining high standards of coding and software development.",
        ),
        (
            "Software testing",
            "Designing and executing tests to ensure software quality.",
        ),
        (
            "Continuous integration",
            "Automating the integration of code changes from multiple contributors.",
        ),
    ],
    "Project Management": [
        (
            "Resource allocation",
            "Effectively distributing resources across projects to ensure efficient use.",
        ),
        (
            "Risk assessment",
            "Identifying potential risks in project planning and execution.",
        ),
        (
            "Project scheduling",
            "Planning and organizing project tasks and timelines.",
        ),
        (
            "Stakeholder management",
            "Managing relationships with all project stakeholders.",
        ),
    ],
    "Data Science": [
        (
            "Data Analysis",
            "Analyzing and interpreting complex datasets to extract meaningful insights.",
        ),
        (
            "Machine Learning",
            "Applying statistical models and algorithms to data to predict outcomes.",
        ),
        (
            "Data visualization",
            "Representing data in graphical format to aid understanding.",
        ),
        (
            "Big Data technologies",
            "Utilizing technologies for processing large datasets.",
        ),
    ],
    "DevOps": [
        (
            "Continuous Integration/Continuous Deployment",
            "Implementing CI/CD pipelines for software delivery.",
        ),
        (
            "Automation scripting",
            "Writing scripts to automate operational processes.",
        ),
        (
            "Infrastructure as Code",
            "Managing infrastructure through code to improve deployment speed.",
        ),
        (
            "Monitoring and logging",
            "Tracking and analyzing system performance and activity.",
        ),
    ],
    "Cloud Computing": [
        (
            "Cloud Service Management",
            "Managing cloud services and infrastructure.",
        ),
        (
            "Cloud Migration Strategies",
            "Planning and executing the migration of services to cloud environments.",
        ),
        (
            "Cloud Security",
            "Ensuring the security of cloud-based applications and data.",
        ),
        (
            "Cloud resource optimization",
            "Managing cloud resources to optimize performance and cost.",
        ),
    ],
    "Front End Technologies": [
        ("Angular", "Angular development."),
        ("React", "React development."),
        ("Vue", "Vue.js development."),
        ("HTML/CSS", "Web development using HTML and CSS."),
    ],
    "Server Side Technologies": [
        ("Node.js", "Backend development with Node.js."),
        ("Spring Boot", "Application development with Java Spring Boot."),
        (".NET", ".NET framework development."),
        ("Laravel", "Web development with Laravel."),
    ],
    "Database Technologies": [
        ("SQL", "SQL query writing and optimization."),
        ("NoSQL", "NoSQL database management."),
        ("DB Admin", "Database administration."),
        ("DB Design", "Database design and normalization."),
    ],
    "Business Intelligence": [
        ("Power BI", "Business intelligence with Power BI."),
        ("Tableau", "Data visualization with Tableau."),
        ("Apache Superset", "Business intelligence with Apache Superset."),
        ("BI Tools", "Using various business intelligence tools."),
    ],
    "User Interface Design": [
        ("UX/UI Design", "User experience and user interface design."),
        ("Web Design", "Designing responsive web layouts."),
        ("Graphic Design", "Graphic design for web and print."),
        ("Interaction Design", "Designing interactive user interfaces."),
    ],
    "Mobile Development": [
        ("Flutter", "Mobile app development with Flutter."),
        ("React Native", "Building mobile apps with React Native."),
        ("iOS", "Developing apps for iOS."),
        ("Android", "Developing apps for Android."),
    ],
    "Web Scraping": [
        ("Selenium", "Web scraping with Selenium."),
        ("Puppeteer", "Automated browser control with Puppeteer."),
        ("Web Crawl", "Crawling websites for data extraction."),
        ("Data Scraping", "Extracting data from web sources."),
    ],
    "Scripting Languages": [
        ("Python", "Scripting with Python."),
        ("Bash", "Shell scripting with Bash."),
        ("Ruby", "Scripting with Ruby."),
        ("PowerShell", "Automation with PowerShell."),
    ],
    "ELT / ETL Technologies": [
        ("DBT", "Data transformation with DBT."),
        ("Alteryx", "Data processing with Alteryx."),
        ("Azure Synapse", "Data warehousing with Azure Synapse."),
        ("Databricks", "Big data processing with Databricks."),
    ],
    "Data Warehousing": [
        (
            "ETL Development",
            "Building Extract, Transform, Load processes for data warehousing.",
        ),
        (
            "Data Modeling",
            "Designing data models for efficient storage and retrieval.",
        ),
        (
            "Data Integration",
            "Integrating data from various sources into a data warehouse.",
        ),
        (
            "Data Warehouse Optimization",
            "Optimizing performance and storage in data warehousing systems.",
        ),
    ],
    "Machine Learning": [
        (
            "Deep Learning",
            "Applying neural networks for complex pattern recognition.",
        ),
        (
            "Reinforcement Learning",
            "Developing AI agents that learn through interactions.",
        ),
        (
            "Model Deployment",
            "Deploying machine learning models for real-world applications.",
        ),
        (
            "Model Interpretability",
            "Understanding and explaining the decisions made by machine learning models.",
        ),
    ],
    "Natural Language Processing": [
        (
            "Named Entity Recognition (NER)",
            "Identifying and classifying named entities in text data.",
        ),
        (
            "Text Classification",
            "Categorizing text into predefined classes or categories.",
        ),
        (
            "Language Translation",
            "Translating text between different languages.",
        ),
        (
            "Sentiment Analysis",
            "Analyzing text to determine sentiment or emotional tone.",
        ),
    ],
    "Cloud Operations": [
        (
            "AWS Services Management",
            "Managing AWS services for cloud-based operations.",
        ),
        (
            "Google Cloud Platform (GCP) Operations",
            "Handling cloud operations on the Google Cloud Platform.",
        ),
        (
            "Kubernetes Administration",
            "Administering Kubernetes clusters for container orchestration.",
        ),
        (
            "Cloud Cost Management",
            "Optimizing cloud usage and costs for efficient operations.",
        ),
    ],
}

# Flatten to (name, description, skill_category_id) tuples; a category name
# missing from category_id_map raises KeyError here.
skills_info = [
    (skill_name, skill_description, category_id_map[category_name])
    for category_name, category_skills in skills_by_category.items()
    for skill_name, skill_description in category_skills
]

# NOTE(review): no visible cell calls a method on `faker` — confirm it is still needed.
faker = Faker()

# Organization id attached to every skill record
org_id = ORG_ID


# Build the Skill records: ids are plain sequential integers starting at 1,
# following the order of `skills_info`.
skills = [
    dict(
        id=position,
        name=skill_name,
        description=skill_description,
        org_id=org_id,
        skill_category_id=category_id,
    )
    for position, (skill_name, skill_description, category_id) in enumerate(
        skills_info, start=1
    )
]

# Serialize the skills as an indented JSON string
skills_json = json.dumps(skills, indent=2)

# Write the skills into the shared output folder, like the other JSON files in
# this notebook, instead of a hard-coded "data/" path.
skills_file_path = f"{folder_name}/skills.json"
with open(skills_file_path, "w") as file:
    file.write(skills_json)

# Display the path of the written file as the cell output
skills_file_path

'data/skills.json'

# 5. Inserting Roles

In [41]:
import uuid
import json

def generate_role_data_json():
    """Return the organization's role tree as a JSON array string (4-space indent).

    Roles are emitted in depth-first pre-order starting from
    "Chief Information Officer". Ids are assigned sequentially from 1 in that
    order, and each record carries ``org_id`` (taken from ``ORG_ID``) and the
    id of its parent role (``None`` for the root).
    """
    organization_id = ORG_ID

    # role name -> (description, names of the roles directly below it)
    # NOTE(review): "Data Scientist" and "Business Analyst" are not listed as a
    # child of any role, so the traversal from the root never emits them —
    # confirm whether that is intended.
    role_hierarchy = {
        "Chief Information Officer": (
            "Oversees the IT strategy and operations.",
            ["IT Manager", "Cybersecurity Manager", "Cloud Solutions Architect"],
        ),
        "IT Manager": (
            "Manages the IT department and its resources.",
            [
                "Development Manager",
                "Database Administrator",
                "Project Manager",
                "Network Administrator",
            ],
        ),
        "Development Manager": (
            "Oversees software development teams and projects.",
            ["Front-End Developer", "Back-End Developer"],
        ),
        "Cybersecurity Manager": (
            "Oversees cybersecurity policies and defenses.",
            ["Cybersecurity Analyst"],
        ),
        "Cloud Solutions Architect": ("Designs and manages cloud solutions.", []),
        "Database Administrator": (
            "Manages the organization's database systems.",
            [],
        ),
        "Project Manager": (
            "Manages projects from inception to completion.",
            ["DevOps Engineer"],
        ),
        "DevOps Engineer": (
            "Improves operations between software development and IT operations.",
            [],
        ),
        "Data Scientist": ("Analyzes data to extract insights.", []),
        "Cybersecurity Analyst": ("Protects against cyber threats.", []),
        "Front-End Developer": (
            "Develops the user interface of web applications.",
            [],
        ),
        "Back-End Developer": (
            "Develops the server-side logic of applications.",
            [],
        ),
        "Business Analyst": (
            "Analyzes the business processes and recommends solutions.",
            [],
        ),
        "Network Administrator": (
            "Manages and troubleshoots the organization's network infrastructure.",
            [],
        ),
    }

    roles = []
    # Explicit stack of (role name, parent id) pairs still to be emitted
    pending = [("Chief Information Officer", None)]
    while pending:
        role_name, parent_id = pending.pop()
        description, child_names = role_hierarchy[role_name]

        current_id = len(roles) + 1  # ids follow emission order, starting at 1
        roles.append(
            {
                "id": current_id,
                "name": role_name,
                "description": description,
                "org_id": organization_id,
                "parent_id": parent_id,
            }
        )

        # Push children in reverse so the first-listed child is emitted next
        pending.extend((child, current_id) for child in reversed(child_names))

    return json.dumps(roles, indent=4)
# Destination of the roles JSON inside the shared output folder
role_data_file_path = f"{folder_name}/roles.json"

# Build the JSON before opening the file: opening with 'w' truncates it, so a
# failure while generating the data must not leave an empty roles.json behind.
role_data_json = generate_role_data_json()
with open(role_data_file_path, 'w') as file:
    file.write(role_data_json)

# Display the path of the written file as the cell output
role_data_file_path

'data/roles.json'

# 6. Inserting Roles Skills Target

Corporate governance
Strategic planning
Visionary leadership
Financial acumen
Network security
Information security
Cybersecurity policies
Incident response
System design
Coding best practices
Software testing
Continuous integration
Resource allocation
Risk assessment
Project scheduling
Stakeholder management
Data Analysis
Machine Learning
Data visualization
Big Data technologies
Continuous Integration/Continuous Deployment
Automation scripting
Infrastructure as Code
Monitoring and logging
Cloud Service Management
Cloud Migration Strategies
Cloud Security
Cloud resource optimization
Angular
React
Vue
HTML/CSS
Node.js
Spring Boot
.NET
Laravel
SQL
NoSQL
DB Admin
DB Design
Power BI
Tableau
Apache Superset
BI Tools
UX/UI Design
Web Design
Graphic Design
Interaction Design
Flutter
React Native
iOS
Android
Selenium
Puppeteer
Web Crawl
Data Scraping
Python
Bash
Ruby
PowerShell
DBT
Alteryx
Azure Synapse
Databricks
ETL Development
Data Modeling
Data Integration
Data Warehouse Optimization
Deep 

# 7. Inserting Users

In [None]:
from faker import Faker
import bcrypt
import uuid
import json

# Generate fake data
fake = Faker()

# Number of users to create for each role name
role_capacities = {
    "Chief Information Officer": 1,
    "IT Manager": 5,
    "Front-End Developer": 8,
    "Back-End Developer": 6,
    "Database Administrator": 4,
    "Cybersecurity Manager": 2,
    "Cybersecurity Analyst": 10,
    "Cloud Solutions Architect": 3,
}

# Security groups a user is randomly assigned to
security_groups = ["admin", "supervisor", "general staff"]

# Load role data from the JSON file in the shared output folder (the same
# folder the roles cell writes to, instead of a hard-coded "data/" path)
roles_file_path = f"{folder_name}/roles.json"
with open(roles_file_path, "r") as file:
    roles_data = json.load(file)

# Index role ids by name once instead of rescanning the list for every role;
# setdefault keeps the first entry when a name appears more than once.
role_ids_by_name = {}
for item in roles_data:
    role_ids_by_name.setdefault(item["name"], item["id"])

# Generate users
users = []
for role, capacity in role_capacities.items():
    role_id = role_ids_by_name.get(role)
    if role_id is None:
        # Say so explicitly rather than silently producing no users for the role
        print(f"Role '{role}' not found in {roles_file_path}; no users generated for it.")
        continue

    for _ in range(capacity):
        first_name = fake.first_name()
        last_name = fake.last_name()
        email = fake.email()

        # Hash password using bcrypt; a fresh salt is generated per user, so
        # the stored hashes differ even though the password is the same
        password = bcrypt.hashpw(b"Pass@1234", bcrypt.gensalt()).decode("utf-8")

        # Generate user entry
        # NOTE(review): org_id is hard-coded here and differs from ORG_ID used
        # by the other cells — confirm which id the users should reference.
        user_entry = {
            "id": str(uuid.uuid4()),
            "password": password,
            "org_id": "8fd51c1d-32f9-48e3-a473-cf57f29bea05",
            "firstName": first_name,
            "lastName": last_name,
            "emailAddress": email,
            "roleId": role_id,
            "locationId": None,  # You can assign locations if needed
            "securityGroup": fake.random_element(security_groups),
        }
        users.append(user_entry)

# Save users to a JSON file in the shared output folder
users_file_path = f"{folder_name}/users.json"
with open(users_file_path, "w") as file:
    json.dump(users, file, indent=4)

# Display the path of the written file as the cell output
users_file_path

# 8. Inserting Users & Skills Map

# 9. Inserting Training

# 10. Skill Audit Log

# 11. Evidence Type

# 12. General Evidence

# 13. Training Evidence