In [None]:
from typing import Annotated
from typing_extensions import TypedDict
from langgraph.graph import StateGraph, START, END
from langgraph.graph.message import add_messages

In [None]:
from dotenv import load_dotenv
# Run python-dotenv's loader with its default arguments.
# NOTE(review): this is expected to read a local .env file into the process
# environment; none of the visible cells reads an environment variable
# directly, so confirm which setting (if any) the notebook relies on.
load_dotenv()

In [None]:
from langchain_ollama import ChatOllama

# Chat model used for every generation request in this notebook: the
# "llama3.1" model served through Ollama, with temperature pinned to 0.
llm = ChatOllama(
    model="llama3.1",
    temperature=0,
    # `max_length` is not a ChatOllama option, so the intended cap was never
    # applied. `num_predict` is the documented setting that limits how many
    # tokens a single reply may contain.
    num_predict=6000,
    # other params...
)

In [None]:
import json

# System-message template for the review generator.
# `{sample_data}` is a prompt-template placeholder (this is a plain string, not
# an f-string); it is filled in from the "sample_data" key supplied when the
# chain is invoked.
# The text is sent to the model as written, so wording and whitespace inside
# the literal are part of the program's behaviour.
system_prompt = """
You are a helpful AI Assistant who generates data for the given details.

For the user given Department, Job Title. Make assessment and feedback as 10 lines.
Provide only the JSON response, don't include anything else. Make all values as string data type.
Please generate the data as an employee performance review data in JSON Format as below: 
{sample_data}""" 


In [None]:
# Field-by-field sketch of the JSON object the model is asked to produce.
# Angle-bracket markers (<...>) are instructions to the model, not literal
# values; 'ReviewPeriod' is the only key given a fixed value here.
# NOTE(review): the sketch has no enclosing { } — presumably to keep literal
# braces out of prompt-template text, where they would be read as
# placeholders; confirm before adding them.
# NOTE(review): the key 'NoofProjects Accomplished' contains a space and does
# not match the `NoofProjects` field declared on the `employee_performance`
# model — confirm which spelling downstream consumers expect.
sample_data = """   'Department': <user given value>,
    'JobTitle': <user given value>,
    'ReviewPeriod': '2024',
    'NoofProjects Accomplished': <1-7>,
    'TrainingandCertifications': <comma separated list>,
    'Awards': <comma separated list can be empty as well>,
    'Strengths': <5 lines>,
    'SelfAssessment': <10 lines>,
    'ClientsFeedback': <10 lines>,
    'ManagerAssessment': <10 lines>,
    'AreasforImprovement': <comma separated list of upto 5 items>,
    'KPIs': <comma separated list>,
    'OverallRating': <user given value>
"""

In [None]:
# Define your desired data structure.
from langchain_core.output_parsers import JsonOutputParser
from pydantic import BaseModel, Field
# Pydantic model describing one generated employee performance review.
# Every field is declared as `str`, in line with the prompt's instruction to
# "Make all values as string data type".
# NOTE(review): the prompt's sample uses the key 'NoofProjects Accomplished',
# while this model names the field `NoofProjects`; the two do not match —
# confirm which one is intended.
# Documented with comments rather than a class docstring on purpose:
# pydantic is understood to copy a model docstring into the generated JSON
# schema description — TODO confirm before converting these to a docstring.
class employee_performance(BaseModel):
    JobTitle: str = Field(description="Job Title of the employee")
    Department: str = Field(description="Department of the employee")   
    ReviewPeriod: str = Field(description="Review Period of the employee")
    NoofProjects: str = Field(description="Number of Projects Accomplished by the employee")
    TrainingandCertifications: str = Field(description="Training and Certifications of the employee")
    Awards: str = Field(description="Awards received by the employee")
    Strengths: str = Field(description="Strengths of the employee")
    SelfAssessment: str = Field(description="Self Assessment of the employee")
    ClientsFeedback: str = Field(description="Feedback from clients")
    ManagerAssessment: str = Field(description="Manager's Assessment")
    AreasforImprovement: str = Field(description="Areas for Improvement")
    KPIs: str = Field(description="Key Performance Indicators")
    OverallRating: str = Field(description="Overall Rating of the employee")


# Output parser placed at the end of the chain; it is given the
# `employee_performance` model as its `pydantic_object`.
# NOTE(review): JsonOutputParser is understood to use `pydantic_object` only to
# describe the expected schema and to return plain dicts rather than validated
# model instances — confirm against the langchain_core documentation.
parser = JsonOutputParser(pydantic_object=employee_performance)


In [None]:
from langchain_core.prompts import ChatPromptTemplate
# Two-message chat prompt: the system template followed by the caller's request.
# `system_prompt` already ends with the `{sample_data}` placeholder, so the
# sample schema must not also be concatenated onto it here; doing both sends
# the schema to the model twice in every request.
prompt = ChatPromptTemplate.from_messages(
    [
        ("system", system_prompt),
        ("human", "{input}"),
    ]
)

# prompt -> chat model -> JSON parser.
chain = prompt | llm | parser

# Single hand-written request, used to check the chain end to end before the
# bulk generation cell is run.
response = chain.invoke(
    {
        "sample_data": sample_data,
        "input": "Department: Data Science\nJob Title: Data Scientist\nReview Period: 2024\nOverall Rating: 6",
    }
)

In [None]:
# response

In [None]:
company_name = "IT_Company"

# Job titles available in each department. The dictionary's insertion order is
# also the order of the `departments` list derived from it below.
job_titles = {
    "Data Science": [
        "Data Scientist",
        "Data Analyst",
        "Data Engineer",
        "Machine Learning Engineer",
        "AI Research Scientist",
    ],
    "Data Engineering": [
        "Data Engineer",
        "Data Architect",
        "Data Warehouse Engineer",
        "Data Integration Engineer",
    ],
    "Software Engineering": [
        "Software Engineer",
        "Frontend Engineer",
        "Backend Engineer",
        "Fullstack Engineer",
        "DevOps Engineer",
    ],
    "Product Management": [
        "Product Manager",
        "Product Owner",
        "Product Analyst",
        "Product Marketing Manager",
    ],
    "Sales": [
        "Sales Representative",
        "Sales Manager",
        "Sales Engineer",
        "Sales Operations Manager",
    ],
    "Marketing": [
        "Marketing Manager",
        "Marketing Specialist",
        "Marketing Analyst",
        "Marketing Operations Manager",
    ],
    "HR": [
        "HR Manager",
        "HR Specialist",
        "HR Generalist",
        "HR Business Partner",
    ],
    "Finance": [
        "Finance Manager",
        "Finance Analyst",
        "Financial Analyst",
        "Accountant",
    ],
    "Legal": [
        "Legal Counsel",
        "Legal Assistant",
        "Legal Analyst",
        "Legal Operations Manager",
    ],
    "Operations": [
        "Operations Manager",
        "Operations Analyst",
        "Operations Specialist",
        "Operations Coordinator",
    ],
    "Customer Support": [
        "Customer Support Representative",
        "Customer Support Manager",
        "Customer Support Specialist",
        "Customer Support Analyst",
    ],
}

# Department names, in the same order as the keys of `job_titles`.
departments = list(job_titles)

def get_random_btwn_4_10() -> str:
    """Pick an overall rating at random.

    Returns:
        A whole number from 4 to 10 (both ends included), rendered as a string.
    """
    rating = random.randint(4, 10)
    return str(rating)


In [None]:
import json
import random
from pathlib import Path

# Generate one synthetic review per employee id (100..499) and store each one
# as data/empid_<id>.json.
output_dir = Path("data")
# open(..., "w") does not create missing folders, so make sure the target
# directory exists before the first write.
output_dir.mkdir(parents=True, exist_ok=True)

# Ids whose model reply could not be turned into JSON; reported at the end.
failed_ids = []

for i in range(100, 500):
    department = random.choice(departments)
    job_title = random.choice(job_titles[department])
    try:
        response = chain.invoke(
            {
                "sample_data": sample_data,
                "input": f"Department: {department}\nJob Title: {job_title}\nReview Period: 2024\nOverall Rating: {get_random_btwn_4_10()}",
            }
        )
    except ValueError as exc:
        # The JSON output parser signals unparseable model output with
        # OutputParserException, which derives from ValueError. One malformed
        # reply should not abort the remaining records of a long run.
        failed_ids.append(i)
        print(f"empid {i}: skipped, model output could not be parsed ({exc})")
        continue
    print(response)

    # save to data folder as json file
    with open(output_dir / f"empid_{i}.json", "w", encoding="utf-8") as f:
        json.dump(response, f)

if failed_ids:
    print(f"{len(failed_ids)} record(s) were not generated: {failed_ids}")