[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/xiptos/generative/blob/main/notebooks/gen_solutions.ipynb)

# Solutions for exercises

## Synthetic Student Grades Dataset

In [None]:
import random
import pandas as pd
from faker import Faker
import matplotlib.pyplot as plt

# Initialize
fake = Faker()
# Faker draws from its own random generator, so seeding the `random` module
# alone does not make the generated names repeatable. Seed both so the whole
# dataset is reproducible from run to run.
Faker.seed(42)
random.seed(42)

# Parameters
num_students = 100
courses = ["Math", "Biology", "History", "Computer Science"]

# Generate student data: one record per student, with IDs STU001, STU002, ...
# Dict values are evaluated top to bottom, so the random draws happen in the
# order name -> age -> course -> grade for every student.
data = [
    {
        "StudentID": f"STU{i:03}",
        "Name": fake.name(),
        "Age": random.randint(18, 25),
        "Course": random.choice(courses),
        "FinalGrade": round(random.uniform(0, 20), 1),  # Portuguese-style grade
    }
    for i in range(1, num_students + 1)
]

# Create DataFrame
df = pd.DataFrame(data)

# Analysis: per-course mean grade, per-course head count, and pass count.
final_grades = df["FinalGrade"]
avg_grade = final_grades.groupby(df["Course"]).mean()
students_per_course = df["Course"].value_counts()
# A grade of 9.5 or above counts as a pass.
num_passed = (final_grades >= 9.5).sum()

# Print results
for heading, result in (
    ("📊 Average grade per course:\n", avg_grade),
    ("👥 Students per course:\n", students_per_course),
):
    print(heading, result, "\n")
print(f"✅ Number of students who passed: {num_passed} out of {num_students}")

# Bonus: Plot histogram of grades per course
plt.figure(figsize=(10, 6))
for course in courses:
    subset = df[df["Course"] == course]
    # Fix the histogram range to the full 0-20 grade scale so every course
    # uses the same bin edges. Without it each subset gets bins derived from
    # its own min/max and the overlaid bars cannot be compared.
    plt.hist(
        subset["FinalGrade"],
        bins=10,
        range=(0, 20),
        alpha=0.5,
        label=course,
    )

plt.title("Grade Distribution per Course")
plt.xlabel("Final Grade")
plt.ylabel("Number of Students")
plt.legend()
plt.grid(True)
plt.tight_layout()
plt.show()

## Synthetic Employee Data


In [None]:
import random
import pandas as pd
from faker import Faker

# Setup
fake = Faker()
# Faker uses its own random generator; seed it as well as `random` so names
# and e-mail addresses are reproducible, not just the numeric fields.
Faker.seed(42)
random.seed(42)

# Configuration
# Allowed (min, max) salary for each role.
roles = {
    "Intern": (15000, 25000),
    "Engineer": (50000, 90000),
    "Manager": (90000, 130000),
    # Previously (10000, 15000), which paid Directors less than Interns.
    # Corrected on the assumption that a zero was dropped from each bound.
    "Director": (100000, 150000),
}
departments = ["Engineering", "Sales", "HR", "Finance"]
num_employees = 50

# Rule-based employee generator
def generate_employee(emp_id):
    """Build one synthetic employee record that follows the role rules.

    The role is drawn at random from ``roles`` and the salary is drawn
    uniformly from that role's (min, max) range, rounded to 2 decimals.
    Age depends on the role: Interns are 20-25, Managers and Directors are
    30-60, everyone else is 25-60. Employees in the Engineering department
    get an ``@techcorp.com`` address; all others get a generic fake e-mail.

    Args:
        emp_id: Integer used to build the ``EMP###`` identifier.

    Returns:
        A dict with the keys EmployeeID, Name, Age, Role, Department,
        Salary and Email.
    """
    role = random.choice(list(roles))
    low, high = roles[role]
    salary = round(random.uniform(low, high), 2)

    # Pick the age bounds for the role, then draw a single age from them.
    if role == "Intern":
        age_bounds = (20, 25)
    elif role in ("Manager", "Director"):
        age_bounds = (30, 60)
    else:
        age_bounds = (25, 60)
    age = random.randint(*age_bounds)

    department = random.choice(departments)

    if department == "Engineering":
        email = f"{fake.user_name()}@techcorp.com"
    else:
        email = fake.email()

    record = {
        "EmployeeID": f"EMP{emp_id:03}",
        "Name": fake.name(),
        "Age": age,
        "Role": role,
        "Department": department,
        "Salary": salary,
        "Email": email,
    }
    return record

# Generate dataset (employee IDs run from 1 to num_employees)
data = [generate_employee(i + 1) for i in range(num_employees)]
df = pd.DataFrame(data)

# === Validations ===

# Count per role
print("📊 Number of employees per role:\n", df["Role"].value_counts(), "\n")

# Check rule violation: Manager/Director under 30
underage_executives = df[(df["Role"].isin(["Manager", "Director"])) & (df["Age"] < 30)]
print("🚨 Managers/Directors under 30 (should be none):\n", underage_executives, "\n")

# Salary range check per role: compare the observed min/max against the
# configured bounds instead of only printing them.
print("💰 Salary ranges per role:")
for role, (low, high) in roles.items():
    salaries = df.loc[df["Role"] == role, "Salary"]
    if salaries.empty:
        # A role may not be drawn at all; min()/max() would be NaN.
        print(f"  {role}: no employees generated")
        continue
    observed_min = salaries.min()
    observed_max = salaries.max()
    status = "OK" if low <= observed_min and observed_max <= high else "OUT OF RANGE"
    print(
        f"  {role}: Min={observed_min} | Max={observed_max}"
        f" | Allowed={low}-{high} | {status}"
    )

# === Bonus ===

# Promote interns aged 25 to engineers.
# Only the Role column is updated; the salary keeps its original value.
df.loc[(df["Role"] == "Intern") & (df["Age"] == 25), "Role"] = "Engineer"

# Preview final dataset
print("\n🧾 Sample employee data:\n", df.head())

## Bar Queue

In [None]:
import simpy
import random
import pandas as pd

# Set seed for reproducibility
random.seed(42)

# Parameters
# Both ranges below are unpacked into random.randint, so the bounds are
# inclusive and the drawn values are whole minutes.
NUM_CUSTOMERS = 30
INTER_ARRIVAL = (2, 5)  # Minutes between arrivals
SERVICE_TIME = (1, 3)   # Minutes to serve each customer

# Collect simulation data
# One dict per served customer is appended here by the customer process.
records = []

# Customer process
def customer(env, name, barista, records):
    """Simulate one customer: queue for the barista, get served, log timings.

    Args:
        env: Simulation environment; supplies the clock (``env.now``) and
            timeouts.
        name: Label stored in the "Customer" field of the record.
        barista: Resource the customer requests and holds during service.
        records: List that receives one dict per customer once service ends.

    Yields:
        The resource request, then the service timeout.
    """
    arrived_at = env.now
    with barista.request() as turn:
        # Blocks until the barista is free.
        yield turn
        waited = env.now - arrived_at

        # The service time is drawn only once service actually starts.
        service_duration = random.randint(*SERVICE_TIME)
        yield env.timeout(service_duration)

        left_at = env.now
        row = dict(
            Customer=name,
            ArrivalTime=round(arrived_at, 2),
            WaitTime=round(waited, 2),
            ServiceStart=round(left_at - service_duration, 2),
            ServiceEnd=round(left_at, 2),
            ServiceDuration=service_duration,
        )
        records.append(row)

# Arrival process
def customer_generator(env, barista, records):
    """Spawn NUM_CUSTOMERS customer processes separated by random gaps.

    Before each customer (including the first) the generator waits a random
    whole number of minutes drawn from INTER_ARRIVAL, then starts a
    ``customer`` process named ``Cust_1``, ``Cust_2``, and so on.

    Args:
        env: Simulation environment used for timeouts and process creation.
        barista: Resource handed to every customer process.
        records: Shared list handed to every customer process.
    """
    for index in range(NUM_CUSTOMERS):
        gap = random.randint(*INTER_ARRIVAL)
        yield env.timeout(gap)
        label = f"Cust_{index + 1}"
        env.process(customer(env, label, barista, records))

# Setup environment and resources
env = simpy.Environment()
barista = simpy.Resource(env, capacity=1)  # Only 1 barista
env.process(customer_generator(env, barista, records))
# No `until` argument: run until no scheduled events remain.
env.run()

# Create DataFrame
df = pd.DataFrame(records)

# === Analysis ===
wait_times = df["WaitTime"]
mean_wait = round(wait_times.mean(), 2)
peak_wait = round(wait_times.max(), 2)
long_waits = (wait_times > 3).sum()

print("\n📊 Average wait time:", mean_wait, "minutes")
print("⏱️ Peak wait time:", peak_wait, "minutes")
print("⚠️ Customers waiting more than 3 mins:", long_waits, "out of", NUM_CUSTOMERS)

# Preview
print("\n🧾 Sample data:\n", df.head())

# Optional: Save to CSV
df.to_csv("simulated_bar_queue.csv", index=False)