[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/xiptos/generative/blob/main/notebooks/rule_based_generator.ipynb)

# Rule-based Generator

## Synthetic Employee Records

This example generates employee data while enforcing rules like:
* Salary depends on job title
* Managers must be at least 30 years old
* Employees in _Engineering_ get an email address on the `engineeringdep.com` domain

__Rule Logic Summary__
* Age is constrained by role
* Salary is role-dependent
* Engineering emails follow a domain rule
* Randomness is still used within rule boundaries

In [None]:
import numpy as np
import pandas as pd
import random
from faker import Faker

In [None]:
# Seed BOTH random sources so every run produces the same dataset.
# The stdlib `random` module drives role, salary, age and department, while
# Faker generates names and e-mails from its own generator, which
# `random.seed` does not affect -- hence the separate `Faker.seed` call.
fake = Faker()
random.seed(42)
Faker.seed(42)

# Each role maps to its (minimum, maximum) salary bounds.
roles = {
    "Intern": (15000, 25000),
    "Engineer": (50000, 90000),
    "Manager": (90000, 130000),
    "Director": (130000, 180000),
}

# Departments an employee can be assigned to.
departments = ["Engineering", "HR", "Sales", "Finance"]

# Rule-based generator
def generate_employee(emp_id):
    """Build one synthetic employee record that follows the generator's rules.

    Rules applied:
      * the salary is drawn uniformly from the range registered for the
        chosen role in ``roles`` and rounded to two decimals;
      * interns are aged 20-25, managers 30-50, every other role 25-60;
      * employees placed in the "Engineering" department get an address on
        the ``engineeringdep.com`` domain, everyone else a generic Faker
        e-mail.

    Args:
        emp_id: Integer used to build the zero-padded ``EmployeeID``
            (for example ``7`` becomes ``"EMP0007"``).

    Returns:
        A dict with the keys ``EmployeeID``, ``Name``, ``Age``, ``Role``,
        ``Department``, ``Salary`` and ``Email``.
    """
    # Pick the role first: both salary and age depend on it.
    role = random.choice(list(roles))
    low, high = roles[role]
    salary = round(random.uniform(low, high), 2)

    # Role-specific age bounds; roles not listed use the general range.
    age_bounds = {"Intern": (20, 25), "Manager": (30, 50)}
    min_age, max_age = age_bounds.get(role, (25, 60))
    age = random.randint(min_age, max_age)

    department = random.choice(departments)

    # Only Engineering addresses are forced onto the fixed domain.
    email = (
        f"{fake.user_name()}@engineeringdep.com"
        if department == "Engineering"
        else fake.email()
    )
    name = fake.name()

    return {
        "EmployeeID": f"EMP{emp_id:04d}",
        "Name": name,
        "Age": age,
        "Role": role,
        "Department": department,
        "Salary": salary,
        "Email": email,
    }

# Build the dataset: one record per employee ID, numbered from 1 upwards.
num_employees = 50
data = list(map(generate_employee, range(1, num_employees + 1)))
df = pd.DataFrame(data)

# Show the first rows as the cell output.
df.head()

In [None]:

# Optionally save
# df.to_csv("rule_based_employees.csv", index=False)