In [None]:
from IPython import get_ipython
from IPython.display import display
from google.colab import drive
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.distributions import Categorical
import os

In [None]:
# Mount Google Drive so the CSV files below are reachable under /content/drive.
# (Colab-only: `drive` comes from google.colab.)
drive.mount('/content/drive')

# Paths to your datasets
task_data_path = '/content/drive/MyDrive/Skripsi/Resources/Datasets/data_task_categorized.csv' 
employee_data_path = '/content/drive/MyDrive/Skripsi/Resources/Datasets/data_employee_categorized.csv'

# Load the datasets
df_tasks_original = pd.read_csv(task_data_path)
df_employees_original = pd.read_csv(employee_data_path)

# Collect the skill column *names* of each table as sets (not arrays of values),
# so that the two schemas can be compared with set operations.
# Positional convention: everything from column 3 onwards in the task table and
# from column 2 onwards in the employee table is treated as a skill column.
task_skill_cols = set(df_tasks_original.columns[3:])  # Assuming skills start at column 3
employee_skill_cols = set(df_employees_original.columns[2:])  # Assuming skills start at column 2

In [None]:
# Ensure both tasks and employees share the same set of skill columns.
# If there's a mismatch, take the intersection of the skill columns.
common_skills = task_skill_cols.intersection(employee_skill_cols)

# A set of strings has no stable iteration order across Python processes
# (string hashing is randomised per process), so materialise the skill columns
# once, in sorted order. Both frames then get the same, reproducible column
# layout, which the positional slices used later (task[3:], employee[2:]) rely on.
skill_columns = sorted(common_skills)

# Filter both dataframes to these common skill columns plus their ID columns.
df_tasks_original = df_tasks_original[['task_id', 'project_id', 'story_points'] + skill_columns]
df_employees_original = df_employees_original[['employee_id', 'Role'] + skill_columns]

In [None]:
### Curriculum Data Loader
def load_curriculum_data(task_data, employee_data, iteration, task_factor=5, employee_factor=2):
    """
    Return the leading slice of each table for a given curriculum stage.

    The stage size grows linearly with `iteration`:
      - tasks kept     = iteration * task_factor
      - employees kept = iteration * employee_factor

    Args:
        task_data (DataFrame): Full task table.
        employee_data (DataFrame): Full employee table.
        iteration (int): Curriculum stage number.
        task_factor (int): Tasks added per stage.
        employee_factor (int): Employees added per stage.

    Returns:
        tuple: (tasks subset, employees subset), each re-indexed from 0.
    """
    def _leading_rows(frame, row_count):
        # Take the first `row_count` rows and discard the original index.
        return frame.head(row_count).reset_index(drop=True)

    return (
        _leading_rows(task_data, iteration * task_factor),
        _leading_rows(employee_data, iteration * employee_factor),
    )

In [None]:
def calculate_weighted_euclidean_distance(employee_skills, task_skills, alpha=0.5, max_skill=5):
    """
    Score how well an employee's skills match a task, based on a Weighted
    Euclidean Distance (WED).

    Despite the function name, the value returned is a *similarity*:
    ``1 - WED / worst_case_WED``. 1.0 means the skill vectors are identical,
    0.0 means the worst case (employee at 0 on every skill, task requiring
    `max_skill` on every skill).

    Per-skill weights are ``1 / (1 + alpha * max(0, employee - task))``, so a
    surplus of skill (over-qualification) contributes less to the distance
    than an equally large deficit.

    Args:
        employee_skills (array-like): Skill levels of the employee.
        task_skills (array-like): Skill requirements of the task, same length.
        alpha (float): Strength of the over-qualification discount.
        max_skill (float): Largest possible skill value, used for the
            worst-case normalisation. Defaults to 5.

    Returns:
        float: Similarity score. It lies in [0, 1] when all skill values are
        within [0, max_skill] and alpha >= 0.

    Raises:
        ValueError: If the two skill vectors differ in shape or `max_skill`
            is not positive.
    """
    if max_skill <= 0:
        raise ValueError("max_skill must be positive")

    # Cast to float: row slices taken from a mixed-type DataFrame arrive as
    # object-dtype arrays, which NumPy handles slowly and inconsistently.
    employee_skills = np.asarray(employee_skills, dtype=float)
    task_skills = np.asarray(task_skills, dtype=float)
    if employee_skills.shape != task_skills.shape:
        raise ValueError(
            f"Skill vectors must have the same shape, got "
            f"{employee_skills.shape} and {task_skills.shape}"
        )

    num_skills = task_skills.size
    if num_skills == 0:
        # Nothing to compare: the distance is zero, so report a perfect match
        # instead of the NaN that 0 / 0 would produce.
        return 1.0

    # Actual WED between this employee and this task
    gap = employee_skills - task_skills
    weights = 1 / (1 + alpha * np.maximum(0, gap))
    wed = np.sqrt(np.sum(weights * gap ** 2))

    # Worst-case WED: the employee has 0 everywhere and the task needs
    # `max_skill` everywhere. Every gap is negative, so every weight is 1 and
    # the sum collapses to num_skills * max_skill**2.
    max_wed = np.sqrt(num_skills * float(max_skill) ** 2)

    # Normalise and flip so that larger means a better match
    return float(1 - wed / max_wed)

In [None]:
# Task Assignment Environment
class TaskAssignmentEnv:
    """
    Environment in which tasks are handed out to employees.

    The observable state is a (num_tasks x num_employees) 0/1 matrix marking
    which employee each task was assigned to. Per-employee workload (sum of the
    assigned tasks' story points) is tracked separately in `self.workloads`.

    Column conventions used by this class:
      - tasks:     a 'story_points' column; skills are columns 3 onwards.
      - employees: a 'Role' column; skills are columns 2 onwards.
    """

    def __init__(self, tasks, employees, max_workload=20, alpha=0.5):
        """
        Args:
            tasks (DataFrame): Task table.
            employees (DataFrame): Employee table.
            max_workload (float): Story-point cap per employee.
            alpha (float): Over-qualification parameter forwarded to the
                similarity function.
        """
        self.tasks = tasks
        self.employees = employees
        self.max_workload = max_workload
        self.alpha = alpha
        self.num_tasks = len(tasks)
        self.num_employees = len(employees)

        # State representation: a matrix (num_tasks x num_employees)
        # indicating whether a particular employee is assigned to a task.
        self.state = np.zeros((self.num_tasks, self.num_employees))
        self.workloads = np.zeros(self.num_employees)

    def valid_actions(self, task_idx):
        """
        Return a list of employee indices that can be validly assigned to the given task.
        Invalid if:
        - The employee's workload would exceed max_workload
        - The employee is a 'Dummy' employee (if any)

        Employees whose Role is not a string (e.g. NaN from a blank CSV cell)
        are treated as regular, non-dummy employees.
        """
        if self.num_employees == 0:
            return []

        # Loop-invariant lookups: fetch them once instead of once per employee.
        story_points = self.tasks.iloc[task_idx]['story_points']
        roles = self.employees['Role']

        valid = []
        for i in range(self.num_employees):
            role = roles.iloc[i]
            if isinstance(role, str) and 'Dummy' in role:
                # Skip dummy employees if they exist (padding)
                continue
            if self.workloads[i] + story_points <= self.max_workload:
                valid.append(i)
        return valid

    def reset(self):
        """Clear all assignments and workloads; return the fresh state matrix."""
        self.state = np.zeros((self.num_tasks, self.num_employees))
        self.workloads = np.zeros(self.num_employees)
        return self.state

    def step(self, task_idx, employee_idx):
        """
        Assign task_idx to employee_idx and calculate the reward.

        The assignment is applied unconditionally; this method does not check
        it against `valid_actions`. An assignment that pushes a workload above
        `max_workload` ends the episode via `done`.

        Returns:
            tuple: (state, reward, done)
              - state: the environment's own state matrix (the same array
                object that is mutated in place, not a copy).
              - reward: 0.8 * similarity + 0.2 * workload balance, where
                balance = 1 / (1 + std of workloads).
              - done (bool): True once every task has at least one assignee or
                any workload exceeds `max_workload`.
        """
        task = self.tasks.iloc[task_idx]
        employee = self.employees.iloc[employee_idx]
        task_skills = task[3:].values
        employee_skills = employee[2:].values

        # Similarity Score
        similarity_score = calculate_weighted_euclidean_distance(employee_skills, task_skills, self.alpha)

        # Update state & workload
        self.state[task_idx, employee_idx] = 1
        self.workloads[employee_idx] += task['story_points']

        # Workload balance score: lower std means more balanced
        std_workload = np.std(self.workloads)
        workload_balance_score = 1 / (1 + std_workload)

        # Combine into a single reward
        reward = 0.8 * similarity_score + 0.2 * workload_balance_score

        # Done if all tasks assigned or workload constraint violated.
        # Cast to a plain bool so callers do not receive a numpy.bool_.
        all_assigned = np.all(self.state.sum(axis=1))
        overloaded = np.any(self.workloads > self.max_workload)
        done = bool(all_assigned or overloaded)

        return self.state, reward, done

In [None]:
# PPO Agent Class
class PPOAgent(nn.Module):
    """
    Actor-critic network: a shared two-layer ReLU trunk feeding an actor head
    (one logit per action) and a critic head (a single state value).
    """

    def __init__(self, state_dim, action_dim, hidden_dim=128):
        """
        Args:
            state_dim (int): Length of the flattened state vector.
            action_dim (int): Number of discrete actions.
            hidden_dim (int): Width of both hidden layers.
        """
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(state_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
        )
        self.actor = nn.Linear(hidden_dim, action_dim)
        self.critic = nn.Linear(hidden_dim, 1)

    def forward(self, x):
        """Return (action logits, state value) for the given state tensor."""
        x = self.fc(x)
        return self.actor(x), self.critic(x)

    def get_action(self, state, valid_actions):
        """
        Sample an action restricted to `valid_actions`.

        Args:
            state (Tensor): Flattened state vector.
            valid_actions (sequence of int): Indices of the allowed actions.
                Any sequence is accepted, including a NumPy array.

        Returns:
            tuple: (chosen action index, log-probability tensor).
            The log-probability is taken under the distribution renormalised
            over the valid actions only. When no action is valid, action 0 and
            a 1-element zero tensor are returned as placeholders.
        """
        # Normalise to a list of plain ints so that emptiness checks and
        # indexing behave the same for lists, tuples and NumPy arrays.
        valid_actions = [int(a) for a in valid_actions]
        if not valid_actions:
            # If no valid actions, return 0 by default and a dummy log_prob
            # (kept 1-dimensional, as callers only use .item() on it).
            return 0, torch.tensor([0.0])

        logits, _ = self.forward(state)
        # Restricting the logits and letting Categorical normalise them is
        # mathematically the same as softmax-then-renormalise, but cannot
        # underflow to an all-zero probability vector.
        action_dist = Categorical(logits=logits[valid_actions])
        action = action_dist.sample()
        chosen_action = valid_actions[action.item()]
        return chosen_action, action_dist.log_prob(action)


class PPO:
    """
    Clipped-surrogate PPO trainer wrapped around an actor-critic `agent`.

    The agent must be an nn.Module whose forward pass returns
    (action logits, state values).

    NOTE(review): `train` evaluates new log-probabilities over the *full*
    action set. If the `log_probs` passed in were computed over a restricted
    (masked) action subset, the probability ratio compares two different
    distributions - confirm this is intended.
    """

    def __init__(self, agent, lr=3e-4, gamma=0.99, eps_clip=0.2, k_epochs=4):
        """
        Args:
            agent (nn.Module): Actor-critic network to optimise.
            lr (float): Adam learning rate.
            gamma (float): Discount factor.
            eps_clip (float): PPO clipping range for the probability ratio.
            k_epochs (int): Gradient steps taken per call to `train`.
        """
        self.agent = agent
        self.optimizer = optim.Adam(agent.parameters(), lr=lr)
        self.gamma = gamma
        self.eps_clip = eps_clip
        self.k_epochs = k_epochs
        self.mse_loss = nn.MSELoss()

    def compute_returns(self, rewards, dones, next_value):
        """
        Compute discounted returns, bootstrapping from `next_value`.

        A truthy `done` flag cuts the bootstrap at that step.

        Returns:
            list: One return per reward, in the original time order.
        """
        returns = []
        R = next_value
        for reward, done in zip(reversed(rewards), reversed(dones)):
            not_done = 0.0 if done else 1.0
            R = reward + self.gamma * R * not_done
            returns.append(R)
        # Built back-to-front with append (O(n)) and flipped once at the end.
        returns.reverse()
        return returns

    def train(self, states, actions, log_probs, rewards, dones, next_value):
        """
        Run `k_epochs` PPO updates on one batch of transitions.

        Args:
            states (sequence): Flattened state vectors, one per transition.
            actions (sequence of int): Actions taken.
            log_probs (sequence of float): Log-probabilities of those actions
                under the behaviour policy.
            rewards (sequence of float): Rewards received.
            dones (sequence of bool): Episode-termination flags.
            next_value (float): Value estimate used to bootstrap the last return.

        Returns:
            tuple: (mean total loss over the epochs,
                    policy loss of the last epoch,
                    value loss of the last epoch), all plain floats.
        """
        returns = torch.tensor(
            self.compute_returns(rewards, dones, next_value), dtype=torch.float32
        )
        # Stack into one ndarray first; building a tensor from a list of
        # separate arrays is slow.
        states = torch.as_tensor(np.asarray(states), dtype=torch.float32)
        actions = torch.tensor(actions).long()
        old_log_probs = torch.tensor(log_probs).float()

        total_loss, last_policy_loss, last_value_loss = 0.0, 0.0, 0.0
        for _ in range(self.k_epochs):
            logits, state_values = self.agent(states)
            # The critic emits shape (N, 1). Drop only the trailing axis so the
            # values line up element-wise with `returns` (shape (N,)). Leaving
            # it as (N, 1) would broadcast the advantages to (N, N), while a
            # bare squeeze() would collapse a batch of one to a 0-d tensor.
            state_values = state_values.squeeze(-1)
            action_dist = Categorical(logits=logits)
            new_log_probs = action_dist.log_prob(actions)

            # Policy Loss
            ratios = torch.exp(new_log_probs - old_log_probs)
            advantages = returns - state_values.detach()
            surr1 = ratios * advantages
            surr2 = torch.clamp(ratios, 1 - self.eps_clip, 1 + self.eps_clip) * advantages
            policy_loss = -torch.min(surr1, surr2).mean()

            # Value Loss
            value_loss = self.mse_loss(state_values, returns)

            # Total Loss
            loss = policy_loss + 0.5 * value_loss
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

            total_loss += loss.item()
            last_policy_loss = policy_loss.item()
            last_value_loss = value_loss.item()

        # max(..., 1) keeps a k_epochs of 0 from dividing by zero.
        return total_loss / max(self.k_epochs, 1), last_policy_loss, last_value_loss

    def save_transferable_weights(self, path):
        """
        Save every entry of the agent's state dict except the actor head.

        Entries whose name contains "actor" are left out; their size depends on
        the number of actions, which differs between curriculum stages.
        """
        transferable_state_dict = {
            name: param for name, param in self.agent.state_dict().items() if "actor" not in name
        }
        torch.save(transferable_state_dict, path)

    def load_transferable_weights(self, path):
        """
        Load transferable weights from a checkpoint, ignoring entries that the
        agent does not have or whose dimensions differ.

        Args:
            path (str): Path to the checkpoint file.
        """
        checkpoint = torch.load(path)
        model_state = self.agent.state_dict()

        # Update only matching keys
        for name, param in checkpoint.items():
            if name not in model_state:
                # Checked first: looking up a missing key to report its size
                # would raise KeyError.
                print(f"Skipping layer '{name}': not present in the model")
                continue
            if model_state[name].size() != param.size():
                print(f"Skipping layer '{name}' due to size mismatch: {param.size()} vs {model_state[name].size()}")
                continue
            model_state[name].copy_(param)

        self.agent.load_state_dict(model_state, strict=False)
        print(f"Transferable weights loaded from {path}")



In [None]:
# Training configurations
num_iterations = 2  # We want to test with the first 2 iterations
num_episodes_per_iteration = 500  # Adjust as needed
convergence_threshold = 1e-2  # max-min reward spread that counts as "converged"
convergence_patience = 100  # number of most recent episodes inspected for convergence

# Directory on Drive where per-iteration checkpoints are written and read back.
# Created up front so that saving does not fail on a fresh Drive.
model_dir = "/content/drive/MyDrive/Skripsi/Resources/Models"
os.makedirs(model_dir, exist_ok=True)

# Lists to store overall metrics across curriculum.
# Each receives ONE entry per curriculum iteration (that iteration's
# per-episode list), appended once the iteration has finished.
all_similarity_scores = []
all_workload_distributions = []
all_std_workloads = []
all_reward_history = []

# Train iteratively
agent = None
ppo = None

# Training loop with loss tracking by episode
for iteration in range(1, num_iterations + 1):
    print(f"\n=== Curriculum Iteration {iteration} ===")

    # Load subset of data for this iteration
    df_tasks, df_employees = load_curriculum_data(df_tasks_original, df_employees_original, iteration)

    # Initialize environment for this iteration
    env = TaskAssignmentEnv(df_tasks, df_employees)
    state_dim = env.state.size
    action_dim = env.num_employees

    # The network shape depends on the stage size, so a fresh agent is built
    # every iteration; from the second iteration on, the transferable weights
    # saved by the previous stage are loaded into it.
    agent = PPOAgent(state_dim, action_dim)
    ppo = PPO(agent)
    if iteration > 1:
        ppo.load_transferable_weights(f"{model_dir}/ppo_agent_iteration_{iteration-1}.pt")

    iteration_reward_history = []
    iteration_similarity_scores = []
    iteration_workload_distributions = []
    iteration_std_workloads = []
    task_assignments_strategy = {}

    for episode in range(num_episodes_per_iteration):
        state = env.reset()
        episode_reward = 0
        done = False
        task_idx = 0
        episode_similarity_scores = []
        episode_workload = np.zeros(env.num_employees)
        task_assignments_episode = {}

        while not done:
            state_tensor = torch.tensor(state.flatten(), dtype=torch.float32)
            valid_actions = env.valid_actions(task_idx=task_idx)
            action, log_prob = agent.get_action(state_tensor, valid_actions)
            next_state, reward, done = env.step(task_idx=task_idx, employee_idx=action)

            # Track assignments
            task = env.tasks.iloc[task_idx]
            employee = env.employees.iloc[action]
            task_assignments_episode[task['task_id']] = employee['employee_id']

            # Skills indexing: tasks start at col 3, employees at col 2
            t_skills = task[3:].values
            e_skills = employee[2:].values

            distance = calculate_weighted_euclidean_distance(e_skills, t_skills, env.alpha)
            episode_similarity_scores.append(distance)
            episode_workload[action] += task['story_points']

            # Train PPO on this single transition and log losses
            loss, policy_loss, value_loss = ppo.train(
                [state_tensor.numpy()], [action], [log_prob.item()], [reward], [done], next_value=0
            )
            state = next_state
            episode_reward += reward

            # Move on to the next task once the current one has an assignee
            if np.any(env.state[task_idx]):
                task_idx += 1
            if task_idx >= env.num_tasks:
                done = True

        # Record metrics for this episode
        iteration_similarity_scores.append(episode_similarity_scores)
        iteration_workload_distributions.append(episode_workload)
        iteration_std_workloads.append(np.std(episode_workload))
        iteration_reward_history.append(episode_reward)

        # Update final task assignment strategy
        task_assignments_strategy = task_assignments_episode

        # Log episode information (losses are those of the episode's last update)
        print(
            f"Episode {episode+1}, Reward: {episode_reward:.6f}, "
            f"Loss: {loss:.6f}, Policy Loss: {policy_loss:.6f}, Value Loss: {value_loss:.6f}"
        )

        # Check for convergence
        if len(iteration_reward_history) > convergence_patience:
            recent_rewards = iteration_reward_history[-convergence_patience:]
            if max(recent_rewards) - min(recent_rewards) < convergence_threshold:
                print(f"Convergence achieved at episode {episode+1}. Stopping training early.")
                break

    # Publish this iteration's metrics to the curriculum-wide lists. Without
    # this step they stay empty and any cross-iteration plot built from them
    # shows nothing.
    all_similarity_scores.append(iteration_similarity_scores)
    all_workload_distributions.append(iteration_workload_distributions)
    all_std_workloads.append(iteration_std_workloads)
    all_reward_history.append(iteration_reward_history)

    # Save transferable model weights
    model_save_path = f"{model_dir}/ppo_agent_iteration_{iteration}.pt"
    ppo.save_transferable_weights(model_save_path)
    print(f"Saved model for iteration {iteration} at {model_save_path}")

    # Log final task assignment strategy for this iteration
    print("\nFinal Task Assignment Strategy:")
    for task_id, emp_id in task_assignments_strategy.items():
        print(f"Task {task_id} assigned to Employee {emp_id}")


In [None]:
def load_state_with_dimension_check(model, checkpoint_path):
    """
    Load weights from a checkpoint into the model, only for layers that exist
    in the model with matching dimensions.

    Checkpoint entries that the model does not have, or whose size differs,
    are reported with a printed message and skipped.

    Args:
        model (torch.nn.Module): The model instance to load weights into.
        checkpoint_path (str): Path to the checkpoint file (a name -> tensor mapping).
    """
    checkpoint = torch.load(checkpoint_path)
    model_state = model.state_dict()

    for name, param in checkpoint.items():
        if name not in model_state:
            # Checked first: looking up a missing key to report its size
            # would raise KeyError.
            print(f"Skipping layer '{name}': not present in the model")
            continue
        if model_state[name].size() != param.size():
            print(f"Skipping layer '{name}' due to size mismatch: {param.size()} vs {model_state[name].size()}")
            continue
        model_state[name].copy_(param)

    model.load_state_dict(model_state, strict=False)
    print(f"Loaded weights with dimension check from {checkpoint_path}")


In [None]:
# After finishing all curriculum iterations, we now have a final trained model
# Evaluate the final model on:
# 1. iteration=1 dataset
# 2. iteration=2 dataset
# 3. new unseen dataset

def evaluate_model(
    agent,
    tasks_df,
    employees_df,
    checkpoint_path="/content/drive/MyDrive/Skripsi/Resources/Models/ppo_agent_iteration_2.pt",
):
    """
    Evaluate a PPO agent on a given dataset by running one assignment episode.

    The supplied `agent` is used as-is when its input and output sizes match
    the environment built from the dataset. Otherwise a new PPOAgent of the
    right size is created and the layers of `checkpoint_path` that fit are
    loaded into it.

    Actions are sampled from the policy (not taken greedily), so repeated
    calls can give different results.

    Args:
        agent (PPOAgent): The agent to evaluate.
        tasks_df (DataFrame): Task dataset.
        employees_df (DataFrame): Employee dataset.
        checkpoint_path (str): Checkpoint used only when `agent` does not fit
            the dataset's dimensions.

    Returns:
        tuple: Similarity scores (list, one per assignment made) and workload
        distribution (array, one entry per employee).
    """
    # Initialize environment for this dataset
    env = TaskAssignmentEnv(tasks_df, employees_df)
    state_dim = env.state.size
    action_dim = env.num_employees

    # Does the caller's agent fit this environment?
    try:
        agent_fits = (
            agent.fc[0].in_features == state_dim
            and agent.actor.out_features == action_dim
        )
    except (AttributeError, IndexError, TypeError):
        agent_fits = False

    if not agent_fits:
        # Rebuild with the correct dimensions and load whatever layers match
        agent = PPOAgent(state_dim, action_dim)
        load_state_with_dimension_check(agent, checkpoint_path)

    state = env.reset()
    done = env.num_tasks == 0  # nothing to assign -> skip the loop entirely
    task_idx = 0
    similarity_scores_eval = []
    workload_eval = np.zeros(env.num_employees)

    while not done:
        state_tensor = torch.tensor(state.flatten(), dtype=torch.float32)
        valid_actions = env.valid_actions(task_idx=task_idx)

        with torch.no_grad():
            action, log_prob = agent.get_action(state_tensor, valid_actions)

        next_state, reward, done = env.step(task_idx=task_idx, employee_idx=action)

        # Track similarity and workload
        task = env.tasks.iloc[task_idx]
        employee = env.employees.iloc[action]
        t_skills = task[3:].values
        e_skills = employee[2:].values
        distance = calculate_weighted_euclidean_distance(e_skills, t_skills, env.alpha)

        similarity_scores_eval.append(distance)
        workload_eval[action] += task['story_points']

        state = next_state
        # Move on to the next task once the current one has an assignee
        if np.any(env.state[task_idx]):
            task_idx += 1
        if task_idx >= env.num_tasks:
            done = True

    return similarity_scores_eval, workload_eval


In [None]:
# Load final model (iteration=2)
# NOTE: state_dim and action_dim are not defined in this cell; they are the
# module-level values left behind by the last pass of the training loop, so
# this cell only works after the training cell has run in the same kernel.
final_agent = PPOAgent(state_dim, action_dim)  # Reinitialize with current dimensions
load_state_with_dimension_check(final_agent, "/content/drive/MyDrive/Skripsi/Resources/Models/ppo_agent_iteration_2.pt")

# Evaluate on iteration=1 dataset
iter1_tasks, iter1_emps = load_curriculum_data(df_tasks_original, df_employees_original, 1)
iter1_similarity, iter1_workload = evaluate_model(final_agent, iter1_tasks, iter1_emps)

# Evaluate on iteration=2 dataset
iter2_tasks, iter2_emps = load_curriculum_data(df_tasks_original, df_employees_original, 2)
iter2_similarity, iter2_workload = evaluate_model(final_agent, iter2_tasks, iter2_emps)

# Evaluate on a new unseen dataset:
# task rows 10-14 and employee rows 4-5. With the default curriculum factors
# (5 tasks / 2 employees per iteration), iteration 2 covers only the first 10
# tasks and first 4 employees, so these rows are outside both training stages.
new_tasks = df_tasks_original.iloc[10:15].reset_index(drop=True)
new_emps = df_employees_original.iloc[4:6].reset_index(drop=True)
new_similarity, new_workload = evaluate_model(final_agent, new_tasks, new_emps)

# Plot final evaluation similarity scores: one box per evaluation set,
# each built from that set's per-assignment similarity scores.
plt.figure(figsize=(8, 5))
plt.boxplot([iter1_similarity, iter2_similarity, new_similarity], labels=["Iter1 Data", "Iter2 Data", "New Data"])
plt.title('Distribution of Similarity Scores on Different Evaluation Sets')
plt.ylabel('Similarity Score')
plt.xlabel('Dataset')
plt.show()


In [None]:
# Plot workload distributions for evaluation
def plot_workload_distribution(workloads, title):
    """
    Show a bar chart with one bar per employee.

    Args:
        workloads (sequence of float): Total workload per employee, in
            employee order. Bars are labelled E1, E2, ... accordingly.
        title (str): Figure title.
    """
    employee_labels = [f'E{i+1}' for i, _ in enumerate(workloads)]
    bar_positions = range(len(workloads))

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.bar(bar_positions, workloads, tick_label=employee_labels)
    ax.set_title(title)
    ax.set_xlabel('Employee')
    ax.set_ylabel('Total Workload')
    plt.show()

# One workload bar chart per evaluation set
plot_workload_distribution(iter1_workload, 'Workload Distribution on Iteration 1 Data')
plot_workload_distribution(iter2_workload, 'Workload Distribution on Iteration 2 Data')
plot_workload_distribution(new_workload, 'Workload Distribution on New Unseen Data')

# Plot std deviation progress during training.
# all_std_workloads is treated as a list of per-iteration lists; they are
# flattened into one series covering every iteration in order (not only iteration 2).
# NOTE(review): confirm the training cell actually appends to all_std_workloads;
# if it is still empty at this point, the line plot below comes out blank.
all_std_flat = [val for iteration_list in all_std_workloads for val in iteration_list]

plt.figure(figsize=(10, 6))
plt.plot(all_std_flat, label='Std Dev of Workloads Across Curriculum')
plt.title('Standard Deviation of Workloads Across Episodes in Curriculum')
plt.xlabel('Episode (across all iterations)')
plt.ylabel('Standard Deviation of Workloads')
plt.legend()
plt.show()