In [1]:
import pandas as pd

# Load the dataset
df = pd.read_csv('manufacturing_jobs.csv')

In [2]:
# Parse datetime columns
datetime_cols = ['Scheduled_Start', 'Scheduled_End', 'Actual_Start', 'Actual_End']
for col in datetime_cols:
    df[col] = pd.to_datetime(df[col], errors='coerce')

In [3]:
# Calculate durations (in hours)
df['Calculated_Scheduled_Duration'] = (df['Scheduled_End'] - df['Scheduled_Start']).dt.total_seconds() / 3600
df['Calculated_Actual_Duration'] = (df['Actual_End'] - df['Actual_Start']).dt.total_seconds() / 3600

In [4]:
# Display basic insights
print("✅ Data loaded and preprocessed successfully.")
print(f"🔍 Total jobs: {len(df)}")
print(f"🛠️ Unique machines: {df['Machine_ID'].nunique()}")
print("\n🧾 Sample preview:")
print(df.head(5))

✅ Data loaded and preprocessed successfully.
🔍 Total jobs: 1000
🛠️ Unique machines: 5

🧾 Sample preview:
  Job_ID Machine_ID Operation_Type  Material_Used  Processing_Time  \
0   J001        M01       Grinding           3.17               76   
1   J002        M01       Grinding           3.35               79   
2   J003        M04       Additive           2.29               56   
3   J004        M04       Grinding           1.76              106   
4   J005        M01          Lathe           1.90               46   

   Energy_Consumption  Machine_Availability     Scheduled_Start  \
0               11.42                    96 2023-03-18 08:00:00   
1                6.61                    84 2023-03-18 08:10:00   
2               11.11                    92 2023-03-18 08:20:00   
3               12.50                    95 2023-03-18 08:30:00   
4                8.13                    88 2023-03-18 08:40:00   

        Scheduled_End        Actual_Start          Actual_End Job_Statu

In [5]:
# Save preprocessed data for further modules
df.to_csv('preprocessed_job_data.csv', index=False)

In [6]:
# Load preprocessed data
df = pd.read_csv('preprocessed_job_data.csv')

In [None]:
# Helper function to find a spare machine
def find_spare_machine(operation_type, excluded_machine):
    candidates = df[
        (df['Operation_Type'] == operation_type) &
        (df['Machine_Availability'] > 90) &
        (df['Machine_ID'] != excluded_machine)
    ]
    if not candidates.empty:
        # Greedy choice: lowest energy consumption among available machines
        return candidates.sort_values('Energy_Consumption').iloc[0]['Machine_ID']
    return None

# Initialize results list
scheduled_jobs = []

In [8]:
# Iterate through each job
for index, row in df.iterrows():
    job_id = row['Job_ID']
    original_machine = row['Machine_ID']
    operation = row['Operation_Type']
    duration = row['Processing_Time']
    availability = row['Machine_Availability']
    
    # Schedule Start fallback if missing
    start_time = pd.to_datetime(row['Scheduled_Start']) if pd.notnull(row['Scheduled_Start']) else pd.Timestamp.now()

    if availability > 90:
        status = f"Assigned to {original_machine}"
        end_time = start_time + pd.to_timedelta(duration, unit='h')
        machine_used = original_machine
    else:
        spare_machine = find_spare_machine(operation, original_machine)
        if spare_machine:
            status = f"🔁 Reassigned to spare machine {spare_machine}"
            end_time = start_time + pd.to_timedelta(duration, unit='h')
            machine_used = spare_machine
        else:
            status = f" No available machine for {operation}"
            end_time = None
            machine_used = None

    scheduled_jobs.append({
        'Job_ID': job_id,
        'Assigned_Machine': machine_used,
        'Scheduled_Start': start_time,
        'Estimated_End': end_time,
        'Status': status
    })

In [9]:
# Create a new DataFrame
schedule_df = pd.DataFrame(scheduled_jobs)

# Show results
print("🗓️ Job Scheduling Summary:")
print(schedule_df[['Job_ID', 'Assigned_Machine', 'Scheduled_Start', 'Estimated_End', 'Status']].head(10))

🗓️ Job Scheduling Summary:
  Job_ID Assigned_Machine     Scheduled_Start       Estimated_End  \
0   J001              M01 2023-03-18 08:00:00 2023-03-21 12:00:00   
1   J002              M04 2023-03-18 08:10:00 2023-03-21 15:10:00   
2   J003              M04 2023-03-18 08:20:00 2023-03-20 16:20:00   
3   J004              M04 2023-03-18 08:30:00 2023-03-22 18:30:00   
4   J005              M04 2023-03-18 08:40:00 2023-03-20 06:40:00   
5   J006              M04 2023-03-18 08:50:00 2023-03-22 12:50:00   
6   J007              M03 2023-03-18 09:00:00 2023-03-19 07:00:00   
7   J008              M05 2023-03-18 09:10:00 2023-03-21 16:10:00   
8   J009              M03 2023-03-18 09:20:00 2023-03-20 03:20:00   
9   J010              M01 2023-03-18 09:30:00 2023-03-19 12:30:00   

                              Status  
0                    Assigned to M01  
1  🔁 Reassigned to spare machine M04  
2                    Assigned to M04  
3                    Assigned to M04  
4  🔁 Reassigned to

In [10]:
# Save results for visualization
schedule_df.to_csv('job_schedule_output.csv', index=False)

In [11]:
from collections import defaultdict

# Initialize machine failure history tracker
machine_failure_history = defaultdict(list)
breakdown_alerts = []

# Define threshold values
FAILURE_THRESHOLD = 2  # e.g., 2 consecutive failures
FAILURE_WINDOW = 5
FAILURE_RATIO = 0.6    # e.g., 60% failure rate in recent 5 jobs

# Sort by Scheduled_Start to track in chronological order
df = df.sort_values(by='Scheduled_Start')

# Track failures
for idx, row in df.iterrows():
    machine = row['Machine_ID']
    status = row['Job_Status']
    job_id = row['Job_ID']
    start_time = row['Scheduled_Start']

    # Record status
    machine_failure_history[machine].append(status)

    # Get recent statuses (last 5 jobs max)
    recent_statuses = machine_failure_history[machine][-FAILURE_WINDOW:]

    # Check for consecutive failures
    consecutive_failures = all(s == 'Failed' for s in recent_statuses[-FAILURE_THRESHOLD:])

    # Check failure ratio
    failure_ratio = recent_statuses.count('Failed') / len(recent_statuses)

    # Raise alert if either threshold is met
    if consecutive_failures or failure_ratio >= FAILURE_RATIO:
        alert_msg = f"⚠️ ALERT: {machine} is unreliable! ({failure_ratio*100:.0f}% failures in recent jobs)"
        alert_detail = {
            "Machine_ID": machine,
            "Affected_Job": job_id,
            "Alert_Time": start_time,
            "Alert_Message": alert_msg
        }
        breakdown_alerts.append(alert_detail)

In [12]:
# Convert to DataFrame for display
breakdown_alerts_df = pd.DataFrame(breakdown_alerts)
print("🚨 Breakdown Alerts 🚨")
print(breakdown_alerts_df if not breakdown_alerts_df.empty else "No critical machine issues detected.")

🚨 Breakdown Alerts 🚨
   Machine_ID Affected_Job           Alert_Time  \
0         M04         J003  2023-03-18 08:20:00   
1         M04         J175  2023-03-19 13:00:00   
2         M04         J205  2023-03-19 18:00:00   
3         M03         J222  2023-03-19 20:50:00   
4         M03         J239  2023-03-19 23:40:00   
5         M04         J342  2023-03-20 16:50:00   
6         M01         J514  2023-03-21 21:30:00   
7         M01         J516  2023-03-21 21:50:00   
8         M01         J525  2023-03-21 23:20:00   
9         M01         J527  2023-03-21 23:40:00   
10        M03         J573  2023-03-22 07:20:00   
11        M05         J585  2023-03-22 09:20:00   
12        M05         J591  2023-03-22 10:20:00   
13        M05         J594  2023-03-22 10:50:00   
14        M03         J608  2023-03-22 13:10:00   
15        M03         J609  2023-03-22 13:20:00   
16        M01         J611  2023-03-22 13:40:00   
17        M03         J612  2023-03-22 13:50:00   
18        

In [43]:
!pip install plyer

Defaulting to user installation because normal site-packages is not writeable
Collecting plyer
  Downloading plyer-2.1.0-py2.py3-none-any.whl.metadata (61 kB)
Downloading plyer-2.1.0-py2.py3-none-any.whl (142 kB)
Installing collected packages: plyer
Successfully installed plyer-2.1.0



[notice] A new release of pip is available: 24.3.1 -> 25.0.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [13]:
from plyer import notification

# Send alert for each breakdown
for alert in breakdown_alerts:
    notification.notify(
        title="⚠️ Machine Breakdown Alert",
        message=f"{alert['Alert_Message']} | Job: {alert['Affected_Job']} | Time: {alert['Alert_Time']}",
        timeout=10  # seconds
    )


In [14]:
def recommend_replacement(machine_id, job_row, df):
    same_type_machines = df[
        (df['Operation_Type'] == job_row['Operation_Type']) &
        (df['Machine_ID'] != machine_id) &
        (df['Machine_Availability'] > 80)  # arbitrary availability threshold
    ].sort_values(by=['Machine_Availability', 'Energy_Consumption'], ascending=[False, True])

    if not same_type_machines.empty:
        return same_type_machines.iloc[0]['Machine_ID']
    else:
        return "No optimal replacement found"

# Add replacement suggestion to alert
for alert in breakdown_alerts:
    job_row = df[df['Job_ID'] == alert['Affected_Job']].iloc[0]
    replacement = recommend_replacement(alert['Machine_ID'], job_row, df)
    alert['Suggested_Replacement'] = replacement


In [16]:
import pickle
import pandas as pd
from collections import defaultdict
from plyer import notification


class JobSchedulingModel:
    def __init__(self, df):
        self.df = df
        self.machine_failure_history = defaultdict(list)
        self.breakdown_alerts = []

    def send_desktop_alert(self, message):
        notification.notify(
            title="Machine Breakdown Alert",
            message=message,
            timeout=10
        )

    def recommend_replacement(self, machine_id, job_row):
        same_type_machines = self.df[
            (self.df['Operation_Type'] == job_row['Operation_Type']) &
            (self.df['Machine_ID'] != machine_id) &
            (self.df['Machine_Availability'] > 80)  # arbitrary availability threshold
        ].sort_values(by=['Machine_Availability', 'Energy_Consumption'], ascending=[False, True])

        if not same_type_machines.empty:
            return same_type_machines.iloc[0]['Machine_ID']
        else:
            return "No optimal replacement found"

    def track_machine_failures_and_recommendation(self):
        for idx, row in self.df.iterrows():
            machine = row['Machine_ID']
            status = row['Job_Status']
            job_id = row['Job_ID']
            start_time = row['Scheduled_Start']

            # Track failures for the machine
            self.machine_failure_history[machine].append(status)

            recent_statuses = self.machine_failure_history[machine][-5:]
            consecutive_failures = all(s == 'Failed' for s in recent_statuses[-2:])
            failure_ratio = recent_statuses.count('Failed') / len(recent_statuses)

            if consecutive_failures or failure_ratio >= 0.6:
                alert_msg = f"⚠️ ALERT: {machine} is unreliable! ({failure_ratio*100:.0f}% failures)"
                alert_detail = {
                    "Machine_ID": machine,
                    "Affected_Job": job_id,
                    "Alert_Time": start_time,
                    "Alert_Message": alert_msg
                }
                self.breakdown_alerts.append(alert_detail)

                self.send_desktop_alert(alert_msg)

                # Recommend replacement
                job_row = self.df[self.df['Job_ID'] == job_id].iloc[0]
                replacement_machine = self.recommend_replacement(machine, job_row)
                print(f"Recommended replacement for {machine}: {replacement_machine}")

    def save_model(self, filename='job_scheduling_model.pkl'):
        with open(filename, 'wb') as file:
            pickle.dump(self, file)
        print("Model saved successfully!")


# Example usage:

# Assuming df is your preprocessed DataFrame
df = pd.read_csv("preprocessed_job_data.csv")

# Initialize the model
model = JobSchedulingModel(df)

# Track failures and get alerts
model.track_machine_failures_and_recommendation()

# Save the model
model.save_model('job_scheduling_model.pkl')


Recommended replacement for M04: M01
Recommended replacement for M04: M02
Recommended replacement for M04: M03
Recommended replacement for M03: M02
Recommended replacement for M03: M02
Recommended replacement for M04: M01
Recommended replacement for M01: M05
Recommended replacement for M01: M04
Recommended replacement for M01: M03
Recommended replacement for M01: M03
Recommended replacement for M03: M02
Recommended replacement for M05: M02
Recommended replacement for M05: M01
Recommended replacement for M05: M04
Recommended replacement for M03: M02
Recommended replacement for M03: M02
Recommended replacement for M01: M02
Recommended replacement for M03: M02
Recommended replacement for M03: M01
Recommended replacement for M01: M04
Recommended replacement for M01: M04
Recommended replacement for M03: M01
Recommended replacement for M04: M01
Recommended replacement for M02: M01
Recommended replacement for M03: M02
Recommended replacement for M02: M04
Recommended replacement for M02: M03
R

In [17]:
# Save the trained model and its state
model.save_model('job_scheduling_model.pkl')

# Load the saved model
with open('job_scheduling_model.pkl', 'rb') as file:
    loaded_model = pickle.load(file)

# Use the loaded model
loaded_model.track_machine_failures_and_recommendation()


Model saved successfully!
Recommended replacement for M04: M02
Recommended replacement for M04: M03
Recommended replacement for M03: M02
Recommended replacement for M03: M02
Recommended replacement for M04: M01
Recommended replacement for M01: M05
Recommended replacement for M01: M04
Recommended replacement for M01: M03
Recommended replacement for M01: M03
Recommended replacement for M03: M02
Recommended replacement for M05: M02
Recommended replacement for M05: M01
Recommended replacement for M05: M04
Recommended replacement for M03: M02
Recommended replacement for M03: M02
Recommended replacement for M01: M02
Recommended replacement for M03: M02
Recommended replacement for M03: M01
Recommended replacement for M01: M04
Recommended replacement for M01: M04
Recommended replacement for M03: M01
Recommended replacement for M04: M01
Recommended replacement for M02: M01
Recommended replacement for M03: M02
Recommended replacement for M02: M04
Recommended replacement for M02: M03
Recommended 

EXPERIMENTS

In [18]:
import pandas as pd

# Load preprocessed data
df = pd.read_csv('preprocessed_job_data.csv')

# Convert datetime columns (if not already converted)
for col in ['Scheduled_Start', 'Scheduled_End', 'Actual_Start', 'Actual_End']:
    df[col] = pd.to_datetime(df[col], errors='coerce')

PRIORITY-BASED SCHEDULING

In [19]:
# Add fake priority if not present (High=1, Medium=2, Low=3)
if 'Priority' not in df.columns:
    import numpy as np
    np.random.seed(42)  # For reproducibility
    df['Priority'] = np.random.choice([1, 2, 3], size=len(df))  # Lower number = higher priority

# Sort by Priority first, then Scheduled Start
priority_schedule = df.sort_values(by=['Priority', 'Scheduled_Start']).copy()

print("✅ Priority-Based Schedule Sample:")
print(priority_schedule[['Job_ID', 'Machine_ID', 'Priority', 'Scheduled_Start']].head(10))


✅ Priority-Based Schedule Sample:
   Job_ID Machine_ID  Priority     Scheduled_Start
1    J002        M01         1 2023-03-18 08:10:00
4    J005        M01         1 2023-03-18 08:40:00
5    J006        M02         1 2023-03-18 08:50:00
12   J013        M03         1 2023-03-18 10:00:00
15   J016        M01         1 2023-03-18 10:30:00
20   J021        M02         1 2023-03-18 11:20:00
21   J022        M03         1 2023-03-18 11:30:00
24   J025        M04         1 2023-03-18 12:00:00
25   J026        M01         1 2023-03-18 12:10:00
26   J027        M03         1 2023-03-18 12:20:00


SHORTEST JOB FIRST

In [20]:
# Sort jobs by Processing_Time
sjf_schedule = df.sort_values(by=['Processing_Time', 'Scheduled_Start']).copy()

print("✅ Shortest-Job-First (SJF) Schedule Sample:")
print(sjf_schedule[['Job_ID', 'Machine_ID', 'Processing_Time', 'Scheduled_Start']].head(10))

✅ Shortest-Job-First (SJF) Schedule Sample:
    Job_ID Machine_ID  Processing_Time     Scheduled_Start
150   J151        M02               20 2023-03-19 09:00:00
207   J208        M01               20 2023-03-19 18:30:00
273   J274        M05               20 2023-03-20 05:30:00
299   J300        M04               20 2023-03-20 09:50:00
736   J737        M05               20 2023-03-23 10:40:00
897   J898        M05               20 2023-03-24 13:30:00
941   J942        M04               20 2023-03-24 20:50:00
25    J026        M01               21 2023-03-18 12:10:00
196   J197        M05               21 2023-03-19 16:40:00
206   J207        M05               21 2023-03-19 18:20:00


EARLIEST DEADLINE FIRST

In [21]:
# Sort jobs by Scheduled_End
edf_schedule = df.sort_values(by=['Scheduled_End']).copy()

print("✅ Earliest-Deadline-First (EDF) Schedule Sample:")
print(edf_schedule[['Job_ID', 'Machine_ID', 'Scheduled_End', 'Scheduled_Start']].head(10))


✅ Earliest-Deadline-First (EDF) Schedule Sample:
   Job_ID Machine_ID       Scheduled_End     Scheduled_Start
0    J001        M01 2023-03-18 09:16:00 2023-03-18 08:00:00
2    J003        M04 2023-03-18 09:16:00 2023-03-18 08:20:00
6    J007        M04 2023-03-18 09:22:00 2023-03-18 09:00:00
4    J005        M01 2023-03-18 09:26:00 2023-03-18 08:40:00
1    J002        M01 2023-03-18 09:29:00 2023-03-18 08:10:00
9    J010        M01 2023-03-18 09:57:00 2023-03-18 09:30:00
8    J009        M02 2023-03-18 10:02:00 2023-03-18 09:20:00
3    J004        M04 2023-03-18 10:16:00 2023-03-18 08:30:00
11   J012        M03 2023-03-18 10:18:00 2023-03-18 09:50:00
7    J008        M05 2023-03-18 10:29:00 2023-03-18 09:10:00


ROUND-ROBIN 

In [22]:
from collections import deque

# Parameters
time_quantum_hours = 2  # Example: 2-hour slots

# Copy DataFrame and create queue
rr_jobs = df.copy()
rr_jobs['Remaining_Time'] = rr_jobs['Processing_Time']
job_queue = deque(rr_jobs.to_dict(orient='records'))

rr_schedule = []

current_time = pd.Timestamp.now()

while job_queue:
    job = job_queue.popleft()
    job_id = job['Job_ID']
    machine_id = job['Machine_ID']
    remaining_time = job['Remaining_Time']

    time_slice = min(time_quantum_hours, remaining_time)
    
    rr_schedule.append({
        'Job_ID': job_id,
        'Machine_ID': machine_id,
        'Start_Time': current_time,
        'End_Time': current_time + pd.Timedelta(hours=time_slice),
        'Time_Slice_Hours': time_slice
    })
    
    current_time += pd.Timedelta(hours=time_slice)
    
    if remaining_time > time_quantum_hours:
        job['Remaining_Time'] = remaining_time - time_quantum_hours
        job_queue.append(job)

# Convert to DataFrame
rr_schedule_df = pd.DataFrame(rr_schedule)

print("✅ Round Robin Schedule Sample:")
print(rr_schedule_df.head(10))


✅ Round Robin Schedule Sample:
  Job_ID Machine_ID                 Start_Time                   End_Time  \
0   J001        M01 2025-04-26 20:04:46.073489 2025-04-26 22:04:46.073489   
1   J002        M01 2025-04-26 22:04:46.073489 2025-04-27 00:04:46.073489   
2   J003        M04 2025-04-27 00:04:46.073489 2025-04-27 02:04:46.073489   
3   J004        M04 2025-04-27 02:04:46.073489 2025-04-27 04:04:46.073489   
4   J005        M01 2025-04-27 04:04:46.073489 2025-04-27 06:04:46.073489   
5   J006        M02 2025-04-27 06:04:46.073489 2025-04-27 08:04:46.073489   
6   J007        M04 2025-04-27 08:04:46.073489 2025-04-27 10:04:46.073489   
7   J008        M05 2025-04-27 10:04:46.073489 2025-04-27 12:04:46.073489   
8   J009        M02 2025-04-27 12:04:46.073489 2025-04-27 14:04:46.073489   
9   J010        M01 2025-04-27 14:04:46.073489 2025-04-27 16:04:46.073489   

   Time_Slice_Hours  
0                 2  
1                 2  
2                 2  
3                 2  
4          

REINFORCEMENT LEARNING

In [23]:
import numpy as np
import pandas as pd
import random

# Load preprocessed job data
df = pd.read_csv('preprocessed_job_data.csv')

# Basic Parameters
jobs = df['Job_ID'].tolist()
machines = df['Machine_ID'].unique().tolist()

states = jobs
actions = machines

q_table = pd.DataFrame(0, index=states, columns=actions)

# Hyperparameters
alpha = 0.1  # learning rate
gamma = 0.6  # discount factor
epsilon = 0.1  # exploration rate
episodes = 5000

# Dummy Machine Availability Tracker
machine_availability = {m: 1.0 for m in machines}  # fully available initially

for episode in range(episodes):
    current_job = random.choice(jobs)
    
    if random.uniform(0, 1) < epsilon:
        chosen_machine = random.choice(machines)  # Explore
    else:
        chosen_machine = q_table.loc[current_job].idxmax()  # Exploit
    
    # Simulate reward (based on availability)
    if machine_availability[chosen_machine] > 0.9:
        reward = 1  # Good
    elif machine_availability[chosen_machine] > 0.5:
        reward = 0  # Neutral
    else:
        reward = -1  # Bad choice

    old_value = q_table.loc[current_job, chosen_machine]
    next_max = q_table.loc[current_job].max()

    # Update Q-Table
    new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
    q_table.loc[current_job, chosen_machine] = new_value

    # Slightly random decay to simulate usage
    machine_availability[chosen_machine] *= 0.99

print("✅ Q-Learning Training Finished!")
print(q_table.head())


  q_table.loc[current_job, chosen_machine] = new_value
  q_table.loc[current_job, chosen_machine] = new_value
  q_table.loc[current_job, chosen_machine] = new_value
  q_table.loc[current_job, chosen_machine] = new_value
  q_table.loc[current_job, chosen_machine] = new_value


✅ Q-Learning Training Finished!
        M01    M04    M02    M05  M03
J001 -0.100 -0.100 -0.100  0.000  0.0
J002 -0.196 -0.196 -0.196 -0.100 -0.1
J003 -0.100 -0.100  0.000  0.000  0.0
J004 -0.100 -0.100 -0.100  0.000  0.0
J005 -0.196 -0.190 -0.196 -0.196 -0.1


DEFINE RL SETUP

In [24]:
import pandas as pd
import numpy as np
import random

# Load preprocessed data
df = pd.read_csv('preprocessed_job_data.csv')

# Ensure datetime columns are parsed
for col in ['Scheduled_Start', 'Scheduled_End', 'Actual_Start', 'Actual_End']:
    df[col] = pd.to_datetime(df[col], errors='coerce')

# Basic Setup
jobs = df['Job_ID'].tolist()
machines = df['Machine_ID'].unique().tolist()

states = jobs  # State = Current job
actions = machines  # Action = Assign to a machine

# Initialize Q-Table
q_table = pd.DataFrame(0, index=states, columns=actions)

# Machine Availability Map
machine_availability = {m: 1.0 for m in machines}

In [None]:
# Hyperparameters
alpha = 0.1   # Learning Rate
gamma = 0.6   # Discount Factor
epsilon = 0.1 # Exploration Rate
episodes = 5000  # Number of training episodes

In [26]:
for episode in range(episodes):
    current_job = random.choice(jobs)
    
    if random.uniform(0, 1) < epsilon:
        # Explore
        chosen_machine = random.choice(machines)
    else:
        # Exploit best known action
        chosen_machine = q_table.loc[current_job].idxmax()

    # Reward simulation
    if machine_availability[chosen_machine] > 0.9:
        reward = 1  # Best case
    elif machine_availability[chosen_machine] > 0.5:
        reward = 0  # Okay
    else:
        reward = -1  # Bad machine (low availability)

    old_value = q_table.loc[current_job, chosen_machine]
    next_max = q_table.loc[current_job].max()

    # Q-Value Update
    new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)
    q_table.loc[current_job, chosen_machine] = new_value

    # Decay availability (simulate usage)
    machine_availability[chosen_machine] *= 0.99

print("✅ Q-Learning Training Completed!")
print(q_table.head())


  q_table.loc[current_job, chosen_machine] = new_value
  q_table.loc[current_job, chosen_machine] = new_value
  q_table.loc[current_job, chosen_machine] = new_value
  q_table.loc[current_job, chosen_machine] = new_value
  q_table.loc[current_job, chosen_machine] = new_value


✅ Q-Learning Training Completed!
        M01    M04  M02  M05  M03
J001 -0.196 -0.196 -0.1 -0.1 -0.1
J002 -0.100 -0.100 -0.1 -0.1 -0.1
J003 -0.196 -0.100 -0.1 -0.1 -0.1
J004 -0.100 -0.100 -0.1  0.0  0.0
J005 -0.100 -0.100  0.0  0.0  0.0


In [27]:
# Scheduling Phase

rl_schedule = []

# Reset Machine Availabilities for fresh scheduling
machine_availability = {m: 1.0 for m in machines}
current_time = pd.Timestamp.now()

for job_id in jobs:
    # Pick the best machine learned
    assigned_machine = q_table.loc[job_id].idxmax()
    
    # Get job processing time
    job_row = df[df['Job_ID'] == job_id].iloc[0]
    duration = job_row['Processing_Time']

    # Schedule
    scheduled_start = current_time
    estimated_end = current_time + pd.to_timedelta(duration, unit='h')
    
    rl_schedule.append({
        'Job_ID': job_id,
        'Assigned_Machine': assigned_machine,
        'Scheduled_Start': scheduled_start,
        'Estimated_End': estimated_end
    })
    
    # Update current time (simulating serial processing for now)
    current_time = estimated_end

# Create RL schedule DataFrame
rl_schedule_df = pd.DataFrame(rl_schedule)

print("✅ RL-Based Job Scheduling Done!")
print(rl_schedule_df.head())


✅ RL-Based Job Scheduling Done!
  Job_ID Assigned_Machine            Scheduled_Start  \
0   J001              M02 2025-04-26 20:20:53.650877   
1   J002              M01 2025-04-30 00:20:53.650877   
2   J003              M04 2025-05-03 07:20:53.650877   
3   J004              M05 2025-05-05 15:20:53.650877   
4   J005              M02 2025-05-10 01:20:53.650877   

               Estimated_End  
0 2025-04-30 00:20:53.650877  
1 2025-05-03 07:20:53.650877  
2 2025-05-05 15:20:53.650877  
3 2025-05-10 01:20:53.650877  
4 2025-05-11 23:20:53.650877  


BREAKDOWN ANALYSIS

In [28]:
from collections import defaultdict

# Initialize machine failure history
machine_failure_history = defaultdict(list)

# Parameters for failure detection
FAILURE_THRESHOLD = 2  # Consecutive Failures
FAILURE_WINDOW = 5     # Look at last 5 jobs
FAILURE_RATIO = 0.6    # 60% failure rate = unreliable

# Detect breakdowns
breakdown_alerts = []

# Sort by scheduled start time
rl_schedule_df = rl_schedule_df.sort_values(by='Scheduled_Start')

for idx, job_row in rl_schedule_df.iterrows():
    machine = job_row['Assigned_Machine']
    job_id = job_row['Job_ID']
    start_time = job_row['Scheduled_Start']
    
    # Simulate Job Status (for now, assume 5% random failure)
    status = 'Completed' if np.random.rand() > 0.05 else 'Failed'
    
    machine_failure_history[machine].append(status)
    
    recent_statuses = machine_failure_history[machine][-FAILURE_WINDOW:]
    
    # Check if machine unreliable
    consecutive_failures = all(s == 'Failed' for s in recent_statuses[-FAILURE_THRESHOLD:])
    failure_ratio = recent_statuses.count('Failed') / len(recent_statuses)

    if consecutive_failures or failure_ratio >= FAILURE_RATIO:
        alert_msg = f"⚠️ Machine {machine} is unreliable! ({failure_ratio*100:.0f}% failures)"
        breakdown_alerts.append({
            "Machine_ID": machine,
            "Affected_Job": job_id,
            "Alert_Time": start_time,
            "Alert_Message": alert_msg
        })

# Show breakdown alerts
breakdown_alerts_df = pd.DataFrame(breakdown_alerts)

print("✅ Breakdown Detection Completed!")
print(breakdown_alerts_df if not breakdown_alerts_df.empty else "✅ No critical machine issues detected.")


✅ Breakdown Detection Completed!
  Machine_ID Affected_Job                 Alert_Time  \
0        M03         J230 2027-03-01 11:20:53.650877   

                                  Alert_Message  
0  ⚠️ Machine M03 is unreliable! (40% failures)  


In [29]:
# Recommend best replacement machine
def recommend_replacement_machine(broken_machine, operation_type, df):
    candidates = df[
        (df['Machine_ID'] != broken_machine) &
        (df['Operation_Type'] == operation_type)
    ]
    
    if not candidates.empty:
        # Choose machine with best availability and low energy consumption
        return candidates.sort_values(['Machine_Availability', 'Energy_Consumption'], ascending=[False, True]).iloc[0]['Machine_ID']
    else:
        return "⚠️ No Replacement Found"

# Update breakdown alerts with recommendations
for alert in breakdown_alerts:
    job_row = df[df['Job_ID'] == alert['Affected_Job']].iloc[0]
    replacement = recommend_replacement_machine(alert['Machine_ID'], job_row['Operation_Type'], df)
    alert['Suggested_Replacement'] = replacement

# Show updated breakdowns
breakdown_alerts_df = pd.DataFrame(breakdown_alerts)

print("✅ Best Replacement Machine Suggested!")
print(breakdown_alerts_df[['Machine_ID', 'Affected_Job', 'Suggested_Replacement']].head())


✅ Best Replacement Machine Suggested!
  Machine_ID Affected_Job Suggested_Replacement
0        M03         J230                   M02


In [30]:
# Predict Completion Time for all scheduled jobs (using RL scheduler as example)

def estimate_completion_times(schedule_df, df_reference):
    estimated_completion_list = []
    
    for idx, row in schedule_df.iterrows():
        job_id = row['Job_ID']
        assigned_machine = row['Assigned_Machine']
        start_time = row['Scheduled_Start']

        # Get original processing time from reference dataset
        job_row = df_reference[df_reference['Job_ID'] == job_id].iloc[0]
        processing_hours = job_row['Processing_Time']

        # Calculate Estimated Completion
        estimated_end_time = start_time + pd.to_timedelta(processing_hours, unit='h')
        
        estimated_completion_list.append(estimated_end_time)
    
    # Add to schedule
    schedule_df['Estimated_Completion_Time'] = estimated_completion_list
    return schedule_df

# Apply to RL Schedule
rl_schedule_df = estimate_completion_times(rl_schedule_df, df)

print("✅ Completion Time Estimation Done!")
print(rl_schedule_df[['Job_ID', 'Assigned_Machine', 'Scheduled_Start', 'Estimated_Completion_Time']].head())


✅ Completion Time Estimation Done!
  Job_ID Assigned_Machine            Scheduled_Start  \
0   J001              M02 2025-04-26 20:20:53.650877   
1   J002              M01 2025-04-30 00:20:53.650877   
2   J003              M04 2025-05-03 07:20:53.650877   
3   J004              M05 2025-05-05 15:20:53.650877   
4   J005              M02 2025-05-10 01:20:53.650877   

   Estimated_Completion_Time  
0 2025-04-30 00:20:53.650877  
1 2025-05-03 07:20:53.650877  
2 2025-05-05 15:20:53.650877  
3 2025-05-10 01:20:53.650877  
4 2025-05-11 23:20:53.650877  


In [33]:
# Calculate dynamic efficiency for each machine
def calculate_dynamic_machine_efficiency(df_reference):
    efficiency_map = {}

    machines = df_reference['Machine_ID'].unique()

    for machine in machines:
        machine_jobs = df_reference[df_reference['Machine_ID'] == machine]
        
        if not machine_jobs.empty:
            avg_actual_duration = machine_jobs['Calculated_Actual_Duration'].mean()
            avg_expected_duration = machine_jobs['Calculated_Scheduled_Duration'].mean()
            
            # Efficiency = Expected Duration / Actual Duration
            efficiency = avg_expected_duration / avg_actual_duration if avg_actual_duration else 1.0
            
            # Clip efficiency to avoid extreme values (safe range: 0.8 to 1.2)
            efficiency = max(0.8, min(1.2, efficiency))
            
            efficiency_map[machine] = efficiency
        else:
            efficiency_map[machine] = 1.0  # Default to normal

    return efficiency_map

# Calculate dynamic efficiencies
dynamic_efficiency_map = calculate_dynamic_machine_efficiency(df)

print("✅ Dynamic Machine Efficiency Map Ready!")
print(dynamic_efficiency_map)


✅ Dynamic Machine Efficiency Map Ready!
{'M01': 0.997787832691019, 'M04': 1.0089264088290923, 'M02': 0.9883088443063011, 'M05': 1.0009772586955559, 'M03': 1.0141822621875556}


In [34]:
# Enhanced Completion Time Estimator using dynamic efficiencies

def dynamic_estimate_completion_times(schedule_df, df_reference, dynamic_efficiency):
    estimated_completion_list = []
    
    for idx, row in schedule_df.iterrows():
        job_id = row['Job_ID']
        assigned_machine = row['Assigned_Machine']
        start_time = row['Scheduled_Start']

        # Fetch processing time
        job_row = df_reference[df_reference['Job_ID'] == job_id].iloc[0]
        processing_hours = job_row['Processing_Time']

        # Get dynamic efficiency
        efficiency = dynamic_efficiency.get(assigned_machine, 1.0)  # Default normal if missing
        adjusted_processing_time = processing_hours / efficiency

        # Estimate Completion
        estimated_end_time = start_time + pd.to_timedelta(adjusted_processing_time, unit='h')
        
        estimated_completion_list.append(estimated_end_time)
    
    # Add column
    schedule_df['Estimated_Completion_Time'] = estimated_completion_list
    return schedule_df

# Apply to RL Schedule
rl_schedule_df = dynamic_estimate_completion_times(rl_schedule_df, df, dynamic_efficiency_map)

print("✅ Dynamic Completion Time Estimation Done!")
print(rl_schedule_df[['Job_ID', 'Assigned_Machine', 'Scheduled_Start', 'Estimated_Completion_Time']].head())


✅ Dynamic Completion Time Estimation Done!
  Job_ID Assigned_Machine            Scheduled_Start  \
0   J001              M02 2025-04-26 20:20:53.650877   
1   J002              M01 2025-04-30 00:20:53.650877   
2   J003              M04 2025-05-03 07:20:53.650877   
3   J004              M05 2025-05-05 15:20:53.650877   
4   J005              M02 2025-05-10 01:20:53.650877   

      Estimated_Completion_Time  
0 2025-04-30 01:14:50.189957088  
1 2025-05-03 07:31:24.186109102  
2 2025-05-05 14:51:10.008379196  
3 2025-05-10 01:14:41.093044156  
4 2025-05-11 23:53:32.608741265  


In [35]:
# Machine Utilization Tracker

def calculate_machine_utilization(schedule_df, working_hours_per_day=24):
    # Step 1: Find first and last Scheduled_Start
    min_start = schedule_df['Scheduled_Start'].min()
    max_end = schedule_df['Estimated_Completion_Time'].max()

    # Step 2: Calculate total schedule span in days
    schedule_span_days = (max_end - min_start).days + 1  # Add 1 to include the current day

    # Step 3: Calculate total available working hours
    total_available_hours = working_hours_per_day * schedule_span_days

    # Step 4: Initialize usage counter
    machine_usage = {}

    for machine in schedule_df['Assigned_Machine'].unique():
        machine_jobs = schedule_df[schedule_df['Assigned_Machine'] == machine]
        
        # Total hours this machine is busy
        total_busy_hours = (machine_jobs['Estimated_Completion_Time'] - machine_jobs['Scheduled_Start']).dt.total_seconds().sum() / 3600
        
        utilization_percent = (total_busy_hours / total_available_hours) * 100
        machine_usage[machine] = {
            'Total_Busy_Hours': round(total_busy_hours, 2),
            'Total_Available_Hours': total_available_hours,
            'Utilization_%': round(utilization_percent, 2)
        }

    utilization_df = pd.DataFrame.from_dict(machine_usage, orient='index').reset_index()
    utilization_df.rename(columns={'index': 'Machine_ID'}, inplace=True)

    return utilization_df

# Calculate utilization
machine_utilization_df = calculate_machine_utilization(rl_schedule_df)

print("✅ Machine Utilization Calculation Done!")
print(machine_utilization_df)

✅ Machine Utilization Calculation Done!
  Machine_ID  Total_Busy_Hours  Total_Available_Hours  Utilization_%
0        M02          14249.59                  71400          19.96
1        M01          14005.98                  71400          19.62
2        M04          13869.20                  71400          19.42
3        M05          13851.46                  71400          19.40
4        M03          15251.70                  71400          21.36


In [36]:
# Simulate a Deadline column (within 1.2x scheduled end time)
if 'Deadline' not in df.columns:
    np.random.seed(42)
    df['Deadline'] = df['Scheduled_End'] + pd.to_timedelta(np.random.randint(1, 4, size=len(df)), unit='h')

print("✅ Deadlines Simulated for Jobs!")
print(df[['Job_ID', 'Scheduled_End', 'Deadline']].head())

✅ Deadlines Simulated for Jobs!
  Job_ID       Scheduled_End            Deadline
0   J001 2023-03-18 09:16:00 2023-03-18 12:16:00
1   J002 2023-03-18 09:29:00 2023-03-18 10:29:00
2   J003 2023-03-18 09:16:00 2023-03-18 12:16:00
3   J004 2023-03-18 10:16:00 2023-03-18 13:16:00
4   J005 2023-03-18 09:26:00 2023-03-18 10:26:00


In [37]:
# Deadline Risk Detector

def detect_deadline_risks(schedule_df, df_reference):
    risks = []

    for idx, row in schedule_df.iterrows():
        job_id = row['Job_ID']
        assigned_machine = row['Assigned_Machine']
        estimated_completion = row['Estimated_Completion_Time']

        # Get the deadline
        job_row = df_reference[df_reference['Job_ID'] == job_id].iloc[0]
        deadline = job_row['Deadline']

        if estimated_completion > deadline:
            risks.append({
                'Job_ID': job_id,
                'Assigned_Machine': assigned_machine,
                'Deadline': deadline,
                'Estimated_Completion_Time': estimated_completion,
                'Delay_Hours': round((estimated_completion - deadline).total_seconds() / 3600, 2),
                'Risk_Level': '⚠️ HIGH' if (estimated_completion - deadline).total_seconds() > 3600 else '⚠️ Medium'
            })

    risk_df = pd.DataFrame(risks)
    return risk_df

# Apply Deadline Risk Detection
deadline_risks_df = detect_deadline_risks(rl_schedule_df, df)

print("✅ Deadline Risk Detection Completed!")
print(deadline_risks_df if not deadline_risks_df.empty else "✅ No jobs at risk of missing deadlines!")

✅ Deadline Risk Detection Completed!
    Job_ID Assigned_Machine            Deadline     Estimated_Completion_Time  \
0     J001              M02 2023-03-18 12:16:00 2025-04-30 01:14:50.189957088   
1     J002              M01 2023-03-18 10:29:00 2025-05-03 07:31:24.186109102   
2     J003              M04 2023-03-18 12:16:00 2025-05-05 14:51:10.008379196   
3     J004              M05 2023-03-18 13:16:00 2025-05-10 01:14:41.093044156   
4     J005              M02 2023-03-18 10:26:00 2025-05-11 23:53:32.608741265   
..     ...              ...                 ...                           ...   
995   J996              M04 2023-03-25 08:40:00 2033-06-03 15:54:21.112932532   
996   J997              M01 2023-03-25 09:00:00 2033-06-06 04:28:52.538395052   
997   J998              M03 2023-03-25 10:52:00 2033-06-10 08:55:18.748730280   
998   J999              M05 2023-03-25 10:35:00 2033-06-13 13:16:30.048636782   
999  J1000              M04 2023-03-25 09:21:00 2033-06-18 03:21:58.2166

AUTO-RESHEDULING

In [38]:
# Auto-Rescheduler for Delayed Jobs

def auto_reschedule_delayed_jobs(schedule_df, df_reference, utilization_df, efficiency_map):
    rescheduled_jobs = []
    
    for idx, row in schedule_df.iterrows():
        job_id = row['Job_ID']
        assigned_machine = row['Assigned_Machine']
        estimated_completion = row['Estimated_Completion_Time']
        
        # Get job deadline
        job_row = df_reference[df_reference['Job_ID'] == job_id].iloc[0]
        deadline = job_row['Deadline']
        operation_type = job_row['Operation_Type']
        
        # Check if delayed
        if estimated_completion > deadline:
            # Try to find a better machine
            candidate_machines = df_reference[
                (df_reference['Operation_Type'] == operation_type) &
                (df_reference['Machine_ID'] != assigned_machine)
            ]['Machine_ID'].unique()

            best_candidate = None
            best_speed = -np.inf  # Higher is better
            
            for candidate in candidate_machines:
                utilization_row = utilization_df[utilization_df['Machine_ID'] == candidate]
                if utilization_row.empty:
                    continue
                utilization_percent = utilization_row.iloc[0]['Utilization_%']
                efficiency = efficiency_map.get(candidate, 1.0)
                
                # Prefer machines that are less utilized and faster
                score = (1 - utilization_percent/100) * efficiency

                if score > best_speed:
                    best_speed = score
                    best_candidate = candidate

            if best_candidate:
                # Update Assigned Machine
                schedule_df.at[idx, 'Assigned_Machine'] = best_candidate
                
                # Recalculate completion time based on new machine's speed
                processing_hours = job_row['Processing_Time']
                new_efficiency = efficiency_map.get(best_candidate, 1.0)
                adjusted_processing_time = processing_hours / new_efficiency
                new_estimated_completion = row['Scheduled_Start'] + pd.to_timedelta(adjusted_processing_time, unit='h')
                
                schedule_df.at[idx, 'Estimated_Completion_Time'] = new_estimated_completion

                rescheduled_jobs.append({
                    'Job_ID': job_id,
                    'Old_Machine': assigned_machine,
                    'New_Machine': best_candidate,
                    'Old_Completion': estimated_completion,
                    'New_Completion': new_estimated_completion
                })

    rescheduled_df = pd.DataFrame(rescheduled_jobs)
    return schedule_df, rescheduled_df

# Apply Auto-Rescheduling
rl_schedule_df, rescheduled_jobs_df = auto_reschedule_delayed_jobs(
    rl_schedule_df, df, machine_utilization_df, dynamic_efficiency_map
)

print("✅ Auto-Rescheduling Done!")
print(rescheduled_jobs_df if not rescheduled_jobs_df.empty else "✅ No rescheduling needed!")


✅ Auto-Rescheduling Done!
    Job_ID Old_Machine New_Machine                Old_Completion  \
0     J001         M02         M04 2025-04-30 01:14:50.189957088   
1     J002         M01         M04 2025-05-03 07:31:24.186109102   
2     J003         M04         M05 2025-05-05 14:51:10.008379196   
3     J004         M05         M04 2025-05-10 01:14:41.093044156   
4     J005         M02         M04 2025-05-11 23:53:32.608741265   
..     ...         ...         ...                           ...   
995   J996         M04         M05 2033-06-03 15:54:21.112932532   
996   J997         M01         M04 2033-06-06 04:28:52.538395052   
997   J998         M03         M04 2033-06-10 08:55:18.748730280   
998   J999         M05         M04 2033-06-13 13:16:30.048636782   
999  J1000         M04         M05 2033-06-18 03:21:58.216640278   

                   New_Completion  
0   2025-04-29 23:40:32.993201406  
1   2025-05-03 06:38:57.440924740  
2   2025-05-05 15:17:36.827870968  
3   2025-05-1

Gantt Chart for Job Scheduling Timeline

In [39]:
import plotly.express as px

# Gantt Chart for Scheduled Jobs
def create_gantt_chart(schedule_df):
    chart_df = schedule_df.copy()
    chart_df['Duration_Hours'] = (chart_df['Estimated_Completion_Time'] - chart_df['Scheduled_Start']).dt.total_seconds() / 3600
    
    fig = px.timeline(chart_df,
                      x_start="Scheduled_Start",
                      x_end="Estimated_Completion_Time",
                      y="Assigned_Machine",
                      color="Job_ID",
                      title="✅ Job Scheduling Timeline (Gantt Chart)",
                      hover_name="Job_ID")
    
    fig.update_yaxes(autorange="reversed")  # Machines in logical top-down order
    fig.update_layout(height=600, width=1000)

    fig.show()

# Show Gantt Chart
create_gantt_chart(rl_schedule_df)


Pie Chart for Machine Utilization

In [40]:
# Pie Chart for Machine Utilization
def create_utilization_pie_chart(utilization_df):
    fig = px.pie(utilization_df, 
                 names='Machine_ID', 
                 values='Utilization_%',
                 title='✅ Machine Utilization Share (%)')
    fig.update_layout(height=600, width=600)
    fig.show()

# Show Pie Chart
create_utilization_pie_chart(machine_utilization_df)
