<a href="https://colab.research.google.com/github/virtualemtee/tapping_schedule/blob/main/TappingSchedule.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
import joblib
import seaborn as sns
import matplotlib.pyplot as plt

# Load the pot data
data = pd.read_excel("previousPCCL_TappingSchedule_Compiled.xlsx")

# Display initial data sample
print("Data Sample:")
print(data.head())

# Preprocess the data
# Every column the model is trained on. The same list drives both the
# missing-value filter and the feature selection so the two cannot drift apart.
feature_columns = ['Pot1_Si', 'Pot1_Fe', 'Pot2_Si', 'Pot2_Fe', 'Avg_Si', 'Avg_Fe']

# Step 1: Handle missing values.
# A row is only usable if every feature AND the target grade are present;
# previously Avg_Si / Avg_Fe were not checked, so NaNs in them reached the model.
# .copy() makes the filtered frame independent before a new column is added.
data = data.dropna(subset=feature_columns + ['Grade']).copy()

# Step 2: Encode labels for grades
label_encoder = LabelEncoder()
data['grade_encoded'] = label_encoder.fit_transform(data['Grade'].astype(str))  # Ensure grades are string

# Features and target
X = data[feature_columns]
y = data['grade_encoded']

# Step 3: Split data into training and testing sets (80/20, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Step 4: Build a machine learning pipeline
# Two named stages: feature standardisation followed by a seeded random forest.
scaler_step = ('scaler', StandardScaler())
classifier_step = ('classifier', RandomForestClassifier(n_estimators=100, random_state=42))
pipeline = Pipeline(steps=[scaler_step, classifier_step])

# Train the model, then predict on the held-out test set
# (Pipeline.fit returns the fitted pipeline, so the calls can be chained).
y_pred = pipeline.fit(X_train, y_train).predict(X_test)

# Step 5: Evaluate the model
# Accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")

# Encoded grades that actually occur in the test split, sorted so the report
# rows appear in a stable (encoder) order rather than order of first appearance.
unique_labels = sorted(y_test.unique())

# Classification report
print("Classification Report:")
print(
    classification_report(
        y_test,
        y_pred,
        labels=unique_labels,  # Restrict the report to grades present in y_test
        target_names=label_encoder.inverse_transform(unique_labels),  # Decode the labels for display
        zero_division=0  # Avoids warnings for undefined metrics
    )
)

# Step 6: Confusion Matrix
# Build the matrix over EVERY grade known to the encoder. Without `labels=`
# scikit-learn only uses the grades seen in y_test / y_pred, so the matrix could be
# smaller than label_encoder.classes_ and the heatmap tick labels below would not line up.
all_labels = label_encoder.transform(label_encoder.classes_)
conf_matrix = confusion_matrix(y_test, y_pred, labels=all_labels)

# Plotting the confusion matrix (left disabled; rows/columns follow label_encoder.classes_)
# plt.figure(figsize=(10, 7))
# sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_)
# plt.xlabel('Predicted')
# plt.ylabel('True')
# plt.title('Confusion Matrix')
# plt.show()

# Save the model and encoder for future use if needed
# NOTE(review): a later cell in this notebook also writes "label_encoder.pkl";
# whichever cell runs last determines the file's contents.
joblib.dump(pipeline, "pot_pairing_model.pkl")
joblib.dump(label_encoder, "label_encoder.pkl")


Data Sample:
   Pot1_Si  Pot1_Fe  Pot2_Si  Pot2_Fe  Avg_Si  Avg_Fe  Grade
0     0.01     0.01     0.01     0.01   0.010   0.010    303
1     0.02     0.01     0.01     0.01   0.015   0.010    303
2     0.02     0.02     0.01     0.01   0.015   0.015    303
3     0.02     0.02     0.02     0.01   0.020   0.015    303
4     0.02     0.02     0.02     0.02   0.020   0.020    303
Model Accuracy: 0.99
Classification Report:
              precision    recall  f1-score   support

        2050       1.00      1.00      1.00        81
        1535       1.00      1.00      1.00        17
         303       1.00      1.00      1.00         6
         610       1.00      1.00      1.00         2
        1020       1.00      1.00      1.00         3
         506       0.00      0.00      0.00         1
         404       0.50      1.00      0.67         1

    accuracy                           0.99       111
   macro avg       0.79      0.86      0.81       111
weighted avg       0.99      0.99  

['label_encoder.pkl']

In [None]:
import pandas as pd
import joblib
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report
from itertools import combinations
from sklearn.preprocessing import LabelEncoder

# Load training data
# Later steps in this cell read the 'Cell_ID', 'Si' and 'Fe' columns from this sheet.
data = pd.read_excel('potline_data_updated.xlsx')

# Function to assign grade based on Si and Fe values
# Upper limits per grade as (max Si, max Fe, label), checked in order from the
# tightest limits to the loosest; the first row that fits wins.
_GRADE_LIMITS = (
    (0.03, 0.03, '0303'),
    (0.04, 0.04, '0404'),
    (0.04, 0.06, '0406'),
    (0.05, 0.06, '0506'),
    (0.06, 0.10, '0610'),
    (0.10, 0.20, '1020'),
    (0.15, 0.35, '1535'),
)


def assign_grade(si, fe):
    """Return the grade label for a given Si / Fe content.

    Parameters
    ----------
    si, fe : float
        Silicon and iron readings, in the same units as the limits in
        ``_GRADE_LIMITS``.

    Returns
    -------
    str or None
        The label of the first grade whose Si and Fe limits are both satisfied,
        '2050' when no tighter grade fits, or ``None`` when either reading is
        missing (NaN / None).

    Notes
    -----
    Readings are rounded to 6 decimals before comparison. Averages of two
    readings can carry binary round-off (``(0.1 + 0.2) / 2`` evaluates to
    ``0.15000000000000002``), which previously pushed values sitting exactly on
    a limit into the next, worse grade.
    """
    # A missing reading cannot be graded. Previously a single NaN combined with a
    # high value in the other element was still labelled '2050'.
    if pd.isna(si) or pd.isna(fe):
        return None

    # Strip floating-point noise so boundary values compare as intended.
    si = round(si, 6)
    fe = round(fe, 6)

    for max_si, max_fe, grade in _GRADE_LIMITS:
        if si <= max_si and fe <= max_fe:
            return grade

    # Anything beyond the '1535' limits falls into the catch-all grade.
    return '2050'

# Apply grading function to each row: grade every cell from its own Si / Fe readings
data['Grade'] = [assign_grade(si, fe) for si, fe in zip(data['Si'], data['Fe'])]

# Add room and section based on cell ID
def get_room_section(cell_id):
    """Map a cell ID to its ``(room, section)`` pair.

    IDs 1-25 give ``('A', 1)`` and IDs 26-50 give ``('A', 2)``. Every other ID
    falls through to room ``'B'``: section 3 when the ID is <= 75, section 4
    otherwise. No range validation is performed, so IDs below 1 are also
    reported as room ``'B'``, section 3.
    """
    if not (1 <= cell_id <= 50):
        # Everything outside room A's range is treated as room B.
        return 'B', (3 if cell_id <= 75 else 4)
    return 'A', (1 if cell_id <= 25 else 2)

data[['Room', 'Section']] = data['Cell_ID'].apply(lambda x: pd.Series(get_room_section(x)))

# Generate pair data with target labels.
# Every unordered pair of cells from the same section becomes one training row whose
# label is the grade of the averaged Si / Fe values. Pairs are emitted in the same
# order as before so the seeded train/test split downstream is unaffected.
pair_data = []
for (_, row1), (_, row2) in combinations(data.iterrows(), 2):
    if row1['Section'] != row2['Section']:
        continue  # Ensure cells are in the same section

    avg_si = (row1['Si'] + row2['Si']) / 2
    avg_fe = (row1['Fe'] + row2['Fe']) / 2

    # A missing reading on either cell makes the average NaN; such a pair has no
    # meaningful grade and a None label would break label encoding later on.
    if pd.isna(avg_si) or pd.isna(avg_fe):
        continue

    paired_grade = assign_grade(avg_si, avg_fe)  # The resultant grade is the target label
    if paired_grade is None:
        continue

    pair_data.append({
        "Cell_A": row1['Cell_ID'], "Cell_B": row2['Cell_ID'],
        "Room": row1['Room'], "Section": row1['Section'],
        "Avg_Si": avg_si, "Avg_Fe": avg_fe,
        "Initial_Grade_A": row1['Grade'], "Initial_Grade_B": row2['Grade'],
        "Paired_Grade": paired_grade
    })

pair_df = pd.DataFrame(pair_data)

# Prepare features and labels
# Identifiers and the per-cell grades are dropped; the model is trained on
# Avg_Si, Avg_Fe, Room and Section only.
X = pair_df.drop(columns=['Paired_Grade', 'Cell_A', 'Cell_B', 'Initial_Grade_A', 'Initial_Grade_B'])  # Drop identifiers
y = pair_df['Paired_Grade']

# One-hot encode categorical columns
X = pd.get_dummies(X, columns=['Room', 'Section'], drop_first=True)  # Convert 'Room' and 'Section' to numerical

# Encode grades
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Split data (80/20, fixed seed for repeatability)
X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)

# Train the model
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Save model and label encoder
# NOTE(review): the first cell of this notebook also writes 'label_encoder.pkl'
# (fitted on a different grade column); whichever cell runs last wins.
joblib.dump(model, 'paired_model.pkl')
joblib.dump(label_encoder, 'label_encoder.pkl')

# Evaluate the model
y_pred = model.predict(X_test)

# Pass the full label list explicitly: with target_names alone the report raises
# when a grade is absent from the test split, because the number of names no longer
# matches the number of labels found. zero_division=0 silences undefined-metric
# warnings for such grades.
all_labels = label_encoder.transform(label_encoder.classes_)
print(
    classification_report(
        y_test,
        y_pred,
        labels=all_labels,
        target_names=label_encoder.classes_,
        zero_division=0,
    )
)


              precision    recall  f1-score   support

        0303       1.00      1.00      1.00        26
        0404       1.00      1.00      1.00        17
        0406       1.00      1.00      1.00        30
        0506       1.00      1.00      1.00        28
        0610       1.00      1.00      1.00        31
        1020       1.00      0.98      0.99        54
        1535       0.95      1.00      0.97        35
        2050       1.00      0.95      0.97        19

    accuracy                           0.99       240
   macro avg       0.99      0.99      0.99       240
weighted avg       0.99      0.99      0.99       240



In [None]:
import pandas as pd
import numpy as np

# Set a seed for reproducibility (legacy global NumPy RNG)
np.random.seed(42)

# Define cell numbers from 1 to 100
cells = np.arange(1, 101)
cell_count = len(cells)

# Draw one uniform random Si and Fe reading per cell (Si first, then Fe)
si_values = np.random.uniform(0.02, 0.25, size=cell_count)
fe_values = np.random.uniform(0.02, 0.4, size=cell_count)

# Pick 10 distinct cells to treat as offline and blank out both of their readings
offline_cells = np.random.choice(cells, size=10, replace=False)
offline_mask = np.isin(cells, offline_cells)
si_values[offline_mask] = np.nan
fe_values[offline_mask] = np.nan

# Create DataFrame with one row per cell
# NOTE(review): the training cell earlier in this notebook reads a 'Cell_ID' column
# from a different workbook; confirm that 'Cell' is the intended header here.
data = pd.DataFrame(dict(Cell=cells, Si=si_values, Fe=fe_values))

# Save to Excel file
# index=False keeps the DataFrame's row index out of the workbook, so only the
# Cell / Si / Fe columns are written.
data.to_excel("sample_potline_data.xlsx", index=False)

# Confirmation message printed after the workbook has been written.
print("Generated sample potline data with Si and Fe values.")


Generated sample potline data with Si and Fe values.
