In [None]:
# Mount Google Drive at /content/drive; the CSV, model and pickle paths used
# by the following cells all live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd

# Folder on the mounted Drive that holds the exported test-set CSV files.
path = '/content/drive/My Drive/'

# Read the three test frames: the inputs, the Level 1 labels and the Level 2 labels.
# Only the DataFrames are created here. The X_test / y_test / y_test_level_2
# views are derived from these frames in a later cell; building them here as
# well bound X_test to a 2-D array that was replaced by a list of texts before
# anything read it.
X_test_df = pd.read_csv(path + 'X_test.csv')
y_test_df = pd.read_csv(path + 'y_test.csv')
y_test_level_2_df = pd.read_csv(path + 'y_test_level_2.csv')

In [None]:
import tensorflow as tf

# Load the saved Level 1 classifier (.keras file) from the mounted Drive.
# level_1_model is the object later cells call predict() and evaluate() on.
level_1_model_path = '/content/drive/My Drive/model_first_level.keras'
level_1_model = tf.keras.models.load_model(level_1_model_path)

In [None]:
import pickle

# Load the Tokenizer and max sentence length that were saved for the Level 1 model.
# NOTE: pickle.load can execute arbitrary code stored in the file, so these
# files must come from a trusted source (files written by this project).
tokenizer_filename = '/content/drive/MyDrive/Tokenizer_model_level_1.pkl'
max_len_filename = '/content/drive/MyDrive/Max_len_level_1.pkl'

with open(tokenizer_filename, 'rb') as f:
    tokenizer = pickle.load(f)

with open(max_len_filename, 'rb') as f:
    max_sentence_length = pickle.load(f)

In [None]:
# Inputs: the first column of the features frame as a plain Python list
# (this is what gets passed to the tokenizer).
X_test = X_test_df.iloc[:, 0].to_list()

# Labels: both label frames as NumPy arrays.
y_test = y_test_df.to_numpy()
y_test_level_2 = y_test_level_2_df.to_numpy()

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Encode the test texts with the Level 1 tokenizer, then pad every sequence
# at the end ('post') up to the length stored alongside the model.
sequences_test = tokenizer.texts_to_sequences(X_test)
padded_test = pad_sequences(
    sequences_test,
    padding='post',
    maxlen=max_sentence_length,
)

# Level 1 model outputs; the index of the largest value along axis 1 is kept
# as the predicted class of each sample.
level_1_predictions = level_1_model.predict(padded_test)
level_1_pred_classes = np.argmax(level_1_predictions, axis=1)

# evaluate() on the same padded inputs against the Level 1 labels; its two
# return values are reported as loss and accuracy.
loss, accuracy = level_1_model.evaluate(padded_test, y_test, verbose=0)

print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy}")

[1m35/35[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 200ms/step
Test Loss: 0.6452960968017578
Test Accuracy: 0.8232600688934326


In [None]:
# Load all Level 2 models together with their tokenizers and max lengths.
# The three dicts share the same integer key i, taken from the file names
# (model_{i}.keras, Tokenizer_model_{i}.pkl, Max_len_{i}.pkl).
# NOTE(review): the model path uses 'My Drive' while the pickle paths use
# 'MyDrive'; both spellings are kept exactly as they were.
NUM_LEVEL_2_MODELS = 17  # files are numbered 0..16; presumably one per Level 1 class - confirm

level_2_models = {}
tokenizers = {}
max_lengths = {}

for i in range(NUM_LEVEL_2_MODELS):
    model_path = f'/content/drive/My Drive/model_{i}.keras'
    level_2_models[i] = tf.keras.models.load_model(model_path)

    # NOTE: pickle.load can execute arbitrary code stored in the file; only
    # load pickles that come from a trusted source.
    tokenizer_filename = f'/content/drive/MyDrive/Tokenizer_model_{i}.pkl'
    with open(tokenizer_filename, 'rb') as f:
        tokenizers[i] = pickle.load(f)

    max_len_filename = f'/content/drive/MyDrive/Max_len_{i}.pkl'
    with open(max_len_filename, 'rb') as f:
        max_lengths[i] = pickle.load(f)

In [None]:
# End-to-end evaluation of the two-level pipeline.
# A sample counts as correct only when the Level 1 prediction equals its true
# Level 1 label AND the Level 2 model selected by that class predicts the true
# Level 2 label. Samples whose Level 1 prediction is wrong keep the
# placeholder -1 in final_predictions and are never sent to a Level 2 model.
level_1_true = y_test_df.iloc[:, 0].to_numpy()
level_2_true = y_test_level_2_df.iloc[:, 0].to_numpy()

final_predictions = [-1] * len(level_1_pred_classes)
correct_predictions = 0

# True where the Level 1 prediction matches the Level 1 label.
level_1_correct = level_1_pred_classes == level_1_true

# Run each Level 2 model once on all of its samples instead of calling
# predict() once per sample: per-sample calls are slow and print one progress
# bar each. Assumes every max_lengths value is a fixed int, so padding a batch
# gives the same rows as padding each sample alone - TODO confirm.
for class_id in np.unique(level_1_pred_classes[level_1_correct]):
    sample_indices = np.flatnonzero(level_1_correct & (level_1_pred_classes == class_id))
    class_key = int(class_id)  # the Level 2 dicts were filled with plain int keys

    # Use a separate local name here: assigning to `tokenizer` would replace
    # the Level 1 tokenizer that the Level 1 evaluation cell reads, so
    # re-running that cell afterwards would silently use the wrong vocabulary.
    level_2_tokenizer = tokenizers[class_key]
    sequences = level_2_tokenizer.texts_to_sequences([X_test[j] for j in sample_indices])
    padded_sequences = pad_sequences(sequences, maxlen=max_lengths[class_key], padding='post')

    # Predicted Level 2 class = index of the largest output along axis 1.
    level_2_scores = level_2_models[class_key].predict(padded_sequences, verbose=0)
    level_2_pred_classes = level_2_scores.argmax(axis=1)

    # Write the predictions back at the samples' original positions.
    for j, level_2_pred in zip(sample_indices, level_2_pred_classes):
        final_predictions[j] = level_2_pred

    correct_predictions += int((level_2_pred_classes == level_2_true[sample_indices]).sum())

# Share of ALL test samples that were classified correctly at both levels.
final_success_percentage = (correct_predictions / len(y_test_level_2_df)) * 100

print(f"Final Success Percentage: {final_success_percentage:.2f}%")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 271ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 233ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 355ms/step




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 285ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 285ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 280ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 133ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 124ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 125ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 246ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 54ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 94ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 226ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 232ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 261ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1