In [30]:
import numpy as np
import tensorflow as tf
import pickle

# === Load model ===
model = tf.keras.models.load_model('../models/App Predictor Model 4.keras')

# === Load supporting data ===
# Load tokenizer and reverse word index
with open("../data/processed/tokenizer_2.pkl", "rb") as f:
    tokenizer = pickle.load(f)
reverse_word_index = {v: k for k, v in tokenizer.word_index.items()}

# Load test set and meta
test = np.load("../data/processed/test_data_2.npz")
X_test_seq = test['X_seq']
X_test_extra = test['X_extra']
y_test = test['y']

meta = np.load("../data/processed/meta_2.npz")
vocab_size = int(meta['vocab_size'][0])
oov_index = int(meta['oov_index'][0])

# === Run Predictions ===
N_PREDICTIONS = 100
test_indices = np.random.choice(len(X_test_seq), N_PREDICTIONS, replace=False)

print("--- Running Test Predictions ---")

for i in test_indices:
    input_sequence_tokens = X_test_seq[i]
    input_extra_features = X_test_extra[i]
    true_label_token = np.argmax(y_test[i])

    # Decode input sequence
    input_app_names = [reverse_word_index.get(token, '?') for token in input_sequence_tokens if token != 0]
    true_app_name = reverse_word_index.get(true_label_token, 'unknown')

    # Model expects batch dimensions
    input_seq_batch = np.expand_dims(input_sequence_tokens, axis=0)
    input_extra_batch = np.expand_dims(input_extra_features, axis=0)

    # Predict
    prediction_proba = model.predict([input_seq_batch, input_extra_batch], verbose=0)[0]

    # Optional: mask OOV
    if oov_index is not None and oov_index < len(prediction_proba):
        prediction_proba[oov_index] = 0

    # Top-3 predictions
    top_3_indices = np.argsort(prediction_proba)[-3:][::-1]
    top_3_apps = [reverse_word_index.get(idx, 'unknown') for idx in top_3_indices]
    top_3_probs = [prediction_proba[idx] for idx in top_3_indices]

    # Output
    print(f"{' | '.join(input_app_names)}")
    print("PREDICTIONS:")
    for app, prob in zip(top_3_apps, top_3_probs):
        print(f"   - {app:<25} (Confidence: {prob:.2%})")
    print(f"NEXT APP: {true_app_name}")
    print("="*60)


--- Running Test Predictions ---
telegram | contacts | telegram | contacts | telegram | contacts | telegram | contacts | telegram | contacts | telegram | gmail | telegram | instagram | gmail
PREDICTIONS:
   - instagram                 (Confidence: 61.49%)
   - telegram                  (Confidence: 29.60%)
   - contacts                  (Confidence: 3.40%)
NEXT APP: telegram
<OOV> | google | <OOV> | google | <OOV> | google | <OOV> | google | <OOV> | google | <OOV> | google | <OOV> | google | <OOV>
PREDICTIONS:
   - google                    (Confidence: 99.46%)
   - gmail                     (Confidence: 0.14%)
   - settings                  (Confidence: 0.08%)
NEXT APP: google
<OOV> | twitter | <OOV> | twitter | <OOV> | twitter | <OOV> | twitter | <OOV> | twitter | <OOV> | twitter | <OOV> | twitter | <OOV>
PREDICTIONS:
   - twitter                   (Confidence: 98.81%)
   - google                    (Confidence: 0.48%)
   - gmail                     (Confidence: 0.28%)
NEXT APP: twit

In [32]:
# Calculate how many times the true label is in the top-3 predictions
from tqdm import tqdm

top3_correct = 0
top1_correct = 0
N = min(len(X_test_seq), 10000)
mistakes = []

for i in tqdm(range(N), desc="Evaluating top-3 accuracy"):
    input_sequence_tokens = X_test_seq[i]
    input_extra_features = X_test_extra[i]
    true_label_token = np.argmax(y_test[i])

    input_seq_batch = np.expand_dims(input_sequence_tokens, axis=0)
    input_extra_batch = np.expand_dims(input_extra_features, axis=0)

    prediction_proba = model.predict([input_seq_batch, input_extra_batch], verbose=0)[0]
    
    if oov_index is not None and oov_index < len(prediction_proba):
        prediction_proba[oov_index] = 0

    top_3_indices = np.argsort(prediction_proba)[-3:][::-1]
    if true_label_token in top_3_indices:
        top3_correct += 1
    else:
        # Save mistake info
        input_app_names = [reverse_word_index.get(token, '?') for token in input_sequence_tokens if token != 0]
        true_app_name = reverse_word_index.get(true_label_token, 'unknown')
        top_3_apps = [reverse_word_index.get(idx, 'unknown') for idx in top_3_indices]
        top_3_probs = [prediction_proba[idx] for idx in top_3_indices]
        mistake_str = (
            f"INPUT: {' | '.join(input_app_names)}\n"
            f"TRUE NEXT APP: {true_app_name}\n"
            f"PREDICTIONS:\n"
            + "\n".join([f"   - {app:<25} (Confidence: {prob:.2%})" for app, prob in zip(top_3_apps, top_3_probs)])
            + "\n" + "="*60 + "\n"
        )
        mistakes.append(mistake_str)
    if true_label_token == top_3_indices[0]:
        top1_correct += 1

top3_accuracy = top3_correct / N
top1_accuracy = top1_correct / N
print(f"Top-3 accuracy: {top3_accuracy:.2%} ({top3_correct}/{N})")
print(f"Top-1 accuracy: {top1_accuracy:.2%} ({top1_correct}/{N})")

# Save mistakes to file.txt
with open("mistakes.txt", "w", encoding="utf-8") as f:
    for mistake in mistakes:
        f.write(mistake)


Evaluating top-3 accuracy: 100%|██████████| 10000/10000 [04:50<00:00, 34.37it/s]

Top-3 accuracy: 89.03% (8903/10000)
Top-1 accuracy: 74.36% (7436/10000)



