In [2]:
import numpy as np
import tensorflow as tf
import pickle

# === Load model ===
# Trained Keras model; it is later called with [sequence_batch, extra_feature_batch].
model = tf.keras.models.load_model('../models/App Predictor Model 2.keras')

# === Load supporting data ===
# Load tokenizer and reverse word index
# NOTE: pickle executes arbitrary code on load -- only open tokenizer files
# produced by this project's own preprocessing step.
with open("../data/processed/tokenizer.pkl", "rb") as f:
    tokenizer = pickle.load(f)
# Invert word -> index so predicted/observed token ids can be shown as names.
reverse_word_index = {v: k for k, v in tokenizer.word_index.items()}

# Load test set and meta
# np.load on an .npz returns a lazy NpzFile that keeps the archive open;
# the context manager closes it once the arrays have been read into memory.
with np.load("../data/processed/test_data.npz") as test:
    X_test_seq = test['X_seq']
    X_test_extra = test['X_extra']
    y_test = test['y']

with np.load("../data/processed/meta.npz") as meta:
    # Values are stored as arrays; take the first element as a plain int.
    vocab_size = int(meta['vocab_size'][0])
    oov_index = int(meta['oov_index'][0])

# === Run Predictions ===
# Sample a few random test rows, run the model on them, and print the decoded
# input sequence, the true next app, and the model's top-3 guesses.
N_PREDICTIONS = 6

# Sampling without replacement raises ValueError when more samples are
# requested than rows exist, so clamp to the size of the test set.
n_samples = min(N_PREDICTIONS, len(X_test_seq))

print("--- Running Test Predictions ---")

if n_samples:
    test_indices = np.random.choice(len(X_test_seq), n_samples, replace=False)
    # One batched forward pass instead of one model.predict call per sample.
    batch_proba = model.predict(
        [X_test_seq[test_indices], X_test_extra[test_indices]], verbose=0
    )
else:
    # Empty test set: nothing to sample or predict.
    test_indices = np.array([], dtype=int)
    batch_proba = []

for i, prediction_proba in zip(test_indices, batch_proba):
    input_sequence_tokens = X_test_seq[i]
    # argmax picks the label's token id from the per-class target vector.
    true_label_token = int(np.argmax(y_test[i]))

    # Decode input sequence (token 0 is skipped; ids missing from the
    # tokenizer vocabulary are rendered as '?').
    input_app_names = [
        reverse_word_index.get(int(token), '?')
        for token in input_sequence_tokens
        if token != 0
    ]
    true_app_name = reverse_word_index.get(true_label_token, 'unknown')

    # Optional: mask OOV so it can never appear among the top predictions.
    # The lower bound stops a negative index from zeroing an unrelated class.
    if 0 <= oov_index < len(prediction_proba):
        prediction_proba[oov_index] = 0

    # Top-3 predictions: argsort is ascending, so take the last three and
    # reverse them to get most-probable-first order.
    top_3_indices = np.argsort(prediction_proba)[-3:][::-1]
    top_3_apps = [reverse_word_index.get(int(idx), 'unknown') for idx in top_3_indices]
    top_3_probs = [prediction_proba[idx] for idx in top_3_indices]

    # Output
    print("\n" + "="*60)
    print(f"INPUT SEQUENCE:\n{' -> '.join(input_app_names)}")
    print(f"\nACTUAL NEXT APP: {true_app_name}\n")
    print("MODEL PREDICTIONS (Top 3):")
    for app, prob in zip(top_3_apps, top_3_probs):
        print(f"   - {app:<25} (Confidence: {prob:.2%})")
    print("="*60)


--- Running Test Predictions ---

INPUT SEQUENCE:
<OOV> -> <OOV> -> facebook -> <OOV> -> facebook -> youtube -> <OOV> -> <OOV> -> <OOV> -> <OOV> -> <OOV> -> <OOV> -> <OOV> -> gmail -> <OOV>

ACTUAL NEXT APP: discord

MODEL PREDICTIONS (Top 3):
   - gmail                     (Confidence: 40.62%)
   - facebook                  (Confidence: 24.78%)
   - youtube                   (Confidence: 9.61%)

INPUT SEQUENCE:
messages -> <OOV> -> messages -> <OOV> -> messages -> <OOV> -> twitter -> <OOV> -> twitter -> s’more -> twitter -> <OOV> -> twitter -> <OOV> -> twitter

ACTUAL NEXT APP: instagram

MODEL PREDICTIONS (Top 3):
   - s’more                    (Confidence: 24.12%)
   - youtube                   (Confidence: 19.00%)
   - messages                  (Confidence: 10.70%)

INPUT SEQUENCE:
instagram -> telegram -> instagram -> <OOV> -> instagram -> gmail -> instagram -> maps -> facebook -> calendar -> instagram -> google -> maps -> google -> instagram

ACTUAL NEXT APP: clock

MODEL PREDICT