In [26]:
import numpy as np
from collections import defaultdict
import os
import sys
import tensorflow as tf
import matplotlib.pyplot as plt
sys.path.append(os.path.abspath('../'))
import utils.dataset_loader as dataset_loader
import json

In [27]:
PRESENT_SBOX = np.array([
    12,5,6,11,9,0,10,13,3,14,15,8,4,7,1,2
], dtype=np.uint8)


In [28]:
def convert_to_hex_string_array(array):
    result = []
    for item in array:
        hex_str = str(item[0])
        nibbles = [int(c, 16) for c in hex_str]
        result.append(nibbles)
    return np.array(result, dtype=np.uint8)

In [29]:
# Paths 
DATASETS_PATH = '../datasets'
PREPARED_DATASETS_PATH = '../dataset/present'
MODELS_PATH = 'trained/PRESENT'

In [30]:
# Dataset paths
evaluation_datasets = {
    "random_pt_dataset": "random_pt_dataset",
    "fixed_dataset_1": "fixed_dataset_1",
    "fixed_dataset_2": "fixed_dataset_2",
}

In [31]:
random_dataset = dataset_loader.load_dataset_files_with_cache(DATASETS_PATH+'/random_dataset', cache_path=f"{DATASETS_PATH}/cache/random_dataset_cache.pkl")

Checking for cache at: ../datasets/cache/random_dataset_cache.pkl
Loading datasets from cache: ../datasets/cache/random_dataset_cache.pkl


In [32]:
# Get the trace matrix
traces = dataset_loader.get_trace_matrix(random_dataset)  # shape (n_traces, n_samples)

# Get plaintexts and keys
plaintexts_values = random_dataset['plaintexts'].values
keys_values = random_dataset['keys'].values

# Convert hex strings to nibble arrays
plaintexts = convert_to_hex_string_array(plaintexts_values)
keys = convert_to_hex_string_array(keys_values)

In [33]:
trace_ranges_path = os.path.join(PREPARED_DATASETS_PATH, "ranges.json")

In [34]:
KNOWN_KEY_HEX = "FEDCBA0123456789"

In [35]:
def hex_to_nibbles(hex_str):
    return [int(c, 16) for c in hex_str]

true_key = hex_to_nibbles(KNOWN_KEY_HEX)

In [36]:
true_key

[15, 14, 13, 12, 11, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [38]:
def key_guess(idx, probs, plaintexts, traces):
    """
    Performs key recovery for a single nibble using a trained model's softmax predictions.

    For each possible nibble key guess (0–15), the function simulates the expected SBox
    output using plaintexts and accumulates the model’s predicted probability (log-likelihood)
    for each guessed output across all traces. The key guess with the highest cumulative
    log-likelihood is selected as the most likely key.

    Parameters:
        idx (int): Target nibble index (0–15)
        probs (np.ndarray): Softmax output from the model (n_traces, 16)
        plaintexts (np.ndarray): Plaintext bytes or nibbles used per trace
        traces (np.ndarray): Trace input (not directly used, for compatibility/logging)

    Returns:
        best_guess (int): The most likely key nibble
        scores (np.ndarray): Log-likelihood scores for each key guess
    """

    n_traces = traces.shape[0]
    scores = np.zeros(16)

    # Precompute all 16 candidate SBox outputs for each trace
    for key_guess in range(16):
        sbox_out = np.array([PRESENT_SBOX[pt[idx] ^ key_guess] for pt in plaintexts], dtype=np.uint8)
        # Use advanced indexing to gather the predicted probs
        pred_probs = probs[np.arange(n_traces), sbox_out]
        pred_probs = np.clip(pred_probs, 1e-10, 1.0)
        scores[key_guess] = np.sum(np.log(pred_probs))

    best_guess = np.argmax(scores)
    return best_guess, scores

In [39]:
def compute_guessing_entropy(probs, plaintexts, nibble_idx, true_key_nibble, max_traces=5000):
    """
    Efficient Guessing Entropy calculation using cumulative log-likelihoods.
    """
    n_traces = min(max_traces, len(plaintexts))
    log_probs = np.log(np.clip(probs, 1e-10, 1.0))  # shape (n_traces, 16)

    # Precompute SBox outputs for each key guess
    all_outputs = np.zeros((16, n_traces), dtype=np.uint8)
    for guess in range(16):
        all_outputs[guess] = np.array([
            PRESENT_SBOX[pt[nibble_idx] ^ guess] for pt in plaintexts[:n_traces]
        ])

    # Cumulative scores over traces
    cum_scores = np.zeros((16, n_traces))
    for guess in range(16):
        guess_indices = all_outputs[guess]
        log_likelihoods = log_probs[np.arange(n_traces), guess_indices]
        cum_scores[guess] = np.cumsum(log_likelihoods)

    # Determine rank of true key guess at each trace count
    ranks = []
    for t in range(1, n_traces + 1):
        sorted_guesses = np.argsort(cum_scores[:, t - 1])[::-1]
        rank = np.where(sorted_guesses == true_key_nibble)[0][0]
        ranks.append(rank)

    return ranks


In [40]:
def load_trace_ranges(file_path):
    with open(file_path, 'r') as f:
        data = json.load(f)

    global_range = tuple(data["global"])

    # Convert all per-nibble ranges to list-of-tuples
    per_nibble = {
        int(k): [tuple(r) for r in v] for k, v in data["per_nibble"].items()
    }

    return global_range, per_nibble

In [41]:
def get_traces_for_ranges(traces, ranges):
    if isinstance(ranges, tuple):  # single (start, end)
        return traces[:, ranges[0]:ranges[1]]
    elif isinstance(ranges, list):  # list of (start, end)
        return np.concatenate([traces[:, start:end] for start, end in ranges], axis=1)
    else:
        raise ValueError("Invalid trace range format.")

In [42]:
def evaluate_model_accuracy(model, plaintexts, key_hex, nibble_idx, sbox, traces):
    """
    Evaluate model accuracy on known-key dataset.

    Parameters:
        model: Trained Keras model for a specific nibble
        plaintexts: np.ndarray of shape (n_traces, 8)
        key_hex: str, full 16-nibble PRESENT key in hex (e.g. "FEDCBA0123456789")
        nibble_idx: int, target nibble (0–15)
        sbox: list or np.ndarray, PRESENT SBox
        traces: np.ndarray of shape (n_traces, n_pois, 1)

    Returns:
        accuracy: float
    """
    # Parse key into nibbles
    key_nibbles = hex_to_nibbles(key_hex)
    key_nibble = key_nibbles[nibble_idx]

    # Compute true SBox outputs
    sbox_outputs = np.array([
        sbox[pt[nibble_idx] ^ key_nibble]
        for pt in plaintexts
    ])

    # Predict
    probs = model.predict(traces, verbose=0)
    predicted_classes = np.argmax(probs, axis=1)

    accuracy = np.mean(predicted_classes == sbox_outputs)
    print(f"Accuracy for nibble {nibble_idx}: {accuracy:.4f}")
    return accuracy

In [43]:
def recover_full_key_w_pois(traces, plaintexts, use_pois=True, dataset_name="dataset"):
    """
    Performs full 16-nibble key recovery using pre-trained per-nibble models.

    For each nibble (0–15), the function loads a dedicated model trained to predict
    the SBox output given side-channel traces. It supports two evaluation modes:
    - POI-based (per-nibble selected points of interest)
    - Range-based (per-nibble cropped trace regions)

    For each key guess (0–15), the function simulates the expected SBox output using
    the known plaintexts and scores the model’s softmax outputs via log-likelihood accumulation.
    The most likely key nibble is selected for each position.

    The method also compares the recovered key to the known true key, and prints whether
    full recovery was successful.

    Parameters:
        traces (np.ndarray): Power traces of shape (n_traces, n_samples)
        plaintexts (np.ndarray): Corresponding plaintexts as 2D byte/nibble arrays
        use_pois (bool): Whether to use POI-based models or range-based models

    Returns:
        None — prints the recovered key, true key, and match status, then plots the
        guessing entropy curve for the first nibble.
    """

    full_scores = []
    recovered_key = []
    guessing_entropy = defaultdict(list)

    for idx in range(16):
        print(f"Recovering nibble {idx}...")
        pois_path = f"{PREPARED_DATASETS_PATH}/pois/nibble_{idx}_pois.npy"

        poi_indices = np.load(pois_path)

        _traces = None
        _traces = traces.copy()

        if use_pois:
            _traces = _traces[:, poi_indices]
            model = tf.keras.models.load_model(f"{MODELS_PATH}/per_nibble_POI_n{idx}_aug0.keras")
        else:
            trace_ranges_path = os.path.join(PREPARED_DATASETS_PATH, "ranges.json")
            global_trace_range, per_nibble_trace_ranges = load_trace_ranges(trace_ranges_path)
            ranges = per_nibble_trace_ranges[idx]  # list of (start, end)
                            
            _traces = get_traces_for_ranges(_traces, ranges)
            model = tf.keras.models.load_model(f"{MODELS_PATH}/per_nibble_range_n{idx}_aug0.keras")
        
        if len(_traces.shape) == 2:
            _traces = np.expand_dims(_traces, axis=-1)
        evaluate_model_accuracy(model, plaintexts, KNOWN_KEY_HEX, idx, PRESENT_SBOX, _traces)

        # Predict softmax outputs
        probs = model.predict(_traces, verbose=0)

        best_guess, scores = key_guess(idx, probs, plaintexts, _traces)
        
        
        ranks = compute_guessing_entropy(probs, plaintexts, idx, true_key[idx], max_traces=5000)
        label = "pois" if use_pois else "range"
        os.makedirs(f"ge_data/{dataset_name}/{label}", exist_ok=True)
        np.save(f"ge_data/{dataset_name}/{label}/ge_nibble_{idx}.npy", ranks)  # Save to file
        guessing_entropy[idx] = ranks

        recovered_key.append(best_guess)
        full_scores.append(scores)


    recovered_key_hex = "".join(f"{k:x}" for k in recovered_key).upper()
    print("\n\nRecovered Key: \t", recovered_key_hex)
    print("True Key: \t", KNOWN_KEY_HEX)
    print("Key Match:", recovered_key_hex == KNOWN_KEY_HEX)

In [44]:
# Load all datasets with caching
loaded_datasets = {
    name: dataset_loader.load_dataset_files_with_cache(path, cache_path=f"{DATASETS_PATH}/cache/{name}_cache.pkl")
    for name, path in evaluation_datasets.items()
}

Checking for cache at: ../datasets/cache/random_pt_dataset_cache.pkl
Loading datasets from cache: ../datasets/cache/random_pt_dataset_cache.pkl
Checking for cache at: ../datasets/cache/fixed_dataset_1_cache.pkl
Loading datasets from cache: ../datasets/cache/fixed_dataset_1_cache.pkl
Checking for cache at: ../datasets/cache/fixed_dataset_2_cache.pkl
Loading datasets from cache: ../datasets/cache/fixed_dataset_2_cache.pkl


# Evaluation on datasets with per nibble POIs model

The models used were the ones trained without augmentation.

In [45]:
def evaluate_dataset(dataset, name, use_pois=True):
    print(f"\n Evaluating on dataset: {name}")

    # Extract traces and plaintexts
    traces = dataset_loader.get_trace_matrix(dataset)  # shape (n_traces, n_samples)

    plaintexts_raw = dataset["plaintexts"].values
    plaintexts = convert_to_hex_string_array(plaintexts_raw)

    recover_full_key_w_pois(traces, plaintexts, use_pois=use_pois, dataset_name=name)

In [46]:
random_pt_dataset = loaded_datasets["random_pt_dataset"]
fixed_dataset_1 = loaded_datasets["fixed_dataset_1"]
fixed_dataset_2 = loaded_datasets["fixed_dataset_2"]

In [47]:
evaluate_dataset(random_pt_dataset, "random_pt_dataset")


 Evaluating on dataset: random_pt_dataset
Recovering nibble 0...
Accuracy for nibble 0: 0.8640
Recovering nibble 1...
Accuracy for nibble 1: 0.8762
Recovering nibble 2...
Accuracy for nibble 2: 0.7872
Recovering nibble 3...
Accuracy for nibble 3: 0.7792
Recovering nibble 4...
Accuracy for nibble 4: 0.6950
Recovering nibble 5...
Accuracy for nibble 5: 0.8900
Recovering nibble 6...
Accuracy for nibble 6: 0.7826
Recovering nibble 7...
Accuracy for nibble 7: 0.8708
Recovering nibble 8...
Accuracy for nibble 8: 0.7914
Recovering nibble 9...
Accuracy for nibble 9: 0.9126
Recovering nibble 10...
Accuracy for nibble 10: 0.8884
Recovering nibble 11...
Accuracy for nibble 11: 0.8920
Recovering nibble 12...
Accuracy for nibble 12: 0.8050
Recovering nibble 13...
Accuracy for nibble 13: 0.7704
Recovering nibble 14...
Accuracy for nibble 14: 0.8392
Recovering nibble 15...
Accuracy for nibble 15: 0.8360


Recovered Key: 	 FEDCBA0123456789
True Key: 	 FEDCBA0123456789
Key Match: True


In [48]:
evaluate_dataset(random_pt_dataset, "fixed_dataset_1")


 Evaluating on dataset: fixed_dataset_1
Recovering nibble 0...
Accuracy for nibble 0: 0.8640
Recovering nibble 1...
Accuracy for nibble 1: 0.8762
Recovering nibble 2...
Accuracy for nibble 2: 0.7872
Recovering nibble 3...
Accuracy for nibble 3: 0.7792
Recovering nibble 4...
Accuracy for nibble 4: 0.6950
Recovering nibble 5...
Accuracy for nibble 5: 0.8900
Recovering nibble 6...
Accuracy for nibble 6: 0.7826
Recovering nibble 7...
Accuracy for nibble 7: 0.8708
Recovering nibble 8...
Accuracy for nibble 8: 0.7914
Recovering nibble 9...
Accuracy for nibble 9: 0.9126
Recovering nibble 10...
Accuracy for nibble 10: 0.8884
Recovering nibble 11...
Accuracy for nibble 11: 0.8920
Recovering nibble 12...
Accuracy for nibble 12: 0.8050
Recovering nibble 13...
Accuracy for nibble 13: 0.7704
Recovering nibble 14...
Accuracy for nibble 14: 0.8392
Recovering nibble 15...
Accuracy for nibble 15: 0.8360


Recovered Key: 	 FEDCBA0123456789
True Key: 	 FEDCBA0123456789
Key Match: True


In [49]:
evaluate_dataset(random_pt_dataset, "fixed_dataset_2")


 Evaluating on dataset: fixed_dataset_2
Recovering nibble 0...
Accuracy for nibble 0: 0.8640
Recovering nibble 1...
Accuracy for nibble 1: 0.8762
Recovering nibble 2...
Accuracy for nibble 2: 0.7872
Recovering nibble 3...
Accuracy for nibble 3: 0.7792
Recovering nibble 4...
Accuracy for nibble 4: 0.6950
Recovering nibble 5...
Accuracy for nibble 5: 0.8900
Recovering nibble 6...
Accuracy for nibble 6: 0.7826
Recovering nibble 7...
Accuracy for nibble 7: 0.8708
Recovering nibble 8...
Accuracy for nibble 8: 0.7914
Recovering nibble 9...
Accuracy for nibble 9: 0.9126
Recovering nibble 10...
Accuracy for nibble 10: 0.8884
Recovering nibble 11...
Accuracy for nibble 11: 0.8920
Recovering nibble 12...
Accuracy for nibble 12: 0.8050
Recovering nibble 13...
Accuracy for nibble 13: 0.7704
Recovering nibble 14...
Accuracy for nibble 14: 0.8392
Recovering nibble 15...
Accuracy for nibble 15: 0.8360


Recovered Key: 	 FEDCBA0123456789
True Key: 	 FEDCBA0123456789
Key Match: True


# Evaluation on datasets with Range POIs per nibble model

The models used were the ones trained without augmentation.

In [50]:
evaluate_dataset(random_pt_dataset, "random_pt_dataset", use_pois=False)


 Evaluating on dataset: random_pt_dataset
Recovering nibble 0...
Accuracy for nibble 0: 0.9306
Recovering nibble 1...
Accuracy for nibble 1: 0.9072
Recovering nibble 2...
Accuracy for nibble 2: 0.5238
Recovering nibble 3...
Accuracy for nibble 3: 0.8750
Recovering nibble 4...
Accuracy for nibble 4: 0.0654
Recovering nibble 5...
Accuracy for nibble 5: 0.0604
Recovering nibble 6...
Accuracy for nibble 6: 0.0632
Recovering nibble 7...
Accuracy for nibble 7: 0.9362
Recovering nibble 8...
Accuracy for nibble 8: 0.8432
Recovering nibble 9...
Accuracy for nibble 9: 0.7558
Recovering nibble 10...
Accuracy for nibble 10: 0.0620
Recovering nibble 11...
Accuracy for nibble 11: 0.8348
Recovering nibble 12...
Accuracy for nibble 12: 0.8500
Recovering nibble 13...
Accuracy for nibble 13: 0.7808
Recovering nibble 14...
Accuracy for nibble 14: 0.8026
Recovering nibble 15...
Accuracy for nibble 15: 0.9304


Recovered Key: 	 FEDCD1B123C56789
True Key: 	 FEDCBA0123456789
Key Match: False


In [51]:
evaluate_dataset(random_pt_dataset, "fixed_dataset_1", use_pois=False)


 Evaluating on dataset: fixed_dataset_1
Recovering nibble 0...
Accuracy for nibble 0: 0.9306
Recovering nibble 1...
Accuracy for nibble 1: 0.9072
Recovering nibble 2...
Accuracy for nibble 2: 0.5238
Recovering nibble 3...
Accuracy for nibble 3: 0.8750
Recovering nibble 4...
Accuracy for nibble 4: 0.0654
Recovering nibble 5...
Accuracy for nibble 5: 0.0604
Recovering nibble 6...
Accuracy for nibble 6: 0.0632
Recovering nibble 7...
Accuracy for nibble 7: 0.9362
Recovering nibble 8...
Accuracy for nibble 8: 0.8432
Recovering nibble 9...
Accuracy for nibble 9: 0.7558
Recovering nibble 10...
Accuracy for nibble 10: 0.0620
Recovering nibble 11...
Accuracy for nibble 11: 0.8348
Recovering nibble 12...
Accuracy for nibble 12: 0.8500
Recovering nibble 13...
Accuracy for nibble 13: 0.7808
Recovering nibble 14...
Accuracy for nibble 14: 0.8026
Recovering nibble 15...
Accuracy for nibble 15: 0.9304


Recovered Key: 	 FEDCD1B123C56789
True Key: 	 FEDCBA0123456789
Key Match: False


In [52]:
evaluate_dataset(random_pt_dataset, "fixed_dataset_2", use_pois=False)


 Evaluating on dataset: fixed_dataset_2
Recovering nibble 0...
Accuracy for nibble 0: 0.9306
Recovering nibble 1...
Accuracy for nibble 1: 0.9072
Recovering nibble 2...
Accuracy for nibble 2: 0.5238
Recovering nibble 3...
Accuracy for nibble 3: 0.8750
Recovering nibble 4...
Accuracy for nibble 4: 0.0654
Recovering nibble 5...
Accuracy for nibble 5: 0.0604
Recovering nibble 6...
Accuracy for nibble 6: 0.0632
Recovering nibble 7...
Accuracy for nibble 7: 0.9362
Recovering nibble 8...
Accuracy for nibble 8: 0.8432
Recovering nibble 9...
Accuracy for nibble 9: 0.7558
Recovering nibble 10...
Accuracy for nibble 10: 0.0620
Recovering nibble 11...
Accuracy for nibble 11: 0.8348
Recovering nibble 12...
Accuracy for nibble 12: 0.8500
Recovering nibble 13...
Accuracy for nibble 13: 0.7808
Recovering nibble 14...
Accuracy for nibble 14: 0.8026
Recovering nibble 15...
Accuracy for nibble 15: 0.9304


Recovered Key: 	 FEDCD1B123C56789
True Key: 	 FEDCBA0123456789
Key Match: False


# Conclusion: POI vs. Range POI Evaluation

The evaluation of per-nibble models on the testing datasets demonstrated a clear advantage of POI-based input selection over range-based cropping. While both approaches achieved high accuracy on several high-leakage nibbles, the range-based models exhibited critical failures on specific nibbles (notably 4, 5, 6, and 10), with accuracy dropping to near-random levels (≈6%). This inconsistency suggests that the pre-defined trace ranges for those nibbles were either misaligned or insufficient to capture the relevant leakage.

In contrast, the POI-based models — trained on statistically selected samples using SNR and correlation analysis — maintained consistently high accuracy across nearly all nibbles, with all 16 models correctly recovering the key. The full key match under the POI setting confirms that selective, leakage-aware input extraction yields more robust and generalizable models for key recovery. These results underscore the importance of precise feature selection in profiling side-channel attacks and validate the use of POI clustering as a core part of the attack pipeline.