Set Working Directory

In [None]:
import os
os.chdir('..')
os.getcwd()

Packages

In [None]:
import yaml, torch
import numpy as np

import src.experiments.file_io as file_io
import src.experiments.aggregate_average as core
import src.experiments.shared as shared
import src.viz.aggregate_average as viz

from pathlib import Path
from datetime import datetime

from src.utils.reproducibility import set_global_seed
from src.utils.metadata import get_git_commit_hash
from src.utils.config_loading import load_yaml_config

Functions

In [None]:
def compute_score_pair_errors(gt_score_pairs, pred_score_pairs):
    """Return the mean absolute error and the ground-truth sample std.

    Args:
        gt_score_pairs: Ground-truth scores; any array-like (ndarray, list,
            nested list).
        pred_score_pairs: Predicted scores, broadcastable against
            ``gt_score_pairs``.

    Returns:
        Tuple ``(mean_error, ground_truth_std)`` where ``mean_error`` is the
        mean of ``|gt - pred|`` over all elements and ``ground_truth_std`` is
        the standard deviation of the ground truth with ``ddof=1``.
    """
    # Coerce to ndarrays so plain Python lists work too (list - list raises).
    gt = np.asarray(gt_score_pairs)
    pred = np.asarray(pred_score_pairs)

    # calculate mean absolute error
    mean_error = np.mean(np.abs(gt - pred))

    # calculate ground truth std (sample std; with ddof=1 a single value has
    # zero degrees of freedom, so expect NaN in that case)
    ground_truth_std = np.std(gt, ddof=1)

    return mean_error, ground_truth_std

In [None]:
def evaluate_error_by_missing_count(test_x, test_y, test_predictions, dims=14):
    """Compute prediction error and ground-truth spread per missing-count bucket.

    For every missing count ``n`` in ``0 .. dims - 1`` the rows selected by
    ``core.filter_sessions_by_missing_count`` are pulled out of ``test_y`` and
    ``test_predictions`` and scored with ``compute_score_pair_errors``.
    Buckets with no selected values get ``np.nan`` for both statistics.

    Args:
        test_x: Model inputs; split into encoding and current scores by
            ``core.split_encoding_and_scores``.
        test_y: Ground-truth future scores, indexable by the bucket mask.
        test_predictions: Predicted future scores, indexable by the bucket mask.
        dims: Forwarded to ``core.split_encoding_and_scores``; also the number
            of missing-count buckets evaluated.

    Returns:
        Tuple ``(missing_counts, mean_errors, ground_truth_stds,
        ground_truth_by_count)``; the last item maps ``str(n)`` to the
        filtered ground truth of that bucket.
    """
    current_scores = core.split_encoding_and_scores(test_x, dims=dims)[1]

    counts = [*range(dims)]
    maes, stds = [], []
    gt_by_count = {}

    for count in counts:
        selected = core.filter_sessions_by_missing_count(current_scores, count)
        gt_subset = test_y[selected]
        pred_subset = test_predictions[selected]

        gt_by_count[str(count)] = gt_subset

        if gt_subset.size != 0:
            mae, spread = compute_score_pair_errors(gt_subset, pred_subset)
        else:
            # nothing fell into this bucket: record NaN placeholders
            mae, spread = np.nan, np.nan

        maes.append(mae)
        stds.append(spread)

    return counts, maes, stds, gt_by_count

Script

In [None]:
config_path = "config/experiments/20250624_aggavg_1.yaml"

In [None]:
# load configuration
# Reads the run settings and the data/model/output locations from the YAML
# file named by config_path.
# NOTE(review): device, data_source and model_source are read here, but the
# model and data cells further down load hard-coded "old_data/..." files on
# CPU instead — confirm which source is intended.
config = load_yaml_config(config_path)
run_type = config["settings"]["type"] # repeat vs non-repeat
device = config["settings"]["device"] # cpu or cuda
seed = config["settings"]["seed"]

data_source = config["data"]["data_source"] # npz file with multiple arrays
model_source = config["data"]["model_source"] # path to the model
output_destination = config["data"]["destination_base"]

In [None]:
## general setup
figure_names = ["accuracy_assessment.png", "aggregate_average.png"]


## set global seed
set_global_seed(seed)

## get git commit hash
git_commit_hash = get_git_commit_hash()

In [None]:
class NN(torch.nn.Module):
    """Two-layer network: Linear -> Sigmoid -> Linear.

    The input width is ``n_domains * 3`` and the output width is
    ``n_domains``. With the defaults this is a 42 -> 100 -> 14 network.

    Args:
        n_domains: Number of output units; the input layer expects three
            features per domain. Defaults to 14.
        hidden_units: Width of the hidden layer. Defaults to 100.
    """

    def __init__(self, n_domains=14, hidden_units=100):
        super().__init__()

        self.model = torch.nn.Sequential(
            torch.nn.Linear(n_domains * 3, hidden_units),
            torch.nn.Sigmoid(),
            torch.nn.Linear(hidden_units, n_domains),
        )

    def forward(self, x):
        """Run ``x`` (last dimension ``n_domains * 3``) through the network."""
        return self.model(x)
    
# NOTE(review): this freshly initialised network is overwritten on the next
# line. It is kept as-is because constructing it presumably draws from the
# seeded torch RNG, so removing it could shift later random results — confirm
# before deleting.
model = NN()
# Load the stored model onto the CPU. torch.load unpickles the file, so only
# load files from a trusted source. The path is hard-coded rather than taken
# from model_source / device in the config.
model = torch.load("old_data/model.pt", map_location=torch.device('cpu'))


In [None]:
## load model
# model = shared.load_model(model_source, device=device)

In [None]:
## read data
# Load the encoded test set from a hard-coded path (not data_source from the config).
test_data = np.load("old_data/next_step_test_data_encoded.npy")
# presumably keeps only rows whose first 14 columns (the encoding) sum to 1 —
# TODO confirm against core.filter_rows_by_sum; the returned mask is not used below
test_data, mask = core.filter_rows_by_sum(test_data, slice(0, 14), 1)
from src.training.training_torch import split_input_target

# split into model inputs and targets, then run the model on the inputs
test_x, test_y = split_input_target(test_data)
test_predictions = shared.inference(model, test_x)
# test_x / test_y come back as tensors; convert them to numpy for the masking below
test_x = test_x.cpu().numpy()
test_y = test_y.cpu().numpy()

In [None]:
test_x.shape, test_y.shape, test_predictions.shape

In [None]:
## filter by session type
repeat_mask = core.assign_repeat(test_x)  # type: ignore

In [None]:
repeat_mask.sum()

In [None]:
test_x[:3]

In [None]:
# if run type is repeat, filter for only repeat sessions
# if run type is non-repeat, filter for only non-repeat sessions
# Both "non-repeat" and "nonrepeat" are accepted, because the latter spelling
# is what gets passed to the src.experiments helpers elsewhere in this notebook.
if run_type == "repeat":
    session_mask = repeat_mask
elif run_type in ("non-repeat", "nonrepeat"):
    session_mask = ~repeat_mask
else:
    # An unrecognised run type used to fall through and leave the data
    # unfiltered without any warning; fail loudly instead.
    raise ValueError(
        f"Unknown run type {run_type!r}; expected 'repeat' or 'non-repeat'."
    )

# apply the same row selection to inputs, targets and predictions
test_x = test_x[session_mask]
test_y = test_y[session_mask]
test_predictions = test_predictions[session_mask]

In [None]:
test_x.shape, test_y.shape, test_predictions.shape

(1) find ground truth std and prediction MAE

In [None]:
encoding, cur_score = core.split_encoding_and_scores(test_x, dims=14)
future_score_gt = test_y

mean_errors_list = []
ground_truth_std_list = []
ground_truth_dict = {}
missing_counts = list(range(0, 14))

In [None]:
# For each missing count, score the predictions against the ground truth,
# looking only at the positions where the encoding equals 1.
for n in missing_counts:
    # presumably a boolean row mask of sessions with exactly n missing domains — TODO confirm
    filter_mask = core.filter_sessions_by_missing_count(cur_score, n)
    filtered_encoding = encoding[filter_mask]
    # keep only the entries flagged by the encoding, for both truth and prediction
    filtered_gt = future_score_gt[filter_mask][filtered_encoding == 1]
    filtered_pred = test_predictions[filter_mask][filtered_encoding == 1]

    ground_truth_dict[str(n)] = filtered_gt

    # empty bucket: record NaN so the lists stay aligned with missing_counts
    if filtered_gt.size == 0:
        mean_errors_list.append(np.nan)
        ground_truth_std_list.append(np.nan)
        continue

    # NOTE(review): this calls core.compute_errors, not the notebook-local
    # compute_score_pair_errors — confirm the two agree.
    mean_error, std_dev = core.compute_errors(filtered_gt, filtered_pred)
    mean_errors_list.append(mean_error)
    ground_truth_std_list.append(std_dev)

In [None]:
mean_errors_list

In [None]:
# plot error by missing count and save figure
viz.plot_error_by_missing_count(
    x_axis=missing_counts,
    std=ground_truth_std_list,
    error=mean_errors_list,
    run_type=run_type
)

(2) predict scores based on strategy

In [None]:
# ground truth original scores
encoding, scores_gt = core.split_encoding_and_scores(test_x, dims=14)

In [None]:
# ground truth future scores
future_scores_gt = test_y

In [None]:
scores_gt.shape, future_scores_gt.shape

In [None]:
scores_gt[:3]

In [None]:
future_scores_gt[:3]

In [None]:
# best
# For every domain index, run the model with a single-domain encoding and keep
# the prediction column for that domain.
prediction_list = []
rows, cols = test_y.shape
_, scores = core.split_encoding_and_scores(test_x, dims=14)
for domain in missing_counts:
    # presumably an encoding matrix with only `domain` switched on — TODO confirm
    single_encoding = core.create_single_encoding(rows, cols, domain)
    tmp_single = core.add_encoding(scores, single_encoding)
    single_prediction = shared.inference(model, tmp_single)
    prediction_list.append(single_prediction[:, domain])

# one column per domain
prediction_matrix = np.column_stack(prediction_list)
# every other score column is used as the current value per domain
# (assumes scores are stored as interleaved pairs — TODO confirm)
difference = prediction_matrix - scores[:, ::2]

# find max indices
max_values, max_indices = shared.max_prediction_from_difference_pair(difference, prediction_matrix, scores, run_type)
# reconstruct matrices
future_scores_best, best_encoding = core.reconstruct_max_matrices(max_values, max_indices, prediction_matrix.shape)

In [None]:
# random
random_encoding, future_scores_random = core.find_random_predictions(
    model=model,
    data=scores_gt,
    run_type=run_type
)

In [None]:
average_gt_list = []
average_best_list = []
average_random_list = []

std_gt_list = []
std_best_list = []
std_random_list = []

In [None]:
def compute_averages_and_stds(cur_scores, future_scores, masks):
    """
    Mean and standard deviation of the score change (future - current),
    restricted to the entries picked out by ``core.filter_with_masks``.

    masks: first mask for missing count, second mask for location of target value (encoding == 1)
    returns: (average, std_dev) of the filtered differences
    """
    selected_change = core.filter_with_masks(future_scores - cur_scores, masks)
    return np.mean(selected_change), np.std(selected_change)

In [None]:
# For each missing count, collect the mean and std of (future score - current
# score) for the ground truth, the "best" strategy and the "random" strategy.
# NOTE(review): np.std is used with its default ddof here, whereas
# compute_score_pair_errors uses ddof=1 — confirm this is intended.
for n in missing_counts:
    # presumably a boolean row mask of sessions with exactly n missing domains — TODO confirm.
    # missing_mask is also read by a later display cell, so it keeps its last value.
    missing_mask = core.filter_sessions_by_missing_count(scores_gt, n)
    
    # every other column of the current scores for the selected sessions
    scores_gt_tmp = scores_gt[missing_mask][:, ::2]

    future_scores_gt_tmp = future_scores_gt[missing_mask]
    future_scores_best_tmp = future_scores_best[missing_mask]
    future_scores_random_tmp = future_scores_random[missing_mask]

    # change relative to the current score, kept only where the respective
    # encoding (ground truth / best / random) equals 1
    difference_gt = future_scores_gt_tmp - scores_gt_tmp
    difference_gt = difference_gt[encoding[missing_mask] == 1]
    difference_best = future_scores_best_tmp - scores_gt_tmp
    difference_best = difference_best[best_encoding[missing_mask] == 1]
    difference_random = future_scores_random_tmp - scores_gt_tmp
    difference_random = difference_random[random_encoding[missing_mask] == 1]

    # unlike the MAE loop there is no empty-bucket guard here; an empty
    # selection is expected to yield NaN
    average_gt = np.mean(difference_gt)
    average_best = np.mean(difference_best)
    average_random = np.mean(difference_random)

    std_gt = np.std(difference_gt)
    std_best = np.std(difference_best)
    std_random = np.std(difference_random)

    average_gt_list.append(average_gt)
    average_best_list.append(average_best)
    average_random_list.append(average_random)

    std_gt_list.append(std_gt)
    std_best_list.append(std_best)
    std_random_list.append(std_random)

In [None]:
random_encoding[0]

In [None]:
test_x[0, 14:]

In [None]:
test_y[0]

In [None]:
test_x[missing_mask]

In [None]:
average_best_list

Latest round of debugging
Testing the imported functions directly, because they appear to be producing wrong results

In [None]:
# reset everything
%reset -f

In [None]:
# deal with data
import numpy as np
data = np.load("old_data/next_step_test_data_encoded.npy")

In [None]:
test_data = data[:3]
test_data

Test each function in the order in which they appear in the script 04_aggregate_average.py

In [None]:
## first: filter_rows_by_sum()
from src.experiments.aggregate_average import filter_rows_by_sum
test_data, mask = filter_rows_by_sum(test_data, slice(0, 14), 1)
test_data.shape

In [None]:
fake_data_x = np.array([
    # Row 1 (repeat)
    [
        # First 14 (encoding)
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        # Middle 28 (raw scores)
        0.0625, 0.9375, 0, 0, 0.1875, 0.8125, 1, 1,
        0.3125, 0.6875, 0.375, 0.625, 0.4375, 0.5625, 0.5, 0.5,
        0.5625, 0.4375, 0.625, 0.375, 0.6875, 0.3125, 0.75, 0.25,
        0.8125, 0.1875, 0.875, 0.125
    ],
    # Row 2 (nonrepeat)
    [
        0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0.0625, 0.9375, 0, 0, 0.1875, 0.8125, 0.25, 0.75,
        0.3125, 0.6875, 0.375, 0.625, 0.4375, 0.5625, 0.5, 0.5,
        0.5625, 0.4375, 0.625, 0.375, 0.6875, 0.3125, 0.75, 0.25,
        0.8125, 0.1875, 0.875, 0.125
    ],
    # Row 3
    [
        0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
        0.0625, 0.9375, 0.125, 0.875, 0.1875, 0.8125, 0.25, 0.75,
        0.3125, 0.6875, 0.375, 0.625, 0.4375, 0.5625, 0.5, 0.5,
        0.5625, 0.4375, 0.625, 0.375, 0.6875, 0.3125, 0.75, 0.25,
        0.8125, 0.1875, 0.875, 0.125,
    ]
])

fake_data_y = np.array([[0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                       [0, 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                       [0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0]])


fake_data_filtered, mask_filter = filter_rows_by_sum(fake_data_x, slice(0, 14), 1)
fake_data_filtered.shape, mask_filter

In [None]:
fake_data_y = fake_data_y[mask_filter]
fake_data_y.shape

In [None]:
## first: assign_repeat()
from src.experiments.aggregate_average import assign_repeat
assign_repeat(test_data)

In [None]:
repeat_mask = assign_repeat(fake_data_filtered)
repeat_mask

In [None]:
fake_data_repeat = fake_data_filtered[repeat_mask]
fake_data_nonrepeat = fake_data_filtered[~repeat_mask]
print("repeat: ", fake_data_repeat)
print("non-repeat: ", fake_data_nonrepeat)

In [None]:
## start running through the pipeline of evaluate_error_by_missing_count()
from src.experiments.aggregate_average import split_encoding_and_scores
fake_encoding_repeat, fake_cur_score_repeat = split_encoding_and_scores(fake_data_repeat, dims=14)
fake_encoding_nonrepeat, fake_cur_score_nonrepeat = split_encoding_and_scores(fake_data_nonrepeat, dims=14)

In [None]:
fake_encoding_repeat, fake_cur_score_repeat

In [None]:
fake_encoding_nonrepeat, fake_cur_score_nonrepeat

In [None]:
fake_future_score_repeat = fake_data_y[repeat_mask]
fake_future_score_nonrepeat = fake_data_y[~repeat_mask]
print("repeat", fake_future_score_repeat)
print("non-repeat", fake_future_score_nonrepeat)

In [None]:
from src.experiments.aggregate_average import filter_sessions_by_missing_count
fake_mask_repeat = filter_sessions_by_missing_count(fake_cur_score_repeat, 2)
fake_mask_repeat

In [None]:
fake_mask_nonrepeat = filter_sessions_by_missing_count(fake_cur_score_nonrepeat, 1)
fake_mask_nonrepeat

In [None]:
from src.experiments.aggregate_average import filter_with_masks
filter_with_masks(fake_future_score_repeat, [fake_mask_repeat, fake_encoding_repeat == 1])

In [None]:
## going to aggregate_average_pipeline()
# load the model first (sigmoid bounded so output is between 0 and 1)
import src.experiments.shared as shared

model_source = "outputs/training_runs/run_20250922_sigmoid_final_layer_v1/model.pt"
model = shared.load_model(model_path=model_source, device="cpu")

In [None]:
## find best_idx_pred()
from src.experiments.aggregate_average import find_best_idx_pred
find_best_idx_pred(model, fake_cur_score_repeat, fake_future_score_repeat, missing_counts=list(range(0, 14)), run_type="nonrepeat")

In [None]:
import numpy as np
data = np.array([
            [1, 1, 1],
            [2, 1, 2],
            [0, 0, 0],
        ])

filter_rows_by_sum(data, slice(0, 2), 4)

In [None]:
data[:, slice(1, 3)].sum(axis=1)

Fake data run through pipeline

In [1]:
# reset everything
%reset -f

In [2]:
# set working directory
import os
os.chdir('..')
os.getcwd()

'/Users/964505/CT/ct_research/v2'

What we need in the fake data
- sessions with different numbers of missing domains (we want to see multiple options)
- multiple sessions with the same number of missing domains (so we can actually test the average)

In [3]:
import numpy as np

### REPEAT
# Layout of each row: 14 encoding values (a 1 marks the target domain) followed
# by 28 raw scores stored as 14 pairs. A pair of [0, 0] or [1, 1] marks a
# missing domain.
# Overview: sessions 1 and 3 are meant to share the same missing count, so we can test the average.
# Session 2 has no missing domains; we want to see that it has multiple repeat
# domains to choose from, and that it chooses according to the strategy.
# NOTE(review): session 3 below only has two sentinel pairs (domains 1 and 6);
# domain 4 holds 0.25 / 0.75, so sessions 1 and 3 do NOT share a missing count
# as written — confirm the intended data.
fake_data_x_repeat = np.array([
    # Session 1 (target domain 1, missing at domains 2, 4 and 14, positive change)
    [
        # First 14 (encoding)
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        # Middle 28 (raw scores)
        0.0625, 0.9375, 0, 0, 0.1875, 0.8125, 1, 1, # first 4
        0.3125, 0.6875, 0.375, 0.625, 0.4375, 0.5625, 0.5, 0.5, # 5-8
        0.5625, 0.4375, 0.625, 0.375, 0.6875, 0.3125, 0.75, 0.25, # 9-12
        0.8125, 0.1875, 0, 0 # 13-14
    ],
    # Session 2 (no missing domains, target domain 5, negative change)
    [
        0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, # encoding
        0.0625, 0.9375, 0.125, 0.875, 0.1875, 0.8125, 0.25, 0.75, # first 4
        0.3125, 0.6875, 0.375, 0.625, 0.4375, 0.5625, 0.5, 0.5, # 5-8
        0.5625, 0.4375, 0.625, 0.375, 0.6875, 0.3125, 0.75, 0.25, # 9-12
        0.8125, 0.1875, 0.875, 0.125 # 13-14
    ],
    # Session 3 (target domain 2; described as missing at domains 1, 4 and 6,
    # but only domains 1 and 6 carry a sentinel pair in the data)
    [
        0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # encoding
        0., 0., 0.125, 0.875, 0.1875, 0.8125, 0.25, 0.75, # first 4
        0.3125, 0.6875, 1, 1, 0.4375, 0.5625, 0.5, 0.5, # 5-8
        0.5625, 0.4375, 0.625, 0.375, 0.6875, 0.3125, 0.75, 0.25, # 9-12
        0.8125, 0.1875, 0.875, 0.125 # 13-14
]])


### NON-REPEAT
# Overview: sessions 1 and 3 both have 2 missing domains, so we can test the average.
# Session 2 has 4 missing domains, so there are 4 options for best and random.
fake_data_x_nonrepeat = np.array([
    # Session 1 (target domain 2, missing at domains 2 and 4)
    [
        0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # encoding
        0.0625, 0.9375, 0, 0, 0.1875, 0.8125, 1, 1, # first 4
        0.3125, 0.6875, 0.375, 0.625, 0.4375, 0.5625, 0.5, 0.5, # 5-8
        0.5625, 0.4375, 0.625, 0.375, 0.6875, 0.3125, 0.75, 0.25, # 9-12
        0.8125, 0.1875, 0.875, 0.125 # 13-14
    ],
    # Session 2 (target domain 4, missing at domains 2, 4, 12 and 13)
    [
        0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # encoding
        0.0625, 0.9375, 0, 0, 0.1875, 0.8125, 1, 1, # first 4
        0.3125, 0.6875, 0.375, 0.625, 0.4375, 0.5625, 0.5, 0.5, # 5-8
        0.5625, 0.4375, 0.625, 0.375, 0.6875, 0.3125, 0, 0, # 9-12
        0, 0, 0.875, 0.125 # 13-14
    ],
    # Session 3 (target domain 1, missing at domains 1 and 6)
    [
        1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, # encoding
        0., 0., 0.125, 0.875, 0.1875, 0.8125, 0.25, 0.75, # first 4
        0.3125, 0.6875, 0, 0, 0.4375, 0.5625, 0.5, 0.5, # 5-8
        0.5625, 0.4375, 0.625, 0.375, 0.6875, 0.3125, 0.75, 0.25, # 9-12
        0.8125, 0.1875, 0.875, 0.125 # 13-14
    ],
])

# target values (one row per session, non-zero only at one domain)
# NOTE(review): in fake_data_y_repeat, row 2 is non-zero at domain 2 and row 3
# at domain 5, while the encodings above target domain 5 for session 2 and
# domain 2 for session 3 — rows 2 and 3 look swapped; confirm.
fake_data_y_repeat = np.array([[0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                       [0, 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                       [0, 0, 0, 0, 0.01, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
fake_data_y_nonrepeat = np.array([[0, 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                       [0, 0, 0, 0.1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
                       [0.01, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [4]:
import torch
import torch.nn as nn

class fakeModel(nn.Module):
    """
    Deterministic stand-in for the real model, used to test the pipeline.

    For each input row x:
      - compute i = argmax(x[:14])
      - output a (1,14) tensor of all zeros except at i, where output[i] = fixed_output[i].

    Args:
        fixed_output: 1D array-like with at least 14 values; entry i is the
            value emitted when domain i is selected.
        dtype: dtype the fixed outputs are stored in.
    """
    def __init__(self, fixed_output, dtype=torch.float32):
        super().__init__()
        fixed_output = torch.as_tensor(fixed_output, dtype=dtype)
        assert fixed_output.ndim == 1 and fixed_output.numel() >= 14, \
            "fixed_output must be 1D with at least 14 elements."
        self.register_buffer("fixed_output", fixed_output)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return a (B, 14) tensor with one non-zero entry per row.

        Accepts a single sample (1D), a batch (2D), or a batch with extra
        feature dimensions (flattened to 2D). The output uses the dtype of
        ``x`` when it is floating point, otherwise the dtype of the stored
        fixed outputs.
        """
        if x.dim() == 1:
            x = x.unsqueeze(0)  # handle single sample (1, D)
        elif x.dim() > 2:
            # flatten features if more than 2 dims; reshape (unlike view)
            # also works for non-contiguous inputs such as transposed tensors
            x = x.reshape(x.shape[0], -1)
        batch = x.shape[0]

        # compute argmax over first 14 features for each row
        idx = x[:, :14].argmax(dim=1)  # (B,)

        # Integer inputs would truncate the fixed values, so fall back to the
        # buffer dtype in that case.
        out_dtype = x.dtype if x.is_floating_point() else self.fixed_output.dtype
        out = torch.zeros((batch, 14), dtype=out_dtype, device=x.device)

        # Indexed assignment needs matching dtype/device on both sides; cast
        # the buffer so e.g. float64 inputs work with a float32 buffer.
        values = self.fixed_output.to(device=x.device, dtype=out_dtype)
        out[torch.arange(batch, device=x.device), idx] = values[idx]
        return out

In [5]:
## example usage of fake model
fixed_output = np.array([0.5, 0.25, 0.3, 0.6, 0.8, 0.9, 0.01, 0.75, 0.1, 0.0, 0.2, 0.65, 0.05, 0.7])
x = np.array([
    [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.1, 0.2, 0.3], # should output 0.5 at index 0
    [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.4, 0.5, 0.6], # should output 0.25 at index 1
    [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0.7, 0.8, 0.9], # should output 0.3 at index 2
])

model = fakeModel(fixed_output=fixed_output, dtype=torch.float32)
model.eval()
with torch.no_grad():
    test_tensor = torch.as_tensor(x, dtype=torch.float32)
    predictions = model(test_tensor).numpy()
predictions, predictions.shape

(array([[0.5 , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
         0.  , 0.  , 0.  ],
        [0.  , 0.25, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
         0.  , 0.  , 0.  ],
        [0.  , 0.  , 0.3 , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
         0.  , 0.  , 0.  ]], dtype=float32),
 (3, 14))

First, look at nonrepeat, because that is the run type that seems to be having problems.
What do we expect at each step?
- find best: find the best out of all available
- find random: pick a random one out of all available
- average best: take the average of the best
- average random: take the average of the random
- average gt: take the average of the GT (actually, let's look at this one first)
- plotting (final plot looks as expected)

In [6]:
from src.experiments.aggregate_average import split_encoding_and_scores
fake_encoding_repeat, fake_cur_score_repeat = split_encoding_and_scores(fake_data_x_repeat, dims=14)

In [7]:
fake_encoding_repeat.shape, fake_cur_score_repeat.shape

((3, 14), (3, 28))

In [8]:
fake_encoding_nonrepeat, fake_cur_score_nonrepeat = split_encoding_and_scores(fake_data_x_nonrepeat, dims=14)
fake_encoding_nonrepeat.shape, fake_cur_score_nonrepeat.shape

((3, 14), (3, 28))

In [9]:
from src.experiments.aggregate_average import find_best_idx_pred

In [10]:
fixed_output = np.array([0.5, 0.25, 0.3, 0.6, 0.8, 0.9, 0.01, 0.75, 0.1, 0.0, 0.2, 0.65, 0.05, 0.7])

model = fakeModel(fixed_output=fixed_output, dtype=torch.float32)

In [11]:
fake_best_encoding_repeat, fake_best_predictions_repeat = find_best_idx_pred(model, fake_cur_score_repeat, fake_data_y_repeat, missing_counts=list(range(0, 14)), run_type="repeat")

Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])


In [12]:
fake_best_encoding_repeat.argmax(axis=1), fake_best_encoding_repeat.shape

(array([5, 5, 4]), (3, 14))

In [13]:
fake_best_encoding_repeat

array([[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [14]:
fake_best_predictions_repeat.argmax(axis=1), fake_best_predictions_repeat.shape

(array([5, 5, 4]), (3, 14))

In [15]:
fake_best_predictions_repeat

array([[      -inf,       -inf,       -inf,       -inf,       -inf,
        0.52499998,       -inf,       -inf,       -inf,       -inf,
              -inf,       -inf,       -inf,       -inf],
       [      -inf,       -inf,       -inf,       -inf,       -inf,
        0.52499998,       -inf,       -inf,       -inf,       -inf,
              -inf,       -inf,       -inf,       -inf],
       [      -inf,       -inf,       -inf,       -inf, 0.48750001,
              -inf,       -inf,       -inf,       -inf,       -inf,
              -inf,       -inf,       -inf,       -inf]])

In [16]:
fake_best_encoding_nonrepeat, fake_best_predictions_nonrepeat = find_best_idx_pred(model, fake_cur_score_nonrepeat, fake_data_y_nonrepeat, missing_counts=list(range(0, 14)), run_type="nonrepeat")

Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])


In [17]:
fake_best_encoding_nonrepeat.argmax(axis=1), fake_best_encoding_nonrepeat.shape

(array([ 3, 11,  5]), (3, 14))

In [18]:
fake_best_predictions_nonrepeat

array([[      -inf,       -inf,       -inf, 0.60000002,       -inf,
              -inf,       -inf,       -inf,       -inf,       -inf,
              -inf,       -inf,       -inf,       -inf],
       [      -inf,       -inf,       -inf,       -inf,       -inf,
              -inf,       -inf,       -inf,       -inf,       -inf,
              -inf, 0.64999998,       -inf,       -inf],
       [      -inf,       -inf,       -inf,       -inf,       -inf,
        0.89999998,       -inf,       -inf,       -inf,       -inf,
              -inf,       -inf,       -inf,       -inf]])

In [19]:
from src.experiments.aggregate_average import predict_all_domains
prediction_matrix = predict_all_domains(model, fake_cur_score_nonrepeat, fake_data_y_nonrepeat, list(range(0, 14)))

Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])
Data shape: torch.Size([3, 42])


In [20]:
prediction_matrix

array([[0.5 , 0.25, 0.3 , 0.6 , 0.8 , 0.9 , 0.01, 0.75, 0.1 , 0.  , 0.2 ,
        0.65, 0.05, 0.7 ],
       [0.5 , 0.25, 0.3 , 0.6 , 0.8 , 0.9 , 0.01, 0.75, 0.1 , 0.  , 0.2 ,
        0.65, 0.05, 0.7 ],
       [0.5 , 0.25, 0.3 , 0.6 , 0.8 , 0.9 , 0.01, 0.75, 0.1 , 0.  , 0.2 ,
        0.65, 0.05, 0.7 ]], dtype=float32)

In [21]:
from src.experiments.aggregate_average import decode_missing_indicator

# View the 28 score columns as 14 (first, second) pairs per session.
current_matrix_pairs = fake_cur_score_nonrepeat.reshape(-1, 14, 2)

# A domain counts as missing when both values of its pair are equal and the
# first value is exactly 0 or 1, i.e. the pair is [0, 0] or [1, 1].
# This uses exact float equality, with no tolerance.
eq_mask = current_matrix_pairs[:, :, 0] == current_matrix_pairs[:, :, 1]
val_mask = (current_matrix_pairs[:, :, 0] == 0) | (current_matrix_pairs[:, :, 0] == 1)
missing_mask = eq_mask & val_mask  # Shape: (N, D)


In [22]:
# invert missing mask if run type is repeat, so we consider the opposite set of domains
run_type = "nonrepeat"  # or "repeat"
if run_type == "repeat":
    valid_mask = ~missing_mask
else:
    valid_mask = missing_mask

current_matrix = decode_missing_indicator(fake_cur_score_nonrepeat)

# compute per-cell difference; where the current value is NaN the raw
# prediction is used instead of a difference
difference_full = np.where(np.isnan(current_matrix),
                           prediction_matrix,
                           prediction_matrix - current_matrix)

# mask out invalid columns per row with -inf (not 0): with 0 an invalid column
# would win the argmax whenever every valid difference is negative, and could
# win ties when the best valid difference is exactly 0
masked = np.where(valid_mask, difference_full, -np.inf)

# argmax per row
row_ids = np.arange(masked.shape[0])
row_argmax = np.argmax(masked, axis=1)
row_maxval = masked[row_ids, row_argmax]

# rows without any valid column keep an all-zero indicator and -inf values
has_valid = valid_mask.any(axis=1)

# build outputs
max_indices = np.zeros_like(current_matrix, dtype=int)
max_indices[row_ids[has_valid], row_argmax[has_valid]] = 1

max_values = np.full_like(current_matrix, -np.inf, dtype=float)
max_values[row_ids[has_valid], row_argmax[has_valid]] = row_maxval[has_valid]

In [23]:
max_indices

array([[0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]])

In [24]:
max_values

array([[      -inf,       -inf,       -inf, 0.60000002,       -inf,
              -inf,       -inf,       -inf,       -inf,       -inf,
              -inf,       -inf,       -inf,       -inf],
       [      -inf,       -inf,       -inf,       -inf,       -inf,
              -inf,       -inf,       -inf,       -inf,       -inf,
              -inf, 0.64999998,       -inf,       -inf],
       [      -inf,       -inf,       -inf,       -inf,       -inf,
        0.89999998,       -inf,       -inf,       -inf,       -inf,
              -inf,       -inf,       -inf,       -inf]])

In [57]:
from src.experiments.aggregate_average import find_random_predictions, create_random_encoding

In [58]:
# The repeat strategy must draw from the repeat score matrix; this call
# previously passed fake_cur_score_nonrepeat while storing into *_repeat names.
fake_random_encoding_repeat, fake_random_predictions_repeat = find_random_predictions(model, fake_cur_score_repeat, run_type="repeat")
fake_random_encoding_repeat

Data shape: torch.Size([3, 42])


array([[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0]])

In [85]:
fake_random_encoding_nonrepeat, fake_random_predictions_nonrepeat = find_random_predictions(model, fake_cur_score_nonrepeat, run_type="nonrepeat")
fake_random_encoding_nonrepeat

Data shape: torch.Size([3, 42])


array([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]])

In [60]:
def find_missing_mask(x1, x2, eps=1e-8):
    """
    Flag positions where the pair (x1[i], x2[i]) is a "missing" marker.

    A pair is missing when both values are equal and sit at 0 or at 1, i.e.
    the pair is [0, 0] or [1, 1]. All comparisons use an absolute tolerance
    of eps and no relative tolerance. Returns a boolean mask.
    """
    def _near(values, target):
        # absolute-tolerance comparison only
        return np.isclose(values, target, atol=eps, rtol=0)

    at_sentinel = _near(x1, 0.0) | _near(x1, 1.0)  # first value is (almost) 0 or 1
    pair_matches = _near(x1, x2)  # both values are (almost) equal
    return pair_matches & at_sentinel

In [61]:
score_pairs = fake_cur_score_repeat.reshape(-1, 14, 2)

In [62]:
score_pairs

array([[[0.0625, 0.9375],
        [0.    , 0.    ],
        [0.1875, 0.8125],
        [1.    , 1.    ],
        [0.3125, 0.6875],
        [0.375 , 0.625 ],
        [0.4375, 0.5625],
        [0.5   , 0.5   ],
        [0.5625, 0.4375],
        [0.625 , 0.375 ],
        [0.6875, 0.3125],
        [0.75  , 0.25  ],
        [0.8125, 0.1875],
        [0.    , 0.    ]],

       [[0.0625, 0.9375],
        [0.125 , 0.875 ],
        [0.1875, 0.8125],
        [0.25  , 0.75  ],
        [0.3125, 0.6875],
        [0.375 , 0.625 ],
        [0.4375, 0.5625],
        [0.5   , 0.5   ],
        [0.5625, 0.4375],
        [0.625 , 0.375 ],
        [0.6875, 0.3125],
        [0.75  , 0.25  ],
        [0.8125, 0.1875],
        [0.875 , 0.125 ]],

       [[0.    , 0.    ],
        [0.125 , 0.875 ],
        [0.1875, 0.8125],
        [0.25  , 0.75  ],
        [0.3125, 0.6875],
        [1.    , 1.    ],
        [0.4375, 0.5625],
        [0.5   , 0.5   ],
        [0.5625, 0.4375],
        [0.625 , 0.375 ],
        

In [63]:
~find_missing_mask(score_pairs[:, :, 0], score_pairs[:, :, 1])

array([[ True, False,  True, False,  True,  True,  True,  True,  True,
         True,  True,  True,  True, False],
       [ True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True],
       [False,  True,  True,  True,  True, False,  True,  True,  True,
         True,  True,  True,  True,  True]])

In [64]:
create_random_encoding(fake_cur_score_nonrepeat, run_type="nonrepeat")

array([[0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0]])

In [65]:
fake_random_encoding_repeat = create_random_encoding(fake_cur_score_repeat, run_type="repeat")

In [66]:
fake_random_encoding_repeat

array([[0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]])

In [67]:
from src.experiments.aggregate_average import add_encoding
x_random = add_encoding(fake_cur_score_repeat, fake_random_encoding_repeat)
x_random

array([[0.    , 0.    , 0.    , 0.    , 0.    , 1.    , 0.    , 0.    ,
        0.    , 0.    , 0.    , 0.    , 0.    , 0.    , 0.0625, 0.9375,
        0.    , 0.    , 0.1875, 0.8125, 1.    , 1.    , 0.3125, 0.6875,
        0.375 , 0.625 , 0.4375, 0.5625, 0.5   , 0.5   , 0.5625, 0.4375,
        0.625 , 0.375 , 0.6875, 0.3125, 0.75  , 0.25  , 0.8125, 0.1875,
        0.    , 0.    ],
       [0.    , 0.    , 0.    , 0.    , 0.    , 0.    , 0.    , 0.    ,
        0.    , 0.    , 0.    , 1.    , 0.    , 0.    , 0.0625, 0.9375,
        0.125 , 0.875 , 0.1875, 0.8125, 0.25  , 0.75  , 0.3125, 0.6875,
        0.375 , 0.625 , 0.4375, 0.5625, 0.5   , 0.5   , 0.5625, 0.4375,
        0.625 , 0.375 , 0.6875, 0.3125, 0.75  , 0.25  , 0.8125, 0.1875,
        0.875 , 0.125 ],
       [0.    , 0.    , 0.    , 0.    , 0.    , 0.    , 0.    , 0.    ,
        0.    , 0.    , 1.    , 0.    , 0.    , 0.    , 0.    , 0.    ,
        0.125 , 0.875 , 0.1875, 0.8125, 0.25  , 0.75  , 0.3125, 0.6875,
        1.    