In [1]:
import os
import warnings

# Deployment selectors for this notebook. They are set before the project modules are
# imported below (presumably read by common.config at import time; confirm).
os.environ['ENV'] = 'prod'
os.environ['REGION'] = 'apse1'
os.environ['TENANT'] = "in"
os.environ['RECO_S3_BUCKET'] = "p13n-reco-offline-prod"
os.environ['COUNTRY_KEY'] = "in"
os.environ['AWS_REGION'] = "ap-southeast-1"
os.environ['USE_REAL_CMS3'] = "True"

# SECURITY: an RSA private key used to be hard-coded on this line. Secrets must not be
# stored in a notebook (they end up in version control and in shared outputs).
# Supply RECO_CREDENTIAL through the kernel environment or a secrets manager instead,
# and rotate the key that was previously committed here.
if not os.environ.get('RECO_CREDENTIAL'):
    warnings.warn(
        "RECO_CREDENTIAL is not set in the environment; "
        "steps that need this credential will fail until it is provided.",
        RuntimeWarning,
    )

import argparse, gc
import json
import os
import numpy as np
import s3fs
import pyarrow
import tensorflow as tf
from tqdm import tqdm

# Switch TensorFlow to TF1-style behaviour for everything that follows.
tfv1 = tf.compat.v1
tfv1.disable_v2_behavior()

# Turn on memory growth for every visible GPU. Iterating over an empty device
# list does nothing, so no separate emptiness check is required.
physical_devices = tf.config.list_physical_devices('GPU')
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, True)

import tensorflow_addons as tfa
import tensorflow_recommenders_addons as tfra
from model.losses import masked_binary_entropy_loss
from model.metrics import MaskedAUC

from common.config.utils import data_path, model_path
from common.config import TENANT
from tpfy.tf_model.tpfy_model_v3_mtl import TpfyModelV3, TpfyMtlModelConfig
from tpfy.etl.schema import TpfyMtlDatasetSchema
from model.parquet_dataset import TFParquetDataset
from tpfy.common import TpfyDataPath
from omegaconf import OmegaConf
from dataclasses import dataclass
from tpfy.train_v3_mtl import make_example_mtl, TpfyTrainConfig, TpfyConfig
from tpfy.helper import load_model_weights_from_s3

class Args:
    """Plain container for the training arguments; stands in for an argparse namespace."""

    def __init__(self):
        # Attributes are installed in one call; keyword order fixes attribute order.
        vars(self).update(
            # Positional arguments
            model_name="tpfy-v3-mtl-r2",
            date="2026-02-06",  # Training date
            val_date="2026-02-06",  # Validation date
            # Optional arguments
            conf=None,
            max_epoch=None,
            val_days=1,
            click_ns=0.08,
            variant="cms3",
            num_workers=4,
            repeat=1,
            eval_freq=None,
            lr=1e-4,
            batch_size=512,
            click_weight=1.0,
            watch_weight=1.0,
            upload=False,  # Set to False if you don't want to upload to S3
            reload_local_model=None,
            reload_s3_model="tpfy-v3-mtl-r2",  # Set to None if starting fresh
            extract_activations=True,
            output=None,
            clear_nn=False,
            ckpt=None,
            verbose=False,
            countries=None,
        )

# Instantiate the notebook's stand-in for parsed command-line arguments.
args = Args()

# Echo the settings that matter for this run.
print("Training Configuration:")
for _label, _value in (
    ("Model Name", args.model_name),
    ("Training Date", args.date),
    ("Validation Date", args.val_date),
    ("Variant", args.variant),
    ("Click NS", args.click_ns),
    ("Num Workers", args.num_workers),
    ("Reload Model", args.reload_s3_model),
    ("Upload", args.upload),
):
    print(f"  {_label}: {_value}")

# Tenant-specific YAML, resolved relative to the current working directory.
config_name = f"tpfy/tpfy_config/mtl-{TENANT}.yaml"
if not os.path.exists(config_name):
    raise FileNotFoundError(f"Config file {config_name} not found")

# The YAML contents are merged over the structured TpfyConfig schema.
_schema_defaults = OmegaConf.structured(TpfyConfig)
_tenant_overrides = OmegaConf.load(config_name)
hparams: TpfyConfig = OmegaConf.merge(_schema_defaults, _tenant_overrides)
print(f"\nLoaded config: {config_name}")

# A truthy args.batch_size replaces the configured batch size.
if args.batch_size:
    hparams.train.batch_size = args.batch_size
batch_size = hparams.train.batch_size
print(f"Batch size: {batch_size}")

# Normalise the variant to carry a leading dash (it is substituted into the dataset path).
variant = args.variant
if variant and not variant.startswith("-"):
    variant = f"-{variant}"

# Session used by the functions and cells below.
session = tfv1.keras.backend.get_session()

def create_training_dataset(date):
    """Build the batched example dataset for one date.

    Args:
        date: substituted, together with the module-level ``variant``, into the
            path template returned by ``data_path``.

    Returns:
        The dataset produced by ``create_tf_dataset(batch_size)`` with
        ``make_example_mtl`` mapped over it (``batch_size`` is the module-level value).
    """
    path_template = data_path(
        TpfyDataPath.S3_TPFY_IMPR_V3_AGG_MTL_EXTRACTED_EXAMPLES_VAR, TENANT
    )
    source = TFParquetDataset(
        [path_template % (variant, date)],
        TpfyMtlDatasetSchema,
        shuffle_files=False,
    )
    # An alternative loader, dataset.create_parallel_tf_dataset(batch_size,
    # args.num_workers, num_epochs=1, queue_size=16, v2=True,
    # row_transformer_factory=None), was previously kept here commented out.
    batched = source.create_tf_dataset(batch_size)
    return batched.map(make_example_mtl)

def load_and_compile_model():
    """Create a ``TpfyModelV3`` from ``hparams`` and compile it.

    The model is compiled with one masked binary cross-entropy loss and one
    ``MaskedAUC`` metric per task, and an AdamW optimizer wrapped in
    ``DynamicEmbeddingOptimizer``.

    Returns:
        The compiled model.
    """
    banner = "=" * 80
    print(f"\n{banner}")
    print("BUILDING MODEL")
    print(f"{banner}")

    model = TpfyModelV3(
        hparams.model,
        click_ns=args.click_ns,
        enable_random_watch=hparams.train.enable_random_watch,
    )

    # compile() requires an optimizer even though this notebook does not train;
    # the hyper-parameters are therefore irrelevant for inference.
    base_optimizer = tfa.optimizers.AdamW(
        weight_decay=0.0,
        learning_rate=0.001,
        epsilon=1e-4,
    )

    # (task name, from_logits): True for every task except "random_watch".
    task_from_logits = (
        ("click", True),
        ("watch", True),
        ("random_watch", False),
        ("paywall_view", True),
        ("add_watchlist", True),
    )
    loss_dict = {
        task: masked_binary_entropy_loss(from_logits=flag)
        for task, flag in task_from_logits
    }
    metric_dict = {
        task: MaskedAUC(from_logits=flag)
        for task, flag in task_from_logits
    }

    model.compile(
        optimizer=tfra.dynamic_embedding.DynamicEmbeddingOptimizer(base_optimizer),
        loss=loss_dict,
        metrics=metric_dict,
    )
    print("Model compiled")

    return model

def get_activations_and_labels(iterator, model, last_layer_tensor):
    """Fetch one batch, run the model on it and evaluate ``last_layer_tensor``.

    Args:
        iterator: fetch passed to ``session.run``; it must evaluate to a
            ``(features, labels, metadata)`` triple.
        model: callable invoked as ``model(features, training=False)``.
        last_layer_tensor: graph tensor whose value is returned as the activations.

    Returns:
        Tuple ``(activation_values, labels, pred_values, metadata)``.
    """
    features, labels, metadata = session.run(iterator)

    # Run model
    # NOTE(review): the model is called again on every invocation, which adds a new copy
    # of its ops to the graph each time (the notebook output shows a second
    # "tpfy_model_v3_1/..." scope after one call), so the graph grows per batch.
    predictions = model(features, training=False)

    # Execute
    # NOTE(review): last_layer_tensor comes from the caller and is not derived from the
    # `predictions` built above. If it was looked up in an earlier model call, the
    # returned activations will not correspond to `features` -- confirm that the
    # activations, predictions and labels really describe the same batch.
    pred_values, activation_values = session.run(
        [predictions, last_layer_tensor]
    )

    return activation_values, labels, pred_values, metadata

def compute_A(A, iterator, model, last_layer_tensor):
    """Accumulate one batch into the feature matrix ``A``.

    Fetches one batch through ``get_activations_and_labels``, keeps the rows whose
    ``'click'`` label is not -1, L2-normalises each activation row and adds
    ``H.T @ H`` to ``A``.

    Args:
        A: square NumPy array, updated in place; its side must equal the width
            of the activations.
        iterator, model, last_layer_tensor: forwarded unchanged to
            ``get_activations_and_labels``.

    Returns:
        The same ``A`` object (untouched when no row of the batch is kept).

    Raises:
        AssertionError: if the smallest eigenvalue of the updated ``A`` is not positive.
    """
    H, y_batch_all_labels, _, _ = get_activations_and_labels(iterator, model, last_layer_tensor)
    y_batch = y_batch_all_labels['click']

    # Flatten to a 1-D row selector so (N, 1) and (N,) labels behave the same.
    # squeeze() would collapse a one-row batch to a 0-d mask and break the indexing.
    mask = (np.asarray(y_batch) != -1).reshape(-1)
    if not mask.any():
        return A

    # Promote to float64 before normalising and forming the Gram matrix: a float32
    # product accumulates rounding error over many batches in the float64 accumulator.
    H = np.asarray(H, dtype=np.float64)[mask]
    H = H / (np.linalg.norm(H, axis=1, keepdims=True) + 1e-8)

    # Accumulate
    A += H.T @ H

    # Sanity check kept from the original: A must stay positive definite.
    assert np.linalg.eigvalsh(A).min() > 0
    return A

2026-02-10 09:06:38.829098: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2026-02-10 09:06:38.829124: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


Instructions for updating:
non-resource variables are not supported in the long term


2026-02-10 09:06:39.787259: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2026-02-10 09:06:39.787286: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2026-02-10 09:06:39.787301: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ip-10-11-72-173): /proc/driver/nvidia/version does not exist


Training Configuration:
  Model Name: tpfy-v3-mtl-r2
  Training Date: 2026-02-06
  Validation Date: 2026-02-06
  Variant: cms3
  Click NS: 0.08
  Num Workers: 4
  Reload Model: tpfy-v3-mtl-r2
  Upload: False

Loaded config: tpfy/tpfy_config/mtl-in.yaml
Batch size: 512



2026-02-10 09:06:40.371144: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Build the input pipeline for the configured date and evaluate one concrete batch.
tf_dataset = create_training_dataset(args.date)
iterator = tf_dataset.make_one_shot_iterator()
next_batch = iterator.get_next()
sample_features, sample_labels, sample_metadata = session.run(next_batch)
tpfy_model = load_and_compile_model()

# First call of the model; the initialisers below are run right after it.
# NOTE(review): sample_features is an already-evaluated batch (the result of
# session.run), not the iterator's symbolic output. Tensors later fetched by name from
# this "tpfy_model_v3/..." scope therefore appear to be tied to this one batch rather
# than to batches drawn from the iterator afterwards -- confirm before relying on
# activations computed per batch.
prediction = tpfy_model(sample_features, training=False)
session.run([
    tfv1.global_variables_initializer(),
    tfv1.local_variables_initializer(),
    tfv1.tables_initializer()
])

# Load the stored weights for args.model_name.
plain_weights = load_model_weights_from_s3(
    args.model_name,
    use_s3=True
)
# Remove every 'train/' occurrence from the weight names (presumably so they match the
# variable names of this graph; confirm).
plain_weights_modified = {k.replace('train/', ''): v for k, v in plain_weights.items()}
restore_ops = tpfy_model.restore_plain_weights_ops(
    plain_weights_modified,
    clear_nn=args.clear_nn
)
session.run(restore_ops)

# Create NEW iterator (reset to start of dataset)
iterator = tf_dataset.make_one_shot_iterator()
next_batch = iterator.get_next()

# Get compress_output tensor (linear_input)
# The name is hard-coded to the scope created by the first model call above.
graph = tf.compat.v1.get_default_graph()
compress_output_tensor = graph.get_tensor_by_name('tpfy_model_v3/deepfm/Relu:0')

files s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted-cms3/2026-02-06/part-00000-tid-7617731706831743383-d7b69ff7-441e-4659-95cb-8067a05f5f34-2835-1-c000.snappy.parquet,s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted-cms3/2026-02-06/part-00001-tid-7617731706831743383-d7b69ff7-441e-4659-95cb-8067a05f5f34-2909-1-c000.snappy.parquet,s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted-cms3/2026-02-06/part-00002-tid-7617731706831743383-d7b69ff7-441e-4659-95cb-8067a05f5f34-3012-1-c000.snappy.parquet,s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted-cms3/2026-02-06/part-00003-tid-7617731706831743383-d7b69ff7-441e-4659-95cb-8067a05f5f34-2995-1-c000.snappy.parquet,s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted-cms3/2026-02-06/part-00004-tid-7617731706831743383-d7b69ff7-441e-4659-95cb-8067a05f5f34-2892-1-c000.snappy.parquet,s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted

2026-02-10 09:06:42.583001: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 2499995000 Hz
2026-02-10 09:06:42.628512: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)



BUILDING MODEL
Model compiled
--------------
Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.
q Tensor("tpfy_model_v3/feature_prep/strided_slice:0", shape=(512, 32), dtype=float32)
k Tensor("tpfy_model_v3/feature_prep/watched_content_embedding_unpooled:0", shape=(512, ?, 32), dtype=float32)
Kw Tensor("tpfy_model_v3/feature_prep/GetSlotFids:1", shape=(512, ?), dtype=float32)
target embedding shape (512, 9, 32)
user embedding shape (512, 27, 32)
target: Tensor("tpfy_model_v3/feature_prep/target_feature/target_embeddings:0", shape=(512, 9, 32), dtype=float32)
user: Tensor("tpfy_model_v3/feature_prep/user_feature/user_embeddings:0", shape=(512, 27, 32), dtype=float32)
watched: Tensor("tpfy_model_v3/feature_prep/dot_prod_attention_pooling/cond/Merge:0", shape=(512, 32), dtype=float32)
fm_user Tensor("tpfy_model_v3/deepfm/fwfm/concat:0", shape=(512, 28, 32), dtype=float32)
fm_item Tensor("tpfy_model_v3/

2026-02-10 09:06:50.889109: I ./tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_cpu.h:143] HashTable on CPU is created on optimized mode: K=l, V=f, DIM=32, init_size=8192


In [3]:
# Start a fresh one-shot iterator; `iterator` and `next_batch` are rebound, so later
# cells read from this new iterator.
iterator = tf_dataset.make_one_shot_iterator()
next_batch = iterator.get_next()

In [4]:
# Side length of A; it must equal the width of the activation matrix H.
d = 128
A = np.eye(d, dtype=np.float64)
H, y_batch_all_labels, pred_values, _ = get_activations_and_labels(next_batch, tpfy_model, compress_output_tensor)
y_batch = y_batch_all_labels['click']
pred_values = pred_values['click']

# Rows whose click label is -1 are excluded from labels, predictions and activations.
mask = (y_batch != -1)
# reshape(-1, 1) gives the same column shape as the former `sum(mask)[0]` count
# without iterating over the array with the Python builtin sum.
y_batch = y_batch[mask].reshape(-1, 1)
pred_values = pred_values[mask].reshape(-1, 1)

# Flattened row selector: unlike squeeze(), it stays 1-D for a one-row batch.
# H is promoted to float64 so the Gram product is formed at A's precision.
H = np.asarray(H, dtype=np.float64)[mask.reshape(-1)]
H = H / (np.linalg.norm(H, axis=1, keepdims=True) + 1e-8)

# Accumulate
A += H.T @ H

--------------
q Tensor("tpfy_model_v3_1/feature_prep/strided_slice:0", shape=(512, 32), dtype=float32)
k Tensor("tpfy_model_v3_1/feature_prep/watched_content_embedding_unpooled:0", shape=(512, ?, 32), dtype=float32)
Kw Tensor("tpfy_model_v3_1/feature_prep/GetSlotFids:1", shape=(512, ?), dtype=float32)
target embedding shape (512, 9, 32)
user embedding shape (512, 27, 32)
target: Tensor("tpfy_model_v3_1/feature_prep/target_feature/target_embeddings:0", shape=(512, 9, 32), dtype=float32)
user: Tensor("tpfy_model_v3_1/feature_prep/user_feature/user_embeddings:0", shape=(512, 27, 32), dtype=float32)
watched: Tensor("tpfy_model_v3_1/feature_prep/dot_prod_attention_pooling/cond/Merge:0", shape=(512, 32), dtype=float32)
fm_user Tensor("tpfy_model_v3_1/deepfm/fwfm/concat:0", shape=(512, 28, 32), dtype=float32)
fm_item Tensor("tpfy_model_v3_1/feature_prep/target_feature/target_embeddings:0", shape=(512, 9, 32), dtype=float32)
fwfm out Tensor("tpfy_model_v3_1/deepfm/fwfm/Reshape:0", shape=(512,

In [5]:
from sklearn.metrics import roc_auc_score

# AUC of the click predictions against the click labels for the rows kept by the mask
# in the preceding cell; it covers that single batch only.
auc = roc_auc_score(y_batch, pred_values)
print(f"AUC Score: {auc}")

AUC Score: 0.6948470209339774


In [6]:
# Display A (identity plus the single accumulated batch); NumPy abbreviates the output.
A

array([[3.88248253e+00, 2.79561937e-01, 6.72603399e-02, ...,
        4.61801916e-01, 2.53122473e+00, 3.26381624e-01],
       [2.79561937e-01, 1.48074925e+00, 4.26062047e-02, ...,
        4.00694236e-02, 5.35355508e-01, 3.05682328e-02],
       [6.72603399e-02, 4.26062047e-02, 1.07329577e+00, ...,
        3.64637226e-02, 7.83567429e-02, 6.26924622e-04],
       ...,
       [4.61801916e-01, 4.00694236e-02, 3.64637226e-02, ...,
        1.35456702e+00, 6.59448355e-02, 5.83657473e-02],
       [2.53122473e+00, 5.35355508e-01, 7.83567429e-02, ...,
        6.59448355e-02, 5.45065403e+00, 2.26386592e-01],
       [3.26381624e-01, 3.05682328e-02, 6.26924622e-04, ...,
        5.83657473e-02, 2.26386592e-01, 1.13920102e+00]])

In [1]:
import os
import warnings

# Deployment selectors for this notebook. They are set before the project modules are
# imported below (presumably read by common.config at import time; confirm).
os.environ['ENV'] = 'prod'
os.environ['REGION'] = 'apse1'
os.environ['TENANT'] = "in"
os.environ['RECO_S3_BUCKET'] = "p13n-reco-offline-prod"
os.environ['COUNTRY_KEY'] = "in"
os.environ['AWS_REGION'] = "ap-southeast-1"
os.environ['USE_REAL_CMS3'] = "True"

# SECURITY: an RSA private key used to be hard-coded on this line. Secrets must not be
# stored in a notebook (they end up in version control and in shared outputs).
# Supply RECO_CREDENTIAL through the kernel environment or a secrets manager instead,
# and rotate the key that was previously committed here.
if not os.environ.get('RECO_CREDENTIAL'):
    warnings.warn(
        "RECO_CREDENTIAL is not set in the environment; "
        "steps that need this credential will fail until it is provided.",
        RuntimeWarning,
    )

import argparse, gc
import json
import os
import numpy as np
import s3fs
import pyarrow
import tensorflow as tf
from tqdm import tqdm

# Switch TensorFlow to TF1-style behaviour for everything that follows.
tfv1 = tf.compat.v1
tfv1.disable_v2_behavior()

# Turn on memory growth for every visible GPU. Iterating over an empty device
# list does nothing, so no separate emptiness check is required.
physical_devices = tf.config.list_physical_devices('GPU')
for device in physical_devices:
    tf.config.experimental.set_memory_growth(device, True)

import tensorflow_addons as tfa
import tensorflow_recommenders_addons as tfra
from model.losses import masked_binary_entropy_loss
from model.metrics import MaskedAUC

from common.config.utils import data_path, model_path
from common.config import TENANT
from tpfy.tf_model.tpfy_model_v3_mtl import TpfyModelV3, TpfyMtlModelConfig
from tpfy.etl.schema import TpfyMtlDatasetSchema
from model.parquet_dataset import TFParquetDataset
from tpfy.common import TpfyDataPath
from omegaconf import OmegaConf
from dataclasses import dataclass
from tpfy.train_v3_mtl import make_example_mtl, TpfyTrainConfig, TpfyConfig
from tpfy.helper import load_model_weights_from_s3

class Args:
    """Plain container for the training arguments; stands in for an argparse namespace."""

    def __init__(self):
        # Attributes are installed in one call; keyword order fixes attribute order.
        vars(self).update(
            # Positional arguments
            model_name="tpfy-v3-mtl-r2",
            date="2026-02-06",  # Training date
            val_date="2026-02-06",  # Validation date
            # Optional arguments
            conf=None,
            max_epoch=None,
            val_days=1,
            click_ns=0.08,
            variant="cms3",
            num_workers=4,
            repeat=1,
            eval_freq=None,
            lr=1e-4,
            batch_size=512,
            click_weight=1.0,
            watch_weight=1.0,
            upload=False,  # Set to False if you don't want to upload to S3
            reload_local_model=None,
            reload_s3_model="tpfy-v3-mtl-r2",  # Set to None if starting fresh
            extract_activations=True,
            output=None,
            clear_nn=False,
            ckpt=None,
            verbose=False,
            countries=None,
        )

# Instantiate the notebook's stand-in for parsed command-line arguments.
args = Args()

# Echo the settings that matter for this run.
print("Training Configuration:")
for _label, _value in (
    ("Model Name", args.model_name),
    ("Training Date", args.date),
    ("Validation Date", args.val_date),
    ("Variant", args.variant),
    ("Click NS", args.click_ns),
    ("Num Workers", args.num_workers),
    ("Reload Model", args.reload_s3_model),
    ("Upload", args.upload),
):
    print(f"  {_label}: {_value}")

# Tenant-specific YAML, resolved relative to the current working directory.
config_name = f"tpfy/tpfy_config/mtl-{TENANT}.yaml"
if not os.path.exists(config_name):
    raise FileNotFoundError(f"Config file {config_name} not found")

# The YAML contents are merged over the structured TpfyConfig schema.
_schema_defaults = OmegaConf.structured(TpfyConfig)
_tenant_overrides = OmegaConf.load(config_name)
hparams: TpfyConfig = OmegaConf.merge(_schema_defaults, _tenant_overrides)
print(f"\nLoaded config: {config_name}")

# A truthy args.batch_size replaces the configured batch size.
if args.batch_size:
    hparams.train.batch_size = args.batch_size
batch_size = hparams.train.batch_size
print(f"Batch size: {batch_size}")

# Normalise the variant to carry a leading dash (it is substituted into the dataset path).
variant = args.variant
if variant and not variant.startswith("-"):
    variant = f"-{variant}"

# Session used by the functions and cells below.
session = tfv1.keras.backend.get_session()

def create_training_dataset(date):
    """Build the batched example dataset for one date.

    Args:
        date: substituted, together with the module-level ``variant``, into the
            path template returned by ``data_path``.

    Returns:
        The dataset produced by ``create_tf_dataset(batch_size)`` with
        ``make_example_mtl`` mapped over it (``batch_size`` is the module-level value).
    """
    path_template = data_path(
        TpfyDataPath.S3_TPFY_IMPR_V3_AGG_MTL_EXTRACTED_EXAMPLES_VAR, TENANT
    )
    source = TFParquetDataset(
        [path_template % (variant, date)],
        TpfyMtlDatasetSchema,
        shuffle_files=False,
    )
    # An alternative loader, dataset.create_parallel_tf_dataset(batch_size,
    # args.num_workers, num_epochs=1, queue_size=16, v2=True,
    # row_transformer_factory=None), was previously kept here commented out.
    batched = source.create_tf_dataset(batch_size)
    return batched.map(make_example_mtl)

def load_and_compile_model():
    """Create a ``TpfyModelV3`` from ``hparams`` and compile it.

    The model is compiled with one masked binary cross-entropy loss and one
    ``MaskedAUC`` metric per task, and an AdamW optimizer wrapped in
    ``DynamicEmbeddingOptimizer``.

    Returns:
        The compiled model.
    """
    banner = "=" * 80
    print(f"\n{banner}")
    print("BUILDING MODEL")
    print(f"{banner}")

    model = TpfyModelV3(
        hparams.model,
        click_ns=args.click_ns,
        enable_random_watch=hparams.train.enable_random_watch,
    )

    # compile() requires an optimizer even though this notebook does not train;
    # the hyper-parameters are therefore irrelevant for inference.
    base_optimizer = tfa.optimizers.AdamW(
        weight_decay=0.0,
        learning_rate=0.001,
        epsilon=1e-4,
    )

    # (task name, from_logits): True for every task except "random_watch".
    task_from_logits = (
        ("click", True),
        ("watch", True),
        ("random_watch", False),
        ("paywall_view", True),
        ("add_watchlist", True),
    )
    loss_dict = {
        task: masked_binary_entropy_loss(from_logits=flag)
        for task, flag in task_from_logits
    }
    metric_dict = {
        task: MaskedAUC(from_logits=flag)
        for task, flag in task_from_logits
    }

    model.compile(
        optimizer=tfra.dynamic_embedding.DynamicEmbeddingOptimizer(base_optimizer),
        loss=loss_dict,
        metrics=metric_dict,
    )
    print("Model compiled")

    return model

def get_activations_and_labels(next_batch, last_layer_tensor):
    """Evaluate ``last_layer_tensor`` together with one batch's labels and metadata.

    Everything is fetched in a single ``session.run`` so the three results come
    from the same graph execution. The previous version ran the iterator and the
    activation tensor in two separate calls and passed a ``feed_dict`` expression
    that fed nothing in either branch (``{}`` or ``None``).

    Args:
        next_batch: ``(features, labels, metadata)`` triple of fetchable tensors,
            e.g. the result of ``iterator.get_next()``.
        last_layer_tensor: graph tensor to evaluate as the activations.

    Returns:
        Tuple ``(activation_values, labels, metadata)``.
    """
    # NOTE(review): the activations only follow the fetched batch if last_layer_tensor
    # was built from this iterator's feature tensors. In this notebook the model is
    # called on an already-evaluated sample batch, so the activations may not change
    # between calls -- confirm.
    _, labels_tensors, metadata_tensors = next_batch
    activation_values, labels, metadata = session.run(
        [last_layer_tensor, labels_tensors, metadata_tensors]
    )

    return activation_values, labels, metadata

def compute_A(A, next_batch, last_layer_tensor):
    """Accumulate one batch into the feature matrix ``A``.

    Fetches one batch through ``get_activations_and_labels``, keeps the rows whose
    ``'click'`` label is not -1, L2-normalises each activation row and adds
    ``H.T @ H`` to ``A``.

    Args:
        A: square NumPy array, updated in place; its side must equal the width
            of the activations.
        next_batch, last_layer_tensor: forwarded unchanged to
            ``get_activations_and_labels``.

    Returns:
        The same ``A`` object (untouched when no row of the batch is kept).
    """
    activation_values, labels, _ = get_activations_and_labels(next_batch, last_layer_tensor)

    # Flatten to a 1-D row selector so (N, 1) and (N,) labels behave the same.
    # squeeze() would collapse a one-row batch to a 0-d mask and break the indexing.
    mask = (np.asarray(labels['click']) != -1).reshape(-1)
    if not mask.any():
        return A

    # Boolean indexing already returns a copy, so no explicit .copy() is needed, and
    # locals are released on return, so no `del` bookkeeping is needed either.
    # Promote to float64 before normalising and forming the Gram matrix: a float32
    # product accumulates rounding error over many batches in the float64 accumulator.
    H = np.asarray(activation_values, dtype=np.float64)[mask]
    H = H / (np.linalg.norm(H, axis=1, keepdims=True) + 1e-8)

    A += H.T @ H
    return A

2026-02-10 13:59:21.950011: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2026-02-10 13:59:21.950041: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


Instructions for updating:
non-resource variables are not supported in the long term


2026-02-10 13:59:23.306871: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2026-02-10 13:59:23.306898: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2026-02-10 13:59:23.306914: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ip-10-11-72-173): /proc/driver/nvidia/version does not exist


Training Configuration:
  Model Name: tpfy-v3-mtl-r2
  Training Date: 2026-02-06
  Validation Date: 2026-02-06
  Variant: cms3
  Click NS: 0.08
  Num Workers: 4
  Reload Model: tpfy-v3-mtl-r2
  Upload: False

Loaded config: tpfy/tpfy_config/mtl-in.yaml
Batch size: 512



2026-02-10 13:59:24.130202: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:

# if __name__ == '__main__':
# Build the input pipeline for the configured date and evaluate one concrete batch.
tf_dataset = create_training_dataset(args.date)
iterator = tf_dataset.make_one_shot_iterator()
next_batch = iterator.get_next()
sample_features, sample_labels, sample_metadata = session.run(next_batch)
tpfy_model = load_and_compile_model()

# First call of the model; the initialisers below are run right after it.
# NOTE(review): sample_features is an already-evaluated batch (the result of
# session.run), not the iterator's symbolic output. Tensors later fetched by name from
# this "tpfy_model_v3/..." scope therefore appear to be tied to this one batch rather
# than to batches drawn from the iterator afterwards -- confirm before relying on
# activations computed per batch.
prediction = tpfy_model(sample_features, training=False)
session.run([
    tfv1.global_variables_initializer(),
    tfv1.local_variables_initializer(),
    tfv1.tables_initializer()
])

# Load the stored weights for args.model_name.
plain_weights = load_model_weights_from_s3(
    args.model_name,
    use_s3=True
)
# Remove every 'train/' occurrence from the weight names (presumably so they match the
# variable names of this graph; confirm).
plain_weights_modified = {k.replace('train/', ''): v for k, v in plain_weights.items()}
restore_ops = tpfy_model.restore_plain_weights_ops(
    plain_weights_modified,
    clear_nn=args.clear_nn
)
session.run(restore_ops)

# Create NEW iterator (reset to start of dataset)
iterator = tf_dataset.make_one_shot_iterator()
next_batch = iterator.get_next()

# Get compress_output tensor (linear_input)
# Both names are hard-coded to the scope created by the first model call above:
# 'deepfm/Relu:0' and the 'deepfm/add:0' tensor inspected alongside it.
graph = tf.compat.v1.get_default_graph()
compress_output_tensor = graph.get_tensor_by_name('tpfy_model_v3/deepfm/Relu:0')
add_output_tensor = graph.get_tensor_by_name('tpfy_model_v3/deepfm/add:0')

files s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted-cms3/2026-02-06/part-00000-tid-7617731706831743383-d7b69ff7-441e-4659-95cb-8067a05f5f34-2835-1-c000.snappy.parquet,s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted-cms3/2026-02-06/part-00001-tid-7617731706831743383-d7b69ff7-441e-4659-95cb-8067a05f5f34-2909-1-c000.snappy.parquet,s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted-cms3/2026-02-06/part-00002-tid-7617731706831743383-d7b69ff7-441e-4659-95cb-8067a05f5f34-3012-1-c000.snappy.parquet,s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted-cms3/2026-02-06/part-00003-tid-7617731706831743383-d7b69ff7-441e-4659-95cb-8067a05f5f34-2995-1-c000.snappy.parquet,s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted-cms3/2026-02-06/part-00004-tid-7617731706831743383-d7b69ff7-441e-4659-95cb-8067a05f5f34-2892-1-c000.snappy.parquet,s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted

2026-02-10 13:59:28.082794: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 2499995000 Hz
2026-02-10 13:59:28.153424: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)



BUILDING MODEL
Model compiled
--------------
Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.
q Tensor("tpfy_model_v3/feature_prep/strided_slice:0", shape=(512, 32), dtype=float32)
k Tensor("tpfy_model_v3/feature_prep/watched_content_embedding_unpooled:0", shape=(512, ?, 32), dtype=float32)
Kw Tensor("tpfy_model_v3/feature_prep/GetSlotFids:1", shape=(512, ?), dtype=float32)
target embedding shape (512, 9, 32)
user embedding shape (512, 27, 32)
target: Tensor("tpfy_model_v3/feature_prep/target_feature/target_embeddings:0", shape=(512, 9, 32), dtype=float32)
user: Tensor("tpfy_model_v3/feature_prep/user_feature/user_embeddings:0", shape=(512, 27, 32), dtype=float32)
watched: Tensor("tpfy_model_v3/feature_prep/dot_prod_attention_pooling/cond/Merge:0", shape=(512, 32), dtype=float32)
fm_user Tensor("tpfy_model_v3/deepfm/fwfm/concat:0", shape=(512, 28, 32), dtype=float32)
fm_item Tensor("tpfy_model_v3/

2026-02-10 13:59:37.285528: I ./tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_cpu.h:143] HashTable on CPU is created on optimized mode: K=l, V=f, DIM=32, init_size=8192


In [9]:
# Start a fresh one-shot iterator and evaluate one batch from it.
iterator = tf_dataset.make_one_shot_iterator()
next_batch = iterator.get_next()

features, labels, metadata = session.run(next_batch)

# Evaluate the Relu output and the 'add' tensor in the same run so the two arrays can
# be compared element by element. The former feed_dict argument is dropped: both of
# its branches ({} and None) fed nothing, so it had no effect on the result.
activation_values, add_tensor = session.run(
    [compress_output_tensor, add_output_tensor]
)

In [12]:
# First row of the 'deepfm/add:0' values (contains negative entries).
add_tensor[0]

array([-3.6381006e-02,  2.0870616e-01, -3.1749216e-01, -7.8268969e-01,
       -9.3370378e-03, -1.0260473e-01,  9.3128765e-01,  6.6656411e-02,
       -8.9544547e-01,  3.6860645e-01, -6.1288929e-01, -1.2922154e-01,
        5.7820641e-02,  4.0108407e-01,  1.6118914e-01, -1.8722306e-01,
        9.2071140e-01, -2.3697898e-01, -1.5648040e-01, -4.9121645e-01,
        1.7610466e-01, -3.3055282e-01, -2.0456858e-01, -1.4516298e-01,
       -2.2657064e-01, -4.3391439e-01, -2.5236571e-01, -6.3165896e-02,
        2.4258375e-02,  8.0752686e-02,  4.3632853e-01, -5.7931566e-01,
        7.2076052e-02, -1.3074704e-01, -3.1468624e-01, -5.7420921e-01,
        1.6350855e-01, -3.8171995e-01,  4.7261083e-01, -5.2388495e-01,
       -1.9027245e-01, -2.6893115e-02, -2.7601308e-01, -3.5369068e-01,
       -4.5817351e-01, -2.1271887e-01,  7.0292789e-01, -2.7641302e-01,
       -4.7807723e-01,  5.2648619e-02,  9.3844509e-01,  5.6751186e-01,
       -1.3235238e-01,  9.0824288e-01,  4.3074238e-01,  7.5353599e-01,
      

In [13]:
# First row of the 'deepfm/Relu:0' values; in the displayed output the entries that
# are negative in add_tensor[0] appear as 0 here and the positive ones are unchanged.
activation_values[0]

array([0.        , 0.20870616, 0.        , 0.        , 0.        ,
       0.        , 0.93128765, 0.06665641, 0.        , 0.36860645,
       0.        , 0.        , 0.05782064, 0.40108407, 0.16118914,
       0.        , 0.9207114 , 0.        , 0.        , 0.        ,
       0.17610466, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.02425838, 0.08075269,
       0.43632853, 0.        , 0.07207605, 0.        , 0.        ,
       0.        , 0.16350855, 0.        , 0.47261083, 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.7029279 , 0.        , 0.        , 0.05264862,
       0.9384451 , 0.56751186, 0.        , 0.9082429 , 0.43074238,
       0.753536  , 0.5045053 , 0.        , 0.        , 0.        ,
       0.89141923, 0.93157434, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.41687167, 0.00738127, 0.10256493,
       0.        , 0.9419999 , 0.        , 0.15850542, 1.12912

In [3]:
# Start a fresh one-shot iterator; `next_batch` is rebound to read from it.
iterator = tf_dataset.make_one_shot_iterator()
next_batch = iterator.get_next()

# Initialise the feature matrix as A = lambda_ * I (d x d); compute_A then adds
# H.T @ H for each processed batch.
lambda_=1.0
d=128
A = lambda_ * np.eye(d, dtype=np.float64)

# Disabled multi-batch driver: it printed progress and saved a snapshot of A every
# 100 runs. With it commented out, compute_A below runs for a single batch only.
# for run_ in range(10_000):
#     if run_ % 100 == 0:
#         print(f'Run {run_} completed !')
#         np.save(f'tpfy/neural_linUCB_training_data/A_{run_}.npy', A)
#         gc.collect()
A = compute_A(A, next_batch, compress_output_tensor)

In [7]:
# Confirm that A is d x d.
A.shape

(128, 128)

In [9]:
# Mean of all entries of A_inv.
# NOTE(review): no cell above this one defines A_inv, so the cell depends on state
# left in the kernel by a cell that appears later in the file; it fails on a fresh
# top-to-bottom run.
A_inv.mean()

0.0002328249584400909

In [1]:
import time


In [2]:
# Current Unix timestamp in seconds.
time.time()

1770714740.041274

In [4]:
import numpy as np
# Load a saved snapshot of A; the file name follows the A_{run_}.npy pattern used by
# the np.save call in the commented-out accumulation loop.
A = np.load('tpfy/neural_linUCB_training_data/A_500.npy')

In [5]:
# Explicit inverse of the loaded matrix.
# NOTE(review): nothing here checks that A is well conditioned before inverting.
A_inv = np.linalg.inv(A)

In [8]:
# Look for weight names containing 'Relu'; the displayed result is an empty list.
[x for x in plain_weights.keys() if 'Relu' in x]

[]

In [14]:
# Name of the first operation in the graph's operation list.
graph.get_operations()[0].name

'normalize_element/component_0'

In [3]:
# List the A snapshots sorted by modification time (oldest first) and keep the last
# two lines, i.e. the two most recently written files.
! ls -lrth tpfy/neural_linUCB_training_data/A* | tail -n 2

-rw-rw-r-- 1 ubuntu ubuntu 129K Feb 11 04:30 tpfy/neural_linUCB_training_data/A_285400.npy
-rw-rw-r-- 1 ubuntu ubuntu 129K Feb 11 04:30 tpfy/neural_linUCB_training_data/A_285500.npy


In [10]:
import numpy as np
# Load the snapshot of A saved at run 285400 (second newest file in the listing above).
A = np.load('tpfy/neural_linUCB_training_data/A_285400.npy')

In [11]:
# Explicit inverse of the loaded snapshot, displayed (abbreviated by NumPy).
# NOTE(review): the eigenvalue check in the following cell reports a negative
# eigenvalue for this A, so this inverse should be treated with caution.
A_inv = np.linalg.inv(A)
A_inv

array([[ 2.44612633e-05, -5.82197560e-07, -5.88762095e-07, ...,
        -7.00092343e-07,  8.00384422e-07, -6.85502739e-06],
       [-5.82197478e-07,  5.02142854e-05, -9.16449807e-06, ...,
         3.90269700e-06, -7.15972693e-06,  2.71313145e-06],
       [-5.88762050e-07, -9.16449805e-06,  7.86230807e-05, ...,
        -3.18181759e-06, -2.99785467e-06,  3.31799695e-07],
       ...,
       [-7.00092399e-07,  3.90269700e-06, -3.18181757e-06, ...,
         8.04704189e-05,  2.83699142e-06,  3.75896958e-06],
       [ 8.00384381e-07, -7.15972702e-06, -2.99785471e-06, ...,
         2.83699148e-06,  4.59738042e-05, -9.86619269e-06],
       [-6.85502742e-06,  2.71313143e-06,  3.31799694e-07, ...,
         3.75896960e-06, -9.86619272e-06,  6.90804107e-05]])

In [12]:
# Eigenvalues of A in ascending order (eigvalsh treats A as symmetric).
# NOTE(review): the displayed minimum is negative (about -1.2e-02). If this snapshot
# was produced by the compute_A loop (identity plus a sum of H.T @ H terms), every
# eigenvalue should be >= 1 in exact arithmetic, so this points at accumulated
# numerical error -- confirm how the file was generated.
np.linalg.eigvalsh(A)

array([-1.19190959e-02,  7.02655493e-02,  1.00000000e+00,  1.00000000e+00,
        1.00000000e+00,  1.19579315e+00,  2.59723194e+00,  3.32603627e+00,
        5.39817573e+00,  6.73626891e+00,  2.44787524e+02,  1.00028692e+03,
        1.41044686e+03,  1.45309081e+03,  1.83258658e+03,  2.15268813e+03,
        2.35076308e+03,  2.67539033e+03,  3.05553401e+03,  3.13167839e+03,
        3.59019707e+03,  3.73325982e+03,  4.01662766e+03,  4.17179685e+03,
        4.67381584e+03,  4.86049034e+03,  5.14703741e+03,  5.23293096e+03,
        5.76903295e+03,  5.88755154e+03,  6.17980877e+03,  6.43507004e+03,
        6.71583620e+03,  7.17269468e+03,  7.49825584e+03,  7.63465294e+03,
        7.93422508e+03,  8.14512980e+03,  8.28837734e+03,  8.45216959e+03,
        9.06220539e+03,  9.16820557e+03,  9.39064974e+03,  9.94380505e+03,
        1.04024283e+04,  1.09789085e+04,  1.11306598e+04,  1.15738630e+04,
        1.18206874e+04,  1.23628211e+04,  1.25652833e+04,  1.31171538e+04,
        1.34915970e+04,  

In [13]:
# Same listing for the 'before_relu' snapshot directory: the two most recently
# written files appear last.
! ls -lrth tpfy/neural_linUCB_before_relu/A* | tail -n 2

-rw-rw-r-- 1 ubuntu ubuntu 129K Feb 11 03:42 tpfy/neural_linUCB_before_relu/A_268100.npy
-rw-rw-r-- 1 ubuntu ubuntu 129K Feb 11 03:42 tpfy/neural_linUCB_before_relu/A_268200.npy


In [1]:
import numpy as np
# Load the run-260000 snapshot from the 'before_relu' directory (not the newest file
# shown in the listing above).
A = np.load('tpfy/neural_linUCB_before_relu/A_260000.npy')

In [2]:
# Explicit inverse of the loaded snapshot, displayed (abbreviated by NumPy).
A_inv = np.linalg.inv(A)
A_inv

array([[ 1.69467979e-04,  1.09820089e-05, -1.61855526e-05, ...,
        -2.38588130e-05, -1.76663268e-05, -2.41909191e-05],
       [ 1.09820089e-05,  1.32791266e-04,  3.91018686e-06, ...,
         1.37460344e-05,  1.38228689e-05, -6.13068184e-06],
       [-1.61855526e-05,  3.91018684e-06,  1.33321034e-04, ...,
        -8.15058869e-06, -5.60825504e-06, -9.89307581e-07],
       ...,
       [-2.38588129e-05,  1.37460345e-05, -8.15058874e-06, ...,
         3.10229580e-04,  2.07410728e-05,  4.20916716e-06],
       [-1.76663268e-05,  1.38228689e-05, -5.60825504e-06, ...,
         2.07410728e-05,  1.56146537e-04, -1.69285096e-05],
       [-2.41909192e-05, -6.13068190e-06, -9.89307588e-07, ...,
         4.20916732e-06, -1.69285096e-05,  1.65708766e-04]])

In [3]:
# Mean of all entries of A_inv.
np.mean(A_inv)

0.00018348474680191727

In [4]:
# Smallest eigenvalue of A (eigvalsh treats A as symmetric); the displayed value is
# positive but below 1.
np.linalg.eigvalsh(A).min()

0.03076847271130545

In [17]:
# Subtract the 128 x 128 identity from A (presumably to look at the accumulated part
# without the initial identity term; confirm).
# NOTE(review): `tmp` is not used by any cell shown in this file.
tmp = A - np.eye(128, dtype=np.float64)

In [24]:
import pandas as pd
# Read one parquet part file into a DataFrame.
# NOTE(review): this is an absolute path on one user's machine, so the cell is not
# portable; `a` is also an uninformative name for a DataFrame.
a = pd.read_parquet('/home/ubuntu/vedansh/data/tpfy-ranker-exploration/2026-02-08/part-00000-tid-5972008309819576191-08545583-ab6a-47d5-b473-c13d7c674a83-16038-1-c000.snappy.parquet')

In [29]:
# Number of non-null content_id values per (dw_p_id, timestamp) group.
a.groupby(['dw_p_id', 'timestamp'])['content_id'].count()

dw_p_id                                                           timestamp 
008e062d251e85bb26a89439fee6927c75d4f9e4a5e830ca633363cf744c94a8  1770532558     5
00b2181b481fa39139f8e3e0b6ad07590fb0aea1b01284415d9db39504ad4114  1770534871     5
0186fd1c6ff719709dc28a2a13500345600b187aa66dfeee5e986b5f19e76629  1770529663     5
01a661cb0e62ebfdf4b9bb3e500bd75b8292a881bf8badf751a28faab4669658  1770550853     8
01da7f85a7b11e3cfd8f9de00a95edf2f0fcfbeb980654a7dcf17530355183e3  1770565360     5
                                                                                ..
ff89b0e878dd6ef71be671c68aac1e5629f569f71301aab1d5683e428d77ac8f  1770555544     5
ff8cdfc7fa4f933e85a24095bc6f108209d3fc97d9c2377f4a40821b3dc3b8ce  1770564184    11
ff9c27a025a69ff62cdefc5f318b344d8d58b0ff544a2b9e29e2b99f8beb8fe3  1770565211    11
ffbe9410a78f1d913ade806ade2ec1788ddcd399a34f6549576a29ead6b17fa8  1770555820     5
ffe31aaf6e67246308b873d5dfb9f389460fa828ec5eff9c892e5b7f7e4abb9b  1770576144     5
Name: cont

In [5]:
import numpy as np

In [7]:
# Shape check of a 128 x 128 float64 zero matrix.
np.zeros((128,128), dtype=np.float64).shape

(128, 128)

In [19]:
import numpy as np
# Load the run-42600 snapshot from the offline-matrices directory.
A = np.load('tpfy/neural_linUCB_offline_matrices_2026-02-06_1770463379/A_42600.npy')

In [21]:
# Add the 128 x 128 identity to the loaded matrix.
# NOTE(review): A is rebound to A + I, so re-running this cell adds another identity
# each time; reload A first, or bind the result to a new name, to keep it idempotent.
A = A + np.eye(128, dtype=np.float32)

In [22]:
# A_inv = np.linalg.inv(A)
# Eigenvalues of A + I in ascending order (eigvalsh treats the matrix as symmetric).
# NOTE(review): the displayed minimum is below 1 (about 0.84), which means the loaded
# matrix itself had a negative eigenvalue. A pure sum of H.T @ H terms should not, so
# this again suggests accumulated numerical error -- confirm.
np.linalg.eigvalsh(A)

array([8.43207404e-01, 9.91275014e-01, 1.00000000e+00, 1.00000000e+00,
       1.00000000e+00, 1.05343179e+00, 1.07166555e+00, 1.21849077e+00,
       1.52842012e+00, 1.72738827e+00, 3.87487660e+01, 1.57584302e+02,
       2.18619546e+02, 2.31103945e+02, 2.65635659e+02, 3.64679916e+02,
       3.85642015e+02, 4.41423150e+02, 4.95579696e+02, 5.12837972e+02,
       5.58959250e+02, 5.87122257e+02, 6.13945595e+02, 6.32074479e+02,
       6.84865676e+02, 7.30864707e+02, 7.75571076e+02, 7.81335729e+02,
       8.65465646e+02, 9.17198424e+02, 9.27633711e+02, 9.88935576e+02,
       9.91057950e+02, 1.07735594e+03, 1.10514598e+03, 1.17630533e+03,
       1.20938746e+03, 1.26009970e+03, 1.27050050e+03, 1.32016720e+03,
       1.35788018e+03, 1.39841336e+03, 1.41842678e+03, 1.50179396e+03,
       1.58490926e+03, 1.61372136e+03, 1.67786867e+03, 1.76560831e+03,
       1.84551470e+03, 1.90580471e+03, 1.95860384e+03, 2.04094175e+03,
       2.11387494e+03, 2.19950567e+03, 2.21365333e+03, 2.30632191e+03,
      