<h3>Training re-run</h3>

In [1]:
import os
os.environ['ENV'] = 'prod'
os.environ['REGION'] = 'apse1'
os.environ['TENANT'] ="in"
os.environ['RECO_S3_BUCKET'] = "p13n-reco-offline-prod"
os.environ['COUNTRY_KEY']= "in"
os.environ['AWS_REGION']= "ap-southeast-1"
os.environ['USE_REAL_CMS3']= "True"
os.environ['RECO_CREDENTIAL']= "-----BEGINRSAPRIVATEKEY-----\nMGICAQACEQCdHOlGnxIMWCMzjK2JAg37AgMBAAECEGOIwGTEO9vd3X9+jyiF4NECCQnoqDakDgSm2QIID9sadWN0XvMCCQLiqPkgVKSuIQIIDCAsWM+pJB8CCQG0jbIGCNX9MA==\n-----ENDRSAPRIVATEKEY-----"

In [2]:
import argparse
import json
import time
from dataclasses import dataclass

import tensorflow as tf

tfv1 = tf.compat.v1
tfv1.disable_v2_behavior()

import os
import pyarrow

from common.time_utils import get_dates_list_forwards
import tensorflow_addons as tfa
import tensorflow_recommenders_addons as tfra

import numpy as np
import s3fs
from common.s3_utils import upload_folder, upload_file

from common.config.utils import data_path, tenant_countries, model_path
from common.config import TENANT

from common.s3_utils import is_s3_path_success
from model.trainer import Trainer, ValData, LearningRateScheduler
from model.losses import masked_binary_entropy_loss
from model.metrics import MaskedAUC
from tpfy.tf_model.tpfy_model_v3_mtl import TpfyModelV3, TpfyMtlModelConfig
from tpfy.common import TpfyDataPath
import tpfy.tf_model.exporter_v3_mtl as exporter
from tpfy.etl.schema import TpfyMtlDatasetSchema
from model.parquet_dataset import TFParquetDataset, _get_dataset_columns
from omegaconf import OmegaConf


S3_TPFY_MODEL_EXPORT = model_path(TpfyDataPath.S3_TPFY_MODEL_EXPORT, TENANT)


@dataclass
class TpfyTrainConfig:
    repeat: int
    eval_freq: int
    eval_steps: int
    step_unit: int
    max_step: int
    batch_size: int

    learning_rate: float
    lr_decay: bool
    lr_decay_start: int
    min_lr: float
    weight_decay: float

    enable_random_watch: bool


@dataclass
class TpfyConfig:
    train: TpfyTrainConfig
    model: TpfyMtlModelConfig


_dataset_column_names = [col.name for col in _get_dataset_columns(TpfyMtlDatasetSchema)]

TASK_COL_INDEX = _dataset_column_names.index("task")


def filter_random_watch_factory():
    def _fn(imm_row):
        task = imm_row[TASK_COL_INDEX]
        if task[0] == 0:
            return [imm_row]
        else:
            return []

    return _fn


def init_local(args, countries) -> TpfyConfig:
    name = f"tpfy/tpfy_config/mtl-{TENANT}.yaml"

    if not os.path.exists(name):
        raise Exception(f"conf file {name} missing")

    if args.conf:
        cli_conf_dotlist = [p for p in args.conf.split(",") if len(p) > 0]
    else:
        cli_conf_dotlist = []

    conf: TpfyConfig = OmegaConf.merge(
        OmegaConf.structured(TpfyConfig),
        OmegaConf.load(name),
        OmegaConf.from_dotlist(cli_conf_dotlist),
    )

    if args.lr:
        conf.train.learning_rate = args.lr

    if args.eval_freq is not None:
        conf.train.eval_freq = args.eval_freq

    if args.batch_size:
        conf.train.batch_size = args.batch_size

    if args.max_epoch:
        conf.train.max_step = args.max_epoch

    if args.repeat:
        conf.train.repeat = args.repeat

    return conf


def assert_label_shape(tensor):
    assert len(tensor.shape) == 2
    assert tensor.shape[1].value == 1


def partition_columns(values):
    schema = TpfyMtlDatasetSchema
    num_features = len(schema.features)
    num_labels = len(schema.labels)
    num_metadata = len(schema.metadata)
    offset = 0
    features = schema.make_feature_tuple(
        values[offset : offset + num_features]
    )._asdict()
    offset += num_features

    labels = schema.make_label_tuple(values[offset : offset + num_labels])
    offset += num_labels

    metadata = schema.make_metadata_tuple(
        values[offset : offset + num_metadata]
    )._asdict()
    return features, labels, metadata


def make_example_mtl(*tensors):
    features, original_labels, metadata = partition_columns(tensors)

    task = features["task"]
    click = tf.cast(original_labels.click, tf.float32)
    watch = tf.cast(original_labels.watch, tf.float32)
    paywall_view = tf.cast(original_labels.paywall_view, tf.float32)
    add_watchlist = tf.cast(original_labels.add_watchlist, tf.float32)

    click = tf.where(tf.equal(task, 0), click, -1.0)
    random_watch = tf.where(tf.equal(task, 1), watch, -1.0)

    is_postclick = tf.greater(click, 0)
    watch = tf.where(is_postclick, watch, -1.0)
    paywall_view = tf.where(is_postclick, paywall_view, -1.0)
    add_watchlist = tf.where(is_postclick, add_watchlist, -1.0)

    assert_label_shape(click)
    assert_label_shape(watch)
    assert_label_shape(random_watch)

    labels = {
        "click": click,
        "watch": watch,
        "add_watchlist": add_watchlist,
        "paywall_view": paywall_view,
        "random_watch": random_watch,
    }

    return features, labels, metadata


def create_exp_lr_schedule_callback(decay_start, min_lr, alpha, verbose=False):
    def lr_schedule(epoch, current_lr):
        if epoch < decay_start:
            return current_lr
        else:
            lr = current_lr * alpha
            if lr < min_lr:
                return min_lr
            else:
                return lr

    return LearningRateScheduler(lr_schedule, verbose=verbose)


class TpfyCustomTrainer(Trainer):
    def __init__(
        self,
        model: TpfyModelV3,
        session,
        model_name,
        plain_weights,
        clear_nn,
        weight_decay,
        countries,
    ):
        super().__init__(model=model, model_name=model_name, session=session)
        self.plain_weights = plain_weights
        self.clear_nn = clear_nn
        self.weight_decay = weight_decay
        self.countries = countries

        assert isinstance(model, TpfyModelV3)

    def on_train_start(self):
        print("on train start")
        if self.plain_weights is not None:
            print("restore model weights")

            plain_weights = self.plain_weights
            restore_ops = self.model.restore_plain_weights_ops(
                plain_weights, clear_nn=self.clear_nn
            )
            self.session.run(restore_ops)
            print("done")

    def build_train_step(self, tape, loss):
        if not self.weight_decay:
            trainable_vars = self.model.trainable_variables
            gradients = tape.gradient(loss, trainable_vars)
            train_step = self.model.optimizer.apply_gradients(
                zip(gradients, trainable_vars)
            )
            return train_step
        else:
            print("build weight decay")
            trainable_variables = self.model.get_all_trainable_variables()
            print("trainable", [v.name for v in trainable_variables])
            decayed_variables = [v for v in trainable_variables if "bias" not in v.name]

            train_step = self.model.optimizer.minimize(
                loss,
                trainable_variables,
                decay_var_list=decayed_variables,
                tape=tape,
            )
            return train_step


def run(args):
    countries = tenant_countries(args.countries)
    hparams = init_local(args, countries)
    print(hparams)

    variant = args.variant
    if variant and not variant.startswith("-"):
        variant = "-" + variant

    train_date = args.date
    train_path = data_path(
        TpfyDataPath.S3_TPFY_IMPR_V3_AGG_MTL_EXTRACTED_EXAMPLES_VAR, TENANT
    ) % (variant, train_date)
    print("train data", train_path)
    if not is_s3_path_success(train_path):
        raise Exception("train data not available")
    dataset_schema = TpfyMtlDatasetSchema

    train_dataset = TFParquetDataset([train_path], dataset_schema, shuffle_files=True)
    train_row_transformer_factory = None

    make_example_fn = make_example_mtl

    session = tfv1.keras.backend.get_session()

    print("load dataset objective stat")
    fs = s3fs.S3FileSystem(use_ssl=False)
    with fs.open(os.path.join(train_path, "stats.json"), "r") as f:
        stats = json.load(f)
    task_weights = stats["task_weights"]

    batch_size = hparams.train.batch_size
    print(f"obj stat: batch_size {batch_size}")

    train_tf_dataset = train_dataset.create_parallel_tf_dataset(
        batch_size,
        args.num_workers,
        num_epochs=hparams.train.repeat,
        queue_size=16,
        v2=True,
        row_transformer_factory=train_row_transformer_factory,
    ).map(make_example_fn)

    print("output_shapes", train_tf_dataset.output_shapes)
    print("num output", len(train_tf_dataset.output_shapes))

    validation_dataset_dict = {}
    val_tenant_or_countries = [TENANT]
    for country in val_tenant_or_countries:
        for dt in get_dates_list_forwards(args.val_date, args.val_days):
            val_dataset = TFParquetDataset(
                [
                    data_path(
                        TpfyDataPath.S3_TPFY_IMPR_V3_DAILY_MTL_EXTRACTED_EXAMPLES,
                        country,
                    )
                    % (variant, dt)
                ],
                dataset_schema,
                shuffle_files=False,
            )
            validation_dataset_dict[f"{country}-{dt}"] = ValData(
                val_dataset.create_tf_dataset(batch_size)
                .take(hparams.train.eval_steps)
                .cache(f"val_mtl_{country}_{dt}")
                .map(make_example_fn),
                active_objectives=[
                    "click",
                    "watch",
                    "random_watch",
                    "paywall_view",
                    "add_watchlist",
                ],
            )

    model_name = args.model_name
    tpfy_model = TpfyModelV3(
        hparams.model,
        click_ns=args.click_ns,
        enable_random_watch=hparams.train.enable_random_watch,
    )

    optimizer = tfa.optimizers.AdamW(
        weight_decay=float(hparams.train.weight_decay),
        learning_rate=hparams.train.learning_rate,
        epsilon=1e-4,
    )
    optimizer = tfra.dynamic_embedding.DynamicEmbeddingOptimizer(optimizer)

    if hparams.train.lr_decay:
        lr_scheduler = create_exp_lr_schedule_callback(
            hparams.train.lr_decay_start,
            hparams.train.min_lr,
            0.7 ** (1 / 10),
            verbose=True,
        )
    else:
        lr_scheduler = None

    loss_dict = {
        "click": masked_binary_entropy_loss(from_logits=True),
        "watch": masked_binary_entropy_loss(from_logits=True),
        "random_watch": masked_binary_entropy_loss(from_logits=False),
        "paywall_view": masked_binary_entropy_loss(from_logits=True),
        "add_watchlist": masked_binary_entropy_loss(from_logits=True),
    }
    metric_dict = {
        "click": MaskedAUC(from_logits=True),
        "watch": MaskedAUC(from_logits=True),
        "random_watch": MaskedAUC(from_logits=False),
        "paywall_view": MaskedAUC(from_logits=True),
        "add_watchlist": MaskedAUC(from_logits=True),
    }

    loss_weight_dict = {
        "click": args.click_weight,
        "watch": args.watch_weight,
        "random_watch": 1.0 if hparams.train.enable_random_watch else 0.0,
        "paywall_view": 1.0,
        "add_watchlist": 1.0,
    }
    total_loss_weight = sum(loss_weight_dict.values())
    loss_weight_dict = {
        obj: w / total_loss_weight for obj, w in loss_weight_dict.items()
    }

    tpfy_model.compile(
        optimizer=optimizer,
        loss=loss_dict,
        metrics=metric_dict,
        loss_weights=loss_weight_dict,
    )
    plain_weights = None
    clear_nn = args.clear_nn
    if args.reload_s3_model or args.reload_local_model:
        if args.reload_s3_model:
            filesystem = s3fs.S3FileSystem(use_ssl=False)
            model_path = S3_TPFY_MODEL_EXPORT % args.reload_s3_model
        else:
            filesystem = pyarrow.LocalFileSystem()
            model_path = os.path.join("export", args.reload_local_model)

        if args.ckpt:
            checkpoint = args.ckpt
        else:
            checkpoint_path = os.path.join(model_path, "checkpoint")
            print("read checkpoint", checkpoint_path)
            with filesystem.open(checkpoint_path, "r") as f:
                checkpoint = f.read().strip()
        weights_path = os.path.join(model_path, checkpoint, "plain_weights.npz")
        if not filesystem.exists(weights_path):
            raise Exception(f"Model weights {weights_path} unavailable")
        else:
            print(f"Restore from {weights_path}")
            with filesystem.open(weights_path, "rb") as f:
                plain_weights = {}
                for k, v in np.load(f).items():
                    plain_weights[k] = v

            print("plain weights keys", list(plain_weights.keys()))

    trainer = TpfyCustomTrainer(
        tpfy_model,
        session,
        model_name,
        plain_weights,
        clear_nn=clear_nn,
        weight_decay=True,
        countries=countries,
    )
    trained_epochs = trainer.train(
        train_tf_dataset,
        epochs=hparams.train.max_step,
        steps_per_epoch=hparams.train.step_unit,
        validation_data_dict=validation_dataset_dict,
        validation_steps=None,
        validation_freq=hparams.train.eval_freq,
        log_dir="train/logs/" + model_name,
        lr_scheduler=lr_scheduler,
        early_stopping=None,
        verbose=args.verbose,
        validation_on_start=hparams.train.eval_freq > 0
        and (args.reload_local_model or args.reload_s3_model),
    )

    # WORKAROUND: clear de optimizer state to save memory online
    # TODO: remove optimizer entirely
    for de_var in tpfy_model.dynamic_embeddings:
        print("clear optimizer state for embedding variable", de_var.name)
        for i, opt in enumerate([optimizer]):
            print("opt", i)
            slot_variables = de_var.get_slot_variables(opt)
            for slot_variable in slot_variables:
                print(
                    "clear",
                    slot_variable.name,
                    "size",
                    session.run(slot_variable.size()),
                )
                session.run(slot_variable.clear())

    warmup_dataset = list(validation_dataset_dict.values())[0].tf_dataset
    warmup_it = warmup_dataset.make_one_shot_iterator()
    warmup_next = warmup_it.get_next()
    warmup_data = []
    print("get warmup data")
    for i in range(1):
        warmup_data.append(session.run(warmup_next))
    print("warmup data done")

    model_dir = "export/{}".format(model_name)
    version = int(time.time())
    export_path = "{}/{}".format(model_dir, version)
    print("export path", export_path)
    # from IPython import embed
    # embed()
    exporter.export_tpfy_model(export_path, session, tpfy_model, warmup_data)
    checkpoint_path = os.path.join(model_dir, "checkpoint")
    with open(checkpoint_path, "w") as f:
        f.write(str(version))

    if args.upload:
        print("upload")
        export_s3_path = S3_TPFY_MODEL_EXPORT % f"{model_name}/{version}"
        upload_folder(export_s3_path, export_path, set_acl=False)

        checkpoint_s3_path = S3_TPFY_MODEL_EXPORT % f"{model_name}/checkpoint"
        upload_file(checkpoint_s3_path, checkpoint_path, set_acl=False)

    print("done")


def main():
    parser = argparse.ArgumentParser(description="TPFY offline Training.")
    parser.add_argument("model_name", type=str, default="tpfy-v3-mtl-r2")
    parser.add_argument("date", type=str, default="2026-01-25")
    parser.add_argument("val_date", type=str, default="2026-01-25")
    parser.add_argument("--conf", type=str, default=None)
    parser.add_argument("--max_epoch", type=int, default=None)
    parser.add_argument("--val_days", type=int, default=1)
    parser.add_argument("--click_ns", type=float, default=0.08)
    parser.add_argument("--variant", type=str, default="cms3")
    parser.add_argument("--num_workers", type=int, default=4)
    parser.add_argument("--repeat", type=int, default=1)
    parser.add_argument("--eval_freq", type=int, default=None)
    parser.add_argument("--lr", type=float, default=None)
    parser.add_argument("--batch_size", type=int, default=None)
    parser.add_argument("--click_weight", type=float, default=1)
    parser.add_argument("--watch_weight", type=float, default=1)
    parser.add_argument("--upload", action="store_true", help="uploading model to s3")
    parser.add_argument("--reload_local_model", type=str, default=None)
    parser.add_argument("--reload_s3_model", type=str, default=None)
    parser.add_argument("--clear_nn", action="store_true")
    parser.add_argument("--ckpt", default=None, type=str)
    parser.add_argument("--verbose", action="store_true")
    parser.add_argument(
        "--countries",
        type=str,
        help="countries to run, separated with comma. "
        "default is None. fallback to region countries",
    )
    args = parser.parse_args()
    print("Start training")
    # run(args)

2026-02-03 16:18:39.053695: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2026-02-03 16:18:39.053723: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.


Instructions for updating:
non-resource variables are not supported in the long term


In [3]:
class Args:
    """Simple class to hold training arguments (replaces argparse)"""
    def __init__(self):
        # Positional arguments
        self.model_name = "tpfy-v3-mtl-r2"
        self.date = "2026-01-27"  # Training date
        self.val_date = "2026-01-28"  # Validation date
        
        # Optional arguments
        self.conf = None
        self.max_epoch = None
        self.val_days = 1
        self.click_ns = 0.08
        self.variant = "cms3"
        self.num_workers = 4
        self.repeat = 1
        self.eval_freq = None
        self.lr = 1e-4
        self.batch_size = 16
        self.click_weight = 1.0
        self.watch_weight = 1.0
        self.upload = False  # Set to False if you don't want to upload to S3
        self.reload_local_model = None
        self.reload_s3_model = "tpfy-v3-mtl-r2"  # Set to None if starting fresh
        self.clear_nn = False
        self.ckpt = None
        self.verbose = True
        self.countries = None

# Create args instance
args = Args()

# Display configuration
print("Training Configuration:")
print(f"  Model Name: {args.model_name}")
print(f"  Training Date: {args.date}")
print(f"  Validation Date: {args.val_date}")
print(f"  Variant: {args.variant}")
print(f"  Click NS: {args.click_ns}")
print(f"  Num Workers: {args.num_workers}")
print(f"  Reload Model: {args.reload_s3_model}")
print(f"  Upload: {args.upload}")

Training Configuration:
  Model Name: tpfy-v3-mtl-r2
  Training Date: 2026-01-27
  Validation Date: 2026-01-28
  Variant: cms3
  Click NS: 0.08
  Num Workers: 4
  Reload Model: tpfy-v3-mtl-r2
  Upload: False


In [4]:
countries = tenant_countries(args.countries)
hparams = init_local(args, countries)
print(hparams)

variant = args.variant
if variant and not variant.startswith("-"):
    variant = "-" + variant

train_date = args.date
train_path = data_path(
    TpfyDataPath.S3_TPFY_IMPR_V3_AGG_MTL_EXTRACTED_EXAMPLES_VAR, TENANT
) % (variant, train_date)
print("train data", train_path)
if not is_s3_path_success(train_path):
    raise Exception("train data not available")
dataset_schema = TpfyMtlDatasetSchema

{'train': {'repeat': 1, 'eval_freq': 0, 'eval_steps': 500, 'step_unit': 500, 'max_step': 1000, 'batch_size': 16, 'learning_rate': 0.0001, 'lr_decay': False, 'lr_decay_start': 40, 'min_lr': 0.0001, 'weight_decay': 1e-06, 'enable_random_watch': True}, 'model': {'dim': 32, 'middle_dim': 128, 'dnn_units': [256], 'multi_country': False, 'dnn_activation': 'relu', 'init_method': 'xavier_uniform', 'init_value': 0.01, 'dnn_l2': 0.0, 'embedding_l2': 0.0, 'enable_discover_popularity': False, 'enable_hp_feature': True}}
train data s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted-cms3/2026-01-27


In [5]:
train_dataset = TFParquetDataset([train_path], dataset_schema, shuffle_files=True)
train_row_transformer_factory = None

make_example_fn = make_example_mtl

session = tfv1.keras.backend.get_session()

print("load dataset objective stat")
fs = s3fs.S3FileSystem(use_ssl=False)
with fs.open(os.path.join(train_path, "stats.json"), "r") as f:
    stats = json.load(f)
task_weights = stats["task_weights"]

files s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted-cms3/2026-01-27/part-00136-tid-1941705305637033386-cb696035-06bd-4c10-985a-8327755d8544-2322-1-c000.snappy.parquet,s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted-cms3/2026-01-27/part-00201-tid-1941705305637033386-cb696035-06bd-4c10-985a-8327755d8544-2293-1-c000.snappy.parquet,s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted-cms3/2026-01-27/part-00219-tid-1941705305637033386-cb696035-06bd-4c10-985a-8327755d8544-2348-1-c000.snappy.parquet,s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted-cms3/2026-01-27/part-00182-tid-1941705305637033386-cb696035-06bd-4c10-985a-8327755d8544-2436-1-c000.snappy.parquet,s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted-cms3/2026-01-27/part-00177-tid-1941705305637033386-cb696035-06bd-4c10-985a-8327755d8544-2319-1-c000.snappy.parquet,s3://p13n-reco-offline-prod/dataset_v5/tpfy-impr-v3/agg-mtl-extracted

2026-02-03 16:18:48.694486: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2026-02-03 16:18:48.694507: W tensorflow/stream_executor/cuda/cuda_driver.cc:326] failed call to cuInit: UNKNOWN ERROR (303)
2026-02-03 16:18:48.694537: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (ip-10-11-72-173): /proc/driver/nvidia/version does not exist
2026-02-03 16:18:48.694886: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [6]:
batch_size = hparams.train.batch_size
print(f"obj stat: batch_size {batch_size}")

train_tf_dataset = train_dataset.create_parallel_tf_dataset(
    batch_size,
    args.num_workers,
    num_epochs=hparams.train.repeat,
    queue_size=16,
    v2=True,
    row_transformer_factory=train_row_transformer_factory,
).map(make_example_fn)

obj stat: batch_size 16
Instructions for updating:
Use output_signature instead
Instructions for updating:
Use output_signature instead


In [7]:
print("output_shapes", train_tf_dataset.output_shapes)
print("num output", len(train_tf_dataset.output_shapes))

validation_dataset_dict = {}
val_tenant_or_countries = [TENANT]
for country in val_tenant_or_countries:
    for dt in get_dates_list_forwards(args.val_date, args.val_days):
        val_dataset = TFParquetDataset(
            [
                data_path(
                    TpfyDataPath.S3_TPFY_IMPR_V3_DAILY_MTL_EXTRACTED_EXAMPLES,
                    country,
                )
                % (variant, dt)
            ],
            dataset_schema,
            shuffle_files=False,
        )
        validation_dataset_dict[f"{country}-{dt}"] = ValData(
            val_dataset.create_tf_dataset(batch_size)
            .take(hparams.train.eval_steps)
            .cache(f"val_mtl_{country}_{dt}")
            .map(make_example_fn),
            active_objectives=[
                "click",
                "watch",
                "random_watch",
                "paywall_view",
                "add_watchlist",
            ],
        )

Instructions for updating:
Use `tf.compat.v1.data.get_output_shapes(dataset)`.
output_shapes (OrderedDict([('user_fids', TensorShape([Dimension(None), Dimension(None)])), ('user_weighted_fids', TensorShape([Dimension(None), Dimension(None)])), ('user_weighted_fid_weights', TensorShape([Dimension(None), Dimension(None)])), ('fids', TensorShape([Dimension(None), Dimension(None)])), ('weighted_fids', TensorShape([Dimension(None), Dimension(None)])), ('weighted_fid_weights', TensorShape([Dimension(None), Dimension(None)])), ('sparse_indices', TensorShape([Dimension(None), Dimension(None)])), ('sparse_values', TensorShape([Dimension(None), Dimension(None)])), ('task', TensorShape([Dimension(None), Dimension(1)]))]), {'click': TensorShape([Dimension(None), Dimension(1)]), 'watch': TensorShape([Dimension(None), Dimension(1)]), 'add_watchlist': TensorShape([Dimension(None), Dimension(1)]), 'paywall_view': TensorShape([Dimension(None), Dimension(1)]), 'random_watch': TensorShape([Dimension(None

In [8]:
model_name = args.model_name
tpfy_model = TpfyModelV3(
    hparams.model,
    click_ns=args.click_ns,
    enable_random_watch=hparams.train.enable_random_watch,
)

optimizer = tfa.optimizers.AdamW(
    weight_decay=float(hparams.train.weight_decay),
    learning_rate=hparams.train.learning_rate,
    epsilon=1e-4,
)
optimizer = tfra.dynamic_embedding.DynamicEmbeddingOptimizer(optimizer)

if hparams.train.lr_decay:
    lr_scheduler = create_exp_lr_schedule_callback(
        hparams.train.lr_decay_start,
        hparams.train.min_lr,
        0.7 ** (1 / 10),
        verbose=True,
    )
else:
    lr_scheduler = None

loss_dict = {
    "click": masked_binary_entropy_loss(from_logits=True),
    "watch": masked_binary_entropy_loss(from_logits=True),
    "random_watch": masked_binary_entropy_loss(from_logits=False),
    "paywall_view": masked_binary_entropy_loss(from_logits=True),
    "add_watchlist": masked_binary_entropy_loss(from_logits=True),
}
metric_dict = {
    "click": MaskedAUC(from_logits=True),
    "watch": MaskedAUC(from_logits=True),
    "random_watch": MaskedAUC(from_logits=False),
    "paywall_view": MaskedAUC(from_logits=True),
    "add_watchlist": MaskedAUC(from_logits=True),
}

loss_weight_dict = {
    "click": args.click_weight,
    "watch": args.watch_weight,
    "random_watch": 1.0 if hparams.train.enable_random_watch else 0.0,
    "paywall_view": 1.0,
    "add_watchlist": 1.0,
}
total_loss_weight = sum(loss_weight_dict.values())
loss_weight_dict = {
    obj: w / total_loss_weight for obj, w in loss_weight_dict.items()
}

tpfy_model.compile(
    optimizer=optimizer,
    loss=loss_dict,
    metrics=metric_dict,
    loss_weights=loss_weight_dict,
)

In [9]:
plain_weights = None
clear_nn = args.clear_nn
if args.reload_s3_model or args.reload_local_model:
    if args.reload_s3_model:
        filesystem = s3fs.S3FileSystem(use_ssl=False)
        model_path = S3_TPFY_MODEL_EXPORT % args.reload_s3_model
    else:
        filesystem = pyarrow.LocalFileSystem()
        model_path = os.path.join("export", args.reload_local_model)

    if args.ckpt:
        checkpoint = args.ckpt
    else:
        checkpoint_path = os.path.join(model_path, "checkpoint")
        print("read checkpoint", checkpoint_path)
        with filesystem.open(checkpoint_path, "r") as f:
            checkpoint = f.read().strip()
    weights_path = os.path.join(model_path, checkpoint, "plain_weights.npz")
    if not filesystem.exists(weights_path):
        raise Exception(f"Model weights {weights_path} unavailable")
    else:
        print(f"Restore from {weights_path}")
        with filesystem.open(weights_path, "rb") as f:
            plain_weights = {}
            for k, v in np.load(f).items():
                plain_weights[k] = v

        print("plain weights keys", list(plain_weights.keys()))

read checkpoint s3://p13n-reco-offline-models-prod/models/tpfy/tpfy-v3-mtl-r2/checkpoint
Restore from s3://p13n-reco-offline-models-prod/models/tpfy/tpfy-v3-mtl-r2/1770115825/plain_weights.npz
plain weights keys ['train/tpfy_model_v3/feature_prep/dot_prod_attention_pooling/query_dense/kernel:0', 'train/tpfy_model_v3/feature_prep/dot_prod_attention_pooling/query_dense/bias:0', 'train/tpfy_model_v3/feature_prep/dot_prod_attention_pooling/key_dense/kernel:0', 'train/tpfy_model_v3/feature_prep/dot_prod_attention_pooling/key_dense/bias:0', 'train/tpfy_model_v3/deepfm/dnn/dnn_hyb/kernel_full_0:0', 'train/tpfy_model_v3/deepfm/dnn/dnn_hyb/kernel_compact_0:0', 'train/tpfy_model_v3/deepfm/dnn/dnn_hyb/bias_0:0', 'train/tpfy_model_v3/deepfm/compress_dense/kernel:0', 'train/tpfy_model_v3/deepfm/compress_dense/bias:0', 'train/tpfy_model_v3/deepfm/linear/linear_bias:0', 'train/tpfy_model_v3/deepfm/linear/linear_kernel:0', 'train/tpfy_model_v3/deepfm/mtl_linear/linear_bias:0', 'train/tpfy_model_v3/dee

In [10]:
trainer = TpfyCustomTrainer(
    tpfy_model,
    session,
    model_name,
    plain_weights,
    clear_nn=clear_nn,
    weight_decay=True,
    countries=countries,
)
trained_epochs = trainer.train(
    train_tf_dataset,
    epochs=hparams.train.max_step,
    steps_per_epoch=hparams.train.step_unit,
    validation_data_dict=validation_dataset_dict,
    validation_steps=None,
    validation_freq=hparams.train.eval_freq,
    log_dir="train/logs/" + model_name,
    lr_scheduler=lr_scheduler,
    early_stopping=None,
    verbose=args.verbose,
    validation_on_start=hparams.train.eval_freq > 0
    and (args.reload_local_model or args.reload_s3_model),
)

--------------
Instructions for updating:
The `validate_indices` argument has no effect. Indices are always validated on CPU and never validated on GPU.
q Tensor("train/tpfy_model_v3/feature_prep/strided_slice_1:0", shape=(?, 32), dtype=float32)
k Tensor("train/tpfy_model_v3/feature_prep/watched_content_embedding_unpooled:0", shape=(?, ?, 32), dtype=float32)
Kw Tensor("train/tpfy_model_v3/feature_prep/GetSlotFids:1", shape=(?, ?), dtype=float32)
target embedding shape (?, 9, 32)
user embedding shape (?, 27, 32)
target: Tensor("train/tpfy_model_v3/feature_prep/target_feature/target_embeddings:0", shape=(?, 9, 32), dtype=float32)
user: Tensor("train/tpfy_model_v3/feature_prep/user_feature/user_embeddings:0", shape=(?, 27, 32), dtype=float32)
watched: Tensor("train/tpfy_model_v3/feature_prep/dot_prod_attention_pooling/comp_new/add:0", shape=(?, 32), dtype=float32)
fm_user Tensor("train/tpfy_model_v3/deepfm/fwfm/concat:0", shape=(?, 28, 32), dtype=float32)
fm_item Tensor("train/tpfy_model_

  "shape. This may consume a large amount of memory." % value)


--------------
q Tensor("val_in-2026-01-28/tpfy_model_v3/feature_prep/strided_slice:0", shape=(?, 32), dtype=float32)
k Tensor("val_in-2026-01-28/tpfy_model_v3/feature_prep/watched_content_embedding_unpooled:0", shape=(?, ?, 32), dtype=float32)
Kw Tensor("val_in-2026-01-28/tpfy_model_v3/feature_prep/GetSlotFids:1", shape=(?, ?), dtype=float32)
target embedding shape (?, 9, 32)
user embedding shape (?, 27, 32)
target: Tensor("val_in-2026-01-28/tpfy_model_v3/feature_prep/target_feature/target_embeddings:0", shape=(?, 9, 32), dtype=float32)
user: Tensor("val_in-2026-01-28/tpfy_model_v3/feature_prep/user_feature/user_embeddings:0", shape=(?, 27, 32), dtype=float32)
watched: Tensor("val_in-2026-01-28/tpfy_model_v3/feature_prep/dot_prod_attention_pooling/cond/Merge:0", shape=(?, 32), dtype=float32)
fm_user Tensor("val_in-2026-01-28/tpfy_model_v3/deepfm/fwfm/concat:0", shape=(?, 28, 32), dtype=float32)
fm_item Tensor("val_in-2026-01-28/tpfy_model_v3/feature_prep/target_feature/target_embeddin

2026-02-03 16:20:03.350975: I tensorflow/core/platform/profile_utils/cpu_utils.cc:114] CPU Frequency: 2499995000 Hz
2026-02-03 16:20:03.478448: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:176] None of the MLIR Optimization Passes are enabled (registered 2)


on train start
restore model weights
reload source keys
train/tpfy_model_v3/feature_prep/dot_prod_attention_pooling/query_dense/kernel:0
train/tpfy_model_v3/feature_prep/dot_prod_attention_pooling/query_dense/bias:0
train/tpfy_model_v3/feature_prep/dot_prod_attention_pooling/key_dense/kernel:0
train/tpfy_model_v3/feature_prep/dot_prod_attention_pooling/key_dense/bias:0
train/tpfy_model_v3/deepfm/dnn/dnn_hyb/kernel_full_0:0
train/tpfy_model_v3/deepfm/dnn/dnn_hyb/kernel_compact_0:0
train/tpfy_model_v3/deepfm/dnn/dnn_hyb/bias_0:0
train/tpfy_model_v3/deepfm/compress_dense/kernel:0
train/tpfy_model_v3/deepfm/compress_dense/bias:0
train/tpfy_model_v3/deepfm/linear/linear_bias:0
train/tpfy_model_v3/deepfm/linear/linear_kernel:0
train/tpfy_model_v3/deepfm/mtl_linear/linear_bias:0
train/tpfy_model_v3/deepfm/mtl_linear/linear_kernel:0
train/tpfy_model_v3/sparse_layer:0
train/tpfy_model_v3/click_biases:0
train/tpfy_model_v3/watch_biases:0
embedding_layer/fids
embedding_layer/embeddings
resolve de

2026-02-03 16:20:03.745513: I ./tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_cpu.h:143] HashTable on CPU is created on optimized mode: K=l, V=f, DIM=32, init_size=8192


done
Epoch 0
start generator


2026-02-03 16:20:06.005470: I ./tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_cpu.h:143] HashTable on CPU is created on optimized mode: K=l, V=f, DIM=32, init_size=8192
2026-02-03 16:20:06.006211: I ./tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_cpu.h:143] HashTable on CPU is created on optimized mode: K=l, V=f, DIM=32, init_size=8192
2026-02-03 16:20:06.007099: I ./tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_cpu.h:143] HashTable on CPU is created on optimized mode: K=l, V=f, DIM=32, init_size=8192
2026-02-03 16:20:06.008099: I ./tensorflow_recommenders_addons/dynamic_embedding/core/kernels/lookup_impl/lookup_table_op_cpu.h:143] HashTable on CPU is created on optimized mode: K=l, V=f, DIM=32, init_size=8192
2026-02-03 16:20:06.608141: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dler

tpfy-v3-mtl-r2 loss:0.254716, add_watchlist_loss:0.033368, click_loss:0.612241, paywall_view_loss:0.079079, random_watch_loss:0.043880, watch_loss:0.505014, click_masked_auc:0.699140, watch_masked_auc_1:0.767569, random_watch_masked_auc_2:0.994120, paywall_view_masked_auc_3:0.922986, add_watchlist_masked_auc_4:0.710154 ; time 10s
Epoch 1
tpfy-v3-mtl-r2 loss:0.258322, add_watchlist_loss:0.035631, click_loss:0.616898, paywall_view_loss:0.068068, random_watch_loss:0.047174, watch_loss:0.523840, click_masked_auc:0.689175, watch_masked_auc_1:0.766065, random_watch_masked_auc_2:0.991009, paywall_view_masked_auc_3:0.915290, add_watchlist_masked_auc_4:0.644746 ; time 1s
Epoch 2

KeyboardInterrupt: 

Process SpawnProcess-1:
Traceback (most recent call last):
  File "/home/ubuntu/anaconda3/envs/tpfy_ranker_py37/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/ubuntu/anaconda3/envs/tpfy_ranker_py37/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/home/ubuntu/vedansh/code/persona-reco-core/offline/src/main/python/model/parquet_dataset.py", line 94, in worker_v2
    minibatch = session.run(nxt)
  File "/home/ubuntu/.local/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 968, in run
    run_metadata_ptr)
  File "/home/ubuntu/.local/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1191, in _run
    feed_dict_tensor, options, run_metadata)
  File "/home/ubuntu/.local/lib/python3.7/site-packages/tensorflow/python/client/session.py", line 1369, in _do_run
    run_metadata)
  File "/home/ubuntu/.local/lib/python3.7/site-packages/tensorflo

worker start 0
worker start 3
worker start 1
worker start 2


In [11]:
print("\n" + "="*80)
print("DEBUG MODE - Inspecting Training Data")
print("="*80)

# Create debug iterator
debug_it = train_tf_dataset.make_one_shot_iterator()
debug_next = debug_it.get_next()

# Get a batch
print("\nFetching a batch from training dataset...")
features, labels, metadata = session.run(debug_next)

print("\n--- FEATURE SHAPES ---")
for fname, fval in features.items():
    print(f"{fname}: {fval.shape}, dtype={fval.dtype}, "
          f"range=[{np.min(fval):.4f}, {np.max(fval):.4f}]")

print("\n--- LABEL SHAPES ---")
for lname, lval in labels.items():
    valid_count = np.sum(lval != -1)
    pos_count = np.sum(lval > 0)
    print(f"{lname}: {lval.shape}, valid={valid_count}/{lval.size}, "
          f"positive={pos_count}/{valid_count if valid_count > 0 else 1}")

print("\n--- FIRST EXAMPLE IN BATCH ---")
print("\nFeatures:")
for fname, fval in features.items():
    ex = fval[0]
    if ex.size > 20:
        print(f"  {fname}: {ex[:10]} ... [{ex.size} elements]")
    else:
        print(f"  {fname}: {ex}")

print("\nLabels:")
for lname, lval in labels.items():
    print(f"  {lname}: {lval[0]}")

# Save example for later use
debug_example_path = f'debug_example_{model_name}.npz'
np.savez(
    debug_example_path,
    # **{f"feature_{k}": v[0:1] for k, v in features.items()},
    # **{f"label_{k}": v[0:1] for k, v in labels.items()},
    # **{f"metadata_{k}": v[0:1] for k, v in metadata.items()},
    **{f"feature_{k}": v for k, v in features.items()},
    **{f"label_{k}": v for k, v in labels.items()},
    **{f"metadata_{k}": v for k, v in metadata.items()},
)
print(f"\nSaved first example to: {debug_example_path}")

print("\n" + "="*80)


DEBUG MODE - Inspecting Training Data
Instructions for updating:
This is a deprecated API that should only be used in TF 1 graph mode and legacy TF 2 graph mode available through `tf.compat.v1`. In all other situations -- namely, eager mode and inside `tf.function` -- you can consume dataset elements using `for elem in dataset: ...` or by explicitly creating iterator via `iterator = iter(dataset)` and fetching its elements via `values = next(iterator)`. Furthermore, this API is not available in TF 2. During the transition from TF 1 to TF 2 you can use `tf.compat.v1.data.make_one_shot_iterator(dataset)` to create a TF 1 graph mode style iterator for a dataset created through TF 2 APIs. Note that this should be a transient state of your code base as there are in general no guarantees about the interoperability of TF 1 and TF 2 code.

Fetching a batch from training dataset...
start generator


2026-02-03 16:20:19.863347: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2026-02-03 16:20:19.863376: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2026-02-03 16:20:19.868369: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2026-02-03 16:20:19.868390: I tensorflow/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2026-02-03 16:20:19.907707: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or dire


--- FEATURE SHAPES ---
user_fids: (16, 26), dtype=int64, range=[0.0000, 1882504644240867328.0000]
user_weighted_fids: (16, 168), dtype=int64, range=[0.0000, 311858832311094720.0000]
user_weighted_fid_weights: (16, 168), dtype=float32, range=[0.0000, 1.0000]
fids: (16, 12), dtype=int64, range=[0.0000, 522417556774977536.0000]
weighted_fids: (16, 1), dtype=int64, range=[0.0000, 0.0000]
weighted_fid_weights: (16, 1), dtype=float32, range=[0.0000, 0.0000]
sparse_indices: (16, 13), dtype=int32, range=[1.0000, 14.0000]
sparse_values: (16, 13), dtype=float32, range=[0.0000, 1.0000]
task: (16, 1), dtype=int32, range=[0.0000, 1.0000]

--- LABEL SHAPES ---
click: (16, 1), valid=11/16, positive=6/11
watch: (16, 1), valid=6/16, positive=1/6
add_watchlist: (16, 1), valid=6/16, positive=2/6
paywall_view: (16, 1), valid=6/16, positive=0/6
random_watch: (16, 1), valid=5/16, positive=0/5

--- FIRST EXAMPLE IN BATCH ---

Features:
  user_fids: [11868170659787972 25023955281098067 30473928018519015 4099

In [12]:
features['user_fids'].shape

(16, 26)

In [13]:
print("\n" + "="*80)
print("DEBUG MODE - Testing Model Predictions")
print("="*80)

# Load the saved example
debug_example_path = f'debug_example_{model_name}.npz'
if os.path.exists(debug_example_path):
    print(f"\nLoading example from: {debug_example_path}")
    data = np.load(debug_example_path)
    test_features = {k.replace('feature_', ''): v for k, v in data.items() 
                    if k.startswith('feature_')}
    test_labels = {k.replace('label_', ''): v for k, v in data.items() 
                  if k.startswith('label_')}

    print("\nRunning model prediction...")
    predictions = tpfy_model(test_features, training=False)
    pred_values = session.run(predictions)

    print("\n--- PREDICTIONS ---")
    for task_name, pred in pred_values.items():
        for index, prediction in enumerate(pred):
            print(f"\n{task_name}: sample : {index}")
            print(f"  Prediction: {prediction}")
            if task_name != 'click_recab':
                print(f"  Ground truth: {test_labels[task_name][index]}")

print("\n" + "="*80)


DEBUG MODE - Testing Model Predictions

Loading example from: debug_example_tpfy-v3-mtl-r2.npz

Running model prediction...
--------------
q Tensor("tpfy_model_v3/feature_prep/strided_slice:0", shape=(16, 32), dtype=float32)
k Tensor("tpfy_model_v3/feature_prep/watched_content_embedding_unpooled:0", shape=(16, ?, 32), dtype=float32)
Kw Tensor("tpfy_model_v3/feature_prep/GetSlotFids:1", shape=(16, ?), dtype=float32)
target embedding shape (16, 9, 32)
user embedding shape (16, 27, 32)
target: Tensor("tpfy_model_v3/feature_prep/target_feature/target_embeddings:0", shape=(16, 9, 32), dtype=float32)
user: Tensor("tpfy_model_v3/feature_prep/user_feature/user_embeddings:0", shape=(16, 27, 32), dtype=float32)
watched: Tensor("tpfy_model_v3/feature_prep/dot_prod_attention_pooling/cond/Merge:0", shape=(16, 32), dtype=float32)
fm_user Tensor("tpfy_model_v3/deepfm/fwfm/concat:0", shape=(16, 28, 32), dtype=float32)
fm_item Tensor("tpfy_model_v3/feature_prep/target_feature/target_embeddings:0", sha

In [15]:
print("\n" + "="*80)
print("DEBUG MODE - Extracting Activations via Tensor Names")
print("="*80)

# Load example
debug_example_path = f'debug_example_{model_name}.npz'
if os.path.exists(debug_example_path):
    data = np.load(debug_example_path)
    test_features = {k.replace('feature_', ''): v for k, v in data.items() 
                    if k.startswith('feature_')}
    test_labels = {k.replace('label_', ''): v for k, v in data.items() 
                  if k.startswith('label_')}

    # Run model to build the graph
    predictions = tpfy_model(test_features, training=False)

    # Access the compression layer output by name
    graph = tf.compat.v1.get_default_graph()

    # Find the compress_out tensor (the name is set in the model)
    compress_output_tensor = graph.get_tensor_by_name('train/tpfy_model_v3/deepfm/Relu:0')
    
    # Run both predictions and activations
    pred_values, activation_values = session.run(
        [predictions, compress_output_tensor]
    )

    print("\n--- LAST LAYER ACTIVATIONS ---")
    print(f"Shape: {activation_values.shape}")
    print(f"First example activations:\n{activation_values[0]}")

    # Save for later use
    np.save(f'debug_activations_{model_name}.npy', activation_values)

print("\n" + "="*80)


DEBUG MODE - Extracting Activations via Tensor Names
--------------
q Tensor("tpfy_model_v3_2/feature_prep/strided_slice:0", shape=(16, 32), dtype=float32)
k Tensor("tpfy_model_v3_2/feature_prep/watched_content_embedding_unpooled:0", shape=(16, ?, 32), dtype=float32)
Kw Tensor("tpfy_model_v3_2/feature_prep/GetSlotFids:1", shape=(16, ?), dtype=float32)
target embedding shape (16, 9, 32)
user embedding shape (16, 27, 32)
target: Tensor("tpfy_model_v3_2/feature_prep/target_feature/target_embeddings:0", shape=(16, 9, 32), dtype=float32)
user: Tensor("tpfy_model_v3_2/feature_prep/user_feature/user_embeddings:0", shape=(16, 27, 32), dtype=float32)
watched: Tensor("tpfy_model_v3_2/feature_prep/dot_prod_attention_pooling/cond/Merge:0", shape=(16, 32), dtype=float32)
fm_user Tensor("tpfy_model_v3_2/deepfm/fwfm/concat:0", shape=(16, 28, 32), dtype=float32)
fm_item Tensor("tpfy_model_v3_2/feature_prep/target_feature/target_embeddings:0", shape=(16, 9, 32), dtype=float32)
fwfm out Tensor("tpfy_mo

In [35]:
activation_values[0]

array([0.08695749, 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.78716743, 0.01773863, 0.        , 0.449311  ,
       0.18746579, 0.        , 0.        , 0.31603715, 0.        ,
       0.20602731, 0.78144383, 0.06048387, 0.        , 0.        ,
       0.26326728, 0.        , 0.        , 0.        , 0.08943954,
       0.        , 0.4442915 , 0.        , 0.        , 0.01635639,
       0.12391361, 0.        , 0.08880766, 0.0822864 , 0.        ,
       0.        , 0.        , 0.3119455 , 0.        , 0.08248432,
       0.        , 0.16048622, 0.        , 0.        , 0.        ,
       0.48351926, 0.7710632 , 0.02109431, 0.        , 0.        ,
       0.38850766, 0.71469355, 0.0494737 , 0.7755226 , 0.22736154,
       0.738459  , 0.6852269 , 0.18739124, 0.        , 0.        ,
       0.76795065, 0.78708273, 0.        , 0.        , 0.        ,
       0.        , 0.        , 0.        , 0.        , 0.        ,
       0.        , 0.79704624, 0.        , 0.        , 0.     

In [26]:
pred_values['click'][0]

array([-3.7573314], dtype=float32)

In [39]:
plain_weights['train/tpfy_model_v3/deepfm/linear/linear_bias:0'].shape

(2,)

In [59]:
relu_output = activation_values[0]
weights = plain_weights['train/tpfy_model_v3/deepfm/linear/linear_kernel:0']
bias = plain_weights['train/tpfy_model_v3/deepfm/linear/linear_bias:0']

relu_output_reshaped = tf.reshape(relu_output, (1, 128))
bias_reshaped = tf.reshape(bias, (1, 2))
activation = tf.matmul(relu_output_reshaped, weights)
output = activation + bias_reshaped

In [64]:
session.run(output)

array([[ 1.0678123, -0.4755299]], dtype=float32)

In [51]:
activations

<tf.Tensor 'Reshape_2:0' shape=(1, 128) dtype=float32>

In [36]:
activation_values[0].shape, plain_weights['train/tpfy_model_v3/deepfm/linear/linear_kernel:0'].shape

((128,), (128, 2))

In [44]:
activations = tf.matmul(activation_values[0].reshape(1, 128), plain_weights['train/tpfy_model_v3/deepfm/linear/linear_kernel:0'])

In [49]:
activations

<tf.Tensor 'MatMul_6:0' shape=(1, 2) dtype=float32>

In [41]:
tf.matmul(activation_values[0].reshape(1, 128), plain_weights['train/tpfy_model_v3/deepfm/linear/linear_kernel:0']).reshape(2,) + plain_weights['train/tpfy_model_v3/deepfm/linear/linear_bias:0']


AttributeError: 
        'Tensor' object has no attribute 'reshape'.
        If you are looking for numpy-related methods, please run the following:
        import tensorflow.python.ops.numpy_ops.np_config
        np_config.enable_numpy_behavior()