In [1]:
!pip uninstall -y tensorboard protobuf
!pip install protobuf==3.20.3 tensorboard==2.13.0


Found existing installation: tensorboard 2.18.0
Uninstalling tensorboard-2.18.0:
  Successfully uninstalled tensorboard-2.18.0
Found existing installation: protobuf 6.33.0
Uninstalling protobuf-6.33.0:
  Successfully uninstalled protobuf-6.33.0
[0m[31mERROR: Could not find a version that satisfies the requirement protobuf==3.20.3 (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for protobuf==3.20.3[0m[31m
[0m

In [3]:
# ============================================
# CTR MODEL - ARCHITECTURE EXACTE
# [Embeddings] ‚Üí [Transformer] ‚Üí [Concat avec features] ‚Üí [DCNv2 + DNN parall√®le] ‚Üí [MLP final]
#                     ‚îÇ
#                     ‚îî‚îÄ‚Üí [Sequence + Target] dans Transformer
# ============================================

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn import TransformerEncoder, TransformerEncoderLayer
import numpy as np
from typing import List, Dict
import pytorch_lightning as pl

# ==================== CONFIGURATION ====================
class Config:
    # Data
    MAX_SEQ_LEN = 100
    MAX_TAG_LEN = 10
    
    # Model dimensions
    EMBEDDING_DIM = 64  # UNIFORME pour toutes les embeddings
    ITEM_EMB_DIM = 128  # Dimension des embeddings d'items
    
    # Transformer
    TRANSFORMER_HEADS = 2      # 1 t√™te seulement
    TRANSFORMER_LAYERS = 4
    DIM_FEEDFORWARD = 256
    TRANSFORMER_DROPOUT = 0.2
    
    # DCN
    DCN_LAYERS = 3
    DCN_LOW_RANK = 64
    
    # DNN
    DNN_HIDDEN = [1024, 512, 256]  # Comme solution gagnante
    MLP_HIDDEN = [64, 32]          # Comme solution gagnante
    
    # Features extraction
    FIRST_K_COLS = 16     # Prendre les 16 derni√®res colonnes
    CONCAT_MAX_POOL = True  # Ajouter max pooling
    
    # Training
    LR = 5e-4
    EPOCHS = 7
    
    # Vocab sizes (doivent correspondre √† vos donn√©es)
    NUM_TAGS = 11740 + 1000
    LIKES_VOCAB = 11 + 5      # 0-10
    VIEWS_VOCAB = 11 + 5      # 0-10

# ==================== MOD√àLE PRINCIPAL ====================
class ExactCTRModel(nn.Module):
    """
    Architecture EXACTE :
    1. Embeddings uniformes
    2. Transformer(items + target SEULEMENT)
    3. Concat avec autres features
    4. DCNv2 + DNN parall√®le
    5. MLP final
    """
    
    def __init__(self, item_dim: int, num_users: int, config: Config = Config()):
        super().__init__()
        self.config = config
        
        # ========== 1. EMBEDDINGS UNIFORMES ==========
        self.user_embedding = nn.Embedding(num_users + 100, config.EMBEDDING_DIM, padding_idx=0)
        self.tag_embedding = nn.Embedding(config.NUM_TAGS + 100, config.EMBEDDING_DIM, padding_idx=0)
        self.likes_embedding = nn.Embedding(config.LIKES_VOCAB + 10, config.EMBEDDING_DIM, padding_idx=0)
        self.views_embedding = nn.Embedding(config.VIEWS_VOCAB + 10, config.EMBEDDING_DIM, padding_idx=0)
        
        # Dropout l√©ger pour embeddings
        self.emb_dropout = nn.Dropout(0.1)
        # Geler les embeddings (les rendre non-entra√Ænables)
        for emb in [self.user_embedding, self.tag_embedding, self.likes_embedding, self.views_embedding]:
            for param in emb.parameters():
                param.requires_grad = False

        
        # ========== 2. TRANSFORMER (ITEMS + TARGET SEULEMENT) ==========
        # Input: concat(item_emb, target_emb) = item_dim * 2
        transformer_input_dim = item_dim * 2
        
        self.transformer = TransformerEncoder(
            TransformerEncoderLayer(
                d_model=transformer_input_dim,
                nhead=config.TRANSFORMER_HEADS,
                dim_feedforward=config.DIM_FEEDFORWARD,
                dropout=config.TRANSFORMER_DROPOUT,
                batch_first=True,
                activation='relu'  # Exactement relu
            ),
            num_layers=config.TRANSFORMER_LAYERS
        )
        
        # ========== 3. CALCUL DES DIMENSIONS ==========
        # Dimension de sortie du transformer
        transformer_out_dim = self._get_transformer_output_dim(item_dim)
        
        # Dimension d'entr√©e pour DCN
        self.dcn_input_dim = (
            config.EMBEDDING_DIM * 4 +  # user + tags + likes + views
            2 +                         # numerics (likes_norm, views_norm)
            item_dim +                  # target original
            transformer_out_dim         # transformer output
        )
        
        # ========== 4. DCNv2 SIMPLE ==========
        self.dcn_layers, self.dcn_identities = self._build_dcn()
        
        # ========== 5. DNN PARALL√àLE ==========
        self.parallel_dnn = self._build_parallel_dnn()
        
        # ========== 6. MLP FINAL ==========
        self.final_mlp = self._build_final_mlp()
        
        # Initialisation
        self._init_weights()
    
    def _get_transformer_output_dim(self, item_dim: int) -> int:
        """Calcule la dimension de sortie du transformer"""
        transformer_input_dim = item_dim * 2
        transformer_out_dim = self.config.FIRST_K_COLS * transformer_input_dim
        
        if self.config.CONCAT_MAX_POOL:
            transformer_out_dim += transformer_input_dim
            
        return transformer_out_dim
    
    def _build_dcn(self) -> nn.Module:
        """Construit le DCNv2 simple"""
        layers = []
        x0_identity = []  # Pour garder x0
        
        for i in range(self.config.DCN_LAYERS):
            # Low-rank transformation: U et V
            U = nn.Linear(self.dcn_input_dim, self.config.DCN_LOW_RANK, bias=False)
            V = nn.Linear(self.config.DCN_LOW_RANK, self.dcn_input_dim)
            
            # Gating mechanism
            gate = nn.Linear(self.dcn_input_dim, self.dcn_input_dim)
            
            layers.append(nn.ModuleDict({
                'U': U,
                'V': V,
                'gate': gate
            }))
            x0_identity.append(nn.Identity())  # Pour garder x0
        
        return nn.ModuleList(layers), nn.ModuleList(x0_identity)
    
    def _build_parallel_dnn(self) -> nn.Sequential:
        """Construit le DNN parall√®le"""
        layers = []
        input_dim = self.dcn_input_dim
        
        for hidden_dim in self.config.DNN_HIDDEN:
            layers.extend([
                nn.Linear(input_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(0.2)
            ])
            input_dim = hidden_dim
        
        return nn.Sequential(*layers)
    
    def _build_final_mlp(self) -> nn.Sequential:
        """Construit le MLP final"""
        # Input: dcn_output + dnn_output
        final_input_dim = self.dcn_input_dim + self.config.DNN_HIDDEN[-1]
        
        layers = []
        input_dim = final_input_dim
        
        for hidden_dim in self.config.MLP_HIDDEN:
            layers.extend([
                nn.Linear(input_dim, hidden_dim),
                nn.ReLU(),
                nn.Dropout(0.2)
            ])
            input_dim = hidden_dim
        
        # Couche de sortie
        layers.append(nn.Linear(input_dim, 1))
        
        return nn.Sequential(*layers)
    
    def _init_weights(self):
        """Initialisation simple des poids"""
        for name, module in self.named_modules():
            if isinstance(module, nn.Linear):
                if 'final' in name or 'output' in name:
                    nn.init.xavier_uniform_(module.weight, gain=0.01)
                else:
                    nn.init.xavier_uniform_(module.weight, gain=1.0)
                
                if module.bias is not None:
                    nn.init.zeros_(module.bias)
            
            elif isinstance(module, nn.Embedding):
                nn.init.normal_(module.weight, mean=0, std=0.01)
    
    def forward(self, 
                tgt_vec: torch.Tensor,      # [B, item_dim]
                seq_vecs: torch.Tensor,     # [B, seq_len, item_dim]
                seq_mask: torch.Tensor,     # [B, seq_len]
                numerics: torch.Tensor,     # [B, 2] (likes_norm, views_norm)
                user_ids: torch.Tensor,     # [B]
                tag_ids: torch.Tensor,      # [B, max_tags]
                tag_mask: torch.Tensor,     # [B, max_tags]
                likes: torch.Tensor,        # [B]
                views: torch.Tensor         # [B]
               ) -> torch.Tensor:
        """
        Forward EXACTEMENT selon l'architecture :
        1. Embeddings
        2. Transformer(items + target)
        3. Concat avec autres features
        4. DCNv2 + DNN parall√®le
        5. MLP final
        """
        batch_size, seq_len, item_dim = seq_vecs.shape
        
        # ========== 1. EMBEDDINGS SIMPLES ==========
        user_emb = self.emb_dropout(self.user_embedding(user_ids))          # [B, D]
        
        # Tags: simple mean pooling
        tag_emb = self.emb_dropout(self.tag_embedding(tag_ids))            # [B, max_tags, D]
        tag_mask_expanded = tag_mask.unsqueeze(-1).float()                 # [B, max_tags, 1]
        tag_repr = (tag_emb * tag_mask_expanded).sum(dim=1)                # [B, D]
        tag_repr = tag_repr / tag_mask.sum(dim=1, keepdim=True).clamp(min=1)  # [B, D]
        likes = likes.clamp(0, self.likes_embedding.num_embeddings - 1)
        views = views.clamp(0, self.views_embedding.num_embeddings - 1)

        likes_emb = self.emb_dropout(self.likes_embedding(likes))
        views_emb = self.emb_dropout(self.views_embedding(views))

                  # [B, D]
                 # [B, D]
        
        # ========== 2. TRANSFORMER (ITEMS + TARGET SEULEMENT) ==========
        # Expansion du target pour chaque position de s√©quence
        tgt_expanded = tgt_vec.unsqueeze(1).expand(-1, seq_len, -1)        # [B, seq_len, item_dim]
        
        # Concat√©nation: items + target (SEULEMENT ces deux!)
        transformer_input = torch.cat([seq_vecs, tgt_expanded], dim=-1)    # [B, seq_len, item_dim*2]
        
        # Transformer
        transformer_out = self.transformer(
            src=transformer_input,
            src_key_padding_mask=~seq_mask
        )                                                                  # [B, seq_len, item_dim*2]
        
        # ========== 3. EXTRACTION DES FEATURES DU TRANSFORMER ==========
        features_list = []
        
        # a) First K columns
        first_k_features = transformer_out[:, -self.config.FIRST_K_COLS:, :]  # [B, K, item_dim*2]
        first_k_features = first_k_features.flatten(start_dim=1)              # [B, K * item_dim*2]
        features_list.append(first_k_features)
        
        # b) Max pooling (optionnel)
        if self.config.CONCAT_MAX_POOL:
            # Mask pour max pooling
            transformer_masked = transformer_out.masked_fill(
                ~seq_mask.unsqueeze(-1), 
                float('-inf')
            )
            max_pooled = transformer_masked.max(dim=1).values                # [B, item_dim*2]
            features_list.append(max_pooled)
        
        # Concat√©nation des features du transformer
        transformer_features = torch.cat(features_list, dim=1)              # [B, transformer_out_dim]
        
        # ========== 4. CONCAT√âNATION AVEC AUTRES FEATURES ==========
        # Ordre EXACT: transformer ‚Üí user ‚Üí tags ‚Üí likes ‚Üí views ‚Üí numerics ‚Üí target
        dcn_input = torch.cat([
            transformer_features,  # [B, transformer_out_dim]
            user_emb,              # [B, D]
            tag_repr,              # [B, D]
            likes_emb,             # [B, D]
            views_emb,             # [B, D]
            numerics,              # [B, 2]
            tgt_vec                # [B, item_dim]
        ], dim=1)                                                          # [B, dcn_input_dim]
        
        # ========== 5. DCNv2 ==========
        dcn_layers, x0_identity = self.dcn_layers, self.dcn_identities
        x0 = dcn_input  # Sauvegarde de l'input original
        
        for i, layer in enumerate(dcn_layers):
            # Garder x0 pour cette couche
            x0_i = x0_identity[i](x0)
            
            # Low-rank transformation
            u = layer['U'](dcn_input)                    # [B, low_rank]
            v = layer['V'](u)                            # [B, dcn_input_dim]
            
            # Gating
            gate = torch.sigmoid(layer['gate'](dcn_input))  # [B, dcn_input_dim]
            
            # Interaction cross
            cross = gate * (x0_i * v)                     # [B, dcn_input_dim]
            
            # Residual connection
            dcn_input = dcn_input + cross
        
        dcn_output = dcn_input  # [B, dcn_input_dim]
        
        # ========== 6. DNN PARALL√àLE ==========
        dnn_output = self.parallel_dnn(x0)                # [B, DNN_HIDDEN[-1]]
        
        # ========== 7. MLP FINAL ==========
        # Concat√©nation: dcn_output + dnn_output
        final_input = torch.cat([dcn_output, dnn_output], dim=1)  # [B, dcn_input_dim + DNN_HIDDEN[-1]]
        
        # Pr√©diction finale
        logits = self.final_mlp(final_input).squeeze(-1)  # [B]
        
        return logits
###################
class CTRLightningModule(pl.LightningModule):
    """Module Lightning avec s√©curit√© des indices"""
    
    def __init__(self, model: nn.Module, learning_rate: float = Config.LR):
        super().__init__()
        self.model = model
        self.learning_rate = learning_rate
        
        # Loss function
        self.criterion = nn.BCEWithLogitsLoss()
        
        # Pour le monitoring
        self.val_targets = []
        self.val_preds = []
        
        self.save_hyperparameters(ignore=['model'])
    
    def safe_forward(self, batch):
        """Forward avec s√©curit√© des indices"""
        # Extraire les tailles d'embeddings du mod√®le
        user_emb_size = self.model.user_embedding.num_embeddings
        tag_emb_size = self.model.tag_embedding.num_embeddings
        likes_emb_size = self.model.likes_embedding.num_embeddings
        views_emb_size = self.model.views_embedding.num_embeddings
        
        # Clamp s√©curitaire
        batch['user_ids'] = torch.clamp(batch['user_ids'], 0, user_emb_size - 1)
        batch['tag_ids'] = torch.clamp(batch['tag_ids'], 0, tag_emb_size - 1)
        batch['likes'] = torch.clamp(batch['likes'], 0, likes_emb_size - 1)
        batch['views'] = torch.clamp(batch['views'], 0, views_emb_size - 1)
        
        return self.model(
            tgt_vec=batch['tgt_vec'],
            seq_vecs=batch['seq_vecs'],
            seq_mask=batch['seq_mask'],
            numerics=batch['numerics'],
            user_ids=batch['user_ids'],
            tag_ids=batch['tag_ids'],
            tag_mask=batch['tag_mask'],
            likes=batch['likes'],
            views=batch['views']
        )
    
    def forward(self, batch: Dict[str, torch.Tensor]) -> torch.Tensor:
        device = next(self.model.parameters()).device
        batch_on_device = {}
        return self.model(
            tgt_vec=batch['tgt_vec'],
            seq_vecs=batch['seq_vecs'],
            seq_mask=batch['seq_mask'],
            numerics=batch['numerics'],
            user_ids=batch['user_ids'],
            tag_ids=batch['tag_ids'],
            tag_mask=batch['tag_mask'],
            likes=batch['likes'],
            views=batch['views']
        )
    
    def training_step(self, batch: Dict[str, torch.Tensor], batch_idx: int):
        logits = self.forward(batch)
        targets = batch['labels']
        
        loss = self.criterion(logits, targets)
        
        self.log('train_loss', loss, prog_bar=True, on_step=True, on_epoch=True)
        return loss
    
    def validation_step(self, batch: Dict[str, torch.Tensor], batch_idx: int):
        logits = self.forward(batch)
        targets = batch['labels']
        
        loss = self.criterion(logits, targets)
        
        # Pour AUC
        probs = torch.sigmoid(logits).detach()
        self.val_targets.extend(targets.cpu().numpy().tolist())
        self.val_preds.extend(probs.cpu().numpy().tolist())
        
        self.log('val_loss', loss, prog_bar=True, on_epoch=True)
        return loss
    
    def on_validation_epoch_end(self):
        if len(self.val_targets) > 0:
            try:
                from sklearn.metrics import roc_auc_score
                auc = roc_auc_score(self.val_targets, self.val_preds)
                self.log('val_auc', auc, prog_bar=True)
                
                print(f"\nEpoch {self.current_epoch} - Validation AUC: {auc:.4f}")
                
            except Exception as e:
                print(f"Erreur calcul AUC: {e}")
                self.log('val_auc', 0.5, prog_bar=True)
            
            # R√©initialisation
            self.val_targets.clear()
            self.val_preds.clear()
    
    def configure_optimizers(self):
        """Optimizer simple (Adam)"""
        optimizer = torch.optim.Adam(
            self.parameters(),
            lr=self.learning_rate,
            betas=(0.9, 0.999),
            eps=1e-8
        )
        
        return optimizer
    
    def predict_step(self, batch: Dict[str, torch.Tensor], batch_idx: int):
        """√âtape de pr√©diction"""
        with torch.no_grad():
            logits = self.forward(batch)
            probs = torch.sigmoid(logits)
        return probs

# ==================== UTILITAIRES ====================
def create_collate_fn_simple(item_embeddings: torch.Tensor):
    """Fonction de collation simple"""
    
    def collate_fn(batch: List[Dict]):
        batch_size = len(batch)
        
        # R√©cup√©ration des donn√©es
        tgt_idxs = torch.LongTensor([b['tgt_idx'] for b in batch])
        user_ids = torch.LongTensor([b['user_id'] for b in batch])
        likes = torch.LongTensor([b['likes'] for b in batch])
        views = torch.LongTensor([b['views'] for b in batch])
        numerics = torch.from_numpy(np.stack([b['numerics'] for b in batch])).float()
        labels = torch.FloatTensor([b['label'] for b in batch])
        
        # S√©quences
        seq_lengths = [len(b['seq_idxs']) for b in batch]
        max_seq_len = max(seq_lengths)
        
        seq_idxs = torch.zeros(batch_size, max_seq_len, dtype=torch.long)
        seq_mask = torch.zeros(batch_size, max_seq_len, dtype=torch.bool)
        
        for i, b in enumerate(batch):
            seq_len = len(b['seq_idxs'])
            seq_idxs[i, :seq_len] = torch.LongTensor(b['seq_idxs'])
            seq_mask[i, :seq_len] = True
        
        # Tags
        tag_lengths = [len(b['tags']) for b in batch]
        max_tag_len = max(tag_lengths)
        
        tag_ids = torch.zeros(batch_size, max_tag_len, dtype=torch.long)
        tag_mask = torch.zeros(batch_size, max_tag_len, dtype=torch.bool)
        
        for i, b in enumerate(batch):
            tag_len = len(b['tags'])
            tag_ids[i, :tag_len] = torch.LongTensor(b['tags'])
            tag_mask[i, :tag_len] = True
        
        # Embeddings (sur CPU pour DataLoader)
        if item_embeddings.device.type != 'cpu':
            item_emb_cpu = item_embeddings.cpu()
        else:
            item_emb_cpu = item_embeddings
        
        tgt_vec = item_emb_cpu[tgt_idxs]
        seq_vecs = item_emb_cpu[seq_idxs]
        
        return {
            'tgt_vec': tgt_vec,
            'seq_vecs': seq_vecs,
            'seq_mask': seq_mask,
            'numerics': numerics,
            'labels': labels,
            'user_ids': user_ids,
            'tag_ids': tag_ids,
            'tag_mask': tag_mask,
            'likes': likes,
            'views': views
        }
    
    return collate_fn

# ==================== ENTR√âE PRINCIPALE ====================
if __name__ == "__main__":
    # Test simple du mod√®le
    config = Config()
    
    # Dimensions de test
    batch_size = 32
    seq_len = 50
    item_dim = 128
    num_users = 1000
    
    # Cr√©ation du mod√®le
    model = ExactCTRModel(
        item_dim=item_dim,
        num_users=num_users,
        config=config
    )
    
    # Donn√©es d'exemple
    tgt_vec = torch.randn(batch_size, item_dim)
    seq_vecs = torch.randn(batch_size, seq_len, item_dim)
    seq_mask = torch.ones(batch_size, seq_len, dtype=torch.bool)
    numerics = torch.randn(batch_size, 2)
    user_ids = torch.randint(0, num_users, (batch_size,))
    tag_ids = torch.randint(0, config.NUM_TAGS, (batch_size, 5))
    tag_mask = torch.ones(batch_size, 5, dtype=torch.bool)
    likes = torch.randint(0, config.LIKES_VOCAB, (batch_size,))
    views = torch.randint(0, config.VIEWS_VOCAB, (batch_size,))
    
    # Forward pass
    logits = model(
        tgt_vec=tgt_vec,
        seq_vecs=seq_vecs,
        seq_mask=seq_mask,
        numerics=numerics,
        user_ids=user_ids,
        tag_ids=tag_ids,
        tag_mask=tag_mask,
        likes=likes,
        views=views
    )
    
    print(f"‚úì Mod√®le cr√©√© avec succ√®s!")
    print(f"  ‚Ä¢ Input shape: {batch_size} batchs")
    print(f"  ‚Ä¢ Output shape: {logits.shape}")
    print(f"  ‚Ä¢ Architecture v√©rifi√©e ‚úì")
    
    # Compter les param√®tres
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    
    print(f"\nüìä Statistiques du mod√®le:")
    print(f"  ‚Ä¢ Param√®tres totaux: {total_params:,}")
    print(f"  ‚Ä¢ Param√®tres entra√Ænables: {trainable_params:,}")
    
    # V√©rification de l'architecture
    print(f"\n‚úÖ ARCHITECTURE V√âRIFI√âE:")
    print(f"  1. Embeddings uniformes (64D) ‚úì")
    print(f"  2. Transformer(items + target seulement) ‚úì")
    print(f"  3. Concat avec autres features ‚úì")
    print(f"  4. DCNv2 + DNN parall√®le ‚úì")
    print(f"  5. MLP final ‚úì")

‚úì Mod√®le cr√©√© avec succ√®s!
  ‚Ä¢ Input shape: 32 batchs
  ‚Ä¢ Output shape: torch.Size([32])
  ‚Ä¢ Architecture v√©rifi√©e ‚úì

üìä Statistiques du mod√®le:
  ‚Ä¢ Param√®tres totaux: 77,503,001
  ‚Ä¢ Param√®tres entra√Ænables: 76,607,513

‚úÖ ARCHITECTURE V√âRIFI√âE:
  1. Embeddings uniformes (64D) ‚úì
  2. Transformer(items + target seulement) ‚úì
  3. Concat avec autres features ‚úì
  4. DCNv2 + DNN parall√®le ‚úì
  5. MLP final ‚úì


In [4]:
import numpy as np
import polars as plrs
from torch.utils.data import Dataset
from typing import List, Dict, Optional
from tqdm.auto import tqdm

class FeatureNormalizer:
    """Normalise les features num√©riques (likes_level, views_level)"""
    
    def __init__(self):
        self.stats = {}
    
    def fit(self, df: plrs.DataFrame, numeric_cols: List[str]):
        """Calcule les statistiques sur le train set"""
        for col in numeric_cols:
            if col in df.columns:
                mean_val = float(df[col].mean())
                std_val = float(df[col].std())
                self.stats[col] = {
                    'mean': mean_val,
                    'std': std_val if std_val > 0 else 1.0,
                    'min': float(df[col].min()),
                    'max': float(df[col].max())
                }
        return self
    
    def transform(self, df: plrs.DataFrame):
        """Applique la normalisation standard scaling"""
        df_norm = df.clone()
        for col, stat in self.stats.items():
            if col in df.columns:
                df_norm = df_norm.with_columns(
                    ((plrs.col(col) - stat['mean']) / stat['std']).alias(col)
                )
        return df_norm


In [5]:
import pandas as pd
class CTRDataset(Dataset):
    """Dataset pour CTR prediction - VERSION EXACTE pour l'architecture"""
    
    def __init__(self, 
                 df: pd.DataFrame, 
                 id2idx: Dict[int, int],
                 normalizer: Optional[FeatureNormalizer] = None,
                 max_seq_len: int = 100,
                 max_tag_len: int = 5,
                 is_train: bool = True):
        
        self.df = df.to_pandas()
        self.id2idx = id2idx
        self.normalizer = normalizer
        self.max_seq_len = max_seq_len
        self.max_tag_len = max_tag_len
        self.is_train = is_train
        
        # Pr√©-calcul
        self._preprocess_data()
    
    def _preprocess_data(self):
        """Pr√©process les donn√©es pour l'architecture exacte"""
        self.processed_data = []
        
        for idx in tqdm(range(len(self.df)), desc="Preprocessing"):
            r = self.df.iloc[idx]
            
            # 1. Item target (index)
            tgt_idx = self.id2idx.get(int(r['item_id']), 0)
            
            # 2. S√©quence d'items (pour transformer)
            seq = r.get('item_seq', [])
            if isinstance(seq, str):
                try:
                    seq = eval(seq)
                except:
                    seq = []
            # Garder les derniers items (ordre chronologique)
            seq_idxs = [self.id2idx.get(int(i), 0) for i in seq[-self.max_seq_len:]]
            
            # 3. Tags (pour embedding simple)
            tags = r.get('item_tags', [0])
            if isinstance(tags, str):
                try:
                    tags = eval(tags)
                except:
                    tags = [0]
            tags = [int(t) for t in tags[:self.max_tag_len]]
            
            # 4. Features cat√©gorielles (pour embeddings)
            likes = int(r.get('likes_level', 0))
            views = int(r.get('views_level', 0))
            
            # 5. Features num√©riques NORMALIS√âES (pour concat apr√®s transformer)
            if self.normalizer:
                # Normalisation standard scaling
                likes_norm = (float(r.get('likes_level', 0)) - self.normalizer.stats['likes_level']['mean']) / self.normalizer.stats['likes_level']['std']
                views_norm = (float(r.get('views_level', 0)) - self.normalizer.stats['views_level']['mean']) / self.normalizer.stats['views_level']['std']
            else:
                likes_norm = float(r.get('likes_level', 0))
                views_norm = float(r.get('views_level', 0))
            
            numerics = np.array([likes_norm, views_norm], dtype=np.float32)
            
            # 6. Label (0 ou 1)
            label = np.float32(r['label']) if 'label' in r.index else 0.0
            
            # 7. User ID (pour embedding)
            user_id = int(r['user_id'])
            
            # Stockage dans le format EXACT pour l'architecture
            self.processed_data.append({
                'tgt_idx': tgt_idx,      # Index de l'item target
                'seq_idxs': seq_idxs,    # S√©quence d'items (pour transformer)
                'tags': tags,            # Tags (pour embedding simple)
                'likes': likes,          # Likes level (cat√©goriel, 0-10)
                'views': views,          # Views level (cat√©goriel, 0-10)
                'numerics': numerics,    # Features num√©riques normalis√©es
                'label': label,          # Label binaire
                'user_id': user_id       # User ID
            })
    
    def __len__(self):
        return len(self.processed_data)
    
    def __getitem__(self, idx):
        """Retourne un √©chantillon - format EXACT pour l'architecture"""
        return self.processed_data[idx]

In [6]:
# train_simple.py
import torch
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
import numpy as np
import pandas as pd
import polars as plrs
from tqdm.auto import tqdm

def main():
    print("=" * 70)
    print("ENTRA√éNEMENT - ARCHITECTURE EXACTE")
    print("=" * 70)
    
    # Configuration
    config = Config()
    
    # Chemins des donn√©es
    DATA_PATH = "/kaggle/input/www2025-mmctr-data/MicroLens_1M_MMCTR/MicroLens_1M_x1"
    
    # 1. Chargement des donn√©es
    print("\n1. Chargement des donn√©es...")
    train_df = plrs.read_parquet(f"{DATA_PATH}/train.parquet")
    valid_df = plrs.read_parquet(f"{DATA_PATH}/valid.parquet")
    test_df = plrs.read_parquet(f"{DATA_PATH}/test.parquet")
    item_info = plrs.read_parquet(f"{DATA_PATH}/item_info.parquet")
    
    # 2. Normalisation
    print("\n2. Normalisation...")
    normalizer = FeatureNormalizer()
    normalizer.fit(train_df, ['likes_level', 'views_level'])
    train_df = normalizer.transform(train_df)
    valid_df = normalizer.transform(valid_df)
    test_df = normalizer.transform(test_df)
    
    # 3. Pr√©paration embeddings
    print("\n3. Pr√©paration embeddings...")
    all_item_ids = [r['item_id'] for r in item_info.to_dicts()]
    id2idx = {item_id: idx for idx, item_id in enumerate(all_item_ids)}
    
    item_embeddings = torch.from_numpy(
        np.stack(item_info['item_emb_d128'].to_list())
    ).float().cpu()
    
    # 4. Datasets et DataLoaders
    print("\n4. Cr√©ation DataLoaders...")
    collate_fn = create_collate_fn_simple(item_embeddings)
    
    train_dataset = CTRDataset(train_df, id2idx, normalizer)
    valid_dataset = CTRDataset(valid_df, id2idx, normalizer)
    test_dataset = CTRDataset(test_df, id2idx, normalizer)
    
    train_loader = DataLoader(
        train_dataset,
        batch_size=128,
        shuffle=True,
        num_workers=0,
        collate_fn=collate_fn
    )
    
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=256,
        shuffle=False,
        num_workers=0,
        collate_fn=collate_fn
    )
    
    test_loader = DataLoader(
        test_dataset,
        batch_size=256,
        shuffle=False,
        num_workers=0,
        collate_fn=collate_fn
    )
    
    # 5. Cr√©ation du mod√®le - CORRECTION AJOUT√âE ICI
    print("\n5. Cr√©ation du mod√®le...")
    num_users = int(max(
        train_df['user_id'].max(),
        valid_df['user_id'].max(),
        test_df['user_id'].max()
    )) + 5000
    
    # AJOUT: Calculer les valeurs max pour les autres vocabulaires
    print("  ‚Ä¢ Calcul des vocabulaires...")
    
    # Pour les tags - extraire la valeur max des listes de tags
    def get_max_tag(df):
        if 'item_tags' in df.columns:
            # Extraire toutes les valeurs des listes
            tags_series = df['item_tags'].explode()
            return int(tags_series.max()) if tags_series is not None else 0
        return 0
    
    max_tag = max(
        get_max_tag(train_df),
        get_max_tag(valid_df),
        get_max_tag(test_df)
    ) + 1000  # Marge
    
    # Pour likes et views
    max_like = int(max(
        train_df['likes_level'].max(),
        valid_df['likes_level'].max(),
        test_df['likes_level'].max()
    )) + 10  # Marge
    
    max_view = int(max(
        train_df['views_level'].max(),
        valid_df['views_level'].max(),
        test_df['views_level'].max()
    )) + 10  # Marge
    
    # Mettre √† jour la config
    config.NUM_TAGS = max_tag
    config.LIKES_VOCAB = max_like
    config.VIEWS_VOCAB = max_view
    
    print(f"  ‚Ä¢ Vocabulaires ajust√©s:")
    print(f"    - NUM_TAGS: {config.NUM_TAGS}")
    print(f"    - LIKES_VOCAB: {config.LIKES_VOCAB}")
    print(f"    - VIEWS_VOCAB: {config.VIEWS_VOCAB}")
    
    # Cr√©ation du mod√®le avec les nouvelles valeurs
    model = ExactCTRModel(
        item_dim=item_embeddings.shape[1],
        num_users=num_users,
        config=config
    )
    
    # AJOUT: Test sur CPU avant GPU
    print("  ‚Ä¢ Test forward sur CPU...")
    try:
        # Prendre un batch de test
        test_batch = next(iter(train_loader))
        
        # Test sur CPU
        with torch.no_grad():
            logits = model(
                tgt_vec=test_batch['tgt_vec'],
                seq_vecs=test_batch['seq_vecs'],
                seq_mask=test_batch['seq_mask'],
                numerics=test_batch['numerics'],
                user_ids=test_batch['user_ids'],
                tag_ids=test_batch['tag_ids'],
                tag_mask=test_batch['tag_mask'],
                likes=test_batch['likes'],
                views=test_batch['views']
            )
        print(f"  ‚úì Test forward r√©ussi!")
        
    except Exception as e:
        print(f"  ‚úó Erreur lors du test CPU: {e}")
        print("  ‚Ä¢ V√©rification des valeurs max:")
        print(f"    user_ids max: {test_batch['user_ids'].max().item()}")
        print(f"    tag_ids max: {test_batch['tag_ids'].max().item()}")
        print(f"    likes max: {test_batch['likes'].max().item()}")
        print(f"    views max: {test_batch['views'].max().item()}")
        raise
    
    # D√©placer sur GPU si disponible
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"  ‚Ä¢ D√©placement sur {device}...")
    model = model.to(device)
    
    # 6. Lightning Module
    lightning_model = CTRLightningModule(
        model=model,
        learning_rate=config.LR
    )
    
    # 7. Callbacks
    checkpoint_callback = ModelCheckpoint(
        monitor='val_auc',
        mode='max',
        save_top_k=1,
        filename='ctr_exact_{epoch:02d}_{val_auc:.4f}'
    )
    
    early_stop_callback = EarlyStopping(
        monitor='val_auc',
        mode='max',
        patience=3,
        min_delta=0.001
    )
    
    # 8. Trainer - AJOUT: Option de fallback sur CPU si CUDA √©choue
    try:
        trainer = pl.Trainer(
            max_epochs=config.EPOCHS,
            accelerator='gpu' if torch.cuda.is_available() else 'cpu',
            devices=1,
            precision='32-true',
            gradient_clip_val=1.0,
            callbacks=[checkpoint_callback, early_stop_callback],
            enable_progress_bar=True,
            log_every_n_steps=50
        )
    except:
        print("  ‚Ä¢ Fallback sur CPU training...")
        trainer = pl.Trainer(
            max_epochs=config.EPOCHS,
            accelerator='cpu',
            devices=1,
            precision='32-true',
            gradient_clip_val=1.0,
            callbacks=[checkpoint_callback, early_stop_callback],
            enable_progress_bar=True,
            log_every_n_steps=50
        )
    
    # 9. Entra√Ænement
    print("\n6. D√©but entra√Ænement...")
    print("=" * 70)
    
    trainer.fit(
        model=lightning_model,
        train_dataloaders=train_loader,
        val_dataloaders=valid_loader
    )
    
    # 10. Pr√©dictions
    print("\n7. Pr√©dictions...")
    lightning_model.model.eval()
    predictions = []
    
    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Test predictions"):
            # Move to device
            batch = {k: v.to(device) if isinstance(v, torch.Tensor) else v 
                    for k, v in batch.items()}
            
            # Forward
            logits = lightning_model.model(
                tgt_vec=batch['tgt_vec'],
                seq_vecs=batch['seq_vecs'],
                seq_mask=batch['seq_mask'],
                numerics=batch['numerics'],
                user_ids=batch['user_ids'],
                tag_ids=batch['tag_ids'],
                tag_mask=batch['tag_mask'],
                likes=batch['likes'],
                views=batch['views']
            )
            
            probs = torch.sigmoid(logits)
            predictions.extend(probs.cpu().numpy().tolist())
    
    # 11. Sauvegarde
    submission = pd.DataFrame({
        'user_id': test_df['user_id'].to_list(),
        'item_id': test_df['item_id'].to_list(),
        'prediction': predictions
    })
    
    submission.to_csv('submission_exact.csv', index=False)
    
    print(f"\n‚úÖ Fichier sauvegard√©: submission_exact.csv")
    print(f"üìä AUC estim√©: {checkpoint_callback.best_model_score:.4f}")

if __name__ == "__main__":
    main()

ENTRA√éNEMENT - ARCHITECTURE EXACTE

1. Chargement des donn√©es...

2. Normalisation...

3. Pr√©paration embeddings...

4. Cr√©ation DataLoaders...


Preprocessing:   0%|          | 0/3600000 [00:00<?, ?it/s]

KeyboardInterrupt: 

In [17]:
import os
os.environ['CUDA_LAUNCH_BLOCKING'] = '1'  # Force le debug CUDA

# Ajoutez au d√©but de main()
print("Debug CUDA activ√©...")

Debug CUDA activ√©...


In [None]:
# train_simple.py
import os
import torch
from torch.utils.data import DataLoader
import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint, EarlyStopping
import numpy as np
import pandas as pd
import polars as plrs
from tqdm.auto import tqdm

def main():
    print("=" * 70)
    print("ENTRA√éNEMENT - ARCHITECTURE EXACTE")
    print("=" * 70)
    
    # Configuration
    config = Config()
    
    # Chemins des donn√©es
    DATA_PATH = "/kaggle/input/www2025-mmctr-data/MicroLens_1M_MMCTR/MicroLens_1M_x1"
    
    # 1. Chargement des donn√©es
    print("\n1. Chargement des donn√©es...")
    train_df = plrs.read_parquet(f"{DATA_PATH}/train.parquet")
    valid_df = plrs.read_parquet(f"{DATA_PATH}/valid.parquet")
    test_df = plrs.read_parquet(f"{DATA_PATH}/test.parquet")
    item_info = plrs.read_parquet(f"{DATA_PATH}/item_info.parquet")
    
    # 2. Normalisation
    print("\n2. Normalisation...")
    normalizer = FeatureNormalizer()
    normalizer.fit(train_df, ['likes_level', 'views_level'])
    train_df = normalizer.transform(train_df)
    valid_df = normalizer.transform(valid_df)
    test_df = normalizer.transform(test_df)
    
    # 3. Pr√©paration embeddings
    print("\n3. Pr√©paration embeddings...")
    all_item_ids = [r['item_id'] for r in item_info.to_dicts()]
    id2idx = {item_id: idx for idx, item_id in enumerate(all_item_ids)}
    
    item_embeddings = torch.from_numpy(
        np.stack(item_info['item_emb_d128'].to_list())
    ).float().cpu()
    
    # 4. Datasets et DataLoaders
    print("\n4. Cr√©ation DataLoaders...")
    collate_fn = create_collate_fn_simple(item_embeddings)
    
    train_dataset = CTRDataset(train_df, id2idx, normalizer)
    valid_dataset = CTRDataset(valid_df, id2idx, normalizer)
    test_dataset = CTRDataset(test_df, id2idx, normalizer)
    
    train_loader = DataLoader(
        train_dataset,
        batch_size=128,
        shuffle=True,
        num_workers=0,
        collate_fn=collate_fn
    )
    
    valid_loader = DataLoader(
        valid_dataset,
        batch_size=256,
        shuffle=False,
        num_workers=0,
        collate_fn=collate_fn
    )
    
    test_loader = DataLoader(
        test_dataset,
        batch_size=256,
        shuffle=False,
        num_workers=0,
        collate_fn=collate_fn
    )
    
    # 5. Cr√©ation du mod√®le - CORRECTION AJOUT√âE ICI
    print("\n5. Cr√©ation du mod√®le...")
    num_users = int(max(
        train_df['user_id'].max(),
        valid_df['user_id'].max(),
        test_df['user_id'].max()
    )) + 5000
    
    # AJOUT: Calculer les valeurs max pour les autres vocabulaires
    print("  ‚Ä¢ Calcul des vocabulaires...")
    
    # Pour les tags - extraire la valeur max des listes de tags
    def get_max_tag(df):
        if 'item_tags' in df.columns:
            tags_series = df['item_tags'].explode()
            return int(tags_series.max()) if tags_series is not None else 0
        return 0
    
    max_tag = max(
        get_max_tag(train_df),
        get_max_tag(valid_df),
        get_max_tag(test_df)
    ) + 1000  # Marge
    
    # Pour likes et views
    max_like = int(max(
        train_df['likes_level'].max(),
        valid_df['likes_level'].max(),
        test_df['likes_level'].max()
    )) + 10  # Marge
    
    max_view = int(max(
        train_df['views_level'].max(),
        valid_df['views_level'].max(),
        test_df['views_level'].max()
    )) + 10  # Marge
    
    # Mettre √† jour la config
    config.NUM_TAGS = max_tag
    config.LIKES_VOCAB = max_like
    config.VIEWS_VOCAB = max_view
    
    print(f"  ‚Ä¢ Vocabulaires ajust√©s:")
    print(f"    - NUM_TAGS: {config.NUM_TAGS}")
    print(f"    - LIKES_VOCAB: {config.LIKES_VOCAB}")
    print(f"    - VIEWS_VOCAB: {config.VIEWS_VOCAB}")
    
    # Cr√©ation du mod√®le avec les nouvelles valeurs
    model = ExactCTRModel(
        item_dim=item_embeddings.shape[1],
        num_users=num_users,
        config=config
    )
    
    # AJOUT: Test sur CPU avant GPU
    print("  ‚Ä¢ Test forward sur CPU...")
    try:
        test_batch = next(iter(train_loader))
        with torch.no_grad():
            logits = model(
                tgt_vec=test_batch['tgt_vec'],
                seq_vecs=test_batch['seq_vecs'],
                seq_mask=test_batch['seq_mask'],
                numerics=test_batch['numerics'],
                user_ids=test_batch['user_ids'],
                tag_ids=test_batch['tag_ids'],
                tag_mask=test_batch['tag_mask'],
                likes=test_batch['likes'],
                views=test_batch['views']
            )
        print(f"  ‚úì Test forward r√©ussi!")
    except Exception as e:
        print(f"  ‚úó Erreur lors du test CPU: {e}")
        raise
    
    # D√©placer sur GPU si disponible
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    print(f"  ‚Ä¢ D√©placement sur {device}...")
    model = model.to(device)
    
    # 6. Lightning Module
    lightning_model = CTRLightningModule(
        model=model,
        learning_rate=config.LR
    )
    
    # 7. Callbacks
    checkpoint_callback = ModelCheckpoint(
        monitor='val_auc',
        mode='max',
        save_top_k=1,
        filename='ctr_exact_{epoch:02d}_{val_auc:.4f}'
    )
    
    early_stop_callback = EarlyStopping(
        monitor='val_auc',
        mode='max',
        patience=3,
        min_delta=0.001
    )
    
    # === üîÅ OPTION : reprise depuis un checkpoint pr√©c√©dent ===
    resume_checkpoint = None
    ckpt_dir = "/kaggle/working/lightning_logs/version_4/checkpoints"
    if os.path.exists(ckpt_dir):
        ckpts = [os.path.join(ckpt_dir, f) for f in os.listdir(ckpt_dir) if f.endswith(".ckpt")]
        if len(ckpts) > 0:
            resume_checkpoint = max(ckpts, key=os.path.getmtime)
            print(f"  ‚Ä¢ Reprise depuis le checkpoint : {resume_checkpoint}")
            config.EPOCHS += 10
        else:
            print("  ‚Ä¢ Aucun checkpoint trouv√©, entra√Ænement depuis z√©ro.")
    else:
        print("  ‚Ä¢ Aucun checkpoint trouv√©, entra√Ænement depuis z√©ro.")
    
    # 8. Trainer
    try:
        trainer = pl.Trainer(
            max_epochs=config.EPOCHS,
            accelerator='gpu' if torch.cuda.is_available() else 'cpu',
            devices=1,
            precision='32-true',
            gradient_clip_val=1.0,
            callbacks=[checkpoint_callback, early_stop_callback],
            enable_progress_bar=True,
            log_every_n_steps=50
        )
    except:
        print("  ‚Ä¢ Fallback sur CPU training...")
        trainer = pl.Trainer(
            max_epochs=config.EPOCHS,
            accelerator='cpu',
            devices=1,
            precision='32-true',
            gradient_clip_val=1.0,
            callbacks=[checkpoint_callback, early_stop_callback],
            enable_progress_bar=True,
            log_every_n_steps=50,
            resume_from_checkpoint=resume_checkpoint
        )
    
    # 9. Entra√Ænement
    print("\n6. D√©but entra√Ænement...")
    print("=" * 70)
    
    trainer.fit(
        model=lightning_model,
        train_dataloaders=train_loader,
        val_dataloaders=valid_loader,
        ckpt_path=resume_checkpoint
    )
    
    # 10. Pr√©dictions
    print("\n7. Pr√©dictions...")
    lightning_model.model.eval()
    predictions = []
    
    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Test predictions"):
            batch = {k: v.to(device) if isinstance(v, torch.Tensor) else v 
                    for k, v in batch.items()}
            logits = lightning_model.model(
                tgt_vec=batch['tgt_vec'],
                seq_vecs=batch['seq_vecs'],
                seq_mask=batch['seq_mask'],
                numerics=batch['numerics'],
                user_ids=batch['user_ids'],
                tag_ids=batch['tag_ids'],
                tag_mask=batch['tag_mask'],
                likes=batch['likes'],
                views=batch['views']
            )
            probs = torch.sigmoid(logits)
            predictions.extend(probs.cpu().numpy().tolist())
    
    # 11. Sauvegarde
    submission = pd.DataFrame({
        'user_id': test_df['user_id'].to_list(),
        'item_id': test_df['item_id'].to_list(),
        'prediction': predictions
    })
    
    submission.to_csv('submission_exact.csv', index=False)
    
    print(f"\n‚úÖ Fichier sauvegard√©: submission_exact.csv")
    print(f"üìä AUC estim√©: {checkpoint_callback.best_model_score:.4f}")

if __name__ == "__main__":
    main()


ENTRA√éNEMENT - ARCHITECTURE EXACTE

1. Chargement des donn√©es...

2. Normalisation...

3. Pr√©paration embeddings...

4. Cr√©ation DataLoaders...


Preprocessing:   0%|          | 0/3600000 [00:00<?, ?it/s]

Preprocessing:   0%|          | 0/10000 [00:00<?, ?it/s]

Preprocessing:   0%|          | 0/379142 [00:00<?, ?it/s]


5. Cr√©ation du mod√®le...
  ‚Ä¢ Calcul des vocabulaires...
  ‚Ä¢ Vocabulaires ajust√©s:
    - NUM_TAGS: 1000
    - LIKES_VOCAB: 11
    - VIEWS_VOCAB: 11
  ‚Ä¢ Test forward sur CPU...


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs


  ‚úì Test forward r√©ussi!
  ‚Ä¢ D√©placement sur cuda...
  ‚Ä¢ Reprise depuis le checkpoint : /kaggle/working/lightning_logs/version_4/checkpoints/ctr_exact_epoch=06_val_auc=0.7077.ckpt

6. D√©but entra√Ænement...


Restoring states from the checkpoint path at /kaggle/working/lightning_logs/version_4/checkpoints/ctr_exact_epoch=06_val_auc=0.7077.ckpt
/usr/local/lib/python3.11/dist-packages/pytorch_lightning/callbacks/model_checkpoint.py:445: The dirpath has changed from '/kaggle/working/lightning_logs/version_4/checkpoints' to '/kaggle/working/lightning_logs/version_5/checkpoints', therefore `best_model_score`, `kth_best_model_path`, `kth_value`, `last_model_path` and `best_k_models` won't be reloaded. Only `best_model_path` will be reloaded.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name      | Type              | Params | Mode 
--------------------------------------------------------
0 | model     | ExactCTRModel     | 141 M  | train
1 | criterion | BCEWithLogitsLoss | 0      | train
--------------------------------------------------------
76.6 M    Trainable params
64.4 M    Non-trainable params
141 M     Total params
564.028   Total estimated model params size (MB)
84        Modules in tr

Sanity Checking: |          | 0/? [00:00<?, ?it/s]


Epoch 6 - Validation AUC: 0.7044


/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:433: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.
/usr/local/lib/python3.11/dist-packages/pytorch_lightning/trainer/connectors/data_connector.py:433: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=3` in the `DataLoader` to improve performance.


Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 7 - Validation AUC: 0.7199


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 8 - Validation AUC: 0.7273


Validation: |          | 0/? [00:00<?, ?it/s]


Epoch 9 - Validation AUC: 0.7365
