In [None]:
!pip install scikit-learn pandas numpy torch

In [None]:
import numpy as np

import pandas as pd

import torch

import torch.nn as nn

import torch.nn.functional as F

from torch.utils.data import Dataset, DataLoader

from sklearn.preprocessing import StandardScaler, MinMaxScaler

import math

from typing import List, Tuple, Dict





class TimeSeriesDataset(Dataset):
    """Fixed-length windows cut from cascades described by two CSV files.

    The data CSV holds one event per row and must contain the columns
    ``number_of_followers`` and ``relative_time_second``.  The index CSV holds
    one cascade per row and must contain ``start_ind`` / ``end_ind``, the
    inclusive row range of that cascade inside the data CSV.

    Every item is a pair ``(window, total_length)``:

    * ``window`` -- float tensor ``[seq_length, 2]`` with the columns
      ``normalized_time`` and ``normalized_followers``;
    * ``total_length`` -- float tensor ``[1]`` holding the number of rows of
      the whole cascade the window was cut from.

    Args:
        data_path: File name of the data CSV, relative to ``data_dir``.
        index_path: File name of the index CSV, relative to ``data_dir``.
        seq_length: Window length; cascades shorter than this are dropped.
        data_dir: Directory containing both CSV files.  Defaults to the
            location that used to be hard-coded.

    Raises:
        ValueError: If a required data column is missing or no cascade is at
            least ``seq_length`` rows long.
    """

    def __init__(self, data_path: str, index_path: str, seq_length: int = 50,
                 data_dir: str = '/kaggle/input/twitter'):
        self.data = pd.read_csv(f'{data_dir}/{data_path}')
        self.index = pd.read_csv(f'{data_dir}/{index_path}')

        print(f"Data shape: {self.data.shape}")
        print(f"Index shape: {self.index.shape}")
        print(f"Index columns: {self.index.columns.tolist()}")
        print(f"Start index range: {self.index['start_ind'].min()} to {self.index['start_ind'].max()}")
        print(f"End index range: {self.index['end_ind'].min()} to {self.index['end_ind'].max()}")

        # Normalize indices to match data length.
        # NOTE(review): out-of-range indices are wrapped with a modulo rather
        # than rejected, so such rows end up pointing at unrelated data --
        # confirm this is intended for the index file in use.
        n_rows = len(self.data)
        self.index['start_ind'] = self.index['start_ind'].apply(
            lambda x: int(x % n_rows) if pd.notnull(x) else 0
        )
        self.index['end_ind'] = self.index['end_ind'].apply(
            lambda x: int(x % n_rows) if pd.notnull(x) else n_rows - 1
        )

        print("\nAfter normalization:")
        print(f"Start index range: {self.index['start_ind'].min()} to {self.index['start_ind'].max()}")
        print(f"End index range: {self.index['end_ind'].min()} to {self.index['end_ind'].max()}")

        self.seq_length = seq_length
        self.time_scaler = MinMaxScaler()
        self.follower_scaler = StandardScaler()

        # Preprocess
        self.preprocess_data()
        self.cascade_sequences = self.create_sequences()
        print(f"\nCreated {len(self.cascade_sequences)} valid sequences")

        if len(self.cascade_sequences) == 0:
            raise ValueError("No valid sequences were created. Check your data and index files.")

    def preprocess_data(self):
        """Add the ``normalized_time`` / ``normalized_followers`` columns.

        Follower counts are log1p-transformed and then standardised; relative
        times are min-max scaled.  Both scalers are fitted on the whole data
        frame.

        Raises:
            ValueError: If a required input column is missing.
        """
        print("\nPreprocessing data...")
        # Check if required columns exist
        required_cols = ['number_of_followers', 'relative_time_second']
        missing_cols = [col for col in required_cols if col not in self.data.columns]
        if missing_cols:
            raise ValueError(f"Missing required columns: {missing_cols}")

        self.data['log_followers'] = np.log1p(self.data['number_of_followers'])
        self.data['normalized_time'] = self.time_scaler.fit_transform(
            self.data['relative_time_second'].values.reshape(-1, 1)
        )
        self.data['normalized_followers'] = self.follower_scaler.fit_transform(
            self.data['log_followers'].values.reshape(-1, 1)
        )
        print("Data preprocessing completed")

    def create_sequences(self) -> List[Tuple[np.ndarray, int]]:
        """Collect every cascade that is at least ``seq_length`` rows long.

        Returns:
            A list of ``(features, length)`` pairs where ``features`` is the
            ``[length, 2]`` array of normalised time / follower values of one
            cascade and ``length`` its number of rows.
        """
        print("\nCreating sequences...")
        sequences = []
        total_rows = len(self.index)
        valid_sequences = 0
        skipped_sequences = 0

        # Build the feature matrix once; each cascade is then a cheap row
        # slice instead of a per-row DataFrame.iloc + column selection.
        features = np.ascontiguousarray(
            self.data[['normalized_time', 'normalized_followers']].values
        )
        n_rows = len(features)

        for idx, row in self.index.iterrows():
            try:
                start_idx = int(row['start_ind'])
                end_idx = int(row['end_ind'])
            except (TypeError, ValueError, OverflowError) as e:
                print(f"Error processing sequence at index {idx}: {e}")
                skipped_sequences += 1
                continue

            # Validate sequence bounds
            if start_idx > end_idx:
                start_idx, end_idx = end_idx, start_idx

            if not (0 <= start_idx < n_rows and 0 <= end_idx < n_rows):
                skipped_sequences += 1
                if idx % 1000 == 0:
                    print(f"Skipping sequence at index {idx}: Invalid bounds ({start_idx}, {end_idx})")
                continue

            seq_data = features[start_idx:end_idx + 1]  # end index is inclusive
            if len(seq_data) >= self.seq_length:
                sequences.append((seq_data, len(seq_data)))
                valid_sequences += 1
            else:
                skipped_sequences += 1

            if idx % 1000 == 0:
                print(f"Processed {idx+1}/{total_rows} sequences. "
                      f"Valid: {valid_sequences}, Skipped: {skipped_sequences}")

        print(f"\nSequence creation completed:")
        print(f"Total sequences processed: {total_rows}")
        print(f"Valid sequences: {valid_sequences}")
        print(f"Skipped sequences: {skipped_sequences}")
        return sequences

    def __len__(self):
        """Return the number of usable cascades."""
        return len(self.cascade_sequences)

    def __getitem__(self, idx):
        """Return a random ``seq_length`` window of cascade ``idx`` and its size."""
        sequence, total_length = self.cascade_sequences[idx]
        surplus = len(sequence) - self.seq_length
        if surplus > 0:
            # randint's upper bound is exclusive: +1 makes the final window
            # (the one ending on the cascade's last row) reachable as well.
            start_idx = np.random.randint(0, surplus + 1)
            sequence = sequence[start_idx:start_idx + self.seq_length]
        return torch.FloatTensor(sequence), torch.FloatTensor([total_length])



class DeepCas(nn.Module):
    """
    DeepCas implementation using Transformer architecture

    A linear projection lifts every timestep to ``d_model`` features,
    sinusoidal positions are added, a stack of Transformer encoder layers
    mixes information along the time axis, and the time-averaged
    representation is mapped to one scalar per sequence.

    Args:
        input_size: Number of features per timestep.
        d_model: Transformer width (must be divisible by ``nhead``).
        nhead: Number of attention heads.
        num_layers: Number of encoder layers.
    """

    def __init__(self, input_size: int, d_model: int, nhead: int, num_layers: int):
        super().__init__()
        self.input_projection = nn.Linear(input_size, d_model)
        self.positional_encoding = PositionalEncoding(d_model)
        self.transformer_encoder = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward=2048),
            num_layers
        )
        self.output_layer = nn.Sequential(
            nn.Linear(d_model, d_model // 2),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(d_model // 2, 1)
        )

    def forward(self, x):
        """Map ``x`` of shape ``[batch, seq_len, input_size]`` to ``[batch, 1]``."""
        x = self.input_projection(x)  # [batch, seq_len, d_model]

        # Both the positional encoding and the encoder stack (created with the
        # default batch_first=False) treat dim 0 as time.  Feeding them the
        # batch-first tensor made attention run across the samples of a batch
        # and assigned positions by batch index, so switch to sequence-first.
        x = x.transpose(0, 1)  # [seq_len, batch, d_model]
        x = self.positional_encoding(x)
        x = self.transformer_encoder(x)

        x = x.mean(dim=0)  # Global average pooling over time -> [batch, d_model]
        return self.output_layer(x)



class DeepHawkes(nn.Module):
    """
    DeepHawkes implementation with attention mechanism

    Each timestep is encoded independently, the encoded sequence attends to
    itself with four heads, and a non-negative (Softplus) intensity is
    predicted per timestep and summed over time.

    Args:
        input_size: Number of features per timestep.
        hidden_size: Width of the encoder / attention (must be divisible by
            the four attention heads).
    """

    def __init__(self, input_size: int, hidden_size: int):
        super().__init__()

        encoder_layers = [
            nn.Linear(input_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.ReLU(),
            nn.Dropout(0.1),
        ]
        self.feature_encoder = nn.Sequential(*encoder_layers)

        self.attention = nn.MultiheadAttention(hidden_size, num_heads=4)

        intensity_layers = [
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_size, 1),
            nn.Softplus(),
        ]
        self.intensity_network = nn.Sequential(*intensity_layers)

    def forward(self, x):
        """Map ``x`` of shape ``[batch, seq_len, input_size]`` to ``[batch, 1]``."""
        encoded = self.feature_encoder(x)

        # The attention module was built sequence-first, so move time to dim 0
        # for the call and restore the batch-first layout afterwards.
        seq_first = encoded.permute(1, 0, 2)
        attended = self.attention(seq_first, seq_first, seq_first)[0]
        batch_first = attended.permute(1, 0, 2)

        per_step_intensity = self.intensity_network(batch_first)
        # Aggregate intensity over time
        return torch.sum(per_step_intensity, dim=1)



class CasCN(nn.Module):
    """
    CasCN implementation with residual connections and dilated convolutions

    Layer ``i`` fuses the inputs of all layers up to and including ``i``
    (concatenated along the feature axis) through a linear + LayerNorm block,
    applies a dilated convolution with dilation ``2**i`` and adds the fused
    features back before a ReLU.  The final features are averaged over time
    and mapped to one scalar per sequence.

    Args:
        input_size: Number of features per timestep.
        hidden_size: Channel width used throughout the network.
        num_layers: Number of fuse + convolution stages.
    """

    def __init__(self, input_size: int, hidden_size: int, num_layers: int):
        super().__init__()
        self.input_proj = nn.Linear(input_size, hidden_size)

        self.conv_blocks = nn.ModuleList()
        self.residual_dense = nn.ModuleList()

        for layer_idx in range(num_layers):
            self.conv_blocks.append(
                DilatedCausalConv1d(
                    hidden_size,
                    hidden_size,
                    kernel_size=3,
                    dilation=2 ** layer_idx,
                )
            )

            # Stage `layer_idx` receives layer_idx + 1 concatenated feature maps.
            fused_width = hidden_size * (layer_idx + 1)
            self.residual_dense.append(
                nn.Sequential(
                    nn.Linear(fused_width, hidden_size),
                    nn.LayerNorm(hidden_size),
                )
            )

        head_width = hidden_size // 2
        self.output_layer = nn.Sequential(
            nn.Linear(hidden_size, head_width),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(head_width, 1),
        )

    def forward(self, x):
        """Map ``x`` of shape ``[batch, seq_len, input_size]`` to ``[batch, 1]``."""
        # Unpacking three dimensions rejects inputs that are not 3-D.
        _batch, _steps, _width = x.shape

        hidden = self.input_proj(x)  # [batch, seq_len, hidden]

        history = []  # inputs of every stage so far, for the dense connections
        for conv, dense in zip(self.conv_blocks, self.residual_dense):
            history.append(hidden)

            if len(history) > 1:
                hidden = dense(torch.cat(history, dim=-1))
            else:
                hidden = dense(hidden)

            conv_out = conv(hidden)

            # Pad the time axis at the end if the convolution changed the
            # sequence length.
            length_gap = hidden.size(1) - conv_out.size(1)
            if length_gap != 0:
                conv_out = F.pad(conv_out, (0, 0, 0, length_gap))

            # Residual connection
            hidden = F.relu(conv_out + hidden)

        # Global average pooling over time, then the regression head.
        return self.output_layer(hidden.mean(dim=1))




class TiDeH(nn.Module):
    """
    TiDeH implementation with neural ODE approach and fixed dimension handling

    The encoder acts on every timestep independently; only the encoding of the
    last timestep is used as the initial state of an ODE that is integrated
    with fixed-step Euler over ``[0, 1]``.  The final state is decoded into a
    non-negative (Softplus) scalar per sequence.

    Args:
        input_size: Number of features per timestep.
        hidden_size: Width of the latent state.
    """

    def __init__(self, input_size: int, hidden_size: int):
        super().__init__()
        self.hidden_size = hidden_size

        encoder_layers = [
            nn.Linear(input_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size),
            nn.LayerNorm(hidden_size),
        ]
        self.encoder = nn.Sequential(*encoder_layers)

        self.ode_func = ODEFunc(hidden_size)

        decoder_layers = [
            nn.Linear(hidden_size, hidden_size),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden_size, 1),
            nn.Softplus(),
        ]
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Map ``x`` of shape ``[batch, seq_len, input_size]`` to ``[batch, 1]``."""
        # Unpacking three dimensions rejects inputs that are not 3-D.
        _batch, _steps, _width = x.shape

        encoded = self.encoder(x)      # [batch, seq_len, hidden]
        initial_state = encoded[:, -1, :]  # last timestep: [batch, hidden]

        # Ten evenly spaced integration points on [0, 1].
        time_grid = torch.linspace(0, 1, steps=10).to(x.device)

        final_state = self.ode_solve(self.ode_func, initial_state, time_grid)

        return self.decoder(final_state)  # [batch, 1]

    def ode_solve(self, func, y0, t):
        """
        Integrate ``dy/dt = func(t, y)`` with the explicit Euler method.

        The step size is taken from the first two time points and reused for
        every step; after each step the state is layer-normalised.

        Args:
            func: ODEFunc instance
            y0: Initial condition [batch_size, hidden_size]
            t: Time points [steps]

        Returns:
            The state after the last step, same shape as ``y0``.
        """
        step = t[1] - t[0]
        state = y0

        # One Euler step per time point except the last one.
        for t_i in t[:-1]:
            state = state + step * func(t_i, state)
            # Add stability through normalization
            state = F.layer_norm(state, [self.hidden_size])

        return state

class ODEFunc(nn.Module):
    """Time-independent derivative network used by TiDeH.

    Two linear layers, each followed by LayerNorm, with a Tanh in between.
    """

    def __init__(self, hidden_size: int):
        super().__init__()
        layers = [
            nn.Linear(hidden_size, hidden_size),
            nn.LayerNorm(hidden_size),
            nn.Tanh(),
            nn.Linear(hidden_size, hidden_size),
            nn.LayerNorm(hidden_size),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, t, y):
        """Return dy/dt for state ``y``; the time argument ``t`` is not used."""
        derivative = self.net(y)
        return derivative



class DilatedCausalConv1d(nn.Module):
    """
    Dilated causal 1-D convolution over ``[batch, seq_len, channels]`` input.

    The time axis is left-padded with ``(kernel_size - 1) * dilation`` zeros
    and then convolved without any further padding, so the output has the
    same length as the input and output step ``t`` depends on input steps
    ``t, t - dilation, ..., t - (kernel_size - 1) * dilation`` only.  A
    LayerNorm over the channel axis is applied to the result.

    Args:
        in_channels: Number of input channels.
        out_channels: Number of output channels.
        kernel_size: Convolution kernel size.
        dilation: Spacing between kernel taps.
    """

    def __init__(self, in_channels: int, out_channels: int, kernel_size: int, dilation: int = 1):
        super().__init__()
        self.causal_padding = (kernel_size - 1) * dilation

        # No padding inside the convolution: the causal left padding applied
        # in forward() already preserves the length.  Combining it with
        # padding='same' padded twice, which pushed the receptive field
        # `dilation` steps into the past (step t never saw input t) and only
        # looked right because the surplus outputs were sliced off.
        self.conv = nn.Conv1d(
            in_channels,
            out_channels,
            kernel_size,
            padding=0,
            dilation=dilation
        )

        self.layer_norm = nn.LayerNorm(out_channels)

    def forward(self, x):
        """Return the convolved sequence, shape ``[batch, seq_len, out_channels]``."""
        # Conv1d works on [batch, channels, seq_len]; pad only the past side.
        x = F.pad(x.transpose(1, 2), (self.causal_padding, 0))

        x = self.conv(x)  # length is back to seq_len

        # Back to [batch, seq_len, channels] for the channel-wise LayerNorm.
        x = x.transpose(1, 2)
        return self.layer_norm(x)



class PositionalEncoding(nn.Module):
    """Sinusoidal positional encoding for sequence-first tensors.

    The table is stored as the buffer ``pe`` of shape ``[max_len, 1, d_model]``
    with sines in the even feature columns and cosines in the odd ones.

    Args:
        d_model: Feature width of the tensors to be encoded (even or odd).
        max_len: Number of positions in the precomputed table.
    """

    def __init__(self, d_model: int, max_len: int = 5000):
        super().__init__()

        position = torch.arange(max_len).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2) * (-math.log(10000.0) / d_model))
        angles = position * div_term  # [max_len, ceil(d_model / 2)]

        pe = torch.zeros(max_len, 1, d_model)
        pe[:, 0, 0::2] = torch.sin(angles)
        # For odd d_model there is one cosine column fewer than sine columns;
        # trimming keeps the assignment valid (no change for even d_model).
        pe[:, 0, 1::2] = torch.cos(angles[:, :d_model // 2])

        self.register_buffer('pe', pe)

    def forward(self, x):
        """Add the encoding to ``x`` of shape ``[seq_len, batch, d_model]``.

        Positions are taken from dim 0 of ``x`` and broadcast over dim 1.
        """
        return x + self.pe[:x.size(0)]



def train_cascade_model(
    model: nn.Module,
    train_loader: DataLoader,
    val_loader: DataLoader,
    num_epochs: int = 50,
    learning_rate: float = 0.001,
    device: str = 'cuda' if torch.cuda.is_available() else 'cpu'
):
    """Train ``model`` with MSE loss and return it with its best weights loaded.

    Uses AdamW, gradient-norm clipping at 1.0 and a ReduceLROnPlateau
    scheduler (patience 5) driven by the mean validation loss.  After every
    epoch the weights are snapshotted if the validation loss improved; the
    best snapshot is restored before returning.

    Args:
        model: Network mapping a batch of inputs to predictions shaped like
            the targets yielded by the loaders.
        train_loader: Yields ``(inputs, targets)`` batches for optimisation.
        val_loader: Yields ``(inputs, targets)`` batches for model selection.
        num_epochs: Number of passes over ``train_loader``.
        learning_rate: Initial AdamW learning rate.
        device: Device the model and batches are moved to.

    Returns:
        The same ``model`` instance, on ``device``.
    """
    model = model.to(device)
    optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode='min', patience=5)
    criterion = nn.MSELoss()

    best_val_loss = float('inf')
    best_model = None

    for epoch in range(num_epochs):
        # Training
        model.train()
        train_loss = 0
        for batch_x, batch_y in train_loader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)

            optimizer.zero_grad()
            output = model(batch_x)
            loss = criterion(output, batch_y)
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            train_loss += loss.item()

        # Validation
        model.eval()
        val_loss = 0
        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_x, batch_y = batch_x.to(device), batch_y.to(device)
                output = model(batch_x)
                val_loss += criterion(output, batch_y).item()

        train_loss /= len(train_loader)
        val_loss /= len(val_loader)

        # Update learning rate
        scheduler.step(val_loss)

        # Save best model.  The tensors must be cloned: state_dict() returns
        # references to the live parameters, so a plain dict copy would keep
        # changing with every later optimizer step and the "best" snapshot
        # would always equal the final weights.
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = {
                key: value.detach().clone()
                for key, value in model.state_dict().items()
            }

        print(f'Epoch {epoch+1}/{num_epochs}:')
        print(f'Train Loss: {train_loss:.4f}, Val Loss: {val_loss:.4f}')

    # Load best model (absent when no epoch ran or the loss was never finite).
    if best_model is not None:
        model.load_state_dict(best_model)
    return model



def main():
    """Load the dataset, train all four cascade models and save a checkpoint each.

    The dataset is split 80/20 into training and validation parts.  Every
    model is trained with ``train_cascade_model`` and written to
    ``checkpoint_<name>.pt`` together with the hyperparameters.  Any exception
    is reported with its stack trace and then re-raised.
    """
    # Model hyperparameters
    input_size = 2  # normalized_time and normalized_followers
    hidden_size = 128
    num_layers = 6
    nhead = 8  # attention heads of the transformer model
    batch_size = 32
    num_epochs = 50
    learning_rate = 0.001

    print("\nInitializing dataset...")
    try:
        dataset = TimeSeriesDataset('data.csv', 'index.csv')

        n_total = len(dataset)
        if n_total < 2:
            raise ValueError("Dataset contains too few sequences for training/validation split")

        # 80 % of the sequences for training, the remainder for validation.
        train_size = int(0.8 * n_total)
        val_size = n_total - train_size
        train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

        print("\nDataset split:")
        print(f"Total sequences: {len(dataset)}")
        print(f"Training sequences: {len(train_dataset)}")
        print(f"Validation sequences: {len(val_dataset)}")

        # Settings shared by both loaders; workers stay at 0 for debugging.
        loader_options = dict(
            batch_size=batch_size,
            num_workers=0,
            pin_memory=torch.cuda.is_available(),
        )
        train_loader = DataLoader(train_dataset, shuffle=True, **loader_options)
        val_loader = DataLoader(val_dataset, **loader_options)

        print("\nInitializing models...")
        models = {
            'DeepCas': DeepCas(
                input_size=input_size,
                d_model=hidden_size,
                nhead=nhead,
                num_layers=num_layers,
            ),
            'DeepHawkes': DeepHawkes(input_size=input_size, hidden_size=hidden_size),
            'CasCN': CasCN(
                input_size=input_size,
                hidden_size=hidden_size,
                num_layers=num_layers,
            ),
            'TiDeH': TiDeH(input_size=input_size, hidden_size=hidden_size),
        }

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        print(f"\nUsing device: {device}")

        # Stored alongside the weights in every checkpoint.
        hyperparameters = {
            'input_size': input_size,
            'hidden_size': hidden_size,
            'num_layers': num_layers,
            'nhead': nhead,
        }

        results = {}
        for name, model in models.items():
            print(f"\nTraining {name}...")
            trained_model = train_cascade_model(
                model=model,
                train_loader=train_loader,
                val_loader=val_loader,
                num_epochs=num_epochs,
                learning_rate=learning_rate,
                device=device,
            )
            results[name] = trained_model

            checkpoint = {
                'model_state_dict': trained_model.state_dict(),
                'model_name': name,
                'hyperparameters': hyperparameters,
            }
            torch.save(checkpoint, f'checkpoint_{name}.pt')

        print("\nTraining completed for all models!")

    except Exception as e:
        # Report the failure, then let it propagate to the caller.
        print(f"\nError during training: {e!s}")
        print("Stack trace:")
        import traceback
        traceback.print_exc()
        raise



# Run the training pipeline only when this module is executed as the main program.
if __name__ == "__main__":

    main()