In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Concatenate, Conv1D, GlobalMaxPooling1D, MultiHeadAttention, LayerNormalization, Attention
from tensorflow.keras.models import Model

# Data Preprocessing
def preprocess_data(df):
    event_type_mapping = df['event_type'].astype('category').cat.categories
    agent_id_mapping = df['agent_id'].astype('category').cat.categories
    context_mapping = df['context'].astype('category').cat.categories

    df['event_type'] = df['event_type'].astype('category').cat.codes
    df['agent_id'] = df['agent_id'].astype('category').cat.codes
    df['context'] = df['context'].astype('category').cat.codes

    return df, event_type_mapping, agent_id_mapping, context_mapping

# Load your dataset
df = pd.read_csv('1k_single_agent_minmax.csv')
df, event_type_mapping, agent_id_mapping, context_mapping = preprocess_data(df)

# Define constants
num_classes = df['event_type'].nunique()
num_agents = df['agent_id'].nunique()
num_contexts = df['context'].nunique()
sequence_length = 10  # Adjust based on your data
num_features = df.shape[1]  # Number of features

# Create sequences for training
def create_sequences(df, sequence_length):
    sequences = []
    for i in range(len(df) - sequence_length):
        seq = df.iloc[i:i+sequence_length].values
        sequences.append(seq)
    return np.array(sequences)

sequences = create_sequences(df, sequence_length)

# Splitting the data
train_size = int(0.7 * len(sequences))
val_size = int(0.15 * len(sequences))
train_sequences = sequences[:train_size]
val_sequences = sequences[train_size:train_size+val_size]
test_sequences = sequences[train_size+val_size:]

# Extract targets
def extract_targets(sequences):
    next_event = to_categorical(sequences[:, -1, 0], num_classes=num_classes)
    agent = to_categorical(sequences[:, -1, 1], num_classes=num_agents)
    context = to_categorical(sequences[:, -1, 2], num_classes=num_contexts)
    anomaly = np.random.randint(0, 2, size=(sequences.shape[0], 1))  # Placeholder for anomaly
    return next_event, agent, context, anomaly

train_targets = extract_targets(train_sequences)
val_targets = extract_targets(val_sequences)
test_targets = extract_targets(test_sequences)

# Define the model branches
def lstm_branch(input_shape):
    inputs = Input(shape=input_shape)
    x = Embedding(input_dim=num_classes, output_dim=64)(inputs)
    x = LSTM(128, return_sequences=True)(x)
    x = Attention()([x, x])
    x = GlobalMaxPooling1D()(x)
    return inputs, x

def transformer_branch(input_shape):
    inputs = Input(shape=input_shape)
    x = Embedding(input_dim=num_classes, output_dim=64)(inputs)
    attn_output = MultiHeadAttention(num_heads=4, key_dim=64)(x, x)
    x = LayerNormalization()(x + attn_output)
    x = GlobalMaxPooling1D()(x)  # Apply GlobalMaxPooling1D to convert 3D to 2D
    return inputs, x

def convnet_branch(input_shape):
    inputs = Input(shape=input_shape)
    x = Embedding(input_dim=num_classes, output_dim=64)(inputs)
    x = Conv1D(128, kernel_size=3, activation='relu')(x)
    x = GlobalMaxPooling1D()(x)
    return inputs, x

# Build the model
def build_model(sequence_length):
    input_shape = (sequence_length,)

    # Define the branches for each input
    lstm_event_inputs, lstm_event_output = lstm_branch(input_shape)
    lstm_agent_inputs, lstm_agent_output = lstm_branch(input_shape)
    lstm_context_inputs, lstm_context_output = lstm_branch(input_shape)
    
    transformer_event_inputs, transformer_event_output = transformer_branch(input_shape)
    transformer_agent_inputs, transformer_agent_output = transformer_branch(input_shape)
    transformer_context_inputs, transformer_context_output = transformer_branch(input_shape)
    
    convnet_event_inputs, convnet_event_output = convnet_branch(input_shape)
    convnet_agent_inputs, convnet_agent_output = convnet_branch(input_shape)
    convnet_context_inputs, convnet_context_output = convnet_branch(input_shape)
    
    # Concatenate outputs from all branches
    concatenated = Concatenate()([
        lstm_event_output, lstm_agent_output, lstm_context_output,
        transformer_event_output, transformer_agent_output, transformer_context_output,
        convnet_event_output, convnet_agent_output, convnet_context_output
    ])
    
    shared_dense = Dense(128, activation='relu')(concatenated)
    
    next_event_head = Dense(num_classes, activation='softmax', name='next_event')(shared_dense)
    agent_head = Dense(num_agents, activation='softmax', name='agent')(shared_dense)
    context_head = Dense(num_contexts, activation='softmax', name='context')(shared_dense)
    anomaly_head = Dense(1, activation='sigmoid', name='anomaly')(shared_dense)
    
    model = Model(inputs=[
        lstm_event_inputs, lstm_agent_inputs, lstm_context_inputs,
        transformer_event_inputs, transformer_agent_inputs, transformer_context_inputs,
        convnet_event_inputs, convnet_agent_inputs, convnet_context_inputs
    ], outputs=[next_event_head, agent_head, context_head, anomaly_head])
    
    return model

# Build the model
model = build_model(sequence_length)

# Define learnable uncertainty parameters
sigma_next_event = tf.Variable(1.0, dtype=tf.float32, trainable=True)
sigma_agent = tf.Variable(1.0, dtype=tf.float32, trainable=True)
sigma_context = tf.Variable(1.0, dtype=tf.float32, trainable=True)
sigma_anomaly = tf.Variable(1.0, dtype=tf.float32, trainable=True)

# Define custom loss functions with uncertainty weighting
def uncertainty_loss(y_true, y_pred, sigma):
    return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y_true, logits=y_pred)) / (2 * sigma ** 2) + tf.math.log(sigma)

# Define meta-learner model for learning rate adjustment
meta_input = Input(shape=(1,))
meta_output = Dense(1, activation='linear')(meta_input)
meta_model = Model(meta_input, meta_output)
meta_model.compile(optimizer='adam', loss='mse')

# Initial learning rate
initial_lr = 1e-3
model_optimizer = tf.keras.optimizers.Adam(learning_rate=initial_lr)

# Compile the base model with the custom losses
model.compile(optimizer=model_optimizer,
              loss={'next_event': lambda y_true, y_pred: uncertainty_loss(y_true, y_pred, sigma_next_event),
                    'agent': lambda y_true, y_pred: uncertainty_loss(y_true, y_pred, sigma_agent),
                    'context': lambda y_true, y_pred: uncertainty_loss(y_true, y_pred, sigma_context),
                    'anomaly': lambda y_true, y_pred: tf.reduce_mean(tf.keras.losses.binary_crossentropy(y_true, y_pred)) / (2 * sigma_anomaly ** 2) + tf.math.log(sigma_anomaly)},
              metrics={'next_event': 'accuracy', 
                       'agent': 'accuracy',
                       'context': 'accuracy',
                       'anomaly': 'accuracy'})

# Training the model with meta-learning
for epoch in range(50):
    print(f"Epoch {epoch+1}/50")
    
    # Train the base model
    history = model.fit(
        [train_sequences[:,:,0], train_sequences[:,:,1], train_sequences[:,:,2],
         train_sequences[:,:,0], train_sequences[:,:,1], train_sequences[:,:,2],
         train_sequences[:,:,0], train_sequences[:,:,1], train_sequences[:,:,2]],
        {'next_event': train_targets[0], 'agent': train_targets[1], 'context': train_targets[2], 'anomaly': train_targets[3]},
        validation_data=(
            [val_sequences[:,:,0], val_sequences[:,:,1], val_sequences[:,:,2],
             val_sequences[:,:,0], val_sequences[:,:,1], val_sequences[:,:,2],
             val_sequences[:,:,0], val_sequences[:,:,1], val_sequences[:,:,2]],
            {'next_event': val_targets[0], 'agent': val_targets[1], 'context': val_targets[2], 'anomaly': val_targets[3]}
        ),
        epochs=1, batch_size=64
    )
    
    # Get the validation loss
    val_loss = history.history['val_loss'][-1]
    
    # Meta-learning step: Update learning rate
    current_lr = model.optimizer.learning_rate.numpy()
    new_lr = meta_model.predict(np.array([[val_loss]]))
    new_lr = np.clip(new_lr, 1e-6, 1e-2)  # Clipping to prevent too large or small learning rates
    model.optimizer.learning_rate.assign(new_lr[0, 0])
    print(f"Updated learning rate: {new_lr[0][0]}")

# Evaluate the model
eval_results = model.evaluate(
    [test_sequences[:,:,0], test_sequences[:,:,1], test_sequences[:,:,2],
     test_sequences[:,:,0], test_sequences[:,:,1], test_sequences[:,:,2],
     test_sequences[:,:,0], test_sequences[:,:,1], test_sequences[:,:,2]],
    {'next_event': test_targets[0], 'agent': test_targets[1], 'context': test_targets[2], 'anomaly': test_targets[3]}
)

print(f"Test Results - Loss: {eval_results[0]}, Next Event Accuracy: {eval_results[1]}, Agent Accuracy: {eval_results[2]}, Context Accuracy: {eval_results[3]}, Anomaly Accuracy: {eval_results[4]}")


Epoch 1/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m46s[0m 528ms/step - agent_accuracy: 0.3590 - anomaly_accuracy: 0.4888 - context_accuracy: 0.0748 - loss: 2.6649 - next_event_accuracy: 0.5969 - val_agent_accuracy: 0.5493 - val_anomaly_accuracy: 0.5070 - val_context_accuracy: 0.1056 - val_loss: 2.5340 - val_next_event_accuracy: 0.8028
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 73ms/step
Updated learning rate: 0.009999999776482582
Epoch 2/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 237ms/step - agent_accuracy: 0.4913 - anomaly_accuracy: 0.4759 - context_accuracy: 0.1458 - loss: 2.5898 - next_event_accuracy: 0.7889 - val_agent_accuracy: 0.4366 - val_anomaly_accuracy: 0.4930 - val_context_accuracy: 0.1408 - val_loss: 2.5675 - val_next_event_accuracy: 0.8028
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Updated learning rate: 0.009999999776482582
Epoch 3/50
[1m11/11[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m

In [2]:
num_samples = 10
sample_sequences = test_sequences[:num_samples]

# Predict the next values
predictions = model.predict(
    [sample_sequences[:,:,0], sample_sequences[:,:,1], sample_sequences[:,:,2],
     sample_sequences[:,:,0], sample_sequences[:,:,1], sample_sequences[:,:,2],
     sample_sequences[:,:,0], sample_sequences[:,:,1], sample_sequences[:,:,2]]
)

# Extract predictions
next_event_predictions = np.argmax(predictions[0], axis=-1)
agent_predictions = np.argmax(predictions[1], axis=-1)
context_predictions = np.argmax(predictions[2], axis=-1)
anomaly_predictions = (predictions[3] > 0.5).astype(int)

# Decode predictions
def decode_predictions(predictions, id_to_label):
    return [id_to_label[idx] for idx in predictions]

decoded_next_event_predictions = decode_predictions(next_event_predictions, event_type_mapping)
decoded_agent_predictions = decode_predictions(agent_predictions, agent_id_mapping)
decoded_context_predictions = decode_predictions(context_predictions, context_mapping)

# Decode input sequences
def decode_sequences(sequences, event_mapping, agent_mapping, context_mapping):
    decoded_sequences = []
    for seq in sequences:
        decoded_seq = []
        for step in seq:
            decoded_step = [
                event_mapping[step[0]],
                agent_mapping[step[1]],
                context_mapping[step[2]]
            ]
            decoded_seq.append(decoded_step)
        decoded_sequences.append(decoded_seq)
    return np.array(decoded_sequences)

decoded_sample_sequences = decode_sequences(sample_sequences, event_type_mapping, agent_id_mapping, context_mapping)

# Print decoded input sequences and predictions
for i in range(num_samples):
    print(f"Input sequence (event_id, agent_id, context):\n{decoded_sample_sequences[i]}")
    print(f"Predicted next event_id: {decoded_next_event_predictions[i]}")
    print(f"Predicted next agent_id: {decoded_agent_predictions[i]}")
    print(f"Predicted next context: {decoded_context_predictions[i]}")
    print(f"Predicted anomaly: {anomaly_predictions[i]}")
    print("-" * 30)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 848ms/step
Input sequence (event_id, agent_id, context):
[['GAME_START' 'system' 'New Game']
 ['MOVE' 'X' '1,0']
 ['MOVE' 'O' '0,2']
 ['MOVE' 'X' '2,1']
 ['MOVE' 'O' '2,2']
 ['MOVE' 'X' '1,2']
 ['MOVE' 'O' '1,1']
 ['MOVE' 'X' '0,0']
 ['MOVE' 'O' '2,0']
 ['GAME_END' 'system' 'O']]
Predicted next event_id: GAME_END
Predicted next agent_id: system
Predicted next context: O
Predicted anomaly: [1]
------------------------------
Input sequence (event_id, agent_id, context):
[['MOVE' 'X' '1,0']
 ['MOVE' 'O' '0,2']
 ['MOVE' 'X' '2,1']
 ['MOVE' 'O' '2,2']
 ['MOVE' 'X' '1,2']
 ['MOVE' 'O' '1,1']
 ['MOVE' 'X' '0,0']
 ['MOVE' 'O' '2,0']
 ['GAME_END' 'system' 'O']
 ['GAME_START' 'system' 'New Game']]
Predicted next event_id: GAME_START
Predicted next agent_id: system
Predicted next context: New Game
Predicted anomaly: [1]
------------------------------
Input sequence (event_id, agent_id, context):
[['MOVE' 'O' '0,2']
 ['MOVE' 'X' '2,1']
