This notebook works through training and evaluating models on data provided by the NFL Big Data Bowl Kaggle competition. Graph-based representation models are trained and evaluated to predict which receiver a quarterback will throw to during a pass play.

# Load Packages

In [None]:
!pip install torch_geometric

In [None]:
import sklearn
import pandas as pd
pd.set_option('display.max_columns', None)
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv, global_mean_pool, GATConv
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader

# Graph Construction

In [None]:
# Usage for graph classification.
# NOTE: every number in this cell is a small example value chosen only so the
# notebook runs end to end; replace them with the dimensions and tensors
# derived from the real data.
torch.manual_seed(0)  # make the randomly generated example graphs reproducible

input_dim = 8     # features per node
hidden_dim = 16   # width of the hidden graph layers
num_layers = 2    # number of stacked graph layers
num_classes = 5   # number of output classes
edge_dim = 1      # features per edge
global_dim = 4    # graph-level features

# Generate a training graph with 10 nodes and 14 directed edges
x_train = torch.randn(10, input_dim)
edge_index_train = torch.tensor([[0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 8, 8, 9],
                                 [1, 0, 2, 1, 3, 2, 5, 4, 6, 5, 8, 7, 9, 8]], dtype=torch.long)
# Edge features are drawn from [0, 1): the GCN cell passes them to GCNConv as
# edge weights, and negative weights can break its degree normalisation.
edge_attr_train = torch.rand(edge_index_train.size(1), edge_dim)  # One row per edge
global_features_train = torch.randn(1, global_dim)  # Global features
y_train = torch.tensor([0])  # Graph label (one per graph)

# Generate a testing graph with 5 nodes and 6 directed edges
x_test = torch.randn(5, input_dim)
edge_index_test = torch.tensor([[0, 1, 1, 2, 3, 4],
                                [1, 0, 2, 1, 4, 3]], dtype=torch.long)
edge_attr_test = torch.rand(edge_index_test.size(1), edge_dim)  # One row per edge
global_features_test = torch.randn(1, global_dim)  # Global features
y_test = torch.tensor([1])  # Graph label (one per graph)

# Create PyTorch Geometric data objects
data_train = Data(x=x_train, edge_index=edge_index_train, edge_attr=edge_attr_train, y=y_train)
data_train.global_features = global_features_train
data_test = Data(x=x_test, edge_index=edge_index_test, edge_attr=edge_attr_test, y=y_test)
data_test.global_features = global_features_test

# Create data loaders
batch_size = 1
train_loader = DataLoader([data_train], batch_size=batch_size, shuffle=True)
test_loader = DataLoader([data_test], batch_size=batch_size)

# Graph Network Architecture Training

## GCN

In [None]:
class GraphConvNet(nn.Module):
    """Stack of GCN layers followed by a graph-level linear classifier.

    Each graph is summarised by concatenating, in this order, the mean of its
    final node embeddings, the mean of its raw node features, the mean of its
    edge features and its global feature vector. ``fc`` maps that summary
    (width ``hidden_dim + input_dim + edge dim + global dim``) to one row of
    class scores per graph.

    Args:
        input_dim: Number of features per node.
        hidden_dim: Output width of every GCN layer.
        num_layers: Number of stacked ``GCNConv`` layers (at least 1).
        num_classes: Number of output classes.
        edge_feat_dim: Number of features per edge. When omitted, the
            notebook-level ``edge_dim`` is used so existing four-argument
            calls keep working.
        global_feat_dim: Number of graph-level features. When omitted, the
            notebook-level ``global_dim`` is used.

    Raises:
        ValueError: If ``num_layers`` is smaller than 1.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, num_classes,
                 edge_feat_dim=None, global_feat_dim=None):
        super(GraphConvNet, self).__init__()
        if num_layers < 1:
            raise ValueError("num_layers must be at least 1")
        # Prefer explicit arguments; fall back to the notebook globals only for
        # backward compatibility with callers that do not pass them.
        if edge_feat_dim is None:
            edge_feat_dim = edge_dim
        if global_feat_dim is None:
            global_feat_dim = global_dim

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.edge_dim = edge_feat_dim
        self.global_dim = global_feat_dim

        self.conv_layers = nn.ModuleList([
            GCNConv(input_dim if i == 0 else hidden_dim, hidden_dim)
            for i in range(num_layers)
        ])

        self.fc = nn.Linear(hidden_dim + input_dim + edge_feat_dim + global_feat_dim, num_classes)

    def forward(self, x, edge_index, edge_attr, batch, global_features):
        """Compute class scores for every graph in the batch.

        Args:
            x: Node features, ``[num_nodes, input_dim]``.
            edge_index: Edge list, ``[2, num_edges]``.
            edge_attr: Edge features with one row per edge, or ``None``.
            batch: Graph index of every node, or ``None`` for a single graph.
            global_features: One row of graph-level features per graph.

        Returns:
            Unnormalised scores (logits) of shape ``[num_graphs, num_classes]``.
            ``F.cross_entropy`` applies log-softmax itself, so no softmax is
            applied here; ``argmax`` over the logits gives the same
            predictions as over probabilities.
        """
        if batch is None:
            # A single un-batched graph: every node belongs to graph 0.
            batch = torch.zeros(x.size(0), dtype=torch.long, device=x.device)
        num_graphs = int(batch.max()) + 1
        num_edges = edge_index.size(1)

        if edge_attr is None:
            edge_feats = x.new_zeros(num_edges, self.edge_dim)
        else:
            edge_feats = edge_attr.reshape(num_edges, self.edge_dim)

        # GCNConv accepts a single scalar weight per edge, so edge features are
        # only forwarded to the convolutions when they are one-dimensional.
        edge_weight = edge_feats.squeeze(1) if self.edge_dim == 1 and edge_attr is not None else None

        # Readouts of the raw inputs; an edge belongs to the graph of its source node.
        node_readout = global_mean_pool(x, batch, size=num_graphs)
        edge_readout = global_mean_pool(edge_feats, batch[edge_index[0]], size=num_graphs)

        # Graph convolution layers
        h = x
        for conv in self.conv_layers:
            h = F.relu(conv(h, edge_index, edge_weight))
        hidden_readout = global_mean_pool(h, batch, size=num_graphs)

        # One summary row per graph, matching the input width of ``fc``.
        graph_repr = torch.cat(
            [hidden_readout, node_readout, edge_readout, global_features.reshape(num_graphs, -1)],
            dim=1,
        )
        return self.fc(graph_repr)

In [None]:
# Number of full passes over the training loader
epochs = 10

# Build the GCN classifier from the dimensions configured earlier
model = GraphConvNet(
    input_dim,
    hidden_dim,
    num_layers,
    num_classes,
)

# Adam over every trainable parameter of the model
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.01)

In [None]:
# Training: one optimizer step per mini-batch, repeated for `epochs` passes
for epoch in range(epochs):
    model.train()
    for data in train_loader:
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.edge_attr, data.batch, data.global_features)
        loss = F.cross_entropy(out, data.y)
        loss.backward()
        optimizer.step()

# Evaluation
model.eval()
with torch.no_grad():  # inference only: skip building autograd graphs
    for data in test_loader:
        out = model(data.x, data.edge_index, data.edge_attr, data.batch, data.global_features)
        # Print one line per prediction so batches with several rows also work
        for label in out.argmax(dim=1).tolist():
            print("Predicted label:", label)

## GAT

In [None]:
class GraphAttentionNet(nn.Module):
    """Stack of GAT layers followed by a graph-level linear classifier.

    All attention layers except the last use ``num_heads_in`` heads whose
    outputs are concatenated; the last layer averages ``num_heads_out`` heads
    so its output width is ``hidden_dim``. Each graph is then summarised by
    concatenating the mean of its final node embeddings, the mean of its raw
    node features, the mean of its edge features and its global feature
    vector, and ``fc`` maps that summary to one row of class scores per graph.

    Args:
        input_dim: Number of features per node.
        hidden_dim: Per-head output width of every attention layer.
        num_layers: Number of stacked ``GATConv`` layers (at least 1).
        num_heads_in: Attention heads of every layer except the last.
        num_heads_out: Attention heads of the last layer (averaged).
        num_classes: Number of output classes.
        edge_feat_dim: Number of features per edge. When omitted, the
            notebook-level ``edge_dim`` is used.
        global_feat_dim: Number of graph-level features. When omitted, the
            notebook-level ``global_dim`` is used.

    Raises:
        ValueError: If ``num_layers`` is smaller than 1.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, num_heads_in, num_heads_out, num_classes,
                 edge_feat_dim=None, global_feat_dim=None):
        super(GraphAttentionNet, self).__init__()
        if num_layers < 1:
            raise ValueError("num_layers must be at least 1")
        # Prefer explicit arguments; fall back to the notebook globals only
        # when the caller does not pass them.
        if edge_feat_dim is None:
            edge_feat_dim = edge_dim
        if global_feat_dim is None:
            global_feat_dim = global_dim

        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.edge_dim = edge_feat_dim
        self.global_dim = global_feat_dim

        self.in_head = num_heads_in
        self.out_head = num_heads_out

        layers = []
        in_channels = input_dim
        for _ in range(num_layers - 1):
            layers.append(GATConv(in_channels, hidden_dim, heads=self.in_head, dropout=0.3))
            # Concatenated heads widen the input of the next layer.
            in_channels = hidden_dim * self.in_head
        layers.append(GATConv(in_channels, hidden_dim, heads=self.out_head, concat=False, dropout=0.3))
        self.attention_layers = nn.ModuleList(layers)

        self.fc = nn.Linear(hidden_dim + input_dim + edge_feat_dim + global_feat_dim, num_classes)

    def forward(self, x, edge_index, edge_attr, batch, global_features):
        """Compute class scores for every graph in the batch.

        Args:
            x: Node features, ``[num_nodes, input_dim]``.
            edge_index: Edge list, ``[2, num_edges]``.
            edge_attr: Edge features with one row per edge, or ``None``. They
                enter the graph summary only, not the attention layers.
            batch: Graph index of every node, or ``None`` for a single graph.
            global_features: One row of graph-level features per graph.

        Returns:
            Unnormalised scores (logits) of shape ``[num_graphs, num_classes]``.
            ``F.cross_entropy`` applies log-softmax itself, so no softmax is
            applied here.
        """
        if batch is None:
            # A single un-batched graph: every node belongs to graph 0.
            batch = torch.zeros(x.size(0), dtype=torch.long, device=x.device)
        num_graphs = int(batch.max()) + 1
        num_edges = edge_index.size(1)

        if edge_attr is None:
            edge_feats = x.new_zeros(num_edges, self.edge_dim)
        else:
            edge_feats = edge_attr.reshape(num_edges, self.edge_dim)

        # Readouts of the raw inputs; an edge belongs to the graph of its source node.
        node_readout = global_mean_pool(x, batch, size=num_graphs)
        edge_readout = global_mean_pool(edge_feats, batch[edge_index[0]], size=num_graphs)

        # Graph attention layers
        h = x
        for attention in self.attention_layers:
            h = F.relu(attention(h, edge_index))
        hidden_readout = global_mean_pool(h, batch, size=num_graphs)

        # One summary row per graph, matching the input width of ``fc``.
        graph_repr = torch.cat(
            [hidden_readout, node_readout, edge_readout, global_features.reshape(num_graphs, -1)],
            dim=1,
        )
        return self.fc(graph_repr)

In [None]:
# Attention head counts (example values; tune for the real data)
num_heads_in = 4   # heads of the hidden attention layers
num_heads_out = 1  # heads of the final attention layer

# Instantiate the GAT model. The constructor takes separate input/output head
# counts. The model stays on the default device, like the other models in this
# notebook, because the training loop does not move batches to another device.
model = GraphAttentionNet(input_dim, hidden_dim, num_layers, num_heads_in, num_heads_out, num_classes)

# Optimizer and training length (10 epochs, matching the GCN run)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
epochs = 10

In [None]:
# Training: one optimizer step per mini-batch, repeated for `epochs` passes
for epoch in range(epochs):
    model.train()
    for data in train_loader:
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.edge_attr, data.batch, data.global_features)
        loss = F.cross_entropy(out, data.y)
        loss.backward()
        optimizer.step()

# Evaluation
model.eval()
with torch.no_grad():  # inference only: skip building autograd graphs
    for data in test_loader:
        out = model(data.x, data.edge_index, data.edge_attr, data.batch, data.global_features)
        # Print one line per prediction so batches with several rows also work
        for label in out.argmax(dim=1).tolist():
            print("Predicted label:", label)

## GCN LSTM

***Will require data re-structuring to have multiple time points per play***

In [None]:
# Example usage for graph classification.
# NOTE: the four dimensions below are small example values chosen only so the
# cell runs; replace them with the values derived from the real data.
input_dim = 8     # features per node
hidden_dim = 16   # width of the hidden graph layers
num_layers = 2    # number of stacked graph convolution layers
num_classes = 5   # number of output classes

# Generate a random graph with 10 nodes for training
x_train = torch.randn(10, input_dim)
edge_index_train = torch.tensor([[0, 1, 1, 2, 2, 3, 4, 5, 5, 6, 7, 8, 8, 9],
                                 [1, 0, 2, 1, 3, 2, 5, 4, 6, 5, 8, 7, 9, 8]], dtype=torch.long)
# Weights are drawn from [0, 1): GCNConv normalises by node degree computed
# from these weights, and negative weights can break that normalisation.
edge_weight_train = torch.rand(edge_index_train.size(1))  # One weight per edge
global_features_train = torch.randn(1, input_dim)  # Global features
y_train = torch.tensor([0])  # Graph label (one per graph)

# Generate a random graph with 5 nodes for testing
x_test = torch.randn(5, input_dim)
edge_index_test = torch.tensor([[0, 1, 1, 2, 3, 4],
                                [1, 0, 2, 1, 4, 3]], dtype=torch.long)
edge_weight_test = torch.rand(edge_index_test.size(1))  # One weight per edge
global_features_test = torch.randn(1, input_dim)  # Global features
y_test = torch.tensor([1])  # Graph label (one per graph)

# Create PyTorch Geometric data objects
data_train = Data(x=x_train, edge_index=edge_index_train, edge_attr=edge_weight_train, y=y_train)
data_train.global_features = global_features_train
data_test = Data(x=x_test, edge_index=edge_index_test, edge_attr=edge_weight_test, y=y_test)
data_test.global_features = global_features_test

# Create data loaders
batch_size = 1
train_loader = DataLoader([data_train], batch_size=batch_size, shuffle=True)
test_loader = DataLoader([data_test], batch_size=batch_size)

In [None]:
class GraphConvLSTM(nn.Module):
    """GCN layers, a single-step LSTM over the nodes, and a graph classifier.

    Node embeddings from the GCN stack are concatenated with the mean raw
    feature vector of the node's own graph (width ``hidden_dim + input_dim``,
    the input size of ``lstm``). The LSTM currently sees a sequence of length
    one in which every node is a separate batch entry. Its outputs are
    mean-pooled per graph and mapped to class scores by ``fc``.

    Args:
        input_dim: Number of features per node.
        hidden_dim: Width of the GCN layers and of the LSTM state.
        num_layers: Number of stacked ``GCNConv`` layers.
        num_classes: Number of output classes.
    """

    def __init__(self, input_dim, hidden_dim, num_layers, num_classes):
        super(GraphConvLSTM, self).__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.num_classes = num_classes

        self.conv_layers = nn.ModuleList([
            GCNConv(input_dim if i == 0 else hidden_dim, hidden_dim)
            for i in range(num_layers)
        ])

        self.lstm = nn.LSTM(hidden_dim + input_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x, edge_index, edge_weight, batch):
        """Compute class scores for every graph in the batch.

        Args:
            x: Node features, ``[num_nodes, input_dim]``.
            edge_index: Edge list, ``[2, num_edges]``.
            edge_weight: One scalar weight per edge, or ``None``.
            batch: Graph index of every node, or ``None`` for a single graph.

        Returns:
            Unnormalised scores (logits) of shape ``[num_graphs, num_classes]``.
            ``F.cross_entropy`` applies log-softmax itself, so no softmax is
            applied here.
        """
        if batch is None:
            # A single un-batched graph: every node belongs to graph 0.
            batch = torch.zeros(x.size(0), dtype=torch.long, device=x.device)

        # Per-graph mean of the raw node features, broadcast back to the nodes
        # by indexing with `batch` so the row count matches the node embeddings.
        graph_context = global_mean_pool(x, batch)[batch]

        # Graph convolution layers
        h = x
        for conv in self.conv_layers:
            h = F.relu(conv(h, edge_index, edge_weight))

        # Concatenate global context with node embeddings and add a time step dimension
        seq = torch.cat([h, graph_context], dim=1).unsqueeze(0)

        # nn.LSTM starts from zero hidden and cell states when none are given
        out, _ = self.lstm(seq)
        out = out.squeeze(0)  # Remove the time step dimension

        pooled = global_mean_pool(out, batch)  # Global pooling over nodes
        return self.fc(pooled)

In [None]:
# Instantiate the GC-LSTM model
model = GraphConvLSTM(input_dim, hidden_dim, num_layers, num_classes)

# Optimizer and training length (10 epochs, matching the GCN run)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
epochs = 10

In [None]:
# Training: one optimizer step per mini-batch, repeated for `epochs` passes
for epoch in range(epochs):
    model.train()
    for data in train_loader:
        optimizer.zero_grad()
        out = model(data.x, data.edge_index, data.edge_attr, data.batch)
        loss = F.cross_entropy(out, data.y)
        loss.backward()
        optimizer.step()

# Evaluation
model.eval()
with torch.no_grad():  # inference only: skip building autograd graphs
    for data in test_loader:
        out = model(data.x, data.edge_index, data.edge_attr, data.batch)
        # Print one line per prediction so batches with several rows also work
        for label in out.argmax(dim=1).tolist():
            print("Predicted label:", label)