In [1]:
import pandas as pd
import numpy as np
import torch
from torch_geometric.data import Data
import networkx as nx
import geopandas as gpd
from shapely.geometry import Point
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import matplotlib.pyplot as plt

In [2]:
import numpy as np

def haversine_distance(lat1, lon1, lat2, lon2):
    r = 6371  # Radius of the earth in kilometers
    phi1 = np.radians(lat1)
    phi2 = np.radians(lat2)
    delta_phi = np.radians(lat2 - lat1)
    delta_lambda = np.radians(lon2 - lon1)
    a = np.sin(delta_phi/2)**2 + np.cos(phi1) * np.cos(phi2) * np.sin(delta_lambda/2)**2
    res = r * (2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a)))
    return np.round(res, 2)


In [3]:
# Load the data
df = pd.read_csv(r'C:\Users\24707\Downloads\202206-divvy-tripdata\202206-divvy-tripdata.csv')

# Remove rows with 'casual' in the 'member_casual' column

df = df[df['member_casual'] != 'casual']

# Drop rows with NaN values
df = df.dropna()

In [4]:
G = nx.Graph()

# Adding edges along with weights
for i in range(len(df)):
    row_data = df.iloc[i]
    lat1, lon1 = row_data['start_lat'], row_data['start_lng']
    lat2, lon2 = row_data['end_lat'], row_data['end_lng']
    weight = haversine_distance(lat1, lon1, lat2, lon2)
    G.add_edge(row_data['start_station_id'], row_data['end_station_id'], weight=weight)


In [5]:
# Create a mapping of node names to unique integers
node_mapping = {node: i for i, node in enumerate(G.nodes)}

# Now use this mapping when creating edge_index and edge_weight
edge_index = []
edge_weight = []
for edge in G.edges.data():
    # Get the nodes and weight
    node1, node2, data = edge
    # Map nodes to integers
    node1 = node_mapping[node1]
    node2 = node_mapping[node2]
    edge_index.append((node1, node2))
    edge_weight.append(data['weight'])

# Convert to tensors
edge_index = torch.tensor(edge_index, dtype=torch.long).t().contiguous()
edge_weight = torch.tensor(edge_weight, dtype=torch.float)


In [6]:
import torch
from torch_geometric.data import Data
from torch_geometric.nn import GCNConv
import torch.optim as optim
import matplotlib.pyplot as plt
from torch_geometric.nn import GCNConv, ChebConv  

In [12]:
import torch
from torch_geometric.nn import ChebConv

class STGCN(torch.nn.Module):
    def __init__(self, num_node_features, hidden_channels, num_classes):
        super(STGCN, self).__init__()
        self.conv1 = ChebConv(num_node_features, hidden_channels, K=2)
        self.conv2 = ChebConv(hidden_channels, num_classes, K=2)

    def forward(self, data):
        x, edge_index, edge_weight = data['x'], data['edge_index'], data['edge_weight']
        x = self.conv1(x, edge_index, edge_weight)
        x = x.relu()
        x = F.dropout(x, training=self.training)
        x = self.conv2(x, edge_index, edge_weight)
        return x


In [16]:
# Create a tensor for node features (in this case, we generate random features)
num_nodes = len(G.nodes())
x = torch.randn((num_nodes, 10), dtype=torch.float)

# Create a tensor for target values (in this case, we generate random targets)
y = torch.randn(num_nodes, dtype=torch.float)

# Create the data dictionary
data = {
    'x': x,
    'edge_index': edge_index,
    'edge_weight': edge_weight,
    'y': y,
    'train_mask': train_mask,
}

# Define the model
model = STGCN(num_node_features=10, hidden_channels=16, num_classes=1)

# Define the optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Define the loss function
loss_func = torch.nn.MSELoss()

# Start training
losses = []
for epoch in range(100):  # Number of epochs
    optimizer.zero_grad()  # Clear gradients
    out = model(data)  # Perform forward pass
    loss = loss_func(out[data['train_mask']], data['y'][data['train_mask']])  # Compute the loss
    loss.backward()  # Perform backward pass
    optimizer.step()  # Update model weights

    losses.append(loss.item())  # Store the loss from this epoch
    print(f'Epoch: {epoch+1}, Loss: {loss.item()}')


  return F.mse_loss(input, target, reduction=self.reduction)


Epoch: 1, Loss: 2.4084224700927734
Epoch: 2, Loss: 1.9836935997009277
Epoch: 3, Loss: 1.895984411239624
Epoch: 4, Loss: 1.6935316324234009
Epoch: 5, Loss: 1.621172308921814
Epoch: 6, Loss: 1.6832197904586792
Epoch: 7, Loss: 1.614659309387207
Epoch: 8, Loss: 1.524267315864563
Epoch: 9, Loss: 1.3947986364364624
Epoch: 10, Loss: 1.3775975704193115
Epoch: 11, Loss: 1.3041738271713257
Epoch: 12, Loss: 1.2688615322113037
Epoch: 13, Loss: 1.2115181684494019
Epoch: 14, Loss: 1.2319945096969604
Epoch: 15, Loss: 1.2184947729110718
Epoch: 16, Loss: 1.1592190265655518
Epoch: 17, Loss: 1.136747121810913
Epoch: 18, Loss: 1.1550792455673218
Epoch: 19, Loss: 1.1335463523864746
Epoch: 20, Loss: 1.1425673961639404
Epoch: 21, Loss: 1.0981675386428833
Epoch: 22, Loss: 1.1009050607681274
Epoch: 23, Loss: 1.0841503143310547
Epoch: 24, Loss: 1.0763030052185059
Epoch: 25, Loss: 1.070788025856018
Epoch: 26, Loss: 1.049621343612671
Epoch: 27, Loss: 1.0383409261703491
Epoch: 28, Loss: 1.0363706350326538
Epoch: 2

In [17]:
import torch.nn.functional as F

# Create a mask for validation data (we'll assume 15% of the data for validation)
num_val_nodes = int(num_nodes * 0.15)
val_mask = torch.zeros(num_nodes, dtype=torch.bool)
val_mask[num_train_nodes:num_train_nodes+num_val_nodes] = True

# Add the validation mask to the data object
data['val_mask'] = val_mask

# Define the model, optimizer, and loss function as before

# Start training and validation
train_losses = []
val_losses = []
val_maes = []
for epoch in range(100):  # Number of epochs
    optimizer.zero_grad()  # Clear gradients
    out = model(data)  # Perform forward pass
    
    # Compute the loss on the training data
    train_loss = loss_func(out[data['train_mask']], data['y'][data['train_mask']])
    train_loss.backward()  # Perform backward pass
    optimizer.step()  # Update model weights
    train_losses.append(train_loss.item())  # Store the training loss from this epoch

    # Compute the loss on the validation data, without affecting gradients
    with torch.no_grad():
        val_out = out[data['val_mask']]
        val_loss = loss_func(val_out, data['y'][data['val_mask']])
        val_losses.append(val_loss.item())  # Store the validation loss from this epoch

        # Compute the mean absolute error on the validation data
        val_mae = F.l1_loss(val_out, data['y'][data['val_mask']])
        val_maes.append(val_mae.item())  # Store the validation MAE from this epoch

    print(f'Epoch: {epoch+1}, Training Loss: {train_loss.item()}, Validation Loss: {val_loss.item()}, Validation MAE: {val_mae.item()}')


  return F.mse_loss(input, target, reduction=self.reduction)
  val_mae = F.l1_loss(val_out, data['y'][data['val_mask']])


Epoch: 1, Training Loss: 0.9659696817398071, Validation Loss: 1.028355360031128, Validation MAE: 0.8119622468948364
Epoch: 2, Training Loss: 0.9640079736709595, Validation Loss: 0.9815146923065186, Validation MAE: 0.7937741875648499
Epoch: 3, Training Loss: 0.964176595211029, Validation Loss: 0.9778414964675903, Validation MAE: 0.793232798576355
Epoch: 4, Training Loss: 0.9652591943740845, Validation Loss: 0.975040853023529, Validation MAE: 0.791785478591919
Epoch: 5, Training Loss: 0.9659022092819214, Validation Loss: 1.0356346368789673, Validation MAE: 0.81059330701828
Epoch: 6, Training Loss: 0.9656134247779846, Validation Loss: 0.967863917350769, Validation MAE: 0.7901021838188171
Epoch: 7, Training Loss: 0.9636872410774231, Validation Loss: 0.9732804894447327, Validation MAE: 0.7918275594711304
Epoch: 8, Training Loss: 0.9636361598968506, Validation Loss: 1.050524115562439, Validation MAE: 0.8045141696929932
Epoch: 9, Training Loss: 0.9621676802635193, Validation Loss: 0.980370938

Epoch: 73, Training Loss: 0.95988529920578, Validation Loss: 0.9495419859886169, Validation MAE: 0.7828983664512634
Epoch: 74, Training Loss: 0.9602516293525696, Validation Loss: 0.9552556872367859, Validation MAE: 0.7838786840438843
Epoch: 75, Training Loss: 0.9604790210723877, Validation Loss: 0.9461085796356201, Validation MAE: 0.7811239957809448
Epoch: 76, Training Loss: 0.9597371816635132, Validation Loss: 0.946458637714386, Validation MAE: 0.7808794379234314
Epoch: 77, Training Loss: 0.9606422781944275, Validation Loss: 1.0070894956588745, Validation MAE: 0.796288251876831
Epoch: 78, Training Loss: 0.9601771831512451, Validation Loss: 0.9917725324630737, Validation MAE: 0.7937639355659485
Epoch: 79, Training Loss: 0.9600672125816345, Validation Loss: 0.9488013386726379, Validation MAE: 0.7811232209205627
Epoch: 80, Training Loss: 0.959846019744873, Validation Loss: 0.9425500631332397, Validation MAE: 0.7791012525558472
Epoch: 81, Training Loss: 0.9595492482185364, Validation Loss