In [3]:
!pip install optuna



In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import optuna
from optuna.trial import TrialState
from optuna.visualization import plot_slice, plot_param_importances
from load import Load_Data_Construct_A_For_GNN

In [5]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

In [6]:
class GCN_layer(nn.Module):
  """One graph-convolution layer: ``adj @ (x @ W)`` plus an optional bias.

  Args:
    nfeat: number of input features per node (rows of ``W``).
    nhid: number of output features per node (columns of ``W``).
    bias: if True, a learnable ``(1, nhid)`` bias is added to the output;
      if False, ``bias`` is registered as ``None``.
  """

  def __init__(self, nfeat, nhid, bias=True):
    super().__init__()
    self.W = nn.Parameter(torch.randn(nfeat, nhid))
    if not bias:
      self.register_parameter('bias', None)
    else:
      self.bias = nn.Parameter(torch.randn(1, nhid))
    # Both tensors are 2-D, so Xavier-uniform initialisation applies to each;
    # it overwrites the randn values drawn above.
    for param in (self.W, self.bias):
      if param is not None:
        nn.init.xavier_uniform_(param)

  def forward(self, x, adj):
    """Project the features with ``W``, propagate them through ``adj``, add bias if any."""
    projected = torch.matmul(x, self.W)
    propagated = torch.matmul(adj, projected)
    if self.bias is None:
      return propagated
    return propagated + self.bias

In [13]:
class GCN(nn.Module):
  """GCN classifier whose depth, hidden widths and dropout rates are sampled by Optuna.

  The stack is ``n_layers`` repetitions of (GCN_layer -> ReLU -> Dropout)
  followed by a final GCN_layer that maps to ``n_classes`` outputs.

  Args:
    trial: Optuna trial used to sample ``"layer"`` (1-3 hidden layers) and,
      for each hidden layer ``i``, ``"n_unit_l{i}"`` (16-128 units) and
      ``"layer_dropout{i}"`` (0.2-0.5).
    in_features: number of input features per node. Defaults to 1433, the
      value previously hard-coded here (the Cora feature dimension).
    n_classes: number of output classes. Defaults to 7, the value previously
      hard-coded here (the Cora class count).
  """

  def __init__(self, trial, in_features=1433, n_classes=7):
    super().__init__()
    n_layers = trial.suggest_int("layer", 1, 3)
    self.layers = nn.ModuleList()
    for i in range(n_layers):
      out_features = trial.suggest_int("n_unit_l{}".format(i), 16, 128)
      self.layers.append(GCN_layer(in_features, out_features))
      self.layers.append(nn.ReLU())
      p = trial.suggest_float("layer_dropout{}".format(i), 0.2, 0.5)
      self.layers.append(nn.Dropout(p))

      # The next layer consumes what this one produced.
      in_features = out_features

    # Output layer: no activation or dropout, log-softmax is applied in forward().
    self.layers.append(GCN_layer(in_features, n_classes))

  def forward(self, x, adj):
    """Return per-node log-probabilities over the classes.

    Args:
      x: node feature matrix passed to the first GCN_layer.
      adj: adjacency matrix passed to every GCN_layer.
    """
    for layer in self.layers:
      # Only graph-convolution layers take the adjacency matrix;
      # ReLU and Dropout act on the features alone.
      if isinstance(layer, GCN_layer):
        x = layer(x, adj)
      else:
        x = layer(x)
    return F.log_softmax(x, dim=1)

In [14]:
def objective(trial, n_epochs=200,
              content_path='/content/drive/MyDrive/cora.content.txt',
              cites_path='/content/drive/MyDrive/cora.cites'):
  """Optuna objective: train a sampled GCN and return its validation accuracy.

  Samples the architecture (via ``GCN``), the optimizer (``"optimizer"``:
  Adam, RMSprop or SGD) and the learning rate (``"lr"``, log-uniform in
  [1e-5, 1e-1]), trains full-batch for ``n_epochs`` epochs, and reports the
  validation accuracy to the trial after every epoch so it can be pruned.

  Args:
    trial: the Optuna trial supplying the hyperparameters.
    n_epochs: number of training epochs (default 200).
    content_path: path passed to the data loader as ``dir1``.
    cites_path: path passed to the data loader as ``dir2``.

  Returns:
    Validation accuracy after the last epoch (0.0 if ``n_epochs`` is 0).

  Raises:
    optuna.exceptions.TrialPruned: when the pruner decides to stop the trial.
  """
  model = GCN(trial).to(device)
  optimizer_name = trial.suggest_categorical("optimizer", ["Adam", "RMSprop", "SGD"])
  lr = trial.suggest_float("lr", 1e-5, 1e-1, log=True)
  optimizer = getattr(torch.optim, optimizer_name)(model.parameters(), lr=lr)

  # NOTE(review): the dataset is re-read on every trial; if load_data() is
  # deterministic it could be loaded once outside the objective - confirm.
  data = Load_Data_Construct_A_For_GNN(dir1=content_path, dir2=cites_path, device=device)
  fts, lbls, A, idx_train, idx_val, _ = data.load_data()  # test indices are unused here

  # Loop-invariant label slices, computed once instead of every epoch.
  train_labels = lbls[idx_train]
  val_labels = lbls[idx_val]

  acc = 0.0  # keeps the return value defined when n_epochs == 0
  for epoch in range(n_epochs):
    # Full-batch training step on the training nodes.
    model.train()
    optimizer.zero_grad()
    y_hat = model(fts, A)
    loss = F.nll_loss(y_hat[idx_train], train_labels)
    loss.backward()
    optimizer.step()

    # Validation accuracy with dropout disabled and no gradient tracking.
    model.eval()
    with torch.no_grad():
      pred = model(fts, A)[idx_val].argmax(dim=1)
      acc = (pred == val_labels).sum().item() / len(val_labels)

    # Report the intermediate value so the pruner can stop weak trials early.
    trial.report(acc, epoch)
    if trial.should_prune():
      raise optuna.exceptions.TrialPruned()
  return acc

In [15]:
# Seed both the hyperparameter sampler and PyTorch so a rerun explores the same
# configurations with the same weight initialisation. TPESampler is the sampler
# create_study uses by default, so only the seed changes. The wall-clock
# timeout can still end the study after a different number of trials.
SEED = 42
torch.manual_seed(SEED)
study = optuna.create_study(direction='maximize',
                            sampler=optuna.samplers.TPESampler(seed=SEED))
study.optimize(objective, n_trials=50, timeout=600)

complete_trials = study.get_trials(deepcopy=False, states=[TrialState.COMPLETE])
pruned_trials = study.get_trials(deepcopy=False, states=[TrialState.PRUNED])

print("Study Statistics: ")
print("Number of finished trials: ", len(study.trials))
print("Number of Complete trials: ", len(complete_trials))
print("Number of pruned trials: ", len(pruned_trials))

print('Best trials: ')
# study.best_trial raises ValueError when no trial has completed
# (e.g. every trial was pruned), so only report when one exists.
if complete_trials:
  trial = study.best_trial

  print('Value: ', trial.value)

  print('Params: ')
  for key, value in trial.params.items():
    print("{}: {}".format(key, value))
else:
  print('No trial completed; nothing to report.')


[I 2024-07-07 15:51:20,538] A new study created in memory with name: no-name-ce4d9a7f-7d0f-4db4-b969-43d12e776c5d
[I 2024-07-07 15:51:26,785] Trial 0 finished with value: 0.16 and parameters: {'layer': 3, 'n_unit_l0': 48, 'layer_dropout0': 0.3236035160865369, 'n_unit_l1': 69, 'layer_dropout1': 0.24172960463262658, 'n_unit_l2': 19, 'layer_dropout2': 0.43200236174913353, 'optimizer': 'SGD', 'lr': 0.00012691058057702283}. Best is trial 0 with value: 0.16.
[I 2024-07-07 15:51:31,449] Trial 1 finished with value: 0.16 and parameters: {'layer': 1, 'n_unit_l0': 127, 'layer_dropout0': 0.3678246906047736, 'optimizer': 'SGD', 'lr': 0.0037109008219196166}. Best is trial 0 with value: 0.16.
[I 2024-07-07 15:51:36,825] Trial 2 finished with value: 0.79875 and parameters: {'layer': 3, 'n_unit_l0': 42, 'layer_dropout0': 0.38796996701125475, 'n_unit_l1': 106, 'layer_dropout1': 0.47186107116006243, 'n_unit_l2': 115, 'layer_dropout2': 0.43984136589617717, 'optimizer': 'Adam', 'lr': 0.00533193993034576}.

Study Statistics: 
Number of finished trials:  50
Number of Complete trials:  19
Number of pruned trials:  31
Best trials: 
Value:  0.8125
Params: 
layer: 1
n_unit_l0: 40
layer_dropout0: 0.49934888499569025
optimizer: RMSprop
lr: 0.008471792529578843


In [17]:
# Slice plot of the finished study: objective value against each sampled hyperparameter.
plot_slice(study)

In [16]:
# Bar chart of Optuna's estimated importance of each hyperparameter for the objective.
plot_param_importances(study)