In [None]:
import argparse
import datetime
import gc
import os
import pickle
import random
import time

In [None]:
import numpy as np
import pandas as pd
import torch
import torch.optim as optim
from torch.autograd import Variable
from torch.optim import lr_scheduler

In [None]:
from metrics import KTLoss, graph_attention_loss
from models import RouteKT
from process import my_load_dataset

In [None]:
# Turn on autograd anomaly detection for the whole session.
# NOTE(review): PyTorch documents this mode as a debugging aid that slows
# execution; consider switching it off for full-length training runs.
torch.autograd.set_detect_anomaly(True)

# Drop cached CUDA allocator blocks that may be left over from earlier cell runs.
torch.cuda.empty_cache()

# Prefer the first GPU when CUDA is usable, otherwise run on the CPU.
# (To force CPU execution, assign torch.device("cpu") here instead.)
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [None]:
def _str2bool(value):
    """Convert a command-line token into a real boolean.

    ``type=bool`` is a trap with argparse: ``bool("False")`` is ``True`` because
    any non-empty string is truthy, so ``--shuffle False`` could never disable
    shuffling.  This converter accepts the usual spellings and rejects the rest.

    Raises:
        argparse.ArgumentTypeError: if ``value`` is not a recognised boolean word.
    """
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ('true', 't', 'yes', 'y', 'on', '1'):
        return True
    if token in ('false', 'f', 'no', 'n', 'off', '0', ''):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got {!r}.'.format(value))


parser = argparse.ArgumentParser()
# store_true (not store_false): with default=False the flag must flip the value to True,
# otherwise passing --no-cuda has no effect at all.
parser.add_argument('--no-cuda', action='store_true', default=False, help='Disables CUDA training.')
parser.add_argument('--seed', type=int, default=42, help='Random seed.')
parser.add_argument('--data-dir', type=str, default='data', help='Data dir for loading input data.')
parser.add_argument('--data-file', type=str, default='phy-micro-extra.csv', help='Name of input data file.')
parser.add_argument('--knowledge-file', type=str, default='KG_phy.xlsx', help='Name of knowledge graph data file.')

parser.add_argument('--save-dir', type=str, default='logs',
                    help='Where to save the trained model, leave empty to not save anything.')
parser.add_argument('--load-dir', type=str, default='',
                    help='Where to load the trained model if finetunning. ' + 'Leave empty to train from scratch')

parser.add_argument('--model', type=str, default='RouteKT', help='Model type to use.')
parser.add_argument('--head', type=int, default=2, help='Number of heads.')
parser.add_argument('--hidden-dim', type=int, default=128, help='Dimension of hidden knowledge states.')
parser.add_argument('--embedding_dim', type=int, default=128, help='Dimension of concept embedding.')
parser.add_argument('--dropout', type=float, default=0.0, help='Dropout rate (1 - keep probability).')
parser.add_argument('--bias', type=_str2bool, default=True, help='Whether to add bias for neural network layers.')
parser.add_argument('--k1', type=float, default=1e-3, help='the loss for 1 layers.')
parser.add_argument('--k2', type=float, default=1e-3, help='the loss for 2 layers.')

parser.add_argument('--epochs', type=int, default=50, help='Number of epochs to train.')
parser.add_argument('--batch-size', type=int, default=8, help='Number of samples per batch.')
parser.add_argument('--train-ratio', type=float, default=0.7, help='The ratio of training samples in a dataset.')
parser.add_argument('--val-ratio', type=float, default=0.2, help='The ratio of validation samples in a dataset.')
parser.add_argument('--shuffle', type=_str2bool, default=True, help='Whether to shuffle the dataset or not.')
parser.add_argument('--lr', type=float, default=0.001, help='Initial learning rate.')
parser.add_argument('--lr-decay', type=int, default=200, help='After how epochs to decay LR by a factor of gamma.')
parser.add_argument('--gamma', type=float, default=0.5, help='LR decay factor.')
parser.add_argument('--test', type=_str2bool, default=False, help='Whether to test for existed model.')
parser.add_argument('--test-model-dir', type=str, default='logs/expRouteKT', help='Existed model file dir.')

# Unlike a plain .py script, a notebook kernel is started with its own argv;
# parse_known_args() ignores whatever it does not recognise instead of exiting.
args = parser.parse_known_args()[0]
args.cuda = not args.no_cuda and torch.cuda.is_available()
# Keep the global device in step with the flag so that --no-cuda really keeps
# every tensor (model, batches and graph data) on the CPU.
device = torch.device("cuda:0" if args.cuda else "cpu")
print(args)

In [None]:
# Seed the Python, NumPy and PyTorch random number generators from --seed.
random.seed(args.seed)
np.random.seed(args.seed)
torch.manual_seed(args.seed)

if args.cuda:
    # Seed the CUDA generators as well (current device, then all devices).
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    # Ask cuDNN for deterministic kernels and disable its benchmark mode.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

In [None]:
# Save model and meta-data. Always saves in a new sub-folder.
log = None
save_dir = args.save_dir
# Defined outside the branch: the --load-dir cell builds checkpoint names from
# it, which used to raise NameError whenever --save-dir was empty.
model_file_name = args.model
if args.save_dir:
    now = datetime.datetime.now()
    timestamp = now.strftime('%Y-%m-%d %H-%M-%S')
    # One folder per run: <save-dir>/exp<model><timestamp>/
    save_dir = '{}/exp{}/'.format(args.save_dir, model_file_name + timestamp)
    os.makedirs(save_dir, exist_ok=True)
    meta_file = os.path.join(save_dir, 'metadata.pkl')
    model_file = os.path.join(save_dir, model_file_name + '.pt')
    optimizer_file = os.path.join(save_dir, model_file_name + '-Optimizer.pt')
    scheduler_file = os.path.join(save_dir, model_file_name + '-Scheduler.pt')
    log_file = os.path.join(save_dir, 'log.txt')
    # The log handle stays open on purpose; the last cell closes it.
    log = open(log_file, 'w')
    # Close the metadata file deterministically instead of leaking the handle.
    with open(meta_file, "wb") as meta_handle:
        pickle.dump({'args': args}, meta_handle)
else:
    print("WARNING: No save_dir provided!" + "Testing (within this script) will throw an error.")

In [None]:
# Load the dataset: the three counts, the graph edge data and the
# train/validation/test loaders all come back from a single call.
(student_num, concept_num, question_num,
 whole_edge_index, whole_edge_attr, edge_index, edge_attr, edge_real, attn_real,
 train_loader, valid_loader, test_loader) = my_load_dataset(
    args.data_dir,
    args.data_file,
    args.knowledge_file,
    args.batch_size,
    train_ratio=args.train_ratio,
    val_ratio=args.val_ratio,
    shuffle=args.shuffle,
)

In [None]:
# Sanity check: take the first training batch and print the shape of each field.
for data in train_loader:
    students, questions, features, features_len, routes, routes_len, answers = data
    for batch_field in (students, questions, features, features_len, routes, routes_len, answers):
        print(batch_field.shape)
    break  # one batch is enough


In [None]:
# Model and loss, sized from the counts returned by the dataset loader.
model = RouteKT(
    concept_num, question_num, student_num,
    args.head, args.hidden_dim, args.embedding_dim,
    dropout=args.dropout,
    bias=args.bias,
)
kt_loss = KTLoss()

# Adam optimizer with a StepLR schedule: the LR is multiplied by --gamma
# every --lr-decay calls to scheduler.step().
optimizer = optim.Adam(model.parameters(), lr=args.lr)
scheduler = lr_scheduler.StepLR(optimizer, gamma=args.gamma, step_size=args.lr_decay)

In [None]:
# load model/optimizer/scheduler params

# Move the model first: Optimizer.load_state_dict places the restored state on
# the device of the parameters, so the parameters must already be where
# training will happen or the optimizer state would be left behind on the CPU.
if args.cuda:
    model = model.to(device)
    kt_loss = KTLoss()  # NOTE(review): re-creates the loss built earlier; looks redundant — confirm.

if args.load_dir:
    # All three state files are read from --load-dir. Previously the optimizer
    # and scheduler paths pointed at the freshly created (empty) save folder,
    # so resuming always failed with a missing-file error.
    model_file = os.path.join(args.load_dir, args.model + '.pt')
    optimizer_file = os.path.join(args.load_dir, args.model + '-Optimizer.pt')
    scheduler_file = os.path.join(args.load_dir, args.model + '-Scheduler.pt')
    # map_location lets a checkpoint written on a GPU be restored on a CPU-only host.
    model.load_state_dict(torch.load(model_file, map_location=device))
    optimizer.load_state_dict(torch.load(optimizer_file, map_location=device))
    scheduler.load_state_dict(torch.load(scheduler_file, map_location=device))
    # A falsy save_dir disables checkpoint/log writes in train() and test().
    args.save_dir = False

# record the result of experiment
record_time = datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
# Per-epoch metrics go to this file; it is closed in the final cell.
f = open("result-%s.txt" % record_time, "w")

In [None]:
def _mean_or_nan(values):
    """Return ``np.mean(values)``, or NaN when ``values`` is empty.

    ``np.mean([])`` also evaluates to NaN but emits a RuntimeWarning on the way;
    this keeps the value and avoids the warning.
    """
    return np.mean(values) if len(values) > 0 else np.nan


def train(epoch, best_val_auc, f, whole_edge_index, whole_edge_attr, edge_index, edge_attr):
    """Run one training epoch followed by one validation pass.

    Uses the notebook-level ``model``, ``kt_loss``, ``optimizer``, ``scheduler``,
    ``train_loader``, ``valid_loader``, ``args``, ``device`` and ``log``.

    Args:
        epoch: index of the current epoch (only used in the report lines).
        best_val_auc: best mean validation AUC seen in earlier epochs.
        f: writable text file that receives one summary line per epoch.
        whole_edge_index, whole_edge_attr, edge_index, edge_attr: graph data
            forwarded to the model unchanged after being moved to ``device``.

    Returns:
        The mean validation AUC of this epoch (NaN if no batch produced one).

    Side effects:
        Updates the model/optimizer/scheduler, prints progress, appends to ``f``
        and, when ``args.save_dir`` is set and the validation AUC strictly
        improves, writes the three state files and a line to ``log``.
    """
    # process the graph data
    whole_edge_index, whole_edge_attr = whole_edge_index.to(device), whole_edge_attr.to(device)
    edge_index, edge_attr = edge_index.to(device), edge_attr.to(device)

    t = time.time()
    loss_train, kt_train, auc_train, acc_train = [], [], [], []
    model.train()

    for batch_idx, batch in enumerate(train_loader):
        time_start = time.time()

        if args.cuda:
            batch = tuple(tensor.to(device) for tensor in batch)
        students, questions, features, features_len, routes, routes_len, answers = batch

        # Only the prediction is needed here; the other three outputs are ignored.
        _batch_size, _attn_0, _state, pred_res = model(
            students, questions,
            features, features_len, routes, routes_len,
            answers, whole_edge_index, whole_edge_attr, edge_index, edge_attr)

        loss_kt, auc, acc = kt_loss(pred_res, answers)
        loss = loss_kt  # the KT loss is the only term optimised

        kt_value = loss_kt.item()
        loss_value = loss.item()
        kt_train.append(kt_value)
        # kt_loss reports -1 for a metric it could not produce; skip those batches.
        if auc != -1 and acc != -1:
            auc_train.append(auc)
            acc_train.append(acc)

        print('batch idx: ', batch_idx, 'loss kt: ', kt_value, 'loss all: ', loss_value,
              'auc: ', auc, 'acc: ', acc, end=' ')
        loss_train.append(loss_value)

        loss.backward()
        optimizer.step()
        # NOTE(review): the scheduler advances once per batch, while the --lr-decay
        # help text speaks of epochs — confirm which unit is intended.
        scheduler.step()
        optimizer.zero_grad()
        print('cost time: ', str(time.time() - time_start))

    loss_val, kt_val, auc_val, acc_val = [], [], [], []

    model.eval()
    with torch.no_grad():
        for batch in valid_loader:
            if args.cuda:
                batch = tuple(tensor.to(device) for tensor in batch)
            students, questions, features, features_len, routes, routes_len, answers = batch

            _batch_size, _attn_0, _state, pred_res = model(
                students, questions,
                features, features_len, routes, routes_len,
                answers, whole_edge_index, whole_edge_attr, edge_index, edge_attr)

            loss_kt, auc, acc = kt_loss(pred_res, answers)
            kt_value = loss_kt.item()
            kt_val.append(kt_value)
            if auc != -1 and acc != -1:
                auc_val.append(auc)
                acc_val.append(acc)
            loss_val.append(kt_value)  # total loss equals the KT loss

    # Format the epoch summary once and reuse it for the result file, stdout and the log.
    mean_auc_val = _mean_or_nan(auc_val)
    epoch_part = 'Epoch: {:04d}'.format(epoch)
    train_part = ('loss_kt: {:.10f} '.format(_mean_or_nan(kt_train)) +
                  'loss_train: {:.10f} '.format(_mean_or_nan(loss_train)) +
                  'auc_train: {:.10f} '.format(_mean_or_nan(auc_train)) +
                  'acc_train: {:.10f}'.format(_mean_or_nan(acc_train)))
    # The trailing blank is kept so the printed lines look exactly as before.
    kt_val_part = 'kt_val: {:.10f} '.format(_mean_or_nan(kt_val))
    val_part = ('loss_val: {:.10f} '.format(_mean_or_nan(loss_val)) +
                'auc_val: {:.10f} '.format(mean_auc_val) +
                'acc_val: {:.10f}'.format(_mean_or_nan(acc_val)))
    time_part = 'time: {:.4f}s'.format(time.time() - t)

    f.write(epoch_part + ' ' + train_part + ' ' + kt_val_part + val_part + ' ' + time_part + '\n')
    f.flush()  # keep the result file current if the kernel is interrupted mid-run
    print(epoch_part, train_part, kt_val_part, val_part, time_part)

    # Strict '>' mirrors the caller's best-epoch bookkeeping, so the checkpoint on
    # disk always belongs to the epoch that is reported as the best one.
    if args.save_dir and mean_auc_val > best_val_auc:
        print('Best model so far, saving...')
        torch.save(model.state_dict(), model_file)
        torch.save(optimizer.state_dict(), optimizer_file)
        torch.save(scheduler.state_dict(), scheduler_file)

        print(epoch_part, train_part, kt_val_part, val_part, time_part, file=log)
        log.flush()

    gc.collect()
    if args.cuda:
        torch.cuda.empty_cache()
    return mean_auc_val

In [None]:
def test(whole_edge_index, whole_edge_attr, edge_index, edge_attr):
    """Evaluate the checkpoint stored at ``model_file`` on ``test_loader``.

    Uses the notebook-level ``model``, ``kt_loss``, ``test_loader``, ``args``,
    ``device``, ``model_file`` and ``log``.  Prints the mean loss, AUC and
    accuracy, and repeats the report in ``log`` when ``args.save_dir`` is set.

    Args:
        whole_edge_index, whole_edge_attr, edge_index, edge_attr: graph data
            forwarded to the model unchanged after being moved to ``device``.

    NOTE(review): ``--test-model-dir`` is not consulted here (nor anywhere else
    visible in this notebook); the weights always come from ``model_file``.
    """
    # process the graph data
    whole_edge_index, whole_edge_attr = whole_edge_index.to(device), whole_edge_attr.to(device)
    edge_index, edge_attr = edge_index.to(device), edge_attr.to(device)

    # map_location lets a checkpoint written on a GPU be evaluated on a CPU-only host.
    model.load_state_dict(torch.load(model_file, map_location=device))
    model.eval()

    loss_test, auc_test, acc_test = [], [], []

    with torch.no_grad():
        for batch in test_loader:
            if args.cuda:
                batch = tuple(tensor.to(device) for tensor in batch)
            students, questions, features, features_len, routes, routes_len, answers = batch

            # Only the prediction is needed here; the other three outputs are ignored.
            _batch_size, _attn_0, _state, pred_res = model(
                students, questions,
                features, features_len, routes, routes_len,
                answers, whole_edge_index, whole_edge_attr, edge_index, edge_attr)

            loss_kt, auc, acc = kt_loss(pred_res, answers)
            # kt_loss reports -1 for a metric it could not produce; skip those batches.
            if auc != -1 and acc != -1:
                auc_test.append(auc)
                acc_test.append(acc)
            loss_test.append(loss_kt.item())

    banner = ('--------------------------------',
              '--------Testing-----------------',
              '--------------------------------')
    summary = ('loss_test: {:.10f}'.format(np.mean(loss_test)),
               'auc_test: {:.10f}'.format(np.mean(auc_test)),
               'acc_test: {:.10f}'.format(np.mean(acc_test)))

    for banner_line in banner:
        print(banner_line)
    print(*summary)
    if args.save_dir:
        for banner_line in banner:
            print(banner_line, file=log)
        print(*summary, file=log)
        log.flush()

    gc.collect()
    if args.cuda:
        torch.cuda.empty_cache()

In [None]:
# Train model (this cell does nothing when --test is set)
if args.test is False:

    print('start training!')
    t_total = time.time()
    best_epoch = 0
    best_val_auc = -np.inf
    for epoch in range(args.epochs):
        val_auc = train(epoch, best_val_auc, f, whole_edge_index, whole_edge_attr, edge_index, edge_attr)
        # Remember the epoch whose validation AUC strictly beats every earlier one.
        if val_auc > best_val_auc:
            best_val_auc, best_epoch = val_auc, epoch
    print("Optimization Finished!")
    best_epoch_line = "Best Epoch: {:04d}".format(best_epoch)
    print(best_epoch_line)
    if args.save_dir:
        print(best_epoch_line, file=log)
        log.flush()

In [None]:
# Evaluate on the test split, then release the log and result file handles.
test(whole_edge_index, whole_edge_attr, edge_index, edge_attr)

if log is not None:
    print(save_dir)  # show where this run's files were written
    log.close()
f.close()