In [1]:
import argparse
import datetime
import gc
import os
import pickle
import random
import time
import json

In [2]:
import numpy as np
import pandas as pd
import torch
import torch.optim as optim
from torch.autograd import Variable
from torch.optim import lr_scheduler

In [None]:
from metrics import KTLoss
from model import MSKT
from process import my_load_dataset
from utils import paper_shuffle, is_new_paper

In [None]:
# Drop any cached CUDA allocations before the run starts.
torch.cuda.empty_cache()

# Use the first CUDA device when one is visible, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")

# Anomaly detection is switched on globally for every backward pass that follows.
torch.autograd.set_detect_anomaly(True)

In [None]:
def _str2bool(value):
    """Parse a command-line token into a real boolean.

    ``type=bool`` must not be used with argparse: ``bool("False")`` is True
    because any non-empty string is truthy.  This converter accepts the usual
    spellings and rejects everything else with a proper argparse error.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('true', 't', 'yes', 'y', '1'):
        return True
    if text in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got {!r}.'.format(value))


parser = argparse.ArgumentParser()
# `store_true` so that passing --no-cuda actually sets the flag (the previous
# `store_false` + default=False combination could never produce True).
parser.add_argument('--no-cuda', action='store_true', default=False, help='Disables CUDA training.')
parser.add_argument('--seed', type=int, default=42, help='Random seed.')
parser.add_argument('--data-dir', type=str, default='data', help='Data dir for loading input data.')
parser.add_argument('--data-file', type=str, default='new_college_physics.csv', help='Name of input data file.')
parser.add_argument('--data-dict', type=str, default='college_physics.json', help='Name of exam data file.')

parser.add_argument('--save-dir', type=str, default='logs',
                    help='Where to save the trained model, leave empty to not save anything.')
parser.add_argument('--load-dir', type=str, default='',
                    help='Where to load the trained model if finetunning. ' + 'Leave empty to train from scratch')

parser.add_argument('--model', type=str, default='CDKT', help='Model type to use.')
parser.add_argument('--cognitive_num', type=int, default=6, help='cognitive levels.')
parser.add_argument('--hidden_dim', type=int, default=256, help='Dimension of hidden knowledge states.')

parser.add_argument('--dropout', type=float, default=0.1, help='Dropout rate (1 - keep probability).')
parser.add_argument('--bias', type=_str2bool, default=True, help='Whether to add bias for neural network layers.')
parser.add_argument('--disc', type=float, default=5, help='The value of disc.')
parser.add_argument('--k1', type=float, default=0.1, help='The weight of extra_loss.')
parser.add_argument('--k2', type=float, default=0.005, help='The weight of extra_loss.')

parser.add_argument('--epochs', type=int, default=100, help='Number of epochs to train.')
parser.add_argument('--batch-size', type=int, default=32, help='Number of samples per batch.')
parser.add_argument('--train-ratio', type=float, default=0.8, help='The ratio of training samples in a dataset.')
parser.add_argument('--val-ratio', type=float, default=0.2, help='The ratio of validation samples in a dataset.')
parser.add_argument('--shuffle', type=_str2bool, default=True, help='Whether to shuffle the dataset or not.')
parser.add_argument('--lr', type=float, default=0.001, help='Initial learning rate.')
parser.add_argument('--lr-decay', type=int, default=200, help='After how epochs to decay LR by a factor of gamma.')
parser.add_argument('--gamma', type=float, default=0.5, help='LR decay factor.')
parser.add_argument('--test', type=_str2bool, default=False, help='Whether to test for existed model.')
parser.add_argument('--test-model-dir', type=str, default='logs/expCDKT', help='Existed model file dir.')

# Unlike a plain .py script, a notebook kernel is started with extra argv
# entries of its own, so unknown arguments are tolerated instead of rejected.
args = parser.parse_known_args()[0]
# CUDA is used only when it was not disabled on the command line AND is available.
args.cuda = not args.no_cuda and torch.cuda.is_available()
# args.cuda = False
print(args)

In [6]:
# Seed the Python, NumPy and PyTorch generators with the same configured value.
for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    seed_fn(args.seed)

if args.cuda:
    # Seed the current CUDA device and then all CUDA devices.
    torch.cuda.manual_seed(args.seed)
    torch.cuda.manual_seed_all(args.seed)
    # Turn off the cuDNN auto-tuner and request deterministic cuDNN kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

In [7]:
# Save model and meta-data. Always saves in a new sub-folder.
log = None
save_dir = args.save_dir
# Defined unconditionally: the checkpoint-restore cell reads this name even
# when no save directory was requested.
model_file_name = args.model
if args.save_dir:
    timestamp = datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
    save_dir = '{}/exp{}/'.format(args.save_dir, model_file_name + timestamp)
    # exist_ok avoids the check-then-create race of a separate os.path.exists test.
    os.makedirs(save_dir, exist_ok=True)
    meta_file = os.path.join(save_dir, 'metadata.pkl')
    model_file = os.path.join(save_dir, model_file_name + '.pt')
    optimizer_file = os.path.join(save_dir, model_file_name + '-Optimizer.pt')
    scheduler_file = os.path.join(save_dir, model_file_name + '-Scheduler.pt')
    log_file = os.path.join(save_dir, 'log.txt')
    # Persist the run configuration; the context manager guarantees the
    # metadata file is flushed and closed (the handle used to be leaked).
    with open(meta_file, "wb") as meta_handle:
        pickle.dump({'args': args}, meta_handle)
    # The log stays open for the whole run and is closed by the final cell.
    log = open(log_file, 'w')
else:
    print("WARNING: No save_dir provided! " + "Testing (within this script) will throw an error.")

In [None]:
# Load the dataset: the loader returns three size counters followed by the
# train / validation / test data loaders.
(student_num, concept_num, exercise_num,
 train_loader, valid_loader, test_loader) = my_load_dataset(
    args.data_dir,
    args.data_file,
    args.batch_size,
    train_ratio=args.train_ratio,
    val_ratio=args.val_ratio,
    shuffle=args.shuffle,
)

# Expose the dataset sizes on `args` so the model constructor can read them.
args.student_num, args.concept_num, args.question_num = student_num, concept_num, exercise_num

In [None]:
# Read the exam data file (the "ideal score" data) from the data directory.
dict_path = os.path.join(args.data_dir, args.data_dict)
with open(dict_path, 'r', encoding='utf-8') as json_file:
    exam_origin_dic = json.load(json_file)

# JSON object keys are always strings; re-key the mapping by integer id.
exam_dic = {int(key): value for key, value in exam_origin_dic.items()}

# Quick inspection of the mapping size and of the entry stored under key 0.
print(len(exam_dic))
temp = exam_dic[0]
print(type(temp))
print(len(temp), len(temp[0]))
print(temp)

In [None]:
# Build the model and the loss object from the parsed configuration.
model = MSKT(
    args.concept_num,
    args.cognitive_num,
    args.question_num,
    args.hidden_dim,
    args.disc,
    dropout=args.dropout,
    bias=args.bias,
)
kt_loss = KTLoss(exam_dic, k1=args.k1, k2=args.k2)

# Build the Adam optimizer and a step-wise learning-rate schedule on top of it.
optimizer = optim.Adam(params=model.parameters(), lr=args.lr)
scheduler = lr_scheduler.StepLR(optimizer=optimizer, step_size=args.lr_decay, gamma=args.gamma)

In [None]:
# List every learnable parameter together with its tensor size.
for param_name, param in model.named_parameters():
    print('{} : {}'.format(param_name, param.size()))

In [None]:
# Device on which the model lives for this run; checkpoints are mapped onto it.
load_device = device if args.cuda else torch.device("cpu")

# Move the model BEFORE restoring optimizer state: Optimizer.load_state_dict
# casts the restored state to the device of the parameters it belongs to, so
# loading first and moving afterwards would leave Adam's buffers on the CPU.
if args.cuda:
    model = model.to(device)

# load model/optimizer/scheduler params
if args.load_dir:
    # All three files come from the directory being resumed from.  (The
    # optimizer/scheduler paths used to point at the freshly created, still
    # empty save directory.)  `args.model` is used for the file stem so this
    # cell does not depend on a name that only exists when saving is enabled.
    model_file = os.path.join(args.load_dir, args.model + '.pt')
    optimizer_file = os.path.join(args.load_dir, args.model + '-Optimizer.pt')
    scheduler_file = os.path.join(args.load_dir, args.model + '-Scheduler.pt')
    # map_location lets a checkpoint written on GPU be restored on a CPU-only host.
    model.load_state_dict(torch.load(model_file, map_location=load_device))
    optimizer.load_state_dict(torch.load(optimizer_file, map_location=load_device))
    scheduler.load_state_dict(torch.load(scheduler_file))
    # NOTE(review): checkpoint saving is switched off when resuming, so the
    # loaded files are never overwritten -- but this also means the final test
    # evaluates the loaded weights rather than the fine-tuned ones.  Confirm
    # this is intended.
    args.save_dir = False

# record the result in txt
record_time = datetime.datetime.now().strftime('%Y-%m-%d %H-%M-%S')
# Kept open for the whole run; closed in the final cell.
f = open("result-%s.txt" % record_time, "w")

In [None]:
def _run_batch(batch):
    """Run the model and the loss on one mini-batch.

    Shared by the training and validation loops so both evaluate a batch in
    exactly the same way.

    Args:
        batch: one item yielded by a data loader; it must unpack into
            (students, questions, features, difficulties, papers, answers).

    Returns:
        The ``(loss_kt, auc, acc)`` triple returned by ``kt_loss``.
    """
    students, questions, features, difficulties, papers, answers = batch

    if args.cuda:
        students = students.to(device)
        questions = questions.to(device)
        features = features.to(device)
        difficulties = difficulties.to(device)
        papers = papers.to(device)
        answers = answers.to(device)

    # encode the exam information
    papers, need_score = is_new_paper(papers)
    # shuffle the origin sequence
    new_questions, new_features, new_difficulties, new_papers, new_answers = paper_shuffle(
        questions, features, difficulties, papers, answers)
    # One forward pass on the original sequence, one on the shuffled sequence;
    # only the first output of the shuffled pass is used by the loss.
    ht, yt, pred_res = model(features, questions, difficulties, papers, answers)
    ht_trans, _, _ = model(new_features, new_questions, new_difficulties, new_papers, new_answers)

    # answers [batch_size, seq_len] (per the original author's note)
    return kt_loss(students, pred_res, answers, ht, ht_trans, yt, need_score)


def _mean_or_nan(values):
    """Return the mean of `values`, or NaN (without a NumPy warning) when empty."""
    return float(np.mean(values)) if len(values) else float('nan')


def train(epoch, best_val_loss, f):
    """Train for one epoch, validate, log the metrics and checkpoint on improvement.

    Args:
        epoch: index of the current epoch (used for logging only).
        best_val_loss: best mean validation loss seen so far; the model,
            optimizer and scheduler are saved when this epoch beats it and
            ``args.save_dir`` is set.
        f: writable text file object; one summary line is appended per epoch.

    Returns:
        The mean validation loss of this epoch (NaN if the validation loader
        yielded no batches).
    """
    t = time.time()
    loss_train, auc_train, acc_train = [], [], []

    model.train()
    for batch_idx, batch in enumerate(train_loader):
        time_start = time.time()

        loss, auc, acc = _run_batch(batch)
        loss_value = loss.item()
        loss_train.append(loss_value)
        # kt_loss signals "metric not available for this batch" with -1.
        if auc != -1 and acc != -1:
            auc_train.append(auc)
            acc_train.append(acc)

        print('batch idx: ', batch_idx, 'loss kt: ', loss_value, 'auc: ', auc, 'acc: ', acc, end=' ')

        # Clear gradients first so that stale values (e.g. from an interrupted
        # previous cell run) can never leak into this update.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        del loss
        print('cost time: ', str(time.time() - time_start))

    # StepLR counts epochs and --lr-decay is documented in epochs, so the
    # schedule advances once per epoch (it used to be stepped once per batch,
    # which decayed the learning rate far more often than configured).
    scheduler.step()

    loss_val, auc_val, acc_val = [], [], []

    model.eval()
    with torch.no_grad():
        for batch in valid_loader:
            loss_kt, auc, acc = _run_batch(batch)
            loss_val.append(loss_kt.item())
            if auc != -1 and acc != -1:
                auc_val.append(auc)
                acc_val.append(acc)

    mean_val_loss = _mean_or_nan(loss_val)

    # Build the summary once and reuse it for the result file, stdout and the log.
    summary = ' '.join([
        'Epoch: {:04d}'.format(epoch),
        'loss_train: {:.10f}'.format(_mean_or_nan(loss_train)),
        'auc_train: {:.10f}'.format(_mean_or_nan(auc_train)),
        'acc_train: {:.10f}'.format(_mean_or_nan(acc_train)),
        'loss_val: {:.10f}'.format(mean_val_loss),
        'auc_val: {:.10f}'.format(_mean_or_nan(auc_val)),
        'acc_val: {:.10f}'.format(_mean_or_nan(acc_val)),
        'time: {:.4f}s'.format(time.time() - t),
    ])
    f.write(summary + '\n')
    print(summary)

    # A NaN validation loss never compares as smaller, so it is never checkpointed.
    if args.save_dir and mean_val_loss < best_val_loss:
        print('Best model so far, saving...')
        torch.save(model.state_dict(), model_file)
        torch.save(optimizer.state_dict(), optimizer_file)
        torch.save(scheduler.state_dict(), scheduler_file)

        print(summary, file=log)
        log.flush()

    # Release Python garbage and cached CUDA blocks between epochs.
    gc.collect()
    if args.cuda:
        torch.cuda.empty_cache()
    return mean_val_loss

In [None]:
def test():
    """Evaluate a saved checkpoint on the test loader and report mean loss/AUC/ACC.

    The checkpoint is the file named by the notebook-level ``model_file`` when
    that file exists.  In test-only mode (``args.test``), where no training has
    written such a file, the checkpoint is taken from ``args.test_model_dir``
    instead.  Results are printed and, when ``args.save_dir`` is set, also
    written to the run log.

    Raises:
        FileNotFoundError: if no checkpoint path can be determined, or the
            chosen file does not exist.
    """
    # `model_file` is only defined when a save or load directory was configured.
    checkpoint_path = globals().get('model_file')
    if args.test and not (checkpoint_path and os.path.isfile(checkpoint_path)):
        # --test-model-dir was previously parsed but never used, so test-only
        # runs tried to load from a freshly created, empty experiment folder.
        checkpoint_path = os.path.join(args.test_model_dir, args.model + '.pt')
    if not checkpoint_path:
        raise FileNotFoundError('No model checkpoint available: set --save-dir, --load-dir or '
                                'use --test with --test-model-dir.')

    # map_location lets a checkpoint written on GPU be evaluated on a CPU-only host.
    target_device = device if args.cuda else torch.device('cpu')
    model.load_state_dict(torch.load(checkpoint_path, map_location=target_device))
    model.eval()

    loss_test, auc_test, acc_test = [], [], []

    with torch.no_grad():
        for students, questions, features, difficulties, papers, answers in test_loader:
            if args.cuda:
                students = students.to(device)
                questions = questions.to(device)
                features = features.to(device)
                difficulties = difficulties.to(device)
                papers = papers.to(device)
                answers = answers.to(device)

            # encode the exam information
            papers, need_score = is_new_paper(papers)
            # shuffle the origin sequence
            new_questions, new_features, new_difficulties, new_papers, new_answers = paper_shuffle(
                questions, features, difficulties, papers, answers)
            ht, yt, pred_res = model(features, questions, difficulties, papers, answers)
            ht_trans, _, _ = model(new_features, new_questions, new_difficulties, new_papers, new_answers)

            # answers [batch_size, seq_len] (per the original author's note)
            loss_kt, auc, acc = kt_loss(students, pred_res, answers, ht, ht_trans, yt, need_score)
            loss_test.append(loss_kt.item())
            # kt_loss signals "metric not available for this batch" with -1.
            if auc != -1 and acc != -1:
                auc_test.append(auc)
                acc_test.append(acc)

    def _mean_or_nan(values):
        # Mean of the list, or NaN (without a NumPy warning) when it is empty.
        return float(np.mean(values)) if len(values) else float('nan')

    banner = ['--------------------------------',
              '--------Testing-----------------',
              '--------------------------------']
    result_line = ' '.join([
        'loss_test: {:.10f}'.format(_mean_or_nan(loss_test)),
        'auc_test: {:.10f}'.format(_mean_or_nan(auc_test)),
        'acc_test: {:.10f}'.format(_mean_or_nan(acc_test)),
    ])

    for line in banner + [result_line]:
        print(line)
    if args.save_dir:
        for line in banner + [result_line]:
            print(line, file=log)
        log.flush()

    # Release Python garbage and cached CUDA blocks after evaluation.
    gc.collect()
    if args.cuda:
        torch.cuda.empty_cache()

In [None]:
# Training is skipped entirely when the run was started in test-only mode.
if not args.test:
    # Train model
    print('start training!')
    t_total = time.time()
    best_epoch = 0
    best_val_loss = np.inf
    for epoch in range(args.epochs):
        val_loss = train(epoch, best_val_loss, f)
        # Track the lowest validation loss and the epoch that produced it.
        if val_loss < best_val_loss:
            best_val_loss, best_epoch = val_loss, epoch
    print("Optimization Finished!")
    best_epoch_line = "Best Epoch: {:04d}".format(best_epoch)
    print(best_epoch_line)
    if args.save_dir:
        print(best_epoch_line, file=log)
        log.flush()

In [None]:
# Evaluate on the test split, then release the two files opened for this run.
test()

# Per-epoch result file opened by the checkpoint-restore cell.
f.close()

# Run log; it exists only when a save directory was configured.
if log is not None:
    print(save_dir)
    log.close()