# Initialization settings

In [3]:
# Because the raw dataset is too large, only the first 50000 pieces 
# of data are taken from each dataset for demonstration.
from torch.utils import data
import torch
import torch.nn as nn
from sklearn import metrics
import tqdm
import torch.optim as optim
import numpy as np
import datetime
import os
import time
import math
from torch import Tensor
from sklearn.model_selection import train_test_split
import pandas as pd
import random
def set_seed(seed=2022):
    random.seed(seed)
    os.environ["PYTHONHASHSEED"] = str(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
set_seed(2022)
RAW_ITEM_COL = ['stu_kp_now_acc', 'stu_kp_now_use_time', 'stu_kp_now_len', 'stu_q_now_acc',
                'stu_q_now_time', 'stu_q_now_len', 'q_now_acc', 'q_now_use_time',
                'q_now_len', 'kp_now_acc', 'kp_now_use_time', 'kp_now_len', 'last_kp_time_passed', 'last_q_time_passed']
ITEM_COL = ['stu_kp_now_acc',
            'stu_kp_now_use_time',
            'stu_kp_now_len',
            'stu_q_now_acc',
            'stu_q_now_time',
            'stu_q_now_len',
            'q_now_acc',
            'q_now_use_time',
            'q_now_len',
            'kp_now_acc',
            'kp_now_use_time',
            'kp_now_len',
            'last_kp_time_passed',
            'last_q_time_passed'
           ]
RAW_ITEM_AN_COL = ['stu_kp_now_acc_an', 'stu_kp_now_use_time_an', 'stu_kp_now_len_an', 'stu_q_now_acc_an',
                   'stu_q_now_time_an', 'stu_q_now_len_an', 'q_now_acc_an', 'q_now_use_time_an',
                   'q_now_len_an', 'kp_now_acc_an', 'kp_now_use_time_an', 'kp_now_len_an',
                   'last_kp_time_passed', 'last_q_time_passed', 'use_time', 'is_right']
ITEM_AN_COL = ['stu_kp_now_acc_an',
               'stu_kp_now_use_time_an',
               'stu_kp_now_len_an',
               'stu_q_now_acc_an',
               'stu_q_now_time_an',
               'stu_q_now_len_an',
               'q_now_acc_an',
               'q_now_use_time_an',
               'q_now_len_an',
               'kp_now_acc_an',
               'kp_now_use_time_an',
               'kp_now_len_an',
               'last_kp_time_passed',
               'last_q_time_passed',
               'use_time',
               'is_right']
ITEM_AN_COL_INDEX = [RAW_ITEM_AN_COL.index(one) for one in ITEM_AN_COL]

# Read Data

In [4]:
class MyDataset(data.Dataset):
    def __init__(self, X,RAW_DATA):
        self.X = X
        self.RAW_DATA = RAW_DATA

    def __getitem__(self, index):
        raw_index = self.X[index]
        one_data = self.RAW_DATA.loc[raw_index]
        item = torch.as_tensor(one_data[ITEM_COL], dtype=torch.float).to(DEVICE)
        label = torch.as_tensor(one_data['is_right'], dtype=torch.float).to(DEVICE)
        stu_kp_seq = torch.as_tensor(one_data['stu_kp_seq'][:,ITEM_AN_COL_INDEX], dtype=torch.float).to(DEVICE)
        stu_kp_seq_padding_mask = one_data['stu_kp_seq_padding_mask']
        if sum(stu_kp_seq_padding_mask)==0:
            stu_kp_seq_padding_mask[0]=1
        stu_kp_seq_padding_mask = torch.as_tensor(stu_kp_seq_padding_mask, dtype=torch.bool).eq(0).to(DEVICE)
        stu_q_seq = torch.as_tensor(one_data['stu_q_seq'][:,ITEM_AN_COL_INDEX], dtype=torch.float).to(DEVICE)
        stu_q_seq_padding_mask = one_data['stu_q_seq_padding_mask']
        if sum(stu_q_seq_padding_mask)==0:
            stu_q_seq_padding_mask[0]=1
        stu_q_seq_padding_mask = torch.as_tensor(one_data['stu_q_seq_padding_mask'], dtype=torch.bool).eq(0).to(DEVICE)
        return item, label, stu_kp_seq, stu_kp_seq_padding_mask, stu_q_seq, stu_q_seq_padding_mask

    def __len__(self):
        return len(self.X)

# SIKT Model

In [5]:
from torch.nn import LayerNorm
from torch.nn import functional as F
from typing import Optional, Any, Union, Callable

class LayerMask(nn.Module):
    def __init__(self,input_size):
        super(LayerMask, self).__init__()
        self.input_size = input_size
        self.embeding = nn.Sequential()
        self.embeding.add_module('c_fc1', nn.Linear(input_size, input_size*2))
        self.embeding.add_module('c_relu1', nn.ReLU(True))
        self.embeding.add_module('c_fc2', nn.Linear(input_size*2, input_size*2))
        self.embeding.add_module('c_relu2', nn.ReLU(True))
        self.embeding.add_module('c_fc3', nn.Linear(input_size*2, input_size))
    def forward(self, src,mask):
        batch_size = src.size()[0]
        mask = mask.eq(0)
        b_len = mask.sum().item()
        batch_len = mask.sum(dim=1)
        mask =mask.unsqueeze(2)
        mask = mask.repeat(1,1,self.input_size)
        out = torch.masked_select(src, mask)
        out = out.view(b_len,self.input_size)
        new_out = torch.tensor([]).to(DEVICE)
        for i in range(len(batch_len)):
            one_out = out[torch.sum(batch_len[0:i]).item():torch.sum(batch_len[0:i+1]).item(),:]
            one_out = self.embeding(one_out)
            one_out = torch.mean(one_out,0)
            one_out.view(-1)
            new_out = torch.cat([new_out,one_out])
        new_out = new_out.view(batch_size,self.input_size)
        return new_out

class TFEncoder(nn.Module):
    def __init__(self, d_model: int = 512, nhead: int = 2, num_encoder_layers: int = 4,
                 dim_feedforward: int = 128, dropout: float = 0.1,
                 activation: Union[str, Callable[[Tensor], Tensor]] = F.relu,
                 layer_norm_eps: float = 1e-5, batch_first: bool = True, norm_first: bool = False,
                 device=None, dtype=None):
        factory_kwargs = {'device': device, 'dtype': dtype}
        super(TFEncoder, self).__init__()
        encoder_layer = nn.TransformerEncoderLayer(
            d_model, nhead, dim_feedforward, dropout, activation, layer_norm_eps, batch_first, norm_first, **factory_kwargs)
        encoder_norm = LayerNorm(d_model, eps=layer_norm_eps, **factory_kwargs)
        self.encoder = nn.TransformerEncoder(
            encoder_layer, num_encoder_layers, encoder_norm)

    def forward(self, src: Tensor, src_mask: Optional[Tensor] = None, src_key_padding_mask: Optional[Tensor] = None):
        memory = self.encoder(src, mask=src_mask,
                              src_key_padding_mask=src_key_padding_mask)
        return memory
    
class SIKT(nn.Module):
    def __init__(self, d_model, nhead,embedding_dim=128):
        super(SIKT, self).__init__()
        self.kp_embd = nn.Linear(d_model, embedding_dim)
        self.q_embd = nn.Linear(d_model, embedding_dim)
        self.pos_embd_kp = nn.Embedding( MEMORY_LEN , embedding_dim = embedding_dim )
        self.pos_embd_q = nn.Embedding( MEMORY_LEN , embedding_dim = embedding_dim )
        self.tf_encoder_kp = TFEncoder(d_model=embedding_dim,
                                             nhead=nhead,
                                             batch_first=True,
                                             dim_feedforward=128)
        self.tf_encoder_q = TFEncoder(d_model=embedding_dim,
                                            nhead=nhead,
                                            batch_first=True,
                                            dim_feedforward=128)
        self.LayerMask_kp = LayerMask(embedding_dim)
        self.LayerMask_q = LayerMask(embedding_dim)
        input_dim = embedding_dim*2+len(ITEM_COL)
        self.class_classifier = nn.Sequential()
        self.class_classifier.add_module('c_fc1', nn.Linear(input_dim, input_dim*2))
        self.class_classifier.add_module('c_relu1', nn.ReLU(True))
        self.class_classifier.add_module('c_fc3', nn.Linear(input_dim*2, input_dim))
        self.class_classifier.add_module('c_relu3', nn.ReLU(True))
        self.class_classifier.add_module('c_fc4', nn.Linear(input_dim, 10))
        self.class_classifier.add_module('c_relu4', nn.ReLU(True))
        self.class_classifier.add_module('c_fc5', nn.Linear(10, 1))
        self.class_classifier.add_module('c_sigmoid', nn.Sigmoid())

    def forward(self, item, stu_kp_seq, stu_kp_seq_padding_mask,stu_q_seq, stu_q_seq_padding_mask):
        pos_stu_kp = self.pos_embd_kp(torch.arange(MEMORY_LEN).unsqueeze(0).to(DEVICE))
        pos_stu_q = self.pos_embd_q(torch.arange(MEMORY_LEN).unsqueeze(0).to(DEVICE) )
        stu_kp_seq = self.kp_embd(stu_kp_seq)
        stu_q_seq = self.q_embd(stu_q_seq)
        stu_kp_seq = stu_kp_seq + pos_stu_kp
        stu_q_seq = stu_q_seq + pos_stu_q
        out_kp = self.tf_encoder_kp(src=stu_kp_seq,
                               src_key_padding_mask=stu_kp_seq_padding_mask)
        out_q = self.tf_encoder_q(src=stu_q_seq,
                               src_key_padding_mask=stu_q_seq_padding_mask)
        
        out_kp = self.LayerMask_kp(out_kp,stu_kp_seq_padding_mask)
        out_q = self.LayerMask_q(out_q,stu_q_seq_padding_mask)
        out = torch.cat([out_kp,out_q],-1)
        out = out.view(out.size()[0],-1)
        out = torch.cat([out,item],-1)
        out = self.class_classifier(out)
        out = out.squeeze()
        return out

# Tools

In [8]:
import logging
import pyecharts.options as opts
from pyecharts.charts import Line
import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
plt.rcParams['axes.unicode_minus'] = False
def create_logger(fp):
    logging.basicConfig(level=logging.INFO)
    formatter = logging.Formatter('%(asctime)s - %(message)s')
    file_logger = logging.getLogger('FileLogger')
    file_handler = logging.FileHandler(fp, mode='a', encoding='utf-8')
    file_handler.setFormatter(formatter)
    file_logger.addHandler(file_handler)
    return file_logger

def performance(ground_truth, prediction):
    ground_truth = ground_truth.to('cpu')
    prediction = prediction.to('cpu')
    ground_truth = ground_truth.detach().numpy()
    prediction = prediction.detach().numpy()
    auc = metrics.roc_auc_score(ground_truth, prediction) * 100
    # 转成0，1
    prediction = np.round(prediction)
    f1 = metrics.f1_score(ground_truth, prediction) * 100
    recall = metrics.recall_score(ground_truth, prediction) * 100
    precision = metrics.precision_score(ground_truth, prediction) * 100
    acc = metrics.accuracy_score(ground_truth, prediction) * 100
    return auc, f1, recall, precision, acc

def test_epoch(model,vail_datalaoder):
    gold_epoch = torch.Tensor([]).to(DEVICE)
    pred_epoch = torch.Tensor([]).to(DEVICE)
    model.eval()
    for one_batch in tqdm.tqdm(vail_datalaoder,desc='test:',mininterval=1,ncols=60):
        item,label, stu_kp_seq, stu_kp_seq_padding_mask,stu_q_seq, stu_q_seq_padding_mask = one_batch
        with torch.no_grad():
            pred = model(item, stu_kp_seq, stu_kp_seq_padding_mask,stu_q_seq, stu_q_seq_padding_mask)
            loss = loss_func(pred, label)
            pred_epoch = torch.cat([pred_epoch, pred])
            gold_epoch = torch.cat([gold_epoch, label])
    auc, f1, recall, precision, acc = performance(gold_epoch,pred_epoch)
    return auc, f1, recall, precision, acc,loss

def __load_model__(ckpt):
    '''
    ckpt: Path of the checkpoint
    return: Checkpoint dict
    '''
    if os.path.isfile(ckpt):
        checkpoint = torch.load(ckpt)
        print("Successfully loaded checkpoint '%s'" % ckpt)
        return checkpoint
    else:
        raise Exception("No checkpoint found at '%s'" % ckpt)

def svae_com_info(file_logger):
    file_logger.info(f'model_info:{model_info}')
    file_logger.info(f'Model:{model}')
    file_logger.info(f'BATCH_SIZE:{BATCH_SIZE}；LR:{LR}；MEMORY_LEN：{MEMORY_LEN}；EPOCH_NUM：{EPOCH_NUM}；NHEAD：{NHEAD}')
    file_logger.info(f'optimizer:{optimizer}')
    file_logger.info(f'loss_func:{loss_func}')
    file_logger.info(f'ITEM_COL:{ITEM_COL}')
    file_logger.info(f'ITEM_AN_COL:{ITEM_AN_COL}')
    print('\ntrain begin：')

def draw_auc(log_name, datas,datas_test):
    x_data = list(range(len(datas)))
    min_data = min(np.min(datas),np.min(datas_test) )- 1
    one_line = (Line().add_xaxis(x_data)
                .add_yaxis( "Train-auc", np.round(datas[:, 0], 2), markpoint_opts=opts.MarkPointOpts(data=[opts.MarkPointItem(type_="max", name="最大值")]))
                .add_yaxis("Train-f1", np.round(datas[:, 1], 2))
                .add_yaxis("Train-recall", np.round(datas[:, 2], 2))
                .add_yaxis("Train-precision", np.round(datas[:, 3], 2))
                .add_yaxis("Train-acc", np.round(datas[:, 4], 2))
                .add_yaxis( "Test-auc", np.round(datas_test[:, 0], 2), markpoint_opts=opts.MarkPointOpts(data=[opts.MarkPointItem(type_="max", name="最大值")]))
                .add_yaxis("Test-f1", np.round(datas_test[:, 1], 2))
                .add_yaxis("Test-recall", np.round(datas_test[:, 2], 2))
                .add_yaxis("Test-precision", np.round(datas_test[:, 3], 2))
                .add_yaxis("Test-acc", np.round(datas_test[:, 4], 2))
                .set_global_opts(tooltip_opts=opts.TooltipOpts(trigger="axis"),
                                yaxis_opts=opts.AxisOpts(min_=min_data))
                .set_series_opts(label_opts=opts.LabelOpts(is_show=False)))
    one_line.render(log_name + '.html')
    return one_line

def test_other_dataset(best_model,file_logger):
    all_data_list = [ 'ASSIST2012', 'EdNet','ASSIST2009NSB']
    for one_data in all_data_list:
        one_raw_data = pd.read_pickle(f'../DataProcess/test_data/{one_data}_test_{str(MEMORY_LEN)}.pkl')
        all_index = list(one_raw_data.index)
        # here you can change the number of test data records. test_size=300000 in our paper
        _,select_index= train_test_split(all_index, random_state=2022, test_size=0.8, shuffle=True)
        one_data_set = MyDataset(select_index,one_raw_data)
        one_test_datalaoder = data.DataLoader(one_data_set, batch_size=BATCH_SIZE, shuffle=True)
        auc, f1, recall, precision, acc,loss = test_epoch(best_model,one_test_datalaoder)
        file_logger.info(f'{one_data}-Test Best Model: auc:{auc},f1:{f1},recall:{recall},precision:{precision},acc:{acc}')
        del one_raw_data

# Continual learning sequence train

In [10]:
MEMORY_LEN = 5
BATCH_SIZE = 2048
# Change the sequence here such as：
# ['ASSIST2009NSB','EdNet', 'ASSIST2012']
# ['EdNet','ASSIST2009NSB', 'ASSIST2012']
train_seq = ['ASSIST2009NSB','ASSIST2012', 'EdNet']
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('use:',DEVICE)
LR = 0.001
NHEAD = 2
model = SIKT(d_model=len(ITEM_AN_COL), nhead=NHEAD,embedding_dim=16)
model.to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=LR)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min',patience=10)
loss_func = nn.BCEWithLogitsLoss()
EPOCH_NUM = 50
model_info = f'Continual learning sequence: {train_seq}'
logging.shutdown()
time_now = datetime.datetime.now()
now_str = time_now.strftime('%Y-%m-%d-%H-%M-%S')
log_dir = f'log'
log_file_name = f'{log_dir}/{now_str}.log'
os.makedirs(log_dir, exist_ok=True)
file_logger = create_logger(log_file_name)
model_dir = f'Checkpoints'
os.makedirs(model_dir, exist_ok=True)
final_trian_p = []
final_test_p = []
svae_com_info(file_logger)
for one_data in train_seq:
    DATA_NAME = one_data
    file_logger.info(f'************************')
    file_logger.info(f'task name:{DATA_NAME}')
    RAW_DATA = pd.read_pickle(f'../DataProcess/train_data/{DATA_NAME}_final_{str(MEMORY_LEN)}.pkl')
    all_data = list(RAW_DATA.index)
    X_train, X_test = train_test_split(
        all_data, random_state=2022, test_size=0.1, shuffle=True)
    train_data = MyDataset(X_train,RAW_DATA)
    vail_data = MyDataset(X_test,RAW_DATA)
    train_datalaoder = data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
    vail_datalaoder = data.DataLoader(vail_data, batch_size=BATCH_SIZE, shuffle=True)
    best_model_path = f'{model_dir}/{now_str}.pth.tar'
    file_logger.info(f'Checkpoint path:{best_model_path}')
    best_auc = 0
    best_model = model
    all_train_performence = []
    all_test_performence = []
    for one_epoch in range(EPOCH_NUM):
        gold_epoch = torch.Tensor([]).to(DEVICE)
        pred_epoch = torch.Tensor([]).to(DEVICE)
        model.train()
        for one_batch in tqdm.tqdm(train_datalaoder, desc='train:', mininterval=1, ncols=60):
            item, label, stu_kp_seq, stu_kp_seq_padding_mask, stu_q_seq, stu_q_seq_padding_mask = one_batch
            optimizer.zero_grad()
            pred = model(item, stu_kp_seq, stu_kp_seq_padding_mask,
                         stu_q_seq, stu_q_seq_padding_mask)
            loss = loss_func(pred, label)
            loss.backward()
            optimizer.step()
            pred_epoch = torch.cat([pred_epoch, pred])
            gold_epoch = torch.cat([gold_epoch, label])
        auc, f1, recall, precision, acc = performance(gold_epoch, pred_epoch)
        file_logger.info(f'Train {one_epoch}: auc:{auc},f1:{f1},recall:{recall},precision:{precision},acc:{acc}')
        all_train_performence.append([auc, f1, recall, precision,acc])
        auc, f1, recall, precision, acc,vail_loss = test_epoch(model, vail_datalaoder)
        file_logger.info(f'Vaild {one_epoch}: auc:{auc},f1:{f1},recall:{recall},precision:{precision},acc:{acc}')
        all_test_performence.append([auc, f1, recall, precision,acc])
        if best_auc < auc:
            best_auc = auc
            best_model = model
            torch.save({'state_dict': model.state_dict()}, best_model_path)
            file_logger.info(f'At 【{one_epoch}】 Epoceh Get Best Model!')
        scheduler.step(vail_loss)
    del RAW_DATA
    test_other_dataset(best_model,file_logger)
    all_train_performence = np.asarray(all_train_performence)
    all_test_performence = np.asarray(all_test_performence)
    final_trian_p.extend(all_train_performence)
    final_test_p.extend(all_test_performence) 
final_trian_p = np.asarray(final_trian_p)
final_test_p = np.asarray(final_test_p)
draw_auc(log_file_name,final_trian_p,final_test_p).render_notebook()

INFO:FileLogger:model_info:Continual learning sequence: ['ASSIST2009NSB', 'ASSIST2012', 'EdNet']
INFO:FileLogger:Model:SIKT(
  (kp_embd): Linear(in_features=16, out_features=16, bias=True)
  (q_embd): Linear(in_features=16, out_features=16, bias=True)
  (pos_embd_kp): Embedding(5, 16)
  (pos_embd_q): Embedding(5, 16)
  (tf_encoder_kp): TFEncoder(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=16, out_features=16, bias=True)
          )
          (linear1): Linear(in_features=16, out_features=128, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=128, out_features=16, bias=True)
          (norm1): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
  

use: cpu

训练开始：


INFO:FileLogger:Checkpoint path:Checkpoints/2022-08-06-22-17-08_ASSIST2009NSB.pth.tar
train:: 100%|███████████████| 11/11 [00:37<00:00,  3.42s/it]
INFO:FileLogger:Train 0: auc:66.00568185150716,f1:77.31454296339793,recall:99.06843425295592,precision:63.394167278063094,acc:63.65794660455115
test:: 100%|██████████████████| 2/2 [00:02<00:00,  1.28s/it]
INFO:FileLogger:Vaild 0: auc:70.90491523295933,f1:77.53446877534468,recall:93.17738791423001,precision:66.38888888888889,acc:66.50544135429263
INFO:FileLogger:At 【0】 Epoceh Get Best Model!
train:: 100%|███████████████| 11/11 [00:37<00:00,  3.39s/it]
INFO:FileLogger:Train 1: auc:71.1715661340655,f1:77.5903767820774,recall:87.35936940164815,precision:69.78647890548973,acc:68.45547392940333
test:: 100%|██████████████████| 2/2 [00:02<00:00,  1.30s/it]
INFO:FileLogger:Vaild 1: auc:72.15207161569884,f1:76.23024157470923,recall:83.04093567251462,precision:70.45203969128997,acc:67.87585650947199
INFO:FileLogger:At 【1】 Epoceh Get Best Model!
train::

test:: 100%|██████████████████| 1/1 [00:00<00:00,  8.41it/s]
INFO:FileLogger:Vaild 4: auc:57.19489981785063,f1:69.02654867256638,recall:63.934426229508205,precision:75.0,acc:60.22727272727273
INFO:FileLogger:At 【4】 Epoceh Get Best Model!
test:: 100%|██████████████████| 2/2 [00:03<00:00,  1.87s/it]
INFO:FileLogger:ASSIST2012-Test Best Model: auc:72.75215594836011,f1:63.63863239331171,recall:51.369863013698634,precision:83.60655737704919,acc:60.158599945310364
test:: 100%|██████████████████| 3/3 [00:04<00:00,  1.57s/it]
INFO:FileLogger:EdNet-Test Best Model: auc:71.23819581920084,f1:72.4264705882353,recall:65.52766858179619,precision:80.948823309675,acc:66.41563199674333
test:: 100%|██████████████████| 2/2 [00:02<00:00,  1.30s/it]
INFO:FileLogger:ASSIST2009NSB-Test Best Model: auc:74.4071841790769,f1:66.0597232337946,recall:54.70446320868516,precision:83.36397058823529,acc:63.01587301587301


# Joint Train

In [13]:
MEMORY_LEN = 5
BATCH_SIZE = 2048
train_seq = [ 'ASSIST2012', 'EdNet','ASSIST2009NSB']
DEVICE = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print('use:',DEVICE)
LR = 0.001
NHEAD = 2
model = SIKT(d_model=len(ITEM_AN_COL), nhead=NHEAD,embedding_dim=16)
model.to(DEVICE)
optimizer = optim.Adam(model.parameters(), lr=LR)
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min',patience=10)
loss_func = nn.BCEWithLogitsLoss()
EPOCH_NUM = 50
model_info = f'Joint train{train_seq}'
logging.shutdown()
time_now = datetime.datetime.now()
now_str = time_now.strftime('%Y-%m-%d-%H-%M-%S')
log_dir = f'log'
log_file_name = f'{log_dir}/{now_str}.log'
os.makedirs(log_dir, exist_ok=True)
file_logger = create_logger(log_file_name)
model_dir = f'Checkpoints'
os.makedirs(model_dir, exist_ok=True)
all_train_performence = []
all_test_performence = []
svae_com_info(file_logger)
RAW_DATA = pd.read_pickle(f'../DataProcess/train_data/ASSIST2012_final_{str(MEMORY_LEN)}.pkl')
# All training sets are merged here
for one_data in train_seq[1:]:
    one_raw_data = pd.read_pickle(f'../DataProcess/train_data/{one_data}_final_{str(MEMORY_LEN)}.pkl')
    RAW_DATA = pd.concat([RAW_DATA,one_raw_data])
    del one_raw_data
RAW_DATA = RAW_DATA.reset_index(drop=True)
all_data = list(RAW_DATA.index)
# print(len(all_data))
X_train, X_test = train_test_split(all_data, random_state=2022, test_size=0.1, shuffle=True)
train_data = MyDataset(X_train,RAW_DATA)
vail_data = MyDataset(X_test,RAW_DATA)
train_datalaoder = data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)
vail_datalaoder = data.DataLoader(vail_data, batch_size=BATCH_SIZE, shuffle=True)
best_model_path = f'{model_dir}/{now_str}_joint_train.pth.tar'
file_logger.info(f'Checkpoint path:{best_model_path}')
best_auc = 0
best_model = model
for one_epoch in range(EPOCH_NUM):
    gold_epoch = torch.Tensor([]).to(DEVICE)
    pred_epoch = torch.Tensor([]).to(DEVICE)
    model.train()
    for one_batch in tqdm.tqdm(train_datalaoder, desc='train:', mininterval=1, ncols=60):
        item, label, stu_kp_seq, stu_kp_seq_padding_mask, stu_q_seq, stu_q_seq_padding_mask = one_batch
        optimizer.zero_grad()
        pred = model(item, stu_kp_seq, stu_kp_seq_padding_mask,
                     stu_q_seq, stu_q_seq_padding_mask)
        loss = loss_func(pred, label)
        loss.backward()
        optimizer.step()
        pred_epoch = torch.cat([pred_epoch, pred])
        gold_epoch = torch.cat([gold_epoch, label])
    auc, f1, recall, precision, acc = performance(gold_epoch, pred_epoch)
    file_logger.info(f'Train {one_epoch}: auc:{auc},f1:{f1},recall:{recall},precision:{precision},acc:{acc}')
    all_train_performence.append([auc, f1, recall, precision,acc])
    auc, f1, recall, precision, acc,vail_loss = test_epoch(model, vail_datalaoder)
    file_logger.info(f'Vaild {one_epoch}: auc:{auc},f1:{f1},recall:{recall},precision:{precision},acc:{acc}')
    all_test_performence.append([auc, f1, recall, precision,acc])
    if best_auc < auc:
        best_auc = auc
        best_model = model
        torch.save({'state_dict': model.state_dict()}, best_model_path)
        file_logger.info(f'At 【{one_epoch}】 Epoceh Get Best Model!')
    scheduler.step(vail_loss)
del RAW_DATA
test_other_dataset(best_model,file_logger)
all_train_performence = np.asarray(all_train_performence)
all_test_performence = np.asarray(all_test_performence)
draw_auc(log_file_name,all_train_performence,all_test_performence).render_notebook()

INFO:FileLogger:model_info:Joint train['ASSIST2012', 'EdNet', 'ASSIST2009NSB']
INFO:FileLogger:Model:SIKT(
  (kp_embd): Linear(in_features=16, out_features=16, bias=True)
  (q_embd): Linear(in_features=16, out_features=16, bias=True)
  (pos_embd_kp): Embedding(5, 16)
  (pos_embd_q): Embedding(5, 16)
  (tf_encoder_kp): TFEncoder(
    (encoder): TransformerEncoder(
      (layers): ModuleList(
        (0): TransformerEncoderLayer(
          (self_attn): MultiheadAttention(
            (out_proj): NonDynamicallyQuantizableLinear(in_features=16, out_features=16, bias=True)
          )
          (linear1): Linear(in_features=16, out_features=128, bias=True)
          (dropout): Dropout(p=0.1, inplace=False)
          (linear2): Linear(in_features=128, out_features=16, bias=True)
          (norm1): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
          (norm2): LayerNorm((16,), eps=1e-05, elementwise_affine=True)
          (dropout1): Dropout(p=0.1, inplace=False)
          (dropout2)

use: cpu

训练开始：


INFO:FileLogger:Checkpoint path:Checkpoints/2022-08-06-22-32-39_joint_train.pth.tar
train:: 100%|███████████████| 18/18 [01:01<00:00,  3.42s/it]
INFO:FileLogger:Train 0: auc:67.2035665964544,f1:78.27190958997193,recall:97.1329664477188,precision:65.54460782845678,acc:65.48011363636364
test:: 100%|██████████████████| 2/2 [00:04<00:00,  2.04s/it]
INFO:FileLogger:Vaild 0: auc:72.4528248647352,f1:77.72958702621159,recall:82.87092882991556,precision:73.18892045454545,acc:69.81083844580776
INFO:FileLogger:At 【0】 Epoceh Get Best Model!
train:: 100%|███████████████| 18/18 [01:01<00:00,  3.41s/it]
INFO:FileLogger:Train 1: auc:70.78313341230695,f1:74.88255070914882,recall:74.63163500798863,precision:75.13515928689513,acc:67.95170454545455
test:: 100%|██████████████████| 2/2 [00:03<00:00,  1.99s/it]
INFO:FileLogger:Vaild 1: auc:73.36662927926974,f1:72.55753365175858,recall:67.18938480096502,precision:78.85795186408683,acc:67.68916155419224
INFO:FileLogger:At 【1】 Epoceh Get Best Model!
train:: 100