In [1]:
import os, random

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn as nn
from MyPyTorch import *

# Create the working directories up front; exist_ok makes re-running this cell harmless.
for _out_dir in ('model_pytorch', 'figure'):
    os.makedirs(_out_dir, exist_ok=True)

In [2]:
# Report the PyTorch build and the CUDA runtime visible to this kernel.
print(torch.__version__)
print(torch.version.cuda)
print(torch.cuda.is_available())
# get_device_name() raises when no CUDA device is usable, so only query it
# when CUDA is actually available; otherwise this cell would crash on
# CPU-only machines even though everything below runs on the CPU.
if torch.cuda.is_available():
    print(torch.cuda.get_device_name())

# This notebook is pinned to the CPU. (Previously "cuda:0" was selected when
# available and then immediately overwritten by this assignment.)
device = torch.device("cpu")

1.10.1+cu113
11.3
True
Quadro P2000


In [7]:
def getData_Advice(course_name=None):
    """Build the train/validation/test tensors used to fit the Advice model.

    Reads the score-prediction CSVs (the per-course files when ``course_name``
    is given, the combined files otherwise) and keeps only the rows for which
    the saved ScoreRangePredict model's arg-max output equals the arg-max of
    the label columns. The roles of the data are then swapped: the model
    inputs become ``score`` plus the normalized ``year``/``semester``/``week``
    columns, and the targets become the normalized columns named in the
    ``feature`` column of the ``advice`` sheet.

    Args:
        course_name: Course whose CSVs and classifier checkpoint are loaded.
            Any falsy value selects the combined dataset and model.

    Returns:
        Tuple ``(x_train, x_valid, x_test, y_train, y_valid, y_test)`` of
        ``torch.float`` tensors, split on the ``group`` column
        (``'training'`` / ``'validation'`` / ``'testing'``).
    """
    x_data = pd.read_csv('data/train/ScorePredict_x_{}.csv'.format(course_name) if course_name else 'data/train/ScorePredict_x.csv')
    y_data = pd.read_csv('data/train/ScorePredict_y_{}.csv'.format(course_name) if course_name else 'data/train/ScorePredict_y.csv')
    n_data = pd.read_csv('data/train/normalize.csv').set_index('col')
    feature = pd.read_excel('data/other/text_material.xlsx', sheet_name='advice')['feature'].tolist()
    # The classifier is fed a CPU tensor below, so force the checkpoint onto the
    # CPU; a checkpoint saved from a GPU would otherwise cause a device mismatch.
    model_sp = torch.load(
        'model_pytorch/ScoreRangePredict_MOE_{}.pt'.format(course_name) if course_name else 'model_pytorch/ScoreRangePredict.pt',
        map_location='cpu')

    # normalize() is defined outside this cell (presumably in MyPyTorch); it is
    # applied once per row of normalize.csv -- TODO confirm its exact contract.
    xn_data = n_data.apply(lambda n: normalize(n, x_data), axis=1).T
    xn_data = torch.tensor(xn_data.to_numpy(), dtype=torch.float)
    yn_data = torch.tensor(y_data.drop(['group'], axis=1).to_numpy(), dtype=torch.float)
    # Pure inference: no autograd graph is needed to pick the rows.
    with torch.no_grad():
        select_row = torch.argmax(model_sp(xn_data), dim=1) == torch.argmax(yn_data, dim=1)
    select_row = select_row.numpy().tolist()

    # Boolean mask: keep only the rows the classifier got right.
    selected = x_data[select_row]

    def _split_group(group):
        """Return the (inputs, targets) frames for one value of the 'group' column."""
        rows = selected[selected['group'] == group]
        normed = n_data.apply(lambda n: normalize(n, rows), axis=1).T
        # 'score' is taken from the selected rows directly, not from the
        # output of normalize(); the time columns come from the normalized frame.
        inputs = pd.concat([rows[['score']], normed[['year', 'semester', 'week']]], axis=1)
        targets = normed.loc[:, normed.columns.isin(feature)]
        return inputs, targets

    inputs, targets = zip(*(_split_group(g) for g in ('training', 'validation', 'testing')))
    # inputs + targets == (x_train, x_valid, x_test, y_train, y_valid, y_test)
    return tuple(torch.tensor(frame.to_numpy(), dtype=torch.float) for frame in inputs + targets)

In [8]:
# Train the general (all-course) Advice model and keep the checkpoint with the
# lowest validation loss in model_pytorch/Advice.pt.
x_train, x_valid, x_test, y_train, y_valid, y_test = getData_Advice()
# Mini-batches are drawn from the CPU tensors and moved to `device` inside the
# loop. Pinned memory only helps host-to-GPU copies, so enable it only for CUDA.
train_dl = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(x_train, y_train),
    shuffle=True, batch_size=128, pin_memory=(device.type == 'cuda'))
x_train = x_train.to(device)
x_valid = x_valid.to(device)
y_train = y_train.to(device)
y_valid = y_valid.to(device)
x_test = x_test.to(device)
y_test = y_test.to(device)

model = Advice(16)
model = model.to(device)
criterion = torch.nn.MSELoss(reduction='mean')
criterion = criterion.to(device)
optimizer = torch.optim.Adam(model.parameters())

minVL = float('inf')  # best validation loss seen so far
minVL_ep = 0          # epoch at which minVL was reached (0 = nothing saved yet)

for e in range(1000):
    ep = e + 1
    for xb, yb in train_dl:
        pred = model(xb.to(device))
        loss = criterion(pred, yb.to(device))
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
    # Full-dataset evaluation; no gradients are needed, so skip graph construction.
    # NOTE(review): if Advice contains dropout/batch-norm layers, evaluation should
    # also switch to model.eval() and back to model.train() -- confirm.
    with torch.no_grad():
        train_loss = float(criterion(model(x_train), y_train))
        valid_loss = float(criterion(model(x_valid), y_valid))
        test_loss = float(criterion(model(x_test), y_test))
    save = ''
    # Always save after the first epoch so a checkpoint exists for the reload below.
    if valid_loss < minVL or minVL_ep < 1:
        minVL = valid_loss
        minVL_ep = ep
        save = '< save'
        torch.save(model, 'model_pytorch/Advice.pt')
    print(ep,
        'train_loss', '{:.8f}'.format(train_loss),
        'valid_loss', '{:.8f}'.format(valid_loss),
        'test_loss', '{:.8f}'.format(test_loss),
        save)
    # Early stopping: always run at least 200 epochs; after that, stop once the
    # best epoch lies in the first half of the epochs run so far.
    if ep >= 200 and minVL_ep < ep/2:
        print('Accuracy of validation is CRASH !!')
        break
# Reload the best checkpoint and store it again as a CPU model.
model = torch.load('model_pytorch/Advice.pt')
model = model.to(torch.device('cpu'))
torch.save(model, 'model_pytorch/Advice.pt')
print('Training done, save model at VL: =', minVL)
os.makedirs('record', exist_ok=True)

1 train_loss 1.15430081 valid_loss 1.22706461 test_loss 1.17435372 < save
2 train_loss 1.05799186 valid_loss 1.13713622 test_loss 1.08497012 < save
3 train_loss 1.01961458 valid_loss 1.09613895 test_loss 1.04529059 < save
4 train_loss 1.00611103 valid_loss 1.08283925 test_loss 1.03218138 < save
5 train_loss 0.92934054 valid_loss 0.99266171 test_loss 0.94650364 < save
6 train_loss 0.92325175 valid_loss 0.98545849 test_loss 0.93978435 < save
7 train_loss 0.91953838 valid_loss 0.98135978 test_loss 0.93592691 < save
8 train_loss 0.91644555 valid_loss 0.97860271 test_loss 0.93324000 < save
9 train_loss 0.91334438 valid_loss 0.97503006 test_loss 0.92988390 < save
10 train_loss 0.91003531 valid_loss 0.97138035 test_loss 0.92640698 < save
11 train_loss 0.90152985 valid_loss 0.96462619 test_loss 0.91937792 < save
12 train_loss 0.87349623 valid_loss 0.93940961 test_loss 0.89526057 < save
13 train_loss 0.85176212 valid_loss 0.92257732 test_loss 0.87930918 < save
14 train_loss 0.84373564 valid_los

In [9]:
# Fine-tune one copy of the general Advice model per course. A per-course
# checkpoint is only overwritten when its validation RMSE beats the general
# model's validation RMSE on that course (and every earlier epoch).
course = pd.read_excel('data/other/course_list.xlsx', sheet_name='course')
course = course['course_name'].drop_duplicates()
# Frozen reference copy, used only to compute each course's baseline RMSE.
# NOTE(review): both models stay on whatever device torch.load restores them to,
# while the tensors are moved to `device`; this matches only while they agree.
model = torch.load('model_pytorch/Advice.pt')
for c in course:

    # Start every course from the general weights.
    c_model = torch.load('model_pytorch/Advice.pt')

    # Write the starting point first so a per-course file exists even if
    # fine-tuning never improves on the baseline.
    torch.save(c_model, 'model_pytorch/Advice_{}.pt'.format(c))
    criterion = torch.nn.MSELoss(reduction='mean')
    optimizer = torch.optim.Adam(c_model.parameters())

    x_train, x_valid, x_test, y_train, y_valid, y_test = getData_Advice(c)
    x_train = x_train.to(device)
    x_valid = x_valid.to(device)
    y_train = y_train.to(device)
    y_valid = y_valid.to(device)
    x_test = x_test.to(device)
    y_test = y_test.to(device)
    train_dl = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_train, y_train), shuffle=True, batch_size=64)
    # Baseline: validation RMSE of the general model on this course's data.
    with torch.no_grad():
        minVL = float(criterion(model(x_valid), y_valid) ** 0.5)
    minVL_0 = minVL
    minVL_ep = 0

    print('Now processing model:', c)
    print('original RMSE:', minVL)

    for e in range(1000):
        ep = e + 1
        for xb, yb in train_dl:
            pred = c_model(xb)
            loss = criterion(pred, yb)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        # Evaluation only: no autograd graph needed.
        with torch.no_grad():
            train_loss = float(criterion(c_model(x_train), y_train))
            valid_loss = float(criterion(c_model(x_valid), y_valid))
            test_loss = float(criterion(c_model(x_test), y_test))
        # minVL is an RMSE, while the criterion returns an MSE. Compare like with
        # like; comparing the raw MSE against the RMSE baseline accepted epochs
        # that were not better and made the final report mix units.
        valid_rmse = valid_loss ** 0.5

        save = ''
        if minVL > valid_rmse:
            minVL = valid_rmse
            minVL_ep = ep
            save = '< save'
            torch.save(c_model, 'model_pytorch/Advice_{}.pt'.format(c))
        print(c, '|', ep,
            'train_loss', '{:.8f}'.format(train_loss),
            'valid_loss', '{:.8f}'.format(valid_loss),
            'test_loss', '{:.8f}'.format(test_loss),
            save)
        # Early stopping: always run at least 200 epochs; after that, stop once
        # the best epoch lies in the first half of the epochs run so far.
        if ep >= 200 and minVL_ep < ep/2:
            print('Accuracy of validation is CRASH !!')
            break
    print('ReTraining done, RMSE:', minVL_0, '=>', minVL)
    print('decrease:',minVL_0 - minVL)
    print()

Now processing model: 企業倫理
original RMSE: 0.9484657049179077
企業倫理 | 1 train_loss 0.74814981 valid_loss 0.77662092 test_loss 0.67527670 < save
企業倫理 | 2 train_loss 0.72194314 valid_loss 0.75070220 test_loss 0.65058929 < save
企業倫理 | 3 train_loss 0.71191007 valid_loss 0.74093443 test_loss 0.64196730 < save
企業倫理 | 4 train_loss 0.70377028 valid_loss 0.73318827 test_loss 0.63429654 < save
企業倫理 | 5 train_loss 0.70220888 valid_loss 0.73141581 test_loss 0.63401538 < save
企業倫理 | 6 train_loss 0.69531828 valid_loss 0.72504354 test_loss 0.62649530 < save
企業倫理 | 7 train_loss 0.69236875 valid_loss 0.72196078 test_loss 0.62424946 < save
企業倫理 | 8 train_loss 0.68971425 valid_loss 0.71934116 test_loss 0.62203223 < save
企業倫理 | 9 train_loss 0.68746942 valid_loss 0.71720827 test_loss 0.61981022 < save
企業倫理 | 10 train_loss 0.68627441 valid_loss 0.71611524 test_loss 0.61853027 < save
企業倫理 | 11 train_loss 0.68414634 valid_loss 0.71355134 test_loss 0.61704588 < save
企業倫理 | 12 train_loss 0.69804686 valid_loss 0.7