# model

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # GPUを使うために必要

class LSTMClassifier(nn.Module):
    
    def __init__(self, embedding_dim, hidden_dim, tagset_size):
        
        super(LSTMClassifier, self).__init__()
        self.hidden_dim = hidden_dim
        
        self.hidden_dim = hidden_dim
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.hidden2tag = nn.Linear(hidden_dim, tagset_size)
        self.softmax = nn.LogSoftmax()

    def forward(self, sentence):
        
        _, lstm_out = self.lstm(sentence)
        tag_space = self.hidden2tag(lstm_out[0])
        tag_scores = self.softmax(tag_space.squeeze())
#         tag_scores = F.softmax(tag_space.squeeze(), dim = 1)
        
        
        return tag_scores, lstm_out[0][0]

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # GPUを使うために必要

class LSTMClassifier_3NN(nn.Module):
    
    def __init__(self, embedding_dim, hidden_dim, tagset_size):
        
        super(LSTMClassifier_3NN, self).__init__()
        hidden1_dim = hidden_dim
        hidden2_dim = 256
        hidden3_dim = 64

        self.lstm = nn.LSTM(embedding_dim, hidden1_dim, batch_first=True)
        self.fc1 = nn.Linear(hidden1_dim, hidden2_dim)
        self.fc2 = nn.Linear(hidden2_dim, hidden3_dim)
        self.fc3 = nn.Linear(hidden3_dim, tagset_size)
        self.softmax = nn.LogSoftmax()

    def forward(self, sentence):
        
        _, lstm_out = self.lstm(sentence)
        x1 = F.relu(self.fc1(lstm_out[0]))
        x2 = F.relu(self.fc2(x1))
        x3 = self.fc3(x2)
        tag_scores = self.softmax(x3.squeeze())
#         tag_scores = F.softmax(tag_space.squeeze(), dim = 1)
        
        
        return tag_scores, lstm_out[0][0]

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")  # GPUを使うために必要

class Simple_3NN(nn.Module):
    
    def __init__(self, embedding_dim, hidden_dim, tagset_size):
        
        super(Simple_3NN,self).__init__()
        hidden1_dim = hidden_dim
        hidden2_dim = 256
        hidden3_dim = 64
        
        self.hidden_dim = hidden_dim
        self.fc1 = nn.Linear(hidden1_dim, hidden2_dim)
        self.fc2 = nn.Linear(hidden2_dim, hidden3_dim)
        self.fc3 = nn.Linear(hidden3_dim, tagset_size)
        self.softmax = nn.LogSoftmax()

    def forward(self, sentence):
        x1 = F.relu(self.fc1(sentence))
        x2 = F.relu(self.fc2(x1))
        x3 = self.fc3(x2)
#         return F.softmax(x3, dim = 1)
        return F.log_softmax(x3, dim = 1)

# 事前処理用の関数（形態素解析とか）

In [6]:
import MeCab
import re
import neologdn



# テキスト事前処理用の関数
def clean_sentence(sentence):
    tagger = MeCab.Tagger("-Owakati")
    # MeCabで分かち書き
    sentence = tagger.parse(sentence)
    # 全角半角統一、重ね表現除去
    sentence = neologdn.normalize(sentence)
    # アルファベットの大文字を小文字に変換
    sentence = sentence.lower()
    # 半角全角英数字除去
    #sentence = re.sub(r'[０-９a-zA-Zａ-ｚＡ-Ｚ]+', " ", sentence)
    # 記号もろもろ除去
    sentence = re.sub(r'[″ω\．_－―─！＠＃＄％＾＆\-‐|\\＊\“（）＿■×+α※÷⇒—●★☆〇◎◆▼◇△□(：〜～＋=)／*&^%$#@!~`){}［］…\[\]\"\'\”\’:;<>?＜＞〔〕〈〉？、､。｡・･,\./『』【】「」｢｣→←○《》≪≫\n\u3000]+', "", sentence)
    # スペースで区切って形態素の配列へ
    wakati = sentence.split(" ")
    # 空の要素は削除
    wakati = list(filter(("").__ne__, wakati))
    # sentenceとして返す
    sentence = ''.join(wakati)
    return sentence

# 分かち書き用の関数
def make_wakati(sentence):
    tagger = MeCab.Tagger("-Owakati")
    # MeCabで分かち書き
    sentence = tagger.parse(sentence)
    # 記号もろもろ除去
    sentence = re.sub('\n', "", sentence)
    # スペースで区切って形態素の配列へ
    wakati = sentence.split(" ")
    # 空の要素は削除
    wakati = list(filter(("").__ne__, wakati))
    return wakati

# 文章を単語IDの系列データに変換
# PyTorchのLSTMのインプットになるデータなので、もちろんtensor型で
def sentence2index_fast(sentence):
    wakati = make_wakati(sentence)
    return torch.tensor([model_fast.get_word_vector(w) for w in wakati], dtype=torch.long)

def sentence2index(sentence):
    wakati = make_wakati(sentence)
    return torch.tensor([word2index[w] for w in wakati], dtype=torch.long)

# データをバッチでまとめるための関数
from sklearn.utils import shuffle

def train2batch(title, category, batch_size):
    title_batch = []
    category_batch = []
    
#     title_shuffle, category_shuffle = shuffle(title, category)
    
    for i in range(0, len(title), batch_size):
        title_batch.append(title[i:i+batch_size])
        category_batch.append(category[i:i+batch_size])
        
    return title_batch, category_batch

from bs4 import BeautifulSoup

def clean_tokenizer(text):
    soup = BeautifulSoup(text,"lxml")
    clean_text = soup.get_text()
    return [tok for tok in keitaiso_noun(clean_text)]



# mecab
def keitaiso_noun(text):
    mecab = MeCab.Tagger('-d /usr/lib/x86_64-linux-gnu/mecab/dic/mecab-ipadic-neologd')
    mecab.parse('')
    node = mecab.parseToNode(text)
    word=[]
    hinshi = ["名詞","動詞","形容詞","助動詞","助詞"]
    while node:
        feats = node.feature.split(',')
        if feats[0] in hinshi :
            try:
                word.append(node.surface)  #単語を取得
            except:
                print("err: " + str(node.surface))
        node = node.next  #次の単語に進める
    return word


def blank_delete(student_list, student_answer_list):
    return_student = []
    return_answer = []
    for id, text in enumerate(student_list):
        if text != "b":
            return_student.append(text)
            return_answer.append(student_answer_list[id])
            
    return return_student, return_answer

#datasetファイルから対応する問題の答えを取得
def get_data(question_num):
    import xlrd
    student_list=[]
    student_answer_list=[]
    if(question_num==1):
        book = xlrd.open_workbook('./dataset/result_j1.xlsx')
    elif(question_num==2):
        book = xlrd.open_workbook('./dataset/result_ss1.xlsx')
    elif(question_num==3):
        book = xlrd.open_workbook('./dataset/result_s1.xlsx')
    elif(question_num==4):
        book = xlrd.open_workbook('./dataset/result_s_sakuta.xlsx')
    elif(question_num==5):
        book = xlrd.open_workbook('./dataset/result_new_s.xlsx')            
    else:
        print("無効な問題番号です")
    sheet_1 = book.sheet_by_index(0)
    #シートの2行目以降をリストに追加
    for row in range(sheet_1.nrows):
        if(row!=0):
            student_list.append(sheet_1.cell(row, 1).value)
            student_answer_list.append(int(sheet_1.cell(row,2).value))
    
    #アンサーが0→1 2→1　3,4→0とする(0が正解　1が不正解　△は不正解とする)     
    for i in range(len(student_answer_list)):
        if(student_answer_list[i]==0):
            student_answer_list[i]=1
        elif(student_answer_list[i]==2):
            student_answer_list[i]=1
        elif(student_answer_list[i]==3):
            student_answer_list[i]=0
        else:
            student_answer_list[i]=0

        
        
    return student_list,student_answer_list


def self_cross_val(all_data, val_num):
    import copy
    
    train_data = []
    test_data = []
    data_num = len(all_data)
    split_pos = []
    base_data_num = int(data_num/val_num)
    amari = data_num%val_num
    pos = 0
    for i in range(val_num):
        if i != 0:
            pos = split_pos[i-1]
        if amari > 0:
            split_pos.append(pos+base_data_num+1)
            amari -= 1
        else:
            split_pos.append(pos+base_data_num)
    print(split_pos)

    before_pos = 0
    for id, pos in enumerate(split_pos):
        tmp_list = copy.copy(all_data)      
        test_data.append(all_data[before_pos:pos])

        del tmp_list[before_pos:pos]
        train_data.append(tmp_list)
    
        before_pos = pos
        
    if val_num == 1:
        train_data = copy.copy(test_data)

    return train_data, test_data

        

# init model
def weights_init(m):
    classname = m.__class__.__name__
    if hasattr(m, 'weight') and (classname.find('Embedding') == -1):
        nn.init.xavier_uniform_(m.weight.data, gain=nn.init.calculate_gain('relu'))
        
#文章を単語リストに変換する
def analyzer(text):
    mecab = MeCab.Tagger('-Owakati -d /usr/lib/x86_64-linux-gnu/mecab/dic/mecab-ipadic-neologd')
#     stop_words = ['i', 'a', 'an', 'the', 'to', 'and', 'or', 'if', 'is', 'are', 'am', 'it', 'this', 'that', 'of', 'from', 'in', 'on']
    stop_words = ['']
#     text = text.lower() # 小文字化
    mecab.parse('')
    text = mecab.parse(text)
    text = text.replace('\n', '') # 改行削除
    text = text.replace('\t', '') # タブ削除
    text = re.sub(re.compile(r'[!-\/:-@[-`{-~]'), ' ', text) # 記号をスペースに置き換え
    text = text.split(' ') # スペースで区切る
    
    words = []
    
#条件をつけて単語リストに追加する
    for word in text:
#         if (re.compile(r'^.*[0-9]+.*$').fullmatch(word) is not None): # 数字が含まれるものは除外
#             continue
        if word in stop_words: # ストップワードに含まれるものは除外
            continue
        if len(word) < 1: #0文字（空文字）は除外
            continue
        words.append(word)
            
    return words
            

def change_Tensor(input_data):
    return_data = []
    
    for data in input_data:
        data =  torch.Tensor(data)
        return_data.append(data)
        
    return_data = torch.Tensor(return_data)
    return return_data
        


def make_bert_vec(qnum, val_num):
    
    import pandas as pd
    train_val_vec = []
    test_val_vec = []
    bert_vec = []

    if qnum == 1:
        all_df = pd.read_json('./dataset/output_bert_japanese.jsonl', orient='records', lines=True)
    elif qnum == 2:
        all_df = pd.read_json('./dataset/output_bert_social.jsonl', orient='records', lines=True)
    elif qnum == 3 or qnum == 4:
        all_df = pd.read_json('./dataset/output_bert_science.jsonl', orient='records', lines=True)
    elif qnum == 5:
        all_df = pd.read_json('./dataset/model/output_new_science.jsonl', orient='records', lines=True)
        
#         all_df = pd.read_json('./dataset/output_science.jsonl', orient='records', lines=True)
#         all_df = pd.read_json('./dataset/output_science_1029.jsonl', orient='records', lines=True)
        
    all_features = all_df['features']
    for l in all_features:
        vecs = []
        for m in l:
            vec = ( ( m['layers'] )[0] )['values']
            vecs.append(vec)
        vecs = torch.Tensor(vecs)
        bert_vec.append(vecs)
    
    return bert_vec

def make_bert_vec_laboro(qnum, val_num, flag):
    
    import pandas as pd
    train_val_vec = []
    test_val_vec = []
    bert_vec = []

    
    if flag == 1:
        if qnum == 1:
            all_df = pd.read_json('./dataset/model/output_japanese_base.jsonl', orient='records', lines=True)
        elif qnum == 2:
            all_df = pd.read_json('./dataset/model/output_social_base.jsonl', orient='records', lines=True)
        elif qnum == 3 or qnum == 4:
            all_df = pd.read_json('./dataset/model/output_science_base.jsonl', orient='records', lines=True)
    elif flag == 2:
        if qnum == 1:
            all_df = pd.read_json('./dataset/model/output_japanese_large.jsonl', orient='records', lines=True)
        elif qnum == 2:
            all_df = pd.read_json('./dataset/model/output_social_large.jsonl', orient='records', lines=True)
        elif qnum == 3 or qnum == 4:
            all_df = pd.read_json('./dataset/model/output_science_large.jsonl', orient='records', lines=True)
        elif qnum == 5:
            all_df = pd.read_json('./dataset/model/new_science.jsonl', orient='records', lines=True)
        

        
    all_features = all_df['features']
    for l in all_features:
        vecs = []
        for m in l:
            vec = ( ( m['layers'] )[0] )['values']
            vecs.append(vec)
        vecs = torch.Tensor(vecs)
        bert_vec.append(vecs)
    
    return bert_vec


def print_graph(train_loss, train_accu):
    import matplotlib.pyplot as plt

    %matplotlib inline

    x = range(len(train_loss))
    y = train_loss
    fig = plt.figure(figsize=(8,6))
    ax = fig.add_subplot(111)
    ax.plot(x, y, label='train_loss_random')
    ax.set_ylabel('loss')
    plt.legend()
    plt.show()

    x = range(len(train_accu))
    y = train_accu
    fig = plt.figure(figsize=(8,6))
    ax = fig.add_subplot(111)
    ax.plot(x, y, label='train_acc_random')
    ax.set_ylabel('acc')
    plt.legend()
    plt.show()
    
#ランダムに誤採点を作成
def make_change_answer(answer_list, diff_num):
    re_ans_list = []
    change_pos = []
    change_num = random.sample(range(0, len(answer_list), 1), k=diff_num)
    
    print(change_num)
    
    for id, label in enumerate(answer_list):
        if id in change_num:
            change_pos.append(1)
            if label == 0:
                label = 1
            else :
                label = 0
        else:
            change_pos.append(0)
        re_ans_list.append(label)
        
    return re_ans_list, change_pos

def my_shuffle(seed, box):
    random.seed(seed)
    random.shuffle(box)
    
    return box

In [8]:
#データセット準備
import MeCab

print("何問目について採点するかを決定します(1〜4) 1:国語, 2:社会, 3:理科, 4:理科変更後") 
qnum=int(input('>>> '))
print("使うモデルを選択してください 0:ランダム, 1:fasttext, 2:wiki, 3;BERT, 4:朝日新聞, 5:辞書モデル") 
model_select = int(input('>>> ')) 
print("使うモデルを選択してください 0: BERT京大 1:base, 2:large") 
laboro_model = int(input('>>> ')) 

student_list,student_answer_list =get_data(qnum) #1:おばあちゃん #2:社会 #3:国語 ラベルは0が正解、1が不正解 
print(len(student_list)) 
student_list, student_answer_list = blank_delete(student_list, student_answer_list) 
print(len(student_list)) 
student_count=len(student_list)


from gensim.models import KeyedVectors
if model_select == 2:
    model_wiki = KeyedVectors.load_word2vec_format('./model/fastText/model.vec', binary=False)
elif model_select == 3:
    from bert_juman import BertWithJumanModel
    bert = BertWithJumanModel("./model/Japanese_L-12_H-768_A-12_E-30_BPE")
elif model_select ==4:
    model_wiki = KeyedVectors.load_word2vec_format("./model/embeddings/cbow.txt")

何問目について採点するかを決定します(1〜4) 1:国語, 2:社会, 3:理科, 4:理科変更後
>>> 5
使うモデルを選択してください 0:ランダム, 1:fasttext, 2:wiki, 3;BERT, 4:朝日新聞, 5:辞書モデル
>>> 3
使うモデルを選択してください 0: BERT京大 1:base, 2:large
>>> 0
2017
2017


unable to import 'smart_open.gcs', disabling that module


##### 回すよう

In [9]:
import numpy as np
import torch.nn.utils.rnn as rnn
import copy
import random
from bert_juman import BertWithJumanModel
import tensorflow as tf
import torch
from pyknp import Juman
jumanpp = Juman()

#設定
change_flag = 1#誤採点を作成、０で作らない、１で作る
val_num = 4#クロスバリデーションの分割数
epoch_num = 100
miss = 20#誤採点の数
change_seeds = []
for i in range(10):#シャッフルのseed設定、繰り返す回数作成
    change_seeds.append(random.randint(1,1000))

#初期化
ave_acc = 0

acc_val = np.zeros((len(change_seeds), val_num), dtype = np.float64)
ave_acc = np.zeros(len(change_seeds), dtype = np.float64)
new_student_answer_list = np.zeros((len(change_seeds), len(student_list)), dtype = np.float64)

all_ans_label = []
all_pred_point = []
text_pos_memo = []
change_num = []
new_student_list = []

for i in range(len(change_seeds)):
    all_ans_label.append([])
    all_pred_point.append([])
    new_student_list.append([])
    text_pos_memo.append([])
    change_num.append([])

#モデルの読み込み
if model_select == 3:
    if laboro_model == 0:
        EMBEDDING_DIM = 768
        bert_vec = make_bert_vec(qnum, val_num)
    elif laboro_model == 1:
        EMBEDDING_DIM = 768
        bert_vec = make_bert_vec_laboro(qnum, val_num, laboro_model)
    elif laboro_model == 2:
        EMBEDDING_DIM = 1024
        bert_vec = make_bert_vec_laboro(qnum, val_num, laboro_model)

    
text_pos = []
for i in range(len(student_list)):
    text_pos.append(i)
    
if change_flag == 1:#誤採点作成
    change_student_answer_list, change_pos = make_change_answer(student_answer_list, miss)
elif change_flag == 0:#誤採点を作らない
    change_pos = np.zeros(len(student_list), dtype = np.int64)
    change_student_answer_list = copy.copy(student_answer_list)
    
change_student_answer_list = my_shuffle(change_seeds[0], change_student_answer_list)
    

for change_id, change_seed in enumerate(change_seeds):
    
    train_val_list= []
    test_val_list= []
    train_val_label= []
    test_val_label= []

    lstm_vec = []    

    #データシャッフル
    student_list = my_shuffle(change_seed, student_list)
    change_student_answer_list = my_shuffle(change_seed, change_student_answer_list)
    change_pos = my_shuffle(change_seed, change_pos)
    bert_vec = my_shuffle(change_seed, bert_vec)
    
    if change_id == 0:
        base_change_pos = copy.copy(change_pos)#誤採点の最初の位置を記憶
    else:#初回のデータの並びを記憶
        text_pos = my_shuffle(seed, text_pos)
            
    #シャッフルごとにデータ保存
    new_student_list[change_id] = copy.copy(student_list)
    text_pos_memo[change_id] = copy.copy(text_pos)
    new_student_answer_list[change_id] = copy.copy(change_student_answer_list)
    change_num[change_id] = copy.copy(change_pos)
    
    if model_select == 3:
        train_val_vec, test_val_vec = self_cross_val(bert_vec, val_num)

    #データをクロスバリデーション用に分割
    train_val_list, test_val_list = self_cross_val(student_list, val_num)
    train_val_label, test_val_label = self_cross_val(change_student_answer_list, val_num)

    #cross_valdation
    for val_epo in range(val_num):
        train_list = copy.copy(train_val_list[val_epo])
        train_label = copy.copy(train_val_label[val_epo])
        test_list = copy.copy(test_val_list[val_epo])
        test_label = copy.copy(test_val_label[val_epo])
       


        all_list = train_list+test_list
        all_label = train_label+test_label

        # データセットの準備
        index_datasets_title = []



      
        elif model_select == 3:
            index_datasets_title = train_val_vec[val_epo] + test_val_vec[val_epo]

        elif model_select == 5:
            index_datasets_title = train_val_vec[val_epo] + test_val_vec[val_epo]


        all_vec_data = []
        for vec in index_datasets_title:
            vec = torch.tensor(vec)
            all_vec_data.append(vec)

        #分割
        train_data = all_vec_data[:len(train_list)]
        test_data = all_vec_data[len(train_list):]

        print('train_data:',len(train_data),', train_label:',len(train_label))
        print('test_data:',len(test_data),', test_label:',len(test_label))

    #---------------------------------------------------------------------------------------------------------------
        #学習モデル
        HIDDEN_DIM = 128
        TAG_SIZE = 2

        model = LSTMClassifier_3NN(EMBEDDING_DIM, HIDDEN_DIM, TAG_SIZE).to(device)  # to(device)でモデルがGPU対応する

        for m in model.modules():
            print(m.__class__.__name__)
            weights_init(m)

        loss_function = nn.NLLLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        losses_epoch = []
        losses_batch = []
        accs = []
        weights_init(model)

        model.train() #学習モード
        import time
        now_time = time.time()
        for epoch in range(epoch_num):

            title_batch, category_batch = train2batch(train_data, train_label, 30)

            batch_count = 0
            all_loss = 0
            all_acc = 0

            for i in range(len(title_batch)):

                title_batch_pad = rnn.pad_sequence(title_batch[i], batch_first=True)  # padding
    #             title_batch_pad = title_batch[i]

                batch_loss = 0

                optimizer.zero_grad()

                # 順伝搬させるtensorはGPUで処理させるためdevice=にGPUをセット
                title_tensor = torch.tensor(title_batch_pad, device=device)

                # category_tensor.size() = (batch_size × 1)なので、squeeze()
                category_tensor = torch.tensor(category_batch[i], device=device).squeeze()

                out, out_vec = model(title_tensor)

                batch_loss = loss_function(out, category_tensor)
                batch_loss.backward()

                optimizer.step()

                batch_count += len(title_batch_pad)
                all_loss += batch_loss.item()
                losses_batch.append(batch_loss.item())

                _, predicts = torch.max(out, 1)

                for j, ans in enumerate(category_tensor):
                    if predicts[j].item() == ans.item():
                        all_acc += 1

            losses_epoch.append(all_loss)

            acc = all_acc/len(train_data)
            accs.append(acc)

            before_time  = now_time
            now_time = time.time()
            loss_time = now_time - before_time
            print('epoch: %d  time: %.2f  loss: %.6f  acc: %.5f' %(epoch+1, loss_time, all_loss, acc))

        print("done.")


#         print_graph(losses_epoch, accs)


        # test
        test_num = len(test_data)
        a = 0

        model.eval()
        with torch.no_grad():

            title_batch, category_batch = train2batch(test_data, test_label, 30)

            for i in range(len(title_batch)):
                title_batch_pad = rnn.pad_sequence(title_batch[i], batch_first=True)  # padding
                title_tensor = torch.tensor(title_batch_pad, device=device)

                category_tensor = torch.tensor(category_batch[i], device=device)

                out, vec_out = model(title_tensor)
                for per, vec in zip(out, vec_out):
                    lstm_vec.append(vec)
                    all_pred_point[change_id].append(per)

                _, predicts = torch.max(out, 1)

                for j, ans in enumerate(category_tensor):
                    all_ans_label[change_id].append(predicts[j].item())
                    if predicts[j].item() == ans.item():
                        a += 1

            epo_acc = a/test_num

        print("predict_acc : ", epo_acc)
        acc_val[change_id][val_epo] = epo_acc

        ave_acc[change_id] += epo_acc

    print("\n\n\n")
    print("epoch : score")
    for i, score in enumerate(acc_val[change_id]):
        print(i, ":", score)
        
    print(all_ans_label)

    ave_acc[change_id] /= val_num
    print("predict_ave : ", ave_acc[change_id])

average_acc = 0
for i in ave_acc:
    average_acc += i
    
print(average_acc/len(change_seeds))

ok1
[505, 1009, 1513, 2017]
[505, 1009, 1513, 2017]
[505, 1009, 1513, 2017]
train_data: 1512 , train_label: 1512
test_data: 505 , test_label: 505




LSTMClassifier_3NN
LSTM
Linear
Linear
Linear
LogSoftmax




epoch: 1  time: 5.11  loss: 36.143646  acc: 0.52249
epoch: 2  time: 5.08  loss: 34.742968  acc: 0.56878
epoch: 3  time: 4.66  loss: 24.601427  acc: 0.78638
epoch: 4  time: 4.73  loss: 20.739066  acc: 0.81878
epoch: 5  time: 4.42  loss: 17.924646  acc: 0.86243
epoch: 6  time: 4.94  loss: 17.683006  acc: 0.86772
epoch: 7  time: 4.56  loss: 15.441466  acc: 0.87963
epoch: 8  time: 4.44  loss: 14.466099  acc: 0.89815
epoch: 9  time: 4.78  loss: 13.370382  acc: 0.90476
epoch: 10  time: 4.78  loss: 11.148798  acc: 0.92460
epoch: 11  time: 4.58  loss: 10.797220  acc: 0.92328
epoch: 12  time: 4.89  loss: 9.938559  acc: 0.92923
epoch: 13  time: 5.20  loss: 8.443794  acc: 0.94180
epoch: 14  time: 4.85  loss: 8.880643  acc: 0.93386
epoch: 15  time: 4.96  loss: 6.895373  acc: 0.95437
epoch: 16  time: 4.65  loss: 6.271459  acc: 0.95833
epoch: 17  time: 4.47  loss: 5.413398  acc: 0.96098
epoch: 18  time: 5.19  loss: 6.429125  acc: 0.95635
epoch: 19  time: 4.70  loss: 5.774955  acc: 0.95833
epoch: 20 



predict_acc :  0.8910891089108911
train_data: 1513 , train_label: 1513
test_data: 504 , test_label: 504
LSTMClassifier_3NN
LSTM
Linear
Linear
Linear
LogSoftmax
epoch: 1  time: 4.45  loss: 35.640593  acc: 0.53007
epoch: 2  time: 4.26  loss: 29.074548  acc: 0.70588
epoch: 3  time: 4.46  loss: 22.027417  acc: 0.82683
epoch: 4  time: 4.45  loss: 19.783756  acc: 0.85129
epoch: 5  time: 4.50  loss: 21.694061  acc: 0.83543
epoch: 6  time: 4.38  loss: 19.949368  acc: 0.85988
epoch: 7  time: 4.36  loss: 16.445970  acc: 0.88500
epoch: 8  time: 4.33  loss: 14.582365  acc: 0.90218
epoch: 9  time: 4.60  loss: 12.613007  acc: 0.92003
epoch: 10  time: 4.52  loss: 11.734562  acc: 0.92664
epoch: 11  time: 4.46  loss: 11.651910  acc: 0.92333
epoch: 12  time: 4.35  loss: 10.635979  acc: 0.93655
epoch: 13  time: 4.34  loss: 10.393773  acc: 0.93457
epoch: 14  time: 4.46  loss: 10.142192  acc: 0.93258
epoch: 15  time: 4.47  loss: 9.038360  acc: 0.93919
epoch: 16  time: 4.34  loss: 7.742456  acc: 0.95572
epo

epoch: 73  time: 5.00  loss: 1.374908  acc: 0.98678
epoch: 74  time: 4.79  loss: 4.599587  acc: 0.97753
epoch: 75  time: 4.49  loss: 4.550635  acc: 0.97158
epoch: 76  time: 5.04  loss: 2.881647  acc: 0.98612
epoch: 77  time: 5.31  loss: 1.935341  acc: 0.98942
epoch: 78  time: 4.81  loss: 1.526372  acc: 0.99273
epoch: 79  time: 4.90  loss: 1.376803  acc: 0.99339
epoch: 80  time: 5.25  loss: 1.312332  acc: 0.99339
done.
predict_acc :  0.8412698412698413
train_data: 1513 , train_label: 1513
test_data: 504 , test_label: 504
LSTMClassifier_3NN
LSTM
Linear
Linear
Linear
LogSoftmax
epoch: 1  time: 4.68  loss: 35.298535  acc: 0.55254
epoch: 2  time: 4.54  loss: 28.412291  acc: 0.70456
epoch: 3  time: 4.57  loss: 19.130601  acc: 0.84534
epoch: 4  time: 4.37  loss: 17.526681  acc: 0.86451
epoch: 5  time: 4.35  loss: 14.350665  acc: 0.89954
epoch: 6  time: 4.27  loss: 13.540524  acc: 0.90549
epoch: 7  time: 4.71  loss: 13.438755  acc: 0.91143
epoch: 8  time: 4.52  loss: 11.630311  acc: 0.92135
ep

epoch: 1  time: 4.39  loss: 35.404145  acc: 0.50794
epoch: 2  time: 5.12  loss: 32.385041  acc: 0.64616
epoch: 3  time: 5.32  loss: 23.005494  acc: 0.81481
epoch: 4  time: 5.03  loss: 19.325228  acc: 0.85979
epoch: 5  time: 4.85  loss: 16.937448  acc: 0.88228
epoch: 6  time: 4.97  loss: 16.332077  acc: 0.88426
epoch: 7  time: 4.76  loss: 14.931267  acc: 0.89550
epoch: 8  time: 6.02  loss: 14.166223  acc: 0.89749
epoch: 9  time: 4.56  loss: 13.885885  acc: 0.90013
epoch: 10  time: 4.99  loss: 12.540214  acc: 0.91534
epoch: 11  time: 4.92  loss: 12.053243  acc: 0.91601
epoch: 12  time: 5.08  loss: 11.584434  acc: 0.91997
epoch: 13  time: 5.85  loss: 9.252439  acc: 0.93386
epoch: 14  time: 5.28  loss: 8.361138  acc: 0.94378
epoch: 15  time: 5.25  loss: 8.758567  acc: 0.94378
epoch: 16  time: 5.29  loss: 7.131292  acc: 0.95701
epoch: 17  time: 5.03  loss: 6.198308  acc: 0.96230
epoch: 18  time: 5.44  loss: 6.024703  acc: 0.96164
epoch: 19  time: 5.80  loss: 5.640687  acc: 0.96296
epoch: 20

epoch: 76  time: 3.35  loss: 0.766761  acc: 0.99405
epoch: 77  time: 3.08  loss: 0.760848  acc: 0.99273
epoch: 78  time: 3.10  loss: 0.685849  acc: 0.99339
epoch: 79  time: 3.43  loss: 0.665472  acc: 0.99405
epoch: 80  time: 3.16  loss: 0.845900  acc: 0.99273
done.
predict_acc :  0.8412698412698413
train_data: 1513 , train_label: 1513
test_data: 504 , test_label: 504
LSTMClassifier_3NN
LSTM
Linear
Linear
Linear
LogSoftmax
epoch: 1  time: 3.48  loss: 35.734781  acc: 0.51950
epoch: 2  time: 3.12  loss: 30.329005  acc: 0.66424
epoch: 3  time: 3.15  loss: 21.227303  acc: 0.83741
epoch: 4  time: 3.21  loss: 16.912661  acc: 0.87046
epoch: 5  time: 3.13  loss: 14.845523  acc: 0.89028
epoch: 6  time: 3.07  loss: 14.615031  acc: 0.90086
epoch: 7  time: 3.11  loss: 13.339651  acc: 0.91011
epoch: 8  time: 3.12  loss: 13.401929  acc: 0.91276
epoch: 9  time: 3.96  loss: 10.303759  acc: 0.92531
epoch: 10  time: 4.45  loss: 9.399614  acc: 0.93325
epoch: 11  time: 3.96  loss: 8.883366  acc: 0.94316
ep

epoch: 68  time: 2.97  loss: 2.543274  acc: 0.98612
epoch: 69  time: 3.21  loss: 2.307797  acc: 0.98810
epoch: 70  time: 3.37  loss: 2.355095  acc: 0.98612
epoch: 71  time: 2.97  loss: 2.284992  acc: 0.98876
epoch: 72  time: 3.16  loss: 1.981097  acc: 0.98876
epoch: 73  time: 3.55  loss: 3.270252  acc: 0.98282
epoch: 74  time: 3.29  loss: 2.718897  acc: 0.98546
epoch: 75  time: 3.10  loss: 1.960264  acc: 0.98810
epoch: 76  time: 2.97  loss: 1.752205  acc: 0.99009
epoch: 77  time: 3.04  loss: 1.415171  acc: 0.99141
epoch: 78  time: 2.93  loss: 1.589212  acc: 0.99075
epoch: 79  time: 2.93  loss: 1.797113  acc: 0.98744
epoch: 80  time: 2.91  loss: 1.831667  acc: 0.98876
done.
predict_acc :  0.8611111111111112




epoch : score
0 : 0.8594059405940594
1 : 0.8412698412698413
2 : 0.8571428571428571
3 : 0.8611111111111112
[[0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0

In [37]:
miss_count = np.zeros(len(student_list), dtype=np.int64)
for change_id in range(len(change_seeds)):
    count = 0
    for id, label in enumerate(all_ans_label[change_id]):
         if label != new_student_answer_list[change_id][id]: #間違っていたら
                miss_count[text_pos_memo[change_id][id]] += 1
    
siteki_num = []
gosaiten_num = []

out_list = []
out_index = []

for i in range(len(chaneg_seeds)):  
    count = 0
    all_count = 0
    i = len(chaneg_seeds)-i
    
    print("検出回数：", i)
    print("==================================================")
    for id, num in enumerate(miss_count):
        if num > i and num <= i + 1:
            all_count += 1
            if int(new_student_answer_list[0][id]) == 0:
                label = "◯"
            else:
                label = "×"
           
            if change_flag ==1:
                if student_answer_list[id] != check_student_answer_list[id]:
                    
                    text = "誤採点"
                    count += 1
                else:
                    text = "　　　"
                
                print(id, text, label, new_student_list[0][id])
            else:
                print(id, label, new_student_list[0][id])
                
            out = text + ","+label+","+new_student_list[0][id]+"\n"
            out_list.append(out)
            out_index.append(str(index[id])+"\n")
            
    print("誤採点検出回数：", all_count, count)
    print("\n\n")
    
    siteki_num.append(all_count)
    gosaiten_num.append(count)
    
pos = 1
print()
for siteki, wrong in zip(siteki_num, gosaiten_num):
    print(pos, ":", sikteki, wrong)
    pos+=1
    

検出回数： 14
12 　　　 × 抵抗が電熱線が1つのときより大きくなり、電流がながれにくくなったから
68 　　　 × 電熱線が2つになり抵抗が2倍になり電流が流れにくくなったから。
77 　　　 ◯ 直列回路につないだときの電流の大きさは小さく、抵抗は大きいから。
82 　　　 × 直列回路で2つの電熱線てつないだことで抵抗の大きさが2倍になり、電流がさらに流れにくくなったから。
94 誤採点 × 抵抗が大きく、電流が流れにくくなったから。
105 　　　 ◯ 抵抗が大きく、電流が電熱線が2つのときより、小さいから
164 誤採点 × 抵抗が大きくなり、電流が流れにくくなったから。
231 　　　 ◯ ていこうが強く電流が流れにくいため。
270 　　　 × 抵抗が大きいと電流がより流れにくくなるから。
287 　　　 ◯ 2つにすることで1つにおくられる電流が小さくなり抵抗が大きくなるため
338 　　　 × 電源電圧を8.0Vにしたことで抵抗が大きくなり電流が弱くなったから
381 　　　 × 抵抗が大きくなり、電流が流れにくくなったため。
400 　　　 ◯ 電流を抵抗する力が増えたから。
405 　　　 ◯ 電流が流れても抵抗があるので伝わる電流が小さくなってしまうから。
447 　　　 ◯ 電流の大きさは小さくなり、抵抗の大きさは大きくなるから。
465 　　　 ◯ 抵抗が増えて、電流が少しさえぎられて小さくなったから。
702 　　　 × 電熱線を直列につないだことで、全体の抵抗が大きくなったので電流が流れにくくなったから。
750 　　　 × 抵抗が大きく、電流が流れにくかったから
759 　　　 × 抵抗が大きくなって電流が流れにくくなったから
761 　　　 × 抵抗が大きいと電流が流れる力は小さくなるため。
841 　　　 ◯ 抵抗が2つになり、電流が流れにくくなったから、
922 　　　 × 抵抗が増え電流の流れが遅くなるから
983 　　　 × 抵抗の大きさが2倍になり、実験のときよりも電流が流れにくくなったため。
1012 　　　 ◯ 電熱線の抵抗によって流れる電流が減るから。
1114 　　　 ◯ 電熱線1つのときよりも、抵抗と電流の割合が抵抗の方が大きくなったため。
1125 　　　 × 抵抗が大きくなり電流が流れにくくなったから

1479 　　　 ◯ 2つの電熱線を直列につないだことにより、抵抗が大きくなり電流が流れにくくなったから。
1504 　　　 × 直例回路はどこも電流が同じで、抵抗はそれぞれの電熱線の抵抗の和になるため、電圧が小さくなるから。
1514 　　　 × 電熱線の抵抗が上がり、電流は直列つなぎの方が並列つなぎよりよわいから。
1530 　　　 ◯ 2つの電熱線を直列につなぐと、抵抗が大きくなり電流があまり通らないから。
1542 　　　 × 抵抗が低ければ、電流も低くなるから。
1571 　　　 ◯ 2つの電熱線を直列につないだため、抵抗が大きくなり、電流が通りにくくなったから。
1616 　　　 × 低抗が大きくなり電流が小さくなったから。
1630 　　　 ◯ 抵抗を増やした事によって、電流の流れる量が減ったから。
1656 　　　 ◯ 電熱線が増えたので、抵抗が大きくなり、電流が伝わる大きさが小さくなった。
1673 　　　 ◯ 2つの電熱線を直列につないでいるため、抵抗が大きく、電流が小さくなったから。
1706 　　　 × 直列回路の抵抗はそれぞれが全体と等しくなり電流は2つの電熱線の和が全体と等しくなるため
1749 　　　 ◯ 直列につなげた回路なので抵抗が大きくなり、電流の大きさが小さくなったため。
1789 　　　 × 電熱線をふやすと抵抗がおきて電流がながれないから
1801 　　　 ◯ 抵抗が大きいのに対して電流が小さかったから
1825 　　　 ◯ 抵抗が大きくなり、電流が流れにくくなったため。
1855 　　　 ◯ 抵抗が大きくなると、電流の大きさも減り、上昇温度が低くなるから。
1868 　　　 × 低抗の大きさが強く、電流の大きさが弱いため。
1878 　　　 ◯ 抵抗の値が大きくなり、流れる電流の値が少さくなったから。
1903 　　　 ◯ 抵抗が大きくなると電流は小さくなるので、水に伝わる温度が低くなるから。
1992 　　　 ◯ 発熱線が増えたことにより、抵抗が大きくなって電流が流れにくくなったから。
2002 　　　 × 抵抗して電流が流れにくくなったから。
誤採点検出回数： 99 0
検出回数： 0
0 　　　 × 抵抗が大きくなり、電圧が下がったため
9 　　　 ◯ 抵抗が大きくなったため、電流が流れにくくなったから。
37 　　