In [3]:
import torch
from torch import nn
from GetLoader import GetLoader
from ModelEmbedding import ModelEmbedding
from MyDataset import MyDataset
from GetInit import GetInit
from TrainFunc import TrainFunc
from TextCNN import TextCNN

In [2]:
# Locations of the input files, relative to this notebook's directory.
data_root = dict(
    train_path="../../data/train_torch.csv",
    test_path="../../data/test_a.csv",
    sub_path="../../data/test_a_sample_submit.csv",
    w2v_path="../../data/word2vec.bin",
)

# GetInit receives the full path mapping; ModelEmbedding only the word2vec file.
config = GetInit(data_root)
model_embedding = ModelEmbedding(data_root["w2v_path"])

# Labelled training corpus.
train_dataset = MyDataset(
    model_embedding,
    corpus=config.x_train,
    corpus_label=config.y_train,
    with_label=True,
)
# Unlabelled test corpus (no corpus_label is passed).
test_dataset = MyDataset(model_embedding, corpus=config.x_test, with_label=False)

# Later cells read train_loader / valid_loader / test_loader from this object.
loader = GetLoader(train_dataset, test_dataset)

GetInit Start!
GetInit End!
ModelEmbedding End!


In [7]:
# Build the model: vocabulary size comes from the embedding helper; the
# embedding width (300) and output_size (14) are hard-coded here.
model = TextCNN(
    vocab_size=model_embedding.dict_length,
    embedding_dim=300,
    output_size=14,
)
# NOTE(review): presumably copies the pretrained vectors into the embedding
# layer and, with is_static=False, leaves them trainable -- confirm in TextCNN.
model.init_weights(model_embedding.embedding, is_static=False)
# Requires a CUDA-capable GPU; there is no CPU fallback in this cell.
model = model.cuda()

# nn.NLLLoss expects log-probabilities as input.
# NOTE(review): assumes TextCNN.forward ends in log_softmax -- TODO confirm.
criterion = nn.NLLLoss()
opt = torch.optim.Adam(params=model.parameters(), lr=1e-4)

# Assemble the trainer; training itself is started in a later cell.
mytrain = TrainFunc(
    model,
    criterion,
    opt,
    loader.train_loader,
    loader.valid_loader,
    loader.test_loader,
)

GetLoader End


In [8]:
# Run the trainer with an argument of 25 (presumably the epoch budget -- confirm
# in TrainFunc.train) and keep whatever it returns as `best_model`.
# NOTE(review): the recorded output of this cell ends in KeyboardInterrupt; if
# that exception propagated out of train(), `best_model` was never bound here.
best_model = mytrain.train(25)

	Loss: 0.6457(train)	|	Acc: 81.9%(train)
	Loss: 0.3172(valid)	|	Acc: 90.2%(valid)
	Micro: 0.9022(valid)	|	Macro: 0.8612(valid)
Now_best:0.8612
	Loss: 0.2746(train)	|	Acc: 91.7%(train)
	Loss: 0.2530(valid)	|	Acc: 92.1%(valid)
	Micro: 0.9208(valid)	|	Macro: 0.8995(valid)
Now_best:0.8995
	Loss: 0.2241(train)	|	Acc: 93.1%(train)
	Loss: 0.2261(valid)	|	Acc: 92.9%(valid)
	Micro: 0.9293(valid)	|	Macro: 0.9117(valid)
Now_best:0.9117
	Loss: 0.1947(train)	|	Acc: 94.0%(train)
	Loss: 0.2102(valid)	|	Acc: 93.3%(valid)
	Micro: 0.9335(valid)	|	Macro: 0.9180(valid)
Now_best:0.9180
	Loss: 0.1735(train)	|	Acc: 94.7%(train)
	Loss: 0.1994(valid)	|	Acc: 93.6%(valid)
	Micro: 0.9364(valid)	|	Macro: 0.9232(valid)
Now_best:0.9232
	Loss: 0.1564(train)	|	Acc: 95.2%(train)
	Loss: 0.1917(valid)	|	Acc: 93.8%(valid)
	Micro: 0.9380(valid)	|	Macro: 0.9266(valid)
Now_best:0.9266
	Loss: 0.1418(train)	|	Acc: 95.6%(train)
	Loss: 0.1860(valid)	|	Acc: 94.0%(valid)
	Micro: 0.9396(valid)	|	Macro: 0.9295(valid)
Now_best:0.9295

KeyboardInterrupt: 

In [9]:
# Continue training the existing `model` object with a freshly built loss,
# optimizer and trainer. A new Adam instance starts without the previous
# optimizer state.
# NOTE(review): a new TrainFunc presumably also restarts its best-score
# tracking -- confirm this is intended.
criterion = nn.NLLLoss()
opt = torch.optim.Adam(params=model.parameters(), lr=1e-4)
mytrain = TrainFunc(
    model,
    criterion,
    opt,
    loader.train_loader,
    loader.valid_loader,
    loader.test_loader,
)

# Second training run, called with an argument of 10.
best_model = mytrain.train(10)

	Loss: 0.0342(train)	|	Acc: 99.4%(train)
	Loss: 0.1855(valid)	|	Acc: 94.4%(valid)
	Micro: 0.9438(valid)	|	Macro: 0.9352(valid)
Now_best:0.9352
	Loss: 0.0286(train)	|	Acc: 99.6%(train)
	Loss: 0.1889(valid)	|	Acc: 94.4%(valid)
	Micro: 0.9437(valid)	|	Macro: 0.9349(valid)
	Loss: 0.0238(train)	|	Acc: 99.7%(train)
	Loss: 0.1927(valid)	|	Acc: 94.4%(valid)
	Micro: 0.9437(valid)	|	Macro: 0.9352(valid)
	Loss: 0.0196(train)	|	Acc: 99.8%(train)
	Loss: 0.1971(valid)	|	Acc: 94.4%(valid)
	Micro: 0.9439(valid)	|	Macro: 0.9356(valid)
Now_best:0.9356
	Loss: 0.0160(train)	|	Acc: 99.8%(train)
	Loss: 0.2023(valid)	|	Acc: 94.4%(valid)
	Micro: 0.9437(valid)	|	Macro: 0.9354(valid)


KeyboardInterrupt: 

In [10]:
# Collect the trainer's predictions; `ans` is written into the submission file
# in the next cell.
# NOTE(review): whether predict() uses the best checkpoint or the current
# weights is not visible from this notebook -- confirm in TrainFunc.predict.
ans=mytrain.predict()

In [15]:
import pandas as pd
import numpy as np

# Start from the sample-submission file so the output keeps its layout.
df_sub = pd.read_csv(data_root["sub_path"])

# Assign the column with bracket indexing rather than `df_sub.label = ...`:
# attribute assignment only updates a column that already exists; otherwise
# pandas sets a plain instance attribute and the CSV would be written without
# the predictions. The int64 cast is applied in the same step.
df_sub["label"] = np.array(ans).astype("int64")

# The file name embeds the trainer's best score, formatted to 4 decimals.
# NOTE(review): best_score appears to be the best validation macro score (it
# matches the last logged "Now_best" value) -- confirm in TrainFunc.
save_name = "./textcnn_{:.4f}.csv".format(mytrain.best_score)
df_sub.to_csv(save_name, index=False)

In [20]:
# Display the best score held by the current trainer object (the same value
# that is embedded in the submission file name).
mytrain.best_score

0.9355598130314265