In [1]:
import torch
from tqdm import tqdm
from transformers import (
    AutoConfig,
    AutoModelForMultipleChoice,
    AutoTokenizer,
    HfArgumentParser,
    Trainer,
    TrainingArguments,
    default_data_collator,
    set_seed,
)
import numpy as np
# np.argmax([1,2,3,4])
def softmax(x, axis=None):
    """Return the softmax of ``x`` as a (nested) Python list.

    Args:
        x: array-like of scores; converted with ``np.array``.
        axis: axis to normalise over. ``None`` normalises over every
            element of the array at once.

    Returns:
        The normalised values, converted with ``ndarray.tolist()``.
    """
    scores = np.array(x)
    # Subtract the maximum first so np.exp never receives a positive value.
    shifted = scores - scores.max(axis=axis, keepdims=True)
    exps = np.exp(shifted)
    normaliser = exps.sum(axis=axis, keepdims=True)
    return (exps / normaliser).tolist()
# Scratch call: the result is discarded because this is not the last expression of the cell.
softmax([1,2])
# Tokenizer for the 'ethanyt/guwenbert-base' checkpoint; a later cell calls
# tokenizer.decode on stored input_ids.
tokenizer = AutoTokenizer.from_pretrained('ethanyt/guwenbert-base')

In [2]:
import json
# Load the evaluation examples: one JSON object per line of the file.
# The encoding is pinned to UTF-8 so that parsing non-ASCII text does not
# depend on the platform's default encoding.
with open('data/test_placeholder.jsonl','r',encoding='utf-8') as f:
    # Iterate the file handle directly instead of materialising readlines() first.
    valid_json = [json.loads(line) for line in f]

In [18]:
def save_jsonl(predict_valid_json,out_path):
    """Write predictions to ``out_path`` in JSON Lines format.

    Each output line holds the record's 'translation' and 'choices' plus the
    predicted index stored under the key 'answer'.

    Args:
        predict_valid_json: iterable of dicts with the keys 'translation',
            'choices' and 'predict'.
        out_path: destination file path; an existing file is overwritten.

    Raises:
        KeyError: if a record lacks one of the keys read here.
    """
    # ensure_ascii=False emits non-ASCII characters verbatim, so the file
    # encoding is pinned to UTF-8 rather than left to the platform default.
    with open(out_path,'w',encoding='utf-8') as f:
        for line in predict_valid_json:
            this_line = {
                'translation': line['translation'],
                'choices': line['choices'],
                # int() converts numpy integers, which json cannot serialise.
                'answer': int(line['predict']),
            }
            f.write(json.dumps(this_line,ensure_ascii=False)+'\n')

### ner


In [7]:
import pickle
# Load the pickled NER-model predictions.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('predict/final/test/ner.pkl','rb') as f:
    rst = pickle.load(f)

# all_scores maps origin_idx -> one entry per dataset row with that origin_idx.
# Each entry lists the class-1 probabilities at the positions whose label id
# is not -100.
all_scores = {}
test_dataset = rst['test_dataset']
test_result = rst['test_result']
for batch_idx in tqdm(range(len(test_dataset))):
    origin_id = test_dataset[batch_idx]['origin_idx']
    per_origin = all_scores.setdefault(origin_id, [])
    predicts = softmax(test_result.predictions[batch_idx],axis=-1)
    labels = test_result.label_ids[batch_idx]
    kept_positions = np.where(labels!=-100)[0].tolist()
    class1_probs = np.array(predicts)[kept_positions][:,1].tolist()
    per_origin.append(class1_probs)
def multiple_list(l):
    """Return the product of every element in ``l``.

    Args:
        l: iterable of numbers.

    Returns:
        The running product, starting from 1 (so an empty iterable gives 1).
    """
    product = 1
    for factor in l:
        product *= factor
    return product

# Score each choice as the product of its collected class-1 probabilities
# (the original also kept a commented-out `sum` variant), then pick the
# highest-scoring choice for every example.
predict_valid_json = []
for i, example in enumerate(valid_json):
    choices = example['choices']
    # all_scores[i] is indexed by choice position.
    this_scores = [multiple_list(all_scores[i][choice_id]) for choice_id in range(len(choices))]
    this_dict = dict(example)
    this_dict['scores'] = softmax(this_scores)
    this_dict['predict'] = np.argmax(this_scores)
    # 'true' records whether the predicted index equals the stored 'answer'.
    this_dict['true'] = (this_dict['predict'] == this_dict['answer'])
    predict_valid_json.append(this_dict)

100%|████████████████████████████████████████████████████████████████████████| 10880/10880 [00:07<00:00, 1463.55it/s]


In [8]:
# Fraction of examples whose predicted index equals the stored 'answer'.
ner_hits = [int(e['true']) for e in predict_valid_json]
np.sum(ner_hits) / len(predict_valid_json)

0.25882352941176473

In [9]:
# Keep the NER results under their own name. This binds the same list object
# (no copy); later cells rebind predict_valid_json to a fresh list.
ner_predict = predict_valid_json

### binary

In [10]:
import pickle
# Load the pickled binary-classifier predictions.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('predict/final/test/binary.pkl','rb') as f:
    rst = pickle.load(f)

# all_scores maps origin_idx -> {text: softmax of that row's prediction vector}.
# The text key is the second '[SEP]'-delimited segment of the decoded
# input_ids with all spaces removed.
all_scores = {}
test_dataset = rst['test_dataset']
for i in tqdm(range(len(test_dataset))):
    row = test_dataset[i]
    idx = row['origin_idx']
    segments = tokenizer.decode(row['input_ids']).split('[SEP]')
    per_origin = all_scores.setdefault(idx, {})
    row_probs = softmax(rst['test_result'].predictions[i].tolist())
    per_origin[segments[1].replace(' ','')] = row_probs

# A choice's score is the sum of the index-1 probabilities of every collected
# text key that occurs as a substring of the choice.
predict_valid_json = []
for i, example in enumerate(valid_json):
    this_scores = []
    for choice in example['choices']:
        choice_score = 0
        for k, probs in all_scores[i].items():
            if k in choice:
                choice_score += probs[1]
        this_scores.append(choice_score)
    this_dict = dict(example)
    this_dict['scores'] = softmax(this_scores)
    this_dict['predict'] = np.argmax(this_scores)
    # 'true' records whether the predicted index equals the stored 'answer'.
    this_dict['true'] = (this_dict['predict'] == this_dict['answer'])
    predict_valid_json.append(this_dict)

# Fraction of examples whose predicted index equals the stored 'answer'.
binary_hits = [int(e['true']) for e in predict_valid_json]
np.sum(binary_hits) / len(predict_valid_json)

100%|████████████████████████████████████████████████████████████████████████| 24749/24749 [00:10<00:00, 2461.83it/s]


0.2584558823529412

In [11]:
# Keep the binary-model results under their own name. This binds the same list
# object (no copy); later cells rebind predict_valid_json to a fresh list.
binary_predict = predict_valid_json

### seq

In [12]:
import pickle
# Load the pickled seq-model predictions.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('predict/final/test/seq.pkl','rb') as f:
    rst = pickle.load(f)

# all_scores maps dataset row index -> softmax over that row's prediction vector.
all_scores = {}
for i in tqdm(range(len(rst['test_dataset']))):
    all_scores[i] = softmax(rst['test_result'].predictions[i].tolist())

# Row i of the predictions is paired with example i of valid_json.
predict_valid_json = []
for i in range(len(valid_json)):
    this_scores = all_scores[i]
    this_dict = dict(valid_json[i])
    # NOTE(review): this_scores is already a softmax output (see the loop
    # above), so 'scores' is a softmax applied a second time. Left unchanged
    # because the ensemble consumes these values; confirm whether it is intended.
    this_dict['scores'] = softmax(this_scores)
    this_dict['predict'] = np.argmax(this_scores)
    # 'true' records whether the predicted index equals the stored 'answer'.
    this_dict['true'] = (this_dict['predict'] == this_dict['answer'])
    predict_valid_json.append(this_dict)

# Fraction of examples whose predicted index equals the stored 'answer'.
np.sum([int(e['true']) for e in predict_valid_json]) / len(predict_valid_json)

100%|█████████████████████████████████████████████████████████████████████████| 2720/2720 [00:00<00:00, 52184.19it/s]


0.2610294117647059

In [13]:
# Keep the seq-model results under their own name. This binds the same list
# object (no copy); the next model's cell rebinds predict_valid_json to a fresh list.
seq_predict = predict_valid_json

### baseline

In [26]:
import pickle
# Load the pickled baseline-model predictions.
# NOTE(review): pickle.load can execute arbitrary code; only load trusted files.
with open('predict/final/test/baseline.pkl','rb') as f:
    rst = pickle.load(f)

# all_scores maps dataset row index -> softmax over that row's prediction vector.
all_scores = {}
for i in tqdm(range(len(rst['test_dataset']))):
    all_scores[i] = softmax(rst['test_result'].predictions[i].tolist())

# Row i of the predictions is paired with example i of valid_json.
predict_valid_json = []
for i in range(len(valid_json)):
    this_scores = all_scores[i]
    this_dict = dict(valid_json[i])
    # NOTE(review): this_scores is already a softmax output (see the loop
    # above), so 'scores' is a softmax applied a second time. Left unchanged;
    # confirm whether it is intended.
    this_dict['scores'] = softmax(this_scores)
    this_dict['predict'] = np.argmax(this_scores)
    # 'true' records whether the predicted index equals the stored 'answer'.
    this_dict['true'] = (this_dict['predict'] == this_dict['answer'])
    predict_valid_json.append(this_dict)

# Fraction of examples whose predicted index equals the stored 'answer'.
np.sum([int(e['true']) for e in predict_valid_json]) / len(predict_valid_json)

100%|█████████████████████████████████████████████████████████████████████████| 2720/2720 [00:00<00:00, 32554.25it/s]


0.2613970588235294

In [27]:
# Keep the baseline results under their own name; this binds the same list
# object as predict_valid_json (no copy).
baseline_predict = predict_valid_json

### save_each

In [19]:
# Write every model's predictions to its own JSONL file.
per_model_outputs = [
    (ner_predict, 'predict/final/test/ner.jsonl'),
    (binary_predict, 'predict/final/test/binary.jsonl'),
    (seq_predict, 'predict/final/test/seq.jsonl'),
    (baseline_predict, 'predict/final/test/baseline.jsonl'),
]
for model_predict, model_out_path in per_model_outputs:
    save_jsonl(model_predict, model_out_path)

### ensemble

In [20]:
# Oracle accuracy: an example counts as a hit when at least one of the binary,
# NER or seq models has 'true' set for it.
oracle_hits = []
for row in range(len(ner_predict)):
    hit = binary_predict[row]['true'] or ner_predict[row]['true'] or seq_predict[row]['true']
    oracle_hits.append(hit)
oracle_true = sum(oracle_hits) / len(oracle_hits)
print(oracle_true)

0.3290441176470588


In [23]:
# Ensemble weights: w[k] scales the 'scores' of predict_list[k] inside get_score.
w = [1,0.1,1.6]
# Order must line up with w: NER, binary, seq. baseline_predict is not included.
predict_list = [ner_predict,binary_predict,seq_predict]
# Placeholder; rebound to get_score's second return value at the end of this cell.
ensemble_predict = []
def get_score(w,predict_list):
    """Combine several models' per-choice scores with a weighted sum.

    Args:
        w: sequence of weights; ``w[k]`` multiplies the scores of
            ``predict_list[k]``.
        predict_list: list of per-model prediction lists. Entry ``i`` of every
            model is a dict with a 'scores' list; entries of the first model
            must also carry an 'answer' key.

    Returns:
        A tuple ``(accuracy, ensemble_predict)``. ``ensemble_predict`` holds
        shallow copies of the first model's dicts with 'scores', 'predict' and
        'true' replaced by the ensemble values; ``accuracy`` is the fraction
        of entries whose 'true' flag is set.
    """
    n_models = len(predict_list)
    ensemble_predict = []
    for i, base_entry in enumerate(predict_list[0]):
        # Shallow copy so the first model's own dict is left untouched.
        merged = base_entry.copy()
        weighted = [np.array(predict_list[k][i]['scores']) * w[k] for k in range(n_models)]
        merged['scores'] = sum(weighted).tolist()
        merged['predict'] = np.argmax(merged['scores'])
        merged['true'] = (merged['predict'] == merged['answer'])
        ensemble_predict.append(merged)
    hits = [int(e['true']) for e in ensemble_predict]
    return np.sum(hits) / len(ensemble_predict),ensemble_predict
# get_score returns (accuracy, merged predictions).
# NOTE: this rebinds `rst`, which earlier cells use for the loaded pickle object.
rst,ensemble_predict = get_score(w,predict_list)
print(rst)

0.26176470588235295


In [25]:
# Write the weighted-ensemble predictions with the same writer used for the per-model files.
save_jsonl(ensemble_predict,'predict/final/test/ensemble.jsonl')

In [44]:
# Sweep the third ensemble weight (applied to predict_list[2]) over
# 0.0, 0.1, ..., 9.9 while the first two weights stay at w[0] and w[1].
for i in range(100):
    w1 = i * 0.1
    # get_score returns (accuracy, predictions). Print only the accuracy;
    # printing the whole tuple would dump every prediction dict each iteration.
    sweep_accuracy = get_score([w[0],w[1],w1],predict_list)[0]
    print(w1,' ',sweep_accuracy)

0.0   0.8580882352941176
0.1   0.8801470588235294
0.2   0.88125
0.30000000000000004   0.8849264705882353
0.4   0.8852941176470588
0.5   0.8849264705882353
0.6000000000000001   0.8871323529411764
0.7000000000000001   0.8856617647058823
0.8   0.8863970588235294
0.9   0.8871323529411764
1.0   0.8856617647058823
1.1   0.8856617647058823
1.2000000000000002   0.8863970588235294
1.3   0.8863970588235294
1.4000000000000001   0.8871323529411764
1.5   0.8867647058823529
1.6   0.8871323529411764
1.7000000000000002   0.8871323529411764
1.8   0.8867647058823529
1.9000000000000001   0.8867647058823529
2.0   0.8867647058823529
2.1   0.8863970588235294
2.2   0.8863970588235294
2.3000000000000003   0.8863970588235294
2.4000000000000004   0.8863970588235294
2.5   0.8856617647058823
2.6   0.8856617647058823
2.7   0.8856617647058823
2.8000000000000003   0.8856617647058823
2.9000000000000004   0.8856617647058823
3.0   0.8856617647058823
3.1   0.8856617647058823
3.2   0.8856617647058823
3.3000000000000003  