In [1]:
import pickle
import numpy as np
import pandas as pd
from sklearn.svm import LinearSVC, NuSVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import GradientBoostingClassifier, BaggingClassifier

In [2]:
# Base networks whose saved predictions are combined. The list order is the
# order in which their outputs are concatenated into one feature vector.
models = ['resnext', 'wideresnet', 'densenet']

# Pickle filename templates per split; `{}` is filled with the model name.
train_file, val_file, test_file = (
    "prediction_{}_train.pkl",
    "prediction_{}_val.pkl",
    "prediction_{}.pkl",
)

In [3]:
def depackage(result):
    """Flatten batched ``(labels, outputs)`` records into per-sample pairs.

    Every element of ``result`` is read as ``element[0]`` (the labels of one
    batch) and ``element[1]`` (the outputs at the same positions).

    Returns a single list of ``(label, output)`` tuples, batch after batch,
    keeping the original order inside each batch.
    """
    pairs = []
    for batch_idx in range(len(result)):
        batch_labels, batch_outputs = result[batch_idx][0], result[batch_idx][1]
        pairs.extend(
            (batch_labels[pos], batch_outputs[pos])
            for pos in range(len(batch_labels))
        )
    return pairs

def depackage_test(result):
    """Expand a batch-keyed prediction dict into one entry per sample.

    Each key of ``result`` is an indexable group of sample keys and each value
    holds the matching entries at the same positions. The returned dict maps
    every individual sample key to its own entry; if a sample key occurs more
    than once, the last occurrence wins.
    """
    flat = {}
    for batch_keys, batch_vals in result.items():
        flat.update(
            (batch_keys[pos], batch_vals[pos]) for pos in range(len(batch_keys))
        )
    return flat

def read_and_combine_data(models, mode):
    """Load every model's saved predictions for one split and join them per sample.

    Parameters
    ----------
    models : list of str
        Model names. Each name is used both as the sub-directory
        (``./<name>/``) and as the value substituted into the filename template.
    mode : str
        ``'train'`` or ``'val'`` select the labelled splits; any other value is
        handled as the test split.

    Returns
    -------
    (X, y) for ``'train'`` / ``'val'``
        ``X`` holds, per sample, the outputs of all models concatenated in
        ``models`` order; ``y`` holds the labels taken from the first model.
    dict for the test split
        Maps every key present in the first model's predictions to the
        concatenated outputs of all models for that key.

    Raises
    ------
    AssertionError
        If any model's label for a sample differs from the first model's label.
    """
    def _load(model, template):
        # NOTE(security): pickle.load can execute arbitrary code; only point
        # this at prediction files you produced yourself.
        # The context manager closes the file handle even if unpickling fails.
        with open("./" + model + '/' + template.format(model), "rb") as fh:
            return pickle.load(fh)

    if mode in ['train', 'val']:
        file_template = train_file if mode == 'train' else val_file
        data = {model: depackage(_load(model, file_template)) for model in models}
        reference = data[models[0]]
        X = []
        y = []
        for i in range(len(reference)):
            label = reference[i][0]
            # Compare every other model against the first one so misaligned
            # samples are caught for all models, and a single-model list works.
            for model in models[1:]:
                assert data[model][i][0] == label, (
                    "label mismatch at sample {} between {} and {}".format(
                        i, models[0], model))
            y.append(label)
            X.append(np.concatenate([data[model][i][1] for model in models], axis=0))
        return np.array(X), np.array(y)

    # Test split: predictions are keyed per sample instead of carrying labels.
    data = {model: depackage_test(_load(model, test_file)) for model in models}
    return {
        key: np.concatenate([data[model][key] for model in models], axis=0)
        for key in data[models[0]]
    }

def softmax(x):
    """Row-wise softmax of a 2-D array.

    The maximum of each row is subtracted before exponentiating so large
    inputs do not overflow; each row of the result sums to 1.
    """
    exps = np.exp(x - np.max(x, axis=1, keepdims=True))
    return exps / exps.sum(axis=1, keepdims=True)

In [4]:
# Concatenated model outputs and labels for the training split.
X_train, y_train = read_and_combine_data(models=models, mode='train')

In [5]:
# Concatenated model outputs and labels for the validation split.
X_val, y_val = read_and_combine_data(models=models, mode='val')

In [6]:
# Test split: dict mapping each sample key to its concatenated model outputs.
data = read_and_combine_data(models=models, mode='test')

In [7]:
import tensorflow as tf
from tensorflow import keras

In [8]:
# Replace each model's 100-column slice of X_train with its row-wise softmax.
# The update is in place, so running this cell again applies softmax twice.
for block_idx in range(len(models)):
    cols = slice(block_idx * 100, (block_idx + 1) * 100)
    X_train[:, cols] = softmax(X_train[:, cols])

In [9]:
# Fusion network: 300 inputs -> 600 ReLU units -> 100-way softmax output.
model = keras.Sequential([
    keras.Input(shape=(300,)),
    keras.layers.Dense(600, activation='relu'),
    keras.layers.Dense(100, activation='softmax'),
])

In [10]:
# One-hot encode the class labels for the categorical cross-entropy loss.
# num_classes is pinned to the 100 units of the network's output layer;
# without it the width is inferred from the largest label present, which is
# too narrow whenever the highest class id is missing from y_train.
# NOTE: this rebinds y_train, so re-running the cell encodes it a second time.
y_train = keras.utils.to_categorical(y_train, num_classes=100)

In [11]:
# Configure training: Adam with its default settings, categorical
# cross-entropy on the one-hot targets, and accuracy as the reported metric.
model.compile(
    optimizer=keras.optimizers.Adam(),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [60]:
# Fusion weights, one per 100-column model block of the concatenated outputs.
weight = [0.475, 0.425, 0.1]

In [61]:
def fusion(result, weights, n_classes=100):
    """Pick a class per sample from a weighted sum of per-model score blocks.

    Parameters
    ----------
    result : dict
        Maps a sample key to a 1-D vector made of consecutive blocks of
        ``n_classes`` scores, one block per model.
    weights : sequence of float
        One weight per block; block ``i`` is scaled by ``weights[i]``.
    n_classes : int, optional
        Width of each model's block. Defaults to 100, the value that was
        previously hard-coded.

    Returns
    -------
    dict
        Maps each sample key to the index of the largest weighted sum.
        An empty ``result`` yields an empty dict.

    Notes
    -----
    The scores are combined exactly as stored; no softmax is applied here.
    """
    # Read the argument, not the notebook-global `data`, so the function
    # operates on whatever mapping the caller actually passes in.
    if not result:
        return {}
    keys = list(result.keys())
    X = np.array([result[key] for key in keys])
    fused = X[:, 0:n_classes] * weights[0]
    for i in range(1, len(weights)):
        fused = fused + X[:, n_classes * i:n_classes * (i + 1)] * weights[i]
    winners = np.argmax(fused, axis=1)
    return dict(zip(keys, winners))

In [62]:
# Predicted class per test sample from the weighted combination of the models.
result_fusion = fusion(result=data, weights=weight)

In [63]:
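# Disabled alternative: predict with the Keras `model` instead of the
# weighted fusion that feeds the submission file.
# NOTE: softmax below runs across each whole row, whereas the training
# features were normalised separately per 100-column model block.
# X = []
# label = []
# for key, value in data.items():
#     X.append(value)
#     label.append(key)
# X=softmax(np.array(X))
# preds = np.argmax(model.predict(X),axis=1)
# result_fusion = dict(zip(label, preds))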

In [64]:
# Turn the {sample key: predicted class} mapping into a two-column table:
# a one-row frame is transposed so the keys become the index, which
# reset_index then moves into a regular column.
df = pd.DataFrame([result_fusion]).T.reset_index()
df.columns = ['Id', 'Category']
# Keep only the text after the last '/' of each key.
df['Id'] = df['Id'].map(lambda key: key.rsplit('/', 1)[-1])

In [65]:
# Write the Id/Category table to disk without the positional index column.
df.to_csv(path_or_buf="submission_fusion.csv", index=False)