# Model Ensemble
> Ensemble of baseline work

In [1]:
import os
import pandas as pd
import numpy as np
import gc
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
from tensorflow import keras
import mlcrate as mlc
import pickle as pkl
from tensorflow.keras.layers import BatchNormalization
from keras.models import Sequential, Model
from keras.layers import Input, Embedding, Dense, Flatten, Concatenate, Dot, Reshape, Add, Subtract
from keras import backend as K
from keras import regularizers 
from tensorflow.keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.regularizers import l2
from sklearn.base import clone
from typing import Dict
import matplotlib.pyplot as plt
from scipy import stats
from tensorflow.keras.losses import Loss
from tensorflow.keras import backend as K
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import TimeSeriesSplit, StratifiedKFold, KFold, GroupKFold
from tqdm import tqdm
from tensorflow.python.ops import math_ops

### 1. Data Exploration & Features layers

In [2]:
%%time
n_features = 300
features = [f'f_{i}' for i in range(n_features)]
feature_columns = ['investment_id', 'time_id'] + features
train = pd.read_pickle('../input/ubiquant-market-prediction-half-precision-pickle/train.pkl')
train.head()

CPU times: user 409 ms, sys: 1.49 s, total: 1.9 s
Wall time: 15.6 s


Unnamed: 0,investment_id,time_id,f_0,f_1,f_2,f_3,f_4,f_5,f_6,f_7,...,f_291,f_292,f_293,f_294,f_295,f_296,f_297,f_298,f_299,target
0,1,0,0.932617,0.113708,-0.4021,0.378418,-0.203979,-0.413574,0.96582,1.230469,...,-1.095703,0.200073,0.819336,0.941406,-0.086792,-1.086914,-1.044922,-0.287598,0.321533,-0.300781
1,2,0,0.811035,-0.51416,0.742188,-0.616699,-0.194214,1.771484,1.427734,1.133789,...,0.912598,-0.734375,0.819336,0.941406,-0.387695,-1.086914,-0.929688,-0.974121,-0.343506,-0.231079
2,6,0,0.394043,0.615723,0.567871,-0.60791,0.068909,-1.083008,0.979492,-1.125977,...,0.912598,-0.551758,-1.220703,-1.060547,-0.219116,-1.086914,-0.612305,-0.113953,0.243652,0.568848
3,7,0,-2.34375,-0.011871,1.875,-0.606445,-0.586914,-0.815918,0.77832,0.299072,...,0.912598,-0.266357,-1.220703,0.941406,-0.608887,0.104919,-0.783203,1.151367,-0.773438,-1.064453
4,8,0,0.842285,-0.262939,2.330078,-0.583496,-0.618164,-0.742676,-0.946777,1.230469,...,0.912598,-0.741211,-1.220703,0.941406,-0.588379,0.104919,0.753418,1.345703,-0.737793,-0.531738


In [3]:
investment_id = train.pop("investment_id")
investment_id.head()

0    1
1    2
2    6
3    7
4    8
Name: investment_id, dtype: uint16

In [4]:
_ = train.pop("time_id")
y = train.pop("target")
y.head()

0   -0.300781
1   -0.231079
2    0.568848
3   -1.064453
4   -0.531738
Name: target, dtype: float16

In [5]:
%%time
investment_ids = list(investment_id.unique())
investment_id_size = len(investment_ids) + 1
investment_id_lookup_layer = layers.IntegerLookup(max_tokens=investment_id_size)
with tf.device("cpu"):
    investment_id_lookup_layer.adapt(investment_id)

2022-04-19 22:22:52.713596: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-19 22:22:52.801234: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-19 22:22:52.801961: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:937] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero
2022-04-19 22:22:52.803120: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 AVX512F FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compil

CPU times: user 1min 11s, sys: 8.87 s, total: 1min 20s
Wall time: 57.7 s


In [6]:
investment_id2 = investment_id[~investment_id.isin([85, 905, 2558, 3662, 2800, 1415])]

investment_ids2 = list(investment_id2.unique())
investment_id_size2 = len(investment_ids2) + 1
investment_id_lookup_layer2 = layers.IntegerLookup(max_tokens=investment_id_size2)
investment_id_lookup_layer2.adapt(pd.DataFrame({"investment_ids":investment_ids}))

In [7]:
def preprocess(X, y):
    print(X)
    print(y)
    return X, y
def make_dataset(feature, investment_id, y, batch_size=1024, mode="train"):
    ds = tf.data.Dataset.from_tensor_slices(((investment_id, feature), y))
    ds = ds.map(preprocess)
    if mode == "train":
        ds = ds.shuffle(256)
    ds = ds.batch(batch_size).cache().prefetch(tf.data.experimental.AUTOTUNE)
    return ds

In [8]:
def gen_ids_and_skf_idxs():
    train = pd.read_pickle('../input/ubiquant-market-prediction-half-precision-pickle/train.pkl')
    investment_id = train[["investment_id"]].astype('int64')
    train.pop("investment_id")
    train.pop("time_id")
    train.pop("target")
    skf = StratifiedKFold(5, shuffle=True, random_state=42)
    idxs = list(enumerate(skf.split(train, investment_id)))
    del train
    gc.collect()
    return investment_id, idxs

### feature_time_ds

In [9]:
def make_ft_dataset(investment_id, feature, time_id, y=None, batch_size=1024):
    if y is not None:
        slices = ((investment_id, feature, time_id), y)
    else:
        slices = ((investment_id, feature, time_id))
        
    ds = tf.data.Dataset.from_tensor_slices(slices)
    ds = ds.batch(batch_size).cache().prefetch(tf.data.experimental.AUTOTUNE)
    return ds

### 2. DNN Architecture

In [10]:
class MyModel(keras.Model):
    
    def __init__(self, investment_id, device='gpu'):
        super().__init__()
        
        with tf.device(device):
            self.inv_embedding = layers.Embedding(investment_id_size, 32)
            self.inv_fc = keras.Sequential([
                layers.Dense(64, activation='swish', kernel_initializer='he_normal', bias_initializer='zeros'),
                layers.Dropout(0.5),
                layers.Dense(32, activation='swish', kernel_initializer='he_normal', bias_initializer='zeros'),
                layers.Dropout(0.5),
            ])

            self.fea_fc = keras.Sequential([
                layers.Dense(256, activation='swish', kernel_initializer='he_normal', bias_initializer='zeros'),
                keras.layers.BatchNormalization(axis=1),
                layers.Dropout(0.5),
                layers.Dense(128, activation='swish', kernel_initializer='he_normal', bias_initializer='zeros'),
                keras.layers.BatchNormalization(axis=1),
                layers.Dropout(0.5),
                layers.Dense(64, activation='swish', kernel_initializer='he_normal', bias_initializer='zeros')
            ])
            
            self.fc = keras.Sequential([
                layers.Dropout(0.5),
                layers.Dense(128, activation='swish', kernel_initializer='he_normal', bias_initializer='zeros', kernel_regularizer="l2"),
                layers.Dropout(0.5),
                layers.Dense(32, activation='swish', kernel_initializer='he_normal', bias_initializer='zeros',  kernel_regularizer="l2"),
                layers.Dropout(0.5),
                layers.Dense(16, activation='swish', kernel_initializer='he_normal', bias_initializer='zeros', kernel_regularizer="l2"),
                layers.Dense(1)
            ])
    
    def call(self, inputs):
        inv_id, fea = inputs
        
        inv = investment_id_lookup_layer(inv_id)
        inv = self.inv_embedding(inv)
        inv = self.inv_fc(inv)
        inv = tf.squeeze(inv, axis=1)
        
        fea = self.fea_fc(fea)
        
        concat = tf.concat([inv, fea], axis=1)
        output = self.fc(concat)
        
        return output

In [11]:
def get_model():
    investment_id_inputs = tf.keras.Input((1, ), dtype=tf.uint16)
    features_inputs = tf.keras.Input((300, ), dtype=tf.float16)
    
    investment_id_x = investment_id_lookup_layer(investment_id_inputs)
    investment_id_x = layers.Embedding(investment_id_size, 32, input_length=1)(investment_id_x)
    investment_id_x = layers.Reshape((-1, ))(investment_id_x)
    investment_id_x = layers.Dense(64, activation='swish')(investment_id_x)
    investment_id_x = layers.Dense(64, activation='swish')(investment_id_x)
    investment_id_x = layers.Dense(64, activation='swish')(investment_id_x)
    
    feature_x = layers.Dense(256, activation='swish')(features_inputs)
    feature_x = layers.Dense(256, activation='swish')(feature_x)
    feature_x = layers.Dense(256, activation='swish')(feature_x)
    
    x = layers.Concatenate(axis=1)([investment_id_x, feature_x])
    x = layers.Dense(512, activation='swish', kernel_regularizer="l2")(x)
    x = layers.Dense(128, activation='swish', kernel_regularizer="l2")(x)
    x = layers.Dense(32, activation='swish', kernel_regularizer="l2")(x)
    output = layers.Dense(1)(x)
    rmse = keras.metrics.RootMeanSquaredError(name="rmse")
    model = tf.keras.Model(inputs=[investment_id_inputs, features_inputs], outputs=[output])
    model.compile(optimizer=tf.optimizers.Adam(0.001), loss='mse', metrics=['mse', "mae", "mape", rmse])
    return model


def get_model2():
    investment_id_inputs = tf.keras.Input((1, ), dtype=tf.uint16)
    features_inputs = tf.keras.Input((300, ), dtype=tf.float16)
    
    investment_id_x = investment_id_lookup_layer(investment_id_inputs)
    investment_id_x = layers.Embedding(investment_id_size, 32, input_length=1)(investment_id_x)
    investment_id_x = layers.Reshape((-1, ))(investment_id_x)
    investment_id_x = layers.Dense(64, activation='swish')(investment_id_x)    
    investment_id_x = layers.Dense(64, activation='swish')(investment_id_x)
    investment_id_x = layers.Dense(64, activation='swish')(investment_id_x)
    investment_id_x = layers.Dense(64, activation='swish')(investment_id_x)
   # investment_id_x = layers.Dropout(0.65)(investment_id_x)
   
    
    feature_x = layers.Dense(256, activation='swish')(features_inputs)
    feature_x = layers.Dense(256, activation='swish')(feature_x)
    feature_x = layers.Dense(256, activation='swish')(feature_x)
    feature_x = layers.Dense(256, activation='swish')(feature_x)
    feature_x = layers.Dropout(0.65)(feature_x)
    
    x = layers.Concatenate(axis=1)([investment_id_x, feature_x])
    x = layers.Dense(512, activation='swish', kernel_regularizer="l2")(x)
   # x = layers.Dropout(0.2)(x)
    x = layers.Dense(128, activation='swish', kernel_regularizer="l2")(x)
  #  x = layers.Dropout(0.4)(x)
    x = layers.Dense(32, activation='swish', kernel_regularizer="l2")(x)
    x = layers.Dense(32, activation='swish', kernel_regularizer="l2")(x)
    x = layers.Dropout(0.75)(x)
    output = layers.Dense(1)(x)
    rmse = keras.metrics.RootMeanSquaredError(name="rmse")
    model = tf.keras.Model(inputs=[investment_id_inputs, features_inputs], outputs=[output])
    model.compile(optimizer=tf.optimizers.Adam(0.001), loss='mse', metrics=['mse', "mae", "mape", rmse])
    return model


def get_model5():
    features_inputs = tf.keras.Input((300, ), dtype=tf.float16)
    
    ## feature ##
    feature_x = layers.Dense(256, activation='swish')(features_inputs)
    feature_x = layers.Dropout(0.1)(feature_x)
    ## convolution 1 ##
    feature_x = layers.Reshape((-1,1))(feature_x)
    feature_x = layers.Conv1D(filters=16, kernel_size=4, strides=1, padding='same')(feature_x)
    feature_x = layers.BatchNormalization()(feature_x)
    feature_x = layers.LeakyReLU()(feature_x)
    ## convolution 2 ##
    feature_x = layers.Conv1D(filters=16, kernel_size=4, strides=4, padding='same')(feature_x)
    feature_x = layers.BatchNormalization()(feature_x)
    feature_x = layers.LeakyReLU()(feature_x)
    ## convolution 3 ##
    feature_x = layers.Conv1D(filters=64, kernel_size=4, strides=1, padding='same')(feature_x)
    feature_x = layers.BatchNormalization()(feature_x)
    feature_x = layers.LeakyReLU()(feature_x)
    ## convolution 4 ##
    feature_x = layers.Conv1D(filters=64, kernel_size=4, strides=4, padding='same')(feature_x)
    feature_x = layers.BatchNormalization()(feature_x)
    feature_x = layers.LeakyReLU()(feature_x)
    ## convolution 5 ##
    feature_x = layers.Conv1D(filters=64, kernel_size=4, strides=2, padding='same')(feature_x)
    feature_x = layers.BatchNormalization()(feature_x)
    feature_x = layers.LeakyReLU()(feature_x)
    ## flatten ##
    feature_x = layers.Flatten()(feature_x)
    
    x = layers.Dense(512, activation='swish', kernel_regularizer="l2")(feature_x)
    
    x = layers.Dropout(0.1)(x)
    x = layers.Dense(128, activation='swish', kernel_regularizer="l2")(x)
    x = layers.Dropout(0.1)(x)
    x = layers.Dense(32, activation='swish', kernel_regularizer="l2")(x)
    x = layers.Dropout(0.1)(x)
    output = layers.Dense(1)(x)
    rmse = keras.metrics.RootMeanSquaredError(name="rmse")
    model = tf.keras.Model(inputs=[features_inputs], outputs=[output])
    model.compile(optimizer=tf.optimizers.Adam(0.001), loss='mse', metrics=['mse', "mae", "mape", rmse])
    return model
del train
# del investment_id
del y
gc.collect()

754

### 2.0 Model_Dropout_10_RMSE

In [12]:
def get_model_dr04():
    features_inputs = tf.keras.Input((300, ), dtype=tf.float32)
    
    feature_x = layers.Dense(256, activation='swish')(features_inputs)
    feature_x = layers.Dropout(0.4)(feature_x)
    feature_x = layers.Dense(128, activation='swish')(feature_x)
    feature_x = layers.Dropout(0.4)(feature_x)
    feature_x = layers.Dense(64, activation='swish')(feature_x)
    
    x = layers.Concatenate(axis=1)([feature_x])
    x = layers.Dropout(0.4)(x)
    x = layers.Dense(64, activation='swish', kernel_regularizer="l2")(x)
    x = layers.Dropout(0.4)(x)
    x = layers.Dense(32, activation='swish', kernel_regularizer="l2")(x)
    x = layers.Dropout(0.4)(x)
    x = layers.Dense(16, activation='swish', kernel_regularizer="l2")(x)
    x = layers.Dropout(0.4)(x)
    output = layers.Dense(1)(x)
    output = tf.keras.layers.BatchNormalization(axis=1)(output)
    rmse = keras.metrics.RootMeanSquaredError(name="rmse")
    model = tf.keras.Model(inputs=[features_inputs], outputs=[output])
    model.compile(optimizer=tf.optimizers.Adam(0.001),  loss = correlationLoss, metrics=[correlationMetric])
    return model

dr=0.3

gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
    print("Name:", gpu.name, "  Type:", gpu.device_type)

n_features = 300
features = [f'f_{i}' for i in range(n_features)]

# def preprocess(X, y):
#     return X, y
# def make_dataset(feature, y, batch_size=1024, mode="train"):
#     ds = tf.data.Dataset.from_tensor_slices((feature, y))
#     ds = ds.map(preprocess)
#     if mode == "train":
#         ds = ds.shuffle(512)
# #     ds = ds.batch(batch_size).cache().prefetch(tf.data.experimental.AUTOTUNE)
#     ds = ds.batch(batch_size).prefetch(tf.data.experimental.AUTOTUNE)
#     return ds

def correlationMetric(x, y, axis=-2):
  """Metric returning the Pearson correlation coefficient of two tensors over some axis, default -2."""
  x = tf.convert_to_tensor(x)
  y = math_ops.cast(y, x.dtype)
  n = tf.cast(tf.shape(x)[axis], x.dtype)
  xsum = tf.reduce_sum(x, axis=axis)
  ysum = tf.reduce_sum(y, axis=axis)
  xmean = xsum / n
  ymean = ysum / n
  xvar = tf.reduce_sum( tf.math.squared_difference(x, xmean), axis=axis)
  yvar = tf.reduce_sum( tf.math.squared_difference(y, ymean), axis=axis)
  cov = tf.reduce_sum( (x - xmean) * (y - ymean), axis=axis)
  corr = cov / tf.sqrt(xvar * yvar)
  return tf.constant(1.0, dtype=x.dtype) - corr


def correlationLoss(x,y, axis=-2):
  """Loss function that maximizes the pearson correlation coefficient between the predicted values and the labels,
  while trying to have the same mean and variance"""
  x = tf.convert_to_tensor(x)
  y = math_ops.cast(y, x.dtype)
  n = tf.cast(tf.shape(x)[axis], x.dtype)
  xsum = tf.reduce_sum(x, axis=axis)
  ysum = tf.reduce_sum(y, axis=axis)
  xmean = xsum / n
  ymean = ysum / n
  xsqsum = tf.reduce_sum( tf.math.squared_difference(x, xmean), axis=axis)
  ysqsum = tf.reduce_sum( tf.math.squared_difference(y, ymean), axis=axis)
  cov = tf.reduce_sum( (x - xmean) * (y - ymean), axis=axis)
  corr = cov / tf.sqrt(xsqsum * ysqsum)
  return tf.convert_to_tensor( K.mean(tf.constant(1.0, dtype=x.dtype) - corr ) , dtype=tf.float32 )
def correlationMetric_01mse(x, y, axis=-2):
  """Metric returning the Pearson correlation coefficient of two tensors over some axis, default -2."""
  x = tf.convert_to_tensor(x)
  y = math_ops.cast(y, x.dtype)
  n = tf.cast(tf.shape(x)[axis], x.dtype)
  xsum = tf.reduce_sum(x, axis=axis)
  ysum = tf.reduce_sum(y, axis=axis)
  xmean = xsum / n
  ymean = ysum / n
  xvar = tf.reduce_sum( tf.math.squared_difference(x, xmean), axis=axis)
  yvar = tf.reduce_sum( tf.math.squared_difference(y, ymean), axis=axis)
  cov = tf.reduce_sum( (x - xmean) * (y - ymean), axis=axis)
  corr = cov / tf.sqrt(xvar * yvar)
  return tf.constant(1.0, dtype=x.dtype) - corr

gc.collect()

# list(GroupKFold(5).split(train , groups = train.index))[0]
def pearson_coef(data):
    return data.corr()['target']['preds']

def evaluate_metric(valid_df):
    return np.mean(valid_df[['time_id_', 'target', 'preds']].groupby('time_id').apply(pearson_coef))

Name: /physical_device:GPU:0   Type: GPU


In [13]:
def get_model_best(ft_units, x_units, x_dropout):
    investment_id_inputs = tf.keras.Input((1, ), dtype=tf.uint16)
    features_inputs = tf.keras.Input((300, ), dtype=tf.float16)
    
    investment_id_x = investment_id_lookup_layer2(investment_id_inputs)
    investment_id_x = layers.Embedding(investment_id_size2, 32, input_length=1)(investment_id_x)
    investment_id_x = layers.Reshape((-1, ))(investment_id_x)
    investment_id_x = layers.Dense(128, activation='swish')(investment_id_x)
    investment_id_x = layers.Dense(128, activation='swish')(investment_id_x)
    investment_id_x = layers.Dense(128, activation='swish')(investment_id_x)
    
    feature_x = layers.Dense(256, activation='swish')(features_inputs)
    for hu in ft_units:
        feature_x = layers.Dense(hu, activation='swish')(feature_x)
    
    x = layers.Concatenate(axis=1)([investment_id_x, feature_x])
    
    for i in range(len(x_units)):
        x = tf.keras.layers.Dense(x_units[i], kernel_regularizer="l2")(x) #v8
        x = tf.keras.layers.BatchNormalization()(x) #v7
        x = tf.keras.layers.Activation('swish')(x) #v7
        x = tf.keras.layers.Dropout(x_dropout[i])(x) #v8
        
    output = layers.Dense(1)(x)
    rmse = keras.metrics.RootMeanSquaredError(name="rmse")
    model = tf.keras.Model(inputs=[investment_id_inputs, features_inputs], outputs=[output])
    model.compile(optimizer=tf.optimizers.Adam(0.0001), loss='mse', metrics=['mse', "mae", "mape", rmse])
    return model

params = {
    'ft_units': [256,256],
    'x_units': [512, 256, 128, 32],
    'x_dropout': [0.4, 0.3, 0.2, 0.1]
#           'lr':1e-3, 
         }

models_best = []
scores = []
for i in range(7):
    model = get_model_best(**params)
    model.load_weights(f"../input/wmodels/best/model_{i}.tf")
    models_best.append(model)

### 2.1 Augment Model: Gaussian_Conv1 + Conv2d Model: Account for Spatial/Area Relationship

In [14]:
def get_model6():
    features_inputs = tf.keras.Input((300, ), dtype=tf.float16)
    
    features_x = layers.GaussianNoise(0.1)(features_inputs)
    ## feature ##
    feature_x = layers.Dense(256, activation='swish')(features_x)
    feature_x = layers.Dropout(0.1)(feature_x)
    ## convolution 1 ##
    feature_x = layers.Reshape((-1,1))(feature_x)
    feature_x = layers.Conv1D(filters=16, kernel_size=4, strides=1, padding='same')(feature_x)
    feature_x = layers.BatchNormalization()(feature_x)
    feature_x = layers.LeakyReLU()(feature_x)
    ## convolution 2 ##
    feature_x = layers.Conv1D(filters=16, kernel_size=4, strides=4, padding='same')(feature_x)
    feature_x = layers.BatchNormalization()(feature_x)
    feature_x = layers.LeakyReLU()(feature_x)
    ## convolution 3 ##
    feature_x = layers.Conv1D(filters=64, kernel_size=4, strides=1, padding='same')(feature_x)
    feature_x = layers.BatchNormalization()(feature_x)
    feature_x = layers.LeakyReLU()(feature_x)
    ## convolution 4 ##
    feature_x = layers.Conv1D(filters=64, kernel_size=4, strides=4, padding='same')(feature_x)
    feature_x = layers.BatchNormalization()(feature_x)
    feature_x = layers.LeakyReLU()(feature_x)
    ## convolution 5 ##
    feature_x = layers.Conv1D(filters=64, kernel_size=4, strides=2, padding='same')(feature_x)
    feature_x = layers.BatchNormalization()(feature_x)
    feature_x = layers.LeakyReLU()(feature_x)
    ## flatten ##
    feature_x = layers.Flatten()(feature_x)
 
    x = layers.Dense(512, activation='swish', kernel_regularizer="l2")(feature_x)
    
    x = layers.Dropout(0.1)(x)
    x = layers.Dense(128, activation='swish', kernel_regularizer="l2")(x)
    x = layers.Dropout(0.1)(x)
    x = layers.Dense(32, activation='swish', kernel_regularizer="l2")(x)
    x = layers.Dropout(0.1)(x)
    output = layers.Dense(1)(x)
    rmse = keras.metrics.RootMeanSquaredError(name="rmse")
    model = tf.keras.Model(inputs=[features_inputs], outputs=[output])
    model.compile(optimizer=tf.optimizers.Adam(0.001), loss='mse', metrics=['mse', "mae", "mape", rmse])
    return model

def get_model7():
    features_inputs = tf.keras.Input((300, ), dtype=tf.float16)
    
    ## Dense 1 ##
    feature_x = layers.Dense(256, activation='swish')(features_inputs)
    feature_x = layers.Dropout(0.1)(feature_x)
    ## convolution 1 ##
    feature_x = layers.Reshape((-1,1))(feature_x)
    feature_x = layers.Conv1D(filters=16, kernel_size=4, strides=1, padding='same')(feature_x)
    feature_x = layers.BatchNormalization()(feature_x)
    feature_x = layers.LeakyReLU()(feature_x)
    ## convolution 2 ##
    feature_x = layers.Conv1D(filters=16, kernel_size=4, strides=4, padding='same')(feature_x)
    feature_x = layers.BatchNormalization()(feature_x)
    feature_x = layers.LeakyReLU()(feature_x)
    ## convolution 3 ##
    feature_x = layers.Conv1D(filters=64, kernel_size=4, strides=1, padding='same')(feature_x)
    feature_x = layers.BatchNormalization()(feature_x)
    feature_x = layers.LeakyReLU()(feature_x)

    ## convolution2D 1 ##
    feature_x = layers.Reshape((64,64,1))(feature_x)
    feature_x = layers.Conv2D(filters=32, kernel_size=4, strides=1, padding='same')(feature_x)
    feature_x = layers.BatchNormalization()(feature_x)
    feature_x = layers.LeakyReLU()(feature_x)
    ## convolution2D 2 ##
    feature_x = layers.Conv2D(filters=32, kernel_size=4, strides=4, padding='same')(feature_x)
    feature_x = layers.BatchNormalization()(feature_x)
    feature_x = layers.LeakyReLU()(feature_x)
    ## convolution2D 3 ##
    feature_x = layers.Conv2D(filters=32, kernel_size=4, strides=4, padding='same')(feature_x)
    feature_x = layers.BatchNormalization()(feature_x)
    feature_x = layers.LeakyReLU()(feature_x)

    ## flatten ##
    feature_x = layers.Flatten()(feature_x)
    ## Dense 3 ##
    x = layers.Dense(512, activation='swish', kernel_regularizer="l2")(feature_x)
    ## Dense 4 ##
    x = layers.Dropout(0.1)(x)
    ## Dense 5 ##    
    x = layers.Dense(128, activation='swish', kernel_regularizer="l2")(x)
    x = layers.Dropout(0.1)(x)
    ## Dense 6 ##
    x = layers.Dense(32, activation='swish', kernel_regularizer="l2")(x)
    x = layers.Dropout(0.1)(x)
    ## Dense 7 ##
    output = layers.Dense(1)(x)
    rmse = keras.metrics.RootMeanSquaredError(name="rmse")
    model = tf.keras.Model(inputs=[features_inputs], outputs=[output])
    model.compile(optimizer=tf.optimizers.Adam(0.001), loss='mse', metrics=['mse', "mae", "mape", rmse])
    return model

### 2.2 Augment2: Feature_Time Model

In [15]:
def get_model_ft():
    investment_id_input = tf.keras.Input(shape=(1,), dtype=tf.uint16, name='investment_id')
    inv_x = layers.Dense(64, activation='relu')(investment_id_input)
    inv_x = layers.Dropout(0.2)(inv_x)

    features_input = tf.keras.Input(shape=(300,), dtype=tf.float16, name='features')
    f_x = layers.Dense(512, activation='relu')(features_input)
    f_x = layers.Dropout(0.25)(f_x)
    f_x = layers.Dense(256, activation='relu')(f_x)
    f_x = layers.Dropout(0.2)(f_x)

    time_id_input = tf.keras.Input(shape=(1,), dtype=tf.uint16, name='time_id')
    time_x = layers.Dense(64, activation='relu')(time_id_input)
    time_x = layers.Dropout(0.2)(time_x)

    concatenated = layers.concatenate([inv_x, f_x, time_x], axis=-1)
    output = layers.Dense(1)(concatenated)

    model = tf.keras.models.Model([investment_id_input, features_input, time_id_input], output, name='model_with_time_id')
    
    model.compile(optimizer='rmsprop', loss='mse', metrics=['mse', 'mae', 'mape'])
    return model

In [16]:
gc.collect()
model_ft = get_model_ft()
model_ft.summary()

Model: "model_with_time_id"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
features (InputLayer)           [(None, 300)]        0                                            
__________________________________________________________________________________________________
dense_78 (Dense)                (None, 512)          154112      features[0][0]                   
__________________________________________________________________________________________________
investment_id (InputLayer)      [(None, 1)]          0                                            
__________________________________________________________________________________________________
dropout_29 (Dropout)            (None, 512)          0           dense_78[0][0]                   
_________________________________________________________________________________

In [17]:
models = []

for i in range(5):
    model = get_model()
    model.load_weights(f'../input/dnn-base/model_{i}')
    models.append(model)

for i in range(10):
    model = get_model2()
    model.load_weights(f'../input/train-dnn-v2-10fold/model_{i}')
    models.append(model)

for i in range(5):
    model = MyModel(investment_id=investment_id, device='cpu')
    model.load_weights(f'../input/masked-model-weights/random_mask_DNN_ensemble_weights/model{i}/model_{i}.tf')
    models.append(model)
    
    
models2 = []
    
for i in range(5):
    model = get_model5()
    model.load_weights(f'../input/prediction-including-spatial-info-with-conv1d/model_{i}.tf')
    models2.append(model)
    
for i in range(5):
    model = get_model6()
    model.load_weights(f'../input/gaussian-noise-model-weights/model_{i}.tf')
    models2.append(model)

# for i in range(5):
#     model = get_model7()
#     model.load_weights(f'../input/ump-conv2d-fold5-outputs/model_{i}.tf')
#     models2.append(model)
    
models3 = []
    
for i in range(10):
    model = get_model_dr04()
    model.load_weights(f'../input/mse10-model-weights/model_{i}')
    models3.append(model)

model_ft = get_model_ft()
model_ft.load_weights(f'../input/feature-time-model/ns_model_with_time_id.tf')

2022-04-19 22:23:55.568640: W tensorflow/core/util/tensor_slice_reader.cc:95] Could not open ../input/dnn-base/model_0: Data loss: file is too short to be an sstable: perhaps your file is in a different file format and you need to use a different restore operator?
2022-04-19 22:23:55.909520: W tensorflow/core/util/tensor_slice_reader.cc:95] Could not open ../input/dnn-base/model_1: Data loss: file is too short to be an sstable: perhaps your file is in a different file format and you need to use a different restore operator?
2022-04-19 22:23:56.204290: W tensorflow/core/util/tensor_slice_reader.cc:95] Could not open ../input/dnn-base/model_2: Data loss: file is too short to be an sstable: perhaps your file is in a different file format and you need to use a different restore operator?
2022-04-19 22:23:56.494071: W tensorflow/core/util/tensor_slice_reader.cc:95] Could not open ../input/dnn-base/model_3: Data loss: file is too short to be an sstable: perhaps your file is in a different fi

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f6f706a7850>

In [18]:
def get_model_corr(ft_units, x_units, x_dropout):
    
    # investment_id
    investment_id_inputs = tf.keras.Input((1, ), dtype=tf.uint16)
    investment_id_x = investment_id_lookup_layer(investment_id_inputs)
    investment_id_x = layers.Embedding(investment_id_size, 32, input_length=1)(investment_id_x)
    investment_id_x = layers.Reshape((-1, ))(investment_id_x)
    investment_id_x = layers.Dense(128, activation='swish')(investment_id_x)
    investment_id_x = layers.Dense(128, activation='swish')(investment_id_x) 
    investment_id_x = layers.Dense(128, activation='swish')(investment_id_x)
    
    # features_inputs
    features_inputs = tf.keras.Input((300, ), dtype=tf.float16)
    bn = tf.keras.layers.BatchNormalization()(features_inputs)
    gn = tf.keras.layers.GaussianNoise(0.035)(bn)
    feature_x = layers.Dense(300, activation='swish')(gn)
    feature_x = tf.keras.layers.Dropout(0.5)(feature_x)
    
    for hu in ft_units:
        feature_x = layers.Dense(hu, activation='swish')(feature_x)
#         feature_x = tf.keras.layers.Activation('swish')(feature_x)
        feature_x = tf.keras.layers.Dropout(0.35)(feature_x)
    
    x = layers.Concatenate(axis=1)([investment_id_x, feature_x])
    
    for i in range(len(x_units)):
        x = tf.keras.layers.Dense(x_units[i], kernel_regularizer="l2")(x) 
        x = tf.keras.layers.Activation('swish')(x)
        x = tf.keras.layers.Dropout(x_dropout[i])(x)
        
    output = layers.Dense(1)(x)
    model = tf.keras.Model(inputs=[investment_id_inputs, features_inputs], outputs=[output])
    model.compile(optimizer=tf.optimizers.Adam(0.0001), loss=correlationLoss, 
                  metrics=['mse', "mae", correlation])
    return model


params = {
#     'num_columns': len(features), 
    'ft_units': [150, 75, 150 ,200],
    'x_units': [512, 256, 128, 32],
    'x_dropout': [0.44, 0.4, 0.33, 0.2] #4, 3, 2, 1
#           'lr':1e-3, 
         }

### 3. Validation

In [19]:
def preprocess_test(investment_id, feature):
    return (investment_id, feature), 0

def preprocess_test_s(feature):
    return (feature), 0

def make_test_dataset(feature, investment_id, batch_size=1024):
    ds = tf.data.Dataset.from_tensor_slices(((investment_id, feature)))
    ds = ds.map(preprocess_test)
    ds = ds.batch(batch_size).cache().prefetch(tf.data.experimental.AUTOTUNE)
    return ds

def make_test_dataset2(feature, batch_size=1024):
    ds = tf.data.Dataset.from_tensor_slices(((feature)))
    ds = ds.batch(batch_size).cache().prefetch(tf.data.AUTOTUNE)
    return ds

def inference(models, ds):
    y_preds = []
    for model in models:
        y_pred = model.predict(ds)
        y_preds.append(y_pred)
    return np.mean(y_preds, axis=0)

from sklearn.decomposition import PCA
pca = PCA(n_components=1)


def pca_inference(models, ds):
    y_preds = []
    for model in models:
        y_pred = model.predict(ds)
        y_preds.append(y_pred)
    res = np.hstack(y_preds)
    print(len(res))
    if len(res)>1:
        res = pca.fit_transform(res)
    else:
        res = np.mean(res, axis=1)
    return res

def make_test_dataset3(feature, batch_size=1024):
    ds = tf.data.Dataset.from_tensor_slices((feature))
    ds = ds.map(preprocess_test_s)
    ds = ds.batch(batch_size).cache().prefetch(tf.data.experimental.AUTOTUNE)
    return ds

def infer(models, ds):
    y_preds = []
    for model in models:
        y_pred = model.predict(ds)
        y_preds.append((y_pred-y_pred.mean())/y_pred.std())
    
    return np.mean(y_preds, axis=0)


In [20]:
import ubiquant
env = ubiquant.make_env()
iter_test = env.iter_test() 
for (test_df, sample_prediction_df) in iter_test:
    ds = make_test_dataset(test_df[features], test_df["investment_id"])
    p1 = inference(models, ds)
    ds2 = make_test_dataset2(test_df[features])
    p2 = inference(models2, ds2)
    ds3 = make_test_dataset3(test_df[features])
    p3 = infer(models3, ds3)
    p4 = inference(models_best, ds)
    
    # feature_time_augment
    test_time_id = test_df['row_id'].str.split('_', expand=True).get(key=0).astype(int)
    ds5 = make_ft_dataset(investment_id=test_df['investment_id'], feature=test_df[features], time_id=test_time_id)
    p5 = model_ft.predict([test_df['investment_id'], test_df[features], test_time_id])[:, 0]
    
    sample_prediction_df['target'] = p1 * 0.18 + p2 * 0.57 + p3 * 0.1 + p5 * 0.15
    env.predict(sample_prediction_df) 
    display(sample_prediction_df)

This version of the API is not optimized and should not be used to estimate the runtime of your code on the hidden test set.


2022-04-19 22:24:13.748647: I tensorflow/stream_executor/cuda/cuda_dnn.cc:369] Loaded cuDNN version 8005


Unnamed: 0,row_id,target
0,1220_1,-0.117122
1,1220_2,0.075274


Unnamed: 0,row_id,target
0,1221_0,-0.042585
1,1221_1,-0.035119
2,1221_2,-0.11572


Unnamed: 0,row_id,target
0,1222_0,-0.089163
1,1222_1,-0.007983
2,1222_2,-0.028085




Unnamed: 0,row_id,target
0,1223_0,
