In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session
import time
from tqdm import tqdm
import pickle
from pickle import dump, load
import gc
import matplotlib.pyplot as plt
import random
from trueskill import Rating, quality_1vs1, rate_1vs1
import math
import trueskill

In [None]:
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras import layers
from tensorflow.keras.callbacks import *
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split
print(tf.__version__)

In [None]:
import lightgbm as lgb

In [None]:
# Sequence length used throughout the notebook: the transformer is built with
# window_size=WIN_SIZE and each user's history is trimmed/padded to this many rows.
WIN_SIZE = 100

In [None]:
def read_feather(file_name = "train.feather"):
    """Load the Feather file at ``file_name`` and return it as a DataFrame."""
    return pd.read_feather(file_name)

def read_csv(file_name = "train.csv", dtype = None, skiprows = None, nrows = None, usecols = None):
    """Read a CSV (path or file-like object) whose first line is the header.

    ``dtype``, ``skiprows``, ``nrows`` and ``usecols`` are forwarded unchanged to
    ``pd.read_csv``; ``low_memory`` is always enabled.
    """
    reader_options = dict(
        dtype=dtype,
        skiprows=skiprows,
        nrows=nrows,
        usecols=usecols,
        low_memory=True,
        header=0,
    )
    return pd.read_csv(file_name, **reader_options)

In [None]:
# Only the (user_id, content_id) pairs are needed for the attempt counters built
# below, so ask the Feather reader for just those two columns instead of loading
# the whole training frame and discarding the rest afterwards.
tdf = pd.read_feather("../input/processing-riiid-train-data/all_train_dat_plus.feather",
                      columns=['user_id', 'content_id'])
gc.collect()

In [None]:
# user_dict maps user_id -> trueskill Rating (it is read via .mu/.sigma and
# updated with rate_1vs1 in the inference loop).
# NOTE: pickle.load can execute arbitrary code; only load files from trusted datasets.
# The context manager closes the file handle, which the bare open() never did.
with open('../input/trueskill-mean-hung/global_mean_trueskill.pkl', 'rb') as user_pickle:
    user_dict = pickle.load(user_pickle)

In [None]:
# user_dict[1688853638]

In [None]:
# user_dict[1893691064]

In [None]:
#935388: trueskill.Rating(mu=0.249, sigma=0.041
#142896: trueskill.Rating(mu=0.238, sigma=0.065),
#107002: trueskill.Rating(mu=0.618, sigma=0.022),
#163243: trueskill.Rating(mu=0.197, sigma=0.066),
#4872589: trueskill.Rating(mu=0.248, sigma=0.063),

In [None]:
def win_probability(team1, team2):
    """Probability that ``team1`` beats ``team2`` in a 1-vs-1 match.

    Both arguments must expose ``mu`` and ``sigma``. The skill gap is scaled by
    the combined uncertainty (a fixed beta of 0.05 for each of the two players
    plus both sigmas squared) and passed through the CDF of the current global
    TrueSkill environment.
    """
    beta = 0.05
    n_players = 2
    skill_gap = team1.mu - team2.mu
    total_variance = team1.sigma ** 2 + team2.sigma ** 2
    scale = math.sqrt(n_players * (beta * beta) + total_variance)
    return trueskill.global_env().cdf(skill_gap / scale)

In [None]:
# Running count of how many times each user has seen each content item:
# start from an int8 column of ones and take the cumulative sum per (user, content) pair.
pair_keys = ["user_id", "content_id"]
tdf["attempt_no"] = np.ones(len(tdf), dtype='int8')
tdf["attempt_no"] = tdf[pair_keys + ['attempt_no']].groupby(pair_keys)["attempt_no"].cumsum()

In [None]:
# Highest attempt number reached per (user_id, content_id), kept as a
# one-column frame named 'max'; the source frame is released before the downcast.
attempt_no_agg = (
    tdf.groupby(["user_id", "content_id"])["attempt_no"]
    .max()
    .to_frame(name='max')
)
del tdf
gc.collect()
attempt_no_agg = attempt_no_agg.astype('int8')

In [None]:
from collections import defaultdict

# Keep only pairs answered more than once and turn them into a
# {(user_id, content_id): max_attempt} mapping that defaults to 0 for unseen pairs.
u_attempt_c_dict = (
    attempt_no_agg.loc[attempt_no_agg['max'].gt(1), 'max']
    .to_dict(defaultdict(int))
)
del attempt_no_agg
gc.collect()

In [None]:
class MultiHeadAttention(layers.Layer):
    """Multi-head scaled dot-product attention.

    Args:
        embed_dim: size of the model dimension; must be divisible by ``num_heads``.
        num_heads: number of attention heads.
        **kwargs: standard Keras ``Layer`` arguments (``name``, ``dtype``, ...).

    Raises:
        ValueError: if ``embed_dim`` is not divisible by ``num_heads``.
    """

    def __init__(self, embed_dim, num_heads=8, **kwargs):
        # Forward kwargs to the base Layer: get_config() emits name/trainable/dtype,
        # so dropping them here would break from_config() round-trips.
        super(MultiHeadAttention, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        if self.embed_dim % self.num_heads != 0:
            raise ValueError(
                f"embedding dimension = {self.embed_dim} should be divisible by number of heads = {self.num_heads}"
            )
        self.projection_dim = self.embed_dim // self.num_heads
        self.query_dense = layers.Dense(embed_dim)
        self.key_dense = layers.Dense(embed_dim)
        self.value_dense = layers.Dense(embed_dim)
        self.combine_heads = layers.Dense(embed_dim)
    
    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        cfg = super().get_config()
        cfg.update({
            'embed_dim': self.embed_dim,
            'num_heads': self.num_heads,
        })
        return cfg

    def attention(self, query, key, value, mask):
        """Scaled dot-product attention; returns ``(output, weights)``.

        Where ``mask`` is 1 the score is pushed towards -1e9 before the softmax,
        so those positions receive (almost) zero weight. ``mask`` may be None.
        """
        score = tf.matmul(query, key, transpose_b=True)
        dim_key = tf.cast(tf.shape(key)[-1], tf.float32)
        scaled_score = score / tf.math.sqrt(dim_key)
        if mask is not None:
            scaled_score += (mask * -1e9)
        weights = tf.nn.softmax(scaled_score, axis=-1)
        output = tf.matmul(weights, value)
        return output, weights

    def separate_heads(self, x, batch_size):
        """Reshape (batch, seq, embed_dim) into (batch, num_heads, seq, projection_dim)."""
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, q , k ,v, mask):
        """Project q/k/v, attend per head and recombine; returns (batch, seq_len, embed_dim)."""
        batch_size = tf.shape(q)[0]
        query = self.query_dense(q)  # (batch_size, seq_len, embed_dim)
        key = self.key_dense(k)  # (batch_size, seq_len, embed_dim)
        value = self.value_dense(v)  # (batch_size, seq_len, embed_dim)
        query = self.separate_heads(query, batch_size)  # (batch_size, num_heads, seq_len, projection_dim)
        key = self.separate_heads(key, batch_size)  # (batch_size, num_heads, seq_len, projection_dim)
        value = self.separate_heads(value, batch_size)  # (batch_size, num_heads, seq_len, projection_dim)
        # The attention weights are computed but intentionally not returned.
        attention, _ = self.attention(query, key, value, mask)
        attention = tf.transpose(attention, perm=[0, 2, 1, 3])  # (batch_size, seq_len, num_heads, projection_dim)
        concat_attention = tf.reshape(attention, (batch_size, -1, self.embed_dim))  # (batch_size, seq_len, embed_dim)
        output = self.combine_heads(concat_attention)  # (batch_size, seq_len, embed_dim)
        return output # can return weights

"""
Encoder block as a layer
"""

class EncoderBlock(layers.Layer):
    """Transformer encoder block: attention + feed-forward, each followed by
    dropout, a residual connection and layer normalisation.

    Args:
        embed_dim: model dimension.
        num_heads: number of attention heads.
        ff_dim: hidden width of the feed-forward network; defaults to ``2 * embed_dim``.
        rate: dropout rate.
        **kwargs: standard Keras ``Layer`` arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, embed_dim, num_heads, ff_dim = None, rate=0.1, **kwargs):
        # Forward kwargs so that name/dtype/trainable (also emitted by get_config)
        # reach the base Layer instead of being silently dropped.
        super(EncoderBlock, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.att = MultiHeadAttention(self.embed_dim, self.num_heads)
        if self.ff_dim is None: self.ff_dim = 2*self.embed_dim
        self.ffn = tf.keras.Sequential(
            [layers.Dense(self.ff_dim, activation="relu"), layers.Dense(self.embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(self.rate)
        self.dropout2 = layers.Dropout(self.rate)
        
    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        cfg = super().get_config()
        cfg.update({
            'embed_dim': self.embed_dim,
            'num_heads': self.num_heads,
            'ff_dim': self.ff_dim,
            'rate': self.rate
        })
        return cfg

    def call(self, x, y, padding_mask, training=None):
        """Attend from ``x`` (queries) to ``y`` (keys and values).

        ``padding_mask`` is forwarded to the attention layer; ``training`` toggles
        dropout and may be omitted, in which case Keras supplies it.
        """
        attn_output = self.att(x, y, y, padding_mask)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(x + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)
    
"""
Decoder block as a layer
"""

class DecoderBlock(layers.Layer):
    """Transformer decoder block: masked self-attention, attention over the
    encoder output, then a feed-forward network; each sub-layer is followed by
    dropout, a residual connection and layer normalisation.

    Args:
        embed_dim: model dimension.
        num_heads: number of attention heads.
        ff_dim: hidden width of the feed-forward network; defaults to ``2 * embed_dim``.
        rate: dropout rate.
        **kwargs: standard Keras ``Layer`` arguments (``name``, ``dtype``, ...).
    """

    def __init__(self, embed_dim, num_heads, ff_dim = None, rate = 0.1, **kwargs):
        # Forward kwargs so that name/dtype/trainable (also emitted by get_config)
        # reach the base Layer instead of being silently dropped.
        super(DecoderBlock, self).__init__(**kwargs)
        self.embed_dim = embed_dim
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.att1 = MultiHeadAttention(self.embed_dim, self.num_heads)
        self.att2 = MultiHeadAttention(self.embed_dim, self.num_heads)
        # Same fallback as EncoderBlock: without it the default ff_dim=None would
        # be handed straight to Dense(None) and fail.
        if self.ff_dim is None: self.ff_dim = 2*self.embed_dim
        self.ffn = tf.keras.Sequential(
            [layers.Dense(self.ff_dim, activation="relu"), layers.Dense(self.embed_dim),]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm3 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(self.rate)
        self.dropout2 = layers.Dropout(self.rate)
        self.dropout3 = layers.Dropout(self.rate)
        
    def get_config(self):
        """Return the base layer config extended with the constructor arguments."""
        cfg = super().get_config()
        cfg.update({
            'embed_dim': self.embed_dim,
            'num_heads': self.num_heads,
            'ff_dim': self.ff_dim,
            'rate': self.rate
        })
        return cfg
    
    def call(self, x, enc_output, look_ahead_mask, padding_mask, training=None):
        """Run the block on decoder input ``x`` and encoder output ``enc_output``.

        ``look_ahead_mask`` is applied to the self-attention, ``padding_mask`` to
        the attention over ``enc_output``. ``training`` toggles dropout and may be
        omitted, in which case Keras supplies it.
        """
        attn1 = self.att1(x, x, x, look_ahead_mask)
        attn1 = self.dropout1(attn1, training = training)
        out1 = self.layernorm1(attn1 + x)
        
        attn2 = self.att2(out1, enc_output, enc_output, padding_mask)
        attn2 = self.dropout2(attn2, training = training)
        out2 = self.layernorm2(attn2 + out1)
        
        ffn_output = self.ffn(out2)
        ffn_output = self.dropout3(ffn_output, training = training)
        return self.layernorm3(ffn_output + out2)

In [None]:
def create_padding_mask(seqs):
    """Return a float mask that is 1.0 for timesteps whose features are all zero.

    Two singleton axes are inserted after the batch axis so the mask broadcasts
    against attention scores.
    """
    all_zero_step = tf.reduce_all(tf.math.equal(seqs, 0), axis=-1)
    as_float = tf.cast(all_zero_step, tf.float32)
    return as_float[:, tf.newaxis, tf.newaxis, :]

def create_look_ahead_mask(size):
    """Return a (size, size) matrix with 1 strictly above the diagonal and 0 elsewhere."""
    lower_triangle = tf.linalg.band_part(tf.ones((size, size)), -1, 0)
    return 1 - lower_triangle  # (seq_len, seq_len)

def get_angles(pos, i, embed_dim):
    """Sinusoidal-encoding angles: ``pos / 10000 ** (2 * (i // 2) / embed_dim)``.

    ``pos`` and ``i`` are broadcast against each other.
    """
    exponent = (2 * (i//2)) / np.float32(embed_dim)
    rate_per_dim = 1 / np.power(10000, exponent)
    return pos * rate_per_dim

def positional_encoding(position, embed_dim):
    """Build a (1, position, embed_dim) float32 sinusoidal positional encoding.

    Even feature indices hold the sine of the angle, odd indices the cosine.
    """
    row_positions = np.arange(position)[:, np.newaxis]
    feature_indices = np.arange(embed_dim)[np.newaxis, :]
    encoding = get_angles(row_positions, feature_indices, embed_dim)
    encoding[:, 0::2] = np.sin(encoding[:, 0::2])
    encoding[:, 1::2] = np.cos(encoding[:, 1::2])
    return tf.cast(encoding[np.newaxis, ...], dtype=tf.float32)

In [None]:
def custom_transformer_model(feature_dim, window_size, q_size=13524, embed_dim = 256, num_heads = 16, dense_dim = 1024):
    """Build the encoder/decoder transformer that predicts answer correctness.

    The single input has shape (window_size, feature_dim); feature indices 0..29
    are read below, so feature_dim must be at least 30. Exercise features feed the
    encoder, response features feed the decoder, and the lecture features (taken
    from the last timestep only) are concatenated to the decoder's last-step
    output before the final sigmoid.

    Args:
        feature_dim: number of features per timestep.
        window_size: sequence length.
        q_size: vocabulary size of the content-id embedding.
        embed_dim: model dimension; the smaller embeddings are fractions of it.
        num_heads: attention heads per block.
        dense_dim: feed-forward width inside each encoder/decoder block.

    Returns:
        An uncompiled tf.keras.Model with one sigmoid output named "output".
    """
    inputs = layers.Input(shape=(window_size, feature_dim), name = "enc_input")
    # Slice the packed feature tensor into named columns.
    min_delta = inputs[...,0]
    day_delta = inputs[...,1]
    month_delta = inputs[...,2]
    cid = inputs[...,3]
    tid = inputs[...,4]
    prior_elapsed = inputs[...,5]
    prior_explained = inputs[...,6]
    is_with = inputs[...,7]
    # Lecture features are read from the last timestep only.
    num_lect = inputs[...,-1,8]
    lec_type = inputs[...,-1,9:13]
    lec_h_past = inputs[...,-1,13]
    c_part = inputs[...,14:23]
    tag1 = inputs[...,23]
    tag2 = inputs[...,24]
    tag3 = inputs[...,25]
    # tag4..tag6 are sliced but not used further down (their embeddings are commented out).
    tag4 = inputs[...,26]
    tag5 = inputs[...,27]
    tag6 = inputs[...,28]
    prev_answered_correct = inputs[...,29]
    
    #====Excercise====
    min_delta = layers.Embedding(input_dim=1443, output_dim=embed_dim//8, input_length=window_size,
                                 embeddings_initializer = 'glorot_uniform')(min_delta)
    day_delta = layers.Embedding(input_dim=33, output_dim=embed_dim//16, input_length=window_size,
                                 embeddings_initializer = 'glorot_uniform')(day_delta)
    month_delta = layers.Embedding(input_dim=9, output_dim=embed_dim//16, input_length=window_size,
                                   embeddings_initializer = 'glorot_uniform')(month_delta)
    cid = layers.Embedding(input_dim=q_size, output_dim=embed_dim, input_length=window_size,
                           embeddings_initializer = 'glorot_uniform')(cid)
    tid = layers.Embedding(input_dim=2001, output_dim=embed_dim//16, input_length=window_size,
                           embeddings_initializer = 'glorot_uniform')(tid)
    is_with = layers.Embedding(input_dim=3, output_dim=2, input_length=window_size,
                               embeddings_initializer = 'glorot_uniform')(is_with)
    c_part = layers.Dense(embed_dim//4, activation = 'relu', use_bias=False)(c_part)
#     tag_emb = layers.Embedding(input_dim=189, output_dim=embed_dim//4)
    tag1 = layers.Embedding(input_dim=189, output_dim=embed_dim//8, input_length=window_size,
                            embeddings_initializer = 'glorot_uniform')(tag1)
    tag2 = layers.Embedding(input_dim=179, output_dim=embed_dim//8, input_length=window_size,
                            embeddings_initializer = 'glorot_uniform')(tag2)
    tag3 = layers.Embedding(input_dim=162, output_dim=embed_dim//8, input_length=window_size,
                            embeddings_initializer = 'glorot_uniform')(tag3)
#     tag4 = tag_emb(tag4)
#     tag5 = tag_emb(tag5)
#     tag6 = tag_emb(tag6)
    # All per-step exercise features except the content id are merged and projected to embed_dim.
    enc_ex = layers.Concatenate()([min_delta, day_delta, month_delta, tid, c_part,
                                 tag1, tag2, tag3, is_with]) #tag4, tag5, tag6
    enc_ex = layers.Dense(embed_dim, activation = 'relu')(enc_ex)
    
    #====Lecture====
    num_lect = layers.Embedding(input_dim=160, output_dim=embed_dim//16,
                                embeddings_initializer = 'glorot_uniform')(num_lect)
    lec_type = layers.Dense(embed_dim//8, activation = 'relu', use_bias=False)(lec_type)
    lec_h_past = layers.Embedding(input_dim=724, output_dim=embed_dim//8,
                                  embeddings_initializer = 'glorot_uniform')(lec_h_past)
    enc_lec = layers.Concatenate()([num_lect, lec_type, lec_h_past])
    enc_lec = layers.Dense(embed_dim//2, activation = 'relu')(enc_lec)
    enc_lec = layers.Dropout(0.1)(enc_lec)

    #====Response====
    prev_answered_correct = layers.Embedding(input_dim=4, output_dim=embed_dim, input_length=window_size,
                                             embeddings_initializer = 'glorot_uniform')(prev_answered_correct)
    prior_elapsed = layers.Embedding(input_dim=302, output_dim=embed_dim//4, input_length=window_size,
                                     embeddings_initializer = 'glorot_uniform')(prior_elapsed)
    prior_explained = layers.Embedding(input_dim=3, output_dim=embed_dim//4, input_length=window_size,
                                       embeddings_initializer = 'glorot_uniform')(prior_explained)
    prior_inter = layers.Concatenate()([prior_elapsed, prior_explained])
    prior_inter = layers.Dense(embed_dim, activation = 'relu')(prior_inter)
    
    #====Mask====
    # Padding mask flags timesteps whose features are all zero; the decoder
    # self-attention additionally blocks future positions.
    padding_mask = create_padding_mask(inputs)
    look_ahead_mask = create_look_ahead_mask(window_size)
    dec_combined_mask = tf.maximum(padding_mask, look_ahead_mask)
    pos_enc = positional_encoding(window_size, embed_dim)
    
    #++++Model++++
    e_enc_input = layers.Add()([cid, pos_enc, enc_ex])
    dec_input = layers.Add()([prev_answered_correct, pos_enc, prior_inter])
    
    # Two stacked encoder blocks, with a residual connection around the stack.
    x1 = EncoderBlock(embed_dim, num_heads, ff_dim = dense_dim)(e_enc_input, e_enc_input, padding_mask)
    x1 = EncoderBlock(embed_dim, num_heads, ff_dim = dense_dim)(x1, x1, padding_mask)
    x1 = layers.Add()([e_enc_input, x1])
    # Two stacked decoder blocks, both attending to the same encoder output.
    x3 = DecoderBlock(embed_dim, num_heads, ff_dim = dense_dim)(dec_input, x1,
                                                                dec_combined_mask, padding_mask)
    x3 = DecoderBlock(embed_dim, num_heads, ff_dim = dense_dim)(x3, x1,
                                                                dec_combined_mask, padding_mask)
    # Only the last timestep of the decoder output is used for the prediction.
    x = x3[:, -1, :]
    x = layers.Concatenate()([x, enc_lec])
    x = layers.Dense(embed_dim, activation="relu")(x)
#     x = layers.BatchNormalization()(x)
    x = layers.Dropout(0.2)(x)
    outputs = layers.Dense(1, activation="sigmoid",
                           kernel_initializer=tf.keras.initializers.TruncatedNormal(stddev=0.02),
                           name = "output")(x)
    model = tf.keras.Model(inputs=inputs, outputs=outputs)
    return model

In [None]:
# Build the network (30 features per step, WIN_SIZE steps) and restore the saved weights.
# NOTE(review): the hyper-parameters presumably mirror the ones used to train the checkpoint - confirm.
model = custom_transformer_model(
    feature_dim=30,
    window_size=WIN_SIZE,
    embed_dim=128,
    num_heads=8,
    dense_dim=512,
)
model.load_weights("../input/model-sak/transformer_2seq_8head.03.h5")
print('Load neural network done.')
# model.summary()

In [None]:
# Load the full training frame; the cells below trim it to each user's most recent rows.
tdf = read_feather("../input/processing-riiid-train-data/all_train_dat_plus.feather")

In [None]:
tdf['numlect'] = tdf['numlect'].astype(np.uint8)
# Cap task_container_id at 2000 (the matching embedding has input_dim=2001).
# A single .loc assignment replaces the chained `tdf.col[mask] = value`, which
# raises SettingWithCopyWarning and is a silent no-op under pandas copy-on-write.
tdf.loc[tdf['task_container_id'] > 2000, 'task_container_id'] = 2000

In [None]:
# Keep only the last WIN_SIZE rows of every user and renumber the rows from 0.
tdf = tdf.groupby('user_id').tail(WIN_SIZE).reset_index(drop=True)
gc.collect()
print(tdf.shape)

In [None]:
# Map each user_id to the index labels of its rows in tdf.
users_dict = tdf.groupby('user_id').groups
gc.collect()

In [None]:
# user_id is no longer needed as a feature column once users_dict holds the row ranges.
tdf = tdf.drop(columns='user_id')

In [None]:
# Per-user history as a NumPy array, sliced from the first to the last row
# recorded for that user (this assumes each user's rows are contiguous in tdf).
# NOTE(review): the loop walks the keys of `user_dict` (TrueSkill ratings) while
# the row ranges come from `users_dict` (groupby groups); a user present in the
# former but missing from the latter raises KeyError - confirm this is intended.
user_df = {}
for uid in tqdm(user_dict):
    first_row = users_dict[uid][0]
    last_row = users_dict[uid][-1]
    user_df[uid] = tdf[first_row:last_row + 1].copy().to_numpy(copy = True)
del tdf
del users_dict
gc.collect()

In [None]:
##TODO update: lgbm_model_new_mu_sigma_hung_trueskill_mean.txt
# Load the trained LightGBM booster from its text dump.
lgbm_model = lgb.Booster(model_file="../input/new-mu-3rd-train/lgbm_model_3rd_train_new_mu_sigma_hung_trueskill_mean.txt")
print('Load lgbm done.')

In [None]:
def make_time_series(x, windows_size):
    """Left-pad a 2-D array with zero rows so it has exactly ``windows_size`` rows.

    Args:
        x: array of shape (n_rows, n_features).
        windows_size: number of rows in the result.

    Returns:
        A new array of shape (windows_size, n_features). Shorter inputs get zero
        rows prepended; longer inputs keep only their last ``windows_size`` rows
        (previously this case raised ValueError because of a negative pad width).
    """
    n_rows = x.shape[0]
    if n_rows > windows_size:
        # Keep the most recent rows, matching how the caller trims long histories.
        x = x[n_rows - windows_size:]
        n_rows = windows_size
    return np.pad(x, [[windows_size - n_rows, 0], [0, 0]], constant_values=0)

In [None]:
# Column dtypes passed to read_csv when loading questions.csv.
QUESTION_DTYPES = dict(
    question_id=np.uint16,
    bundle_id=np.uint16,
    correct_answer=np.int8,
    part=np.int8,
    tags=str,
)

In [None]:
# Per-user lecture history: trainlec_dict[user_id][part] holds the parallel
# lists 'tt', 'lt' and 'ts' (see how the inference loop appends to them).
# NOTE: pickle files must come from a trusted source (loading can execute code).
# The context manager closes the handle, which load(open(...)) never did.
with open('../input/feather-data/trainlec_dict.pkl', 'rb') as fh:
    trainlec_dict = load(fh)
df_q = read_feather("../input/feather-data/questions_processed.feather")
df_lec = read_feather("../input/feather-data/lectures_processed.feather")
part_df = read_csv(file_name = '../input/riiid-test-answer-prediction/questions.csv', dtype = QUESTION_DTYPES)
part_df.drop(columns = ['question_id', 'bundle_id', 'correct_answer', 'tags'], inplace=True)
bundle_df = read_feather('../input/riiid-data-processing4/questions_processed.feather')
bundle_df = bundle_df[['bundle_id']]
gc.collect()
# lecture_id -> row position in df_lec (df_lec has the default 0..n-1 index
# produced by read_feather, so position and label coincide).
lec_pos = {lecture_id: pos for pos, lecture_id in enumerate(df_lec['lecture_id'])}

In [None]:
# Running statistics updated by the inference loop:
#   c_percent_dict[content_id] -> [n_correct, n_total]
#   u_percent_dict[user_id]    -> dict with 'cor', 'tot', per-part and per-skill [n_correct, n_total], ...
#   t_percent_dict[tag]        -> [n_correct, n_total]
#   b_means_dict[bundle_id]    -> sequence whose first element is used as a feature
# NOTE: pickle files must come from a trusted source (loading can execute code).
# Each file is opened in a context manager so the handle is closed after loading.
with open('../input/riiid-data-processing-lgbm2/c_percent_dict.pkl', 'rb') as fh:
    c_percent_dict = load(fh)
with open('../input/riiid-data-processing-lgbm2/u_percent_dict.pkl', 'rb') as fh:
    u_percent_dict = load(fh)
with open('../input/riiid-data-processing-lgbm2/t_percent_dict.pkl', 'rb') as fh:
    t_percent_dict = load(fh)
with open('../input/feather-data/b_mean_elapsed_dict.pkl', 'rb') as fh:
    b_means_dict = load(fh)

In [None]:
# content_dict = {}
# for key, value in c_percent_dict.items():
#     #content_dict[key] = value[0]/value[1]
#     content_dict[key][0] = value[0]
#     content_dict[key][1] = value[1]

In [None]:
# content_dict

In [None]:
# Per question: how many of the six tag columns hold a value greater than 1.
num_tag_df = (df_q[['t1','t2','t3','t4','t5','t6']] > 1).sum(axis=1).to_frame(name='num_tag')

In [None]:
import riiideducation
from bisect import bisect
import json
import time

In [None]:
# Create the competition environment and the iterator that yields
# (test batch, sample prediction frame) pairs consumed by the loop below.
env = riiideducation.make_env()
iter_test = env.iter_test()

In [None]:
# Main inference loop. For every test batch:
#   1. fold the previous batch's answers into the per-user / per-content state,
#   2. build one transformer sequence and one LightGBM feature row per question,
#   3. blend both models' predictions and submit them.
for (tdf, sample_prediction_df) in iter_test:
#     start = time.time()
    # --- 1. Update state with the ground truth of the previous batch -------------
    prev_correct = json.loads(tdf["prior_group_answers_correct"].iloc[0])
    # NOTE(review): prev_dat / prev_uid / prev_cid are created at the end of the
    # previous iteration, so the first batch must carry an empty list here.
    if len(prev_correct) != 0:
        i = -1
        for cor in prev_correct:
            # prev_* only hold question rows, so -1 entries are skipped without advancing i.
            if cor == -1: continue
            i += 1
            # Previous feature row with the now-known correctness appended as last column.
            prv_udat = np.array([prev_dat[i].copy()+[cor]], dtype = np.int64)
            prv_uid = prev_uid[i]
            prv_cid = prev_cid[i][0]
            prv_part = prev_cid[i][1]
            prv_t1 = prev_cid[i][2]
            prv_skill = 'lis'
            if prv_part > 4: prv_skill = 'read'
            if not prv_uid in u_percent_dict:
                u_percent_dict[prv_uid] = {'cor':0, 'tot':0, 1:[0,0], 2:[0,0], 3:[0,0], 4:[0,0], 5:[0,0], 6:[0,0], 7:[0,0],
                                           'lis':[0,0], 'read':[0,0], 'mean_gap':0, 'total_explained':0}
            # Attempt counters ([n_correct, n_total]) are bumped before the rating update.
            c_percent_dict[prv_cid][1] += 1
            t_percent_dict[prv_t1][1] += 1
            u_percent_dict[prv_uid]['tot'] += 1
            u_percent_dict[prv_uid][prv_part][1] += 1
            u_percent_dict[prv_uid][prv_skill][1] += 1
            if cor == 1:
                c_percent_dict[prv_cid][0] += 1
                # Correct answer: the user "wins" against a question rated at 1 - accuracy.
                new_user_rating, new_question_rating = rate_1vs1(user_dict[prv_uid], trueskill.setup(mu=1 - (c_percent_dict[prv_cid][0] / c_percent_dict[prv_cid][1]),
                                                                      sigma=0.164486,
                                                                      beta=0.05, tau=0.00164,
                                                                      draw_probability=0).Rating())
                u_percent_dict[prv_uid]['cor'] += 1
                t_percent_dict[prv_t1][0] += 1
                u_percent_dict[prv_uid][prv_part][0] += 1
                u_percent_dict[prv_uid][prv_skill][0] += 1
            if cor == 0:
                # Wrong answer: the question "wins" against the user.
                new_question_rating, new_user_rating = rate_1vs1(trueskill.setup(mu=1 - (c_percent_dict[prv_cid][0] / c_percent_dict[prv_cid][1]),
                                                                      sigma=0.164486,
                                                                      beta=0.05, tau=0.00164,
                                                                      draw_probability=0).Rating(), user_dict[prv_uid])
            if prv_uid in user_df:
                # NOTE(review): the history cap is the literal 100 rather than WIN_SIZE.
                prv_udat = np.concatenate([user_df[prv_uid], prv_udat], axis = 0)[-100:]
            user_df[prv_uid] = prv_udat
            user_dict[prv_uid] = new_user_rating
    # --- Normalise the raw batch columns (values are shifted by +1) -------------
    tdf['prior_question_had_explanation'].fillna(0, inplace = True)
    tdf['prior_question_had_explanation'] = tdf['prior_question_had_explanation'].astype(np.int8)
    tdf['prior_question_had_explanation'] = tdf['prior_question_had_explanation'] + 1
    tdf['prior_question_elapsed_time'].fillna(23000, inplace = True) #mean: 25300 med: 21000
    tdf['prior_question_elapsed_time'] = (tdf['prior_question_elapsed_time'] // 1000 + 1)
    tdf['content_id'] = tdf['content_id'] + 1
    tdf['task_container_id'] = tdf['task_container_id'] + 1
#     print(time.time() - start)
    
    # --- 2. Build per-row features -----------------------------------------------
    prev_dat = []
    prev_uid = []
    prev_cid = []
    total_dat = []
    temp_udat = []
    row_id = []
    total_lgbm_dat = []
    for i in range(tdf.shape[0]):
        ##For questions
        can_if = False
        if tdf['content_type_id'].iloc[i] == 0:
            user_id = tdf['user_id'].iloc[i]
            question_id = tdf['content_id'].iloc[i] - 1
            # Unseen users start from a default rating.
            if(user_id not in user_dict.keys()):
                user_dict[user_id] = trueskill.setup(mu=0.3,
                                                                      sigma=0.164486,
                                                                      beta=0.05, tau=0.00164,
                                                                      draw_probability=0).Rating()
            # TrueSkill features: win probability against the question, plus the user's mu/sigma.
            prob = win_probability(user_dict[user_id], trueskill.setup(mu=1 - (c_percent_dict[question_id][0] / c_percent_dict[question_id][1]),
                                                                      sigma=0.164486,
                                                                      beta=0.05, tau=0.00164,
                                                                      draw_probability=0).Rating())
            mu = user_dict[user_id].mu
            sigma = user_dict[user_id].sigma
            # Row as a list without its last two columns; the pops below strip
            # row id, user id and one further column, leaving the timestamp first.
            temp_udat = tdf.iloc[i].to_list()[:-2]
            row_id.append(temp_udat.pop(0))
            ts_cur = temp_udat[0]
            uid = temp_udat.pop(1)
            cid = temp_udat[1]
            temp_udat.pop(2)
            # LightGBM gets the remaining columns with the +1 shift undone.
            lgbm_udat = temp_udat[2:].copy()
            lgbm_udat[0] -= 1
            lgbm_udat[1] -= 1
            lgbm_udat[2] -= 1
            temp_udat[2] = min(2000, temp_udat[2])
            # NOTE(review): the check indexes df_q with cid-1 but the message uses cid.
            assert df_q['question_id'][cid-1] == cid, 'hmm somethings wrong ' + str(df_q['question_id'][cid]) + " " + str(cid)
            # Flag 2 when the neighbouring row shares user and task container, else 1.
            if (i-1 >= 0 and tdf['task_container_id'].iloc[i] == tdf['task_container_id'].iloc[i-1] 
                and tdf['user_id'].iloc[i] == tdf['user_id'].iloc[i-1]):
                temp_udat.append(2)
            elif (i+1 < tdf.shape[0] and tdf['task_container_id'].iloc[i] == tdf['task_container_id'].iloc[i+1] 
                  and tdf['user_id'].iloc[i] == tdf['user_id'].iloc[i+1]):
                temp_udat.append(2)
            else: temp_udat.append(1)
            part = part_df['part'][cid-1]
            bundle = bundle_df['bundle_id'][cid-1]
            t1 = df_q['t1'][cid-1]
            num_tag = num_tag_df['num_tag'][cid - 1]
            lgbm_udat.append(part)
            lgbm_udat.append(t1)
            lgbm_udat.append(num_tag)
            skill = 'lis'
            if part > 4: skill = 'read'
            # Lecture features (6 values); [1,0,0,0,0,1] is the default when the user
            # has no lecture for this part at or before the current timestamp.
            if not uid in trainlec_dict: 
                temp_udat += [1,0,0,0,0,1]
            else:
                if not part in trainlec_dict[uid]:
                    temp_udat += [1,0,0,0,0,1]
                else:
                    # Index of the latest lecture at or before ts_cur (-1 if none).
                    idx_cur = bisect(trainlec_dict[uid][part]['ts'], ts_cur) - 1
                    if idx_cur == -1:
                        temp_udat += [1,0,0,0,0,1]
                    else:
                        num_lect = min(159, len(trainlec_dict[uid][part]['lt']) + 1)
                        tlast = [0]*4
                        tlast[trainlec_dict[uid][part]['lt'][idx_cur]] = 1
                        tsago = min((ts_cur - trainlec_dict[uid][part]['ts'][idx_cur])//3600000 + 2, 723)
                        temp_udat.append(num_lect)
                        temp_udat += tlast
                        temp_udat.append(tsago)
            # Static question features (everything in df_q except its first column).
            q_fet = df_q.iloc[cid-1].to_list()
            q_fet.pop(0)
            temp_udat += q_fet
            if not uid in user_df:
                # User without stored history: default time deltas, padded sequence,
                # and placeholder (-1 / 0) values for the user-level LightGBM features.
                ts_fet = [1, 1, 1]
                #===========u_attemp_c===============
                lgbm_udat.append(0)
                u_attempt_c_dict[(uid, cid)] = 1
                #====================================
                temp_udat = ts_fet + temp_udat
                infer_dat = np.array([temp_udat+ [3]])
                infer_dat = make_time_series(infer_dat, WIN_SIZE)
                lgbm_udat[1] = -1
                lgbm_udat[2] = -1
                lgbm_udat = [-1] + lgbm_udat
                lgbm_udat = lgbm_udat + [0, 0, 0, 0, 0, b_means_dict[bundle][0], -1, 0]
                lgbm_udat = lgbm_udat + [c_percent_dict[cid-1][0] / c_percent_dict[cid-1][1], c_percent_dict[cid-1][1]]
                lgbm_udat = lgbm_udat + [-1, 0, -1, 0]
                # NOTE(review): the count comes from c_percent_dict[t1] although the ratio
                # uses t_percent_dict[t1]; confirm against the training features before changing.
                lgbm_udat = lgbm_udat + [t_percent_dict[t1][0] / t_percent_dict[t1][1], c_percent_dict[t1][1]]
                lgbm_udat = lgbm_udat + [-1, 0]
            else:
                #===========u_attemp_c===============
                if (uid, cid) in u_attempt_c_dict:
                    lgbm_udat.append(u_attempt_c_dict[(uid, cid)])
                    u_attempt_c_dict[(uid, cid)] += 1
                else:
                    attempt_recent = user_df[uid][user_df[uid][:,4] == cid].shape[0]
                    lgbm_udat.append(attempt_recent)
                    u_attempt_c_dict[(uid, cid)] = attempt_recent + 1
                #====================================
                # Column 3 of the stored history is the raw timestamp.
                ts_prev = user_df[uid][-1,3]
                ts_recent_prev10 = user_df[uid][0,3]
                if user_df[uid].shape[0] > 9:
                    ts_recent_prev10 = user_df[uid][-9,3]
                ts_diff = ts_cur - ts_prev
                # Bucketed time since the previous interaction: minutes, days and 30-day units.
                min_del = min(1440, ts_diff//60000) + 2
                day_del = min(30, ts_diff//86400000) + 2
                mon_del = min(6, ts_diff//2592000000) + 2
                ts_fet = [min_del, day_del, mon_del]
                temp_udat = ts_fet + temp_udat
                infer_dat = np.concatenate([user_df[uid], np.array([temp_udat + [2]])])
                # Shift the last column by one and roll it one step, so every row
                # carries the value that belonged to the row before it.
                infer_dat[:,-1] = infer_dat[:,-1] + 1
                infer_dat[:,-1] = np.roll(infer_dat[:, -1], 1)
                if infer_dat.shape[0] > 100: infer_dat = infer_dat[-100:]
                if infer_dat.shape[0] < 100: infer_dat = make_time_series(infer_dat, WIN_SIZE)
                if lgbm_udat[2] == 1: u_percent_dict[uid]['total_explained'] += 1
                if lgbm_udat[0] != 0: u_percent_dict[uid]['mean_gap'] = ts_cur / lgbm_udat[0]
                lgbm_udat = [ts_diff//1000] + lgbm_udat
                recent_10_correctness = user_df[uid][-10:, -1].sum()/(min(10, user_df[uid].shape[0]))
                recent_10_mean_gap = (ts_cur - ts_recent_prev10)/((min(9, user_df[uid].shape[0])+1)*1000)
                mean_elapsed = (lgbm_udat[3] + user_df[uid][:, 6].sum() - 23) / (user_df[uid].shape[0])
                lgbm_udat = lgbm_udat + [u_percent_dict[uid]['total_explained'], recent_10_correctness, recent_10_mean_gap,
                                         mean_elapsed, u_percent_dict[uid]['mean_gap']//1000, b_means_dict[bundle][0]]
                lgbm_udat = lgbm_udat + [u_percent_dict[uid]['cor'] / u_percent_dict[uid]['tot'], u_percent_dict[uid]['tot']]
                lgbm_udat = lgbm_udat + [c_percent_dict[cid-1][0] / c_percent_dict[cid-1][1], c_percent_dict[cid-1][1]]
                # NOTE(review): the bare excepts below presumably guard a zero
                # denominator but swallow every other error as well.
                try:
                    u_part_chance = u_percent_dict[uid][part][0] / u_percent_dict[uid][part][1]
                    if u_percent_dict[uid][part][1] > 15: can_if = True
                except:
                    u_part_chance = -1
                try:
                    u_skill_chance = u_percent_dict[uid][skill][0] / u_percent_dict[uid][skill][1]
                except:
                    u_skill_chance = -1
                lgbm_udat = lgbm_udat + [u_part_chance, u_percent_dict[uid][part][1], u_skill_chance, u_percent_dict[uid][skill][1]]
                # NOTE(review): same c_percent_dict[t1] / t_percent_dict[t1] mix as in the branch above.
                lgbm_udat = lgbm_udat + [t_percent_dict[t1][0] / t_percent_dict[t1][1], c_percent_dict[t1][1]]
                lgbm_udat = lgbm_udat + [user_df[uid][-1,-1], user_df[uid][-1,24]]
            total_dat.append(infer_dat)
            lgbm_udat.append(prob)
            lgbm_udat.append(mu)
            lgbm_udat.append(sigma)
            total_lgbm_dat.append(lgbm_udat)
            # Remember this row so the next batch can attach the true answer to it.
            prev_dat.append(temp_udat)
            prev_uid.append(uid)
            prev_cid.append([cid-1,part,t1])
        ##For lectures
        else:
            # Lectures are not predicted; they only extend the user's lecture history.
            uid = tdf['user_id'].iloc[i]
            if not uid in trainlec_dict: trainlec_dict[uid] = {}
            lid = tdf['content_id'].iloc[i] - 1
            pos = lec_pos[lid]
            part = df_lec['part'][pos]
            tag = df_lec['tag'][pos]
            ltype = np.argmax(df_lec[['ty1', 'ty2', 'ty3', 'ty4']][pos : pos+1].to_numpy())
            if not part in trainlec_dict[uid]: trainlec_dict[uid][part] = {'tt':[], 'lt':[], 'ts':[]}
            trainlec_dict[uid][part]['tt'].append(tag)
            trainlec_dict[uid][part]['lt'].append(ltype)
            trainlec_dict[uid][part]['ts'].append(tdf['timestamp'].iloc[i])
    # --- 3. Predict and submit ---------------------------------------------------
    # Drop feature column 3 (the raw timestamp) before feeding the transformer.
    total_dat = np.delete(np.array(total_dat), 3, axis = 2)
    pred0 = model.predict(total_dat)
    pred1 = lgbm_model.predict(total_lgbm_dat)
    pred0 = np.squeeze(pred0, axis = -1)
    # Weighted blend: 0.45 * transformer + 0.55 * LightGBM.
    pred0 = list(pred0*0.45 + pred1*0.55)
    #pred0 = list(pred1)
    pred_df = pd.DataFrame({"row_id": row_id, "answered_correctly": pred0}, columns = ["row_id", "answered_correctly"])
    env.predict(pred_df)
#     print(time.time() - start)
#     print("======="