# *DATA PREPARATION*

In [None]:
import os
import sys
import time
import pandas as pd
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
import gc
import random
from pickle import dump, load

In [None]:
# Column-name -> dtype mapping for the training table. The narrow integer and
# float32 types keep the in-memory footprint small; 'boolean' is pandas'
# nullable boolean dtype, so missing values are allowed in that column.
# (Presumably intended as the `dtype=` argument of `read_csv`; the consumer is
# not visible in this section.)
TRAIN_DTYPES = dict(
    row_id=np.uint64,
    timestamp=np.int64,
    user_id=np.uint64,
    content_id=np.uint16,
    content_type_id=np.int8,
    task_container_id=np.uint16,
    user_answer=np.int8,
    answered_correctly=np.int8,
    prior_question_elapsed_time=np.float32,
    prior_question_had_explanation='boolean',
)

# Column-name -> dtype mapping for the questions table. 'tags' is kept as a
# plain string; any parsing of it happens outside this section.
QUESTION_DTYPES = dict(
    question_id=np.uint16,
    bundle_id=np.uint16,
    correct_answer=np.int8,
    part=np.int8,
    tags=str,
)

# Column-name -> dtype mapping for the lectures table; 'type_of' stays a
# plain string column.
LECTURE_DTYPES = dict(
    lecture_id=np.uint16,
    tag=np.uint16,
    part=np.int8,
    type_of=str,
)

In [None]:
def read_csv(file_name = "train.csv", dtype = None, skiprows = None, nrows = None, usecols = None):
    """Load a CSV file into a DataFrame via ``pd.read_csv``.

    The call always uses ``header=0`` and ``low_memory=True``; every other
    argument is forwarded unchanged.

    Args:
        file_name: Path or file-like object to read (default ``"train.csv"``).
        dtype: Optional column -> dtype mapping passed to pandas.
        skiprows: Optional rows to skip, as accepted by ``pd.read_csv``.
        nrows: Optional maximum number of data rows to read.
        usecols: Optional subset of columns to load.

    Returns:
        The parsed ``pandas.DataFrame``.
    """
    reader_options = {
        'dtype': dtype,
        'skiprows': skiprows,
        'nrows': nrows,
        'usecols': usecols,
        'low_memory': True,
        'header': 0,
    }
    return pd.read_csv(file_name, **reader_options)

def read_feather(file_name = "../input/feather-data/train.feather"):
    """Load a Feather file into a DataFrame.

    Args:
        file_name: Path of the Feather file to read; defaults to
            ``"../input/feather-data/train.feather"``.

    Returns:
        The ``pandas.DataFrame`` produced by ``pd.read_feather``.
    """
    return pd.read_feather(file_name)

In [None]:
def concat_feat_en(i):
    """Attach the engineered-feature columns to one row chunk and save it.

    Reads ``../input/riiid-trainlgb-row/trainlgb_row_{i+1}.feather`` and
    ``../input/riiid-data-processing-lgbm2/trainlgb_feat-en_{i}.feather``,
    keeps ten feature columns from the latter (float64 ones are downcast to
    float32), cleans ``mean_elapsed`` in the row chunk (+/-inf and NaN become
    0, then cast to int16), concatenates both frames column-wise and writes
    the result to ``trainlgb_feat-plus-row_{i}.feather`` in the working
    directory.

    NOTE(review): the row file is indexed ``i+1`` while the feature file is
    indexed ``i`` -- confirm this offset is intentional.
    NOTE(review): ``pd.concat(axis=1)`` aligns on the index, so this assumes
    both frames carry identical indexes and row order -- confirm.

    Args:
        i: Integer chunk index used to build the input and output file names.

    Returns:
        None. The merged frame is only written to disk.
    """
    feature_cols = [
        'u_chance', 'u_attempts', 'c_chance', 'c_attempts',
        'u_part_chance', 'u_part_attempts',
        'u_skill_chance', 'u_skill_attempts', 't_chance', 't_attempts',
    ]

    rows = read_feather(f'../input/riiid-trainlgb-row/trainlgb_row_{i+1}.feather')
    feats = read_feather(f'../input/riiid-data-processing-lgbm2/trainlgb_feat-en_{i}.feather')
    feats = feats[feature_cols]

    # Halve the footprint of the float features; other dtypes are untouched.
    for col in feature_cols:
        if feats[col].dtype == 'float64':
            feats[col] = feats[col].astype(np.float32)

    # Assign the cleaned column back explicitly. The previous chained
    # `tdf['mean_elapsed'].fillna(0, inplace=True)` operates on a temporary
    # Series under pandas Copy-on-Write, which would leave the NaNs in place
    # and make the int16 cast below fail.
    rows['mean_elapsed'] = (
        rows['mean_elapsed']
        .replace([np.inf, -np.inf], np.nan)
        .fillna(0)
        .astype(np.int16)
    )
    gc.collect()

    merged = pd.concat([rows, feats], axis = 1)
    # Drop the inputs before writing so only the merged frame stays resident.
    del rows, feats
    gc.collect()
    merged.to_feather(f'trainlgb_feat-plus-row_{i}.feather')

In [None]:
# Process the four chunks one after another, forcing a garbage-collection
# pass between consecutive chunks (but not after the last one) so memory from
# the previous chunk is released before the next one is loaded.
for chunk_idx in range(4):
    if chunk_idx:
        gc.collect()
    concat_feat_en(chunk_idx)