In [None]:
import numpy as np
import pandas as pd
import re

import seaborn as sns
import matplotlib.pyplot as plt

import os,sys,inspect
import gc
from tqdm import tqdm
import random

import warnings
warnings.filterwarnings('ignore')

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, log_loss

In [None]:
from tensorflow import keras
import tensorflow as tf
from tensorflow.keras import optimizers, callbacks, layers, losses
from tensorflow.keras.layers import Dense, Concatenate, Activation, Add, BatchNormalization, Dropout, Input, Embedding, Flatten, Multiply
from tensorflow.keras.models import Model, Sequential, load_model
from tensorflow.keras.regularizers import l2
from tensorflow.keras.utils import to_categorical

# Seed every random number generator the notebook relies on.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)
# NOTE: PYTHONHASHSEED is read when the interpreter starts, so setting it here
# only reaches child processes, not the hashing of this running kernel.
os.environ['PYTHONHASHSEED'] = str(SEED)

gpus = tf.config.experimental.list_physical_devices('GPU')

# TensorFlow requires the memory-growth setting to be the same on every visible
# GPU, so enable it on all of them rather than only on the first device.
for gpu in gpus:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Memory growth has to be configured at program start-up, before the
        # GPUs have been initialised.
        print(e)
        
def mish(x):
    """Mish activation: ``x * tanh(softplus(x))``."""
    gate = tf.math.tanh(tf.math.softplus(x))
    return x * gate

def leakyrelu(x, factor=0.2):
    """Leaky ReLU written as the maximum of ``x`` and ``factor * x``.

    Args:
        x: Input tensor.
        factor: Multiplier applied to ``x`` for the second operand.
    """
    scaled = factor * x
    return tf.maximum(x, scaled)

In [None]:
# Load the pickled train / test frames from the local data directory.
train, test = (
    pd.read_pickle(path)
    for path in ('./data/train.pkl', './data/test.pkl')
)

# Show both shapes as the cell output.
train.shape, test.shape

In [None]:
# expressions = {
#     r'\d+:\d+:\d+\S': '<TIME>',
#     r'\d+:\d+': '<RANGE>',
#     r'\d+-\d+-\d+\S': '<DAY>',
#     r'\S\S+[a-z] [\s\d]\d': '<MON> <DATE>',
#     r'\d+': '<NUM>',
#     'js:': 'js',
#     r'\\n': ' ',
#     '\t': ' ',
#     '"': '',
#     r':+\s': ' ',
#     ':': '=',
#     '{': '',
#     '}': '',
#     '(': '',
#     ')': '',
#     ',': ' ',
#     r'\s+': ' '
# }

# Ordered ``regex -> replacement`` rules used to normalise the raw log text.
# `convert` and `convert_df` apply them in insertion order, so the more specific
# digit patterns run before the generic `\d+` rule rewrites remaining digit runs.
expressions = {
    # Three colon-separated digit groups plus one trailing non-space character.
    r'\d+:\d+:\d+\S': '<TIME>',
    # Two colon-separated digit groups.
    r'\d+:\d+': '<RANGE>',
    # Three dash-separated digit groups plus one trailing non-space character.
    r'\d+-\d+-\d+\S': '<DAY>',
    # Presumably a month name followed by a day of month - TODO confirm on data.
    r'\S\S+[a-z] [\s\d]\d': '<MON> <DATE>',
    # Any remaining run of digits.
    r'\d+': '<NUM>',
#     'js:': 'js',
#     r'\\n|\s+|,|:+\s': ' ',
    # Literal backslash-n sequences, commas, square brackets, '=' and ':' become spaces.
    r'\\n|,|[[]|[]]|[=]|[:]': ' ',
    # Braces, parentheses, double quotes and backslash runs are removed.
    r'[{]|[}]|[(]|[)]|["]|[\\]+': '',
#     ':': '=',
}

def strip_strs(x):
    """Remove the spaces inside every double-quoted phrase of ``x``.

    Quoted phrases are located once on the original string; each one is then
    substituted (all occurrences) by its space-free version.
    """
    cleaned = x
    for quoted in re.findall(r'"+[\S\s]+?"', x):
        compact = quoted.replace(' ', '')
        cleaned = cleaned.replace(quoted, compact)
    return cleaned

def convert(x):
    """Apply every rule of the module-level ``expressions`` dict to the string ``x``.

    Rules are applied one after another in the dict's iteration order, each on
    the output of the previous one.
    """
    result = x
    for pattern in expressions:
        result = re.sub(pattern, expressions[pattern], result)
    return result

def convert_df(df_, col='full_log'):
    """Return a copy of ``df_`` whose ``col`` text has been normalised.

    The column is first passed through ``strip_strs`` and then through every
    rule of the module-level ``expressions`` dict, in order.

    Args:
        df_: Input DataFrame; it is not modified.
        col: Name of the text column to clean.

    Returns:
        A new DataFrame with the cleaned column.
    """
    df = df_.copy()
    cleaned = df[col].map(strip_strs)
    for pattern, replacement in expressions.items():
        # regex=True must be explicit: since pandas 2.0 ``Series.str.replace``
        # treats the pattern as a literal string by default, which would leave
        # every rule in ``expressions`` without effect.
        cleaned = cleaned.str.replace(pattern, replacement, regex=True)
    df[col] = cleaned

    return df

In [None]:
%%time
# Normalise the log text of both splits ('full_log' is also convert_df's default column).
df = convert_df(train, col='full_log')
test_X = convert_df(test)

# Preview the cleaned training frame.
df.head()

In [None]:
# Split each cleaned log line on single spaces and drop the empty tokens.
tr_sent = [
    [token for token in line.split(' ') if len(token) > 0]
    for line in df['full_log'].values
]
test_sent = [
    [token for token in line.split(' ') if len(token) > 0]
    for line in test_X['full_log'].values
]


In [None]:
import gensim

# Size of each word vector; also the width of the averaged sentence embeddings.
emb_dim = 128

# Skip-gram (sg=1) Word2Vec model built from the training token lists.
w2v = gensim.models.Word2Vec(
    sentences=tr_sent,
    vector_size=emb_dim,
    sg=1,
    workers=4,
)

In [None]:
# Pass the token lists directly: wrapping the ragged list of lists in np.array()
# raises on recent NumPy releases (inhomogeneous shape) and gains nothing,
# because gensim only needs an iterable of token lists.
# NOTE(review): build_vocab() without update=True appears to re-initialise the
# weights trained by the constructor call - confirm this is intended.
w2v.build_vocab(tr_sent)
w2v.train(tr_sent,
          total_examples=w2v.corpus_count,
          epochs=100,
          compute_loss=True)

In [None]:
def featureVecMethod(words, model, num_features):
    """Average the word vectors of the in-vocabulary tokens of one sentence.

    Args:
        words: Iterable of tokens.
        model: Trained model exposing ``model.wv`` (``key_to_index`` mapping
            and vector lookup by token).
        num_features: Length of the word vectors.

    Returns:
        A 1-D array of length ``num_features``. When no token is found in the
        vocabulary the zero vector is returned instead of dividing by zero,
        which would otherwise produce a NaN embedding.
    """
    featureVec = np.zeros(num_features, dtype="float32")

    # Dict membership is O(1); no need to rebuild a set of the vocabulary on
    # every call.
    vocab = model.wv.key_to_index

    nwords = 0
    for word in words:
        if word in vocab:
            nwords += 1
            featureVec = np.add(featureVec, model.wv[word])

    if nwords == 0:
        return featureVec

    # Divide the accumulated sum by the number of matched words.
    return np.divide(featureVec, nwords)

def getAvgFeatureVecs(sents, model, num_features):
    """Build the matrix of averaged word vectors, one row per sentence.

    Args:
        sents: Sequence of token lists.
        model: Model forwarded to ``featureVecMethod``.
        num_features: Length of the word vectors.

    Returns:
        float32 array of shape ``(len(sents), num_features)``.
    """
    reviewFeatureVecs = np.zeros((len(sents), num_features), dtype="float32")
    for row, sent in enumerate(tqdm(sents)):
        reviewFeatureVecs[row] = featureVecMethod(sent, model, num_features)

    return reviewFeatureVecs

In [None]:
# Sentence embeddings (mean word vector) for the train and test token lists.
emb_X = getAvgFeatureVecs(sents=tr_sent, model=w2v, num_features=emb_dim)
emb_test_X = getAvgFeatureVecs(sents=test_sent, model=w2v, num_features=emb_dim)


In [None]:
# Fraction of the rows held out for validation.
TEST_SIZE = 0.2

tr_X, val_X, tr_y, val_y = train_test_split(
    emb_X,
    df['level'],
    test_size=TEST_SIZE,
    random_state=SEED,
)


In [None]:
# Baseline classifier: two ReLU hidden layers and a 7-way softmax output.
nn = Sequential()
nn.add(Dense(128, activation='relu'))
nn.add(Dense(64, activation='relu'))
nn.add(Dense(7, activation='softmax'))

In [None]:
nn.compile(loss='sparse_categorical_crossentropy', optimizer=optimizers.Adam(2e-4))
es = callbacks.EarlyStopping(patience=2, restore_best_weights=True)

# Fit on the training split only: the features must be `tr_X` so that they line
# up row-for-row with `tr_y`. Using the full `emb_X` here mismatches the label
# count and would also include the validation rows.
nn.fit(tr_X, tr_y,
       epochs=10,
       validation_data=(val_X, val_y),
       callbacks=[es])

In [None]:
# Class probabilities of the baseline network on the validation split.
preds = nn.predict(val_X)

# Macro-averaged F1 of the arg-max class against the true labels.
f1_score(val_y, preds.argmax(axis=1), average='macro')

In [None]:
class ValCallback(tf.keras.callbacks.Callback):
    """Keep the generator weights with the best validation macro-F1.

    After every epoch the generator of the model being trained is scored on the
    notebook globals ``val_X`` / ``val_y``; the best-scoring weights are stored
    and written back into the generator when training ends. The trained model
    must expose a ``generator`` sub-model.
    """

    def __init__(self):
        # Initialise the Keras base class so its own attributes exist.
        super().__init__()
        self.bw = None          # best generator weights seen so far
        self.best_score = 0     # macro-F1 achieved by `self.bw`

    def on_epoch_end(self, epoch, logs=None):
        """Score the generator and remember its weights if the score improved."""
        pred = self.model.generator.predict(val_X, verbose=0)
        pred = np.argmax(pred, 1)
        score = f1_score(val_y, pred, average='macro')
        if score > self.best_score:
            # Record the new best score; without this every epoch scoring above
            # the initial 0 would overwrite the stored weights.
            self.best_score = score
            self.bw = self.model.generator.get_weights()

    def on_train_end(self, logs=None):
        """Restore the best weights; Keras calls this hook with ``logs`` only."""
        # Nothing to restore if no epoch ever produced a positive score.
        if self.bw is not None:
            self.model.generator.set_weights(self.bw)

In [None]:
class GAN(tf.keras.models.Model):
    """Conditional GAN whose generator acts as the classifier.

    The generator maps a feature vector ``x`` to a softmax over ``y_dim``
    classes. The discriminator receives ``(x, y)`` pairs and emits one
    un-activated score; real pairs are labelled 0 and generated pairs 1.

    Args:
        x_dim: Width of the input feature vectors.
        y_dim: Number of classes (width of the one-hot labels).
        batch_size: Stored as ``self.batch_size``; the adversarial labels are
            sized from the incoming batch, so any batch size works.
    """

    def __init__(self, x_dim, y_dim, batch_size):
        super().__init__()

        self.x_dim = x_dim
        self.y_dim = y_dim
        self.batch_size = batch_size

        self.generator = self.build_generator()
        self.discriminator = self.build_discriminator()

    def compile(self, adv_loss_fn, pred_loss_fn, g_optim, d_optim):
        """Store the losses and optimizers used by ``train_step``.

        Args:
            adv_loss_fn: Loss comparing discriminator scores with 0/1 labels.
            pred_loss_fn: Loss comparing generator output with the true one-hot labels.
            g_optim: Optimizer for the generator weights.
            d_optim: Optimizer for the discriminator weights.
        """
        super().compile()
        self.adv_loss_fn = adv_loss_fn
        self.pred_loss_fn = pred_loss_fn
        self.g_optim = g_optim
        self.d_optim = d_optim

    def build_generator(self):
        """Return the x -> softmax(y_dim) network (128 and 64 ReLU units)."""
        activation = 'relu'
        inputs = Input(shape=(self.x_dim, ))

        h = Dense(128)(inputs)
        h = Activation(activation)(h)
        h = Dense(64)(h)
        h = Activation(activation)(h)

        outputs = Dense(self.y_dim)(h)
        outputs = Activation('softmax')(outputs)

        return Model(inputs, outputs, name='generator')

    def build_discriminator(self):
        """Return the (x, y) -> scalar score network (64 and 32 ReLU units)."""
        activation = 'relu'
        inputs_x = Input(shape=(self.x_dim, ))
        inputs_y = Input(shape=(self.y_dim, ))

        inputs = Concatenate()([inputs_x, inputs_y])

        h = Dense(64)(inputs)
        h = Activation(activation)(h)
        h = Dense(32)(h)
        h = Activation(activation)(h)

        # No activation: the score is passed to `adv_loss_fn` as-is.
        outputs = Dense(1)(h)

        return Model([inputs_x, inputs_y], outputs, name='discriminator')

    def train_step(self, data):
        """Run one discriminator update followed by one generator update.

        Args:
            data: Dict with keys ``'X'`` (features) and ``'y'`` (one-hot labels).

        Returns:
            Dict with ``d_loss``, ``g_loss``, ``adv_loss`` and ``pred_loss``.
        """
        x = data['X']
        y = data['y']

        # Size the labels from the batch actually received instead of the
        # constructor's batch_size, so a smaller final batch does not break.
        label_shape = tf.stack([tf.shape(x)[0], 1])
        fake_labels = tf.ones(label_shape)
        real_labels = tf.zeros(label_shape)
        labels = tf.concat([real_labels, fake_labels], 0)

        # discriminator: tell true (x, y) pairs from (x, generator(x)) pairs
        with tf.GradientTape() as tape:
            fake = self.generator(x)
            all_y = tf.concat([y, fake], 0)
            all_x = tf.concat([x, x], 0)
            preds = self.discriminator([all_x, all_y])

            d_loss = self.adv_loss_fn(labels, preds)

        grads = tape.gradient(d_loss, self.discriminator.trainable_weights)
        self.d_optim.apply_gradients(zip(grads, self.discriminator.trainable_weights))

        # generator: make generated pairs score as "real" and match the labels
        with tf.GradientTape() as tape:
            fake = self.generator(x)
            preds = self.discriminator([x, fake])

            adv_loss = self.adv_loss_fn(real_labels, preds)
            pred_loss = self.pred_loss_fn(y, fake)

            # The supervised term is weighted 100x against the adversarial term.
            g_loss = adv_loss + 100*pred_loss

        grads = tape.gradient(g_loss, self.generator.trainable_weights)
        self.g_optim.apply_gradients(zip(grads, self.generator.trainable_weights))

        return {'d_loss': d_loss, 'g_loss': g_loss, 'adv_loss': adv_loss, 'pred_loss': pred_loss}

In [None]:
batch_size = 128

# One-hot width is pinned to the 7 classes the models are built for, instead of
# being inferred from the largest label present.
# NOTE(review): this dataset contains every row of emb_X, including the rows of
# the validation split scored later - confirm that is intended.
tr_ds = tf.data.Dataset.from_tensor_slices({
    'X': emb_X,
    'y': to_categorical(df['level'].values, num_classes=7).astype(np.float32),
})

# Shuffle individual samples BEFORE batching. Shuffling after batch() only
# reorders fixed batches, so every epoch would see the same batch contents.
tr_loader = (
    tr_ds
    .shuffle(buffer_size=len(emb_X), reshuffle_each_iteration=True)
    .batch(batch_size, drop_remainder=True)
    .prefetch(1)
)


In [None]:
# The generator/discriminator inputs must match the embedding width (emb_dim);
# a hard-coded 64 is incompatible with the emb_dim-wide rows of emb_X.
gan = GAN(emb_dim, 7, batch_size)
gan.compile(
    adv_loss_fn=losses.BinaryCrossentropy(from_logits=True),
    pred_loss_fn=losses.CategoricalCrossentropy(),
    g_optim=optimizers.Adam(2e-4),
    d_optim=optimizers.Adam(2e-4),
)

In [None]:
# NOTE(review): `es` is created here but is not passed to fit() below.
es = callbacks.EarlyStopping(monitor='pred_loss', patience=5, restore_best_weights=True)

# Train the GAN; ValCallback handles validation scoring of the generator.
gan.fit(
    tr_loader,
    epochs=20,
    callbacks=[ValCallback()],
)

In [None]:
# Generator class probabilities on the validation split.
pred = gan.generator.predict(val_X)

# Macro-averaged F1 of the arg-max class.
f1_score(val_y, pred.argmax(axis=1), average='macro')

In [None]:
# Distinct classes that appear among the arg-max predictions.
np.unique(pred.argmax(axis=1))

In [None]:
from collections import Counter

# Number of training rows per `level` value.
level_counts = Counter(df['level'])
level_counts

In [None]:
# Same model as before, but with a mean-squared-error adversarial loss in place
# of losses.BinaryCrossentropy(from_logits=True).
# x_dim must equal the embedding width (emb_dim); a hard-coded 64 does not match
# the emb_dim-wide rows of emb_X.
gan = GAN(emb_dim, 7, batch_size)
gan.compile(
    adv_loss_fn=losses.MeanSquaredError(),
    pred_loss_fn=losses.CategoricalCrossentropy(),
    g_optim=optimizers.Adam(2e-4),
    d_optim=optimizers.Adam(2e-4),
)

In [None]:
# Train the MSE-loss GAN with the same schedule and validation callback.
gan.fit(
    tr_loader,
    epochs=20,
    callbacks=[ValCallback()],
)

In [None]:
# Generator class probabilities on the validation split.
pred = gan.generator.predict(val_X)

# Macro-averaged F1 of the arg-max class.
f1_score(val_y, pred.argmax(axis=1), average='macro')

In [None]:
class D2GAN(tf.keras.models.Model):
    """Conditional GAN with two discriminators; the generator is the classifier.

    The generator maps ``x`` to a softmax over ``y_dim`` classes. Both
    discriminators take ``(x, y)`` pairs and return a softplus-activated score.
    ``alpha`` and ``beta`` weight the logarithmic terms of the discriminator
    and generator losses.

    Args:
        x_dim: Width of the input feature vectors.
        y_dim: Number of classes (width of the one-hot labels).
        batch_size: Stored as ``self.batch_size``; not needed by ``train_step``.
    """

    def __init__(self, x_dim, y_dim, batch_size):
        super().__init__()

        self.x_dim = x_dim
        self.y_dim = y_dim
        self.batch_size = batch_size

        self.alpha = 1
        self.beta = 1

        self.generator = self.build_generator()
        self.discriminator_1 = self.build_discriminator()
        self.discriminator_2 = self.build_discriminator()

    def compile(self, adv_loss_fn, pred_loss_fn, g_optim, d_optim_1, d_optim_2):
        """Store the losses and optimizers used by ``train_step``.

        Args:
            adv_loss_fn: Stored but not used by ``train_step``, which computes
                the adversarial terms directly.
            pred_loss_fn: Loss comparing generator output with the true one-hot labels.
            g_optim: Optimizer for the generator weights.
            d_optim_1: Optimizer for ``discriminator_1``.
            d_optim_2: Optimizer for ``discriminator_2``.
        """
        super().compile()
        self.adv_loss_fn = adv_loss_fn
        self.pred_loss_fn = pred_loss_fn
        self.g_optim = g_optim
        self.d_optim_1 = d_optim_1
        self.d_optim_2 = d_optim_2

    @staticmethod
    def _safe_log(t, eps=1e-7):
        """Natural log with the argument floored at ``eps`` to avoid log(0)."""
        return tf.math.log(tf.maximum(t, eps))

    def build_generator(self):
        """Return the x -> softmax(y_dim) network (128 and 64 ReLU units)."""
        activation = 'relu'
        inputs = Input(shape=(self.x_dim, ))

        h = Dense(128)(inputs)
        h = Activation(activation)(h)
        h = Dense(64)(h)
        h = Activation(activation)(h)

        outputs = Dense(self.y_dim)(h)
        outputs = Activation('softmax')(outputs)

        return Model(inputs, outputs, name='generator')

    def build_discriminator(self):
        """Return an (x, y) -> softplus score network (64 and 32 ReLU units)."""
        activation = 'relu'
        inputs_x = Input(shape=(self.x_dim, ))
        inputs_y = Input(shape=(self.y_dim, ))

        inputs = Concatenate()([inputs_x, inputs_y])

        h = Dense(64)(inputs)
        h = Activation(activation)(h)
        h = Dense(32)(h)
        h = Activation(activation)(h)

        # Softplus keeps the score non-negative so that its log can be taken.
        outputs = Dense(1, activation='softplus')(h)

        return Model([inputs_x, inputs_y], outputs, name='discriminator')

    def train_step(self, data):
        """Update both discriminators, then the generator.

        Args:
            data: Dict with keys ``'X'`` (features) and ``'y'`` (one-hot labels).

        Returns:
            Dict with ``d1_loss``, ``d2_loss``, ``g_loss``, ``adv_loss`` and
            ``pred_loss``.
        """
        x = data['X']
        y = data['y']

        # discriminators: each tape is used for one discriminator's gradients
        with tf.GradientTape() as tape1, tf.GradientTape() as tape2:
            fake = self.generator(x)

            d1_pred_x = self.discriminator_1([x, y])
            d1_pred_fake = self.discriminator_1([x, fake])

            # D1: high score on real pairs (log term), low score on generated pairs.
            d1_loss = tf.reduce_mean(-self.alpha*self._safe_log(d1_pred_x) + d1_pred_fake)

            d2_pred_x = self.discriminator_2([x, y])
            d2_pred_fake = self.discriminator_2([x, fake])

            # D2: the mirror image - low score on real pairs, high on generated.
            d2_loss = tf.reduce_mean(d2_pred_x - self.beta*self._safe_log(d2_pred_fake))

        grads1 = tape1.gradient(d1_loss, self.discriminator_1.trainable_weights)
        self.d_optim_1.apply_gradients(zip(grads1, self.discriminator_1.trainable_weights))

        grads2 = tape2.gradient(d2_loss, self.discriminator_2.trainable_weights)
        self.d_optim_2.apply_gradients(zip(grads2, self.discriminator_2.trainable_weights))

        # generator
        with tf.GradientTape() as tape:
            fake = self.generator(x)
            pred_1 = self.discriminator_1([x, fake])
            pred_2 = self.discriminator_2([x, fake])

            adv_loss = tf.reduce_mean(-pred_1 + self.beta*self._safe_log(pred_2))
            pred_loss = self.pred_loss_fn(y, fake)

            # The supervised term is weighted 100x against the adversarial term.
            g_loss = adv_loss + 100*pred_loss

        grads = tape.gradient(g_loss, self.generator.trainable_weights)
        self.g_optim.apply_gradients(zip(grads, self.generator.trainable_weights))

        return {'d1_loss': d1_loss, 'd2_loss':d2_loss, 'g_loss': g_loss, 'adv_loss': adv_loss, 'pred_loss': pred_loss}

In [None]:
# x_dim must equal the embedding width (emb_dim); a hard-coded 64 does not match
# the emb_dim-wide rows of emb_X that the model is trained and evaluated on.
d2gan = D2GAN(emb_dim, 7, batch_size)
d2gan.compile(
    adv_loss_fn=losses.BinaryCrossentropy(from_logits=True),
    pred_loss_fn=losses.CategoricalCrossentropy(),
    g_optim=optimizers.Adam(2e-4),
    d_optim_1=optimizers.Adam(2e-4),
    d_optim_2=optimizers.Adam(2e-4),
)

In [None]:
# NOTE(review): `es` is created here but is not passed to fit() below.
es = callbacks.EarlyStopping(monitor='pred_loss', patience=5, restore_best_weights=True)

# Train the two-discriminator GAN with the validation callback.
d2gan.fit(
    tr_loader,
    epochs=20,
    callbacks=[ValCallback()],
)

In [None]:
# Generator class probabilities on every row of emb_X.
pred = d2gan.generator.predict(emb_X)

# Macro-averaged F1 of the arg-max class against all `level` labels.
f1_score(df['level'].values, pred.argmax(axis=1), average='macro')

In [None]:
# Distinct classes that appear among the arg-max predictions.
np.unique(pred.argmax(axis=1))

In [None]:
# Generator class probabilities for the test embeddings.
pred = d2gan.generator.predict(x=emb_test_X)

In [None]:
# Most probable class per test row.
results = pred.argmax(axis=1)


In [None]:
# Rows whose highest class probability is below 0.7 are relabelled as 7.
results[pred.max(axis=1) < 0.7] = 7

In [None]:
# Histogram of the predicted labels.
plt.hist(x=results)

In [None]:
# Fill the sample submission's `level` column with the predictions and save it.
sub = pd.read_csv('./data/sample_submission.csv').assign(level=results)
sub.to_csv('./sample_d2gan_no7.csv', index=False)