In [2]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.utils import to_categorical, plot_model
from tensorflow.keras.layers import *
from tensorflow.keras.models import Sequential, load_model, Model
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint,  ReduceLROnPlateau
from tensorflow.keras.regularizers import *
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.optimizers import *
import tensorflow.keras.backend as K
from datetime import datetime
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')
from google.colab import drive
drive.mount('/content/gdrive/')
import pickle
import gzip

Mounted at /content/gdrive/


In [3]:
class MultiHeadAttention(Layer):
    def __init__(self, embedding_dim, num_heads=8):
        super(MultiHeadAttention, self).__init__()
        self.embedding_dim = embedding_dim # d_model
        self.num_heads = num_heads

        assert embedding_dim % self.num_heads == 0

        self.projection_dim = embedding_dim // num_heads
        self.query_dense = Dense(embedding_dim)
        self.key_dense = Dense(embedding_dim)
        self.value_dense = Dense(embedding_dim)
        self.dense = Dense(embedding_dim)
    
    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'embedding_dim' : self.embedding_dim,
            'num_heads' : self.num_heads,
            
            'projection_dim' : self.projection_dim,
            'query_dense' : self.query_dense,
            'key_dense' : self.key_dense,
            'value_dense' : self.value_dense,
            'dense' : self.dense
        })

    def scaled_dot_product_attention(self, query, key, value):
        matmul_qk = tf.matmul(query, key, transpose_b=True)
        depth = tf.cast(tf.shape(key)[-1], tf.float32)
        logits = matmul_qk / tf.math.sqrt(depth)
        attention_weights = tf.nn.softmax(logits, axis=-1)
        output = tf.matmul(attention_weights, value)
        return output, attention_weights

    def split_heads(self, x, batch_size):
        x = tf.reshape(x, (batch_size, -1, self.num_heads, self.projection_dim))
        return tf.transpose(x, perm=[0, 2, 1, 3])

    def call(self, inputs):
        # x.shape = [batch_size, seq_len, embedding_dim]
        batch_size = tf.shape(inputs)[0]

        # (batch_size, seq_len, embedding_dim)
        query = self.query_dense(inputs)
        key = self.key_dense(inputs)
        value = self.value_dense(inputs)

        # (batch_size, num_heads, seq_len, projection_dim)
        query = self.split_heads(query, batch_size)  
        key = self.split_heads(key, batch_size)
        value = self.split_heads(value, batch_size)

        scaled_attention, _ = self.scaled_dot_product_attention(query, key, value)
        # (batch_size, seq_len, num_heads, projection_dim)
        scaled_attention = tf.transpose(scaled_attention, perm=[0, 2, 1, 3])  

        # (batch_size, seq_len, embedding_dim)
        concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.embedding_dim))
        outputs = self.dense(concat_attention)
        return outputs

In [4]:
class TransformerBlock(Layer):
    def __init__(self, embedding_dim, num_heads, dff, rate=0.1):
        super(TransformerBlock, self).__init__()
        self.att = MultiHeadAttention(embedding_dim, num_heads)
        self.ffn = Sequential(
            [Dense(dff, activation="relu"),
             Dense(embedding_dim),]
        )
        self.layernorm1 = LayerNormalization(epsilon=1e-6)
        self.layernorm2 = LayerNormalization(epsilon=1e-6)
        self.dropout1 = Dropout(rate)
        self.dropout2 = Dropout(rate)
        
    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'att' : self.att,
            'ffn' : self.ffn,
            'layernorm1' : self.layernorm1,
            'layernorm2' : self.layernorm2,
            'dropout1' : self.dropout1,
            'dropout2' : self.dropout2
        })

    def call(self, inputs, training):
        attn_output = self.att(inputs)
        attn_output = self.dropout1(attn_output, training=training)
        out1 = self.layernorm1(inputs + attn_output)
        ffn_output = self.ffn(out1)
        ffn_output = self.dropout2(ffn_output, training=training)
        return self.layernorm2(out1 + ffn_output)

In [5]:
class TokenAndPositionEmbedding(Layer):
    def __init__(self, max_len, vocab_size, embedding_dim):
        super(TokenAndPositionEmbedding, self).__init__()
        self.token_emb = Embedding(vocab_size, embedding_dim)
        self.pos_emb = Embedding(max_len, embedding_dim)
        
    def get_config(self):
        config = super().get_config().copy()
        config.update({
            'token_emb' : self.token_emb,
            'pos_emb' : self.pos_emb,
        })
        return config

    def call(self, x):
        max_len = tf.shape(x)[-1]
        positions = tf.range(start=0, limit=max_len, delta=1)
        positions = self.pos_emb(positions)
        x = self.token_emb(x)
        return x + positions

In [9]:
with gzip.open('/content/gdrive/MyDrive/애쓰는 감자/data/small_data/pad_X_train.pickle','rb') as f:
  pad_X_train = pickle.load(f)
# with gzip.open('/content/gdrive/MyDrive/애쓰는 감자/data/small_data/small_y_train.pickle','rb') as f:
#   small_y_train = pickle.load(f)
# with gzip.open('/content/gdrive/MyDrive/애쓰는 감자/data/small_data/small_y_val.pickle','rb') as f:
#   small_y_val = pickle.load(f)
# with gzip.open('/content/gdrive/MyDrive/애쓰는 감자/data/small_data/pad_X_val.pickle','rb') as f:
#   pad_X_val = pickle.load(f)

In [7]:
len(small_y_train[0])

231

In [6]:
max_len = 300
embedding_dim = 32  # Embedding size for each token
num_heads = 4  # Number of attention heads
dff = 32  # Hidden layer size in feed forward network inside transformer
vocab_size = 18658 # Transformer1에 있음
inputs = Input(shape=(max_len,))
embedding_layer = TokenAndPositionEmbedding(max_len, vocab_size, embedding_dim)
x = embedding_layer(inputs)
transformer_block = TransformerBlock(embedding_dim, num_heads, dff)
x = transformer_block(x)
x = GlobalAveragePooling1D()(x)
x = Dropout(0.5)(x)
x = Dense(100, activation="relu")(x)
x = Dropout(0.5)(x)
outputs = Dense(231, activation="softmax")(x)
model = Model(inputs=inputs, outputs=outputs)

In [None]:
import os
path = '/content/gdrive/MyDrive/애쓰는 감자/코드/kym/small_weights/'
ckpt_1 = 'Transformer.ckpt'
model.compile(optimizer="adam", loss="categorical_crossentropy", metrics=["accuracy"])
mc = ModelCheckpoint(filepath =  os.path.join(path, ckpt_1), monitor = 'val_accuracy', save_best_only = True, mode = 'max',verbose = 1, save_weights_only=True)
es = EarlyStopping(monitor='loss', mode='min', verbose=1, patience = 5)

In [None]:
history = model.fit(pad_X_train, small_y_train, batch_size=128, epochs=100, validation_split=0.2, callbacks = [mc,es])

Epoch 1/100

Epoch 00001: val_accuracy improved from -inf to 0.51310, saving model to /content/gdrive/MyDrive/애쓰는 감자/코드/kym/small_weights/Transformer.ckpt
Epoch 2/100

Epoch 00002: val_accuracy improved from 0.51310 to 0.54699, saving model to /content/gdrive/MyDrive/애쓰는 감자/코드/kym/small_weights/Transformer.ckpt
Epoch 3/100

Epoch 00003: val_accuracy improved from 0.54699 to 0.56716, saving model to /content/gdrive/MyDrive/애쓰는 감자/코드/kym/small_weights/Transformer.ckpt
Epoch 4/100

Epoch 00004: val_accuracy improved from 0.56716 to 0.57293, saving model to /content/gdrive/MyDrive/애쓰는 감자/코드/kym/small_weights/Transformer.ckpt
Epoch 5/100

Epoch 00005: val_accuracy improved from 0.57293 to 0.58301, saving model to /content/gdrive/MyDrive/애쓰는 감자/코드/kym/small_weights/Transformer.ckpt
Epoch 6/100

Epoch 00006: val_accuracy improved from 0.58301 to 0.58325, saving model to /content/gdrive/MyDrive/애쓰는 감자/코드/kym/small_weights/Transformer.ckpt
E

In [9]:
path = '/content/gdrive/MyDrive/애쓰는 감자/코드/kym/small_weights/'
ckpt_1 = 'Transformer.ckpt'
model.load_weights(path+ckpt_1)

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f4e102c6350>

In [None]:
len(mid_y_val)

488386

In [None]:
score_board = []
pred_val = model.predict(pad_X_val)
for true,pred in zip(small_y_val,pred_val):
  if np.argmax(true) == np.argmax(pred):
    score_board.append(1)
  else:
    score_board.append(0)
print('Accuracy',np.array(score_board).sum()/len(score_board)*100)

Accuracy 60.55576242591671


In [7]:
data = pd.read_csv('/content/gdrive/MyDrive/애쓰는 감자/data/Pre3.csv')

In [13]:
data['small'] = data['KSIC10_BZC_CD'].apply(lambda x: x[1:4])

In [14]:
ls = data['small'].unique().tolist()

In [15]:
final_ls = []
for i in ls:
  if len(i)==3:
    final_ls.append(i)
final_ls = sorted(final_ls)

In [None]:
predict_val_name = []
for i in pred_val:
  idx = np.argmax(i)
  predict_val_name.append(final_ls[idx])


In [17]:
with gzip.open('/content/gdrive/MyDrive/애쓰는 감자/data/mid_data/small_train_nan.pickle','rb') as f:
  small_train_nan = pickle.load(f)
with gzip.open('/content/gdrive/MyDrive/애쓰는 감자/data/mid_data/small_val_nan.pickle','rb') as f:
  small_val_nan = pickle.load(f)

FileNotFoundError: ignored

In [None]:
for i in small_val_nan:
  predict_val_name.insert(i,np.nan)

In [None]:
Transformer_val = pd.read_csv('/content/gdrive/MyDrive/애쓰는 감자/코드/kym/output/Transformer_predict_val.csv')
Transformer_val['small'] = predict_val_name

In [None]:
# Transformer_val.to_csv('/content/gdrive/MyDrive/애쓰는 감자/코드/kym/output/Transformer_predict_val.csv',index=False)

In [10]:
with gzip.open('/content/gdrive/MyDrive/애쓰는 감자/data/preprocessing_data/pad_X_test.pickle','rb') as f:
  pad_X_test = pickle.load(f)
pred_test = model.predict(pad_X_test)

In [None]:
# 다시...!!!

In [18]:
predict_test_name = []
for i in pred_test:
  idx = np.argmax(i)
  predict_test_name.append(final_ls[idx])
# for j in small_train_nan:
#   predict_test_name.insert(j,np.nan)
Transformer_test = pd.read_csv('/content/gdrive/MyDrive/애쓰는 감자/코드/kym/output/Transformer_predict_test.csv')
Transformer_test['small'] = predict_test_name

In [19]:
# Transformer_test.to_csv('/content/gdrive/MyDrive/애쓰는 감자/코드/kym/output/Transformer_predict_test.csv',index=False)

In [10]:
predict_train = model.predict(pad_X_train)
with gzip.open('/content/gdrive/MyDrive/애쓰는 감자/코드/kym/output/Transformer_predict_train_small.pickle','wb') as f:
  pickle.dump(predict_train,f)