In [20]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.preprocessing import MinMaxScaler
from keras import backend as K

In [5]:
from keras.utils import to_categorical
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam

In [63]:
def single_class_precision(y_true, y_pred):
    class_id_true = K.argmax(y_true, axis=-1)
    class_id_pred = K.argmax(y_pred, axis=-1)
    precision_mask = K.cast(K.equal(class_id_pred), 'int32')
    class_prec_tensor = K.cast(K.equal(class_id_true, class_id_pred), 'int32') * precision_mask
    class_prec = K.cast(K.sum(class_prec_tensor), 'float32') / K.cast(K.maximum(K.sum(precision_mask), 1), 'float32')
    return class_prec

def single_class_recall(y_true, y_pred):
    class_id_true = K.argmax(y_true, axis=-1)
    class_id_pred = K.argmax(y_pred, axis=-1)
    recall_mask = K.cast(K.equal(class_id_true), 'int32')
    class_recall_tensor = K.cast(K.equal(class_id_true, class_id_pred), 'int32') * recall_mask
    class_recall = K.cast(K.sum(class_recall_tensor), 'float32') / K.cast(K.maximum(K.sum(recall_mask), 1), 'float32')
    return class_recall


In [60]:
def f1_score(y_true, y_pred):   
    precision = single_class_precision(0)
    recall = single_class_recall(0)
    return 2*((precision*recall)/(precision+recall))

In [6]:
# def f1(y_true, y_pred):
#     def recall(y_true, y_pred):
#         true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
#         possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
#         recall = true_positives / (possible_positives + K.epsilon())
#         return recall

#     def precision(y_true, y_pred):
#         true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
#         predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
#         precision = true_positives / (predicted_positives + K.epsilon())
#         return precision
    
#     precision = precision(y_true, y_pred)
#     recall = recall(y_true, y_pred)
#     return 2*((precision*recall)/(precision+recall))

In [7]:
def model_performance(y_test, y_pred):    
    print('confusion matrix')
    print(metrics.confusion_matrix(y_test, y_pred))
    print('accuracy : {}'.format(metrics.accuracy_score(y_test, y_pred).round(3)))
    print('precision : {}'.format(metrics.precision_score(y_test, y_pred, pos_label=1).round(3)))
    print('recall : {}'.format(metrics.recall_score(y_test, y_pred, pos_label=1).round(3)))
    print('F1 : {}'.format(metrics.f1_score(y_test, y_pred, pos_label=1).round(3)))

## import data

In [64]:
cust_data = pd.read_csv('data_transform.csv', encoding='euckr')
cust_data.shape

(100233, 126)

In [65]:
# 전체 연체자 수
cust_overdue = cust_data[cust_data['TARGET'] == 1]     # 연체자
print(cust_overdue.shape[0])

4287


In [66]:
# train / test set 분리
x = cust_data.drop('TARGET', axis=1).as_matrix()
y = cust_data['TARGET']

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=0)

print('연체자 수 train : {} / {}'.format(sum(y_train == 1), y_train.shape[0]))
print('연체자 수 test  : {} / {}'.format(sum(y_test  == 1), y_test.shape[0]))

연체자 수 train : 3439 / 80186
연체자 수 test  : 848 / 20047


In [67]:
scaler = MinMaxScaler(feature_range=(0, 1))
x = scaler.fit_transform(x)
x[0]

array([ 0.42857143,  0.6       ,  0.06000188,  0.16393443,  0.25947368,
        0.045     ,  0.        ,  0.42857201,  0.3       ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.01953418,  0.18181818,
        0.71428571,  0.        ,  0.00826446,  0.        ,  0.        ,
        0.        ,  0.10743802,  0.        ,  0.54      ,  0.00402495,
        0.0036483 ,  0.        ,  0.0475    ,  0.        ,  0.        ,
        0.02      ,  0.        ,  0.0475    ,  0.00905632,  0.385     ,
        0.4       ,  0.05263158,  0.        ,  0.        ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.        ,  0.        ,
        0.        ,  0.019     ,  0.        ,  0.66666667,  0.52727273,
        0.08421053,  0.        ,  0.        ,  0.12      ,  0.        ,
        0.        ,  0.        ,  0.        ,  0.77631579,  0.        ,
        0.        ,  0.05309735,  0.        ,  0.00905433,  0.34139403,
        0.02      ,  0.        ,  0.        ,  1.        ,  0.  

In [68]:
# train set 중에서 연체자만 추출
x_overdue = x_train[y_train == 1]
y_overdue = y_train[y_train == 1]
print(x_overdue.shape)
print(y_overdue.shape)

(3439, 125)
(3439,)


In [69]:
y_train = to_categorical(y_train, 2)
y_test = to_categorical(y_test, 2)

print(y_train.shape)

n_cols = x_train.shape[1]

(80186, 2)


In [70]:
print(sum(y_train[:,1]))
print(sum(y_test[:,1]))   # 연체자

3439.0
848.0


In [71]:
model = Sequential()
model.add(Dense(125, activation='relu', input_shape=(n_cols,)))
model.add(Dense(100, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(2, activation='softmax'))

In [62]:
model.compile(loss='binary_crossentropy',
              optimizer='sgd',
              metrics=['accuracy', f1_score])

model.fit(x_train, y_train,
            batch_size = 1000,
            epochs = 10,
            verbose = 1,
            validation_split=0.1)

TypeError: unsupported operand type(s) for *: 'function' and 'function'

In [56]:
y_pred = model.predict(x_test)
print(sum(y_pred[:,0]))
print(sum(y_pred[:,1]))   # 연체자

19824.0
223.0
