## Linear regression with tensorflow

In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
import random
from sklearn.preprocessing import LabelEncoder, normalize

In [2]:
### 데이터 로딩 ###
credit_card = pd.read_csv('data/CreditCard.csv')
credit_card.head()

# share : 소득 대비 신용카드 지출 비율
# reports : 경멸적 보고서 수
# income : 수입. USD 10,000
# expenditure : 월 평균 카드 지출액
# dependents : 부양가족 수
# majorcards : 사용 카드 수
# active : 신용계정 수

Unnamed: 0,card,reports,age,income,share,expenditure,owner,selfemp,dependents,months,majorcards,active
0,yes,0,37.66667,4.52,0.03327,124.9833,yes,no,3,54,1,12
1,yes,0,33.25,2.42,0.005217,9.854167,no,no,3,34,1,13
2,yes,0,33.66667,4.5,0.004156,15.0,yes,no,4,58,1,5
3,yes,0,30.5,2.54,0.065214,137.8692,no,no,0,25,1,7
4,yes,0,32.16667,9.7867,0.067051,546.5033,yes,no,2,64,1,5


In [3]:
### 범주형 변수 더미화 ###

credit_cat = credit_card[["card", "owner", "selfemp"]]   # yes or no --> 1 or 0

label_enc = LabelEncoder()

card = label_enc.fit_transform(credit_cat["card"])
card.shape = (len(card), 1)

owner = label_enc.fit_transform(credit_cat["owner"])
owner.shape = (len(owner), 1)

selfemp = label_enc.fit_transform(credit_cat["selfemp"])
selfemp.shape = (len(selfemp), 1)

In [4]:
### 수치형 변수 정규화 ###
credit_num  = credit_card.drop(["card", "owner", "selfemp", "share"], axis = 1)
credit_num_norm = normalize(credit_num)

In [5]:
### X, y 데이터 설정 ###

credit_X = np.concatenate([card, owner, selfemp, credit_num_norm], axis = 1)

credit_y = np.array(credit_card['share'])
credit_y.shape = (len(credit_y), 1)

In [6]:
### train, test 데이터로 분할 ###
train_idx = random.sample(list(range(len(credit_card))), int(len(credit_card) * 0.7))

train_X = credit_X[train_idx, :]
train_y = credit_y[train_idx]

test_X = np.delete(credit_X, train_idx, axis = 0)
test_y = np.delete(credit_y, train_idx)
test_y.shape = (len(test_y), 1)

print(train_X.shape, test_X.shape)

(923, 11) (396, 11)


### Tensorflow Low API

In [7]:
X = tf.placeholder(dtype = tf.float32, shape = (None, 11))
y = tf.placeholder(dtype = tf.float32, shape = None)

W1 = tf.Variable(initial_value = tf.random_normal([11,4]), dtype = tf.float32)
b1 = tf.Variable(initial_value = tf.random_normal([4]), dtype = tf.float32)
L1 = tf.add(tf.matmul(X, W1), b1)

W2 = tf.Variable(initial_value = tf.random_normal([4, 1]), dtype = tf.float32)
b2 = tf.Variable(initial_value = tf.random_normal([1]), dtype = tf.float32)

hypo = tf.add(tf.matmul(L1, W2), b2)

In [8]:
cost = tf.reduce_mean(tf.square(hypo - y))

optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.01)
train = optimizer.minimize(cost)

sess = tf.Session()
sess.run(tf.global_variables_initializer())

In [9]:
for i in range(1000):
    _, cost_val = sess.run([train, cost], feed_dict = {X: train_X, y : train_y})
    if i % 100 == 0:
        print("cost: ", cost_val)
        
print("train_finished!")

cost:  4.4259224
cost:  0.36476627
cost:  0.15249616
cost:  0.08692374
cost:  0.05757766
cost:  0.0417399
cost:  0.032163683
cost:  0.025910072
cost:  0.02159193
cost:  0.018481601
train_finished!


In [10]:
pred_val, pred_cost = sess.run([hypo, cost], feed_dict = ({X: test_X, y : test_y}))
print("predict cost: ", pred_cost)

predict cost:  0.019291189


### Keras API

In [11]:
from tensorflow import keras
from tensorflow.keras import layers

In [12]:
### 모델 생성 ###
def build_model():
    model = keras.Sequential([
        layers.Dense(4, activation=tf.nn.relu, input_shape=[train_X.shape[1]]),
        layers.Dense(1),
    ])

    optimizer = tf.keras.optimizers.RMSprop(0.001)
    
    model.compile(loss='mse', 
                  optimizer=optimizer,
                  metrics=['mae', 'mse'])
    
    return model


modelK = build_model()
modelK.summary()

Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense (Dense)                (None, 4)                 48        
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 5         
Total params: 53
Trainable params: 53
Non-trainable params: 0
_________________________________________________________________


In [13]:
### 모델 훈련 ###

EPOCHS = 1000

history = modelK.fit(train_X, train_y, 
                     epochs=EPOCHS, 
                     validation_split = 0.3, 
                     verbose=0)

In [14]:
## 모델 예측 ###

test_predictions = modelK.predict(test_X)

loss, mae, mse = modelK.evaluate(test_X, test_y, verbose=0)

print("Testing set Mean Abs Error: {:10.7f}".format(mae))
print("Testing set Mean Sqr Error: {:10.7f}".format(mse))

Testing set Mean Abs Error:  0.0429409
Testing set Mean Sqr Error:  0.0040779
