#### iris 데이터셋 활용 꽃잎 너비 예측 모델
- 데이터셋 : iris.csv에서 3개 Feature 사용 (sepal.length, sepal.width, petal.length)
- 구현프레임워크 : Pytorch

- [1] 모듈 로딩 및 데이터 준비

In [2]:
# 모듈 로딩 
import torch                        # 텐서 및 수치 계산 함수 관련 모듈
import torch.nn as nn               # 인공신경망 관련 모듈
import torch.nn.functional as F     # 손실, 거리 등 함수 관련 모듈
import torch.optim as optimizer     # 최적화 기법 관련 모듈

import pandas as pd                 # 데이터 파일 분석 관련 모듈 
from sklearn.model_selection import train_test_split 
from torchmetrics.regression import R2Score

In [3]:
# Seed PyTorch's RNG so that random weight initialisation repeats between runs.
torch.manual_seed(1)

# Use the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

print(f'DEVICE:{DEVICE}')


DEVICE:cpu


In [4]:
# Data loading: CSV => DataFrame, keeping only the first four columns (by position).
DATA_FILE = '../data/iris.csv'

irisDF = pd.read_csv(filepath_or_buffer=DATA_FILE, usecols=list(range(4)))
irisDF.head(n=2)

Unnamed: 0,sepal.length,sepal.width,petal.length,petal.width
0,5.1,3.5,1.4,0.2
1,4.9,3.0,1.4,0.2


[2] 모델 준비
- 학습방법 : 지도학습 > 회귀
- 알고리즘 : 선형관계 >> 선형모델 ==> nn.Linear

In [5]:
# Inputs  : sepal.length, sepal.width, petal.length (3 features)
# Output  : petal.width (1 value)
# Plain single-layer linear model.
model = nn.Linear(in_features=3, out_features=1)

# Multi-layer regressor: 3 -> 10 -> 50 -> 1, with a ReLU after each hidden layer.
nn_model = nn.Sequential(
    nn.Linear(in_features=3, out_features=10),
    nn.ReLU(),
    nn.Linear(in_features=10, out_features=50),
    nn.ReLU(),
    nn.Linear(in_features=50, out_features=1),
)

In [6]:
# Inspect every learnable tensor (weights and biases) together with its registered name.
for param_name, param_value in nn_model.named_parameters():
    print(f'[{param_name}] {param_value}\n')

[0.weight] Parameter containing:
tensor([[-0.5435,  0.3462, -0.1188],
        [ 0.2937,  0.0803, -0.0707],
        [ 0.1601,  0.0285,  0.2109],
        [-0.2250, -0.0421, -0.0520],
        [ 0.0837, -0.0023,  0.5047],
        [ 0.1797, -0.2150, -0.3487],
        [-0.0968, -0.2490, -0.1850],
        [ 0.0276,  0.3442,  0.3138],
        [-0.5644,  0.3579,  0.1613],
        [ 0.5476,  0.3811, -0.5260]], requires_grad=True)

[0.bias] Parameter containing:
tensor([-0.5489, -0.2785,  0.5070, -0.0962,  0.2471, -0.2683,  0.5665, -0.2443,
         0.4330,  0.0068], requires_grad=True)

[2.weight] Parameter containing:
tensor([[-0.1666,  0.1625, -0.1679,  0.0930, -0.0913, -0.0347, -0.3040, -0.1508,
          0.1716, -0.0769],
        [ 0.3150,  0.2535, -0.0148, -0.2111,  0.1926,  0.0981, -0.2044,  0.2054,
          0.1920,  0.2805],
        [-0.1773, -0.0521, -0.0061,  0.0462, -0.2400, -0.2244,  0.1720, -0.0742,
          0.1545,  0.0180],
        [ 0.1038,  0.0695,  0.1150,  0.1568, -0.2929,  0

[3]최적화 인스턴스 준비 

In [7]:
# Hand the model's weights and biases to the optimizer instance that will update them.
adam_optim = optimizer.Adam(params=nn_model.parameters(), lr=0.1)

[4]학습 ==> 개발자가 구현

- [4-1] 데이터셋 Tensor화 진행 : 데이터 준비 시 진행하거나 학습 직전에 진행

In [8]:
# Feature matrix: every column except the last one, kept as a DataFrame here.
# The rows are turned into float tensors later, inside the training/testing functions.
featureTS = irisDF.loc[:, irisDF.columns[:-1]]

featureTS.shape

(150, 3)

In [9]:
# Target: the petal.width column, selected with a list so the result stays
# a two-dimensional DataFrame rather than a Series.
targetTS = irisDF.loc[:, ['petal.width']]
targetTS.shape

(150, 1)

In [10]:
# Step 1: hold out 20% of all rows as the test set.
X_train, X_test, y_train, y_test = train_test_split(
    featureTS, targetTS, test_size=0.2, random_state=5
)

# Step 2: carve 20% of the remaining rows off as the validation set.
X_train, X_val, y_train, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=5
)

print(f'[FEATURE] TRAIN {X_train.shape}, TEST {X_test.shape}, VAL {X_val.shape}')
print(f'[TARGET]  TRAIN {y_train.shape}, TEST {y_test.shape}, VAL {y_val.shape}')


[FEATURE] TRAIN (96, 3), TEST (30, 3), VAL (24, 3)
[TARGET]  TRAIN (96, 1), TEST (30, 1), VAL (24, 1)


- [4-2] 학습진행 
    * 학습횟수 결정 ==> 에포크 설정
    * 학습결과 저장 ==> 손실저장 변수

In [11]:
# Training hyper-parameters.
EPOCH = 1000
BATCH_SIZE = 10
# Number of full mini-batches per epoch; floor division leaves any remainder rows uncounted.
BATCH_CNT = len(X_train) // BATCH_SIZE

# Use the GPU when PyTorch can see one; otherwise stay on the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'

print(f'DEVICE:{DEVICE}, EPOCH:{EPOCH}, BATCH_SIZE:{BATCH_SIZE}, BATCH_CNT:{BATCH_CNT}')


DEVICE:cpu, EPOCH:1000, BATCH_SIZE:10, BATCH_CNT:9


In [12]:
## Test / validation function
#  ==> no weight or bias update, no optimisation step
#  ==> scores the model with its current weights and biases
def testing(testTS, targetTS, kind='Val'):
    """Evaluate ``nn_model`` on held-out data without updating it.

    Args:
        testTS: Feature rows; must expose ``.values`` (e.g. a DataFrame).
        targetTS: Target rows aligned with ``testTS``; must expose ``.values``.
        kind: Label used in the printed loss line.

    Returns:
        Tuple ``(loss, r2)``: the MSE loss tensor and the R2 score tensor.
    """
    # Follow the device the model's weights actually live on. Sending the data
    # to a different device than the model raises a device-mismatch error.
    device = next(nn_model.parameters()).device

    with torch.no_grad():    # no gradient tracking: weights and biases stay untouched
        testTS = torch.FloatTensor(testTS.values).to(device)
        targetTS = torch.FloatTensor(targetTS.values).to(device)

        #-(1) forward pass
        pre_y = nn_model(testTS)
        print(f'{pre_y.shape}')

        #-(2) error via the loss function
        loss = F.mse_loss(pre_y, targetTS)

        #-(4) report the result
        print(f'[{kind}] LOSS : {loss}')

    # Metric state is created on the CPU by default, so move it next to the data.
    r2_metric = R2Score().to(device)
    return loss, r2_metric(pre_y, targetTS)


In [13]:
# Quick check: number of training rows and how many full batches they yield.
len(X_train), len(X_train) // BATCH_SIZE

(96, 9)

In [14]:
# Model training function
def training(featureTS, targetTS, valTS, valTagetTS):
    """Train ``nn_model`` with mini-batches and record loss / R2 per epoch.

    Args:
        featureTS: Training features; must expose ``.values`` and ``.shape``.
        targetTS: Training targets aligned row-for-row with ``featureTS``.
        valTS: Validation features, forwarded to ``testing``.
        valTagetTS: Validation targets, forwarded to ``testing``.

    Returns:
        Tuple ``(loss_history, r2_history)``. Each is ``[train, val]``, two
        lists holding one value per epoch.

    Raises:
        ValueError: If the training data holds fewer than ``BATCH_SIZE`` rows.
    """
    # Derive the batch count from the data that was actually passed in, not
    # from a global computed on some other frame. Rows beyond the last full
    # batch are skipped.
    batch_cnt = featureTS.shape[0] // BATCH_SIZE
    if batch_cnt == 0:
        raise ValueError(
            f'need at least BATCH_SIZE={BATCH_SIZE} rows, got {featureTS.shape[0]}'
        )

    # The batches are sent to DEVICE, so the model has to live there too.
    # Module.to() moves the parameters in place, so the optimizer keeps
    # pointing at the same parameter objects.
    nn_model.to(DEVICE)

    # Convert the whole training set once instead of once per batch.
    all_features = torch.FloatTensor(featureTS.values).to(DEVICE)
    all_targets = torch.FloatTensor(targetTS.values).to(DEVICE)

    loss_history = [[], []]   # [train, val]
    r2_history = [[], []]     # [train, val]

    for epoch in range(EPOCH):
        loss_total, r2_total = 0, 0
        for i in range(batch_cnt):
            start = i * BATCH_SIZE
            end = start + BATCH_SIZE

            BSX_train = all_features[start:end]
            BSy_train = all_targets[start:end]

            #-(1) forward pass
            pre_y = nn_model(BSX_train)
            print(f'pre_y.shape : {pre_y.shape}')

            #-(2) error via the loss function
            loss = F.mse_loss(pre_y, BSy_train)
            loss_total += loss.item()
            # A fresh metric per batch gives a per-batch score; it is moved to
            # DEVICE because metric state starts out on the CPU.
            r2_total += R2Score().to(DEVICE)(pre_y, BSy_train).item()

            #-(3) optimisation: backward pass, then weight/bias update
            adam_optim.zero_grad()
            loss.backward()
            adam_optim.step()

        #- store the per-epoch mean of the batch losses and scores
        loss_history[0].append(loss_total / batch_cnt)
        r2_history[0].append(r2_total / batch_cnt)

        #-(4) validation: check how the model does on unseen rows
        val_loss, val_r2 = testing(valTS, valTagetTS)
        loss_history[1].append(val_loss.item())
        r2_history[1].append(val_r2.item())

        #-(4) report the epoch result
        print(f'[{epoch}/{EPOCH}]\n*TAIN_LOSS : {loss_history[0][-1]} , TAIN_R2 : {r2_history[0][-1]}')
        print(f'*VAL_LOSS : {loss_history[1][-1]} , VAL_R2 : {r2_history[1][-1]}')

    return loss_history, r2_history

In [15]:
# Run training on the train split and validate on the validation split.
loss, r2 = training(
    featureTS=X_train,
    targetTS=y_train,
    valTS=X_val,
    valTagetTS=y_val,
)

pre_y.shape : torch.Size([10, 1])
pre_y.shape : torch.Size([10, 1])
pre_y.shape : torch.Size([10, 1])
pre_y.shape : torch.Size([10, 1])
pre_y.shape : torch.Size([10, 1])
pre_y.shape : torch.Size([10, 1])
pre_y.shape : torch.Size([10, 1])
pre_y.shape : torch.Size([10, 1])
pre_y.shape : torch.Size([10, 1])
torch.Size([24, 1])
[Val] LOSS : 0.20035439729690552
[0/1000]
*TAIN_LOSS : 2.6561427083280353 , TAIN_R2 : -3.157400071620941
*VAL_LOSS : 0.20035439729690552 , VAL_R2 : 0.5199296474456787
pre_y.shape : torch.Size([10, 1])
pre_y.shape : torch.Size([10, 1])
pre_y.shape : torch.Size([10, 1])
pre_y.shape : torch.Size([10, 1])
pre_y.shape : torch.Size([10, 1])
pre_y.shape : torch.Size([10, 1])
pre_y.shape : torch.Size([10, 1])
pre_y.shape : torch.Size([10, 1])
pre_y.shape : torch.Size([10, 1])
torch.Size([24, 1])
[Val] LOSS : 0.10831045359373093
[1/1000]
*TAIN_LOSS : 0.4747772051228417 , TAIN_R2 : 0.16401026646296182
*VAL_LOSS : 0.10831045359373093 , VAL_R2 : 0.7404767274856567
pre_y.shape :

In [None]:
# Plot loss and R2 for the first epochs after training
import matplotlib.pyplot as plt
THRESHOLD=60

# Never ask for more epochs than were recorded: a fixed x-range of THRESHOLD
# points against a shorter history makes plot() fail on mismatched lengths.
shown_epochs = min(THRESHOLD, len(loss[0]))
epoch_axis = range(1, shown_epochs + 1)

fg, axes=plt.subplots(1,2, figsize=(10,5), sharex=True, sharey=True)

# Both panels are built the same way: index 0 of a history is train, index 1 is validation.
panels = (
    (axes[0], loss, "Loss", "LOSS"),
    (axes[1], r2, "R2", "R2"),
)
for ax, history, y_label, title in panels:
    ax.plot(epoch_axis, history[0][:shown_epochs], label='Train')
    ax.plot(epoch_axis, history[1][:shown_epochs], label='Val')
    ax.grid()
    ax.legend()
    ax.set_xlabel("Epoch")
    ax.set_ylabel(y_label)
    ax.set_title(title)

plt.tight_layout()
plt.show()