### Pytorch로 iris 분석

In [1]:
# PyTorch 라이브러리 임포트
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, precision_score, recall_score

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
data = pd.read_csv('irisdata.txt', sep = ',', engine='python', encoding = 'cp949',header = None)

In [2]:
data.columns = ['A1','A2','A3','A4','target']

In [3]:
data.head()

Unnamed: 0,A1,A2,A3,A4,target
0,5.1,3.5,1.4,0.2,Iris-setosa
1,4.9,3.0,1.4,0.2,Iris-setosa
2,4.7,3.2,1.3,0.2,Iris-setosa
3,4.6,3.1,1.5,0.2,Iris-setosa
4,5.0,3.6,1.4,0.2,Iris-setosa


In [4]:
data.target.unique()

array(['Iris-setosa', 'Iris-versicolor', 'Iris-virginica'], dtype=object)

In [5]:
data['target']=data['target'].replace('Iris-setosa',0)
data['target']=data['target'].replace('Iris-versicolor',1)
data['target']=data['target'].replace('Iris-virginica',2)

In [6]:
data.head()

Unnamed: 0,A1,A2,A3,A4,target
0,5.1,3.5,1.4,0.2,0
1,4.9,3.0,1.4,0.2,0
2,4.7,3.2,1.3,0.2,0
3,4.6,3.1,1.5,0.2,0
4,5.0,3.6,1.4,0.2,0


In [7]:
#train test 분리
# 7대3로 분리
from sklearn.model_selection import train_test_split  
train_X, test_X, train_y, test_y = train_test_split(data[data.columns[0:4]].values,data.target.values, test_size=0.3)

In [8]:
print(len(train_X))
print(len(test_X))

105
45


In [9]:
train_X = Variable(torch.Tensor(train_X).float())
test_X = Variable(torch.Tensor(test_X).float())
train_y = Variable(torch.Tensor(train_y).long())
test_y = Variable(torch.Tensor(test_y).long())

# 텐서로 변환한 데이터 건수 확인
print(train_X.shape)
print(train_y.shape)

torch.Size([105, 4])
torch.Size([105])


In [10]:
# 설명변수와 목적변수의 텐서를 합침
train = TensorDataset(train_X, train_y)

# 텐서의 첫 번째 데이터 내용 확인
print(train[0])

# 미니배치로 분할
train_loader = DataLoader(train, batch_size=12, shuffle=True)

(tensor([5.1000, 3.4000, 1.5000, 0.2000]), tensor(0))


In [11]:
# 신경망 구성 텐서 배치에 맞게 변환
class Net(nn.Module):
    # define nn
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(4, 100)
        self.fc2 = nn.Linear(100, 100)
        self.fc3 = nn.Linear(100, 3)
        self.softmax = nn.Softmax(dim=1) # 1차원으로

    def forward(self, X):
        X = F.relu(self.fc1(X))
        X = self.fc2(X)
        X = self.fc3(X)
        X = self.softmax(X)

        return X

    # 인스턴스 생성
model = Net()

### 모형 학습

In [12]:
# 오차함수 객체
criterion = nn.CrossEntropyLoss()

# 최적화를 담당할 객체
optimizer = optim.SGD(model.parameters(), lr=0.01)

# 학습 시작
for epoch in range(150):
    total_loss = 0
    # 분할해 둔 데이터를 꺼내옴
    for X_train, y_train in train_loader:
        # 계산 그래프 구성
        X_train, y_train = Variable(X_train), Variable(y_train)
        # 경사 초기화
        optimizer.zero_grad()
        # 순전파 계산
        output = model(X_train)
        # 오차계산
        loss = criterion(output, y_train)
        # 역전파 계산
        loss.backward()
        # 가중치 업데이트
        optimizer.step()
        # 누적 오차 계산
        total_loss += loss.data
    # 50회 반복마다 누적오차 출력
    if (epoch+1) % 50 == 0:
        print(epoch+1, total_loss)

50 tensor(6.6370)
100 tensor(5.9095)
150 tensor(5.6481)


In [13]:
# 계산 그래프 구성
test_X, test_y = Variable(test_X), Variable(test_y)
# 출력이 0 혹은 1이 되게 함
result = torch.max(model(test_X).data, 1)[1]
# 모형의 정확도 측정
accuracy = sum(test_y.data.numpy() == result.numpy()) / len(test_y.data.numpy())

# 모형의 정확도 출력
accuracy

0.9777777777777777