In [2]:
import torch
import keras
from sklearn.model_selection import train_test_split

# Load the IMDB review dataset with the vocabulary limited via num_words=500.
(train_input, train_target), (test_input, test_target) = (
    keras.datasets.imdb.load_data(num_words=500)
)

# Carve a validation set (20%) out of the training data; the fixed
# random_state keeps the split reproducible across runs.
train_input, val_input, train_target, val_target = train_test_split(
    train_input,
    train_target,
    test_size=0.2,
    random_state=42,
)

In [3]:
# 패딩 처리
from keras.preprocessing.sequence import pad_sequences

# Bring every review to a fixed length of 100 tokens, training split first,
# then validation split.
train_seq, val_seq = (
    pad_sequences(reviews, maxlen=100) for reviews in (train_input, val_input)
)

In [4]:
# Sanity check: the padded inputs and the targets should have the same number of samples.
print(train_seq.shape, train_target.shape)

(20000, 100) (20000,)


In [5]:
# Convert the padded sequence arrays into PyTorch tensors (torch.tensor copies the data).
train_seq, val_seq = map(torch.tensor, (train_seq, val_seq))

In [6]:
# Inspect the dtype of the targets before they are converted to tensors.
train_target.dtype

dtype('int64')

In [7]:
# Turn the integer targets into float32 tensors.
# NOTE(review): presumably needed because the loss used later expects float
# targets; the training loop is not visible here, so confirm.
train_target, val_target = (
    torch.tensor(labels, dtype=torch.float32)
    for labels in (train_target, val_target)
)

In [8]:
# Verify that the targets now carry the float32 dtype requested in the conversion.
train_target.dtype

torch.float32

In [9]:
#  TensorDataset, DataLoader 
from torch.utils.data import TensorDataset, DataLoader

# Training pipeline: pair inputs with targets, then serve them in shuffled
# mini-batches of 32 samples.
train_dataset = TensorDataset(train_seq, train_target)
train_loader = DataLoader(dataset=train_dataset, batch_size=32, shuffle=True)

# Validation pipeline: same batch size, but the order is kept fixed.
val_dataset = TensorDataset(val_seq, val_target)
val_loader = DataLoader(dataset=val_dataset, batch_size=32, shuffle=False)

In [10]:
# 층 구성 - nn.Module 클래스 상속을 통한 층 구성
import torch.nn as nn

class IMDBRnn(nn.Module):
    """Binary classifier: embedding -> single-layer RNN -> linear -> sigmoid.

    Args:
        vocab_size: Number of distinct token ids accepted by the embedding table.
        embedding_dim: Length of each token's embedding vector.
        hidden_size: Number of features in the RNN hidden state.

    The defaults (500, 16, 8) reproduce the originally hard-coded architecture.
    """

    def __init__(self, vocab_size=500, embedding_dim=16, hidden_size=8):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        # nn.RNN expects (seq_len, batch, features) by default; batch_first=True
        # switches it to (batch, seq_len, features), which is the layout the
        # embedding layer produces for (batch, seq_len) token ids.
        self.rnn = nn.RNN(embedding_dim, hidden_size, batch_first=True)
        self.dense = nn.Linear(hidden_size, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Compute one probability per input sequence.

        Args:
            x: Integer token ids of shape (batch, seq_len).

        Returns:
            Tensor of shape (batch, 1) holding sigmoid outputs in [0, 1].
        """
        x = self.embedding(x)
        _, hidden = self.rnn(x)
        # hidden has shape (num_layers, batch, hidden_size); hidden[-1] is the
        # final hidden state of the last layer. Classify from that state, not
        # from the raw embeddings, whose feature size does not match the
        # linear layer's input size.
        outputs = self.dense(hidden[-1])
        return self.sigmoid(outputs)


In [11]:
from torchinfo import summary
# Instantiate the model with its default layer sizes.
model = IMDBRnn()

# Display the per-layer parameter counts (the value of the last expression is the cell output).
summary(model)


Layer (type:depth-idx)                   Param #
IMDBRnn                                  --
├─Embedding: 1-1                         8,000
├─RNN: 1-2                               208
├─Linear: 1-3                            9
├─Sigmoid: 1-4                           --
Total params: 8,217
Trainable params: 8,217
Non-trainable params: 0