# 미니 배치(Mini Batch)와 데이터 로더(DataLoader)

In [380]:

import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torch.utils.data import TensorDataset, Dataset

# One shared seed for every random number generator used in this notebook.
seed = 42

# Seed Python's `random`, NumPy and PyTorch in turn.
for _seed_fn in (random.seed, np.random.seed, torch.manual_seed):
    _seed_fn(seed)

# Ask PyTorch to use deterministic algorithms.
torch.use_deterministic_algorithms(True)

# Only when CUDA is present (optional here, since staying on the CPU is recommended)
if torch.cuda.is_available():
    torch.cuda.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    # The two lines below are optional, depending on environment and version
    # torch.backends.cuda.matmul.allow_tf32 = False
    # torch.backends.cudnn.allow_tf32 = False


In [381]:
# Training data: five samples with three input values each, and one target per sample.
x_train = torch.tensor(
    [
        [73, 80, 75],
        [93, 88, 93],
        [89, 91, 90],
        [96, 98, 100],
        [73, 66, 70],
    ],
    dtype=torch.float32,
)
y_train = torch.tensor(
    [[152], [185], [180], [196], [142]],
    dtype=torch.float32,
)


In [382]:
# Pair the input and target tensors in a TensorDataset and keep it as `dataset`.
dataset = TensorDataset(x_train, y_train)

# Serve the dataset in shuffled mini-batches of two samples.
# No explicit generator is passed here; a seeded one could be supplied instead:
#   g = torch.Generator().manual_seed(42)
#   dataLoader = DataLoader(dataset, batch_size=2, shuffle=True, generator=g)
dataLoader = DataLoader(
    dataset=dataset,
    shuffle=True,
    batch_size=2,
)

In [383]:
# Linear model mapping three input values to one output, trained with plain SGD.
model = nn.Linear(3, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5)

nb_epochs = 20
# range(nb_epochs + 1) runs epochs 0 through nb_epochs inclusive.
for epoch in range(nb_epochs + 1):
    for batch_idx, samples in enumerate(dataLoader):
        # Unpack into batch-local names so the module-level x_train / y_train
        # tensors (the full dataset) are not overwritten by a mini-batch.
        xb, yb = samples

        # Forward pass: compute H(x)
        prediction = model(xb)

        # Mean squared error between the prediction and the targets
        cost = F.mse_loss(prediction, yb)

        # Reset gradients, backpropagate, then update the parameters
        optimizer.zero_grad()
        cost.backward()
        optimizer.step()

        # One log line per mini-batch
        print('Epoch: {:4d}/{}, Cost: {:.6f}'.format(epoch, nb_epochs, cost.item()))

Epoch:    0/20, Cost: 7937.602539
Epoch:    0/20, Cost: 5775.528320
Epoch:    0/20, Cost: 1678.240967
Epoch:    1/20, Cost: 273.867584
Epoch:    1/20, Cost: 73.110306
Epoch:    1/20, Cost: 3.771547
Epoch:    2/20, Cost: 28.770439
Epoch:    2/20, Cost: 1.102020
Epoch:    2/20, Cost: 0.018248
Epoch:    3/20, Cost: 4.035028
Epoch:    3/20, Cost: 4.250602
Epoch:    3/20, Cost: 3.958240
Epoch:    4/20, Cost: 3.035414
Epoch:    4/20, Cost: 4.048670
Epoch:    4/20, Cost: 1.960239
Epoch:    5/20, Cost: 2.819942
Epoch:    5/20, Cost: 4.009200
Epoch:    5/20, Cost: 2.040997
Epoch:    6/20, Cost: 3.645667
Epoch:    6/20, Cost: 0.428303
Epoch:    6/20, Cost: 7.242025
Epoch:    7/20, Cost: 3.668132
Epoch:    7/20, Cost: 3.111837
Epoch:    7/20, Cost: 2.968807
Epoch:    8/20, Cost: 1.356676
Epoch:    8/20, Cost: 4.052970
Epoch:    8/20, Cost: 3.584163
Epoch:    9/20, Cost: 3.512377
Epoch:    9/20, Cost: 3.366941
Epoch:    9/20, Cost: 0.073125
Epoch:   10/20, Cost: 0.770133
Epoch:   10/20, Cost: 5.54

In [384]:
# import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset

# ---- Reproducibility settings ----
seed = 42

# Optional environment variable for avoiding CUDA nondeterminism (only relevant when CUDA is used)
# os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":16:8"  # or ":4096:8"

# Seed PyTorch, NumPy and Python's `random` with the same value.
torch.manual_seed(seed)
np.random.seed(seed)
random.seed(seed)

# Ask PyTorch to use deterministic algorithms.
torch.use_deterministic_algorithms(True)

# Only when CUDA is present (optional here, since staying on the CPU is recommended)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)
    torch.cuda.manual_seed(seed)
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True
    # The two lines below are optional, depending on environment and version
    # torch.backends.cuda.matmul.allow_tf32 = False
    # torch.backends.cudnn.allow_tf32 = False

# (Most reliable option) pin everything to the CPU
# device = torch.device("cpu")

# ---- Data ----
# Five samples with three input values each, and one target per sample.
x_train = torch.tensor(
    [[73, 80, 75], [93, 88, 93], [89, 91, 90], [96, 98, 100], [73, 66, 70]],
    dtype=torch.float32,
)
y_train = torch.tensor(
    [[152], [185], [180], [196], [142]],
    dtype=torch.float32,
)

dataset = TensorDataset(x_train, y_train)

# Dedicated seeded Generator so the DataLoader's shuffle order is reproducible
g = torch.Generator()
g.manual_seed(seed)

dataLoader = DataLoader(
    dataset,
    generator=g,
    shuffle=True,
    batch_size=2,
    # Avoid multi-worker nondeterminism (seed via worker_init_fn if workers are needed)
    num_workers=0,
    pin_memory=False,
)

# ---- Model / optimizer ----
# Model initialization is also deterministic thanks to the global seed
model = nn.Linear(in_features=3, out_features=1)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5)

# ---- Training ----
nb_epochs = 20
# range(nb_epochs + 1) runs epochs 0 through nb_epochs inclusive.
for epoch in range(nb_epochs + 1):
    for batch_idx, (xb, yb) in enumerate(dataLoader):
        # Clear the previous step's gradients before this batch's backward pass
        optimizer.zero_grad(set_to_none=True)

        # Forward pass and mean squared error for the mini-batch
        pred = model(xb)
        cost = F.mse_loss(pred, yb)

        # Backpropagate and update the parameters
        cost.backward()
        optimizer.step()

        # One log line per mini-batch
        print(f'Epoch: {epoch:4d}/{nb_epochs}, Cost: {cost.item():.6f}')


Epoch:    0/20, Cost: 13637.417969
Epoch:    0/20, Cost: 2035.193115
Epoch:    0/20, Cost: 580.398499
Epoch:    1/20, Cost: 376.903015
Epoch:    1/20, Cost: 151.049225
Epoch:    1/20, Cost: 39.716576
Epoch:    2/20, Cost: 15.853495
Epoch:    2/20, Cost: 6.992236
Epoch:    2/20, Cost: 1.867033
Epoch:    3/20, Cost: 0.409047
Epoch:    3/20, Cost: 5.195969
Epoch:    3/20, Cost: 6.783936
Epoch:    4/20, Cost: 3.021986
Epoch:    4/20, Cost: 3.929235
Epoch:    4/20, Cost: 0.461374
Epoch:    5/20, Cost: 3.522270
Epoch:    5/20, Cost: 2.206404
Epoch:    5/20, Cost: 3.202274
Epoch:    6/20, Cost: 4.330459
Epoch:    6/20, Cost: 2.630559
Epoch:    6/20, Cost: 0.031720
Epoch:    7/20, Cost: 2.637102
Epoch:    7/20, Cost: 1.699211
Epoch:    7/20, Cost: 6.615451
Epoch:    8/20, Cost: 1.593786
Epoch:    8/20, Cost: 4.989443
Epoch:    8/20, Cost: 4.372359
Epoch:    9/20, Cost: 1.190410
Epoch:    9/20, Cost: 4.023237
Epoch:    9/20, Cost: 5.780277
Epoch:   10/20, Cost: 3.113744
Epoch:   10/20, Cost: 1.

In [385]:
# Declare an arbitrary input [73, 80, 75] as a batch holding one sample
new_var = torch.tensor([[73, 80, 75]], dtype=torch.float32)
# Feed it to the model and keep the returned prediction in pred_y
pred_y = model(new_var)
# The message reads: "Prediction after training for the input 73, 80, 75 :"
print("훈련 후 입력이 73, 80, 75일 때의 예측값 :", pred_y)

훈련 후 입력이 73, 80, 75일 때의 예측값 : tensor([[153.1775]], grad_fn=<AddmmBackward0>)


# 커스텀 데이터셋 만드는 방법

In [None]:
class CustomDataset(torch.utils.data.Dataset):
    """Skeleton listing the methods a custom dataset usually defines.

    Every method body is intentionally empty; this is a template only.
    """

    def __init__(self):
        """Constructor: dataset loading / preprocessing goes here."""

    def __len__(self):
        """Placeholder for the dataset length (total number of samples)."""

    def __getitem__(self, idx):
        """Placeholder for fetching the single sample at position ``idx``."""


# Subclasses of torch.utils.data.Dataset are expected to implement __getitem__,
# and __len__ as well when used with DataLoader's default options.
# Otherwise, simply use TensorDataset.

In [388]:
class CustomDataset(Dataset):
    """In-memory dataset of (input, target) pairs served as float32 tensors.

    Constructed without arguments it holds the five built-in samples
    (three input values and one target each). Other data can be supplied
    through the optional constructor arguments.
    """

    def __init__(self, x_data=None, y_data=None):
        """Store the samples.

        Args:
            x_data: Optional sequence of per-sample input lists. Defaults to
                the five built-in samples.
            y_data: Optional sequence of per-sample target lists. Defaults to
                the five built-in targets.

        Raises:
            ValueError: If ``x_data`` and ``y_data`` differ in length.
        """
        if x_data is None:
            x_data = [[73, 80, 75],
                      [93, 88, 93],
                      [89, 91, 90],
                      [96, 98, 100],
                      [73, 66, 70]]
        if y_data is None:
            y_data = [[152], [185], [180], [196], [142]]

        # Fail at construction time rather than with an IndexError mid-epoch.
        if len(x_data) != len(y_data):
            raise ValueError(
                "x_data and y_data must have the same length, got {} and {}".format(
                    len(x_data), len(y_data)
                )
            )

        self.x_data = x_data
        self.y_data = y_data

    def __len__(self):
        """Return the total number of samples."""
        return len(self.x_data)

    def __getitem__(self, idx):
        """Return the sample at ``idx`` as a pair of new float32 tensors ``(x, y)``."""
        x = torch.tensor(self.x_data[idx], dtype=torch.float32)
        y = torch.tensor(self.y_data[idx], dtype=torch.float32)
        return x, y

In [None]:
# Instantiate the custom dataset and serve it in shuffled mini-batches of two.
dataset = CustomDataset()
dataLoader = DataLoader(
    dataset=dataset,
    shuffle=True,
    batch_size=2,
)
# print(dataset.__len__())  # optional: show the number of samples

# New linear model (three inputs -> one output) trained with plain SGD.
model = torch.nn.Linear(in_features=3, out_features=1)
optimizer = torch.optim.SGD(model.parameters(), lr=1e-5)

5


In [390]:
nb_epochs = 20
# range(nb_epochs + 1) runs epochs 0 through nb_epochs inclusive.
for epoch in range(nb_epochs + 1):
    for inputs, targets in dataLoader:
        # Clear the previous step's gradients before this batch's backward pass
        optimizer.zero_grad()

        # Forward pass and mean squared error for the mini-batch
        output = model(inputs)
        cost = F.mse_loss(output, targets)

        # Backpropagate and update the parameters
        cost.backward()
        optimizer.step()

        # One log line per mini-batch
        print('Epoch {:4d}/{}, Cost: {:.6f}'.format(epoch, nb_epochs, cost.item()))

Epoch    0/20, Cost: 35803.921875
Epoch    0/20, Cost: 16515.566406
Epoch    0/20, Cost: 2256.199951
Epoch    1/20, Cost: 1425.250488
Epoch    1/20, Cost: 427.543610
Epoch    1/20, Cost: 132.186142
Epoch    2/20, Cost: 51.370708
Epoch    2/20, Cost: 28.593304
Epoch    2/20, Cost: 0.445428
Epoch    3/20, Cost: 9.191752
Epoch    3/20, Cost: 5.633094
Epoch    3/20, Cost: 0.976640
Epoch    4/20, Cost: 5.322802
Epoch    4/20, Cost: 4.743402
Epoch    4/20, Cost: 1.807387
Epoch    5/20, Cost: 6.002551
Epoch    5/20, Cost: 0.579418
Epoch    5/20, Cost: 8.797900
Epoch    6/20, Cost: 4.970479
Epoch    6/20, Cost: 3.671367
Epoch    6/20, Cost: 6.464517
Epoch    7/20, Cost: 3.655384
Epoch    7/20, Cost: 4.759625
Epoch    7/20, Cost: 3.824538
Epoch    8/20, Cost: 5.676812
Epoch    8/20, Cost: 3.522624
Epoch    8/20, Cost: 1.063193
Epoch    9/20, Cost: 0.531859
Epoch    9/20, Cost: 9.495720
Epoch    9/20, Cost: 6.467854
Epoch   10/20, Cost: 7.966233
Epoch   10/20, Cost: 3.648458
Epoch   10/20, Cost:

In [391]:
# Query the model with a single un-batched sample [73, 80, 75].
test = torch.tensor([73, 80, 75], dtype=torch.float32)
test_result = model(test)

# Show the model's output for that sample.
print(test_result)

tensor([153.6559], grad_fn=<ViewBackward0>)
