In [4]:
! pip3 install torch torchvision torchaudio



In [5]:
import torch
torch.__version__

'2.7.1'

In [6]:
class Neural_Network(torch.nn.Module):
    """Fully connected classifier: num_inputs -> 30 -> 20 -> num_outputs.

    Both hidden layers are followed by ReLU. The output layer has no
    activation, so ``forward`` returns raw logits.
    """

    def __init__(self, num_inputs, num_outputs):
        super().__init__()
        hidden_1, hidden_2 = 30, 20
        # Modules are created in network order and wrapped in one Sequential,
        # so they are registered as layers.0 ... layers.4.
        stack = [
            torch.nn.Linear(num_inputs, hidden_1),   # 1st hidden layer
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_1, hidden_2),     # 2nd hidden layer
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_2, num_outputs),  # output layer
        ]
        self.layers = torch.nn.Sequential(*stack)

    def forward(self, x):
        """Run ``x`` through the layer stack and return the logits."""
        return self.layers(x)

In [7]:
# Build a network with 50 input features and 3 output classes.
model = Neural_Network(num_inputs=50, num_outputs=3)
# Bare last expression: the notebook displays the module's layer summary.
model

Neural_Network(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)

In [8]:
# Weight matrix of the first Linear layer.
model.layers[0].weight
# requires_grad=True means the tensor is trainable; this is the default for
# the weights and biases of torch.nn.Linear.

Parameter containing:
tensor([[-0.1196,  0.1129, -0.1038,  ..., -0.0010, -0.0404, -0.0215],
        [-0.1145, -0.0262, -0.1290,  ..., -0.0754,  0.1193, -0.0222],
        [-0.0237,  0.0068,  0.1191,  ..., -0.1407, -0.1231,  0.0694],
        ...,
        [-0.0994, -0.0765,  0.0389,  ..., -0.0599, -0.0290, -0.1104],
        [ 0.0112,  0.0724,  0.1370,  ...,  0.1413,  0.1176, -0.0690],
        [-0.0118, -0.1131, -0.0481,  ...,  0.1038,  0.0209,  0.1037]],
       requires_grad=True)

In [9]:
# The weight of a Linear layer is stored as (out_features, in_features).
model.layers[0].weight.shape

torch.Size([30, 50])

In [10]:
torch.manual_seed(123)  # fix the RNG so the random input below is reproducible

X = torch.rand((1, 50))  # one random example with 50 features
out = model(X)  # forward pass; autograd records the graph (see grad_fn in the output)
print(out)

tensor([[ 0.0885, -0.1673,  0.1940]], grad_fn=<AddmmBackward0>)


In [11]:
# Same forward pass, but without recording a computation graph for backprop.
with torch.no_grad():
    out = model(X)
print(out)

tensor([[ 0.0885, -0.1673,  0.1940]])


In [12]:
with torch.no_grad():  # tell PyTorch not to track gradients, which saves memory and computation
    out = torch.softmax(model(X), dim=1)  # normalize the logits along dim=1 into class probabilities
print(out)

tensor([[0.3465, 0.2683, 0.3851]])


In [13]:
# Count the scalar entries of every parameter tensor that requires gradients.
num_params = sum(
    p.numel() for p in model.parameters() if p.requires_grad
)
print("Total number of trainable model parameters:", num_params)

Total number of trainable model parameters: 2213


## Dataloader

In [14]:
# Toy training set: five examples with two features each.
X_train = torch.tensor(
    [[-1.2, 3.1], [-0.9, 2.9], [-0.5, 2.6], [2.3, -1.1], [2.7, -1.5]]
)  # features, one row per example

# Class label for each row of X_train, in the same order.
y_train = torch.tensor([0, 0, 0, 1, 1])

In [15]:
# Toy test set: two examples with two features each.
X_test = torch.tensor([[-0.8, 2.8], [2.6, -1.6]])  # features, one row per example

# Class label for each row of X_test, in the same order.
y_test = torch.tensor([0, 1])

In [16]:
from torch.utils.data import Dataset

class ToyDataset(Dataset):
    """Map-style dataset that pairs each feature row with its class label."""

    def __init__(self, X, y):
        # Keep references to the inputs; nothing is copied or converted.
        self.features, self.labels = X, y

    def __getitem__(self, index):
        """Return the (features, label) pair stored at ``index``."""
        return self.features[index], self.labels[index]

    def __len__(self):
        """Return the number of examples, taken from the first dim of the labels."""
        return self.labels.shape[0]

In [17]:
# Wrap the train and test tensors so they can be handed to a DataLoader.
train_ds = ToyDataset(X_train, y_train)
test_ds = ToyDataset(X_test, y_test)

In [19]:
# Calls ToyDataset.__len__, i.e. the number of test labels.
len(test_ds)

2

## 高效率的資料載入器

In [20]:
from torch.utils.data import DataLoader

torch.manual_seed(123)  # seed the global RNG so the shuffle order is reproducible

train_loader = DataLoader(
    dataset=train_ds,
    batch_size=2,
    shuffle=True,  # reshuffle the dataset on every pass
    num_workers=0  # load data in the main process (no worker subprocesses)
)

In [21]:
# Test loader: same batch size as training, but the order is kept fixed.
test_loader = DataLoader(
    dataset=test_ds,
    batch_size=2,
    shuffle=False,
    num_workers=0
)

In [22]:
# batch_size is 2, but the third batch holds a single example: there are five
# training examples, which is not divisible by 2.
# Note: this loop rebinds the notebook-level name X to the current batch.
for idx, (X,y) in enumerate(train_loader):
    print(f"Batch {idx+1} - X: {X}, y: {y}")

Batch 1 - X: tensor([[ 2.3000, -1.1000],
        [-0.9000,  2.9000]]), y: tensor([1, 0])
Batch 2 - X: tensor([[-1.2000,  3.1000],
        [-0.5000,  2.6000]]), y: tensor([0, 0])
Batch 3 - X: tensor([[ 2.7000, -1.5000]]), y: tensor([1])


In [23]:
# Rebuild the training loader so that an incomplete final batch is discarded.
train_loader = DataLoader(
    dataset=train_ds,
    batch_size=2,
    shuffle=True,
    num_workers=0,
    drop_last=True
)
# A noticeably smaller last batch in an epoch can disturb convergence during
# training; drop_last=True avoids this by dropping the last (incomplete) batch
# of every epoch.
# NOTE(review): num_workers=4 is said to work well on many real-world datasets,
# but the best value depends on the hardware and on the loading code in the
# Dataset class - measure before relying on it.

In [24]:
# With drop_last=True only the full batches of two examples are produced.
for idx, (X,y) in enumerate(train_loader):
    print(f"Batch {idx+1} - X: {X}, y: {y}")

Batch 1 - X: tensor([[-1.2000,  3.1000],
        [-0.5000,  2.6000]]), y: tensor([0, 0])
Batch 2 - X: tensor([[ 2.3000, -1.1000],
        [-0.9000,  2.9000]]), y: tensor([1, 0])


## 典型的訓練循環 

In [27]:
import torch.nn.functional as F

# Seed before building the model so weight initialization is reproducible.
torch.manual_seed(123)
model = Neural_Network(num_inputs=2, num_outputs=2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.5)
# The learning rate is a hyperparameter: a tunable setting that has to be
# chosen experimentally by watching the loss. Ideally it is picked so that the
# loss converges after a certain number of epochs - and the number of epochs
# is itself another hyperparameter.

num_epochs = 3

for epoch in range(num_epochs):
    model.train()  # put the model in training mode
    for batch_idx, (feature, labels) in enumerate(train_loader):
        logits = model(feature)
        loss = F.cross_entropy(logits, labels)  # loss function, applied to the raw logits

        optimizer.zero_grad()  # clear gradients left over from the previous step
        loss.backward()  # compute gradients through the graph PyTorch built in the background
        optimizer.step()  # update the parameters with the gradients to reduce the loss (plain SGD: param -= lr * grad)
        ### LOGGING
        # NOTE(review): batch_idx is printed 0-based while the epoch is printed
        # 1-based, and the value logged is the training loss even though the
        # label reads "Train/Val".
        print(f"Epoch: {epoch+1:03d}/{num_epochs:03d}"
              f" | Batch {batch_idx:03d}/{len(train_loader):03d}"
              f" | Train/Val Loss: {loss:.2f}")
    
    model.eval()  # switch to evaluation mode at the end of the epoch (no evaluation code follows here)

Epoch: 001/003 | Batch 000/002 | Train/Val Loss: 0.75
Epoch: 001/003 | Batch 001/002 | Train/Val Loss: 0.65
Epoch: 002/003 | Batch 000/002 | Train/Val Loss: 0.44
Epoch: 002/003 | Batch 001/002 | Train/Val Loss: 0.13
Epoch: 003/003 | Batch 000/002 | Train/Val Loss: 0.03
Epoch: 003/003 | Batch 001/002 | Train/Val Loss: 0.00


In [28]:
# Inference on the full training set: evaluation mode, no gradient tracking.
model.eval()

with torch.no_grad():
    output = model(X_train)  # raw logits, one row per training example
print(output)

tensor([[ 2.8569, -4.1618],
        [ 2.5382, -3.7548],
        [ 2.0944, -3.1820],
        [-1.4814,  1.4816],
        [-1.7176,  1.7342]])


In [29]:
torch.set_printoptions(sci_mode=False)  # print plain decimals instead of scientific notation

# Turn the logits into class-membership probabilities.
# Each row belongs to one training example; the first row of the printed result
# says that example has a 99.91% probability of class 0 and 0.09% of class 1.
probs = torch.softmax(output, dim=1)
print(probs)

tensor([[    0.9991,     0.0009],
        [    0.9982,     0.0018],
        [    0.9949,     0.0051],
        [    0.0491,     0.9509],
        [    0.0307,     0.9693]])


In [30]:
# With dim=1, argmax returns the index of the largest value in each row
# (dim=0 would return the index of the largest value in each column).
predictions = torch.argmax(probs, dim=1)
print(predictions)

tensor([0, 0, 0, 1, 1])


In [31]:
# Element-wise comparison: a boolean tensor marking which predictions are correct.
predictions == y_train

tensor([True, True, True, True, True])

In [32]:
# Number of correctly predicted training examples.
torch.sum(predictions == y_train)

tensor(5)

In [33]:
def compute_accuracy(model, dataloader):
    """Return the fraction of examples in ``dataloader`` that ``model`` classifies correctly.

    Args:
        model: Module mapping a batch of features to logits with the class
            scores along dim=1. It is switched to evaluation mode and is left
            in that mode when the function returns.
        dataloader: Iterable yielding ``(features, labels)`` batches.

    Returns:
        A scalar tensor holding ``correct / total``.

    Raises:
        ZeroDivisionError: If ``dataloader`` yields no batches at all.
    """
    model = model.eval()
    correct = 0.0
    total_examples = 0.0

    # Inference only: keep the whole evaluation loop out of autograd.
    with torch.no_grad():
        for features, labels in dataloader:
            logits = model(features)
            predictions = torch.argmax(logits, dim=1)
            # torch.sum counts the matches in one vectorized call; the builtin
            # sum() would walk the tensor element by element in Python.
            correct += torch.sum(labels == predictions)
            total_examples += len(labels)

    return correct / total_examples

In [34]:
# Accuracy over the batches produced by train_loader.
compute_accuracy(model, train_loader)

tensor(1.)

In [35]:
# Accuracy over the batches produced by test_loader.
compute_accuracy(model, test_loader)

tensor(1.)

## 儲存和載入模型 

In [36]:
# Save the model's state dict (state_dict) to a file; this is the standard way
# of persisting a PyTorch model.
torch.save(model.state_dict(), "model.pth")

In [37]:
# The architecture must match the saved model exactly, otherwise the keys and
# shapes in the state dict will not line up.
model = Neural_Network(2, 2)
# weights_only=True restricts unpickling to tensors and plain containers.
model.load_state_dict(torch.load("model.pth", weights_only=True))

<All keys matched successfully>