In [1]:
import os
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

# Get Device for Training

In [2]:
# Pick the compute backend in priority order: CUDA first, then MPS,
# and fall back to the CPU when neither accelerator is available.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"
print(f"Using {device} device")

Using cuda device


# Define the Class

In [18]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier that maps a batch of inputs to class probabilities.

    Every sample is flattened, passed through two hidden ``Linear`` + ``ReLU``
    layers and a final ``Linear`` layer, and then normalised with a softmax
    over the class dimension.

    Args:
        in_features: Number of values per sample after flattening.
            Defaults to ``28 * 28``.
        hidden_features: Width of both hidden layers. Defaults to ``512``.
        num_classes: Number of output classes. Defaults to ``10``.
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # Collapse every dimension after the batch dimension,
        # e.g. (N, 28, 28) -> (N, 28 * 28) with the default sizes.
        self.flatten = nn.Flatten()
        # Each Linear layer computes x @ W.T + b, where W is stored as
        # (out_features, in_features) and b has out_features entries.
        # ReLU is the activation function applied between the layers.
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(in_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            nn.Linear(hidden_features, num_classes),
        )
        # Normalise along dim=1 (the class dimension) so each row sums to 1.
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        """Compute class probabilities for a batch.

        Args:
            x: Tensor whose first dimension is the batch and whose remaining
                dimensions flatten to ``in_features`` values,
                e.g. ``(N, 28, 28)`` with the default sizes.

        Returns:
            Tensor of shape ``(N, num_classes)``; every row sums to 1.

        Note:
            The output is already softmax-normalised. Losses that expect raw
            logits (e.g. ``nn.CrossEntropyLoss``) should be fed
            ``self.linear_relu_stack(self.flatten(x))`` instead.
        """
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)  # (N, num_classes)
        prob = self.softmax(logits)
        return prob

In [19]:
# Build the network and move its parameters to the selected device in one step.
model = NeuralNetwork().to(device)
# Printing a module lists its registered submodules.
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
  (softmax): Softmax(dim=1)
)


In [24]:
# Draw a random batch of 20 samples of size 28x28 directly on the target
# device and run it through the model.
X = torch.rand((20, 28, 28), device=device)
print(X.shape, X.dtype)

# Calling the module invokes forward() on the batch.
pred_probab = model(X)

# After the softmax layer, the values of a single sample sum to 1.
print(pred_probab[0].sum())

# The predicted class is the index of the largest value along dim 1.
y_pred = pred_probab.argmax(dim=1)
print(f"Predicted class: {y_pred}")

torch.Size([20, 28, 28]) torch.float32
tensor(1., device='cuda:0', grad_fn=<SumBackward0>)
Predicted class: tensor([5, 3, 3, 3, 3, 3, 3, 5, 3, 3, 3, 3, 3, 5, 3, 5, 3, 3, 3, 3],
       device='cuda:0')


In [26]:
# List every registered parameter with its name, its size, and a preview of
# its first two entries along dim 0. (Each parameter also exposes a
# `requires_grad` flag that can be inspected the same way.)
for name, param in model.named_parameters():
    summary = f"Layer: {name} | Size: {param.size()} | Values : {param[:2]} \n"
    print(summary)

Layer: linear_relu_stack.0.weight | Size: torch.Size([512, 784]) | Values : tensor([[-0.0305,  0.0323, -0.0034,  ...,  0.0255, -0.0308, -0.0112],
        [ 0.0043,  0.0057, -0.0199,  ...,  0.0161,  0.0242,  0.0244]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.0.bias | Size: torch.Size([512]) | Values : tensor([-0.0098,  0.0048], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.weight | Size: torch.Size([512, 512]) | Values : tensor([[ 0.0318, -0.0358, -0.0040,  ...,  0.0280, -0.0020,  0.0067],
        [ 0.0084, -0.0317, -0.0043,  ..., -0.0216,  0.0082, -0.0203]],
       device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.2.bias | Size: torch.Size([512]) | Values : tensor([-0.0005, -0.0004], device='cuda:0', grad_fn=<SliceBackward0>) 

Layer: linear_relu_stack.4.weight | Size: torch.Size([10, 512]) | Values : tensor([[ 0.0081, -0.0301,  0.0276,  ...,  0.0141, -0.0021,  0.0332],
        [-0.0039, -0.0371,  0.0266,