In [2]:
import torch

In [3]:


# Pick the compute device in order of preference: Apple-silicon GPU (MPS),
# then NVIDIA GPU (CUDA), and finally the CPU as a fallback.
if torch.backends.mps.is_available():
    device = torch.device("mps")
    print("Using Apple GPU (MPS)")
elif torch.cuda.is_available():
    device = torch.device("cuda")
    print("Using NVIDIA GPU (CUDA)")
else:
    device = torch.device("cpu")
    print("Using CPU")

# Smoke test: allocate a small random tensor directly on the selected device
# instead of creating it on the CPU and copying it over with .to(device).
x = torch.randn(3, 3, device=device)
print(x)
print("Tensor device:", x.device)


Using Apple GPU (MPS)
tensor([[ 1.7813,  0.1132,  0.2903],
        [-0.9251,  0.0205, -0.5629],
        [ 1.1565, -0.0996, -0.3871]], device='mps:0')
Tensor device: mps:0


In [4]:
# Show the installed PyTorch version (bare last expression, so the notebook echoes its value).
torch.__version__

'2.9.1'

In [5]:
import torch

# ---------- 2D Tensor (Matrix) ----------
# Two rows by three columns, built from nested Python lists of ints.
tensor2d = torch.tensor([[1, 2, 3], [4, 5, 6]])

print("2D Tensor:")
print(tensor2d)
print("Shape:", tensor2d.size(), end="\n\n")


# ---------- 3D Tensor ----------
# Standard-normal samples; the three axes can be read as
# (batch_size, sequence_length, features).
tensor3d = torch.randn((2, 4, 3))

print("3D Tensor:")
print(tensor3d)
print("Shape:", tensor3d.size(), end="\n\n")


# ---------- Using the 3D Tensor ----------
# Averaging over axis 1 (the sequence axis) collapses that axis:
# (2, 4, 3) -> (2, 3)
reduced_tensor = tensor3d.mean(1)

print("Reduced 3D Tensor (mean over dim=1):")
print(reduced_tensor)
print("Shape:", reduced_tensor.size())


2D Tensor:
tensor([[1, 2, 3],
        [4, 5, 6]])
Shape: torch.Size([2, 3])

3D Tensor:
tensor([[[-1.1460, -0.8202, -0.9884],
         [-0.0728,  1.7661,  0.8772],
         [ 1.4683, -0.9376,  0.8915],
         [ 0.0877, -0.0432,  0.3757]],

        [[-1.6999, -1.0216, -0.8518],
         [-0.2541,  0.3451, -1.2604],
         [-1.1813,  0.2588, -0.5538],
         [ 0.2125,  2.0788,  0.6944]]])
Shape: torch.Size([2, 4, 3])

Reduced 3D Tensor (mean over dim=1):
tensor([[ 0.0843, -0.0087,  0.2890],
        [-0.7307,  0.4153, -0.4929]])
Shape: torch.Size([2, 3])


In [6]:
# A single sample with 10 features: a rank-1 tensor of length 10
# (not a tensor with ten axes, despite the variable name).
tensor10d = torch.randn((10,))

print("10D tensor:", tensor10d)
print("Shape:", tensor10d.size())
print(tensor10d.dtype)

10D tensor: tensor([ 0.6720, -0.6947, -0.2773,  0.2136, -0.7465, -0.6325,  0.1366, -0.7042,
         0.1264,  0.1450])
Shape: torch.Size([10])
torch.float32


In [7]:
# Random 4x10 matrix of standard-normal values.
W = torch.randn((4, 10))


In [8]:
# Matrix-vector product: (4, 10) times (10,) gives a length-4 vector.
tensor4d = torch.matmul(W, tensor10d)

print("Projected 4D tensor:", tensor4d)
print("Shape:", tensor4d.size())
print(tensor4d.dtype)

Projected 4D tensor: tensor([ 0.5454, -0.7322,  0.7743,  0.3716])
Shape: torch.Size([4])
torch.float32


In [9]:
# Mixing a float literal with ints makes torch.tensor infer a floating-point dtype.
tensor1d = torch.tensor((1.3, 3, 6))
print(tensor1d.dtype)

torch.float32


In [10]:
# Explicit cast to float32; tensor1d already printed as float32 above, so the values are unchanged.
floatvec = tensor1d.to(dtype=torch.float32)
print(floatvec.dtype)

torch.float32


## PyTorch automatic differentiation engine

In [12]:
import torch.nn.functional as F

# Forward pass of a single-feature logistic unit.
# NOTE: none of these tensors sets requires_grad, so no gradients are tracked here.
y = torch.tensor([1.0])   # target label
x1 = torch.tensor([1.1])  # input feature
w1 = torch.tensor([2.2])  # weight
b = torch.tensor([0.0])   # bias

z = b + w1 * x1           # net input
a = z.sigmoid()           # activation squashed into (0, 1)

# Binary cross-entropy between the activation and the target label.
loss = F.binary_cross_entropy(input=a, target=y)

In [13]:
import torch
import torch.nn as nn

class NeuralNetwork(nn.Module):
    """Fully connected feed-forward network that returns raw logits.

    The network is a stack of ``Linear -> ReLU`` hidden layers followed by a
    final ``Linear`` output layer with no activation.

    Args:
        num_inputs: Number of features in each input sample.
        num_outputs: Number of output units (logits) per sample.
        hidden_sizes: Width of each hidden layer, in order. Defaults to
            ``(30, 20)``, i.e. two hidden layers with 30 and 20 units. An
            empty sequence yields a single ``Linear`` layer.
    """

    def __init__(self, num_inputs, num_outputs, hidden_sizes=(30, 20)):
        super().__init__()

        modules = []
        in_features = num_inputs
        # hidden layers: each is a Linear followed by a ReLU
        for width in hidden_sizes:
            modules.append(nn.Linear(in_features, width))
            modules.append(nn.ReLU())
            in_features = width

        # output layer: no activation, so forward() yields logits
        modules.append(nn.Linear(in_features, num_outputs))

        # Sequential indexes its children 0..N-1 in insertion order, so the
        # default configuration keeps the layers.0 ... layers.4 layout.
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        """Pass ``x`` through the layer stack and return the logits."""
        logits = self.layers(x)
        return logits


# example usage
# Seed the global RNG first so the randomly initialised weights (printed in a
# later cell) are identical on every Restart & Run All.
torch.manual_seed(123)
model = NeuralNetwork(50, 3)
print(model)


NeuralNetwork(
  (layers): Sequential(
    (0): Linear(in_features=50, out_features=30, bias=True)
    (1): ReLU()
    (2): Linear(in_features=30, out_features=20, bias=True)
    (3): ReLU()
    (4): Linear(in_features=20, out_features=3, bias=True)
  )
)


In [15]:
# Add up the element counts of every parameter that has requires_grad set.
num_params = sum(
    weight.numel()
    for weight in filter(lambda t: t.requires_grad, model.parameters())
)
print("Total number of trainable model parameters:", num_params)

Total number of trainable model parameters: 2213


In [None]:
# Print the weight matrix of the first Linear layer (index 0 of the Sequential container).
print(model.layers[0].weight)