In [8]:
import numpy as np
import torch

# A 2x3 integer matrix used to demonstrate transposition.
nv = np.array([[1, 2, 3], [4, 5, 6]])

# Show the matrix followed by a blank separator line, then its 3x2 transpose.
print(nv, end=" \n\n")
print(np.transpose(nv))

[[1 2 3]
 [4 5 6]] 

[[1 4]
 [2 5]
 [3 6]]


In [9]:
# Build a torch tensor from the NumPy matrix `nv` and print its transpose.
tv_a = torch.tensor(nv)
print(tv_a.t(), end=" \n\n")

# A 3x1 column of integers; transposing it yields a 1x3 row.
tv_b = torch.tensor([[-3], [2], [7]])
print(tv_b.t(), end=" \n\n")

tensor([[1, 4],
        [2, 5],
        [3, 6]]) 

tensor([[-3,  2,  7]]) 



In [10]:
# Reference 3x3 matrix in NumPy; its inverse is printed so it can be compared
# with the torch result below.
square_matrix_np = np.array([
    [ 3,  2, -1],
    [ 7, -3,  6],
    [-5,  4,  9],
])

print(np.linalg.inv(square_matrix_np), "\n")

torch.set_default_dtype(torch.float32)

# Use the GPU when one is present, otherwise fall back to the CPU so the cell
# still runs on machines without CUDA instead of raising.
matrix_device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Same values as square_matrix_np, created directly on the chosen device
# (no intermediate CPU tensor followed by a copy).
square_matrix_t = torch.tensor([
    [ 3.0,  2.0, -1.0],
    [ 7.0, -3.0,  6.0],
    [-5.0,  4.0,  9.0],
], device=matrix_device)

# Inverse of a matrix (computed once and reused for the check below)
square_matrix_inv_t = torch.linalg.inv(square_matrix_t)
print(square_matrix_inv_t, "\n")

# 3x3 identity matrix on the same device, shown for visual comparison.
unit_matrix_t = torch.eye(3, device=matrix_device)
print(unit_matrix_t, "\n")

# Sanity check: A @ A^-1 should round to the identity matrix.
test_matrix_t = torch.mm(square_matrix_t, square_matrix_inv_t)
print(torch.round(test_matrix_t), "\n")


[[ 0.14488636  0.0625     -0.02556818]
 [ 0.26420455 -0.0625      0.07102273]
 [-0.03693182  0.0625      0.06534091]] 

tensor([[ 0.1449,  0.0625, -0.0256],
        [ 0.2642, -0.0625,  0.0710],
        [-0.0369,  0.0625,  0.0653]], device='cuda:0') 

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]], device='cuda:0') 

tensor([[1., 0., 0.],
        [0., 1., 0.],
        [0., 0., 1.]], device='cuda:0') 



In [11]:
# Two 2x3 integer matrices, first as NumPy arrays and then as torch tensors.
nv_a = np.array([[1, 2, 3], [4, 5, 6]])
nv_b = np.array([[7, 8, 9], [10, 11, 12]])

tv_a, tv_b = torch.tensor(nv_a), torch.tensor(nv_b)

# Element-wise multiplication: operands share a shape, entries are multiplied pairwise.
print(nv_a * nv_b, end=" \n\n")
print(tv_a * tv_b, end=" \n\n")

# Matrix multiplication numpy: (2x3) times (3x2) gives 2x2; three equivalent spellings.
print(np.dot(nv_a, nv_b.T), end=" \n\n")
print(np.matmul(nv_a, nv_b.T), end=" \n\n")
print(nv_a @ nv_b.T, end=" \n\n")

# Matrix multiplication torch: function form and operator form.
print(torch.matmul(tv_a, tv_b.T), end=" \n\n")
print(tv_a @ tv_b.T, end=" \n\n")

[[ 7 16 27]
 [40 55 72]] 

tensor([[ 7, 16, 27],
        [40, 55, 72]]) 

[[ 50  68]
 [122 167]] 

[[ 50  68]
 [122 167]] 

[[ 50  68]
 [122 167]] 

tensor([[ 50,  68],
        [122, 167]]) 

tensor([[ 50,  68],
        [122, 167]]) 



In [12]:
# Prefer the GPU, but fall back to the CPU so the cell still runs on machines
# without CUDA instead of raising when the tensors are moved to the device.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
inner_length = 4000
outer_length = 6000

# Standard-normal samples rounded to 2 decimals. NumPy yields float64 here and
# torch.tensor() keeps the source dtype, so At/Bt are float64 tensors.
# The tensors are created directly on `device` to avoid an extra host-side copy.
An = np.round(np.random.randn(outer_length, inner_length), 2)
At = torch.tensor(An, device=device)

# Second operand with the transposed shape so that At @ Bt is well-defined.
Bn = np.round(np.random.randn(inner_length, outer_length), 2)
Bt = torch.tensor(Bn, device=device)

In [13]:
# Matrix product of the two large operands prepared in the previous cell.
Ct = torch.matmul(At, Bt)

In [15]:
import torch
import time

# Create large matrices
matrix_a = torch.randn(6000, 4000)
matrix_b = torch.randn(4000, 6000)

# Measure CPU time.
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# durations; time.time() is wall-clock time and can be adjusted by the system.
start_time_cpu = time.perf_counter()
result_cpu = matrix_a @ matrix_b
end_time_cpu = time.perf_counter()
execution_time_cpu = end_time_cpu - start_time_cpu
print(f"CPU execution time: {execution_time_cpu:.8f} seconds")

# Only attempt the GPU measurement when CUDA is usable; otherwise `.to('cuda')`
# and torch.cuda.synchronize() would raise and abort the cell.
if torch.cuda.is_available():
    # Move matrices to GPU
    matrix_a_gpu = matrix_a.to('cuda')
    matrix_b_gpu = matrix_b.to('cuda')

    # Warm up GPU (optional, to avoid cold start overhead)
    _ = matrix_a_gpu @ matrix_b_gpu

    # Measure GPU time. CUDA kernels are launched asynchronously, so
    # synchronize before starting and before stopping the clock.
    torch.cuda.synchronize()  # Ensure the warm-up has finished
    start_time_gpu = time.perf_counter()
    result_gpu = matrix_a_gpu @ matrix_b_gpu
    torch.cuda.synchronize()  # Ensure the timed matmul has finished
    end_time_gpu = time.perf_counter()
    execution_time_gpu = end_time_gpu - start_time_gpu
    print(f"GPU execution time: {execution_time_gpu:.8f} seconds")
    print(f"Speedup: {execution_time_cpu / execution_time_gpu:.2f}x")
else:
    print("CUDA is not available; skipping the GPU measurement.")

CPU execution time: 2.30125189 seconds
GPU execution time: 0.09723687 seconds
Speedup: 23.67x
