In [1]:
import numpy as np
from typing import Tuple
import torch

In [2]:
import pandas   as   pd
# Fetch the UCI Iris file (it has no header row, hence header=None).
df = pd.read_csv(
    'http://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data',
    header=None,
)
# Keep columns 0-3 as a tensor, then centre every column on zero mean.
data = torch.tensor(df.iloc[:, 0:4].values)
data = data.sub(data.mean(dim=0))

In [3]:
# Reproducibility settings shared by the rest of the notebook.
seed = 1
torch.backends.cudnn.benchmark = False      # disable cuDNN auto-tuning
torch.manual_seed(seed)                     # seed torch's global RNG
torch.backends.cudnn.deterministic = True   # request deterministic cuDNN kernels

# Matrix factorisation using gradient descent

In [6]:
from typing import Tuple
def sgd_factorise(A: torch.Tensor, rank: int, numepochs=1000, lr=0.01, random_seed=None) -> Tuple[torch.Tensor, torch.Tensor]:
    """Factorise ``A`` as ``U @ V.t()`` using entry-wise gradient descent.

    Each epoch visits every entry ``A[r, c]`` in row-major order, computes the
    reconstruction error ``e = A[r, c] - U[r] . V[c]`` and moves row ``r`` of
    ``U`` and row ``c`` of ``V`` by ``lr * e`` times the other factor's row.

    Args:
        A: 2-D tensor of shape ``(m, n)`` to factorise.
        rank: number of columns of each returned factor.
        numepochs: number of full passes over the entries of ``A``.
        lr: learning rate (step size) of every update.
        random_seed: seed used for the factor initialisation. ``None`` (the
            default) falls back to the notebook-level ``seed`` variable, which
            is the original behaviour.

    Returns:
        ``(U, V)`` with shapes ``(m, rank)`` and ``(n, rank)``.

    Side effects:
        Reseeds torch's global RNG and prints the epoch index every 300 epochs.
    """
    m, n = A.shape
    # Reseed so that repeated calls start from the same initial factors.
    torch.manual_seed(seed if random_seed is None else random_seed)
    U = torch.rand(m, rank)
    V = torch.rand(n, rank)
    for epoch in range(numepochs):
        for r in range(m):
            for c in range(n):
                # U[r] and V[c] are 1-D, so `@` is already a dot product.
                e = A[r, c] - U[r] @ V[c]
                # The approximate gradient step for U[r] is e * V[c].
                U[r] = U[r] + lr * e * V[c]
                # Same for V[c]; note it uses the freshly updated U[r]
                # (sequential, not simultaneous, update).
                V[c] = V[c] + lr * e * U[r]
        if epoch % 300 == 0:
            print(epoch)
    return U, V


# Small 3x3 example matrix used throughout the notebook.
A = torch.tensor([
    [0.3374, 0.6005, 0.1735],
    [3.3359, 0.0492, 1.8374],
    [2.9407, 0.5301, 2.2620],
])

# Rank-2 factorisation with the default number of epochs and learning rate.
U, V = sgd_factorise(A, rank=2)
print(U, V)
# Mean squared reconstruction error of U @ V^T against A (shown as the cell output).
torch.nn.functional.mse_loss(A, U @ V.t())

0
300
600
900
tensor([[ 0.6741, -0.1215],
        [ 0.2116,  1.6963],
        [ 0.9354,  1.3231]]) tensor([[ 0.6635,  1.8306],
        [ 0.7727, -0.0993],
        [ 0.7184,  1.0786]])


tensor(0.0136)

# Truncated SVD

In [5]:
# Number of singular values / vectors to keep.
t = 2
# NOTE(review): newer PyTorch releases deprecate torch.svd in favour of
# torch.linalg.svd (which returns V^H rather than V) - migrate once the
# minimum supported torch version is confirmed.
U, Σ, V = torch.svd(A)
# Truncate: leading t columns of U and V, leading t singular values on a diagonal.
U, Σ, V = U[:, :t], torch.diag(Σ[:t]), V[:, :t]

# Rank-t reconstruction of A and its mean squared error (shown as the cell output).
A_tilde = torch.mm(torch.mm(U, Σ), V.t())
torch.nn.functional.mse_loss(A, A_tilde)

tensor(0.0135)

# Matrix completion

In [131]:
def sgd_factorise_masked(A: torch.Tensor, M: torch.Tensor, rank: int, numepochs=1000, lr=0.01, random_seed=None) -> Tuple[torch.Tensor, torch.Tensor]:
    """Factorise the observed entries of ``A`` as ``U @ V.t()`` with SGD.

    Works like an entry-wise gradient-descent factorisation, except that only
    positions where ``M`` is non-zero contribute updates; the values of ``A``
    at all other positions are never read.

    Args:
        A: 2-D tensor of shape ``(m, n)``.
        M: mask with the same shape as ``A``; non-zero marks an observed entry.
        rank: number of columns of each returned factor.
        numepochs: number of full passes over the observed entries.
        lr: learning rate (step size) of every update.
        random_seed: seed used for the factor initialisation. ``None`` (the
            default) falls back to the notebook-level ``seed`` variable, which
            is the original behaviour.

    Returns:
        ``(U, V)`` with shapes ``(m, rank)`` and ``(n, rank)``.

    Raises:
        ValueError: if ``M`` and ``A`` do not have the same shape.

    Side effects:
        Reseeds torch's global RNG and prints the epoch index every 300 epochs.
    """
    m, n = A.shape
    mask = torch.as_tensor(M)
    if tuple(mask.shape) != (m, n):
        raise ValueError(
            f"mask shape {tuple(mask.shape)} does not match matrix shape {(m, n)}"
        )
    # Reseed so that repeated calls start from the same initial factors.
    torch.manual_seed(seed if random_seed is None else random_seed)
    U = torch.randn(m, rank)
    V = torch.randn(n, rank)
    # The mask never changes, so collect the observed (row, col) pairs once.
    # torch.nonzero lists them in row-major order, i.e. the same visiting
    # order as nested `for r ... for c ...` loops with an `if M[r, c] != 0`.
    observed = torch.nonzero(mask).tolist()
    for epoch in range(numepochs):
        for r, c in observed:
            # U[r] and V[c] are 1-D, so `@` is already a dot product.
            e = A[r, c] - U[r] @ V[c]
            U[r] = U[r] + lr * e * V[c]
            # Uses the freshly updated U[r] (sequential update).
            V[c] = V[c] + lr * e * U[r]
        if epoch % 300 == 0:
            print(epoch)
    return U, V

# Incomplete copy of A: the entries at (1, 0) and (2, 1) are treated as
# unknown and stored as 0.
A_ic = torch.tensor([[0.3374, 0.6005, 0.1735],
                     [0.    , 0.0492, 1.8374],
                     [2.9407, 0.    , 2.2620]], dtype=float)
# Mask: 1 marks an observed entry of A_ic, 0 a missing one.
M = torch.tensor([[1, 1, 1],
                  [0, 1, 1],
                  [1, 0, 1]], dtype=int)

# Fit the factors on the incomplete matrix (previously the full matrix A was
# passed here, leaving A_ic unused), then fill in the gaps with U @ V^T.
U, V = sgd_factorise_masked(A_ic, M, 2)
A_c = U @ V.t()
print(A_c, U, V)
# Error of the completed matrix against the full ground-truth matrix A.
torch.nn.functional.mse_loss(A, A_c)

0
300
600
900
tensor([[0.3425, 0.5994, 0.1677],
        [2.1154, 0.0494, 1.8366],
        [2.9396, 1.3863, 2.2634]]) tensor([[-0.2800, -0.4419],
        [-0.8746,  0.9651],
        [-1.5238,  0.0085]]) tensor([[-1.9266,  0.4459],
        [-0.9141, -0.7772],
        [-1.4823,  0.5598]])


tensor(0.2470)