In [1]:
%load_ext cython

In [2]:
%load_ext line_profiler

In [3]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import scale

In [8]:
df = pd.read_feather('train.feather')

  labels, = index.labels


In [9]:
X = df.iloc[:, 2:].values

In [10]:
%%cython -a --compile-args=-Xpreprocessor --compile-args=-fopenmp --link-args=-lomp
import numpy as np
cimport numpy as np
cimport cython
from libc.math cimport sqrt, acos, pi
from cython.parallel import prange

@cython.cdivision(True)
@cython.boundscheck(False)
@cython.wraparound(False)
def polar(double[:, ::1] A):
    """Convert cartesian to polar coordinates."""
    cdef int m = A.shape[0]
    cdef int n = A.shape[1]
    
    cdef double r_tmp, denom, tmp, x_tmp1, x_tmp2
    cdef double[:, ::1] view = A
    cdef np.ndarray[np.float64_t, ndim=2] ret = np.empty((m, n), dtype=np.float64)
    cdef double[:, ::1] ret_view = ret
    
    with nogil:
        for i in range(m):
            x_tmp1 = view[i, n - 1]
            x_tmp2 = view[i, n - 2]
            r_tmp = x_tmp1 * x_tmp1 + x_tmp2 * x_tmp2
            tmp = view[i, 0]
            if tmp >= 0:
                ret_view[i, n - 1] = acos(tmp / sqrt(r_tmp))
            else:
                ret_view[i, n - 1] = (2 * pi) - acos(tmp / sqrt(r_tmp))

            for j in range(n - 2, 0, -1):
                x_tmp1 = view[i, j]
                r_tmp = r_tmp + x_tmp1 * view[i, j]
                ret_view[i, j] = acos(view[i, j] / sqrt(r_tmp))

            ret_view[i, 0] = sqrt(r_tmp)  
    np.nan_to_num(ret, copy=False)
    return ret

@cython.cdivision(True)
@cython.boundscheck(False)
@cython.wraparound(False)
def cy_distance_subset(double[:, ::1] A, int start, int end):
    cdef int m = A.shape[0]
    cdef int n = A.shape[1]
    cols = end - start
    
    assert start >= 0
    assert end <= n
    
    cdef np.ndarray[np.float64_t, ndim=2] ret = np.empty((m, cols), dtype=np.float64)
    cdef double[:, ::1] data = ret
    cdef int i, j, k
    cdef double val, denom1, denom2, numer, a, b
    with nogil:
        for i in range(m):
            for j in range(cols):
                if i == j:
                    val = 1
                else:
                    '''
                    # Euclidean distance
                    a = 0
                    for k in range(n):
                        b = A[i, k] - A[j, k]
                        a = a + b * b
                    a = sqrt(a)
                    val = a
                    '''
                    
                    
                    # Cosine similarity
                    numer = 0
                    denom1 = 0
                    denom2 = 0
                    for k in range(n):
                        a = A[i, k]
                        b = A[start + j, k]
                        numer = numer + a * b
                        denom1 = denom1 + a * a
                        denom2 = denom2 + b * b
                    val = numer / (sqrt(denom1) * sqrt(denom2))
                    
                data[i, j] = val
    return ret

In [11]:
x = np.array([[0, 0], [4, 5]], dtype=np.float64)
polar(x)

array([[0.        , 0.        ],
       [6.40312424, 0.89605538]])

In [12]:
import cmath
t1 = complex(0, 0)
t2 = complex(4, 5)
cmath.polar(t1), cmath.polar(t2)

((0.0, 0.0), (6.4031242374328485, 0.8960553845713439))

In [21]:
import numpy as np

from sklearn.utils.extmath import randomized_svd
from scipy.linalg import qr


def fullsvd(A, k):
    u, s, v = np.linalg.svd(A, full_matrices=False)
    return u[:, :k], s[:k], v[:, :k]

def merge(us1, us2, k):
    u1, s1 = us1
    u2, s2 = us2
    
    k1 = s1.shape[0]
    k2 = s2.shape[0]
    
    Z = u1.T @ u2
    u_, r = qr(u2 - u1 @ Z, mode='economic')
    X = np.zeros((k1 + k2, k1 + k2))
    X[:k1, :k1] = np.diag(s1)
    X[:k1, k1:] = Z @ np.diag(s2)
    X[k1:, k1:] = r @ np.diag(s2)
    
    ur, sr, _ = fullsvd(X, k)
    r1 = ur[:k1, :]
    r2 = ur[k2:, :]
    urr = (u1 @ r1 + u_ @ r2)[:, :k]
    return urr, sr



def tsvd_dm(A, k=20, splits=2, randomized=False, split_k=None):
    if split_k is None:
        split_k = k
        
    m, n = A.shape
    split_size = n // splits
    results = []
    
    for i in range(splits):
        A_sub = cy_distance_subset(A, i * split_size, (i + 1) * split_size)
        print(f'Subset {i} generated')
        if randomized:
            *us, _ = randomized_svd(A_sub, split_k)
        else:
            *us, _ = fullsvd(A_sub, split_k)
        print(f'TSVD completed')
        results.append(us)
    
    if len(results) % 2 == 1:
        results.extend([None])
    
    while len(results) > 1:
        work_stack = []
        pairs = zip(results[::2], results[1::2])
        for us1, us2 in pairs:
            if us2 is None:
                work_stack.append(us1)
            else:
                merged = merge(us1, us2, k=k)
                work_stack.append(merged)
                
        if len(results) > 1 and len(results) % 2 == 1:
            work_stack.extend([None])
        results = work_stack
        
    return work_stack[0]

In [10]:
scaledX = polar(np.ascontiguousarray(scale(X)))

In [None]:
%%time
u_, s_ = tsvd_dm(scaledX, k=256, splits=8, randomized=True)

In [14]:
test = np.random.random((1000, 200))

In [24]:
np.set_printoptions(suppress=True, precision=5)
tsvd_dm(test, k=20, splits=4, randomized=True)

Subset 0 generated
TSVD completed
Subset 1 generated
TSVD completed
Subset 2 generated
TSVD completed
Subset 3 generated
TSVD completed


(array([[ 0.03187,  0.00122,  0.04346, ..., -0.05995,  0.01676,  0.03028],
        [ 0.03237, -0.03606, -0.04498, ..., -0.04321, -0.00821,  0.05056],
        [ 0.0315 , -0.00149,  0.14454, ...,  0.05177, -0.11561, -0.02837],
        ...,
        [ 0.03162,  0.02683, -0.02933, ..., -0.01655, -0.02929,  0.05297],
        [ 0.03203, -0.00249,  0.01048, ..., -0.03893, -0.01245,  0.03888],
        [ 0.03172,  0.0131 ,  0.03497, ...,  0.02223, -0.01246,  0.02212]]),
 array([335.85392,   1.36889,   1.3431 ,   1.2951 ,   1.25297,   1.25079,
          1.22802,   1.19999,   1.16627,   1.15878,   1.15539,   1.11674,
          1.09231,   1.07209,   1.0487 ,   1.03523,   1.01382,   0.9758 ,
          0.94295,   0.93395]))

In [25]:
u, s, v = fullsvd(cy_distance_subset(test, 0, 200), 20)
u, s

(array([[-0.03172,  0.02712, -0.05381, ..., -0.06214, -0.0046 ,  0.08177],
        [-0.03223, -0.0286 ,  0.01323, ..., -0.01723, -0.06676,  0.00703],
        [-0.03134,  0.02881, -0.13086, ...,  0.01253, -0.03477,  0.02645],
        ...,
        [-0.03162,  0.01598,  0.03424, ..., -0.05284, -0.01194,  0.01656],
        [-0.03204, -0.00204,  0.00379, ..., -0.04535, -0.00167, -0.03209],
        [-0.03173,  0.02332, -0.01377, ..., -0.01137,  0.03194, -0.01583]]),
 array([335.77079,   1.36862,   1.36364,   1.32485,   1.2908 ,   1.28403,
          1.27382,   1.2651 ,   1.23707,   1.22519,   1.2101 ,   1.17978,
          1.17334,   1.14777,   1.13669,   1.12599,   1.10976,   1.10754,
          1.0787 ,   1.07217]))

In [26]:
cy_distance_subset(test[:7], 0, 7)

array([[1.     , 0.7897 , 0.76901, 0.78531, 0.75776, 0.71993, 0.77314],
       [0.7897 , 1.     , 0.75263, 0.7709 , 0.74805, 0.77288, 0.78293],
       [0.76901, 0.75263, 1.     , 0.77442, 0.75344, 0.71313, 0.7044 ],
       [0.78531, 0.7709 , 0.77442, 1.     , 0.76158, 0.78642, 0.759  ],
       [0.75776, 0.74805, 0.75344, 0.76158, 1.     , 0.77427, 0.75089],
       [0.71993, 0.77288, 0.71313, 0.78642, 0.77427, 1.     , 0.77519],
       [0.77314, 0.78293, 0.7044 , 0.759  , 0.75089, 0.77519, 1.     ]])

In [27]:
cy_distance_subset(test, 0, 3)[:7]

array([[1.     , 0.7897 , 0.76901],
       [0.7897 , 1.     , 0.75263],
       [0.76901, 0.75263, 1.     ],
       [0.78531, 0.7709 , 0.77442],
       [0.75776, 0.74805, 0.75344],
       [0.71993, 0.77288, 0.71313],
       [0.77314, 0.78293, 0.7044 ]])

In [28]:
*us1, _ = fullsvd(test[:, :25], 20)
*us2, _ = fullsvd(test[:, 25:50], 20)
*us3, _ = fullsvd(test[:, 50:75], 20)
*us4, _ = fullsvd(test[:, 75:], 20)

In [29]:
us5 = merge(us1, us2, k=20)
us6 = merge(us3, us4, k=20)

In [30]:
us7 = merge(us5, us6, k=20)
us7[0]

array([[-0.0314 ,  0.04932, -0.01079, ..., -0.02715,  0.03102,  0.01194],
       [-0.03195,  0.04146, -0.01979, ..., -0.01383, -0.01572, -0.00596],
       [-0.03086, -0.04154,  0.04614, ...,  0.03255, -0.00723, -0.0064 ],
       ...,
       [-0.03073,  0.00877, -0.02565, ...,  0.01271, -0.02556, -0.01221],
       [-0.03259, -0.01682,  0.01603, ..., -0.00241,  0.03002,  0.01704],
       [-0.03164, -0.01301,  0.04376, ...,  0.04054,  0.02857, -0.01356]])

In [31]:
fullsvd(test, 20)

(array([[-0.0314 ,  0.03203,  0.02086, ...,  0.05467, -0.04875, -0.00325],
        [-0.03195,  0.04355,  0.00286, ..., -0.02643, -0.03788,  0.01743],
        [-0.03086, -0.05963,  0.03191, ...,  0.03458, -0.04299, -0.0337 ],
        ...,
        [-0.03073,  0.018  ,  0.00408, ...,  0.03988, -0.00951,  0.01383],
        [-0.0326 , -0.00773, -0.00162, ..., -0.01608,  0.01643, -0.01747],
        [-0.03164, -0.0104 ,  0.04036, ..., -0.02084, -0.05577, -0.00343]]),
 array([224.07765,  12.99373,  12.83067,  12.74665,  12.67735,  12.62498,
         12.59178,  12.54279,  12.48603,  12.39723,  12.36373,  12.31041,
         12.23135,  12.18466,  12.16967,  12.15108,  12.09141,  12.03054,
         11.96349,  11.89652]),
 array([[-0.06936, -0.07128, -0.07075, ..., -0.07113, -0.07012, -0.06919],
        [ 0.02189,  0.02451, -0.02658, ..., -0.00455, -0.04295, -0.0882 ],
        [-0.06321, -0.00491,  0.01219, ..., -0.04854, -0.11777, -0.02081],
        ...,
        [-0.00429,  0.04914, -0.06091, ...,