In [1]:
import cupy as cp 
import numpy as np
from scipy.sparse import coo_matrix
import scipy
import time
import pandas as pd
import scipy.io as sio


import os 

  (fname, cnt))
  (fname, cnt))


In [2]:
# Construct the table of reference to map N/density to the file names.
table = pd.DataFrame(index=['0.0001', '0.001', '0.01', '0.1'], columns=['4000', '8000', '40000', '80000', '160000'])
results = pd.DataFrame(index=['0.0001', '0.001', '0.01', '0.1'], columns=['4000', '8000', '40000', '80000', '160000'])
table['4000']['0.0001'] = 'bcsstm24.mat'
table['4000']['0.001'] = 'c-24.mat'
table['4000']['0.01'] = 'crystk01.mat'
table['4000']['0.1'] = 'heart1.mat'
table['8000']['0.0001'] = 'bcsstm38.mat'
table['8000']['0.001'] = 'c-39.mat'
table['8000']['0.01'] = 'msc10848.mat'
table['8000']['0.1'] = 'human_gene2.mat' # previously 'TSC_OPF_1047.mat' 
table['40000']['0.0001'] = 'cond-mat-2005.mat'
table['40000']['0.001'] = 'bbmat.mat'
table['40000']['0.01'] = 'TSOPF_RS_b2383_c1.mat'
table['40000']['0.1'] = '' # Does not exist.
table['80000']['0.0001'] = 'net4-1.mat'
table['80000']['0.001'] = 'consph.mat'
table['80000']['0.01'] = '' # Does not exist
table['80000']['0.1'] = '' # Does not exist
table['160000']['0.0001'] = 'para-4.mat'
table['160000']['0.001'] = 'pkustk14.mat'
table['160000']['0.01'] = '' # Does not exist
table['160000']['0.1'] = '' # Does not exist
table

Unnamed: 0,4000,8000,40000,80000,160000
0.0001,bcsstm24.mat,bcsstm38.mat,cond-mat-2005.mat,net4-1.mat,para-4.mat
0.001,c-24.mat,c-39.mat,bbmat.mat,consph.mat,pkustk14.mat
0.01,crystk01.mat,msc10848.mat,TSOPF_RS_b2383_c1.mat,,
0.1,heart1.mat,human_gene2.mat,,,


In [3]:
# Handle all the edge cases for all files
def load_matrix(matrix_name):
    data = sio.loadmat(matrix_name)
    P = data['Problem']
    zeros = {'net4-1.mat'}
    twos = {'consph.mat','human_gene2.mat','TSOPF_RS_b2383_c1.mat','cond-mat-2005.mat', 'para-4.mat'}
    fours = {'c-24.mat','c-39.mat'}
    if matrix_name in zeros:
        x = P[0][0][0]
    elif matrix_name in twos:
        x = P[0][0][2]
    elif matrix_name in fours:
        x = P[0][0][4]
    else:
        x = P[0][0][1]
    return x

def multiply_and_time(matrix_name, N, p, results):
    if not matrix_name:
        return
    x = load_matrix(matrix_name)
    x_gpu = cp.sparse.csr_matrix(x) # Convert to Cupy GPU CSR matrix.
    nonzeros = x_gpu.count_nonzero()
    density = nonzeros / (x_gpu.shape[0] * x_gpu.shape[1])
    print('\tShape of {0} is {1} with density={2}'.format(matrix_name, x_gpu.shape, density))
    start = time.time()
    x_gpu.dot(x_gpu.T)
    end = time.time()
    print("\tTime for {0} = {1}".format(matrix_name, end-start))
    results[N][p] = end-start
    return


def multiply_all(table, results, skip_computed=False):
    for N in ['4000', '8000', '40000', '80000', '160000']:
        for p in ['0.0001', '0.001', '0.01', '0.1']:
            print("For N={0}, p={1}".format(N,p))
            if not results.isna()[N][p] and skip_computed:
                print("\t Skipping, already computed.")
            else:
                multiply_and_time(table[N][p], N, p, results)

In [4]:
multiply_all(table, results, skip_computed=True)
print("Done!")

For N=4000, p=0.0001
	Shape of bcsstm24.mat is (3562, 3562) with density=0.00028074115665356543
	Time for bcsstm24.mat = 0.24916672706604004
For N=4000, p=0.001
	Shape of c-24.mat is (4119, 4119) with density=0.002104129508142708
	Time for c-24.mat = 0.047541141510009766
For N=4000, p=0.01
	Shape of crystk01.mat is (4875, 4875) with density=0.01329192899408284
	Time for crystk01.mat = 0.09464073181152344
For N=4000, p=0.1
	Shape of heart1.mat is (3557, 3557) with density=0.10949175913309958
	Time for heart1.mat = 1.1984214782714844
For N=8000, p=0.0001
	Shape of bcsstm38.mat is (8032, 8032) with density=0.00016252532201869812
	Time for bcsstm38.mat = 0.010604619979858398
For N=8000, p=0.001
	Shape of c-39.mat is (9271, 9271) with density=0.0013564286839588878
	Time for c-39.mat = 0.11538815498352051
For N=8000, p=0.01
	Shape of msc10848.mat is (10848, 10848) with density=0.010450249519234952
	Time for msc10848.mat = 0.5023391246795654
For N=8000, p=0.1
	Shape of human_gene2.mat is (143

In [5]:
results

Unnamed: 0,4000,8000,40000,80000,160000
0.0001,0.249167,0.0106046,0.335074,8.5704,2.7645
0.001,0.0475411,0.115388,0.351276,1.31511,4.69894
0.01,0.0946407,0.502339,141.653,,
0.1,1.19842,70.9023,,,


### CSR x Dense Matrix

In [6]:
results_2 = pd.DataFrame(index=['0.0001', '0.001', '0.01', '0.1'], columns=['4000', '8000', '40000', '80000', '160000'])

In [7]:
def multiply_all(table, results, skip_computed=False):
    for N in ['4000', '8000']:
        for p in ['0.0001', '0.001', '0.01', '0.1']:
            print("For N={0}, p={1}".format(N,p))
            if not results.isna()[N][p] and skip_computed:
                print("\t Skipping, already computed.")
            else:
                multiply_and_time(table[N][p], N, p, results)

def multiply_and_time(matrix_name, N, p, results):
    if not matrix_name:
        return
    try:
        x = load_matrix(matrix_name)
        x_gpu = cp.sparse.csr_matrix(x) # Convert to Cupy GPU CSR matrix.
        nonzeros = x_gpu.count_nonzero()
        density = nonzeros / (x_gpu.shape[0] * x_gpu.shape[1])
        print('\tShape of {0} is {1} with density={2}'.format(matrix_name, x_gpu.shape, density))
        N_x = x_gpu.shape[0]
        y = scipy.sparse.random(N_x,N_x, 0.1, "csr")
        y = y.todense()
        y = cp.array(y)

        start = time.time()
        x_gpu.dot(y)
        end = time.time()
        print("\tTime for {0} = {1}".format(matrix_name, end-start))
        results[N][p] = end-start
    except:
        print("\tCaught Cuda memory exception")
    return

multiply_all(table, results_2, skip_computed=True)
print("Done!")

For N=4000, p=0.0001
	Shape of bcsstm24.mat is (3562, 3562) with density=0.00028074115665356543
	Time for bcsstm24.mat = 0.13143420219421387
For N=4000, p=0.001
	Shape of c-24.mat is (4119, 4119) with density=0.002104129508142708
	Time for c-24.mat = 0.0003185272216796875
For N=4000, p=0.01
	Shape of crystk01.mat is (4875, 4875) with density=0.01329192899408284
	Time for crystk01.mat = 0.00033283233642578125
For N=4000, p=0.1
	Shape of heart1.mat is (3557, 3557) with density=0.10949175913309958
	Time for heart1.mat = 0.0003223419189453125
For N=8000, p=0.0001
	Shape of bcsstm38.mat is (8032, 8032) with density=0.00016252532201869812
	Time for bcsstm38.mat = 0.00032138824462890625
For N=8000, p=0.001
	Shape of c-39.mat is (9271, 9271) with density=0.0013564286839588878
	Time for c-39.mat = 0.0003254413604736328
For N=8000, p=0.01
	Shape of msc10848.mat is (10848, 10848) with density=0.010450249519234952
	Time for msc10848.mat = 0.0003235340118408203
For N=8000, p=0.1
	Shape of human_gen

In [8]:
results_2

Unnamed: 0,4000,8000,40000,80000,160000
0.0001,0.131434,0.000321388,,,
0.001,0.000318527,0.000325441,,,
0.01,0.000332832,0.000323534,,,
0.1,0.000322342,0.199033,,,


## CSR x Dense Vector

In [9]:
results_3 = pd.DataFrame(index=['0.0001', '0.001', '0.01', '0.1'], columns=['4000', '8000', '40000', '80000', '160000'])

In [10]:
def multiply_all(table, results, skip_computed=False):
    for N in ['4000', '8000', '40000', '80000', '160000']:
        for p in ['0.0001', '0.001', '0.01', '0.1']:
            print("For N={0}, p={1}".format(N,p))
            if not results.isna()[N][p] and skip_computed:
                print("\t Skipping, already computed.")
            else:
                multiply_and_time(table[N][p], N, p, results)

def multiply_and_time(matrix_name, N, p, results):
    if not matrix_name:
        return
    try:
        x = load_matrix(matrix_name)
        x_gpu = cp.sparse.csr_matrix(x) # Convert to Cupy GPU CSR matrix.
        nonzeros = x_gpu.count_nonzero()
        density = nonzeros / (x_gpu.shape[0] * x_gpu.shape[1])
        print('\tShape of {0} is {1} with density={2}'.format(matrix_name, x_gpu.shape, density))
        N_x = x_gpu.shape[0]
        y = scipy.sparse.random(N_x,1, 0.1, "csr")
        y = y.todense()
        y = cp.array(y)

        start = time.time()
        x_gpu.dot(y)
        end = time.time()
        print("\tTime for {0} = {1}".format(matrix_name, end-start))
        results[N][p] = end-start
    except:
        print("\tCaught Cuda memory exception")
    return

multiply_all(table, results_3, skip_computed=True)
print("Done!")

For N=4000, p=0.0001
	Shape of bcsstm24.mat is (3562, 3562) with density=0.00028074115665356543
	Time for bcsstm24.mat = 0.00017547607421875
For N=4000, p=0.001
	Shape of c-24.mat is (4119, 4119) with density=0.002104129508142708
	Time for c-24.mat = 0.000141143798828125
For N=4000, p=0.01
	Shape of crystk01.mat is (4875, 4875) with density=0.01329192899408284
	Time for crystk01.mat = 0.00013256072998046875
For N=4000, p=0.1
	Shape of heart1.mat is (3557, 3557) with density=0.10949175913309958
	Time for heart1.mat = 0.00018358230590820312
For N=8000, p=0.0001
	Shape of bcsstm38.mat is (8032, 8032) with density=0.00016252532201869812
	Time for bcsstm38.mat = 0.00013303756713867188
For N=8000, p=0.001
	Shape of c-39.mat is (9271, 9271) with density=0.0013564286839588878
	Time for c-39.mat = 0.0001380443572998047
For N=8000, p=0.01
	Shape of msc10848.mat is (10848, 10848) with density=0.010450249519234952
	Time for msc10848.mat = 0.0001895427703857422
For N=8000, p=0.1
	Shape of human_gen

In [11]:
results_3

Unnamed: 0,4000,8000,40000,80000,160000
0.0001,0.000175476,0.000133038,0.000148535,0.000187635,0.000196695
0.001,0.000141144,0.000138044,0.000194073,0.000189543,0.000190735
0.01,0.000132561,0.000189543,0.000188351,,
0.1,0.000183582,0.000186443,,,


## Ignore Below: Testing & Playgrounding

In [126]:
def multiply_and_time(matrix_name):
    data = sio.loadmat(matrix_name)
    P = data['Problem']
    if matrix_name == 'SiO2.mat':
        x = P[0][0][2] # For some reason, this file is stored differently.
    elif matrix_name == 'net4-1.mat':
        x = P[0][0][0]
    else:
        x = P[0][0][1]
    x_gpu = cp.sparse.csr_matrix(x) # Convert to Cupy GPU CSR matrix.
    nonzeros = x_gpu.count_nonzero()
    density = nonzeros / (x_gpu.shape[0] * x_gpu.shape[1])
    print('\tShape of {0} is {1} with density={2}'.format(matrix_name, x_gpu.shape, density))
    start = time.time()
    x_gpu.dot(x_gpu.T)
    end = time.time()
    print("Time for {0} = {1}".format(matrix_name, end-start))
    return

In [127]:
# Get all files:
matrices = [file for file in os.listdir('./') if '.mat' in file]
print("Running multiply on ", matrices)
for matrix_name in matrices:
    multiply_and_time(matrix_name)

Running multiply on  ['net4-1.mat', 'msc10848.mat']
	Shape of net4-1.mat is (88343, 88343) with density=0.00031286200139439834
Time for net4-1.mat = 8.725044250488281
	Shape of msc10848.mat is (10848, 10848) with density=0.010450249519234952
Time for msc10848.mat = 0.5050654411315918


In [220]:
matrix1 = 'pkustk14.mat'
matrix1_data = sio.loadmat(matrix1)
P = matrix1_data['Problem']
x = P[0][0][1]

x_gpu = cp.sparse.csr_matrix(x)
start = time.time()
x_gpu.dot(x_gpu.T)
end = time.time()
print("Time for {0}".format(matrix1))
print(end-start)
x_gpu

Time for pkustk14.mat
4.782956838607788


<cupy.sparse.csr.csr_matrix at 0x7f1cddae3d68>

In [218]:
matrix1_data['Problem'][0][0][2]

<153226x153226 sparse matrix of type '<class 'numpy.float64'>'
	with 2930882 stored elements in Compressed Sparse Column format>

In [163]:
x

<8184x8184 sparse matrix of type '<class 'numpy.complex128'>'
	with 127762 stored elements in Compressed Sparse Column format>

In [83]:
x.count_nonzero()

array(11284032)

In [77]:
matrix1_data

{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sat Sep  6 10:02:30 2008',
 '__version__': '1.0',
 '__globals__': [],
 'Problem': array([[ (array(['Mittelmann/rail4284'],
       dtype='<U19'), array(['Italian railways (H. Mittelmann test set)'],
       dtype='<U41'), array([[1658]], dtype=uint16), array(['linear programming problem'],
       dtype='<U26'), array(['2005'],
       dtype='<U4'), array(['P. Nobili'],
       dtype='<U9'), array(['J. Beasley'],
       dtype='<U10'), <4284x1096894 sparse matrix of type '<class 'numpy.float64'>'
 	with 11284032 stored elements in Compressed Sparse Column format>, array([[1],
        [1],
        [1],
        ..., 
        [1],
        [1],
        [1]], dtype=uint8), array([[ (array([[0],
        [0],
        [0],
        ..., 
        [2],
        [2],
        [2]], dtype=uint8), array([[0],
        [0],
        [0],
        ..., 
        [0],
        [0],
        [0]], dtype=uint8), array([[ inf],
        [ inf],
       

Unnamed: 0,4000,8000,20000,80000,160000
0.0001,,,,,
0.001,,,,,
0.01,,msc10848.mat,,,
0.1,,,,,
