In [1]:
import numpy as np
from logistic_regression.GrdDscntQuant import grdescentquant
from logistic_regression.normal_logistic import normallogistic
from logistic_regression.GrdDscnt import grdescentnormal
from logistic_regression.quant_logistic import quant_logistic
from logistic_regression.GrdDscntUncoded import grdescentuncoded
from logistic_regression.uncoded_logistic import  uncoded_logistic
from quantization.quantize import quantize
from coded_computation.master import master
import matplotlib.pyplot as plt
import pandas as pd
from scipy.io import arff
from pad_and_clean import pad
from pad_and_clean import clean_and_scale
import time
from gen_data import gen_data, gen_nonlinear_data
from sklearn.model_selection import train_test_split

def get_loss(w, X, y):
    """Return the 0-1 misclassification rate of a logistic model.

    Parameters
    ----------
    w : numpy.ndarray
        Weight vector; may be 1-D or a single column, it is only used
        through ``X @ w``.
    X : numpy.ndarray
        Design matrix, one row per sample.
    y : array-like
        True labels encoded as ``1`` / ``-1`` (the values the predictions
        take); may be 1-D or a single column.

    Returns
    -------
    float
        Fraction of samples whose predicted label differs from ``y``.

    Raises
    ------
    ValueError
        If the number of predictions differs from the number of labels.
    """
    log_odds = X @ w
    probs = 1 / (1 + np.exp(-log_odds))

    # Flatten both sides before comparing: an (n, 1) prediction column
    # compared with an (n,) label vector would otherwise broadcast to an
    # (n, n) matrix and silently produce a meaningless error rate.
    preds = np.where(probs > 0.5, 1, -1).ravel()
    labels = np.asarray(y).ravel()
    if preds.shape != labels.shape:
        raise ValueError(
            f"got {preds.size} predictions for {labels.size} labels"
        )

    return np.mean(preds != labels)
def plot_3d_bar(data, z):
    """Draw a 3-D bar chart with one bar per cell of a 2-D array.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array of bar heights. Column indices are placed on the axis
        labelled ``w_lvl`` and row indices on the axis labelled ``grd_lvl``.
    z : str
        Text used to label the vertical axis.

    NOTE(review): ``run()`` in this notebook fills its grids as
    ``[w_lvl, grd_lvl]`` (rows = w_lvl). If those grids are passed here the
    two horizontal axis labels would be swapped -- confirm against callers.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    # Integer (column, row) coordinates for every cell of ``data``.
    col_idx, row_idx = np.meshgrid(np.arange(data.shape[1]),
                                   np.arange(data.shape[0]))
    x, y = col_idx.ravel(), row_idx.ravel()

    # All bars start on the zero plane. This lives in its own name so that
    # it does not clobber the ``z`` label argument (previously the axis was
    # labelled with an array of zeros instead of the caller's text).
    base = np.zeros_like(x)
    heights = data.ravel()

    # Unit-footprint bars, one per cell.
    ax.bar3d(x, y, base, 1, 1, heights, shade=True)

    ax.set_xlabel('w_lvl')
    ax.set_ylabel('grd_lvl')
    ax.set_zlabel(z)

    plt.show()

def run(X, y, filename, repetitions=100):
    """Benchmark quantized logistic regression over a grid of quantization levels.

    The data are split 80/20 into train and test parts and both parts are
    passed through ``pad(..., 7)``. An unquantized baseline is fitted once and
    its test error printed. Then, for every ``w_lvl`` in 4..8 and ``grd_lvl``
    in 2..5, the coded quantized solver (``grdescentquant``) and the uncoded
    quantized solver (``grdescentuncoded``) are each run from the same random
    starting point, ``repetitions`` times, and the measurements are averaged.

    Parameters
    ----------
    X, y : array-like
        Features and labels; forwarded to ``train_test_split``.
    filename : str
        Forwarded unchanged to ``grdescentquant``.
        NOTE(review): presumably the file that solver logs to -- confirm.
    repetitions : int, optional
        Number of times the whole grid is evaluated before averaging
        (default 100, the value that used to be hard-coded).

    Returns
    -------
    tuple of six numpy.ndarray, each of shape (5, 4)
        ``(test_loss, loss_grid, iters_grid, times_grid, test_loss_uncoded,
        uncoded_times)``; rows are indexed by ``w_lvl`` and columns by
        ``grd_lvl``, every entry is a mean over ``repetitions`` runs.

    Raises
    ------
    ValueError
        If ``repetitions`` is smaller than 1.
    """
    if repetitions < 1:
        raise ValueError("repetitions must be a positive integer")

    func = quant_logistic
    # 7 x 8 matrix of +/-1 entries handed to the coded master.
    # NOTE(review): presumably the code's generator matrix -- confirm.
    G = np.array([
        [1, 1, 1, 1, 1, 1, 1],
        [-1, -1, -1, 1, 1, 1, 1],
        [-1, 1, 1, -1, -1, 1, 1],
        [1, -1, -1, -1, -1, 1, 1],
        [1, -1, 1, -1, 1, -1, 1],
        [-1, 1, -1, -1, 1, -1, 1],
        [-1, -1, 1, 1, -1, -1, 1],
        [1, 1, -1, 1, -1, -1, 1]
    ]).T

    stepsize = 0.1
    maxiter = 10000
    tolerance = 1e-02

    # Quantization levels swept by the benchmark; grid indices are derived
    # from these ranges instead of hard-coded "- 4" / "- 2" offsets.
    w_levels = range(4, 9)
    grd_levels = range(2, 6)
    grid_shape = (len(w_levels), len(grd_levels))

    X, Xt, y, yt = train_test_split(X, y, test_size=0.2)
    X, y = pad(X, y, 7)
    Xt, yt = pad(Xt, yt, 7)

    # NOTE(review): the meaning of the third argument (21 / 3) is defined by
    # ``master`` and is not visible here.
    Master_uncoded = master(X, None, 21)
    Master = master(X, G, 3)

    times_grid = np.zeros(grid_shape)
    loss_grid = np.zeros(grid_shape)
    iters_grid = np.zeros(grid_shape)
    test_loss = np.zeros(grid_shape)
    test_loss_uncoded = np.zeros(grid_shape)
    uncoded_times = np.zeros(grid_shape)

    # Baseline: unquantized logistic regression.
    w0 = np.random.uniform(-1, 1, (X.shape[1], 1))
    start_time = time.perf_counter()
    # NOTE(review): this call passes (y, X) while the other solvers receive
    # (X, y) -- confirm against the grdescentnormal signature.
    w, num_iters = grdescentnormal(normallogistic, w0, stepsize, maxiter, Master_uncoded, y, X, tolerance=tolerance)
    # Stop the clock before evaluating, so the reported time covers training
    # only, like the quantized timings below.
    end_time = time.perf_counter()
    normal_loss = get_loss(w, Xt, yt)
    print(f"loss from unquantized logistic regresison: {normal_loss} on {num_iters} iterations in {end_time - start_time} seconds")

    for _ in range(repetitions):
        for wi, w_lvl in enumerate(w_levels):
            for gi, grd_lvl in enumerate(grd_levels):
                # Fresh random start, shared by the coded and uncoded runs.
                w0 = np.random.uniform(-1, 1, (X.shape[1], 1))

                start_time = time.perf_counter()
                w, num_iters = grdescentquant(func, w0, stepsize, maxiter, Master, w_lvl, grd_lvl, X, y, filename, tolerance)
                times_grid[wi, gi] += time.perf_counter() - start_time

                loss_grid[wi, gi] += get_loss(w, X, y)
                iters_grid[wi, gi] += num_iters
                test_loss[wi, gi] += get_loss(w, Xt, yt)

                start_time = time.perf_counter()
                w, num_iters = grdescentuncoded(uncoded_logistic, w0, stepsize, maxiter, Master_uncoded, w_lvl, grd_lvl, X, y, tolerance=tolerance)
                uncoded_times[wi, gi] += time.perf_counter() - start_time
                test_loss_uncoded[wi, gi] += get_loss(w, Xt, yt)

    # Turn accumulated sums into per-repetition means.
    test_loss = test_loss / repetitions
    loss_grid = loss_grid / repetitions
    iters_grid = iters_grid / repetitions
    times_grid = times_grid / repetitions
    test_loss_uncoded = test_loss_uncoded / repetitions
    uncoded_times = uncoded_times / repetitions

    print(f" training loss: \n{loss_grid}")
    print(f" avg iterations: \n{iters_grid}")
    print(f" test loss: \n{test_loss}")
    print(f"run time: \n{times_grid}")
    print(f" test loss uncoded: \n{test_loss_uncoded}")
    print(f" times uncoded: \n{uncoded_times}")
    return test_loss, loss_grid, iters_grid, times_grid, test_loss_uncoded, uncoded_times

(500, 45) (500, 1)
(400, 45) (400, 1)
(100, 45) (100, 1)


In [2]:
# Benchmark on data produced by gen_nonlinear_data.
# NOTE(review): the arguments (500, 40, 1) are presumably sample count,
# feature count and a third setting -- confirm against gen_data.
# NOTE: the saved output of this cell ends in a ValueError
# ("did not return arithmetic sequence") raised somewhere inside run(),
# so the six result names below were not assigned in that recorded run.
X, y = gen_nonlinear_data(500, 40, 1)

test_loss, loss_grid, iters_grid, times_grid, test_loss_uncoded, uncoded_times = run(X,y, "nonlinear_data.csv")

loss from unquantized logistic regresison: 0.37142857142857144 on 394 iterations in 0.08082318305969238 seconds
response, actual 
 [[27.71681189  1.46413096]
 [19.80796433  2.46779736]
 [-0.0863594  -1.70987823]
 [ 5.82709958 -1.47425213]
 [47.02534799  3.24531103]] 

index passed: [[-0.37851602]
 [-0.37542618]
 [-0.37233634]
 [-0.3692465 ]
 [-0.36615666]
 [-0.36306682]
 [-0.35997698]
 [-0.35688714]
 [-0.3537973 ]
 [-0.35070746]
 [-0.34761762]
 [-0.34452778]
 [-0.34143794]
 [-0.3383481 ]
 [-0.33525826]
 [-0.33216843]
 [-0.32907859]
 [-0.32598875]
 [-0.32289891]
 [-0.31980907]
 [-0.31671923]
 [-0.31362939]
 [-0.31053955]
 [-0.30744971]
 [-0.30435987]
 [-0.30127003]
 [-0.29818019]
 [-0.29509035]
 [-0.29200051]
 [-0.28891067]
 [-0.28582084]
 [-0.282731  ]
 [-0.27964116]
 [-0.27655132]
 [-0.27346148]
 [-0.27037164]
 [-0.2672818 ]
 [-0.26419196]
 [-0.26110212]
 [-0.25801228]
 [-0.25492244]
 [-0.2518326 ]
 [-0.24874276]
 [-0.24565292]
 [-0.24256308]
 [-0.23947325]
 [-0.23638341]
 [-0.2332935

ValueError:  did not return arithmetic sequence: [-0.3785160151819251, -0.3723363365142123, -0.3661566578464994, -0.35997697917878657, -0.3568871398449301, -0.34761762184336087, -0.3414379431756479, -0.33525826450793517, -0.3290785858402222, -0.32289890717250946, -0.3167192285047966, -0.31053954983708376, -0.3043598711693709, -0.29818019250165806, -0.2920005138339452, -0.28582083516623213, -0.2796411564985195, -0.27346147783080665, -0.2672817991630938, -0.26110212049538095, -0.2549224418276681, -0.24874276315995525, -0.2425630844922424, -0.2363834058245292, -0.2302037271568167, -0.22711388782295994, -0.217844369821391, -0.21166469115367814, -0.2054850124859653, -0.19930533381825244, -0.1931256551505396, -0.18694597648282674, -0.18076629781511389, -0.17458661914740103, -0.1684069404796877, -0.16222726181197533, -0.15604758314426248, -0.14986790447654963, -0.14368822580883625, -0.13750854714112393, -0.13132886847341108, -0.1282390291395541, -0.11896951113798479, -0.11278983247027252, -0.10661015380255967, -0.10043047513484682, -0.09425079646713397, -0.08807111779942045, -0.0818914391317076, -0.07571176046399541, -0.06953208179628256, -0.06335240312856899, -0.05717272446085686, -0.05099304579314401, -0.04481336712543038, -0.03863368845771831, -0.032454009790005456, -0.026274331122292605, -0.023184491788435402, -0.017004813120722495, -0.007735295119153218, -0.0015556164514411996, 0.004624062216271652, 0.010803740883984503, 0.013893580217841817, 0.020073258885554668, 0.029342776887123057, 0.03552245555483591, 0.04170213422254876, 0.04788181289026161, 0.05406149155797446, 0.06024117022568831, 0.06642084889340116, 0.07260052756111302, 0.07878020622882587, 0.08495988489653872, 0.0880497242303962, 0.09731924223196547, 0.10349892089967727, 0.10967859956739123, 0.11585827823510297, 0.11894811756896051, 0.1282176355705298, 0.13439731423824264, 0.14057699290595554, 0.14675667157366723, 0.14984651090752488, 0.15911602890909415, 0.16529570757680578, 0.17147538624451863, 
0.17456522557837628, 0.18383474357994434, 0.1900144222476572, 0.19310426158151495, 0.2023737795830829, 0.20855345825079574, 0.21473313691850993, 0.22091281558622278, 0.2270924942539343, 0.23327217292164715, 0.23945185158936, 0.24254169092321787, 0.2518112089247857, 0.25799088759249855, 0.2641705662602114, 0.27035024492792425, 0.27652992359563855, 0.28270960226334996, 0.2888892809310628, 0.29506895959877566, 0.3012486382664885, 0.30742831693420136, 0.31360799560191577, 0.31978767426962706, 0.3259673529373399, 0.33214703160505277, 0.3383267102727656, 0.34450638894047847, 0.3506860676081913, 0.35686574627590417, 0.35995558560976226, 0.3692251036113299, 0.3754047822790427, 0.3815844609467556, 0.3877641396144702, 0.39394381828218306, 0.40012349694989413, 0.40321333628375233, 0.41248285428531983, 0.4186625329530327, 0.42484221162074554, 0.4310218902884584, 0.43720156895617124, 0.4433812476238841, 0.44956092629159694, 0.4557406049593098, 0.46192028362702264, 0.4680999622947355, 0.47427964096244835, 0.4804593196301612, 0.48663899829787405, 0.4928186769655869, 0.49899835563329975, 0.5051780343010126, 0.5113577129687255, 0.5175373916364383, 0.5237170703041512, 0.529896748971864, 0.5360764276395769, 0.5422561063072897, 0.5484357849750026, 0.5546154636427154, 0.5607951423104283, 0.5669748209781411, 0.573154499645854, 0.5793341783135668, 0.5855138569812797, 0.5916935356489925, 0.5978732143167054, 0.6040528929844182, 0.6102325716521311, 0.6164122503198439, 0.6225919289875568, 0.6287716076552696, 0.6349512863229825, 0.6411309649906953, 0.6473106436584082, 0.653490322326121, 0.6596700009938339, 0.6658496796615467, 0.6720293583292596, 0.6782090369969724, 0.6843887156646853, 0.6905683943323981, 0.696748073000111, 0.7029277516678238, 0.7091074303355367, 0.7152871090032495, 0.7214667876709624, 0.7276464663386752, 0.7338261450063881, 0.740005823674101, 0.7461855023418138, 0.7523651810095267, 0.7585448596772395, 0.7647245383449524, 0.7709042170126652, 0.7770838956803781, 
0.7832635743480909, 0.7894432530158038, 0.7956229316835166, 0.8018026103512295, 0.8079822890189423, 0.8141619676866552, 0.820341646354368, 0.8265213250220809, 0.8327010036897937, 0.8388806823575066, 0.8450603610252194, 0.8512400396929323, 0.8574197183606451, 0.863599397028358, 0.8697790756960708, 0.8759587543637837, 0.8821384330314965, 0.8883181116992094, 0.8944977903669222, 0.9006774690346351, 0.9068571477023479, 0.9130368263700608, 0.9192165050377736, 0.9253961837054865, 0.9315758623731993, 0.9377555410409122, 0.943935219708625, 0.9501148983763379, 0.9562945770440507, 0.9624742557117636, 0.9686539343794764, 0.9748336130471893, 0.9810132917149021, 0.987192970382615, 0.9933726490503278, 0.9995523277180407, 1.0057320063857536, 1.0119116850534664, 1.0180913637211793, 1.024271042388892, 1.030450721056605, 1.0366303997243178, 1.0428100783920307, 1.0489897570597435, 1.0551694357274564, 1.0613491143951692, 1.067528793062882, 1.073708471730595, 1.0798881503983078, 1.0860678290660206, 1.0922475077337335, 1.0984271864014463, 1.1046068650691592, 1.110786543736872, 1.1169662224045849, 1.1231459010722977, 1.1293255797400106, 1.1355052584077234, 1.1416849370754363, 1.1478646157431491, 1.154044294410862, 1.1602239730785748, 1.1664036517462877, 1.1725833304140005, 1.1787630090817134, 1.1849426877494262, 1.191122366417139, 1.197302045084852], from vlaues [-0.37851602 -0.35688714 -0.34143794 -0.32907859 -0.28582084 -0.23638341
 -0.22711389 -0.16840694 -0.14368823 -0.12823903 -0.11896951 -0.08807112
 -0.08189144 -0.0633524  -0.04481337 -0.02318449 -0.01700481 -0.0077353
  0.01389358  0.02007326  0.06024117  0.06642085  0.08804972  0.09731924
  0.1096786   0.11894812  0.12821764  0.13439731  0.14057699  0.14984651
  0.15911603  0.17456523  0.19310426  0.21473314  0.22091282  0.24254169
  0.27652992  0.313608    0.35995559  0.38776414  0.39394382  0.40321334]

In [None]:
# Same benchmark on data produced by gen_data. Results are stored under
# "1"-suffixed names so they do not overwrite the results of the
# gen_nonlinear_data cell. X and y themselves are overwritten.
X, y = gen_data(500, 40, 1)
test_loss1, loss_grid1, iters_grid1, times_grid1, test_loss_uncoded1, uncoded_times1 = run(X,y, "linear_data.csv")

In [None]:

# Benchmark on the speed-dating ARFF dataset, with "match" passed to
# clean_and_scale (presumably as the target column -- confirm).
# NOTE(review): the path below is an absolute path into one user's home
# directory, so this cell only runs on that machine; consider a
# configurable data directory.
data = arff.loadarff('/Users/willem/Downloads/speeddating.arff')
df = pd.DataFrame(data[0])

# NOTE(review): the "hill_" prefix does not match the dataset loaded above.
hill_train_x, hill_test_x, hill_train_y, hill_test_y = clean_and_scale(df, "match")
hill_train_x, hill_train_y = pad(hill_train_x, hill_train_y, 7)
hill_test_x, hill_test_y = pad(hill_test_x, hill_test_y, 7)

print(hill_train_x.shape, hill_train_y.shape, hill_test_x.shape, hill_test_y.shape)

# The two parts are stacked back into one dataset because run() performs its
# own train/test split and its own pad(..., 7).
# NOTE(review): whatever pad() added above is therefore part of the data that
# run() re-splits and pads again -- confirm this double padding is intended.
X = np.vstack((hill_test_x,hill_train_x))
y = np.vstack((hill_test_y, hill_train_y))
test_loss_real, loss_grid_real, iters_grid_real, times_grid_real, test_loss_uncoded_real, uncoded_times_real  = run(X,y, "real_data.csv")


In [None]:
# Benchmark on data produced by gen_seperable_data.
# NOTE(review): this import sits in the middle of the notebook rather than in
# the import cell at the top.
from gen_data import gen_seperable_data
# The third return value (w) is not used anywhere in this cell.
X, y, w = gen_seperable_data(500, 40)

# NOTE: these six names are the same ones assigned by the
# gen_nonlinear_data(500, 40, 1) cell, so running this cell replaces them.
test_loss, loss_grid, iters_grid, times_grid, test_loss_uncoded, uncoded_times = run(X,y, "seperable_data.csv")

In [None]:
# Benchmark on a larger gen_nonlinear_data draw (first two arguments 2000 and
# 200 instead of 500 and 40).
# NOTE: the six result names below are shared with the other benchmark cells
# and are overwritten here.
X, y = gen_nonlinear_data(2000, 200, 1)

test_loss, loss_grid, iters_grid, times_grid, test_loss_uncoded, uncoded_times = run(X,y, "hdnonlinear_data.csv")

In [None]:
def plot_3d_bar(data, z='z'):
    """Draw a 3-D bar chart with one bar per cell of a 2-D array.

    This definition replaces the ``plot_3d_bar(data, z)`` defined in the
    first cell of the notebook once this cell is executed. The optional ``z``
    argument keeps calls written for that earlier signature working.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array of bar heights. Column indices are placed on the axis
        labelled ``w_lvl`` and row indices on the axis labelled ``grd_lvl``.
    z : str, optional
        Text used to label the vertical axis (default ``'z'``).

    NOTE(review): ``run()`` in this notebook fills its grids as
    ``[w_lvl, grd_lvl]`` (rows = w_lvl). If those grids are passed here the
    two horizontal axis labels would be swapped -- confirm against callers.
    """
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    rows, cols = data.shape
    x, y = np.meshgrid(np.arange(cols), np.arange(rows))
    x = x.flatten()
    y = y.flatten()

    # Bars start on the zero plane; named ``base`` so it cannot be confused
    # with the ``z`` axis-label argument.
    base = np.zeros_like(x)
    dz = data.flatten()

    ax.bar3d(x, y, base, 1, 1, dz, shade=True)

    ax.set_xlabel('w_lvl')
    ax.set_ylabel('grd_lvl')
    ax.set_zlabel(z)

    plt.show()

In [None]:
def plot_time_per_query(df, value_col='time', zlabel='Average Time Per Query'):
    """Plot the mean of a measurement column per quantization setting as 3-D bars.

    Rows of ``df`` are grouped by the ``'w-quantization'`` and
    ``'grd-quantization'`` columns, ``value_col`` is averaged inside each
    group, and one bar per group is drawn at ``(w-quantization,
    grd-quantization)`` with the group mean as its height.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``'w-quantization'``, ``'grd-quantization'`` and
        ``value_col``.
    value_col : str, optional
        Column to average and plot (default ``'time'``).
    zlabel : str, optional
        Label of the vertical axis (default ``'Average Time Per Query'``).
    """
    # Only needed for its import side effect: on Matplotlib older than 3.2 the
    # '3d' projection is registered by importing mplot3d. Harmless on newer
    # versions.
    from mpl_toolkits.mplot3d import Axes3D  # noqa: F401

    group_keys = ['w-quantization', 'grd-quantization']
    # Aggregate just the column being plotted; unrelated (possibly
    # non-numeric) columns never enter the computation.
    grouped = df.groupby(group_keys)[value_col].mean().reset_index()

    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    x = grouped['w-quantization'].to_numpy()
    y = grouped['grd-quantization'].to_numpy()
    heights = grouped[value_col].to_numpy()

    # Bar footprint, in axis units.
    dx = dy = 0.5

    ax.bar3d(x, y, np.zeros(len(heights)), dx, dy, heights, color='b')

    ax.set_xlabel('w-quantization')
    ax.set_ylabel('grd-quantization')
    ax.set_zlabel(zlabel)

    plt.show()

In [None]:
# Load the measurement log used by plot_time_per_query.
file_path = 'access_measurements.csv'

# The CSV is read with header=None, so the column names are supplied here;
# 'w-quantization', 'grd-quantization' and 'time' are the columns that
# plot_time_per_query reads.
column_names = [
    'w-quantization', 'grd-quantization', 'imputation', 'access',
    'query type', 'time', 'stop cond', 'iters'
]

# NOTE(review): the frame is named df_nonlinear although the file name does
# not say which dataset the measurements come from -- confirm.
df_nonlinear = pd.read_csv(file_path, header=None, names=column_names)

print(df_nonlinear.head())

In [7]:
# Grid of values under investigation. The loop below prints the gap between
# each pair of neighbours; in the recorded output the gaps are about 0.02224,
# except for one of about 0.03336 followed shortly by one of about 0.01112,
# i.e. the points are not evenly spaced.
bad_w = [
    -0.08605444406989449,
    -0.06381547465695116,
    -0.04157650524400783,
    -0.019337535831064498,
    0.0029014335818787276,
    0.02514040299482216,
    0.05849885711423698,
    0.08073782652718031,
    0.09185731123365215,
    0.1140962806465955,
    0.1363352500595388,
    0.15857421947248213,
    0.18081318888542547,
    0.2030521582983688,
    0.2252911277113121,
    0.24753009712425544,
]

# Walk the list pairwise (value, successor) and print each difference.
for previous, current in zip(bad_w, bad_w[1:]):
    print(f"\n {current - previous}")






 0.02223896941294333

 0.02223896941294333

 0.02223896941294333

 0.022238969412943226

 0.022238969412943434

 0.03335845411941482

 0.02223896941294333

 0.011119484706471838

 0.022238969412943344

 0.022238969412943316

 0.022238969412943316

 0.022238969412943344

 0.022238969412943344

 0.022238969412943288

 0.022238969412943344
