In [83]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split

import matplotlib.pyplot as plt
from sklearn.decomposition import PCA

from sklearn.svm import SVC

import xgboost as xgb

from sklearn.model_selection import GridSearchCV
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Activation, Dropout
from tensorflow.keras.optimizers.legacy import Adam, SGD
from tensorflow.keras.initializers import HeNormal
from tensorflow.keras.callbacks import EarlyStopping

In [84]:
# Load the data
data = np.load('train.npz')

In [85]:
uid = data['uid']
emb1 = data['emb1']
emb2 = data['emb2']
preference = data['preference']

In [86]:
X = np.concatenate((emb1, emb2), axis=1)
print(X.shape)

(18750, 768)


In [87]:
y = preference
print(y.shape)

(18750,)


In [88]:
# train, val split
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.01, random_state=42)

In [89]:
print(X_train.shape)
print(X_val.shape)
print(y_train.shape)
print(y_val.shape)

(18562, 768)
(188, 768)
(18562,)
(188,)


In [90]:
# Data Augmentation (from 15000 -> 45000 through noise injection)
feature_std1 = np.std(X_train)  
print("Standard deviation of features:", feature_std1)
noise_level1 = 0.1 * feature_std1  
noise1 = np.random.normal(0, noise_level1, X_train.shape)
X_train_noisy_pos = X_train + noise1
X_train_noisy_neg = X_train - noise1

X_train= np.concatenate((X_train, X_train_noisy_pos, X_train_noisy_neg), axis=0)

uid = np.concatenate((uid, uid, uid), axis=0)
y_train = np.concatenate((y_train, y_train, y_train), axis=0)

Standard deviation of features: 0.05102888


In [91]:
print(X_train.shape)
print(y_train.shape)

(55686, 768)
(55686,)


In [92]:
def feature_engineering(X_train):
    # Split X_train into emb1 and emb2
    emb1 = X_train[:, :384]  
    emb2 = X_train[:, 384:]  

    # Calculate the difference between emb1 and emb2
    diff = emb1 - emb2

    # Calculate cosine similarities
    cosine_similarities = np.zeros(emb1.shape[0])
    for i in range(emb1.shape[0]):
        norm_emb1 = emb1[i] / np.linalg.norm(emb1[i])
        norm_emb2 = emb2[i] / np.linalg.norm(emb2[i])
        cosine_similarities[i] = np.dot(norm_emb1, norm_emb2)

    cos_sim_reshaped = cosine_similarities.reshape(-1, 1)

    # Calculate mean and variance
    mean_emb1 = np.mean(emb1, axis=1).reshape(-1, 1)
    var_emb1 = np.var(emb1, axis=1).reshape(-1, 1)
    mean_emb2 = np.mean(emb2, axis=1).reshape(-1, 1)
    var_emb2 = np.var(emb2, axis=1).reshape(-1, 1)

    # Combine all features
    combined_features = np.concatenate([diff, mean_emb1, var_emb1, mean_emb2, var_emb2, cos_sim_reshaped], axis=1)
    X_final = np.concatenate([X_train, combined_features], axis=1)

    return X_final


In [93]:
X_train = feature_engineering(X_train)
print(X_train.shape)

(55686, 1157)


In [94]:
X_val = feature_engineering(X_val)

In [95]:
print(X_train.shape)
print(X_val.shape)
print(y_train.shape)
print(y_val.shape)

(55686, 1157)
(188, 1157)
(55686,)
(188,)


In [98]:
#1. Logistic Regression
# Initialize the model
LR_model = LogisticRegression(C=10, max_iter = 1000)

# Train the model
LR_model.fit(X_train, y_train)

# Predict on validation set
y_pred = LR_model.predict(X_val)

# Evaluate the model
accuracy = accuracy_score(y_val, y_pred)
print(f'Validation Accuracy: {accuracy}')

Validation Accuracy: 0.9414893617021277


In [82]:
# 2. SVM
# Initialize the SVM model
# You can adjust the kernel and other hyperparameters as needed
SVM_model = SVC(C=2)

# Train the model
SVM_model.fit(X_train, y_train)

# Predict on the validation set
y_pred = SVM_model.predict(X_val)

# Evaluate the model
accuracy = accuracy_score(y_val, y_pred)
print(f'Validation Accuracy: {accuracy}')

Validation Accuracy: 0.9308510638297872


Validation Accuracy: 0.8490666666666666


In [53]:
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

def ANN(dropout_rate = 0.7, neuron_size = 128):
    NN_model = Sequential([
        Dense(16, kernel_initializer=HeNormal(), input_shape=(X_train.shape[1],)),
        BatchNormalization(),
        Activation('relu'),
        Dropout(0.7),

        Dense(1, activation='sigmoid')
    ])
    NN_model.compile(optimizer=Adam(learning_rate = 0.001), loss='binary_crossentropy', metrics=['accuracy'])
    NN_model.fit(X_train, y_train, epochs=50, batch_size=300, validation_data=(X_val, y_val), callbacks=[early_stopping])
    return NN_model
NN_model = ANN()
#NN_model.compile(optimizer=SGD(lr=0.001, momentum=0.8), loss='binary_crossentropy', metrics=['accuracy'])

# Evaluate the model on validation set
_, accuracy = NN_model.evaluate(X_val, y_val)
print(f'Validation Accuracy: {accuracy}')

2023-12-04 16:50:39.386565: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M2
2023-12-04 16:50:39.386807: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 8.00 GB
2023-12-04 16:50:39.387415: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 2.67 GB
2023-12-04 16:50:39.387658: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:306] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2023-12-04 16:50:39.388010: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:272] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


Epoch 1/50


2023-12-04 16:50:40.538088: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Validation Accuracy: 0.8962666392326355


In [92]:
from itertools import product
from tensorflow.keras.callbacks import EarlyStopping

# Fine tuning neural nets
def ANN(dropout_rate1, neuron_size1, batch_size, X_train, y_train):
    NN_model = Sequential([
        Dense(neuron_size1, kernel_initializer=HeNormal(), input_shape=(X_train.shape[1],)),
        BatchNormalization(),
        Activation('relu'),
        Dropout(dropout_rate1),

        Dense(1, activation='sigmoid')
    ])
    NN_model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])
    NN_model.fit(X_train, y_train, epochs=30, batch_size=batch_size, validation_data=(X_val, y_val), callbacks=[early_stopping])
    return NN_model

# Parameter grid
param_dist = {
    'dropout_rate1': [0.4, 0.5, 0.6, 0.7],
    'neuron_size1': [16, 32, 64, 128, 256],
    'batch_size': [50, 75, 100, 200, 300]
}


best_accuracy = 0 
best_params = {}

# Iterate over all combinations of parameters
for dropout_rate1, neuron_size1, batch_size in product(param_dist['dropout_rate1'], param_dist['neuron_size1'], param_dist['batch_size']):
    
    model = ANN(dropout_rate1, neuron_size1, batch_size, X_train, y_train)
    
    _, accuracy = model.evaluate(X_val, y_val)

    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_params = {'dropout_rate1': dropout_rate1, 'neuron_size1': neuron_size1, 'batch_size': batch_size}

    print(f"Dropout1: {dropout_rate1}, Neuron Size1: {neuron_size1}, batch size: {batch_size}, Validation Accuracy: {accuracy}")

print(f'Best Validation Accuracy: {best_accuracy} with parameters {best_params}')


Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.4, Neuron Size1: 16, batch size: 50, Validation Accuracy: 0.8984000086784363
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.4, Neuron Size1: 16, batch size: 75, Validation Accuracy: 0.8989333510398865
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.4, Neuron Size1: 16, batch size: 100, Validation Accuracy: 0.8941333293914795
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Dropout1: 0.4, Neuron Size1: 16, batch size: 200, Validation Accuracy: 0.8967999815940857
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Dropout1: 0.4, Neuron Size1: 16, batch size: 300, Validation Accuracy: 0.892799973487854
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Dropout1: 0.4,

Dropout1: 0.4, Neuron Size1: 64, batch size: 100, Validation Accuracy: 0.8989333510398865
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.4, Neuron Size1: 64, batch size: 200, Validation Accuracy: 0.8967999815940857
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.4, Neuron Size1: 64, batch size: 300, Validation Accuracy: 0.8935999870300293
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Dropout1: 0.4, Neuron Size1: 128, batch size: 50, Validation Accuracy: 0.8925333619117737
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Dropout1: 0.4, Neuron Size1: 128, batch size: 75, Validation Accuracy: 0.8952000141143799
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Dropout1: 0.4, Neuron Size1: 128, batch size: 100, Validation Accuracy: 0.8930666446685791
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Dropout1

Epoch 8/30
Dropout1: 0.4, Neuron Size1: 128, batch size: 300, Validation Accuracy: 0.8986666798591614
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Dropout1: 0.4, Neuron Size1: 256, batch size: 50, Validation Accuracy: 0.8970666527748108
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Dropout1: 0.4, Neuron Size1: 256, batch size: 75, Validation Accuracy: 0.8938666582107544
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Dropout1: 0.4, Neuron Size1: 256, batch size: 100, Validation Accuracy: 0.8957333564758301
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Dropout1: 0.4, Neuron Size1: 256, batch size: 200, Validation Accuracy: 0.8890666961669922
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.4, Neuron Size1: 256, batch size: 300, Validation Accuracy: 0.8965333104133606
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch

Dropout1: 0.5, Neuron Size1: 16, batch size: 75, Validation Accuracy: 0.8970666527748108
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Dropout1: 0.5, Neuron Size1: 16, batch size: 100, Validation Accuracy: 0.9007999897003174
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Dropout1: 0.5, Neuron Size1: 16, batch size: 200, Validation Accuracy: 0.8984000086784363
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Dropout1: 0.5, Neuron Size1: 16, batch size: 300, Validation Accuracy: 0.892799973487854
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.5, Neuron Size1: 32, batch size: 50, Validation Accuracy: 0.8986666798591614
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Dropout1: 0.5, Neuron Size1: 32, batch size: 75, Validation Accuracy: 0.897066

Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.5, Neuron Size1: 64, batch size: 300, Validation Accuracy: 0.8981333374977112
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Dropout1: 0.5, Neuron Size1: 128, batch size: 50, Validation Accuracy: 0.8989333510398865
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Dropout1: 0.5, Neuron Size1: 128, batch size: 75, Validation Accuracy: 0.9007999897003174
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Dropout1: 0.5, Neuron Size1: 128, batch size: 100, Validation Accuracy: 0.8973333239555359
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Dropout1: 0.5, Neuron Size1: 128, batch size: 200, Validation Accuracy: 0.8952000141143799
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.5, Neuron Size1: 128, batch size: 300, Validation Accuracy: 0.8967999815940857
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 

Epoch 5/30
Epoch 6/30
Epoch 7/30
Dropout1: 0.5, Neuron Size1: 256, batch size: 75, Validation Accuracy: 0.8973333239555359
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Dropout1: 0.5, Neuron Size1: 256, batch size: 100, Validation Accuracy: 0.895466685295105
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Dropout1: 0.5, Neuron Size1: 256, batch size: 200, Validation Accuracy: 0.8992000222206116
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.5, Neuron Size1: 256, batch size: 300, Validation Accuracy: 0.8962666392326355
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Dropout1: 0.6, Neuron Size1: 16, batch size: 50, Validation Accuracy: 0.8933333158493042
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Dropout1: 0.6, Neuron Size1: 16, batch size: 75, Validation Accuracy: 0.896000027656555

Epoch 7/30
Epoch 8/30
Dropout1: 0.6, Neuron Size1: 32, batch size: 200, Validation Accuracy: 0.8999999761581421
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Dropout1: 0.6, Neuron Size1: 32, batch size: 300, Validation Accuracy: 0.8973333239555359
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Dropout1: 0.6, Neuron Size1: 64, batch size: 50, Validation Accuracy: 0.8944000005722046
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Dropout1: 0.6, Neuron Size1: 64, batch size: 75, Validation Accuracy: 0.8941333293914795
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.6, Neuron Size1: 64, batch size: 100, Validation Accuracy: 0.8981333374977112
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.6, Neuron Size1: 64, batch size: 200, Validation Accuracy: 0.896533

Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Dropout1: 0.6, Neuron Size1: 256, batch size: 75, Validation Accuracy: 0.8957333564758301
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Dropout1: 0.6, Neuron Size1: 256, batch size: 100, Validation Accuracy: 0.8930666446685791
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Dropout1: 0.6, Neuron Size1: 256, batch size: 200, Validation Accuracy: 0.8984000086784363
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.6, Neuron Size1: 256, batch size: 300, Validation Accuracy: 0.8967999815940857
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Dropout1: 0.7, Neuron Size1: 16, batch size: 50, Validation Accuracy: 0.8970666527748108
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.7, Neuron Size1: 16, batch size: 75, Validation Acc

Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Dropout1: 0.7, Neuron Size1: 32, batch size: 100, Validation Accuracy: 0.8986666798591614
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.7, Neuron Size1: 32, batch size: 200, Validation Accuracy: 0.8952000141143799
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Dropout1: 0.7, Neuron Size1: 32, batch size: 300, Validation Accuracy: 0.8967999815940857
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.7, Neuron Size1: 64, batch size: 50, Validation Accuracy: 0.8981333374977112
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Dropout1: 0.7, Neuron Size1: 64, batch size: 75, Validation Accuracy: 0.8989333510398865
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/

Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.7, Neuron Size1: 128, batch size: 200, Validation Accuracy: 0.8967999815940857
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Dropout1: 0.7, Neuron Size1: 128, batch size: 300, Validation Accuracy: 0.8965333104133606
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Dropout1: 0.7, Neuron Size1: 256, batch size: 50, Validation Accuracy: 0.8965333104133606
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.7, Neuron Size1: 256, batch size: 75, Validation Accuracy: 0.8965333104133606
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Dropout1: 0.7, Neuron Size1: 256, batch size: 100, Validation Accuracy: 0.8962666392326355
Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Dropout1: 0.7, Neuron Size1: 256, batch size: 200, Validation Accuracy: 0.89626663923

In [106]:
# Load the test data
test_data = np.load('test.npz')
test_uid = test_data['uid']
test_emb1 = test_data['emb1']
test_emb2 = test_data['emb2']

print(test_emb1.shape)

(6250, 384)


In [107]:
# apply feature engineering on the test data
X_test= np.concatenate((test_emb1, test_emb2), axis=1)
print(X_test.shape)
X_test = feature_engineering(X_test)
print(X_test.shape)

(6250, 768)
(6250, 1157)


In [108]:
predictions = LR_model.predict(X_test)
predicted_classes = (predictions > 0.5).astype(int).flatten()

In [109]:
import pandas as pd
results_df = pd.DataFrame({'uid': test_uid, 'preference': predicted_classes})

# Export the DataFrame to a CSV file
results_df.to_csv('LR_predictions4.csv', index=False)