In [10]:
# Import
import itertools
import time

import numpy as np
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.svm import LinearSVC

from submit import my_map

In [11]:
# Read the train and test tables of challenge-response pairs (one example per row).
Z_trn, Z_tst = (np.loadtxt(fname) for fname in ("secret_trn.txt", "secret_tst.txt"))

# Every column except the last is used as a feature; the last column is the target.
X_train, y_train = Z_trn[:, :-1], Z_trn[:, -1]
X_test, y_test = Z_tst[:, :-1], Z_tst[:, -1]

# Run both splits through the project's feature map (train first, then test).
X_train_mapped, X_test_mapped = my_map(X_train), my_map(X_test)

print("Feature space shape:", X_train_mapped.shape)

Feature space shape: (6400, 255)


In [12]:
# Candidate values for each LinearSVC hyperparameter.
svc_grid = dict(
    C=[0.01, 1, 10, 100],
    loss=['hinge', 'squared_hinge'],
    penalty=['l2', 'l1'],
    tol=[1e-1, 1e-3, 1e-5],
)

# Candidate values for each LogisticRegression hyperparameter.
# Only liblinear is listed as solver because it supports both l1 and l2.
log_grid = dict(
    C=[0.01, 1, 10, 100],
    penalty=['l2', 'l1'],
    tol=[1e-1, 1e-3, 1e-5],
    solver=['liblinear'],
)

In [14]:
# Exhaustive sweep over svc_grid: fit one LinearSVC per combination and append
# a record with its hyperparameters, test accuracy and fit time to svc_results.
# NOTE(review): each configuration is scored on the test split and the results
# are later ranked by that score, so the winner is selected on the same data it
# is evaluated on. Consider a validation split or cross-validation instead.
svc_results = []

print("Evaluating LinearSVC hyperparameters...\n")

# itertools.product walks the grid in the same order as nested loops over
# C -> loss -> penalty -> tol, so the row order of svc_results is unchanged.
svc_combos = list(itertools.product(
    svc_grid['C'], svc_grid['loss'], svc_grid['penalty'], svc_grid['tol']
))
total = len(svc_combos)

for counter, (C, loss, penalty, tol) in enumerate(svc_combos, start=1):
    print(f"[{counter}/{total}] Trying: C={C}, loss={loss}, penalty={penalty}, tol={tol}")

    # Skip invalid combination (skipped entries still count toward the progress index)
    if penalty == 'l1' and loss == 'hinge':
        print("    Skipped (invalid config for SVC)")
        continue

    try:
        # l1-penalised models are fitted with dual=False; every other case uses dual=True.
        dual = penalty != 'l1'
        model = LinearSVC(C=C, loss=loss, penalty=penalty, tol=tol,
                          dual=dual, max_iter=3000, random_state=42)

        # perf_counter is a monotonic, high-resolution clock meant for measuring
        # short intervals; time.time() can be coarse or jump with clock adjustments.
        # NOTE(review): this is still a single-run timing, so small differences
        # between configurations are within noise.
        start = time.perf_counter()
        model.fit(X_train_mapped, y_train)
        elapsed = time.perf_counter() - start

        y_pred = model.predict(X_test_mapped)
        acc = accuracy_score(y_test, y_pred)

        print(f"    ✅ Accuracy: {acc:.4f}, Train Time: {elapsed:.2f}s\n")

        svc_results.append({
            'model': 'LinearSVC', 'C': C, 'loss': loss, 'penalty': penalty,
            'tol': tol, 'acc': acc, 'time': elapsed
        })
    except Exception as e:
        # Best-effort sweep: report the failure and continue with the next combination.
        print(f"    ❌ Skipped due to error: {e}\n")

Evaluating LinearSVC hyperparameters...

[1/48] Trying: C=0.01, loss=hinge, penalty=l2, tol=0.1
    ✅ Accuracy: 1.0000, Train Time: 0.03s

[2/48] Trying: C=0.01, loss=hinge, penalty=l2, tol=0.001
    ✅ Accuracy: 1.0000, Train Time: 0.03s

[3/48] Trying: C=0.01, loss=hinge, penalty=l2, tol=1e-05
    ✅ Accuracy: 1.0000, Train Time: 0.04s

[4/48] Trying: C=0.01, loss=hinge, penalty=l1, tol=0.1
    Skipped (invalid config for SVC)
[5/48] Trying: C=0.01, loss=hinge, penalty=l1, tol=0.001
    Skipped (invalid config for SVC)
[6/48] Trying: C=0.01, loss=hinge, penalty=l1, tol=1e-05
    Skipped (invalid config for SVC)
[7/48] Trying: C=0.01, loss=squared_hinge, penalty=l2, tol=0.1
    ✅ Accuracy: 1.0000, Train Time: 0.18s

[8/48] Trying: C=0.01, loss=squared_hinge, penalty=l2, tol=0.001
    ✅ Accuracy: 1.0000, Train Time: 0.65s

[9/48] Trying: C=0.01, loss=squared_hinge, penalty=l2, tol=1e-05
    ✅ Accuracy: 1.0000, Train Time: 0.62s

[10/48] Trying: C=0.01, loss=squared_hinge, penalty=l1, tol



    ✅ Accuracy: 1.0000, Train Time: 10.14s

[25/48] Trying: C=10, loss=hinge, penalty=l2, tol=0.1
    ✅ Accuracy: 1.0000, Train Time: 0.03s

[26/48] Trying: C=10, loss=hinge, penalty=l2, tol=0.001
    ✅ Accuracy: 1.0000, Train Time: 0.03s

[27/48] Trying: C=10, loss=hinge, penalty=l2, tol=1e-05
    ✅ Accuracy: 1.0000, Train Time: 0.03s

[28/48] Trying: C=10, loss=hinge, penalty=l1, tol=0.1
    Skipped (invalid config for SVC)
[29/48] Trying: C=10, loss=hinge, penalty=l1, tol=0.001
    Skipped (invalid config for SVC)
[30/48] Trying: C=10, loss=hinge, penalty=l1, tol=1e-05
    Skipped (invalid config for SVC)
[31/48] Trying: C=10, loss=squared_hinge, penalty=l2, tol=0.1
    ✅ Accuracy: 1.0000, Train Time: 0.03s

[32/48] Trying: C=10, loss=squared_hinge, penalty=l2, tol=0.001
    ✅ Accuracy: 1.0000, Train Time: 0.03s

[33/48] Trying: C=10, loss=squared_hinge, penalty=l2, tol=1e-05




    ✅ Accuracy: 1.0000, Train Time: 23.90s

[34/48] Trying: C=10, loss=squared_hinge, penalty=l1, tol=0.1
    ✅ Accuracy: 1.0000, Train Time: 0.08s

[35/48] Trying: C=10, loss=squared_hinge, penalty=l1, tol=0.001
    ✅ Accuracy: 1.0000, Train Time: 0.09s

[36/48] Trying: C=10, loss=squared_hinge, penalty=l1, tol=1e-05




    ✅ Accuracy: 1.0000, Train Time: 14.96s

[37/48] Trying: C=100, loss=hinge, penalty=l2, tol=0.1
    ✅ Accuracy: 1.0000, Train Time: 0.03s

[38/48] Trying: C=100, loss=hinge, penalty=l2, tol=0.001
    ✅ Accuracy: 1.0000, Train Time: 0.03s

[39/48] Trying: C=100, loss=hinge, penalty=l2, tol=1e-05
    ✅ Accuracy: 1.0000, Train Time: 0.03s

[40/48] Trying: C=100, loss=hinge, penalty=l1, tol=0.1
    Skipped (invalid config for SVC)
[41/48] Trying: C=100, loss=hinge, penalty=l1, tol=0.001
    Skipped (invalid config for SVC)
[42/48] Trying: C=100, loss=hinge, penalty=l1, tol=1e-05
    Skipped (invalid config for SVC)
[43/48] Trying: C=100, loss=squared_hinge, penalty=l2, tol=0.1
    ✅ Accuracy: 1.0000, Train Time: 0.03s

[44/48] Trying: C=100, loss=squared_hinge, penalty=l2, tol=0.001
    ✅ Accuracy: 1.0000, Train Time: 0.03s

[45/48] Trying: C=100, loss=squared_hinge, penalty=l2, tol=1e-05




    ✅ Accuracy: 1.0000, Train Time: 23.69s

[46/48] Trying: C=100, loss=squared_hinge, penalty=l1, tol=0.1
    ✅ Accuracy: 1.0000, Train Time: 0.07s

[47/48] Trying: C=100, loss=squared_hinge, penalty=l1, tol=0.001
    ✅ Accuracy: 1.0000, Train Time: 0.08s

[48/48] Trying: C=100, loss=squared_hinge, penalty=l1, tol=1e-05
    ✅ Accuracy: 1.0000, Train Time: 26.32s





In [15]:
# Exhaustive sweep over log_grid: fit one LogisticRegression per combination and
# append a record with its hyperparameters, test accuracy and fit time to log_results.
# NOTE(review): each configuration is scored on the test split and the results
# are later ranked by that score, so the winner is selected on the same data it
# is evaluated on. Consider a validation split or cross-validation instead.
log_results = []

print("Evaluating LogisticRegression hyperparameters...\n")

# itertools.product walks the grid in the same order as nested loops over
# C -> penalty -> tol -> solver, so the row order of log_results is unchanged.
log_combos = list(itertools.product(
    log_grid['C'], log_grid['penalty'], log_grid['tol'], log_grid['solver']
))
total = len(log_combos)

for counter, (C, penalty, tol, solver) in enumerate(log_combos, start=1):
    print(f"[{counter}/{total}] Trying: C={C}, penalty={penalty}, tol={tol}, solver={solver}")

    try:
        # The model is fitted without an intercept term (fit_intercept=False).
        model = LogisticRegression(C=C, penalty=penalty, tol=tol, solver=solver,
                                   max_iter=5000, fit_intercept=False, random_state=42)

        # perf_counter is a monotonic, high-resolution clock meant for measuring
        # short intervals; time.time() can be coarse or jump with clock adjustments.
        start = time.perf_counter()
        model.fit(X_train_mapped, y_train)
        elapsed = time.perf_counter() - start

        y_pred = model.predict(X_test_mapped)
        acc = accuracy_score(y_test, y_pred)

        print(f"    ✅ Accuracy: {acc:.4f}, Train Time: {elapsed:.2f}s\n")

        # 'loss' is a placeholder so these rows share the LinearSVC record schema.
        log_results.append({
            'model': 'LogisticRegression', 'C': C, 'loss': '-', 'penalty': penalty,
            'tol': tol, 'acc': acc, 'time': elapsed
        })
    except Exception as e:
        # Best-effort sweep: report the failure and continue with the next combination.
        print(f"    ❌ Skipped due to error: {e}\n")

Evaluating LogisticRegression hyperparameters...

[1/24] Trying: C=0.01, penalty=l2, tol=0.1, solver=liblinear
    ✅ Accuracy: 1.0000, Train Time: 0.09s

[2/24] Trying: C=0.01, penalty=l2, tol=0.001, solver=liblinear
    ✅ Accuracy: 1.0000, Train Time: 0.15s

[3/24] Trying: C=0.01, penalty=l2, tol=1e-05, solver=liblinear
    ✅ Accuracy: 1.0000, Train Time: 0.17s

[4/24] Trying: C=0.01, penalty=l1, tol=0.1, solver=liblinear
    ✅ Accuracy: 0.9738, Train Time: 0.06s

[5/24] Trying: C=0.01, penalty=l1, tol=0.001, solver=liblinear
    ✅ Accuracy: 0.9738, Train Time: 0.07s

[6/24] Trying: C=0.01, penalty=l1, tol=1e-05, solver=liblinear
    ✅ Accuracy: 0.9738, Train Time: 0.10s

[7/24] Trying: C=1, penalty=l2, tol=0.1, solver=liblinear
    ✅ Accuracy: 1.0000, Train Time: 0.16s

[8/24] Trying: C=1, penalty=l2, tol=0.001, solver=liblinear
    ✅ Accuracy: 1.0000, Train Time: 0.36s

[9/24] Trying: C=1, penalty=l2, tol=1e-05, solver=liblinear
    ✅ Accuracy: 1.0000, Train Time: 0.47s

[10/24] Try

In [20]:
# Merge both sweeps into one table (LinearSVC records first, then LogisticRegression).
# Ranking: highest accuracy first, with lowest training time as the tie-breaker.
# The records carry no mapping-time column, so it plays no part in the ordering.
all_results = pd.DataFrame([*svc_results, *log_results]).sort_values(
    by=['acc', 'time'],
    ascending=[False, True],
)

# Show the ten best-ranked rows.
print("Top 10 configurations by accuracy (with tie-breaking):")
print(all_results.head(10))

Top 10 configurations by accuracy (with tie-breaking):
        model      C           loss penalty      tol  acc      time
30  LinearSVC  100.0  squared_hinge      l2  0.10000  1.0  0.025379
31  LinearSVC  100.0  squared_hinge      l2  0.00100  1.0  0.025506
29  LinearSVC  100.0          hinge      l2  0.00001  1.0  0.025774
28  LinearSVC  100.0          hinge      l2  0.00100  1.0  0.026091
11  LinearSVC    1.0          hinge      l2  0.00001  1.0  0.026429
20  LinearSVC   10.0          hinge      l2  0.00001  1.0  0.026662
22  LinearSVC   10.0  squared_hinge      l2  0.00100  1.0  0.027081
19  LinearSVC   10.0          hinge      l2  0.00100  1.0  0.027442
21  LinearSVC   10.0  squared_hinge      l2  0.10000  1.0  0.027641
10  LinearSVC    1.0          hinge      l2  0.00100  1.0  0.028407


In [21]:
# Persist the ranked table for later analysis; the DataFrame index is not written out.
all_results.to_csv(path_or_buf="hyperparameter_results.csv", index=False)

In [22]:
# Emit one comma-separated record; the fourth field is printed as 1 - acc.
# NOTE(review): d_size, t_train, t_map, t_decode and m_dist are not assigned in
# any cell visible here, and acc is whatever value the kernel last held. This
# line relies on kernel state from an unseen or deleted cell and will likely
# raise NameError after Restart & Run All. TODO: restore the defining cell.
print( f"{d_size},{t_train},{t_map},{1 - acc},{t_decode},{m_dist}" )

255.0,0.09034949939999706,0.005223729800013644,0.0,0.0003960710599994854,4.3497558129820845e-16


In [24]:
# Rank only the LinearSVC runs: best accuracy first, shorter training time wins ties.
svc_df = pd.DataFrame(svc_results).sort_values(
    by=['acc', 'time'],
    ascending=[False, True],
)

# Show the ten best-ranked rows.
print("Top 10 LinearSVC configurations by accuracy (with tie-breaking):")
print(svc_df.head(10))

Top 10 LinearSVC configurations by accuracy (with tie-breaking):
        model      C           loss penalty      tol  acc      time
30  LinearSVC  100.0  squared_hinge      l2  0.10000  1.0  0.025379
31  LinearSVC  100.0  squared_hinge      l2  0.00100  1.0  0.025506
29  LinearSVC  100.0          hinge      l2  0.00001  1.0  0.025774
28  LinearSVC  100.0          hinge      l2  0.00100  1.0  0.026091
11  LinearSVC    1.0          hinge      l2  0.00001  1.0  0.026429
20  LinearSVC   10.0          hinge      l2  0.00001  1.0  0.026662
22  LinearSVC   10.0  squared_hinge      l2  0.00100  1.0  0.027081
19  LinearSVC   10.0          hinge      l2  0.00100  1.0  0.027442
21  LinearSVC   10.0  squared_hinge      l2  0.10000  1.0  0.027641
10  LinearSVC    1.0          hinge      l2  0.00100  1.0  0.028407


In [25]:
# Rank only the LogisticRegression runs: best accuracy first, shorter training time wins ties.
log_df = pd.DataFrame(log_results).sort_values(
    by=['acc', 'time'],
    ascending=[False, True],
)

# Show the ten best-ranked rows.
print("Top 10 LogisticRegression configurations by accuracy (with tie-breaking):")
print(log_df.head(10))

Top 10 LogisticRegression configurations by accuracy (with tie-breaking):
                 model       C loss penalty      tol  acc      time
21  LogisticRegression  100.00    -      l1  0.10000  1.0  0.070486
0   LogisticRegression    0.01    -      l2  0.10000  1.0  0.091666
15  LogisticRegression   10.00    -      l1  0.10000  1.0  0.091724
9   LogisticRegression    1.00    -      l1  0.10000  1.0  0.099455
18  LogisticRegression  100.00    -      l2  0.10000  1.0  0.128133
22  LogisticRegression  100.00    -      l1  0.00100  1.0  0.146090
1   LogisticRegression    0.01    -      l2  0.00100  1.0  0.153881
6   LogisticRegression    1.00    -      l2  0.10000  1.0  0.161742
2   LogisticRegression    0.01    -      l2  0.00001  1.0  0.174414
16  LogisticRegression   10.00    -      l1  0.00100  1.0  0.229278
