In [1]:
import time
from datasets import *
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import LinearSVC
from SOCT.LinearClassifierHeuristic import LinearClassifierHeuristic
from SOCT.SOCTStumpHeuristic import SOCTStumpHeuristic
from SOCT.SOCTFull import SOCTFull
from SOCT.SOCTBenders import SOCTBenders
from SOCT.utils import *

In [2]:
# Experiment settings reused by the model cells below.
max_depth = 2              # depth passed to every tree model in this notebook section
heuristic_time_limit = 60  # budget handed to the stump heuristic (presumably seconds)
time_limit = 600           # budget handed to the exact S-OCT solvers (Gurobi TimeLimit)

# Load the dataset and hold out a test split; the fixed seed keeps the split
# repeatable across runs.
X, y = load_spambase()
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Every column is declared numerical for preprocessing. The column list is read
# from the raw training frame before X_train is rebound to the processed data.
X_train, X_test = preprocess_dataset(
    X_train,
    X_test,
    numerical_features=X_train.columns,
)
print("X_train dimensions:", X_train.shape)

X_train dimensions: (3450, 57)


In [3]:
# CART baseline: scikit-learn's DecisionTreeClassifier, configured with the same
# depth and ccp_alpha that the S-OCT cells use so the results are comparable.
cart = DecisionTreeClassifier(
    random_state=0,
    max_depth=max_depth,
    ccp_alpha=0.00001,
)
cart.fit(X_train, y_train)

# Score on the training split first, then on the held-out split.
train_acc, test_acc = cart.score(X_train, y_train), cart.score(X_test, y_test)
print("Train/test accuracy:", train_acc, test_acc)

Train/test accuracy: 0.8539130434782609 0.840139009556907


In [4]:
# S-OCT Full without warm start
# The clock covers model construction and fit only; scoring runs after it stops.
# perf_counter() is used instead of time() because it is monotonic and
# high-resolution, so the reported duration cannot be skewed by wall-clock
# adjustments while the solver is running.
start_time = time.perf_counter()
clf = SOCTFull(max_depth=max_depth, ccp_alpha=0.00001, time_limit=time_limit)
clf.fit(X_train, y_train)
end_time = time.perf_counter()
train_acc = clf.score(X_train, y_train)
test_acc = clf.score(X_test, y_test)
print("Train/test accuracy, running time:", train_acc, test_acc, end_time-start_time)

Set parameter Username
Academic license - for non-commercial use only - expires 2022-11-10
Set parameter TimeLimit to value 600
Gurobi Optimizer version 9.5.0 build v9.5.0rc5 (win64)
Thread count: 6 physical cores, 12 logical processors, using up to 12 threads
Optimize a model with 72799 rows, 38306 columns and 416591 nonzeros
Model fingerprint: 0xc9220af7
Variable types: 24506 continuous, 13800 integer (13800 binary)
Coefficient statistics:
  Matrix range     [2e-05, 2e+00]
  Objective range  [1e-05, 3e-04]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 2e+00]
Presolve removed 13983 rows and 13889 columns
Presolve time: 1.11s
Presolved: 58816 rows, 24417 columns, 395375 nonzeros
Variable types: 14149 continuous, 10268 integer (10268 binary)
Found heuristic solution: objective -0.6078261

Deterministic concurrent LP optimizer: primal and dual simplex (primal and dual model)
Showing first log only...

Root relaxation presolved: 58816 rows, 24417 columns, 395375 nonzeros

C

In [5]:
# S-OCT Benders without warm start
# The clock covers model construction and fit only; scoring runs after it stops.
# perf_counter() is used instead of time() because it is monotonic and
# high-resolution, so the reported duration cannot be skewed by wall-clock
# adjustments while the solver is running.
start_time = time.perf_counter()
clf = SOCTBenders(max_depth=max_depth, ccp_alpha=0.00001, time_limit=time_limit)
clf.fit(X_train, y_train)
end_time = time.perf_counter()
train_acc = clf.score(X_train, y_train)
test_acc = clf.score(X_test, y_test)
print("Train/test accuracy, running time:", train_acc, test_acc, end_time-start_time)

Set parameter LazyConstraints to value 1
Set parameter TimeLimit to value 600
Gurobi Optimizer version 9.5.0 build v9.5.0rc5 (win64)
Thread count: 6 physical cores, 12 logical processors, using up to 12 threads
Optimize a model with 51754 rows, 37961 columns and 110408 nonzeros
Model fingerprint: 0x9f116f26
Variable types: 24161 continuous, 13800 integer (13800 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e-05, 3e-04]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]
Presolve removed 13804 rows and 13804 columns
Presolve time: 0.09s
Presolved: 37950 rows, 24157 columns, 82800 nonzeros
Variable types: 13807 continuous, 10350 integer (10350 binary)
Found heuristic solution: objective -0.6078261

Root relaxation: objective -9.999950e-01, 24836 iterations, 4.05 seconds (12.39 work units)
Total elapsed time = 5.32s

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  Depth IntInf | Incumben

 34833 24246   -0.99968 1815  643   -0.60869   -0.99999  64.3%  46.3  604s

Cutting planes:
  Gomory: 84
  MIR: 1
  Flow cover: 1
  Lazy constraints: 832

Explored 35553 nodes (1728317 simplex iterations) in 606.66 seconds (635.06 work units)
Thread count was 12 (of 12 available processors)

Solution count 2: -0.608686 -0.607826 

Time limit reached
Best objective -6.086856521739e-01, best bound -9.999883333334e-01, gap 64.2865%

User-callback calls 91613, time in user-callback 297.72 sec
Train/test accuracy, running time: 0.6086956521739131 0.5977410947002606 608.4039809703827


In [6]:
# SVM heuristic
# Builds a tree with LinearClassifierHeuristic using a LinearSVC at the given
# depth. The clock covers construction and fit only.
# perf_counter() is used instead of time() because it is monotonic and
# high-resolution; that matters here since this fit takes only a few seconds
# and running_time is later subtracted from the exact solvers' time budget.
start_time = time.perf_counter()
clf = LinearClassifierHeuristic(max_depth=max_depth, linear_classifier=LinearSVC(random_state=0))
clf.fit(X_train, y_train)
end_time = time.perf_counter()
train_acc = clf.score(X_train, y_train)
test_acc = clf.score(X_test, y_test)
running_time = end_time - start_time
print("Train/test accuracy, running time:", train_acc, test_acc, running_time)
# (branch rules, classification rules) pair that the warm-started S-OCT cells
# pass as warm_start_tree.
warm_start = clf.branch_rules_, clf.classification_rules_

Train/test accuracy, running time: 0.9165217391304348 0.89748045178106 2.1284866333007812


In [7]:
# S-OCT Full with SVM warm start
# Hidden-state dependency: warm_start and running_time must still hold the
# values produced by the SVM heuristic cell. The stump heuristic cell rebinds
# both names, so run this cell before that one.
# NOTE(review): the captured log for this cell reports "User MIP start did not
# produce a new incumbent solution", whereas the Benders cell loads the same
# start with objective -0.916492 -- worth checking SOCTFull's warm-start handling.

# Charge the heuristic's runtime against the overall budget; clamp at zero so a
# heuristic that outlasts time_limit cannot yield a negative solver limit.
remaining_budget = max(0.0, time_limit - running_time)

# perf_counter() is monotonic and high-resolution, unlike wall-clock time().
start_time = time.perf_counter()
clf = SOCTFull(max_depth=max_depth, ccp_alpha=0.00001, warm_start_tree=warm_start, time_limit=remaining_budget)
clf.fit(X_train, y_train)
end_time = time.perf_counter()
train_acc = clf.score(X_train, y_train)
test_acc = clf.score(X_test, y_test)
print("Train/test accuracy, running time:", train_acc, test_acc, end_time-start_time)

Set parameter TimeLimit to value 5.9787151336669922e+02
Gurobi Optimizer version 9.5.0 build v9.5.0rc5 (win64)
Thread count: 6 physical cores, 12 logical processors, using up to 12 threads
Optimize a model with 72799 rows, 38306 columns and 416591 nonzeros
Model fingerprint: 0xfe231bb5
Variable types: 24506 continuous, 13800 integer (13800 binary)
Coefficient statistics:
  Matrix range     [2e-05, 2e+00]
  Objective range  [1e-05, 3e-04]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 2e+00]

User MIP start did not produce a new incumbent solution

Presolve removed 13983 rows and 13889 columns
Presolve time: 0.98s
Presolved: 58816 rows, 24417 columns, 395375 nonzeros
Variable types: 14149 continuous, 10268 integer (10268 binary)
Found heuristic solution: objective -0.6078261

Deterministic concurrent LP optimizer: primal and dual simplex (primal and dual model)
Showing first log only...

Root relaxation presolved: 58816 rows, 24417 columns, 395375 nonzeros

Concurrent spin

In [8]:
# S-OCT Benders with SVM warm start
# Hidden-state dependency: warm_start and running_time must still hold the
# values produced by the SVM heuristic cell. The stump heuristic cell rebinds
# both names, so run this cell before that one.

# Charge the heuristic's runtime against the overall budget; clamp at zero so a
# heuristic that outlasts time_limit cannot yield a negative solver limit.
remaining_budget = max(0.0, time_limit - running_time)

# perf_counter() is monotonic and high-resolution, unlike wall-clock time().
start_time = time.perf_counter()
clf = SOCTBenders(max_depth=max_depth, ccp_alpha=0.00001, warm_start_tree=warm_start, time_limit=remaining_budget)
clf.fit(X_train, y_train)
end_time = time.perf_counter()
train_acc = clf.score(X_train, y_train)
test_acc = clf.score(X_test, y_test)
print("Train/test accuracy, running time:", train_acc, test_acc, end_time-start_time)

Set parameter LazyConstraints to value 1
Set parameter TimeLimit to value 5.9787151336669922e+02
Gurobi Optimizer version 9.5.0 build v9.5.0rc5 (win64)
Thread count: 6 physical cores, 12 logical processors, using up to 12 threads
Optimize a model with 51754 rows, 37961 columns and 110408 nonzeros
Model fingerprint: 0x8e34c2c7
Variable types: 24161 continuous, 13800 integer (13800 binary)
Coefficient statistics:
  Matrix range     [1e+00, 1e+00]
  Objective range  [1e-05, 3e-04]
  Bounds range     [1e+00, 1e+00]
  RHS range        [1e+00, 1e+00]

Loaded user MIP start with objective -0.916492

Presolve removed 13804 rows and 13804 columns
Presolve time: 0.10s
Presolved: 37950 rows, 24157 columns, 82800 nonzeros
Variable types: 13807 continuous, 10350 integer (10350 binary)

Root relaxation: objective -9.999950e-01, 24836 iterations, 4.03 seconds (12.39 work units)
Total elapsed time = 5.26s

    Nodes    |    Current Node    |     Objective Bounds      |     Work
 Expl Unexpl |  Obj  De


Cutting planes:
  Gomory: 197
  MIR: 1
  Flow cover: 7
  Lazy constraints: 1008

Explored 33117 nodes (3594441 simplex iterations) in 606.98 seconds (536.37 work units)
Thread count was 12 (of 12 available processors)

Solution count 1: -0.916492 

Time limit reached
Best objective -9.164917391304e-01, best bound -9.999850000000e-01, gap 9.1101%

User-callback calls 81129, time in user-callback 338.02 sec
Train/test accuracy, running time: 0.9165217391304348 0.89748045178106 609.3542494773865


In [9]:
# S-OCT stump heuristic
# Fits SOCTStumpHeuristic under its own, shorter budget (heuristic_time_limit).
# The clock covers construction and fit only.
# perf_counter() is used instead of time() because it is monotonic and
# high-resolution, so the measured duration is not affected by wall-clock
# adjustments.
start_time = time.perf_counter()
clf = SOCTStumpHeuristic(max_depth=max_depth, time_limit=heuristic_time_limit)
clf.fit(X_train, y_train)
end_time = time.perf_counter()
train_acc = clf.score(X_train, y_train)
test_acc = clf.score(X_test, y_test)
running_time = end_time - start_time
print("Train/test accuracy, running time:", train_acc, test_acc, running_time)
# Rebinds warm_start (and running_time above), replacing the SVM heuristic's
# values; presumably consumed by warm-started cells that follow -- re-run the
# SVM heuristic cell before re-running any SVM warm-start cell.
warm_start = clf.branch_rules_, clf.classification_rules_

Train/test accuracy, running time: 0.6078260869565217 0.6003475238922676 32.29209232330322
