In [1]:
import SVM.SVC as svc
import global_resources as gr
import numpy as np
import torch
import os
import kmc_torch.kmc as kmc

Current training device: Cuda.


In [2]:
# Example data processing
# Build the dataset path from separate components so os.path.join picks the
# right separator on every OS (a literal backslash only works on Windows).
data_path = os.path.join(gr.default_dir, 'Data', 'breast-cancer-wisconsin.data')

# Read the file and clean it in one chain (no in-place mutation):
#   - '?' marks a missing value in this file -> convert to NaN, then drop those rows
#   - 'id' is only an identifier, not a feature -> drop the column
#   - 'bare_nuclei' is cast to int64 once the '?' rows are gone
df = (
    gr.read_and_return_pd_df(data_path)
    .replace('?', np.nan)
    .dropna()
    .drop(columns=['id'])
    .astype({'bare_nuclei': np.int64})
)

# Set device (gr.set_device() is expected to return a device name string,
# since .capitalize() is called on it below)
device = gr.set_device()
print(f"Current device: {device.capitalize()}.")

# Feature matrix as a float64 numpy array: every column except the 'class' label
X = df.drop(columns=['class']).to_numpy(dtype='float64')
# Same features as a float64 torch tensor placed on the selected device
X_gpu = torch.tensor(X, device=device, dtype=torch.float64)

Reading files from: D:\ImportanFiles\Coding Related\Repositories\Quantitative-Investment-Algorithms\Data\breast-cancer-wisconsin.data
Current device: Cuda.


In [3]:
# Cluster the feature tensor with k = 4. The first two returned values are kept
# as X and y (y holds the per-row cluster labels, as the printed unique values
# show); the last two are discarded.
# NOTE: X is rebound here from the numpy array of the previous cell to the
# value returned by the clustering routine (a torch tensor per the output below).
X, y, _, _ = kmc.WCSS_for_single_k(X_gpu, k=4)

# Sanity check: shapes of features / labels and the distinct label values.
print(X.shape, y.shape, y.unique().tolist(), sep="\n")

Clustering with: k = 4.
Initiating centroids with k being 4...
Initiating centroids with k being 4...
Initiating centroids with k being 4...
Initiating centroids with k being 4...
Initiating centroids with k being 4...
Initiating centroids with k being 4...
Initiating centroids with k being 4...
Initiating centroids with k being 4...
Initiating centroids with k being 4...
Initiating centroids with k being 4...
torch.Size([683, 9])
torch.Size([683])
[0, 1, 2, 3]


In [4]:
# Train the one-vs-one SVM classifiers on the clustered data (one run per
# label pair, per the training log). The log reports hinge loss with an L2
# penalty on the weights and the relative breaker as the stopping rule.
# NOTE(review): exact semantics of delta_loss_breaker / patience /
# relative_breaker live in SVM.SVC - confirm there before tuning.
dic = svc.ovo_train(
    X,
    y,
    num_epochs=10_000,
    l2_penalty=True,
    print_every=500,
    delta_loss_breaker=1e-5,
    patience=10,
    relative=True,
    relative_breaker=1e-5,
)

-------------------------------------------------------------------------------------------------------
Training on label a: 0 and label b: 1
Creating random weights and bias with dtype: torch.float64
Training with loss function: hinge loss with l2 penalty on weights.
Training with relative breaker.
Epoch 0 | Loss: 7.500845804491054 | Relative Ratio: None
Epoch 1 | Loss: 7.291611979943963 | Relative Ratio: 1.0286951397198043
Epoch 500 | Loss: 0.09190152655525698 | Relative Ratio: 1.0014680284675754
Epoch 1000 | Loss: 0.061491573838861634 | Relative Ratio: 1.0017476876735025
Epoch 1500 | Loss: 0.052967911013032526 | Relative Ratio: 1.000056407289852
Epoch 2000 | Loss: 0.051456075117755 | Relative Ratio: 1.0000209586680062
Epoch 2500 | Loss: 0.05123668418542489 | Relative Ratio: 1.0000226746686482
Epoch 3000 | Loss: 0.05098101894136879 | Relative Ratio: 1.000006143568722
Epoch 3500 | Loss: 0.05071839194399095 | Relative Ratio: 1.000006102654233
Epoch 4000 | Loss: 0.050487302992112945 | R

In [5]:
# Run one-vs-one prediction on X with the classifiers trained above (dic).
# y.dtype is passed as the third argument - presumably the dtype for the
# returned labels; confirm against svc.ovo_predict.
votes = svc.ovo_predict(dic, X, y.dtype)

In [6]:
# Convert the prediction result into a pandas object via the project helper
# and render it with the notebook's rich display.
df_1 = gr.detach_to_pd(votes)
display(df_1)

Unnamed: 0,0
0,1
1,3
2,1
3,2
4,1
...,...
678,1
679,1
680,2
681,2


In [7]:
# Score the one-vs-one model. X and y are the same tensors passed to
# svc.ovo_train above, and y are the k-means cluster labels, so this is
# accuracy on the training data against cluster assignments (no held-out set).
# ovo_score prints its own "Accuracy: ..." line; the returned fraction is
# printed as well.
acc = svc.ovo_score(X, y, dic)
print(acc)

Accuracy: 650/683 = 95.17%
0.9516837481698389
