In [12]:
#
# sci kit classifiers notebook
#

# for Colab paths
# import sys
# sys.path.append('/content/')
#
#!nvidia-smi
#!nvidia-smi -q
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import Lambda, Compose
from LandmarkDataset import LandmarkDataset
from torch.utils.data.sampler import SubsetRandomSampler

torch.set_default_dtype(torch.float64)


device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
transformations = Compose([
    Lambda(lambda x: torch.tensor(x.values).to(device))
])
target_transformations = Compose([
    Lambda(lambda x: torch.tensor(x).to(device))
])

dataset = LandmarkDataset("/home/jovyan/train/data",
                          "/home/jovyan/model",
                          transform=transformations)

num_classes = dataset.num_class
input_size = dataset.input_size() #2 * (21 * 3) + 12 + 1 + 10 #149

training_indices, validation_indices = dataset.train_validation_indices(split_p = .2)
 
data = np.empty( (len(dataset), input_size))
labels = np.empty( len(dataset), dtype=np.int8)
                  
for idx in training_indices:
    data[idx] = np.array(dataset[idx][1])
    labels[idx] = dataset[idx][0] 

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

print(len(X_train))
print(len(y_test))



11620
2906


In [14]:
# Initialize the models
models = [
    LogisticRegression(max_iter=10000),
    DecisionTreeClassifier(),
    RandomForestClassifier(),
    SVC(),
    GaussianNB()
]

# Train and test each model, and compute the accuracy score
for model in models:
    # Train the model
    model.fit(X_train, y_train)

    # Test the model and compute the accuracy score
    y_pred = model.predict(X_test)
    #print("TEST", X_test, "PRED", y_pred)
    #print(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)

    # Print the model's name and accuracy score
    print(model.__class__.__name__, "accuracy: {:.3f}".format(accuracy) )

[17 10 49 ... 59 23  8] [17 10 49 ... 59 23  8]
LogisticRegression accuracy: 0.949
[17 10 49 ... 59 23  8] [17 10 49 ... 59 23  8]
DecisionTreeClassifier accuracy: 0.940
[17 10 49 ... 59 23  8] [17 10 49 ... 59 23  8]
RandomForestClassifier accuracy: 0.987
[17 10 49 ... 59 23  8] [17 10 49 ... 59 23  8]
SVC accuracy: 0.886
[17 10 49 ... 59 23  8] [39 37 37 ... 37 39 16]
GaussianNB accuracy: 0.227
