In [1]:
#
# scikit-learn classifiers notebook
#

# for Colab paths
# import sys
# sys.path.append('/content/')
#
#!nvidia-smi
#!nvidia-smi -q
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score

from skl2onnx import convert_sklearn
from skl2onnx.common.data_types import FloatTensorType
from skl2onnx import to_onnx

import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import Lambda, Compose
from LandmarkDataset import LandmarkDataset
from torch.utils.data.sampler import SubsetRandomSampler
torch.set_default_dtype(torch.float64)

import onnx



# Load the landmark dataset, flatten it into NumPy arrays for scikit-learn,
# and hold out 20% of the samples for evaluation.

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
transformations = Compose([
    Lambda(lambda x: torch.tensor(x.values).to(device))
])
# NOTE(review): target_transformations is defined but not passed to
# LandmarkDataset below; kept in case later code relies on the name.
target_transformations = Compose([
    Lambda(lambda x: torch.tensor(x).to(device))
])


def _to_numpy(value):
    """Return `value` as a host-side NumPy array.

    Tensors are detached and copied to the CPU first: NumPy cannot read a
    tensor that lives on a CUDA device, so converting it directly raises
    a TypeError whenever `device` is a GPU.
    """
    if torch.is_tensor(value):
        return value.detach().cpu().numpy()
    return np.asarray(value)


dataset = LandmarkDataset("/home/jovyan/train/data",
                          "/home/jovyan/model",
                          transform=transformations)

num_classes = dataset.num_class
# An older note computed the width as 2 * (21 * 3) + 12 + 1 + 10 = 149, but the
# recorded run of this notebook used 150 features; trust input_size().
input_size = dataset.input_size()

data = np.empty((len(dataset), input_size), dtype=np.float64)
labels = np.empty(len(dataset), dtype=int)

# Each dataset item is indexed as (label, features) here.
for i, d in enumerate(dataset):
    data[i] = np.asarray(_to_numpy(d[1]), dtype=np.float64)
    labels[i] = np.round(_to_numpy(d[0])).astype(int)

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

print("Train len: ", len(X_train))
print("Validation len: ", len(X_test))

  from .autonotebook import tqdm as notebook_tqdm


Train len:  11620
Validation len:  2906


In [2]:
# Initialize the models
print("model init")

#
# Both DecisionTree + RandomForest classifiers have export issues when running on ONNX Runtime Web (JS)
#
#   * DecisionTreeClassifier: no raw probabilities; difficult to apply an in-game threshold / cutoff value
#   * RandomForestClassifier: typically most accurate, but not by enough to overcome the gargantuan model file size
#   * LogisticRegression: just as accurate as any of the others, smallest file size
#   * SVC: decent, but large file size (11 MB) vs LogisticRegression
#   * GaussianNB: extremely inaccurate, ignored for now.
#
# "Simple" LogisticRegression seems best for this class of problem.
#
# Most models (except GaussianNB) had 90%+ accuracy, so they evaluate much better than the two-layer NN.
# But these also lack dropout, so the results are not completely comparable.
#

models = [
    LogisticRegression(max_iter=10000),
    #DecisionTreeClassifier(),
    #RandomForestClassifier(),
    SVC(probability=True),
    #GaussianNB()
]

from skl2onnx import __max_supported_opset__
print("Last supported opset:", __max_supported_opset__)

# Fixed feature vector used as a sanity check after each fit.
# It is loop-invariant, so it is built once here instead of once per model.
X_sanity = np.array([[
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, 0.08243870735168457, 0.3272324204444885,
    3.296062232038821e-7, 0.11587709188461304, 0.2649609446525574,
    -0.010733608156442642, 0.12738025188446045, 0.20119965076446533,
    -0.02170146442949772, 0.13692301511764526, 0.1434585452079773,
    -0.030385395511984825, 0.1565452218055725, 0.10842287540435791,
    -0.03992266580462456, 0.07536429166793823, 0.15806376934051514,
    -0.03490304946899414, 0.06413596868515015, 0.07703894376754761,
    -0.04701704904437065, 0.0496293306350708, 0.031381964683532715,
    -0.05240476876497269, 0.03681206703186035, -0.001009911298751831,
    -0.05688896030187607, 0.03942376375198364, 0.1726135015487671,
    -0.0374128557741642, 0.008090049028396606, 0.09009474515914917,
    -0.04902489855885506, -0.013370007276535034, 0.04655855894088745,
    -0.053542546927928925, -0.02887246012687683, 0.016812801361083984,
    -0.05725706368684769, 0.012033134698867798, 0.19919347763061523,
    -0.03908606246113777, -0.022472083568572998, 0.12650775909423828,
    -0.049961891025304794, -0.039518654346466064, 0.08441585302352905,
    -0.05658777803182602, -0.05035647749900818, 0.051334500312805176,
    -0.062176283448934555, -0.006764203310012817, 0.2322888970375061,
    -0.04069233685731888, -0.04757261276245117, 0.20167887210845947,
    -0.05381026118993759, -0.07199111580848694, 0.1782442331314087,
    -0.061005041003227234, -0.09040814638137817, 0.1531672477722168,
    -0.06528844684362411, -0.04684633016586304, -0.07679343223571777,
    0.028765380382537842, -0.07849520444869995, 0, 0, -0.0023369789123535156,
    0.05037200450897217, -0.10711735486984253, -0.07638520002365112,
    0.06266820430755615, -0.08347678184509277, -1, 0, -1, -1, -1, -1, -1, 1, 1,
    1, 1, 1,
]], dtype=np.float64)

# Fail fast, before any (potentially slow) fit, if the hard-coded sample no
# longer matches the feature width of the training data.
if X_sanity.shape[1] != X_train.shape[1]:
    raise ValueError(
        f"Sanity sample has {X_sanity.shape[1]} features, "
        f"but the training data has {X_train.shape[1]}"
    )


# Train and test each model, and compute the accuracy score
for model in models:

    print("--------------------")

    # Train the model
    model.fit(X_train, y_train)

    # Test the model and compute the accuracy score
    y_pred = model.predict(X_test)
    accuracy = accuracy_score(y_test, y_pred)

    # Print the model's name and accuracy score
    print(model.__class__.__name__, "accuracy: {:.3f}".format(accuracy) )

    # Sanity check
    print("Dims", len(X_sanity), len(X_sanity[0]))
    pred = model.predict(X_sanity)
    pred_prob = model.predict_proba(X_sanity)
    # Column index of the highest probability, i.e. a position in model.classes_.
    best_idx = np.argmax(pred_prob[0])
    # Original author's note (mapping not documented here):
    # Garbage - 1, 28: 5 (correct), 37: 50
    print("Sanity Pred", pred, best_idx, pred_prob[0][best_idx])

    # Export Onnx
    # Keying the options by id(model) disables ZipMap for whichever classifier
    # is being exported, so enabling another entry in `models` needs no change
    # here. Without ZipMap the probabilities come out as a plain tensor.
    onx = to_onnx(model, X=X_train, options={id(model): {'zipmap': False}})

    filename = f"onnx_model_{model.__class__.__name__}.onnx"
    print(f"Exporting {filename}")

    with open(filename, "wb") as f:
        f.write(onx.SerializeToString())

    # Reload the file just written and validate it. A separate name is used so
    # the loop variable `model` keeps referring to the scikit-learn estimator.
    onnx_model = onnx.load(filename)
    onnx.checker.check_model(onnx_model)

    

model init
Last supported opset: 18
--------------------
LogisticRegression accuracy: 0.957
Dims 1 150
Sanity Pred [28] 28 0.5761072199764745
Exporting onnx_model_LogisticRegression.onnx
--------------------
SVC accuracy: 0.907
Dims 1 150
Sanity Pred [28] 28 0.5305207361195617
Exporting onnx_model_SVC.onnx


In [3]:
# Completion marker: printed once execution reaches this final cell.
print("DONE")


DONE
