In [1]:
from pathlib import Path
import matplotlib.pyplot as plt
import numpy as np
import skimage
from sklearn import svm, metrics, datasets
from sklearn.utils import Bunch
from sklearn.model_selection import GridSearchCV, train_test_split
%matplotlib notebook
from skimage.io import imread
from skimage.transform import resize
import cv2



In [2]:
def load_image_files(container_path, dimension=(64, 64)):
    """
    Load an image dataset laid out as one subfolder per category.

    The returned object mimics the structure of scikit-learn's sample
    datasets.

    Parameters
    ----------
    container_path : str or path-like
        Path to the main folder holding one subfolder per category.
    dimension : tuple
        Output size handed to ``resize`` for every image.

    Returns
    -------
    Bunch
        ``data``         : one flattened, resized image per row.
        ``target``       : for each row, the index of its category in
                           ``target_names``.
        ``target_names`` : category (subfolder) names, sorted by path.
        ``images``       : the resized images, not flattened.
        ``DESCR``        : short description string.

    Raises
    ------
    ValueError
        If the resized images do not all share one shape (for example a
        grayscale or RGBA file mixed in with RGB files); such images cannot
        be stacked into a single feature matrix.
    """
    image_dir = Path(container_path)
    # iterdir() yields entries in arbitrary, filesystem-dependent order. Sort
    # so the label index -> category mapping is identical on every run and
    # machine (otherwise a saved model's class ids could silently swap).
    folders = sorted(entry for entry in image_dir.iterdir() if entry.is_dir())
    categories = [folder.name for folder in folders]

    descr = "A image classification dataset"
    images = []
    flat_data = []
    target = []
    expected_shape = None
    for label, folder in enumerate(folders):
        # Sorted for a deterministic sample order as well.
        for image_path in sorted(folder.iterdir()):
            if not image_path.is_file():
                # Nested directories are not images; skip instead of crashing.
                continue
            img = imread(image_path)
            img_resized = resize(img, dimension, anti_aliasing=True, mode='reflect')

            # Fail early and name the offending file rather than letting the
            # final np.array(...) call choke on ragged input.
            if expected_shape is None:
                expected_shape = img_resized.shape
            elif img_resized.shape != expected_shape:
                raise ValueError(
                    "Image {} has shape {} after resizing, expected {} "
                    "(original shape {})".format(
                        image_path, img_resized.shape, expected_shape, img.shape
                    )
                )

            flat_data.append(img_resized.flatten())
            images.append(img_resized)
            target.append(label)

    flat_data = np.array(flat_data)
    target = np.array(target)
    images = np.array(images)

    return Bunch(data=flat_data,
                 target=target,
                 target_names=categories,
                 images=images,
                 DESCR=descr)


In [3]:
# Root folder of the cleaned dataset (one subfolder per category).
dataset_root = "/home/yunhaoshui/FootKick/clean_dataset/"
image_dataset = load_image_files(dataset_root)

SVM

In [4]:
# Hold out 30% of the samples for evaluation; the fixed seed keeps the split
# identical between runs.
features, labels = image_dataset.data, image_dataset.target
X_train, X_test, y_train, y_test = train_test_split(
    features,
    labels,
    test_size=0.3,
    random_state=109,
)

In [5]:
from sklearn.decomposition import PCA

X = X_train
# Instantiate the PCA. The seed is fixed because, with the default
# svd_solver='auto', scikit-learn may pick the randomized SVD solver for a
# problem of this size; without a seed the projection (and every model trained
# on it) would differ from run to run.
pca = PCA(n_components=1000, random_state=109)
# Fit on the training data only, so no information from the test set leaks
# into the projection.
pca = pca.fit(X)
# Get the projected (reduced-dimension) training matrix.
X_pca_train = pca.transform(X)

In [6]:
# Search space: a linear kernel over C, and an RBF kernel over C x gamma.
c_values = [1, 10, 100, 1000]
gamma_values = [0.001, 0.0001]
param_grid = [
    dict(C=c_values, kernel=['linear']),
    dict(C=c_values, gamma=gamma_values, kernel=['rbf']),
]

# Grid search with GridSearchCV's default settings, run on the PCA-projected
# training data.
svc = svm.SVC()
clf = GridSearchCV(estimator=svc, param_grid=param_grid)
clf.fit(X_pca_train, y_train)

GridSearchCV(estimator=SVC(),
             param_grid=[{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
                          'kernel': ['rbf']}])

In [13]:
import time

# Measure per-sample inference latency by pushing the test samples through
# the PCA projection and the classifier one at a time.
pca_time = []
svm_time = []
for i in range(X_test.shape[0]):
    # Keep the sample 2-D (1, n_features), as transform/predict expect.
    X_i = X_test[i, :].reshape(1, -1)

    # perf_counter() is the high-resolution monotonic clock meant for timing
    # short intervals; time.time() is a wall clock and can be too coarse.
    pca_start_time = time.perf_counter()
    X_pca_test_i = pca.transform(X_i)
    pca_end_time = time.perf_counter()
    pca_time.append(pca_end_time - pca_start_time)

    svm_start_time = time.perf_counter()
    # Stored under its own name so the notebook-wide `y_pred` (the full
    # test-set prediction used by the classification report) is not
    # overwritten by a single-sample result.
    y_pred_i = clf.predict(X_pca_test_i)
    svm_end_time = time.perf_counter()
    svm_time.append(svm_end_time - svm_start_time)

print('pca time:',np.mean(pca_time)*1000,'ms')
print('svm time:',np.mean(svm_time)*1000,'ms')

pca time: 38.049742434788676 ms
svm time: 3.054360018193143 ms


In [None]:
# Project the whole held-out set with the PCA fitted on the training data,
# then classify all samples in one batch. `y_pred` is what the classification
# report cell consumes.
X_pca_test = pca.transform(X_test)
y_pred = clf.predict(X_pca_test)

In [8]:
# Per-class precision / recall / F1 on the held-out set, preceded by the
# estimator's repr so the printed report records which model produced it.
report = metrics.classification_report(y_test, y_pred)
print("Classification report for - \n{}:\n{}\n".format(clf, report))

Classification report for - 
GridSearchCV(estimator=SVC(),
             param_grid=[{'C': [1, 10, 100, 1000], 'kernel': ['linear']},
                         {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001],
                          'kernel': ['rbf']}]):
              precision    recall  f1-score   support

           0       0.81      0.71      0.76       516
           1       0.87      0.92      0.90      1132

    accuracy                           0.86      1648
   macro avg       0.84      0.82      0.83      1648
weighted avg       0.85      0.86      0.85      1648




In [14]:
import pickle

# Persist the fitted grid search (file name and content unchanged).
with open('svm_model_pca_1000dims.pkl', 'wb') as f:
    pickle.dump(clf, f)

# The classifier was trained on PCA-projected features, so it cannot be used
# on new images without the same fitted PCA. Save the PCA next to the model.
with open('pca_1000dims.pkl', 'wb') as f:
    pickle.dump(pca, f)

In [5]:
from sklearn.decomposition import PCA

X = X_train
# Instantiate the PCA. The seed is fixed because, with the default
# svd_solver='auto', scikit-learn may pick the randomized SVD solver for a
# problem of this size; without a seed the projection (and every model trained
# on it) would differ from run to run.
pca = PCA(n_components=500, random_state=109)
# Fit on the training data only, so no information from the test set leaks
# into the projection.
pca = pca.fit(X)
# Get the projected (reduced-dimension) training matrix.
X_pca_train = pca.transform(X)

In [6]:
# Search space: a linear kernel over C, and an RBF kernel over C x gamma.
c_values = [1, 10, 100, 1000]
gamma_values = [0.001, 0.0001]
param_grid = [
    dict(C=c_values, kernel=['linear']),
    dict(C=c_values, gamma=gamma_values, kernel=['rbf']),
]

# Grid search with GridSearchCV's default settings, run on the PCA-projected
# training data.
svc = svm.SVC()
clf = GridSearchCV(estimator=svc, param_grid=param_grid)
clf.fit(X_pca_train, y_train)

KeyboardInterrupt: 

In [None]:
import time

# Measure per-sample inference latency by pushing the test samples through
# the PCA projection and the classifier one at a time.
pca_time = []
svm_time = []
for i in range(X_test.shape[0]):
    # Keep the sample 2-D (1, n_features), as transform/predict expect.
    X_i = X_test[i, :].reshape(1, -1)

    # perf_counter() is the high-resolution monotonic clock meant for timing
    # short intervals; time.time() is a wall clock and can be too coarse.
    pca_start_time = time.perf_counter()
    X_pca_test_i = pca.transform(X_i)
    pca_end_time = time.perf_counter()
    pca_time.append(pca_end_time - pca_start_time)

    svm_start_time = time.perf_counter()
    # Stored under its own name so the notebook-wide `y_pred` (the full
    # test-set prediction used by the classification report) is not
    # overwritten by a single-sample result.
    y_pred_i = clf.predict(X_pca_test_i)
    svm_end_time = time.perf_counter()
    svm_time.append(svm_end_time - svm_start_time)

print('pca time:',np.mean(pca_time)*1000,'ms')
print('svm time:',np.mean(svm_time)*1000,'ms')

In [None]:
# Project the whole held-out set with the PCA fitted on the training data,
# then classify all samples in one batch. `y_pred` is what the classification
# report cell consumes.
X_pca_test = pca.transform(X_test)
y_pred = clf.predict(X_pca_test)

In [None]:
# Per-class precision / recall / F1 on the held-out set, preceded by the
# estimator's repr so the printed report records which model produced it.
report = metrics.classification_report(y_test, y_pred)
print("Classification report for - \n{}:\n{}\n".format(clf, report))