# 抽象图像风格分类（SVM版本）
## 1.载入数据

In [1]:
from sklearn import svm, model_selection, metrics
from sklearn.model_selection import StratifiedKFold, ParameterGrid
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import numpy as np
import os

In [2]:
# Dataset location. Defaults to the original local path, but can be overridden
# with the PBN_DATA_DIR environment variable so the notebook can run on other
# machines without editing this cell.
data_dir = os.environ.get("PBN_DATA_DIR", "/media/ziven/My Passport/Dataset/Painter by Numbers/140x140_style/")


def load_data_PbN(PbN_dir):
    """Load the Painter by Numbers style arrays as one flattened dataset.

    For each of the six styles, the files ``style_train_images_<style>.npy``
    and ``style_test_images_<style>.npy`` are read from ``PbN_dir``. The train
    arrays are concatenated first (in style order), followed by the test
    arrays, and every sample is flattened to a single row.

    Args:
        PbN_dir: directory containing the twelve ``.npy`` files.

    Returns:
        (X, y): ``X`` has one flattened row per image; ``y`` holds the integer
        style label (0..5, in the order of ``style_names`` below) for each row.
    """
    # The position in this tuple is the integer class label.
    style_names = ('Abstract_Art', 'Abstract_Expressionism', 'Art_Informel',
                   'Color_Field_Painting', 'Lyrical_Abstraction', 'Concretism')

    def read_split(split):
        # Load every style of one split, then stack images and matching labels.
        per_style = [np.load(os.path.join(PbN_dir, 'style_%s_images_%s.npy' % (split, style)))
                     for style in style_names]
        images = np.concatenate(tuple(per_style))
        labels = np.concatenate(tuple([label] * arr.shape[0]
                                      for label, arr in enumerate(per_style)))
        return images, labels

    train_data, train_labels = read_split('train')
    val_data, val_labels = read_split('test')

    stacked = np.concatenate((train_data, val_data))
    X = stacked.reshape([stacked.shape[0], -1])
    y = np.concatenate((train_labels, val_labels))
    return X, y

In [3]:
# Load the train and test images as one flattened feature matrix X with integer style labels y.
X, y = load_data_PbN(data_dir)

## 2.使用SVM进行分类

In [4]:
def SVM_cv(X, y, n_components, decision_function_shape='ovr', kernel='rbf', n_splits=5, gamma=None):
    """Cross-validate a PCA + SVC pipeline and print per-fold and mean metrics.

    For every stratified fold, PCA is fitted on the training part only and then
    applied to the validation part, an ``svm.SVC`` is trained on the reduced
    training data, and accuracy plus macro-averaged precision / recall / F1 are
    printed for the validation part. The means over all folds are printed last.

    Args:
        X: 2-D feature array, one row per sample.
        y: 1-D label array aligned with ``X``.
        n_components: forwarded to ``PCA(n_components=...)``.
        decision_function_shape: forwarded to ``svm.SVC``.
        kernel: forwarded to ``svm.SVC``.
        n_splits: number of stratified folds.
        gamma: optional kernel coefficient forwarded to ``svm.SVC``; when
            ``None`` the SVC default is left untouched.

    Returns:
        None. All results are printed.
    """
    # Previously n_splits was hard-coded to 5 here, silently ignoring the argument.
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=2)
    # Only pass gamma when the caller set it, so the library default still applies otherwise.
    svc_options = {} if gamma is None else {'gamma': gamma}
    avg_accuracy = []
    avg_precision = []
    avg_recall = []
    avg_f_score = []
    for train_index, val_index in skf.split(X, y):
        # Fit PCA on the training fold only to avoid leaking validation data.
        pca = PCA(n_components=n_components)
        X_train = pca.fit_transform(X[train_index])
        X_val = pca.transform(X[val_index])
        print("X_train shape:", X_train.shape)
        print("X_val shape:", X_val.shape)
        # Previously decision_function_shape was hard-coded to 'ovr' here.
        clf = svm.SVC(decision_function_shape=decision_function_shape, verbose=2,
                      kernel=kernel, **svc_options)
        clf.fit(X_train, y[train_index])
        predicts = clf.predict(X_val)
        print(predicts)
        print(y[val_index])
        ac_score = metrics.accuracy_score(y[val_index], predicts)
        cl_report = metrics.classification_report(y[val_index], predicts)
        precision = metrics.precision_score(y[val_index], predicts, average="macro")
        recall = metrics.recall_score(y[val_index], predicts, average="macro")
        f_score = metrics.f1_score(y[val_index], predicts, average="macro")

        avg_accuracy.append(ac_score)
        avg_precision.append(precision)
        avg_recall.append(recall)
        avg_f_score.append(f_score)

        print(ac_score)
        print(cl_report)
        print(precision)
        print(recall)
        print(f_score)
    avg_acc = np.average(avg_accuracy)
    avg_p = np.average(avg_precision)
    avg_r = np.average(avg_recall)
    avg_f = np.average(avg_f_score)
    print("avg_acc:", avg_acc)
    print("avg_p:", avg_p)
    print("avg_r:", avg_r)
    print("avg_f:", avg_f)

### 2.1 参数设置
* n_components：PCA的n_components参数；取整数时为降维后的维度，取(0, 1)之间的小数时为需要保留的方差比例（本文采用后者）
* decision_function_shape：多分类SVM的策略
* kernel：SVM的核函数
* gamma：RBF核的参数
* n_splits：交叉验证的折数

### 2.2 进行交叉验证

In [27]:
# Settings for the SVM_cv call in the next cell.
n_components = 0.9  # forwarded to PCA(n_components=...); per sklearn docs a float in (0, 1) is an explained-variance fraction
decision_function_shape = 'ovr'
kernel = 'rbf'
n_splits = 5

In [28]:
# Run the PCA + SVM cross-validation with the settings defined above; metrics are printed.
SVM_cv(X, y, n_components, decision_function_shape, kernel, n_splits)

('X_train shape:', (4648, 440))
('X_val shape:', (1164, 440))
[LibSVM][1 1 1 ..., 1 1 1]
[0 0 0 ..., 5 5 5]
0.328178694158
             precision    recall  f1-score   support

          0       1.00      0.01      0.01       184
          1       0.31      0.98      0.47       352
          2       0.54      0.03      0.06       208
          3       0.60      0.14      0.23       181
          4       1.00      0.01      0.02       127
          5       0.38      0.03      0.05       112

avg / total       0.59      0.33      0.20      1164

0.638568267452
0.199111249151
0.141068307144
('X_train shape:', (4649, 448))
('X_val shape:', (1163, 448))
[LibSVM][1 1 1 ..., 1 1 1]
[0 0 0 ..., 5 5 5]
0.321582115219
             precision    recall  f1-score   support

          0       0.00      0.00      0.00       184
          1       0.31      0.95      0.47       352
          2       0.35      0.06      0.11       208
          3       0.57      0.13      0.22       181
          4     

In [5]:
# Settings for the SVM_cv call in the next cell.
n_components = 0.95  # forwarded to PCA(n_components=...); per sklearn docs a float in (0, 1) is an explained-variance fraction
decision_function_shape = 'ovr'
kernel = 'rbf'
n_splits = 5

In [6]:
# Run the PCA + SVM cross-validation with the settings defined above; metrics are printed.
SVM_cv(X, y, n_components, decision_function_shape, kernel, n_splits)

('X_train shape:', (4648, 981))
('X_val shape:', (1164, 981))
[LibSVM][1 1 1 ..., 1 1 1]
[0 0 0 ..., 5 5 5]
0.337628865979
             precision    recall  f1-score   support

          0       1.00      0.01      0.01       184
          1       0.32      0.95      0.47       352
          2       0.35      0.04      0.07       208
          3       0.57      0.23      0.32       181
          4       1.00      0.04      0.08       127
          5       0.57      0.04      0.07       112

avg / total       0.57      0.34      0.22      1164

0.634164496936
0.215727276484
0.170266972785
('X_train shape:', (4649, 988))
('X_val shape:', (1163, 988))
[LibSVM][1 1 1 ..., 1 1 1]
[0 0 0 ..., 5 5 5]
0.337059329321
             precision    recall  f1-score   support

          0       0.00      0.00      0.00       184
          1       0.32      0.93      0.47       352
          2       0.42      0.08      0.13       208
          3       0.54      0.23      0.32       181
          4     

  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


('X_train shape:', (4649, 982))
('X_val shape:', (1163, 982))
[LibSVM][1 1 1 ..., 1 5 3]
[0 0 0 ..., 5 5 5]
0.337059329321
             precision    recall  f1-score   support

          0       0.50      0.01      0.02       184
          1       0.32      0.93      0.48       352
          2       0.42      0.05      0.09       208
          3       0.48      0.23      0.31       181
          4       0.38      0.02      0.04       126
          5       0.35      0.05      0.09       112

avg / total       0.40      0.34      0.23      1163

0.408280015636
0.217052260151
0.173183818458
('X_train shape:', (4650, 978))
('X_val shape:', (1162, 978))
[LibSVM][1 1 1 ..., 1 3 1]
[0 0 0 ..., 5 5 5]
0.334767641997
             precision    recall  f1-score   support

          0       1.00      0.01      0.02       183
          1       0.32      0.93      0.47       352
          2       0.62      0.05      0.09       208
          3       0.57      0.23      0.33       181
          4     

In [7]:
# Settings for the SVM_cv call in the next cell.
n_components = 0.99  # forwarded to PCA(n_components=...); per sklearn docs a float in (0, 1) is an explained-variance fraction
decision_function_shape = 'ovr'
kernel = 'rbf'
n_splits = 5

In [8]:
# Run the PCA + SVM cross-validation with the settings defined above; metrics are printed.
SVM_cv(X, y, n_components, decision_function_shape, kernel, n_splits)

('X_train shape:', (4648, 2423))
('X_val shape:', (1164, 2423))
[LibSVM][1 2 1 ..., 1 1 1]
[0 0 0 ..., 5 5 5]
0.346219931271
             precision    recall  f1-score   support

          0       0.40      0.01      0.02       184
          1       0.32      0.93      0.48       352
          2       0.33      0.03      0.05       208
          3       0.51      0.30      0.38       181
          4       0.40      0.02      0.03       127
          5       0.71      0.09      0.16       112

avg / total       0.41      0.35      0.24      1164

0.446811984587
0.229625182865
0.186765887931
('X_train shape:', (4649, 2418))
('X_val shape:', (1163, 2418))
[LibSVM][1 1 0 ..., 1 1 1]
[0 0 0 ..., 5 5 5]
0.345657781599
             precision    recall  f1-score   support

          0       0.50      0.01      0.02       184
          1       0.32      0.91      0.48       352
          2       0.53      0.04      0.07       208
          3       0.47      0.34      0.39       181
          4 

In [9]:
# Settings for the SVM_cv call in the next cell.
n_components = 0.85  # forwarded to PCA(n_components=...); per sklearn docs a float in (0, 1) is an explained-variance fraction
decision_function_shape = 'ovr'
kernel = 'rbf'
n_splits = 5

In [10]:
# Run the PCA + SVM cross-validation with the settings defined above; metrics are printed.
SVM_cv(X, y, n_components, decision_function_shape, kernel, n_splits)

('X_train shape:', (4648, 210))
('X_val shape:', (1164, 210))
[LibSVM][1 1 1 ..., 1 1 1]
[0 0 0 ..., 5 5 5]
0.317010309278
             precision    recall  f1-score   support

          0       1.00      0.01      0.01       184
          1       0.31      1.00      0.47       352
          2       0.71      0.02      0.05       208
          3       0.73      0.06      0.11       181
          4       0.00      0.00      0.00       127
          5       0.25      0.01      0.02       112

avg / total       0.52      0.32      0.17      1164

0.501054361938
0.182722397858
0.109711011653
('X_train shape:', (4649, 216))
('X_val shape:', (1163, 216))
[LibSVM][1 1 1 ..., 1 1 1]
[0 0 0 ..., 5 5 5]
0.318142734308
             precision    recall  f1-score   support

          0       0.00      0.00      0.00       184
          1       0.31      0.98      0.47       352
          2       0.39      0.04      0.08       208
          3       0.67      0.07      0.12       181
          4     

In [11]:
# Settings for the SVM_cv call in the next cell.
n_components = 0.8  # forwarded to PCA(n_components=...); per sklearn docs a float in (0, 1) is an explained-variance fraction
decision_function_shape = 'ovr'
kernel = 'rbf'
n_splits = 5

In [12]:
# Run the PCA + SVM cross-validation with the settings defined above; metrics are printed.
SVM_cv(X, y, n_components, decision_function_shape, kernel, n_splits)

('X_train shape:', (4648, 103))
('X_val shape:', (1164, 103))
[LibSVM][1 1 1 ..., 1 1 1]
[0 0 0 ..., 5 5 5]
0.310137457045
             precision    recall  f1-score   support

          0       1.00      0.01      0.01       184
          1       0.31      1.00      0.47       352
          2       1.00      0.02      0.05       208
          3       0.75      0.02      0.03       181
          4       0.00      0.00      0.00       127
          5       0.00      0.00      0.00       112

avg / total       0.55      0.31      0.16      1164

0.509303504199
0.174341304964
0.0930980233839
('X_train shape:', (4649, 106))
('X_val shape:', (1163, 106))
[LibSVM][1 1 1 ..., 1 1 1]
[0 0 0 ..., 5 5 5]
0.314703353396
             precision    recall  f1-score   support

          0       0.00      0.00      0.00       184
          1       0.31      1.00      0.47       352
          2       0.69      0.04      0.08       208
          3       0.62      0.03      0.05       181
          4    

## 3.使用LR进行分类

In [13]:
def LR_cv(X, y, n_components, C=1.0, solver="saga", multi_class="multinomial", n_splits=5):
    """Cross-validate a PCA + LogisticRegression pipeline and print metrics.

    For every stratified fold, PCA is fitted on the training part only and then
    applied to the validation part, a ``LogisticRegression`` is trained on the
    reduced training data, and accuracy plus macro-averaged precision / recall /
    F1 are printed for the validation part. The fold means are printed last.

    Args:
        X: 2-D feature array, one row per sample.
        y: 1-D label array aligned with ``X``.
        n_components: forwarded to ``PCA(n_components=...)``.
        C: forwarded to ``LogisticRegression``.
        solver: forwarded to ``LogisticRegression``.
        multi_class: forwarded to ``LogisticRegression``.
        n_splits: number of stratified folds.

    Returns:
        None. All results are printed.
    """
    # Previously n_splits was hard-coded to 5 here, silently ignoring the argument.
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=2)
    avg_accuracy = []
    avg_precision = []
    avg_recall = []
    avg_f_score = []
    for train_index, val_index in skf.split(X, y):
        # Fit PCA on the training fold only to avoid leaking validation data.
        pca = PCA(n_components=n_components)
        X_train = pca.fit_transform(X[train_index])
        X_val = pca.transform(X[val_index])
        print("X_train shape:", X_train.shape)
        print("X_val shape:", X_val.shape)

        clf = LogisticRegression(C=C, solver=solver, multi_class=multi_class, n_jobs=-1)
        clf.fit(X_train, y[train_index])
        predicts = clf.predict(X_val)
        print(predicts)
        print(y[val_index])
        ac_score = metrics.accuracy_score(y[val_index], predicts)
        cl_report = metrics.classification_report(y[val_index], predicts)
        precision = metrics.precision_score(y[val_index], predicts, average="macro")
        recall = metrics.recall_score(y[val_index], predicts, average="macro")
        f_score = metrics.f1_score(y[val_index], predicts, average="macro")

        avg_accuracy.append(ac_score)
        avg_precision.append(precision)
        avg_recall.append(recall)
        avg_f_score.append(f_score)

        print(ac_score)
        print(cl_report)
        print(precision)
        print(recall)
        print(f_score)
    avg_acc = np.average(avg_accuracy)
    avg_p = np.average(avg_precision)
    avg_r = np.average(avg_recall)
    avg_f = np.average(avg_f_score)
    print("avg_acc:", avg_acc)
    print("avg_p:", avg_p)
    print("avg_r:", avg_r)
    print("avg_f:", avg_f)

In [14]:
# PCA setting for the LR_cv call in the next cell; per sklearn docs a float in (0, 1) is an explained-variance fraction.
n_components = 0.95

In [15]:
# Cross-validate PCA + logistic regression, leaving C, solver and multi_class at LR_cv's defaults.
LR_cv(X, y, n_components)

('X_train shape:', (4648, 981))
('X_val shape:', (1164, 981))




[1 1 0 ..., 1 1 1]
[0 0 0 ..., 5 5 5]
0.254295532646
             precision    recall  f1-score   support

          0       0.22      0.21      0.21       184
          1       0.29      0.39      0.33       352
          2       0.26      0.22      0.24       208
          3       0.27      0.27      0.27       181
          4       0.14      0.09      0.11       127
          5       0.19      0.13      0.16       112

avg / total       0.24      0.25      0.25      1164

0.228111209534
0.218415043552
0.220304142928
('X_train shape:', (4649, 988))
('X_val shape:', (1163, 988))
[3 2 0 ..., 4 3 2]
[0 0 0 ..., 5 5 5]
0.248495270851
             precision    recall  f1-score   support

          0       0.23      0.18      0.21       184
          1       0.29      0.38      0.33       352
          2       0.22      0.21      0.21       208
          3       0.26      0.28      0.27       181
          4       0.17      0.11      0.13       126
          5       0.14      0.12      0.1

In [16]:
# Repeat the logistic-regression cross-validation with PCA n_components=0.99.
LR_cv(X, y, 0.99)

('X_train shape:', (4648, 2423))
('X_val shape:', (1164, 2423))
[1 2 1 ..., 3 3 1]
[0 0 0 ..., 5 5 5]
0.233676975945
             precision    recall  f1-score   support

          0       0.18      0.16      0.17       184
          1       0.29      0.32      0.30       352
          2       0.22      0.20      0.21       208
          3       0.23      0.28      0.25       181
          4       0.19      0.16      0.17       127
          5       0.19      0.14      0.16       112

avg / total       0.23      0.23      0.23      1164

0.215042490127
0.21091680376
0.211397547846
('X_train shape:', (4649, 2418))
('X_val shape:', (1163, 2418))
[3 2 0 ..., 0 3 4]
[0 0 0 ..., 5 5 5]
0.249355116079
             precision    recall  f1-score   support

          0       0.23      0.16      0.19       184
          1       0.29      0.35      0.32       352
          2       0.21      0.19      0.20       208
          3       0.26      0.33      0.29       181
          4       0.18      0

In [17]:
# Repeat the logistic-regression cross-validation with PCA n_components=0.90.
LR_cv(X, y, 0.90)

('X_train shape:', (4648, 440))
('X_val shape:', (1164, 440))
[3 1 0 ..., 1 1 1]
[0 0 0 ..., 5 5 5]
0.27147766323
             precision    recall  f1-score   support

          0       0.22      0.20      0.21       184
          1       0.31      0.52      0.39       352
          2       0.23      0.16      0.19       208
          3       0.31      0.23      0.26       181
          4       0.14      0.08      0.10       127
          5       0.21      0.11      0.14       112

avg / total       0.25      0.27      0.25      1164

0.235488143414
0.215785578457
0.215411885423
('X_train shape:', (4649, 448))
('X_val shape:', (1163, 448))
[3 2 3 ..., 0 5 1]
[0 0 0 ..., 5 5 5]
0.263112639725
             precision    recall  f1-score   support

          0       0.22      0.17      0.19       184
          1       0.29      0.46      0.36       352
          2       0.24      0.20      0.22       208
          3       0.27      0.24      0.25       181
          4       0.16      0.10 

In [18]:
# Repeat the logistic-regression cross-validation with PCA n_components=0.85.
LR_cv(X, y, 0.85)

('X_train shape:', (4648, 210))
('X_val shape:', (1164, 210))
[3 1 1 ..., 1 1 1]
[0 0 0 ..., 5 5 5]
0.294673539519
             precision    recall  f1-score   support

          0       0.21      0.16      0.18       184
          1       0.32      0.64      0.42       352
          2       0.30      0.15      0.20       208
          3       0.27      0.19      0.22       181
          4       0.20      0.07      0.10       127
          5       0.34      0.14      0.20       112

avg / total       0.28      0.29      0.26      1164

0.273252174285
0.224096563669
0.221777027614
('X_train shape:', (4649, 216))
('X_val shape:', (1163, 216))
[3 0 3 ..., 2 5 1]
[0 0 0 ..., 5 5 5]
0.292347377472
             precision    recall  f1-score   support

          0       0.24      0.15      0.18       184
          1       0.31      0.62      0.41       352
          2       0.22      0.15      0.18       208
          3       0.32      0.24      0.27       181
          4       0.27      0.05

In [19]:
# Repeat the logistic-regression cross-validation with PCA n_components=0.80.
LR_cv(X, y, 0.80)

('X_train shape:', (4648, 103))
('X_val shape:', (1164, 103))
[3 3 1 ..., 1 1 1]
[0 0 0 ..., 5 5 5]
0.308419243986
             precision    recall  f1-score   support

          0       0.20      0.11      0.14       184
          1       0.32      0.75      0.45       352
          2       0.26      0.11      0.15       208
          3       0.37      0.20      0.26       181
          4       0.29      0.05      0.08       127
          5       0.40      0.09      0.15       112

avg / total       0.30      0.31      0.25      1164

0.30531627642
0.218001409123
0.205261217328
('X_train shape:', (4649, 106))
('X_val shape:', (1163, 106))
[3 1 0 ..., 1 2 2]
[0 0 0 ..., 5 5 5]
0.299226139295
             precision    recall  f1-score   support

          0       0.20      0.11      0.14       184
          1       0.31      0.74      0.44       352
          2       0.23      0.10      0.14       208
          3       0.30      0.18      0.23       181
          4       0.36      0.03 

## 4.使用RF进行分类

In [20]:
def RF_cv(X, y, n_components, n_estimators=10, max_depth=None, min_samples_split=2, n_splits=5):
    """Cross-validate a PCA + RandomForestClassifier pipeline and print metrics.

    For every stratified fold, PCA is fitted on the training part only and then
    applied to the validation part, a ``RandomForestClassifier`` (fixed
    ``random_state=0``) is trained on the reduced training data, and accuracy
    plus macro-averaged precision / recall / F1 are printed for the validation
    part. The fold means are printed last.

    Args:
        X: 2-D feature array, one row per sample.
        y: 1-D label array aligned with ``X``.
        n_components: forwarded to ``PCA(n_components=...)``.
        n_estimators: forwarded to ``RandomForestClassifier``.
        max_depth: forwarded to ``RandomForestClassifier``.
        min_samples_split: forwarded to ``RandomForestClassifier``.
        n_splits: number of stratified folds.

    Returns:
        None. All results are printed.
    """
    # Previously n_splits was hard-coded to 5 here, silently ignoring the argument.
    skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=2)
    avg_accuracy = []
    avg_precision = []
    avg_recall = []
    avg_f_score = []
    for train_index, val_index in skf.split(X, y):
        # Fit PCA on the training fold only to avoid leaking validation data.
        pca = PCA(n_components=n_components)
        X_train = pca.fit_transform(X[train_index])
        X_val = pca.transform(X[val_index])
        print("X_train shape:", X_train.shape)
        print("X_val shape:", X_val.shape)

        clf = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth,
                                     min_samples_split=min_samples_split,
                                     random_state=0, n_jobs=-1)
        clf.fit(X_train, y[train_index])
        predicts = clf.predict(X_val)
        print(predicts)
        print(y[val_index])
        ac_score = metrics.accuracy_score(y[val_index], predicts)
        cl_report = metrics.classification_report(y[val_index], predicts)
        precision = metrics.precision_score(y[val_index], predicts, average="macro")
        recall = metrics.recall_score(y[val_index], predicts, average="macro")
        f_score = metrics.f1_score(y[val_index], predicts, average="macro")

        avg_accuracy.append(ac_score)
        avg_precision.append(precision)
        avg_recall.append(recall)
        avg_f_score.append(f_score)

        print(ac_score)
        print(cl_report)
        print(precision)
        print(recall)
        print(f_score)
    avg_acc = np.average(avg_accuracy)
    avg_p = np.average(avg_precision)
    avg_r = np.average(avg_recall)
    avg_f = np.average(avg_f_score)
    print("avg_acc:", avg_acc)
    print("avg_p:", avg_p)
    print("avg_r:", avg_r)
    print("avg_f:", avg_f)

In [21]:
# PCA setting for the RF_cv call in the next cell; per sklearn docs a float in (0, 1) is an explained-variance fraction.
n_components = 0.95

In [22]:
# Cross-validate PCA + random forest, leaving the forest hyper-parameters at RF_cv's defaults.
RF_cv(X, y, n_components)

('X_train shape:', (4648, 981))
('X_val shape:', (1164, 981))
[2 1 0 ..., 2 0 2]
[0 0 0 ..., 5 5 5]
0.274914089347
             precision    recall  f1-score   support

          0       0.22      0.27      0.24       184
          1       0.33      0.52      0.40       352
          2       0.21      0.14      0.17       208
          3       0.30      0.27      0.28       181
          4       0.15      0.07      0.10       127
          5       0.05      0.01      0.02       112

avg / total       0.24      0.27      0.25      1164

0.210435353375
0.213000572923
0.201612050289
('X_train shape:', (4649, 988))
('X_val shape:', (1163, 988))
[3 1 1 ..., 0 4 0]
[0 0 0 ..., 5 5 5]
0.282889079966
             precision    recall  f1-score   support

          0       0.20      0.22      0.21       184
          1       0.32      0.53      0.40       352
          2       0.26      0.21      0.23       208
          3       0.36      0.27      0.31       181
          4       0.10      0.04

In [23]:
# Repeat the random-forest cross-validation with PCA n_components=0.99.
RF_cv(X, y, 0.99)

('X_train shape:', (4648, 2423))
('X_val shape:', (1164, 2423))
[1 0 1 ..., 0 1 0]
[0 0 0 ..., 5 5 5]
0.254295532646
             precision    recall  f1-score   support

          0       0.19      0.24      0.21       184
          1       0.31      0.47      0.37       352
          2       0.17      0.12      0.14       208
          3       0.32      0.31      0.31       181
          4       0.07      0.02      0.04       127
          5       0.04      0.01      0.01       112

avg / total       0.22      0.25      0.23      1164

0.18353765273
0.19638188634
0.181910794555
('X_train shape:', (4649, 2418))
('X_val shape:', (1163, 2418))
[1 1 0 ..., 1 1 1]
[0 0 0 ..., 5 5 5]
0.263112639725
             precision    recall  f1-score   support

          0       0.17      0.18      0.18       184
          1       0.32      0.48      0.38       352
          2       0.20      0.16      0.18       208
          3       0.32      0.31      0.31       181
          4       0.12      0.

In [24]:
# Repeat the random-forest cross-validation with PCA n_components=0.90.
RF_cv(X, y, 0.90)

('X_train shape:', (4648, 440))
('X_val shape:', (1164, 440))
[0 1 0 ..., 3 1 1]
[0 0 0 ..., 5 5 5]
0.286082474227
             precision    recall  f1-score   support

          0       0.24      0.29      0.26       184
          1       0.34      0.55      0.42       352
          2       0.22      0.16      0.18       208
          3       0.32      0.25      0.28       181
          4       0.09      0.03      0.05       127
          5       0.08      0.02      0.03       112

avg / total       0.25      0.29      0.25      1164

0.213293747341
0.217361908233
0.204114473088
('X_train shape:', (4649, 448))
('X_val shape:', (1163, 448))
[2 1 4 ..., 0 1 1]
[0 0 0 ..., 5 5 5]
0.272570937231
             precision    recall  f1-score   support

          0       0.21      0.25      0.23       184
          1       0.32      0.50      0.39       352
          2       0.22      0.20      0.21       208
          3       0.38      0.27      0.31       181
          4       0.08      0.03

In [25]:
# Repeat the random-forest cross-validation with PCA n_components=0.85.
RF_cv(X, y, 0.85)

('X_train shape:', (4648, 210))
('X_val shape:', (1164, 210))
[1 0 1 ..., 5 1 2]
[0 0 0 ..., 5 5 5]
0.278350515464
             precision    recall  f1-score   support

          0       0.18      0.22      0.20       184
          1       0.32      0.52      0.40       352
          2       0.26      0.21      0.23       208
          3       0.35      0.26      0.30       181
          4       0.09      0.03      0.05       127
          5       0.21      0.06      0.10       112

avg / total       0.26      0.28      0.25      1164

0.236052756457
0.216711147002
0.211803294173
('X_train shape:', (4649, 216))
('X_val shape:', (1163, 216))
[3 2 1 ..., 0 0 1]
[0 0 0 ..., 5 5 5]
0.279449699054
             precision    recall  f1-score   support

          0       0.18      0.21      0.20       184
          1       0.32      0.53      0.40       352
          2       0.23      0.19      0.21       208
          3       0.38      0.25      0.30       181
          4       0.14      0.05

In [26]:
# Repeat the random-forest cross-validation with PCA n_components=0.80.
RF_cv(X, y, 0.80)

('X_train shape:', (4648, 103))
('X_val shape:', (1164, 103))
[3 1 1 ..., 1 3 1]
[0 0 0 ..., 5 5 5]
0.267182130584
             precision    recall  f1-score   support

          0       0.17      0.22      0.19       184
          1       0.31      0.52      0.39       352
          2       0.18      0.14      0.16       208
          3       0.43      0.28      0.34       181
          4       0.07      0.02      0.03       127
          5       0.16      0.04      0.07       112

avg / total       0.24      0.27      0.24      1164

0.220485446076
0.20361674974
0.195851349001
('X_train shape:', (4649, 106))
('X_val shape:', (1163, 106))
[1 4 0 ..., 1 2 2]
[0 0 0 ..., 5 5 5]
0.25709372313
             precision    recall  f1-score   support

          0       0.18      0.23      0.21       184
          1       0.30      0.48      0.37       352
          2       0.22      0.20      0.21       208
          3       0.33      0.19      0.24       181
          4       0.11      0.04  