In [30]:
import numpy as np
from sklearn import svm
import pandas as pd
import matplotlib.pyplot as plt
plt.switch_backend('agg')
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
import random
from scipy.io import mmread
import math
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial.distance import cosine
import os
from matplotlib import gridspec
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.feature_selection import SelectKBest, SelectFromModel
from sklearn.feature_selection import chi2,f_classif,mutual_info_classif,mutual_info_regression
from sklearn.linear_model import Lasso
from sklearn.linear_model import LassoCV
from sklearn.linear_model import LogisticRegression
from sklearn import tree
from scipy.stats import chisquare
from scipy.stats import pearsonr
from sklearn.feature_selection import VarianceThreshold
from sklearn.metrics import accuracy_score
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA

def text_create(path, name, msg):
    """Write ``str(msg)`` to ``<path>/<name>.txt``, overwriting any existing file.

    Parameters
    ----------
    path : str
        Directory that will contain the file (must already exist).
    name : str
        File name without the ``.txt`` extension.
    msg : object
        Value to store; it is converted with ``str()`` before writing.
    """
    full_path = path + "/" + name + '.txt'
    # The context manager guarantees the handle is flushed and closed even if
    # the write raises; the previous version left the file open.
    with open(full_path, 'w') as handle:
        handle.write(str(msg))


def SVM(X, y, model_type='SVC'):
    """Fit a linear support-vector model on ``(X, y)`` and return it.

    Parameters
    ----------
    X, y
        Training inputs and targets, passed straight to ``model.fit``.
    model_type : str
        ``'SVC'`` builds ``svm.LinearSVC(max_iter=1000000)``;
        ``'SVR'`` builds ``svm.LinearSVR()``.

    Returns
    -------
    The fitted estimator.

    Raises
    ------
    ValueError
        If ``model_type`` is neither ``'SVC'`` nor ``'SVR'``.
    """
    if model_type == 'SVC':
        model = svm.LinearSVC(max_iter=1000000)
    elif model_type == 'SVR':
        # LinearSVR is linear by construction and accepts no ``kernel``
        # keyword; passing one raised TypeError before any fitting happened.
        model = svm.LinearSVR()
    else:
        # Previously this only printed and then failed on the unbound
        # ``model`` variable; fail with an explicit, catchable error instead.
        raise ValueError("Invalid model type: %r" % (model_type,))
    model.fit(X, y)
    return model


def get_error(model, X, y):
    """Return the mean squared error between ``model.predict(X)`` and ``y``."""
    return mean_squared_error(model.predict(X), y)


def _balanced_pick(y, candidates, budget):
    """Draw about ``budget`` indices from ``candidates``, spread evenly over the classes of ``y``.

    Classes are visited from the fewest to the most candidates. A class whose
    candidates fit inside the current per-class quota is taken whole; otherwise
    the quota is recomputed from the budget that is still unspent and that many
    candidates are drawn at random from the class.
    """
    classes = np.unique(y)
    num_classes = len(classes)
    candidate_set = set(candidates)
    per_class = [list(set(np.where(y == label)[0].tolist()) & candidate_set) for label in classes]
    order = sorted(range(num_classes), key=lambda k: len(per_class[k]))
    picked = []
    quota = int(budget / num_classes)
    for rank, k in enumerate(order):
        pool = per_class[k]
        if quota >= len(pool):
            picked += pool
        else:
            quota = int((budget - len(picked)) / (num_classes - rank))
            # The recomputed quota can exceed the pool size; clamp so that
            # random.sample does not raise ValueError.
            picked += random.sample(pool, min(quota, len(pool)))
    return picked


def select_samples(X, y, num_samples=None, model_type='SVC', reuse=True, sample_selected=None, balance=True):
    """Pick training rows that a linear SVM fitted on ``(X, y)`` gets wrong.

    A model is fitted on all of ``(X, y)``; the candidate pool is every row
    index ``i`` with ``y[i] != prediction[i]``.

    Parameters
    ----------
    X, y
        Data the model is fitted on and evaluated against.
    num_samples : int or None
        Number of rows wanted. ``None`` returns the whole candidate pool.
    model_type : str
        Forwarded to ``SVM``.
    reuse : bool
        When true, candidates that already appear in ``sample_selected`` count
        towards ``num_samples`` and are returned first; only the shortfall is
        drawn from the remaining candidates.
    sample_selected : sequence of int or None
        Previously selected row indices; required when ``reuse`` is true.
    balance : bool
        When true, new rows are spread over the classes of ``y``;
        otherwise they are drawn uniformly at random.

    Returns
    -------
    list of int, or ``None`` when ``reuse`` is true but ``sample_selected`` is
    missing (a message is printed in that case). With ``reuse`` an empty list
    is returned when the reused candidates already cover ``num_samples``.
    """
    model = SVM(X, y, model_type=model_type)
    y_pred = model.predict(X)
    sv = [i for i, (truth, guess) in enumerate(zip(y, y_pred)) if truth != guess]
    if num_samples is None:
        return sv

    if not reuse:
        if balance:
            # Uses the ``y`` argument; the earlier code read a global ``y_train``.
            return _balanced_pick(y, sv, num_samples)
        if len(sv) < num_samples:
            return list(sv)
        return random.sample(sv, num_samples)

    if sample_selected is None:
        print("Miss parameter \"sample_selected\" when \"reuse\" is True.")
        return None

    reused = list(set(sample_selected) & set(sv))
    num_select = num_samples - len(reused)
    if num_select <= 0:
        return []
    fresh = list(set(sv) - set(reused))
    if balance:
        # The quota is now computed from the unspent part of ``num_select``
        # (as the non-reuse branch always did), so the draw cannot overshoot.
        return reused + _balanced_pick(y, fresh, num_select)
    if len(fresh) <= num_select:
        return reused + fresh
    return reused + random.sample(fresh, num_select)


def select_feature(X, y, feature_list, model_type='SVC', strategy='weight'):
    """Return the index of one column of ``X`` that is not yet in ``feature_list``.

    Parameters
    ----------
    X, y
        Samples and targets used to fit the helper models.
    feature_list : list of int
        Column indices that are already selected (must be a ``list`` because
        it is concatenated with ``[i]``).
    model_type : str
        Forwarded to ``SVM``.
    strategy : str
        ``'weight'``: fit one model on all columns and rank columns by the sum
        of absolute coefficients over the model's coefficient rows.
        ``'score'``: for every candidate fit a model on ``feature_list + [i]``
        and take the candidate with the highest ``model.score`` on the same data.
        ``'angle'``: for every candidate fit a model on ``feature_list + [i]``
        and take the candidate whose weight vectors turn the most, summed over
        coefficient rows, relative to the model fitted on ``feature_list``
        alone (padded with a zero weight for the new column). Needs a
        non-empty ``feature_list``.

    Returns
    -------
    int
        The chosen column index; ties go to the lowest index.

    Raises
    ------
    ValueError
        If ``strategy`` is not one of the three names above.
    IndexError
        If every column is already in ``feature_list``.
    """
    if strategy not in ('weight', 'score', 'angle'):
        # Previously an unknown name was only printed and the function then
        # crashed with NameError on the undefined ranking.
        raise ValueError("Invalid strategy name.")

    already = set(feature_list)
    # Selected columns can never be returned, so no model is fitted for them.
    candidates = [i for i in range(X.shape[1]) if i not in already]
    if not candidates:
        raise IndexError("no unselected feature left to choose from")

    if strategy == 'weight':
        # The full-width model is only needed here, so it is fitted here.
        model = SVM(X, y, model_type=model_type)
        # atleast_2d keeps 1-D coefficient vectors (e.g. LinearSVR) working.
        coef = np.sum(np.abs(np.atleast_2d(model.coef_)), axis=0)
        # NOTE(review): the ranking is ascending, i.e. the column with the
        # SMALLEST total |weight| is returned. This is kept as it was —
        # confirm that it is intended.
        return min(candidates, key=lambda i: coef[i])

    if strategy == 'score':
        scores = {}
        for i in candidates:
            X_local = X[:, feature_list + [i]]
            # R^2 for regression and mean accuracy for classification
            scores[i] = SVM(X_local, y, model_type=model_type).score(X_local, y)
        return max(candidates, key=lambda i: scores[i])

    # strategy == 'angle'
    base_coef = np.atleast_2d(SVM(X[:, feature_list], y, model_type=model_type).coef_)
    w_current = np.hstack((base_coef, np.zeros((base_coef.shape[0], 1))))
    angles = {}
    for i in candidates:
        X_local = X[:, feature_list + [i]]
        w_new = np.atleast_2d(SVM(X_local, y, model_type=model_type).coef_)
        angle = 0
        for j in range(w_current.shape[0]):
            cos = cosine_similarity(w_current[j].reshape(1, -1), w_new[j].reshape(1, -1))[0][0]
            # Guard acos against values that drift just outside [-1, 1].
            if cos > 1:
                cos = 1
            if cos < -1:
                cos = -1
            angle = angle + math.acos(cos)
        angles[i] = angle
    return max(candidates, key=lambda i: angles[i])


def SVM_active_feature_selection(X_train, y_train, X_test, y_test, num_features, init_samples, num_samples,
                                 model_type='SVC', strategy='largest_weight', reuse=True, balance=True):
    """Greedily select features while growing the set of training rows used for selection.

    Starting from ``init_samples`` seed rows, one feature is chosen with
    ``select_feature``. Then, ``num_features - 1`` times: a model restricted to
    the selected features is fitted on ALL training rows and its metrics are
    logged, ``select_samples`` proposes more rows, these are merged into the
    working set, and the next feature is chosen on the enlarged set.

    Parameters
    ----------
    X_train, y_train, X_test, y_test
        Train/test data; rows are indexed with integer lists.
    num_features : int
        Total number of features to select.
    init_samples : int
        Size of the seed sample.
    num_samples : int
        Forwarded to ``select_samples`` as the per-step sample count.
    model_type : str
        Forwarded to ``SVM`` / ``select_feature`` / ``select_samples``.
    strategy : str
        Forwarded to ``select_feature``; with ``'angle'`` the first feature is
        chosen with ``'score'``.
        NOTE(review): ``select_feature`` as defined in this file only handles
        'weight', 'score' and 'angle', so the default 'largest_weight' must be
        overridden by the caller.
    reuse, balance : bool
        Forwarded to ``select_samples``; ``balance`` also makes the seed sample
        class-balanced instead of uniformly random.

    Returns
    -------
    tuple
        ``(feature_selected, train_errors, test_errors, train_scores,
        test_scores, num_samples_list, samples_global)``.
    """
    feature_selected = []
    num_samples_list = []
    train_errors, test_errors = [], []
    train_scores, test_scores = [], []
    score_label = 'accuracy' if model_type == 'SVC' else 'R^2'

    def evaluate(step, sample_count):
        """Fit on the currently selected features, record and print the metrics."""
        X_measured_train = X_train[:, feature_selected]
        X_measured_test = X_test[:, feature_selected]
        model = SVM(X_measured_train, y_train, model_type=model_type)
        train_error = get_error(model, X_measured_train, y_train)
        test_error = get_error(model, X_measured_test, y_test)
        train_score = model.score(X_measured_train, y_train)
        test_score = model.score(X_measured_test, y_test)
        train_errors.append(train_error)
        test_errors.append(test_error)
        train_scores.append(train_score)
        test_scores.append(test_score)
        print("feature " + str(step) + ' : ' + str(sample_count) + ' samples')
        print('training error=' + str(train_error) + ' test error=' + str(test_error))
        print('training ' + score_label + '=' + str(train_score) + ' test ' + score_label + '=' + str(test_score))
        return X_measured_train

    # Seed sample.
    if balance:
        classes = np.unique(y_train)
        num_classes = len(classes)
        members = [list(np.where(y_train == label)[0]) for label in classes]
        order = sorted(range(num_classes), key=lambda k: len(members[k]))
        samples_global = []
        quota = int(init_samples / num_classes)
        # Smallest classes first: a class that fits in the quota is taken
        # whole, and the unspent budget is redistributed over the rest.
        for rank, k in enumerate(order):
            pool = members[k]
            if quota >= len(pool):
                samples_global += pool
            else:
                quota = int((init_samples - len(samples_global)) / (num_classes - rank))
                # Clamp: the recomputed quota may exceed this class's size.
                samples_global += random.sample(pool, min(quota, len(pool)))
    else:
        shuffle = np.arange(X_train.shape[0])
        np.random.shuffle(shuffle)
        samples_global = list(shuffle[:init_samples])
    num_samples_list.append(len(samples_global))
    X_global = X_train[samples_global, :]
    y_global = y_train[samples_global]

    # 'angle' needs at least one selected feature, so bootstrap with 'score'.
    first_strategy = 'score' if strategy == 'angle' else strategy
    feature_selected.append(
        select_feature(X_global, y_global, [], model_type=model_type, strategy=first_strategy))

    for step in range(num_features - 1):
        X_measured_train = evaluate(step, len(samples_global))
        samples = select_samples(X_measured_train, y_train, num_samples, model_type=model_type, reuse=reuse,
                                 sample_selected=samples_global, balance=balance)
        samples_global = list(set().union(samples_global, samples))
        num_samples_list.append(len(samples_global))
        X_global = X_train[samples_global, :]
        y_global = y_train[samples_global]
        new_feature = select_feature(X_global, y_global, feature_selected, model_type=model_type,
                                     strategy=strategy)
        feature_selected.append(new_feature)

    # Final evaluation with the complete feature set. The step label is derived
    # from the feature count; the old code reused a loop variable that was
    # undefined (or left over from another loop) when num_features == 1.
    evaluate(len(feature_selected) - 1, len(samples_global))

    return feature_selected, train_errors, test_errors, train_scores, test_scores, num_samples_list, samples_global


def random_SVM_active_feature_selection(X_train, y_train, X_test, y_test, num_features, num_samples=None,
                                        model_type='SVC', strategy='largest_weight', reuse=True, balance=True):
    """Greedy feature selection where additional training rows are drawn at random.

    Parameters
    ----------
    X_train, y_train, X_test, y_test
        Train/test data; rows are indexed with integer lists.
    num_features : int
        Total number of features to select.
    num_samples : sequence of int or None
        ``None``: every feature is selected on the full training set.
        Otherwise ``num_samples[0]`` is the seed sample size and, at step
        ``i``, ``num_samples[i + 1] - num_samples[i]`` previously unused rows
        are added at random (so at least ``num_features`` entries are read).
    model_type : str
        Forwarded to ``SVM`` / ``select_feature``.
    strategy : str
        Forwarded to ``select_feature``; with ``'angle'`` the first feature is
        chosen with ``'score'``.
        NOTE(review): ``select_feature`` as defined in this file only handles
        'weight', 'score' and 'angle', so the default 'largest_weight' must be
        overridden by the caller.
    reuse : bool
        Accepted but not used by this function.
    balance : bool
        Make the seed sample class-balanced instead of uniformly random.

    Returns
    -------
    tuple
        ``(train_errors, test_errors, train_scores, test_scores,
        feature_selected, num_samples_list, samples_global)``.
        ``num_samples_list`` stays empty when ``num_samples`` is ``None``.
    """
    feature_selected = []
    num_samples_list = []
    train_errors, test_errors = [], []
    train_scores, test_scores = [], []
    score_label = 'accuracy' if model_type == 'SVC' else 'R^2'

    def evaluate(step, sample_count):
        """Fit on the currently selected features, record and print the metrics."""
        X_measured_train = X_train[:, feature_selected]
        X_measured_test = X_test[:, feature_selected]
        model = SVM(X_measured_train, y_train, model_type=model_type)
        train_error = get_error(model, X_measured_train, y_train)
        test_error = get_error(model, X_measured_test, y_test)
        train_score = model.score(X_measured_train, y_train)
        test_score = model.score(X_measured_test, y_test)
        train_errors.append(train_error)
        test_errors.append(test_error)
        train_scores.append(train_score)
        test_scores.append(test_score)
        print("feature " + str(step) + ' : ' + str(sample_count) + ' samples')
        print('training error=' + str(train_error) + ' test error=' + str(test_error))
        print('training ' + score_label + '=' + str(train_score) + ' test ' + score_label + '=' + str(test_score))

    if num_samples is None:
        X_global = X_train
        y_global = y_train
        samples_global = range(X_train.shape[0])
    else:
        init_samples = num_samples[0]
        if balance:
            classes = np.unique(y_train)
            num_classes = len(classes)
            members = [list(np.where(y_train == label)[0]) for label in classes]
            order = sorted(range(num_classes), key=lambda k: len(members[k]))
            samples_global = []
            quota = int(init_samples / num_classes)
            # Smallest classes first: a class that fits in the quota is taken
            # whole, and the unspent budget is redistributed over the rest.
            for rank, k in enumerate(order):
                pool = members[k]
                if quota >= len(pool):
                    samples_global += pool
                else:
                    quota = int((init_samples - len(samples_global)) / (num_classes - rank))
                    # Clamp: the recomputed quota may exceed this class's size.
                    samples_global += random.sample(pool, min(quota, len(pool)))
        else:
            shuffle = np.arange(X_train.shape[0])
            np.random.shuffle(shuffle)
            # Must be a list: the loop below extends it with ``+``, which on an
            # ndarray would add numbers (or fail to broadcast) instead of
            # concatenating.
            samples_global = list(shuffle[:init_samples])
        num_samples_list.append(len(samples_global))
        X_global = X_train[samples_global, :]
        y_global = y_train[samples_global]

    # 'angle' needs at least one selected feature, so bootstrap with 'score'.
    first_strategy = 'score' if strategy == 'angle' else strategy
    feature_selected.append(
        select_feature(X_global, y_global, [], model_type=model_type, strategy=first_strategy))

    for step in range(num_features - 1):
        evaluate(step, len(samples_global))
        if num_samples is not None:
            unused = list(set(range(X_train.shape[0])) - set(samples_global))
            samples = random.sample(unused, (num_samples[step + 1] - num_samples[step]))
            samples_global = samples_global + samples
            num_samples_list.append(len(samples_global))
            X_global = X_train[samples_global, :]
            y_global = y_train[samples_global]
        new_feature = select_feature(X_global, y_global, feature_selected, model_type=model_type,
                                     strategy=strategy)
        feature_selected.append(new_feature)

    # Final evaluation with the complete feature set. The step label is derived
    # from the feature count; the old code reused a loop variable that was
    # undefined (or left over from another loop) when num_features == 1.
    evaluate(len(feature_selected) - 1, len(samples_global))
    return train_errors, test_errors, train_scores, test_scores, feature_selected, num_samples_list, samples_global


def feature_compare(X_raw, Y_raw, num_f, num_s_list=None, model_type='SVC'):
    """Select features with several baseline methods and print/return their indices.

    Methods: random draw, ANOVA F-value, mutual information, per-class chi2,
    linear-SVC weights (via ``SelectFromModel``) and ExtraTrees importances.

    Parameters
    ----------
    X_raw, Y_raw
        Feature matrix and labels. chi2 is applied to ``X_raw`` directly.
    num_f : int
        Number of features for the random baseline and, when ``num_s_list`` is
        ``None``, for every other method.
    num_s_list : sequence of int or None
        ``None``: use all rows and return one flat list of ``num_f`` indices
        per method. Otherwise, for the ``j``-th entry the row sample is grown
        at random to ``num_s_list[j]`` rows and every method selects ``j + 1``
        features on it; each method then returns a list of lists.
    model_type : str
        Accepted but not used by this function.

    Returns
    -------
    tuple
        ``(idd, ran_idx, fvalue_idx, mut_idx, chi_idx, svm_idx, imp_idx)``
        where ``idd`` is ``None`` or the list of sampled row indices.
    """
    D = np.shape(X_raw)[1]
    # Random baseline. Drawn WITHOUT replacement so the baseline really holds
    # num_f distinct features (random.choices could repeat an index).
    ran_idx = random.sample(range(D), num_f)
    print('ran_idx:')
    print(ran_idx)
    if num_s_list is None:
        idd = None
        X = X_raw
        Y = Y_raw
        # f_value: score every feature (k=D), then keep the num_f best.
        model = SelectKBest(f_classif, k=D).fit(X, Y)
        score_raw = np.abs(model.scores_).tolist()
        score = [0 if math.isnan(x) else x for x in score_raw]
        idx = model.get_support(indices=True).tolist()
        fvalue_idx = [x for _, x in sorted(zip(score, idx), reverse=True)][:num_f]
        print('fvalue_idx:')
        print(fvalue_idx)
        # mutual information, ranked the same way
        model = SelectKBest(mutual_info_classif, k=D).fit(X, Y)
        score_raw = np.abs(model.scores_).tolist()
        score = [0 if math.isnan(x) else x for x in score_raw]
        idx = model.get_support(indices=True).tolist()
        mut_idx = [x for _, x in sorted(zip(score, idx), reverse=True)][:num_f]
        print('mut_idx:')
        print(mut_idx)
        # chi2: one-vs-rest ranking per class, then round-robin over classes
        # taking each class's best feature that is not chosen yet.
        u = np.unique(Y)
        sorted_list = []
        chi_idx = []
        for i in u:
            model = SelectKBest(chi2, k=D).fit(X, 1 * (Y == i))
            score_raw = np.abs(model.scores_).tolist()
            score = [0 if math.isnan(x) else x for x in score_raw]
            idx = model.get_support(indices=True).tolist()
            sorted_list.append([x for _, x in sorted(zip(score, idx), reverse=True)])
        for f in range(num_f):
            label = f % len(u)
            idx_select = [i for i in sorted_list[label] if i not in chi_idx][0]
            chi_idx.append(idx_select)
        print('chi_idx:')
        print(chi_idx)
        # SVM: grow max_features by one per round and record the newly added
        # feature, which yields the features in selection order.
        # NOTE(review): the model is refitted every round; if two fits rank
        # features differently the set difference may be empty or hold several
        # entries — confirm this is acceptable.
        svm_idx = []
        for i in range(num_f):
            selector = SelectFromModel(estimator=svm.LinearSVC(max_iter=10000000), threshold=-np.inf,
                                       max_features=i + 1).fit(X, Y)
            idx = selector.get_support(indices=True).tolist()
            idx_select = list(set(idx) - set(svm_idx))[0]
            print(list(set(idx) - set(svm_idx)))
            svm_idx.append(idx_select)
        print('svm_idx:')
        print(svm_idx)
        # Feature importance is an inbuilt class that comes with Tree Based Classifiers
        model = ExtraTreesClassifier()
        model.fit(X, Y)
        coef = [0 if math.isnan(x) else x for x in np.abs(model.feature_importances_)]
        imp_idx = sorted(range(len(coef)), key=lambda i: coef[i], reverse=True)[:num_f]
        print('imp_idx:')
        print(imp_idx)
    else:
        idd = []
        fvalue_idx = []
        mut_idx = []
        chi_idx = []
        svm_idx = []
        imp_idx = []
        for j in range(len(num_s_list)):
            # Grow the row sample to num_s_list[j] rows with unused rows.
            ra = range(X_raw.shape[0])
            samples = random.sample(list(set(ra) - set(idd)), (num_s_list[j] - len(idd)))
            idd = idd + samples
            X = X_raw[idd, :]
            Y = Y_raw[idd]
            # f_value
            model = SelectKBest(f_classif, k=j + 1).fit(X, Y)
            fvalue_idx.append(model.get_support(indices=True).tolist())
            # mutual
            model = SelectKBest(mutual_info_classif, k=j + 1).fit(X, Y)
            mut_idx.append(model.get_support(indices=True).tolist())
            # chi2
            u, count = np.unique(Y, return_counts=True)
            if j < 10:
                # Round-robin over classes, as in the full-data branch.
                sorted_list = []
                chi_idx_tmp = []
                for i in u:
                    model = SelectKBest(chi2, k=D).fit(X, 1 * (Y == i))
                    score_raw = np.abs(model.scores_).tolist()
                    score = [0 if math.isnan(x) else x for x in score_raw]
                    idx = model.get_support(indices=True).tolist()
                    sorted_list.append([x for _, x in sorted(zip(score, idx), reverse=True)])
                for f in range(j + 1):
                    label = f % len(u)
                    idx_select = [i for i in sorted_list[label] if i not in chi_idx_tmp][0]
                    chi_idx_tmp.append(idx_select)
                chi_idx.append(chi_idx_tmp)
            else:
                # Equal share per class, remainder taken from the most
                # frequent class.
                # NOTE(review): the per-class lists are concatenated without
                # de-duplication, so the result may contain repeated indices —
                # confirm whether that is intended.
                chi_idx_tmp = []
                for i in u:
                    model = SelectKBest(chi2, k=int((j + 1) / len(u))).fit(X, 1 * (Y == i))
                    chi_idx_tmp = chi_idx_tmp + model.get_support(indices=True).tolist()
                max_class = int(u[count.tolist().index(count.max())])
                model = SelectKBest(chi2, k=(j + 1 - len(chi_idx_tmp))).fit(X, 1 * (Y == max_class))
                chi_idx_tmp = chi_idx_tmp + model.get_support(indices=True).tolist()
                chi_idx.append(chi_idx_tmp)
            # SVM
            selector = SelectFromModel(estimator=svm.LinearSVC(max_iter=10000000), threshold=-np.inf,
                                       max_features=j + 1).fit(X, Y)
            svm_idx.append(selector.get_support(indices=True).tolist())
            # Feature importance is an inbuilt class that comes with Tree Based Classifiers
            model = ExtraTreesClassifier()
            model.fit(X, Y)
            coef = [0 if math.isnan(x) else x for x in np.abs(model.feature_importances_)]
            imp_idx.append(sorted(range(len(coef)), key=lambda i: coef[i], reverse=True)[:(j + 1)])
        print('fvalue_idx:')
        print(fvalue_idx)
        print('mut_idx:')
        print(mut_idx)
        print('chi_idx:')
        print(chi_idx)
        print('svm_idx:')
        print(svm_idx)
        print('imp_idx:')
        print(imp_idx)
    return idd, ran_idx, fvalue_idx, mut_idx, chi_idx, svm_idx, imp_idx


def show_features(X, y, filename):
    """Plot one sample per class with only the selected features left visible.

    Parameters
    ----------
    X, y
        Samples and labels. Every row of ``X`` is reshaped to 28 x 28, and the
        grid has 10 columns, so at least 10 distinct labels are needed.
    filename : str
        Comma-delimited text file, read with ``np.loadtxt``, holding the
        selected feature indices in selection order.

    The figure has 5 rows; row ``i`` keeps the first ``16 * (i + 1)`` features
    and blanks out all others. The image shown is ``1 - masked_sample``.
    """
    # The unused ``import torchvision.utils`` was removed: it made the function
    # fail with ImportError wherever torchvision is not installed.
    features = np.loadtxt(filename, delimiter=',')
    # Shuffle the rows so the sample shown for each class varies between calls.
    idx = np.arange(np.shape(X)[0])
    random.shuffle(idx)
    X = X[idx, :]
    y = y[idx]
    # First shuffled row of every class.
    samples = []
    for i in np.unique(y):
        samples.append(X[np.where(y == i)[0][0], :])
    plt.figure(figsize=(19, 10))
    gs1 = gridspec.GridSpec(5, 10)
    gs1.update(wspace=0, hspace=0)  # set the spacing between axes.

    for i in range(5):
        # 0/1 mask over features: 1 for the first 16 * (i + 1) selected ones.
        flag = np.zeros((X.shape[1],))
        for f in features[:16 * (i + 1)]:
            flag[int(f)] = 1
        for j in range(10):
            img = 1 - np.multiply(samples[j], flag).reshape(28, 28)
            ax1 = plt.subplot(gs1[i * 10 + j])
            ax1.set_aspect('equal')
            ax1.set_xticklabels([])
            ax1.set_yticklabels([])
            ax1.imshow(img, cmap='gray', interpolation='nearest')
            plt.axis('off')
    plt.subplots_adjust(wspace=0, hspace=0)
    plt.show()

# Download Data

In [None]:
import urllib.request
import shutil

# Stream the remote CSV straight to disk without loading it into memory.
data_url = "https://caltech.box.com/shared/static/wqvm0d9irzb7tneb16q01fnr4dt6cvl0.csv"
local_csv = './PBMCnorm_final.csv'
with urllib.request.urlopen(data_url) as response:
    with open(local_csv, "wb") as out_file:
        shutil.copyfileobj(response, out_file)

# Load Data as Dense Matrix

In [31]:
import pandas as pd

# Read the expression matrix, the labels (no header row) and the gene names.
data_df = pd.read_csv('./PBMCnorm_final.csv')
label_df = pd.read_csv('./labels_final.csv', header=None)
gene_df = pd.read_csv('./genes_final.csv')

raw_data = data_df.values
# One label per column and one gene name per row of the raw matrix.
target = label_df.values.reshape((raw_data.shape[1],)).astype(np.double)
gene = gene_df.values.reshape((raw_data.shape[0],))
# Transpose so that rows are the labelled samples, then normalise each column.
data = normalize(raw_data.T, axis=0)
del data_df, label_df, gene_df, raw_data

# Random 80/20 train/test split.
idx = np.arange(np.shape(data)[0])
random.shuffle(idx)
split_at = int(np.shape(data)[0] * 4 / 5)
train_rows = idx[:split_at]
test_rows = idx[split_at:]
X_train = data[train_rows, :]
y_train = target[train_rows]
X_test = data[test_rows, :]
y_test = target[test_rows]

print(type(data))
print(np.shape(data), np.shape(target), len(np.unique(target)))
print(np.shape(X_train))
print(np.shape(X_test))

<class 'numpy.ndarray'>
(10194, 6915) (10194,) 5
(8155, 6915)
(2039, 6915)


# Define Parameters and Work Space

In [32]:
folder = 'results'

num_features = 15
num_samples = 100

# Results of a run go to <folder>/test<num_features>_<num_samples>.
path = folder + '/test' + str(num_features) + '_' + str(num_samples)
for directory in (folder, path):
    try:
        os.mkdir(directory)
    except FileExistsError:
        # Re-running the cell is normal; an existing directory is not a
        # failure and used to be reported as one.
        print("Directory %s already exists" % directory)
    except OSError:
        print("Creation of the directory %s failed" % directory)
    else:
        print("Successfully created the directory %s " % directory)

Creation of the directory results failed
Creation of the directory results/test15_100 failed


# Select Genes

In [36]:
# Run the active selection: seed with 2 * num_samples rows, 'angle' strategy,
# no sample reuse and no class balancing.
(feature_selected, train_errors, test_errors, train_scores, test_scores,
 num_samples_list, samples_global) = SVM_active_feature_selection(
    X_train, y_train, X_test, y_test,
    num_features=num_features,
    init_samples=num_samples * 2,
    num_samples=num_samples,
    model_type='SVC',
    strategy='angle',
    reuse=False,
    balance=False,
)

# Dump the results as text files into the run directory.
for stem, payload in (
    ('feature_selected', feature_selected),
    ('error', train_errors + test_errors),
    ('accuracy', train_scores + test_scores),
    ('num_samples_list', num_samples_list),
    ('genes_name', gene[feature_selected]),
):
    text_create(path, stem, payload)

# One figure per metric: train and test curves over the selection steps.
label_font = {'weight': 'normal', 'size': 18}
for pdf_name, curves, legend_labels, y_label in (
    ('/acc.pdf', (train_scores, test_scores), ['train acc', 'test acc'], 'accuracy'),
    ('/error.pdf', (train_errors, test_errors), ['train error', 'test error'], 'error'),
):
    plt.figure(figsize=(12, 12))
    for curve in curves:
        plt.plot(curve, linewidth=2)
    plt.legend(legend_labels, prop={'size': 18})
    plt.xlabel('number of genes', fontdict=label_font)
    plt.ylabel(y_label, fontdict=label_font)
    plt.tick_params(labelsize=18)
    plt.savefig(path + pdf_name, bbox_inches="tight")

feature 0 : 200 samples
training error=1.8694052728387491 test error=1.766061794997548
training accuracy=0.34494175352544454 test accuracy=0.34526728788621874
feature 1 : 296 samples
training error=2.1168608215818514 test error=1.9460519862677783
training accuracy=0.6337216431637033 test accuracy=0.6463952918097107
feature 2 : 394 samples
training error=1.9742489270386265 test error=1.803825404610103
training accuracy=0.7156345800122624 test accuracy=0.7385973516429623
feature 3 : 486 samples
training error=1.4337216431637032 test error=1.293281020107896
training accuracy=0.7993868792152053 test accuracy=0.8146150073565473
feature 4 : 576 samples
training error=1.4229307173513182 test error=1.2810201078960275
training accuracy=0.8008583690987124 test accuracy=0.8155958803334968
feature 5 : 668 samples
training error=1.3932556713672593 test error=1.249141736145169
training accuracy=0.8096873083997548 test accuracy=0.8239333006375674
feature 6 : 746 samples
training error=1.3354996934396

Exception ignored in: <Finalize object, dead>
Traceback (most recent call last):
  File "/anaconda3/lib/python3.7/multiprocessing/util.py", line 224, in __call__
    res = self._callback(*self._args, **self._kwargs)
  File "/anaconda3/lib/python3.7/multiprocessing/pool.py", line 601, in _terminate_pool
    p.terminate()
  File "/anaconda3/lib/python3.7/multiprocessing/process.py", line 124, in terminate
    self._popen.terminate()
AttributeError: 'NoneType' object has no attribute 'terminate'
