In [5]:
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler, Normalizer

from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline

import numpy as np

In [6]:
# Seed passed as `random_state` wherever the notebook needs reproducible randomness.
RANDOM = 99

# Per-dataset containers, keyed by dataset name: the full feature matrices
# and label vectors ...
X, y = {}, {}

# ... and their train / test partitions.
X_train, X_test = {}, {}
y_train, y_test = {}, {}

In [7]:
# Candidate preprocessing steps, looked up by a short name.
# Every transformer is created with its library-default settings.
preprocessors = dict([
    ('std', StandardScaler()),
    ('norm', Normalizer()),
    ('pca', PCA()),
])

# Candidate classifiers, looked up by a short name.
# Each entry is a two-element list: [estimator instance, hyper-parameter grid].
# The grid maps estimator parameter names to the list of values to try.
# Estimators that accept `random_state` are seeded with RANDOM for reproducibility.
estimators = {
    'knn': [
        KNeighborsClassifier(),
        # p=1 -> Manhattan distance, p=2 -> Euclidean distance (Minkowski metric).
        { 'n_neighbors': [1, 3, 5, 9], 'p': [1, 2] }
    ],
    'mlp': [
        MLPClassifier(random_state=RANDOM),
        {
            'activation': ['identity', 'logistic', 'tanh', 'relu'],
            'alpha': [0.000001, 0.00001, 0.0001, 0.001, 0.01]
        }
    ],
    'svm': [
        SVC(random_state=RANDOM),
        # 'precomputed' is deliberately not searched: it requires X to be a
        # square (n_samples, n_samples) kernel matrix, so every fit on a plain
        # feature matrix would fail.
        { 'kernel': ['linear', 'poly', 'rbf', 'sigmoid'] }
    ],
    'tree': [
        DecisionTreeClassifier(random_state=RANDOM),
        {
            'criterion': ['gini', 'entropy'],
            # The parameter is spelled `min_samples_leaf` (plural "samples");
            # a misspelled key is rejected by `set_params` during grid search.
            'min_samples_leaf': [1, 3, 5, 9],
            'min_impurity_decrease': [0, 0.1, 0.25, 0.5]
        }
    ],
    'forest': [
        RandomForestClassifier(random_state=RANDOM),
        {
            'criterion': ['gini', 'entropy'],
            # Same parameter name as for the single decision tree.
            'min_samples_leaf': [1, 3, 5, 9],
            'min_impurity_decrease': [0, 0.1, 0.25, 0.5]
        }
    ]
}

In [9]:
# Load both sample sets of the 'mol' dataset and flatten each sample into a
# row of 49 values.
mol = np.load('../res/molecules.npy').reshape(-1, 49)
no_mol = np.load('../res/no_mol.npy').reshape(-1, 49)

# Row counts of the two sets; they determine how many 1 / 0 labels are needed.
n_ones = len(mol)
n_zeros = len(no_mol)

# Stack the `mol` rows on top of the `no_mol` rows and label them in the same
# order: 1 for every `mol` row, 0 for every `no_mol` row.
X['mol'] = np.vstack((mol, no_mol))
y['mol'] = np.concatenate((
    np.ones(n_ones, dtype=int),
    np.zeros(n_zeros, dtype=int),
))

In [11]:
# Load the 'rna' dataset: features are used as stored, labels are flattened
# to a 1-D vector.
X['rna'] = np.load('../res/rnafolding_X.npy')
y['rna'] = np.ravel(np.load('../res/rnafolding_y.npy'))

In [12]:
# Hold out 20% of every dataset as a test set. The split is shuffled and
# seeded with RANDOM, and each dataset is split with identical settings.
for _dataset in ('mol', 'rna'):
    (
        X_train[_dataset],
        X_test[_dataset],
        y_train[_dataset],
        y_test[_dataset],
    ) = train_test_split(
        X[_dataset],
        y[_dataset],
        test_size=0.2,
        shuffle=True,
        random_state=RANDOM,
    )
del _dataset  # do not leave the loop variable in the notebook namespace