# T03: X-Ray Classifier
---

A lo largo de este archivo se describen distintas estrategias para hacer la clasificación. 

Cada estrategia debe contar con los siguientes elementos:

    1. Características extraídas.
    2. Selección y transformación de características.
    3. Clasificador utilizado.
    
---
### - Se cargan los datos

In [303]:
import numpy as np
import os

# Root folder that holds the training/testing splits and the feature-name file.
DATADIR = 'data'

# Training split: feature matrix, raw label array, and the integer obtained
# from characters 1-2 of every label.
# NOTE: allow_pickle=True lets np.load unpickle object arrays; only use it on trusted files.
train_dir = os.path.join(DATADIR, 'training')
x_training = np.load(os.path.join(train_dir, 'dataset_features.npy'), allow_pickle=True)
label_training = np.load(os.path.join(train_dir, 'dataset_labels.npy'), allow_pickle=True)
d_train = np.array([int(name[1:3]) for name in label_training])

# Testing split, loaded and decoded exactly like the training split.
test_dir = os.path.join(DATADIR, 'testing')
x_testing = np.load(os.path.join(test_dir, 'dataset_features.npy'), allow_pickle=True)
label_testing = np.load(os.path.join(test_dir, 'dataset_labels.npy'), allow_pickle=True)
d_test = np.array([int(name[1:3]) for name in label_testing])

# Names of the feature columns.
flabels = np.load(os.path.join(DATADIR, 'flabels.npy'), allow_pickle=True)

In [304]:
# Substrings that identify the feature families to keep: a feature is selected
# when its name contains at least one of them.
to_use = ['int', 'hog']

# Column index of every selected feature. Taking the index from enumerate
# (instead of searching flabels again for each name) keeps this linear, does
# not fail when a feature name is repeated, and selects each column only once
# even if its name matches several of the substrings.
use_columns = [
    idx
    for idx, label in enumerate(flabels)
    if any(ftype in label for ftype in to_use)
]

print(f'Using only {len(use_columns)} features...')

# Keep only the selected columns in both splits.
X_train = x_training[:, use_columns]
X_test = x_testing[:, use_columns]

Using only 713 features...


---
### - Training

In [305]:
from pybalu.feature_selection import clean
from pybalu.feature_transformation import normalize
from pybalu.feature_selection import sfs


# Number of features requested from the sequential forward selection.
FEATURES = 25

# Step 1 - cleaning: the selector returned by `clean` is used to pick columns
# of the training matrix.
sclean = clean(X_train, show=False)
X_train_clean = X_train[:, sclean]
print(f'Cleaned features: {X_train_clean.shape[1]}({X_train_clean.shape[0]} samples).')

# Step 2 - normalization: besides the normalized matrix, `normalize` returns
# the values a and b, kept so the same scaling can be applied to other data.
X_train_norm, a, b = normalize(X_train_clean)
print(f'Normalized features: {X_train_norm.shape[1]}({X_train_norm.shape[0]} samples).')

# Step 3 - feature selection: SFS with the "fisher" method over the
# normalized training data and its class labels.
ssfs = sfs(X_train_norm, d_train, n_features=FEATURES, method="fisher", show=True)
X_train_sfs = X_train_norm[:, ssfs]
print(f'Selected features: {X_train_sfs.shape[1]}({X_train_sfs.shape[0]} samples).')

Selecting Features:   0%|          | 0.00/25.0 [00:00<?, ? features/s]

Cleaned features: 688(5040 samples).
Normalized features: 688(5040 samples).


Selecting Features: 100%|██████████| 25.0/25.0 [00:16<00:00, 1.54 features/s]

Selected features: 25(5040 samples).





### - Testing:

In [306]:
# Cleaning: reuse the column selector computed on the training split.
X_test_clean = X_test[:, sclean]

# Normalization: apply the a / b values obtained from the training normalization.
X_test_norm = X_test_clean * a + b

# Feature selection: keep the columns chosen by SFS on the training split.
X_test_sfs = X_test_norm[:, ssfs]
print(f'Clean,Norm,SFS features: {X_test_sfs.shape[1]}({X_test_sfs.shape[0]} samples)')

Clean,Norm,SFS features: 25(1260 samples)


### - Classification:

In [307]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix


# Fit a 3-nearest-neighbours classifier on the selected training features.
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train_sfs, d_train)

# Predict the class of every test sample.
ds = knn.predict(X_test_sfs)

# Compare the predictions against the true test classes.
cmatrix = confusion_matrix(d_test, ds)
acc = accuracy_score(d_test, ds)

print(cmatrix)
print(acc)

[[417   2   1]
 [ 10 383  27]
 [ 26  95 299]]
0.8722222222222222


In [None]:
        # Fragment that reduces the feature matrices with PCA.
        # NOTE(review): `PCA` and `commands` are not imported or defined in the visible
        # cells, and the indentation suggests this was copied from inside a function or
        # loop; presumably PCA is sklearn.decomposition.PCA — confirm before running.
        # The number of components is read from the last entry of `commands`.
        pca = PCA(n_components=commands[-1]['n_components'])
        # The projection is fitted on the training features only.
        pca.fit(x_training)
        print('Features reducidas a: ', commands[-1]['n_components'], ' componentes...')
        # Both splits are projected with the same fitted model and rebound to the same
        # names, so the unreduced matrices are no longer reachable through them.
        x_training, x_testing = pca.transform(x_training), pca.transform(x_testing)