In [1]:
# Enable IPython's autoreload extension in mode 2 so that edits to the
# project modules imported below are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:


from collections import Counter

from sklearn.datasets import make_classification
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier

from src.main.python.iSel import cnn, enn, icf, lssm, lsbo, drop3, ldis, cdis, xldis, psdsp, ib3, cis, egdis, e2sc
from src.main.python.utils.general import get_data



In [42]:

def get_selector(method: str):
    """Build the instance-selection object registered under ``method``.

    The requested name is echoed to stdout before the lookup. Recognised
    names are the baselines (``cnn``, ``enn``, ``icf``, ``lssm``, ``lsbo``,
    ``drop3``, ``ldis``, ``cdis``, ``xldis``, ``psdsp``, ``ib3``, ``egdis``,
    ``cis``) and the two E2SC configurations (``e2sc-1``, ``e2sc-2``).

    Args:
        method: Name of the instance-selection method (matched exactly,
            case-sensitive).

    Returns:
        A newly constructed selector, or ``None`` when ``method`` matches
        none of the registered names.
    """
    print(f"IS-Method: {method}")

    # Each constructor is wrapped in a lambda so that only the requested
    # selector is ever instantiated.
    registry = (
        # Baselines
        ('cnn', lambda: cnn.CNN()),
        ('enn', lambda: enn.ENN()),
        ('icf', lambda: icf.ICF()),
        ('lssm', lambda: lssm.LSSm()),
        ('lsbo', lambda: lsbo.LSBo()),
        ('drop3', lambda: drop3.DROP3()),
        ('ldis', lambda: ldis.LDIS()),
        ('cdis', lambda: cdis.CDIS()),
        ('xldis', lambda: xldis.XLDIS()),
        ('psdsp', lambda: psdsp.PSDSP()),
        ('ib3', lambda: ib3.IB3()),
        ('egdis', lambda: egdis.EGDIS()),
        ('cis', lambda: cis.CIS(task="atc")),
        # Proposed framework
        ('e2sc-1', lambda: e2sc.E2SC(alphaMode="exact", betaMode='iterative')),
        ('e2sc-2', lambda: e2sc.E2SC(alphaMode="approximated", betaMode='heuristic')),
    )

    # Sequential equality scan (not a dict lookup) so matching keeps plain
    # ``==`` semantics and the original evaluation order.
    for name, build in registry:
        if method == name:
            return build()
    return None


# Opening data - aisopos_ntua_2L dataset

In [43]:
# Dataset directory for aisopos_ntua_2L (the path suggests a TF-IDF
# representation). It is a relative path, so it resolves against the kernel's
# working directory — NOTE(review): confirm the notebook is meant to run from
# the repository root.
inputdir = "resources/datasets/aisopos_ntua_2L/tfidf/"

# Load the train/test split; the fifth value returned by get_data is discarded.
# NOTE(review): f=0 presumably selects fold 0 — confirm against get_data.
X_train, y_train, X_test, y_test, _ = get_data(inputdir, f=0)

# Example E2SC - Selecting Instances

In [57]:
# Build the selector through the factory. Alternatives to try here:
#   get_selector(method="e2sc-1")
#   e2sc.E2SC(alphaMode="approximated", beta=0.15)   (direct construction)
selector = get_selector(method="e2sc-2")
# Fit the selector on the full training split.
selector.fit(X_train, y_train)
# sample_indices_ is used below to index the training arrays.
# NOTE(review): presumably the positions of the retained instances — confirm.
idx = selector.sample_indices_
# Reduced training set used by the classifier comparison that follows.
X_train_selected, y_train_selected =  X_train[idx], y_train[idx]
# Last expression of the cell, so its value is shown as the cell output.
# NOTE(review): presumably the fraction of training instances removed — confirm.
selector.reduction_

IS-Method: e2sc-2


0.248

# Example E2SC - Comparing Classifiers

In [58]:
def fit_and_score_1nn(X_fit, y_fit, X_eval, y_eval):
    """Train a 1-nearest-neighbour classifier and score it.

    Args:
        X_fit: Training features.
        y_fit: Training labels.
        X_eval: Evaluation features.
        y_eval: Evaluation labels.

    Returns:
        Tuple ``(classifier, predictions, accuracy)`` where ``accuracy`` is
        ``accuracy_score(y_eval, predictions)``.
    """
    model = KNeighborsClassifier(n_neighbors=1)
    model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    return model, predictions, accuracy_score(y_eval, predictions)


# Baseline: train on the complete training split (no instance selection).
acc_nosel = fit_and_score_1nn(X_train, y_train, X_test, y_test)[2]
print(f"NoSel: {acc_nosel}")

# Same classifier trained only on the instances kept by the selector.
# clf / y_pred / acc stay bound to this second run, as they were before.
clf, y_pred, acc = fit_and_score_1nn(X_train_selected, y_train_selected, X_test, y_test)

# Label the result with the selector actually in use; the previous hard-coded
# "CNN" label was wrong whenever another method (e.g. e2sc-2) was selected.
print(f"{type(selector).__name__}: {acc}")


NoSel: 0.75
CNN: 0.75


# make_classification Example

In [43]:
# Synthetic, imbalanced two-class problem: 1000 samples with 20 features, split
# roughly 10% / 90% between the classes via ``weights``. ``flip_y=0`` disables
# random label flipping and ``random_state`` makes the draw reproducible.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=3,
    n_redundant=1,
    n_classes=2,
    n_clusters_per_class=1,
    weights=[0.1, 0.9],
    class_sep=2,
    flip_y=0,
    random_state=10,
)
print('Original dataset shape %s' % Counter(y))



Original dataset shape Counter({1: 900, 0: 100})


In [46]:
# Run E2SC with its default settings on the synthetic data. Dedicated names
# are used so this cell does not overwrite `selector`, `idx`,
# `X_train_selected` and `y_train_selected` from the aisopos_ntua_2L example;
# re-running the classifier comparison afterwards would otherwise silently
# train on synthetic instances.
synthetic_selector = e2sc.E2SC()
synthetic_selector.fit(X, y)
synthetic_idx = synthetic_selector.sample_indices_
X_selected, y_selected = X[synthetic_idx], y[synthetic_idx]
# Class distribution of the instances kept by the selector.
print('Resampled dataset shape %s' % Counter(y_selected))


Resampled dataset shape Counter({1: 36, 0: 14})
