In [18]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
import matplotlib.pyplot as plt
from keras.layers import Dense, Activation, Embedding, Flatten, GlobalMaxPool1D, Dropout, Conv1D,MaxPooling1D
from keras.callbacks import ReduceLROnPlateau, EarlyStopping, ModelCheckpoint
from keras.losses import binary_crossentropy
from tensorflow.keras.optimizers import Adam, SGD
from keras.utils.vis_utils import plot_model
from plot_keras_history import plot_history


In [3]:
pip install plot_keras_history


In [5]:
# Read the four pre-split CSV files; in each one the first column is used as
# the row index. The order of the paths matches the order of the targets.
x_train, y_train, x_test, y_test = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        r'../input/satdrec/Train_Features.csv',
        r'../input/satdrec/Train_Labels.csv',
        r'../input/satdrec/Test_Features.csv',
        r'../input/satdrec/Test_Labels.csv',
    )
)

In [6]:
import numpy as np
import pandas as pd
import random
from sklearn.datasets import make_classification
from sklearn.neighbors import NearestNeighbors



def get_tail_label(df):
    """
    Give the tail (minority) label columns of the given target dataframe.

    For every label column the rows equal to 1 are counted. The imbalance
    ratio of a label is ``largest_count / count``; a label is a tail label
    when its ratio is strictly greater than the mean ratio of all labels that
    occur at least once. Labels that never occur are skipped: their ratio is
    undefined and there is no instance of them to oversample.

    args
    df: pandas.DataFrame, target label df whose tail labels have to be identified

    return
    tail_label: list, column names of all tail labels (empty when no label
                occurs or when all occurring labels are equally frequent)
    """
    columns = df.columns
    # Count positives by comparison rather than ``value_counts()[1]``: that
    # lookup raises KeyError for a label with no positive row and silently
    # becomes positional when the value index is not integer-typed.
    positives = (df == 1).sum(axis=0).to_numpy(dtype=float)
    present = positives > 0
    if not present.any():
        return []

    irpl = np.full(len(columns), np.nan)
    irpl[present] = positives.max() / positives[present]
    mir = irpl[present].mean()

    return [
        column
        for column, ratio, seen in zip(columns, irpl, present)
        if seen and ratio > mir
    ]

def get_index(df):
    """
    Collect the index labels of every row that carries a tail label.

    args
    df: pandas.DataFrame, target label df to scan for tail-label rows

    return
    index: list, index labels of all rows in which at least one tail label
           (as reported by get_tail_label) equals 1; each label appears once
    """
    rows = set()
    for label in get_tail_label(df):
        # Rows where this tail label is set, merged into the running result.
        rows = rows | set(df.index[df[label] == 1])
    return list(rows)

def get_minority_instace(X, y):
    """
    Extract the minority subset: all rows that carry at least one tail label.

    args
    X: pandas.DataFrame, the feature vector dataframe
    y: pandas.DataFrame, the target vector dataframe

    return
    X_sub: pandas.DataFrame, feature rows whose index label is a tail-label row of y
    y_sub: pandas.DataFrame, target rows whose index label is a tail-label row of y
    Both results are re-indexed from 0 (the old index is dropped).
    """
    tail_rows = get_index(y)
    keep_X = X.index.isin(tail_rows)
    keep_y = y.index.isin(tail_rows)
    return (
        X.loc[keep_X].reset_index(drop=True),
        y.loc[keep_y].reset_index(drop=True),
    )

def nearest_neighbour(X, n_neighbors=5):
    """
    Give the positional indices of the nearest neighbours of every instance.

    The model is fitted on X and then queried with X itself, so each row's own
    position is normally part of its neighbour list.

    args
    X: np.array or pandas.DataFrame, samples whose nearest neighbours are wanted
    n_neighbors: int, neighbours to return per sample (default 5). Capped at
                 the number of samples, because the query raises when more
                 neighbours are requested than samples were fitted.

    return
    indices: np.ndarray of shape (len(X), k), positions of the k nearest
             samples for each row of X, with k = min(n_neighbors, len(X))
    """
    k = min(n_neighbors, len(X))
    nbs = NearestNeighbors(n_neighbors=k, metric='euclidean', algorithm='kd_tree').fit(X)
    _, indices = nbs.kneighbors(X)  # distances are not needed by callers
    return indices

def MLSMOTE(X, y, n_sample):
    """
    Give the augmented data using the MLSMOTE algorithm.

    For each synthetic sample a random reference row and one of its nearest
    neighbours are drawn. The new feature vector is a random point on the
    segment between the two, and a label is set when more than half of the
    reference's neighbourhood carries it.

    args
    X: pandas.DataFrame, input feature dataframe
    y: pandas.DataFrame, target (label) dataframe, row-aligned with X
    n_sample: int, number of newly generated samples

    return
    new_X: pandas.DataFrame, X followed by the synthetic feature rows
    target: pandas.DataFrame, y followed by the synthetic label rows
    Both results get a fresh 0..n-1 index so no index label is duplicated.

    raises
    ValueError: if X and y differ in length or X has fewer than two rows
    """
    if len(X) != len(y):
        raise ValueError("X and y must have the same number of rows")
    if len(X) < 2:
        raise ValueError("MLSMOTE needs at least two samples to interpolate")

    neighbour_idx = nearest_neighbour(X)
    n_rows = len(neighbour_idx)

    # The neighbour search returns row positions, so work on plain arrays
    # instead of label-based lookups that only match a 0..n-1 index.
    features = X.to_numpy(dtype=float)
    labels = y.to_numpy(dtype=float)

    new_X = np.zeros((n_sample, X.shape[1]))
    target = np.zeros((n_sample, y.shape[1]))
    for i in range(n_sample):
        reference = random.randint(0, n_rows - 1)
        neighbourhood = neighbour_idx[reference]
        # Skip the first entry: it is normally the reference row itself.
        neighbour = random.choice(neighbourhood[1:])

        # Majority vote over the neighbourhood (for 5 neighbours: at least 3).
        votes = np.nansum(labels[neighbourhood], axis=0)
        target[i] = votes > len(neighbourhood) / 2

        # Interpolate from the reference towards the neighbour. The previous
        # `reference - neighbour` gap pushed the point away from the neighbour.
        ratio = random.random()
        gap = features[neighbour] - features[reference]
        new_X[i] = features[reference] + ratio * gap

    new_X = pd.DataFrame(new_X, columns=X.columns)
    target = pd.DataFrame(target, columns=y.columns)
    new_X = pd.concat([X, new_X], axis=0, ignore_index=True)
    target = pd.concat([y, target], axis=0, ignore_index=True)
    return new_X, target

In [7]:
N_SYNTHETIC_SAMPLES = 1000  # number of synthetic minority rows to generate
RANDOM_SEED = 42

# MLSMOTE draws from the stdlib `random` module; seeding right before the call
# makes this cell give the same augmented data on every run.
random.seed(RANDOM_SEED)

# Keep only the rows that carry a tail label, then oversample them.
# NOTE(review): X_res / y_res hold the minority subset plus the synthetic rows,
# not the full x_train / y_train - confirm that is what the models should see.
X_sub, y_sub = get_minority_instace(x_train, y_train)
X_res, y_res = MLSMOTE(X_sub, y_sub, N_SYNTHETIC_SAMPLES)

# Per-label positive counts: augmented data first, original training data second.
print(y_res.sum())
print("-----------------------------")

print(y_train.sum())

In [8]:
# Fully connected multi-label classifier trained on the augmented data: six
# sigmoid hidden layers, each followed by 50% dropout, and one sigmoid output
# unit per label column.
model = Sequential()
model.add(Dense(5000, activation='sigmoid', input_dim=X_res.shape[1]))
model.add(Dropout(0.5))
model.add(Dense(600, activation='sigmoid'))
model.add(Dropout(0.5))
model.add(Dense(300, activation='sigmoid'))
model.add(Dropout(0.5))
model.add(Dense(100, activation="sigmoid"))
model.add(Dropout(0.5))
model.add(Dense(60, activation="sigmoid"))
model.add(Dropout(0.5))
model.add(Dense(40, activation="sigmoid"))
model.add(Dropout(0.5))
model.add(Dense(y_res.shape[1], activation='sigmoid'))

# Hand the configured optimizer object to compile(). Passing the string 'sgd'
# builds a default SGD and silently drops the momentum / Nesterov / decay
# settings defined here.
sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='binary_crossentropy', optimizer=sgd)
#model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['binary_accuracy'])

#model.fit(x_train, y_train, epochs=5, batch_size=2000)
model.fit(X_res, y_res, validation_data=(x_test, y_test), epochs=20, batch_size=5000)

# Turn the per-label probabilities into hard 0/1 predictions (0.5 cut-off)
# without mutating the prediction array in place.
preds = model.predict(x_test)
preds = (preds >= 0.5).astype(preds.dtype)

# The evaluation below stays disabled: this model is compiled without metrics,
# so evaluate() returns only the loss and the two-value unpacking would fail.
#loss, accuracy = model.evaluate(X_res, y_res, verbose=False)
#print("Training Accuracy: {:.4f}".format(accuracy))
#loss, accuracy = model.evaluate(x_test, y_test, verbose=False)
#print("Testing Accuracy:  {:.4f}".format(accuracy))
# TODO: score = compare preds and y_test

In [33]:
# Width of the input layer = number of feature columns in the augmented data.
input_dim = X_res.shape[1]

# Same layer stack as a chain of add() calls, declared in one list: a ReLU
# input layer, four sigmoid layers each followed by 50% dropout, a final
# sigmoid hidden layer without dropout, and one sigmoid output per label.
model = Sequential([
    Dense(5000, input_dim=input_dim, activation='relu'),
    Dropout(0.5),
    Dense(600, activation='sigmoid'),
    Dropout(0.5),
    Dense(300, activation='sigmoid'),
    Dropout(0.5),
    Dense(100, activation="sigmoid"),
    Dropout(0.5),
    Dense(60, activation="sigmoid"),
    Dropout(0.5),
    Dense(40, activation="sigmoid"),
    Dense(y_res.shape[1], activation='sigmoid'),
])

model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['binary_accuracy'],
)
model.summary()

# Train on the augmented data; the test split is used as validation data.
history = model.fit(
    X_res,
    y_res,
    batch_size=10,
    epochs=20,
    validation_data=(x_test, y_test),
    verbose=True,
)

# evaluate() returns [loss, binary_accuracy] for this compile configuration.
loss, accuracy = model.evaluate(X_res, y_res, verbose=False)
print("Training Accuracy: {:.4f}".format(accuracy))

loss, accuracy = model.evaluate(x_test, y_test, verbose=False)
print("Testing Accuracy:  {:.4f}".format(accuracy))

In [34]:
# Plot the training curves stored in `history`, i.e. the object returned by the
# most recently executed `model.fit(...)` assignment, using plot_keras_history.
plot_history(history)

In [None]:
# Re-run the train/test evaluation for whichever `model` is currently bound in
# the kernel. The two-value unpacking requires that model to be compiled with
# exactly one metric (loss + metric); it fails for a model compiled without
# metrics.
loss, accuracy = model.evaluate(X_res, y_res, verbose=False)
print("Training Accuracy: {:.4f}".format(accuracy))
loss, accuracy = model.evaluate(x_test, y_test, verbose=False)
print("Testing Accuracy:  {:.4f}".format(accuracy))

In [13]:
# Draw the architecture of the current `model` to 'model_plot.png', with layer
# shapes and layer names shown in the diagram.
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
# Training callbacks, to be passed to `model.fit(..., callbacks=callbacks)`:
#   - ReduceLROnPlateau with all library defaults,
#   - EarlyStopping with a patience of 4 epochs,
#   - ModelCheckpoint writing only the best model to 'model-conv1d.h5'.
# NOTE(review): no `monitor` is given, so each callback watches its library
# default (presumably `val_loss` - confirm for the installed Keras version);
# the fit call that uses this list then has to supply validation data.
callbacks = [
    ReduceLROnPlateau(), 
    EarlyStopping(patience=4), 
    ModelCheckpoint(filepath='model-conv1d.h5', save_best_only=True)
]

In [None]:
# The callbacks in `callbacks` are created without a `monitor` argument, so
# they watch Keras' default metric `val_loss`. That metric only exists when
# validation data is supplied; without it early stopping, LR reduction and
# best-model checkpointing cannot act. The test split is used as validation
# data, as in the other fit calls of this notebook.
history = model.fit(
    x_train,
    y_train,
    epochs=20,
    batch_size=32,
    validation_data=(x_test, y_test),
    callbacks=callbacks,
)

In [None]:
# Smaller ReLU network: four hidden layers (100/80/60/40 units), each followed
# by 50% dropout, and one sigmoid output unit per label column.
model = Sequential([
    Dense(100, activation="relu", input_dim=X_res.shape[1]),
    Dropout(0.5),
    Dense(80, activation="relu"),
    Dropout(0.5),
    Dense(60, activation="relu"),
    Dropout(0.5),
    Dense(40, activation="relu"),
    Dropout(0.5),
    Dense(y_res.shape[1], activation='sigmoid'),
])

model.compile(
    loss='binary_crossentropy',
    optimizer='adam',
    metrics=['binary_accuracy'],
)
history = model.fit(X_res, y_res, batch_size=2000, epochs=20)


# evaluate() returns one value per entry of model.metrics_names; report the
# first two (the loss and the single compiled metric).
metrics = model.evaluate(x_test, y_test)
print("{}: {}".format(model.metrics_names[0], metrics[0]))
print("{}: {}".format(model.metrics_names[1], metrics[1]))

In [None]:
# Draw the architecture of the current `model` with layer shapes and names.
# This writes to the same 'model_plot.png' path as the earlier plot_model cell,
# so a diagram saved there before is overwritten.
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)
