**Reading and processing data**

In [1]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
test = 'test.csv'
train = 'train.csv'

data = pd.read_csv(train)
target = data["Target"]
data = data.drop(columns=["Id", "Target", "idhogar"])
data['dependency'] = data['dependency'].map({'yes': 1, 'no': 0})
data['edjefe'] = data['edjefe'].map({'yes': 1, 'no': 0})
data['edjefa'] = data['edjefa'].map({'yes': 1, 'no': 0})


test = pd.read_csv(test)
ID = test["Id"]
test = test.drop(columns=["Id", "idhogar"])
test['dependency'] = test['dependency'].map({'yes': 1, 'no': 0})
test['edjefe'] = test['edjefe'].map({'yes': 1, 'no': 0})
test['edjefa'] = test['edjefa'].map({'yes': 1, 'no': 0})

In [3]:
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size=0.3, random_state=0)
X_train = X_train.fillna(X_train.mean())
X_test = X_test.fillna(X_test.mean())

test = test.fillna(test.mean())

In [4]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.fit_transform(X_test)


test_scaled = scaler.fit_transform(test)

In [5]:
from imblearn.over_sampling import RandomOverSampler
from imblearn.combine import SMOTETomek
from imblearn.under_sampling import RandomUnderSampler

def oversample(X, y):
    ros = RandomOverSampler(random_state=0)
    return ros.fit_resample(X, y)

def undersample(X, y):
    ros = RandomUnderSampler(random_state=0)
    return ros.fit_resample(X, y)

def smotsample(X, y):
    ros = SMOTETomek(sampling_strategy='auto')
    return ros.fit_resample(X, y)

In [6]:
undersampled_X, undersampled_y = undersample(X_scaled, y_train)
oversampled_X, oversampled_y = oversample(X_scaled, y_train)
smotsampled_X, smotsampled_y = smotsample(X_scaled, y_train)

from keras.utils import to_categorical

undersampled_y_onehot = to_categorical(undersampled_y-1)
oversampled_y_onehot = to_categorical(oversampled_y-1)
smotsampled_y_onehot = to_categorical(smotsampled_y-1)

y_train_onehot = to_categorical(y_train-1)

**Training models**

In [7]:
model = Sequential()
model.add(Dense(256, activation='relu', input_dim=X_scaled.shape[1]))
model.add(Dense(128, activation='relu'))
model.add(Dense(64, activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(16, activation='relu'))
model.add(BatchNormalization())
model.add(Dense(4, activation='softmax'))

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [8]:
import tensorflow as tf
tf.config.run_functions_eagerly(True)
model.fit(X_scaled, y_train_onehot, epochs=100, batch_size=1000, validation_split=0.2)

Epoch 1/100




Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 7

<keras.src.callbacks.History at 0x7ab0af2e1f30>

In [9]:
model_under = Sequential()
model_under.add(Dense(256, activation='relu', input_dim=undersampled_X.shape[1]))
model_under.add(Dense(128, activation='relu'))
model_under.add(Dense(64, activation='relu'))
model_under.add(Dense(32, activation='relu'))
model_under.add(Dense(16, activation='relu'))
model.add(BatchNormalization())
model_under.add(Dense(4, activation='softmax'))

model_under.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [10]:
model_under.fit(undersampled_X, undersampled_y_onehot, epochs=100, batch_size=1000, validation_split=0.2)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7ab0a80245b0>

In [11]:
model_over = Sequential()
model_over.add(Dense(256, activation='relu', input_dim=oversampled_X.shape[1]))
model_over.add(Dense(128, activation='relu'))
model_over.add(Dense(64, activation='relu'))
model_over.add(Dense(32, activation='relu'))
model_over.add(Dense(16, activation='relu'))
model.add(BatchNormalization())
model_over.add(Dense(4, activation='softmax'))

model_over.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [12]:
model_over.fit(oversampled_X, oversampled_y_onehot, epochs=100, batch_size=1000, validation_split=0.2)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7ab0a80d3ee0>

In [13]:
model_smot = Sequential()
model_smot.add(Dense(256, activation='relu', input_dim=smotsampled_X.shape[1]))
model_smot.add(Dense(128, activation='relu'))
model_smot.add(Dense(64, activation='relu'))
model_smot.add(Dense(32, activation='relu'))
model_smot.add(Dense(16, activation='relu'))
model.add(BatchNormalization())
model_smot.add(Dense(4, activation='softmax'))

model_smot.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

In [14]:
model_smot.fit(smotsampled_X, smotsampled_y_onehot, epochs=100, batch_size=1000, validation_split=0.2)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7ab0a374a1d0>

In [15]:
predictions = model.predict(X_test_scaled)
predictions2 = model_under.predict(X_test_scaled)
predictions3 = model_over.predict(X_test_scaled)
predictions4 = model_smot.predict(X_test_scaled)



In [16]:
from sklearn.metrics import accuracy_score

predictions = np.argmax(predictions, axis=1) + 1
predictions2 = np.argmax(predictions2, axis=1) + 1
predictions3 = np.argmax(predictions3, axis=1) + 1
predictions4 = np.argmax(predictions4, axis=1) + 1

accuracy = accuracy_score(y_test, predictions)
accuracy2 = accuracy_score(y_test, predictions2)
accuracy3 = accuracy_score(y_test, predictions3)
accuracy4 = accuracy_score(y_test, predictions4)
print("Accuracy of all data:", accuracy)
print("Accuracy of undersampling:", accuracy2)
print("Accuracy of oversampling:", accuracy3)
print("Accuracy of smot sampling:", accuracy4)

Accuracy of all data: 0.75278940027894
Accuracy of undersampling: 0.43654114365411434
Accuracy of oversampling: 0.8165969316596932
Accuracy of smot sampling: 0.7932357043235704


**Preparing Kaggle submission**

In [22]:
preds = model_over.predict(test_scaled)
preds = np.argmax(preds, axis=1) + 1

  5/746 [..............................] - ETA: 9s 





In [38]:
df = pd.DataFrame({'ID': ID, 'Target': preds})
df.to_csv('submission.csv', index=False)