In [75]:
import torch
import pandas as pd
import numpy as np

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import train_test_split

In [65]:
train = pd.read_csv("train_data.csv")
test = pd.read_csv("test_data.csv")

In [66]:
train.head()

Unnamed: 0,ID,parents,has_nurs,form,children,housing,finance,social,health,app_status
0,1,usual,less_proper,complete,3,critical,convenient,problematic,not_recom,0
1,2,pretentious,very_crit,completed,1,convenient,inconv,nonprob,not_recom,0
2,3,pretentious,proper,incomplete,1,less_conv,convenient,slightly_prob,priority,1
3,4,great_pret,improper,complete,1,convenient,convenient,nonprob,recommended,1
4,5,great_pret,less_proper,completed,1,convenient,convenient,slightly_prob,priority,1


In [67]:
train.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10368 entries, 0 to 10367
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype 
---  ------      --------------  ----- 
 0   ID          10368 non-null  int64 
 1   parents     10368 non-null  object
 2   has_nurs    10368 non-null  object
 3   form        10368 non-null  object
 4   children    10368 non-null  object
 5   housing     10368 non-null  object
 6   finance     10368 non-null  object
 7   social      10368 non-null  object
 8   health      10368 non-null  object
 9   app_status  10368 non-null  int64 
dtypes: int64(2), object(8)
memory usage: 810.1+ KB


In [68]:
train.columns

Index(['ID', 'parents', 'has_nurs', 'form', 'children', 'housing', 'finance',
       'social', 'health', 'app_status'],
      dtype='object')

In [69]:
train.drop("ID", axis=1, inplace=True)

In [70]:
train.value_counts("health")

health
not_recom      3465
priority       3456
recommended    3447
Name: count, dtype: int64

In [71]:
categorical_cols = train.select_dtypes(include=["object"]).columns
numerical_cols = train.select_dtypes(include=["int64"]).columns

In [72]:
categorical_cols

Index(['parents', 'has_nurs', 'form', 'children', 'housing', 'finance',
       'social', 'health'],
      dtype='object')

In [85]:
encoder = OneHotEncoder()

encoded_data = encoder.fit_transform(train[categorical_cols])
dummy_coded_df = pd.DataFrame(encoded_data.toarray(), columns = encoder.get_feature_names_out(categorical_cols))


In [74]:
X = dummy_coded_df
y = train[numerical_cols]

In [82]:
# train test split
train_x, test_x, train_y, test_y = train_test_split(X, y, test_size=0.2, random_state=42)

In [84]:
test_y.shape

(2074, 1)

# Model Building

## neural network

In [118]:
import torch 
import torch.nn as nn

from torchsummary import summary

In [136]:
train_nx = torch.tensor(train_x.to_numpy(), dtype=torch.float64)
train_ny = torch.tensor(train_y.to_numpy(), dtype=torch.float64)
test_nx = torch.tensor(test_x.to_numpy(), dtype=torch.float64)
test_ny = torch.tensor(test_y.to_numpy(), dtype=torch.float64)

In [120]:
class model(nn.Module):
    def __init__(self,input_size,hidden_size):
        super().__init__()
        self.input = nn.Linear(input_size,hidden_size)
        self.relu = nn.ReLU()
        self.output = nn.Linear(hidden_size, 1)

    def forward(self, x):
        x = self.input(x)
        x = self.relu(x)
        x = self.output(x)
        return x

In [124]:
model1 = model(train_nx.shape[1],10)
summary(model1, input_size=(train_nx.shape[1],))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                   [-1, 10]             280
              ReLU-2                   [-1, 10]               0
            Linear-3                    [-1, 1]              11
Total params: 291
Trainable params: 291
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.00
Params size (MB): 0.00
Estimated Total Size (MB): 0.00
----------------------------------------------------------------


In [116]:
train_nx.shape[1]

27

In [147]:
import torch.utils.data as data
import torch.optim as optim

optimizer = optim.Adam(model1.parameters(), lr = 0.01)
criterian = nn.BCEWithLogitsLoss()

loader = data.DataLoader(data.TensorDataset(train_nx, train_ny), shuffle=True, batch_size=8)

epochs = 500

for epoch in  range(epochs):
    model1.train()
    for x_batch, y_batch in loader:
        x_batch = x_batch.float()  
        y_batch = y_batch.float().unsqueeze(1)
        
        pred = model1(x_batch)
        loss = criterian(pred, y_batch)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if epoch % 50 != 0:
        model1.eval()
        with torch.no_grad():
            train_pred = model1(train_nx.float())
            train_pred_label = (torch.sigmoid(train_pred) > 0.5).float()
            train_accuracy = (train_pred_label.squeeze() == train_ny).float().mean()

            test_pred = model1(test_nx.float())
            test_pred_label = (torch.sigmoid(test_pred) > 0.5).float()
            test_accuracy = (test_pred_label.squeeze() == test_ny).float().mean()

            print(f'Epoch {epoch}: Train Accuracy = {train_accuracy.item():.4f}, Test Accuracy = {test_accuracy.item():.4f}')

ValueError: Target size (torch.Size([8, 1, 1])) must be the same as input size (torch.Size([8, 1]))

In [98]:
!pip install torchsummary

Collecting torchsummary
  Downloading torchsummary-1.5.1-py3-none-any.whl.metadata (296 bytes)
Downloading torchsummary-1.5.1-py3-none-any.whl (2.8 kB)
Installing collected packages: torchsummary
Successfully installed torchsummary-1.5.1


In [138]:
for x_batch, y_batch in loader:
    print(x_batch)

tensor([[1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0.,
         1., 1., 0., 0., 0., 1., 0., 0., 1.],
        [1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 1.,
         0., 0., 1., 1., 0., 0., 0., 0., 1.],
        [0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0.,
         1., 1., 0., 0., 1., 0., 0., 1., 0.],
        [0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0.,
         1., 0., 1., 1., 0., 0., 0., 0., 1.],
        [0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0.,
         1., 1., 0., 0., 1., 0., 1., 0., 0.],
        [0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0.,
         1., 1., 0., 1., 0., 0., 0., 1., 0.],
        [1., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0.,
         0., 0., 1., 1., 0., 0., 0., 1., 0.],
        [0., 0., 1., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 1.,
         0., 0., 1., 0., 0., 1