In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, LSTM, Dense, concatenate

In [2]:
# Load the dataset
dataset = pd.read_csv(r'C:\Users\u1158100\Desktop\GDG_research\delisting_detection_py_project\processed\processed_data.csv')

 

# Separate target variable, time-related data, numerical data, and categorical data
y = dataset['delist_tab'].values
time_cols = [col for col in dataset.columns if col.startswith('_')]
time_data = dataset[time_cols].values
numerical_data = dataset['month_count'].values.reshape(-1, 1)
cat_data = dataset.drop(columns=['delist_tab'] + time_cols + ['month_count']).values

 

# Split data into training and testing sets
split_data = train_test_split(time_data, numerical_data, cat_data, y, test_size=0.2, random_state=42)
time_train, time_test, num_train, num_test, cat_train, cat_test, y_train, y_test = split_data

 

# Design the RNN model

 

# Time series input
time_input = Input(shape=(53, 1), name='time_input')
lstm_out = LSTM(32)(time_input)

 

# Numerical input
num_input = Input(shape=(1,), name='num_input')
dense_num = Dense(16, activation='relu')(num_input)

 

# Categorical input
cat_input = Input(shape=(68,), name='cat_input')
dense_cat = Dense(32, activation='relu')(cat_input)

 

# Merge outputs
merged = concatenate([lstm_out, dense_num, dense_cat])

 

# Hidden layer
hidden = Dense(64, activation='relu')(merged)

 

# Output layer
output = Dense(1, activation='sigmoid')(hidden)

 

model = Model(inputs=[time_input, num_input, cat_input], outputs=output)

 

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

 

# Reshape time data for LSTM
time_train = time_train.reshape(-1, 53, 1)
time_test = time_test.reshape(-1, 53, 1)

 

# Train the model (adjust epochs and batch_size as needed)
model.fit([time_train, num_train, cat_train], y_train, epochs=10, batch_size=32, validation_data=([time_test, num_test, cat_test], y_test))

 

# Evaluate the model on the testing data
loss, accuracy = model.evaluate([time_test, num_test, cat_test], y_test)

 

# Predict on test data
y_pred = model.predict([time_test, num_test, cat_test])
y_pred_classes = (y_pred > 0.5).astype(int).flatten()

 

# Output the error by output class
from sklearn.metrics import classification_report
print(classification_report(y_test, y_pred_classes))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
              precision    recall  f1-score   support

           0       0.92      0.94      0.93       203
           1       0.58      0.51      0.55        35

    accuracy                           0.87       238
   macro avg       0.75      0.73      0.74       238
weighted avg       0.87      0.87      0.87       238

