### Imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import numpy as np
import os
import json
import requests
from tqdm import tqdm
import time
import keras


import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers, models, backend as K

C:\Users\riskf\anaconda3\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll
C:\Users\riskf\anaconda3\lib\site-packages\numpy\.libs\libopenblas64__v0.3.23-246-g3d31191b-gcc_10_3_0.dll


In [2]:
# WGAN-GP Architecture
def make_generator_model(input_dim, output_dim):
    model = tf.keras.Sequential()
    model.add(layers.Dense(256, activation='relu', input_dim=input_dim))
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(1024, activation='relu'))
    model.add(layers.Dense(output_dim, activation='linear'))  # Linear activation for WGAN
    return model

def make_critic_model(input_dim):
    model = tf.keras.Sequential()
    model.add(layers.Dense(512, activation='relu', input_dim=input_dim))
    model.add(layers.Dropout(0.1))
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dense(1))  # No activation, linear output
    return model

generator = make_generator_model(100, 101)
critic = make_critic_model(101)

# Losses and training
def critic_loss(real_output, fake_output):
    return tf.reduce_mean(fake_output) - tf.reduce_mean(real_output)

def generator_loss(fake_output):
    return -tf.reduce_mean(fake_output)

def gradient_penalty(batch_size, real_images, fake_images, critic):
    epsilon = tf.random.normal([batch_size, 1], 0.0, 1.0)
    interpolated = epsilon * real_images + (1 - epsilon) * fake_images
    with tf.GradientTape() as tape:
        tape.watch(interpolated)
        pred = critic(interpolated, training=True)
    grads = tape.gradient(pred, [interpolated])[0]
    norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1]))
    gp = tf.reduce_mean((norm - 1.0) ** 2)
    return gp

def train_step(generator, critic, batch_size, generator_optimizer, critic_optimizer, real_features):
    # Append a label column to real_features to match the critic's input expectations
    labels = tf.ones((batch_size, 1))  # Assume label 1 for all positive samples
    real_data = tf.concat([real_features, labels], axis=1)
    
    noise = tf.random.normal([batch_size, generator.input_shape[1]])
    with tf.GradientTape() as gen_tape, tf.GradientTape() as crit_tape:
        generated_data = generator(noise, training=True)

        real_output = critic(real_data, training=True)
        fake_output = critic(generated_data, training=True)

        crit_loss = critic_loss(real_output, fake_output)
        gen_loss = generator_loss(fake_output)
        penalty = gradient_penalty(batch_size, real_data, generated_data, critic)
        crit_loss += 10 * penalty  # lambda for gradient penalty

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_critic = crit_tape.gradient(crit_loss, critic.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    critic_optimizer.apply_gradients(zip(gradients_of_critic, critic.trainable_variables))

    return crit_loss, gen_loss




In [3]:
#relative paths. # Set directory paths for later use.
# Get the directory of the script file
base_dir = os.getcwd()
ligants_type = ['enzyme', 'GPCR', 'ion_channel', 'nuclear_receptor']
ltype = ligants_type[2]
file_name = 'final_new_par_50.csv'
file_path = os.path.join(base_dir, 'data', 'split', ltype, file_name)
data_frame = pd.read_csv(file_path, header=None, skiprows=1)
features = data_frame.iloc[:, :-1].values
labels = data_frame.iloc[:, -1].values
# Filter to get only the positive samples
positive_features = features[labels == 1]

In [4]:
print(positive_features.shape)
print(features.shape)
print(labels.shape)
len(positive_features)

(1476, 100)
(42840, 100)
(42840,)


1476

In [5]:
# Training parameters
num_samples_to_generate = 39888
epochs = 100
batch_size = 256
learning_rate=0.0001
beta_1=0.5
generator_optimizer = Adam(learning_rate=learning_rate, beta_1=beta_1)
critic_optimizer = Adam(learning_rate=learning_rate, beta_1=beta_1)

# Training loop
for epoch in range(epochs):
    for batch in range(0, len(positive_features), batch_size):
        real_data_batch = positive_features[batch:batch + batch_size]
        if real_data_batch.shape[0] != batch_size:  # Handle last batch which may be smaller
            continue  # Skip if the batch isn't full size
        crit_loss, gen_loss = train_step(generator, critic, batch_size, generator_optimizer, critic_optimizer, real_data_batch)
        print(f'Epoch {epoch}, Batch {batch // batch_size}, Critic Loss: {crit_loss.numpy()}, Generator Loss: {gen_loss.numpy()}')

Epoch 0, Batch 0, Critic Loss: 2.2848117351531982, Generator Loss: 0.13644111156463623
Epoch 0, Batch 1, Critic Loss: 2.2574143409729004, Generator Loss: 0.04363536089658737
Epoch 0, Batch 2, Critic Loss: 2.273629665374756, Generator Loss: -0.056689612567424774
Epoch 0, Batch 3, Critic Loss: 2.229916572570801, Generator Loss: -0.160558819770813
Epoch 0, Batch 4, Critic Loss: 2.2799177169799805, Generator Loss: -0.273512065410614
Epoch 1, Batch 0, Critic Loss: 2.308932304382324, Generator Loss: -0.40861254930496216
Epoch 1, Batch 1, Critic Loss: 2.405252456665039, Generator Loss: -0.5618970990180969
Epoch 1, Batch 2, Critic Loss: 2.362370014190674, Generator Loss: -0.7364904880523682
Epoch 1, Batch 3, Critic Loss: 2.483381748199463, Generator Loss: -0.9233759641647339
Epoch 1, Batch 4, Critic Loss: 2.5260376930236816, Generator Loss: -1.152200698852539
Epoch 2, Batch 0, Critic Loss: 2.6896088123321533, Generator Loss: -1.3714675903320312
Epoch 2, Batch 1, Critic Loss: 2.803415298461914,

Epoch 19, Batch 0, Critic Loss: -0.03174477815628052, Generator Loss: 1.3531591892242432
Epoch 19, Batch 1, Critic Loss: 0.04143768548965454, Generator Loss: 1.3768539428710938
Epoch 19, Batch 2, Critic Loss: 0.17818844318389893, Generator Loss: 1.4834179878234863
Epoch 19, Batch 3, Critic Loss: -0.02402585744857788, Generator Loss: 1.4762156009674072
Epoch 19, Batch 4, Critic Loss: -0.06595247983932495, Generator Loss: 1.5324491262435913
Epoch 20, Batch 0, Critic Loss: -0.10210788249969482, Generator Loss: 1.5216184854507446
Epoch 20, Batch 1, Critic Loss: -0.026699721813201904, Generator Loss: 1.5273622274398804
Epoch 20, Batch 2, Critic Loss: -0.029441654682159424, Generator Loss: 1.564677119255066
Epoch 20, Batch 3, Critic Loss: -0.06401759386062622, Generator Loss: 1.5006206035614014
Epoch 20, Batch 4, Critic Loss: -0.07719266414642334, Generator Loss: 1.4815895557403564
Epoch 21, Batch 0, Critic Loss: -0.09946823120117188, Generator Loss: 1.463010549545288
Epoch 21, Batch 1, Crit

Epoch 37, Batch 4, Critic Loss: 0.5432920455932617, Generator Loss: 0.6484721899032593
Epoch 38, Batch 0, Critic Loss: 0.545022189617157, Generator Loss: 0.6338196396827698
Epoch 38, Batch 1, Critic Loss: 0.6265689134597778, Generator Loss: 0.6319513320922852
Epoch 38, Batch 2, Critic Loss: 0.786919116973877, Generator Loss: 0.6156836152076721
Epoch 38, Batch 3, Critic Loss: 0.531903862953186, Generator Loss: 0.6076177358627319
Epoch 38, Batch 4, Critic Loss: 0.5526494383811951, Generator Loss: 0.5902782678604126
Epoch 39, Batch 0, Critic Loss: 0.6004170775413513, Generator Loss: 0.5790281295776367
Epoch 39, Batch 1, Critic Loss: 0.6493600606918335, Generator Loss: 0.5757086277008057
Epoch 39, Batch 2, Critic Loss: 0.7712521553039551, Generator Loss: 0.5729147791862488
Epoch 39, Batch 3, Critic Loss: 0.5778359770774841, Generator Loss: 0.5585798025131226
Epoch 39, Batch 4, Critic Loss: 0.5531985759735107, Generator Loss: 0.5639714598655701
Epoch 40, Batch 0, Critic Loss: 0.615852534770

Epoch 56, Batch 4, Critic Loss: 0.7043870687484741, Generator Loss: 0.3350808322429657
Epoch 57, Batch 0, Critic Loss: 0.6490652561187744, Generator Loss: 0.3351172208786011
Epoch 57, Batch 1, Critic Loss: 0.740318775177002, Generator Loss: 0.3202318549156189
Epoch 57, Batch 2, Critic Loss: 0.7973505258560181, Generator Loss: 0.32674044370651245
Epoch 57, Batch 3, Critic Loss: 0.6745742559432983, Generator Loss: 0.3175065815448761
Epoch 57, Batch 4, Critic Loss: 0.7365278005599976, Generator Loss: 0.31679296493530273
Epoch 58, Batch 0, Critic Loss: 0.6535961627960205, Generator Loss: 0.31531238555908203
Epoch 58, Batch 1, Critic Loss: 0.7095361351966858, Generator Loss: 0.3032248318195343
Epoch 58, Batch 2, Critic Loss: 0.7716246247291565, Generator Loss: 0.30795031785964966
Epoch 58, Batch 3, Critic Loss: 0.7101984620094299, Generator Loss: 0.2980624735355377
Epoch 58, Batch 4, Critic Loss: 0.7196897268295288, Generator Loss: 0.2997717261314392
Epoch 59, Batch 0, Critic Loss: 0.662748

Epoch 75, Batch 4, Critic Loss: 1.0365290641784668, Generator Loss: 0.1295565515756607
Epoch 76, Batch 0, Critic Loss: 0.9383460283279419, Generator Loss: 0.14570757746696472
Epoch 76, Batch 1, Critic Loss: 0.891966700553894, Generator Loss: 0.15828442573547363
Epoch 76, Batch 2, Critic Loss: 0.989848792552948, Generator Loss: 0.17215263843536377
Epoch 76, Batch 3, Critic Loss: 1.005171537399292, Generator Loss: 0.18594563007354736
Epoch 76, Batch 4, Critic Loss: 1.0302348136901855, Generator Loss: 0.191688671708107
Epoch 77, Batch 0, Critic Loss: 0.8467159271240234, Generator Loss: 0.21632248163223267
Epoch 77, Batch 1, Critic Loss: 0.8997960090637207, Generator Loss: 0.22864852845668793
Epoch 77, Batch 2, Critic Loss: 1.0063378810882568, Generator Loss: 0.24879616498947144
Epoch 77, Batch 3, Critic Loss: 0.9116647243499756, Generator Loss: 0.2640712857246399
Epoch 77, Batch 4, Critic Loss: 0.9469605684280396, Generator Loss: 0.27475088834762573
Epoch 78, Batch 0, Critic Loss: 0.75982

Epoch 94, Batch 3, Critic Loss: 0.9463244676589966, Generator Loss: 0.22499413788318634
Epoch 94, Batch 4, Critic Loss: 0.8773430585861206, Generator Loss: 0.23466284573078156
Epoch 95, Batch 0, Critic Loss: 0.9521142244338989, Generator Loss: 0.23048345744609833
Epoch 95, Batch 1, Critic Loss: 0.9668322205543518, Generator Loss: 0.2371227890253067
Epoch 95, Batch 2, Critic Loss: 0.834550142288208, Generator Loss: 0.2423287332057953
Epoch 95, Batch 3, Critic Loss: 0.8290315866470337, Generator Loss: 0.24502459168434143
Epoch 95, Batch 4, Critic Loss: 0.8547106981277466, Generator Loss: 0.2367214858531952
Epoch 96, Batch 0, Critic Loss: 0.9457170963287354, Generator Loss: 0.23633761703968048
Epoch 96, Batch 1, Critic Loss: 0.9276583194732666, Generator Loss: 0.23201468586921692
Epoch 96, Batch 2, Critic Loss: 0.8947176933288574, Generator Loss: 0.23059993982315063
Epoch 96, Batch 3, Critic Loss: 0.8499957323074341, Generator Loss: 0.22352758049964905
Epoch 96, Batch 4, Critic Loss: 0.89

In [6]:
# Assuming the last column name in  original dataframe represents the label
all_column_names = data_frame.columns.tolist()  # This should have 101 names if the label is included in data_frame

# Generate synthetic data
noise = tf.random.normal([num_samples_to_generate, 100])
synthetic_data = generator(noise, training=False)
synthetic_data_df = pd.DataFrame(synthetic_data.numpy(), columns=all_column_names)
# Set the label for all generated data to 1
synthetic_data_df[all_column_names[-1]] = 1

In [None]:
#synthetic_data

In [7]:
# Combine original and synthetic data
enhanced_df = pd.concat([data_frame, synthetic_data_df], axis=0).reset_index(drop=True)
file_name='enhanced_GAN_final_new_par_50_space_1.csv'
file_path = os.path.join(base_dir,'data','split',ltype, file_name)
output_path = file_path
enhanced_df.to_csv(output_path, index=False)