### Imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import numpy as np
import os
import json
import requests
from tqdm import tqdm
import time
import keras


import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers, models, backend as K

C:\Users\riskf\anaconda3\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll
C:\Users\riskf\anaconda3\lib\site-packages\numpy\.libs\libopenblas64__v0.3.23-246-g3d31191b-gcc_10_3_0.dll


In [2]:
# WGAN-GP Architecture
def make_generator_model(input_dim, output_dim):
    model = tf.keras.Sequential()
    model.add(layers.Dense(256, activation='relu', input_dim=input_dim))
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(1024, activation='relu'))
    model.add(layers.Dense(output_dim, activation='linear'))  # Linear activation for WGAN
    return model

def make_critic_model(input_dim):
    model = tf.keras.Sequential()
    model.add(layers.Dense(512, activation='relu', input_dim=input_dim))
    model.add(layers.Dropout(0.1))
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dense(1))  # No activation, linear output
    return model

generator = make_generator_model(100, 101)
critic = make_critic_model(101)

# Losses and training
def critic_loss(real_output, fake_output):
    return tf.reduce_mean(fake_output) - tf.reduce_mean(real_output)

def generator_loss(fake_output):
    return -tf.reduce_mean(fake_output)

def gradient_penalty(batch_size, real_images, fake_images, critic):
    epsilon = tf.random.normal([batch_size, 1], 0.0, 1.0)
    interpolated = epsilon * real_images + (1 - epsilon) * fake_images
    with tf.GradientTape() as tape:
        tape.watch(interpolated)
        pred = critic(interpolated, training=True)
    grads = tape.gradient(pred, [interpolated])[0]
    norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1]))
    gp = tf.reduce_mean((norm - 1.0) ** 2)
    return gp

def train_step(generator, critic, batch_size, generator_optimizer, critic_optimizer, real_features):
    # Append a label column to real_features to match the critic's input expectations
    labels = tf.ones((batch_size, 1))  # Assume label 1 for all positive samples
    real_data = tf.concat([real_features, labels], axis=1)
    
    noise = tf.random.normal([batch_size, generator.input_shape[1]])
    with tf.GradientTape() as gen_tape, tf.GradientTape() as crit_tape:
        generated_data = generator(noise, training=True)

        real_output = critic(real_data, training=True)
        fake_output = critic(generated_data, training=True)

        crit_loss = critic_loss(real_output, fake_output)
        gen_loss = generator_loss(fake_output)
        penalty = gradient_penalty(batch_size, real_data, generated_data, critic)
        crit_loss += 10 * penalty  # lambda for gradient penalty

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_critic = crit_tape.gradient(crit_loss, critic.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    critic_optimizer.apply_gradients(zip(gradients_of_critic, critic.trainable_variables))

    return crit_loss, gen_loss




In [3]:
#relative paths. # Set directory paths for later use.
# Get the directory of the script file
base_dir = os.getcwd()
ligants_type = ['enzyme', 'GPCR', 'ion_channel', 'nuclear_receptor']
ltype = ligants_type[2]
file_name = 'final_new_par_LMF_50.csv'
file_path = os.path.join(base_dir, 'data', 'split', ltype, file_name)
data_frame = pd.read_csv(file_path, header=None, skiprows=1)
features = data_frame.iloc[:, :-1].values
labels = data_frame.iloc[:, -1].values
# Filter to get only the positive samples
positive_features = features[labels == 1]

In [4]:
print(positive_features.shape)
print(features.shape)
print(labels.shape)
len(positive_features)

(1476, 100)
(42840, 100)
(42840,)


1476

In [5]:
# Training parameters
num_samples_to_generate = 39888
epochs = 100
batch_size = 256
learning_rate=0.0001
beta_1=0.5
generator_optimizer = Adam(learning_rate=learning_rate, beta_1=beta_1)
critic_optimizer = Adam(learning_rate=learning_rate, beta_1=beta_1)

# Training loop
for epoch in range(epochs):
    for batch in range(0, len(positive_features), batch_size):
        real_data_batch = positive_features[batch:batch + batch_size]
        if real_data_batch.shape[0] != batch_size:  # Handle last batch which may be smaller
            continue  # Skip if the batch isn't full size
        crit_loss, gen_loss = train_step(generator, critic, batch_size, generator_optimizer, critic_optimizer, real_data_batch)
        print(f'Epoch {epoch}, Batch {batch // batch_size}, Critic Loss: {crit_loss.numpy()}, Generator Loss: {gen_loss.numpy()}')

Epoch 0, Batch 0, Critic Loss: 2.8573222160339355, Generator Loss: 0.002047100104391575
Epoch 0, Batch 1, Critic Loss: 2.6792163848876953, Generator Loss: -0.09069015085697174
Epoch 0, Batch 2, Critic Loss: 2.4988512992858887, Generator Loss: -0.19069904088974
Epoch 0, Batch 3, Critic Loss: 2.3692727088928223, Generator Loss: -0.31137317419052124
Epoch 0, Batch 4, Critic Loss: 2.25699520111084, Generator Loss: -0.45482680201530457
Epoch 1, Batch 0, Critic Loss: 2.5651028156280518, Generator Loss: -0.6152185201644897
Epoch 1, Batch 1, Critic Loss: 2.4262146949768066, Generator Loss: -0.79111647605896
Epoch 1, Batch 2, Critic Loss: 2.310145854949951, Generator Loss: -0.9903579354286194
Epoch 1, Batch 3, Critic Loss: 2.264512062072754, Generator Loss: -1.1981993913650513
Epoch 1, Batch 4, Critic Loss: 2.3824732303619385, Generator Loss: -1.4669077396392822
Epoch 2, Batch 0, Critic Loss: 2.7781383991241455, Generator Loss: -1.7569897174835205
Epoch 2, Batch 1, Critic Loss: 2.86654090881347

Epoch 19, Batch 1, Critic Loss: -1.1218583583831787, Generator Loss: 2.7064857482910156
Epoch 19, Batch 2, Critic Loss: -0.8197721242904663, Generator Loss: 2.713888168334961
Epoch 19, Batch 3, Critic Loss: -1.0842781066894531, Generator Loss: 2.7269058227539062
Epoch 19, Batch 4, Critic Loss: -0.7813966274261475, Generator Loss: 2.7256271839141846
Epoch 20, Batch 0, Critic Loss: -1.3960812091827393, Generator Loss: 2.690716505050659
Epoch 20, Batch 1, Critic Loss: -1.2042168378829956, Generator Loss: 2.654731273651123
Epoch 20, Batch 2, Critic Loss: -0.7388135194778442, Generator Loss: 2.5471553802490234
Epoch 20, Batch 3, Critic Loss: -0.8819169402122498, Generator Loss: 2.4588284492492676
Epoch 20, Batch 4, Critic Loss: -0.4366183876991272, Generator Loss: 2.3678290843963623
Epoch 21, Batch 0, Critic Loss: -0.7648265361785889, Generator Loss: 2.1783995628356934
Epoch 21, Batch 1, Critic Loss: -0.4567428231239319, Generator Loss: 2.1126718521118164
Epoch 21, Batch 2, Critic Loss: -0.

Epoch 38, Batch 0, Critic Loss: 0.8160414099693298, Generator Loss: 0.7672740817070007
Epoch 38, Batch 1, Critic Loss: 0.8059067726135254, Generator Loss: 0.8010382056236267
Epoch 38, Batch 2, Critic Loss: 0.8781816363334656, Generator Loss: 0.8378663063049316
Epoch 38, Batch 3, Critic Loss: 1.353348731994629, Generator Loss: 0.8741039037704468
Epoch 38, Batch 4, Critic Loss: 0.8687382340431213, Generator Loss: 0.9000152349472046
Epoch 39, Batch 0, Critic Loss: 0.5440671443939209, Generator Loss: 0.9203970432281494
Epoch 39, Batch 1, Critic Loss: 0.6304954886436462, Generator Loss: 0.9351652264595032
Epoch 39, Batch 2, Critic Loss: 0.6789644360542297, Generator Loss: 0.9694717526435852
Epoch 39, Batch 3, Critic Loss: 1.3018475770950317, Generator Loss: 0.9917147159576416
Epoch 39, Batch 4, Critic Loss: 0.8266138434410095, Generator Loss: 0.9663751721382141
Epoch 40, Batch 0, Critic Loss: 0.2923504114151001, Generator Loss: 0.9975241422653198
Epoch 40, Batch 1, Critic Loss: 0.4471952319

Epoch 56, Batch 4, Critic Loss: -0.5997537970542908, Generator Loss: 0.4822293817996979
Epoch 57, Batch 0, Critic Loss: 1.5395619869232178, Generator Loss: 0.3909786343574524
Epoch 57, Batch 1, Critic Loss: 0.8144329190254211, Generator Loss: 0.28481048345565796
Epoch 57, Batch 2, Critic Loss: 0.015146583318710327, Generator Loss: 0.18598267436027527
Epoch 57, Batch 3, Critic Loss: 0.005691200494766235, Generator Loss: 0.09700891375541687
Epoch 57, Batch 4, Critic Loss: -0.2032431811094284, Generator Loss: -0.018567778170108795
Epoch 58, Batch 0, Critic Loss: 1.9280714988708496, Generator Loss: -0.1276506930589676
Epoch 58, Batch 1, Critic Loss: 1.2618190050125122, Generator Loss: -0.25249141454696655
Epoch 58, Batch 2, Critic Loss: 0.4510304927825928, Generator Loss: -0.3604232966899872
Epoch 58, Batch 3, Critic Loss: 0.5287465453147888, Generator Loss: -0.5074987411499023
Epoch 58, Batch 4, Critic Loss: 0.3808901607990265, Generator Loss: -0.6270855665206909
Epoch 59, Batch 0, Critic

Epoch 75, Batch 4, Critic Loss: 1.9189984798431396, Generator Loss: -0.29748743772506714
Epoch 76, Batch 0, Critic Loss: 0.4359041750431061, Generator Loss: -0.3409978449344635
Epoch 76, Batch 1, Critic Loss: 0.9918647408485413, Generator Loss: -0.3975091576576233
Epoch 76, Batch 2, Critic Loss: 1.769073247909546, Generator Loss: -0.3929373621940613
Epoch 76, Batch 3, Critic Loss: 1.4950590133666992, Generator Loss: -0.4031614065170288
Epoch 76, Batch 4, Critic Loss: 1.988675594329834, Generator Loss: -0.4295900762081146
Epoch 77, Batch 0, Critic Loss: 0.6029426455497742, Generator Loss: -0.4745764136314392
Epoch 77, Batch 1, Critic Loss: 1.0599102973937988, Generator Loss: -0.4763229489326477
Epoch 77, Batch 2, Critic Loss: 1.8525038957595825, Generator Loss: -0.4961371421813965
Epoch 77, Batch 3, Critic Loss: 1.7891310453414917, Generator Loss: -0.4962836802005768
Epoch 77, Batch 4, Critic Loss: 2.0423390865325928, Generator Loss: -0.47961732745170593
Epoch 78, Batch 0, Critic Loss: 

Epoch 94, Batch 4, Critic Loss: 0.002198725938796997, Generator Loss: -0.05551854521036148
Epoch 95, Batch 0, Critic Loss: 0.8656612634658813, Generator Loss: -0.0836414322257042
Epoch 95, Batch 1, Critic Loss: 0.5295818448066711, Generator Loss: -0.10947838425636292
Epoch 95, Batch 2, Critic Loss: 0.3342968821525574, Generator Loss: -0.1409536898136139
Epoch 95, Batch 3, Critic Loss: -0.24955379962921143, Generator Loss: -0.16931335628032684
Epoch 95, Batch 4, Critic Loss: 0.08071619272232056, Generator Loss: -0.20109792053699493
Epoch 96, Batch 0, Critic Loss: 0.7551409602165222, Generator Loss: -0.24405509233474731
Epoch 96, Batch 1, Critic Loss: 0.5560030937194824, Generator Loss: -0.2642812728881836
Epoch 96, Batch 2, Critic Loss: 0.34574851393699646, Generator Loss: -0.3033483624458313
Epoch 96, Batch 3, Critic Loss: -0.29982537031173706, Generator Loss: -0.33660614490509033
Epoch 96, Batch 4, Critic Loss: 0.010736912488937378, Generator Loss: -0.367245614528656
Epoch 97, Batch 0

In [6]:
# Assuming the last column name in  original dataframe represents the label
all_column_names = data_frame.columns.tolist()  # This should have 101 names if the label is included in data_frame

# Generate synthetic data
noise = tf.random.normal([num_samples_to_generate, 100])
synthetic_data = generator(noise, training=False)
synthetic_data_df = pd.DataFrame(synthetic_data.numpy(), columns=all_column_names)
# Set the label for all generated data to 1
synthetic_data_df[all_column_names[-1]] = 1

In [7]:
#synthetic_data

In [8]:
# Combine original and synthetic data
enhanced_df = pd.concat([data_frame, synthetic_data_df], axis=0).reset_index(drop=True)
file_name='enhanced_GAN_final_new_par_50_LMF_space_3.csv'
file_path = os.path.join(base_dir,'data','split',ltype, file_name)
output_path = file_path
enhanced_df.to_csv(output_path, index=False)