### Imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import numpy as np
import os
import json
import requests
from tqdm import tqdm
import time
import keras


import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers, models, backend as K

C:\Users\riskf\anaconda3\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll
C:\Users\riskf\anaconda3\lib\site-packages\numpy\.libs\libopenblas64__v0.3.23-246-g3d31191b-gcc_10_3_0.dll


In [2]:
# WGAN-GP Architecture
def make_generator_model(input_dim, output_dim):
    model = tf.keras.Sequential()
    model.add(layers.Dense(256, activation='relu', input_dim=input_dim))
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(1024, activation='relu'))
    model.add(layers.Dense(output_dim, activation='linear'))  # Linear activation for WGAN
    return model

def make_critic_model(input_dim):
    model = tf.keras.Sequential()
    model.add(layers.Dense(512, activation='relu', input_dim=input_dim))
    model.add(layers.Dropout(0.1))
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dense(1))  # No activation, linear output
    return model

generator = make_generator_model(100, 101)
critic = make_critic_model(101)

# Losses and training
def critic_loss(real_output, fake_output):
    return tf.reduce_mean(fake_output) - tf.reduce_mean(real_output)

def generator_loss(fake_output):
    return -tf.reduce_mean(fake_output)

def gradient_penalty(batch_size, real_images, fake_images, critic):
    epsilon = tf.random.normal([batch_size, 1], 0.0, 1.0)
    interpolated = epsilon * real_images + (1 - epsilon) * fake_images
    with tf.GradientTape() as tape:
        tape.watch(interpolated)
        pred = critic(interpolated, training=True)
    grads = tape.gradient(pred, [interpolated])[0]
    norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1]))
    gp = tf.reduce_mean((norm - 1.0) ** 2)
    return gp

def train_step(generator, critic, batch_size, generator_optimizer, critic_optimizer, real_features):
    # Append a label column to real_features to match the critic's input expectations
    labels = tf.ones((batch_size, 1))  # Assume label 1 for all positive samples
    real_data = tf.concat([real_features, labels], axis=1)
    
    noise = tf.random.normal([batch_size, generator.input_shape[1]])
    with tf.GradientTape() as gen_tape, tf.GradientTape() as crit_tape:
        generated_data = generator(noise, training=True)

        real_output = critic(real_data, training=True)
        fake_output = critic(generated_data, training=True)

        crit_loss = critic_loss(real_output, fake_output)
        gen_loss = generator_loss(fake_output)
        penalty = gradient_penalty(batch_size, real_data, generated_data, critic)
        crit_loss += 10 * penalty  # lambda for gradient penalty

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_critic = crit_tape.gradient(crit_loss, critic.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    critic_optimizer.apply_gradients(zip(gradients_of_critic, critic.trainable_variables))

    return crit_loss, gen_loss




In [3]:
#relative paths. # Set directory paths for later use.
# Get the directory of the script file
base_dir = os.getcwd()
ligants_type = ['enzyme', 'GPCR', 'ion_channel', 'nuclear_receptor']
ltype = ligants_type[2]
file_name = 'final_new_par_NNMF_50.csv'
file_path = os.path.join(base_dir, 'data', 'split', ltype, file_name)
data_frame = pd.read_csv(file_path, header=None, skiprows=1)
features = data_frame.iloc[:, :-1].values
labels = data_frame.iloc[:, -1].values
# Filter to get only the positive samples
positive_features = features[labels == 1]

In [4]:
print(positive_features.shape)
print(features.shape)
print(labels.shape)
len(positive_features)

(1476, 100)
(42840, 100)
(42840,)


1476

In [5]:
# Training parameters
num_samples_to_generate = 39888
epochs = 100
batch_size = 256
learning_rate=0.0001
beta_1=0.5
generator_optimizer = Adam(learning_rate=learning_rate, beta_1=beta_1)
critic_optimizer = Adam(learning_rate=learning_rate, beta_1=beta_1)

# Training loop
for epoch in range(epochs):
    for batch in range(0, len(positive_features), batch_size):
        real_data_batch = positive_features[batch:batch + batch_size]
        if real_data_batch.shape[0] != batch_size:  # Handle last batch which may be smaller
            continue  # Skip if the batch isn't full size
        crit_loss, gen_loss = train_step(generator, critic, batch_size, generator_optimizer, critic_optimizer, real_data_batch)
        print(f'Epoch {epoch}, Batch {batch // batch_size}, Critic Loss: {crit_loss.numpy()}, Generator Loss: {gen_loss.numpy()}')

Epoch 0, Batch 0, Critic Loss: 2.6986050605773926, Generator Loss: -0.12610262632369995
Epoch 0, Batch 1, Critic Loss: 2.632004737854004, Generator Loss: -0.22083866596221924
Epoch 0, Batch 2, Critic Loss: 2.3754024505615234, Generator Loss: -0.32016775012016296
Epoch 0, Batch 3, Critic Loss: 2.476247549057007, Generator Loss: -0.4457160532474518
Epoch 0, Batch 4, Critic Loss: 2.3555119037628174, Generator Loss: -0.5828413963317871
Epoch 1, Batch 0, Critic Loss: 2.5018529891967773, Generator Loss: -0.7285786867141724
Epoch 1, Batch 1, Critic Loss: 2.443084239959717, Generator Loss: -0.9049251675605774
Epoch 1, Batch 2, Critic Loss: 2.211125373840332, Generator Loss: -1.0972225666046143
Epoch 1, Batch 3, Critic Loss: 2.3695218563079834, Generator Loss: -1.3499112129211426
Epoch 1, Batch 4, Critic Loss: 2.4459140300750732, Generator Loss: -1.5968492031097412
Epoch 2, Batch 0, Critic Loss: 2.7451624870300293, Generator Loss: -1.8641676902770996
Epoch 2, Batch 1, Critic Loss: 2.85748839378

Epoch 19, Batch 1, Critic Loss: -0.07843565940856934, Generator Loss: 1.1316862106323242
Epoch 19, Batch 2, Critic Loss: 0.09331512451171875, Generator Loss: 1.2201590538024902
Epoch 19, Batch 3, Critic Loss: 0.12594270706176758, Generator Loss: 1.3455150127410889
Epoch 19, Batch 4, Critic Loss: 0.10442525148391724, Generator Loss: 1.3900477886199951
Epoch 20, Batch 0, Critic Loss: -0.38698363304138184, Generator Loss: 1.5192958116531372
Epoch 20, Batch 1, Critic Loss: -0.40677469968795776, Generator Loss: 1.6551103591918945
Epoch 20, Batch 2, Critic Loss: -0.20213085412979126, Generator Loss: 1.7204890251159668
Epoch 20, Batch 3, Critic Loss: -0.1637936234474182, Generator Loss: 1.8477336168289185
Epoch 20, Batch 4, Critic Loss: -0.17479705810546875, Generator Loss: 1.94008207321167
Epoch 21, Batch 0, Critic Loss: -0.8061729669570923, Generator Loss: 2.1391873359680176
Epoch 21, Batch 1, Critic Loss: -0.8563580513000488, Generator Loss: 2.220810651779175
Epoch 21, Batch 2, Critic Loss

Epoch 38, Batch 2, Critic Loss: 2.311298370361328, Generator Loss: 0.5125914812088013
Epoch 38, Batch 3, Critic Loss: 1.9903690814971924, Generator Loss: 0.5250101089477539
Epoch 38, Batch 4, Critic Loss: 2.1031837463378906, Generator Loss: 0.5097068548202515
Epoch 39, Batch 0, Critic Loss: 1.8542578220367432, Generator Loss: 0.5210713148117065
Epoch 39, Batch 1, Critic Loss: 2.100266933441162, Generator Loss: 0.4993407726287842
Epoch 39, Batch 2, Critic Loss: 2.2022228240966797, Generator Loss: 0.5213022232055664
Epoch 39, Batch 3, Critic Loss: 2.011176347732544, Generator Loss: 0.5072521567344666
Epoch 39, Batch 4, Critic Loss: 1.893930435180664, Generator Loss: 0.5153185129165649
Epoch 40, Batch 0, Critic Loss: 1.7849586009979248, Generator Loss: 0.4991171061992645
Epoch 40, Batch 1, Critic Loss: 1.840113878250122, Generator Loss: 0.5052297711372375
Epoch 40, Batch 2, Critic Loss: 2.2219090461730957, Generator Loss: 0.48894011974334717
Epoch 40, Batch 3, Critic Loss: 2.0041141510009

Epoch 57, Batch 4, Critic Loss: 1.8939504623413086, Generator Loss: 0.26992613077163696
Epoch 58, Batch 0, Critic Loss: 1.640407681465149, Generator Loss: 0.2698589563369751
Epoch 58, Batch 1, Critic Loss: 1.6462153196334839, Generator Loss: 0.2797319293022156
Epoch 58, Batch 2, Critic Loss: 1.8904409408569336, Generator Loss: 0.2841489315032959
Epoch 58, Batch 3, Critic Loss: 1.8935065269470215, Generator Loss: 0.279472291469574
Epoch 58, Batch 4, Critic Loss: 1.8840303421020508, Generator Loss: 0.2861781120300293
Epoch 59, Batch 0, Critic Loss: 1.6468896865844727, Generator Loss: 0.2869468331336975
Epoch 59, Batch 1, Critic Loss: 1.6608754396438599, Generator Loss: 0.29866504669189453
Epoch 59, Batch 2, Critic Loss: 1.9263566732406616, Generator Loss: 0.30524903535842896
Epoch 59, Batch 3, Critic Loss: 1.9363908767700195, Generator Loss: 0.3180503845214844
Epoch 59, Batch 4, Critic Loss: 1.9856865406036377, Generator Loss: 0.3171423375606537
Epoch 60, Batch 0, Critic Loss: 1.62402367

Epoch 76, Batch 3, Critic Loss: 1.6223335266113281, Generator Loss: 0.14222504198551178
Epoch 76, Batch 4, Critic Loss: 1.7323923110961914, Generator Loss: 0.13699515163898468
Epoch 77, Batch 0, Critic Loss: 1.4909472465515137, Generator Loss: 0.12336669117212296
Epoch 77, Batch 1, Critic Loss: 1.5839509963989258, Generator Loss: 0.11819435656070709
Epoch 77, Batch 2, Critic Loss: 1.8211190700531006, Generator Loss: 0.11121343821287155
Epoch 77, Batch 3, Critic Loss: 1.6947649717330933, Generator Loss: 0.10271969437599182
Epoch 77, Batch 4, Critic Loss: 1.7513386011123657, Generator Loss: 0.09421766549348831
Epoch 78, Batch 0, Critic Loss: 1.568679690361023, Generator Loss: 0.08498677611351013
Epoch 78, Batch 1, Critic Loss: 1.630706548690796, Generator Loss: 0.07641825079917908
Epoch 78, Batch 2, Critic Loss: 1.8993661403656006, Generator Loss: 0.07217049598693848
Epoch 78, Batch 3, Critic Loss: 1.602189540863037, Generator Loss: 0.06188713759183884
Epoch 78, Batch 4, Critic Loss: 1.7

Epoch 95, Batch 2, Critic Loss: 0.8323764801025391, Generator Loss: 0.7721550464630127
Epoch 95, Batch 3, Critic Loss: 1.435917854309082, Generator Loss: 0.7236126661300659
Epoch 95, Batch 4, Critic Loss: 1.4448275566101074, Generator Loss: 0.6796891689300537
Epoch 96, Batch 0, Critic Loss: 1.0096721649169922, Generator Loss: 0.6519205570220947
Epoch 96, Batch 1, Critic Loss: 0.9088444709777832, Generator Loss: 0.601331353187561
Epoch 96, Batch 2, Critic Loss: 0.7505680918693542, Generator Loss: 0.567501425743103
Epoch 96, Batch 3, Critic Loss: 1.5390405654907227, Generator Loss: 0.5104597806930542
Epoch 96, Batch 4, Critic Loss: 1.5146043300628662, Generator Loss: 0.45502468943595886
Epoch 97, Batch 0, Critic Loss: 1.08172607421875, Generator Loss: 0.4130662679672241
Epoch 97, Batch 1, Critic Loss: 0.9523888826370239, Generator Loss: 0.368802011013031
Epoch 97, Batch 2, Critic Loss: 0.847415566444397, Generator Loss: 0.3180539011955261
Epoch 97, Batch 3, Critic Loss: 1.662669897079467

In [6]:
# Assuming the last column name in  original dataframe represents the label
all_column_names = data_frame.columns.tolist()  # This should have 101 names if the label is included in data_frame

# Generate synthetic data
noise = tf.random.normal([num_samples_to_generate, 100])
synthetic_data = generator(noise, training=False)
synthetic_data_df = pd.DataFrame(synthetic_data.numpy(), columns=all_column_names)
# Set the label for all generated data to 1
synthetic_data_df[all_column_names[-1]] = 1

In [7]:
#synthetic_data

In [8]:
# Combine original and synthetic data
enhanced_df = pd.concat([data_frame, synthetic_data_df], axis=0).reset_index(drop=True)
file_name='enhanced_GAN_final_new_par_50_NNMF_space_2.csv'
file_path = os.path.join(base_dir,'data','split',ltype, file_name)
output_path = file_path
enhanced_df.to_csv(output_path, index=False)