### Imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import numpy as np
import os
import json
import requests
from tqdm import tqdm
import time
import keras


import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers, models, backend as K

C:\Users\riskf\anaconda3\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll
C:\Users\riskf\anaconda3\lib\site-packages\numpy\.libs\libopenblas64__v0.3.23-246-g3d31191b-gcc_10_3_0.dll


In [11]:
# WGAN-GP Architecture
def make_generator_model(input_dim, output_dim):
    model = tf.keras.Sequential()
    model.add(layers.Dense(256, activation='relu', input_dim=input_dim))
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(1024, activation='relu'))
    model.add(layers.Dense(output_dim, activation='linear'))  # Linear activation for WGAN
    return model

def make_critic_model(input_dim):
    model = tf.keras.Sequential()
    model.add(layers.Dense(512, activation='relu', input_dim=input_dim))
    model.add(layers.Dropout(0.1))
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dense(1))  # No activation, linear output
    return model

generator = make_generator_model(100, 101)
critic = make_critic_model(101)

# Losses and training
def critic_loss(real_output, fake_output):
    return tf.reduce_mean(fake_output) - tf.reduce_mean(real_output)

def generator_loss(fake_output):
    return -tf.reduce_mean(fake_output)

def gradient_penalty(batch_size, real_images, fake_images, critic):
    epsilon = tf.random.normal([batch_size, 1], 0.0, 1.0)
    interpolated = epsilon * real_images + (1 - epsilon) * fake_images
    with tf.GradientTape() as tape:
        tape.watch(interpolated)
        pred = critic(interpolated, training=True)
    grads = tape.gradient(pred, [interpolated])[0]
    norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1]))
    gp = tf.reduce_mean((norm - 1.0) ** 2)
    return gp

def train_step(generator, critic, batch_size, generator_optimizer, critic_optimizer, real_features):
    # Append a label column to real_features to match the critic's input expectations
    labels = tf.ones((batch_size, 1))  # Assume label 1 for all positive samples
    real_data = tf.concat([real_features, labels], axis=1)
    
    noise = tf.random.normal([batch_size, generator.input_shape[1]])
    with tf.GradientTape() as gen_tape, tf.GradientTape() as crit_tape:
        generated_data = generator(noise, training=True)

        real_output = critic(real_data, training=True)
        fake_output = critic(generated_data, training=True)

        crit_loss = critic_loss(real_output, fake_output)
        gen_loss = generator_loss(fake_output)
        penalty = gradient_penalty(batch_size, real_data, generated_data, critic)
        crit_loss += 10 * penalty  # lambda for gradient penalty

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_critic = crit_tape.gradient(crit_loss, critic.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    critic_optimizer.apply_gradients(zip(gradients_of_critic, critic.trainable_variables))

    return crit_loss, gen_loss




In [3]:
#relative paths. # Set directory paths for later use.
# Get the directory of the script file
base_dir = os.getcwd()
ligants_type = ['enzyme', 'GPCR', 'ion_channel', 'nuclear_receptor']
ltype = ligants_type[0]
file_name = 'final_new_par_50.csv'
file_path = os.path.join(base_dir, 'data', 'split', ltype, file_name)
data_frame = pd.read_csv(file_path, header=None, skiprows=1)
features = data_frame.iloc[:, :-1].values
labels = data_frame.iloc[:, -1].values
# Filter to get only the positive samples
positive_features = features[labels == 1]

In [9]:
print(positive_features.shape)
print(features.shape)
print(labels.shape)
len(positive_features)

(2926, 100)
(295480, 100)
(295480,)


2926

In [25]:
# Training parameters
num_samples_to_generate = 289628
epochs = 100
batch_size = 256
learning_rate=0.0001
beta_1=0.5
generator_optimizer = Adam(learning_rate=learning_rate, beta_1=beta_1)
critic_optimizer = Adam(learning_rate=learning_rate, beta_1=beta_1)

# Training loop
for epoch in range(epochs):
    for batch in range(0, len(positive_features), batch_size):
        real_data_batch = positive_features[batch:batch + batch_size]
        if real_data_batch.shape[0] != batch_size:  # Handle last batch which may be smaller
            continue  # Skip if the batch isn't full size
        crit_loss, gen_loss = train_step(generator, critic, batch_size, generator_optimizer, critic_optimizer, real_data_batch)
        print(f'Epoch {epoch}, Batch {batch // batch_size}, Critic Loss: {crit_loss.numpy()}, Generator Loss: {gen_loss.numpy()}')

Epoch 0, Batch 0, Critic Loss: -0.784879207611084, Generator Loss: 0.6578428745269775
Epoch 0, Batch 1, Critic Loss: -0.966731607913971, Generator Loss: 0.6665098667144775
Epoch 0, Batch 2, Critic Loss: -1.0013773441314697, Generator Loss: 0.671965479850769
Epoch 0, Batch 3, Critic Loss: -1.1038994789123535, Generator Loss: 0.6823076009750366
Epoch 0, Batch 4, Critic Loss: -0.9992983341217041, Generator Loss: 0.6736606359481812
Epoch 0, Batch 5, Critic Loss: -0.9318733811378479, Generator Loss: 0.6637182831764221
Epoch 0, Batch 6, Critic Loss: -1.0314366817474365, Generator Loss: 0.6773547530174255
Epoch 0, Batch 7, Critic Loss: -1.3599915504455566, Generator Loss: 0.6770311594009399
Epoch 0, Batch 8, Critic Loss: -0.9129783511161804, Generator Loss: 0.6653642058372498
Epoch 0, Batch 9, Critic Loss: -1.088902235031128, Generator Loss: 0.6529031991958618
Epoch 0, Batch 10, Critic Loss: -0.6465653777122498, Generator Loss: 0.6527606248855591
Epoch 1, Batch 0, Critic Loss: -0.868910849094

Epoch 8, Batch 7, Critic Loss: -1.1918158531188965, Generator Loss: 0.30497562885284424
Epoch 8, Batch 8, Critic Loss: -1.2523502111434937, Generator Loss: 0.34656280279159546
Epoch 8, Batch 9, Critic Loss: -1.3554054498672485, Generator Loss: 0.39233967661857605
Epoch 8, Batch 10, Critic Loss: -0.74091637134552, Generator Loss: 0.41090524196624756
Epoch 9, Batch 0, Critic Loss: -1.1364448070526123, Generator Loss: 0.43682143092155457
Epoch 9, Batch 1, Critic Loss: -1.1321494579315186, Generator Loss: 0.4466136693954468
Epoch 9, Batch 2, Critic Loss: -1.1170974969863892, Generator Loss: 0.4507368206977844
Epoch 9, Batch 3, Critic Loss: -1.5488661527633667, Generator Loss: 0.44063466787338257
Epoch 9, Batch 4, Critic Loss: -1.173887014389038, Generator Loss: 0.44080084562301636
Epoch 9, Batch 5, Critic Loss: -1.1540056467056274, Generator Loss: 0.42970889806747437
Epoch 9, Batch 6, Critic Loss: -1.150932788848877, Generator Loss: 0.42532825469970703
Epoch 9, Batch 7, Critic Loss: -1.436

Epoch 17, Batch 4, Critic Loss: -0.6421186923980713, Generator Loss: 0.24947646260261536
Epoch 17, Batch 5, Critic Loss: -0.5318334102630615, Generator Loss: 0.23591242730617523
Epoch 17, Batch 6, Critic Loss: -0.6485505700111389, Generator Loss: 0.2054910957813263
Epoch 17, Batch 7, Critic Loss: -1.0389792919158936, Generator Loss: 0.18013893067836761
Epoch 17, Batch 8, Critic Loss: -0.8901777267456055, Generator Loss: 0.17050379514694214
Epoch 17, Batch 9, Critic Loss: -0.7940653562545776, Generator Loss: 0.14727060496807098
Epoch 17, Batch 10, Critic Loss: -0.3933076858520508, Generator Loss: 0.1384710669517517
Epoch 18, Batch 0, Critic Loss: -0.378158837556839, Generator Loss: 0.12489835917949677
Epoch 18, Batch 1, Critic Loss: 0.2050027698278427, Generator Loss: 0.11810509115457535
Epoch 18, Batch 2, Critic Loss: -0.005532130599021912, Generator Loss: 0.09941023588180542
Epoch 18, Batch 3, Critic Loss: -1.1495440006256104, Generator Loss: 0.11015988886356354
Epoch 18, Batch 4, Cri

Epoch 25, Batch 10, Critic Loss: -0.5419580340385437, Generator Loss: 0.30480337142944336
Epoch 26, Batch 0, Critic Loss: -0.43987858295440674, Generator Loss: 0.2952132523059845
Epoch 26, Batch 1, Critic Loss: 0.388363242149353, Generator Loss: 0.27327287197113037
Epoch 26, Batch 2, Critic Loss: 0.35184967517852783, Generator Loss: 0.25220948457717896
Epoch 26, Batch 3, Critic Loss: -1.1980130672454834, Generator Loss: 0.24059650301933289
Epoch 26, Batch 4, Critic Loss: -0.6305514574050903, Generator Loss: 0.22258339822292328
Epoch 26, Batch 5, Critic Loss: -0.6136869192123413, Generator Loss: 0.22549213469028473
Epoch 26, Batch 6, Critic Loss: -0.6690486669540405, Generator Loss: 0.2524838447570801
Epoch 26, Batch 7, Critic Loss: -0.6518815755844116, Generator Loss: 0.26069581508636475
Epoch 26, Batch 8, Critic Loss: -0.9340968728065491, Generator Loss: 0.2783980965614319
Epoch 26, Batch 9, Critic Loss: -0.8874565362930298, Generator Loss: 0.313187837600708
Epoch 26, Batch 10, Critic

Epoch 34, Batch 4, Critic Loss: -0.7896130681037903, Generator Loss: 0.5124177932739258
Epoch 34, Batch 5, Critic Loss: -0.7174711227416992, Generator Loss: 0.46059489250183105
Epoch 34, Batch 6, Critic Loss: -0.7122284770011902, Generator Loss: 0.4268171191215515
Epoch 34, Batch 7, Critic Loss: -0.8241775035858154, Generator Loss: 0.39267972111701965
Epoch 34, Batch 8, Critic Loss: -0.8520157337188721, Generator Loss: 0.3482862412929535
Epoch 34, Batch 9, Critic Loss: -0.9333926439285278, Generator Loss: 0.31814128160476685
Epoch 34, Batch 10, Critic Loss: -0.5482961535453796, Generator Loss: 0.2875376343727112
Epoch 35, Batch 0, Critic Loss: -0.5751376152038574, Generator Loss: 0.2594780921936035
Epoch 35, Batch 1, Critic Loss: -0.2459910362958908, Generator Loss: 0.2413713037967682
Epoch 35, Batch 2, Critic Loss: -0.34993457794189453, Generator Loss: 0.20766910910606384
Epoch 35, Batch 3, Critic Loss: -0.9727521538734436, Generator Loss: 0.18825805187225342
Epoch 35, Batch 4, Critic

Epoch 43, Batch 0, Critic Loss: -0.9405757188796997, Generator Loss: 0.7827080488204956
Epoch 43, Batch 1, Critic Loss: -0.24385809898376465, Generator Loss: 0.8257094621658325
Epoch 43, Batch 2, Critic Loss: -0.41221314668655396, Generator Loss: 0.8237395286560059
Epoch 43, Batch 3, Critic Loss: -1.505896806716919, Generator Loss: 0.8306498527526855
Epoch 43, Batch 4, Critic Loss: -1.172502875328064, Generator Loss: 0.8447480797767639
Epoch 43, Batch 5, Critic Loss: -1.098527431488037, Generator Loss: 0.8318879008293152
Epoch 43, Batch 6, Critic Loss: -1.2397167682647705, Generator Loss: 0.8287103176116943
Epoch 43, Batch 7, Critic Loss: -1.5070809125900269, Generator Loss: 0.8256800174713135
Epoch 43, Batch 8, Critic Loss: -1.4087517261505127, Generator Loss: 0.816058874130249
Epoch 43, Batch 9, Critic Loss: -1.4381966590881348, Generator Loss: 0.7969797253608704
Epoch 43, Batch 10, Critic Loss: -0.962398886680603, Generator Loss: 0.7661362886428833
Epoch 44, Batch 0, Critic Loss: -0

Epoch 51, Batch 7, Critic Loss: -0.9283732175827026, Generator Loss: 0.4110969305038452
Epoch 51, Batch 8, Critic Loss: -0.9946057796478271, Generator Loss: 0.43855398893356323
Epoch 51, Batch 9, Critic Loss: -0.9568761587142944, Generator Loss: 0.4656957685947418
Epoch 51, Batch 10, Critic Loss: -0.3492002785205841, Generator Loss: 0.4971398115158081
Epoch 52, Batch 0, Critic Loss: -0.8545472621917725, Generator Loss: 0.5393275022506714
Epoch 52, Batch 1, Critic Loss: -0.9363940358161926, Generator Loss: 0.5902293920516968
Epoch 52, Batch 2, Critic Loss: -1.0654164552688599, Generator Loss: 0.6065850257873535
Epoch 52, Batch 3, Critic Loss: -1.0506008863449097, Generator Loss: 0.6463190317153931
Epoch 52, Batch 4, Critic Loss: -0.7349570989608765, Generator Loss: 0.6433976888656616
Epoch 52, Batch 5, Critic Loss: -0.8454557657241821, Generator Loss: 0.6592060327529907
Epoch 52, Batch 6, Critic Loss: -0.7979906797409058, Generator Loss: 0.660493791103363
Epoch 52, Batch 7, Critic Loss:

Epoch 60, Batch 2, Critic Loss: -0.9949740767478943, Generator Loss: 0.8524815440177917
Epoch 60, Batch 3, Critic Loss: -1.732888102531433, Generator Loss: 0.965286135673523
Epoch 60, Batch 4, Critic Loss: -1.2552887201309204, Generator Loss: 1.0494896173477173
Epoch 60, Batch 5, Critic Loss: -1.3366153240203857, Generator Loss: 1.1618049144744873
Epoch 60, Batch 6, Critic Loss: -1.3671244382858276, Generator Loss: 1.2045162916183472
Epoch 60, Batch 7, Critic Loss: -1.3231134414672852, Generator Loss: 1.2591111660003662
Epoch 60, Batch 8, Critic Loss: -1.6575688123703003, Generator Loss: 1.2274004220962524
Epoch 60, Batch 9, Critic Loss: -1.7886972427368164, Generator Loss: 1.2452523708343506
Epoch 60, Batch 10, Critic Loss: -0.9119405746459961, Generator Loss: 1.2628297805786133
Epoch 61, Batch 0, Critic Loss: -1.364864706993103, Generator Loss: 1.2660691738128662
Epoch 61, Batch 1, Critic Loss: -1.389805555343628, Generator Loss: 1.2695918083190918
Epoch 61, Batch 2, Critic Loss: -1.

Epoch 68, Batch 7, Critic Loss: -0.34513404965400696, Generator Loss: 0.22559452056884766
Epoch 68, Batch 8, Critic Loss: -0.9202756285667419, Generator Loss: 0.2375488579273224
Epoch 68, Batch 9, Critic Loss: -0.5853083729743958, Generator Loss: 0.2711055278778076
Epoch 68, Batch 10, Critic Loss: -0.3872378170490265, Generator Loss: 0.33433082699775696
Epoch 69, Batch 0, Critic Loss: -0.8516157269477844, Generator Loss: 0.3980039656162262
Epoch 69, Batch 1, Critic Loss: -0.8137694597244263, Generator Loss: 0.45818281173706055
Epoch 69, Batch 2, Critic Loss: -0.730199933052063, Generator Loss: 0.5076979994773865
Epoch 69, Batch 3, Critic Loss: -1.4176161289215088, Generator Loss: 0.5507126450538635
Epoch 69, Batch 4, Critic Loss: -0.8378070592880249, Generator Loss: 0.5699448585510254
Epoch 69, Batch 5, Critic Loss: -0.9000046849250793, Generator Loss: 0.6129655838012695
Epoch 69, Batch 6, Critic Loss: -0.8827053308486938, Generator Loss: 0.6621195077896118
Epoch 69, Batch 7, Critic Lo

Epoch 77, Batch 2, Critic Loss: -1.1791177988052368, Generator Loss: 0.8477634191513062
Epoch 77, Batch 3, Critic Loss: -1.568516492843628, Generator Loss: 0.7951297760009766
Epoch 77, Batch 4, Critic Loss: -1.0628031492233276, Generator Loss: 0.7354799509048462
Epoch 77, Batch 5, Critic Loss: -0.9720109701156616, Generator Loss: 0.6504908204078674
Epoch 77, Batch 6, Critic Loss: -1.036798119544983, Generator Loss: 0.5981325507164001
Epoch 77, Batch 7, Critic Loss: -1.1052138805389404, Generator Loss: 0.5380812287330627
Epoch 77, Batch 8, Critic Loss: -1.214876413345337, Generator Loss: 0.5144999623298645
Epoch 77, Batch 9, Critic Loss: -1.1433978080749512, Generator Loss: 0.48028093576431274
Epoch 77, Batch 10, Critic Loss: -0.7551393508911133, Generator Loss: 0.4594646096229553
Epoch 78, Batch 0, Critic Loss: -0.9494497776031494, Generator Loss: 0.4662241041660309
Epoch 78, Batch 1, Critic Loss: -0.9064035415649414, Generator Loss: 0.45717984437942505
Epoch 78, Batch 2, Critic Loss: 

Epoch 85, Batch 8, Critic Loss: -1.1054004430770874, Generator Loss: 0.8993697166442871
Epoch 85, Batch 9, Critic Loss: -0.7334404587745667, Generator Loss: 0.8342247605323792
Epoch 85, Batch 10, Critic Loss: -0.6157712936401367, Generator Loss: 0.7701038122177124
Epoch 86, Batch 0, Critic Loss: -0.41322243213653564, Generator Loss: 0.7189579606056213
Epoch 86, Batch 1, Critic Loss: 0.36211588978767395, Generator Loss: 0.6690367460250854
Epoch 86, Batch 2, Critic Loss: 0.12622256577014923, Generator Loss: 0.6180866360664368
Epoch 86, Batch 3, Critic Loss: -0.7680529356002808, Generator Loss: 0.560746967792511
Epoch 86, Batch 4, Critic Loss: -0.45013928413391113, Generator Loss: 0.5113208293914795
Epoch 86, Batch 5, Critic Loss: -0.465178519487381, Generator Loss: 0.4662187695503235
Epoch 86, Batch 6, Critic Loss: -0.49480387568473816, Generator Loss: 0.4244900941848755
Epoch 86, Batch 7, Critic Loss: -0.6660659313201904, Generator Loss: 0.3867882192134857
Epoch 86, Batch 8, Critic Loss

Epoch 94, Batch 4, Critic Loss: -0.3232671320438385, Generator Loss: 0.41232991218566895
Epoch 94, Batch 5, Critic Loss: -0.3694911003112793, Generator Loss: 0.33626580238342285
Epoch 94, Batch 6, Critic Loss: -0.19155387580394745, Generator Loss: 0.28991544246673584
Epoch 94, Batch 7, Critic Loss: -0.5913785099983215, Generator Loss: 0.24228981137275696
Epoch 94, Batch 8, Critic Loss: -0.899564266204834, Generator Loss: 0.21932779252529144
Epoch 94, Batch 9, Critic Loss: -0.5725010633468628, Generator Loss: 0.18456652760505676
Epoch 94, Batch 10, Critic Loss: -0.31567972898483276, Generator Loss: 0.1838749796152115
Epoch 95, Batch 0, Critic Loss: -0.656226634979248, Generator Loss: 0.17198412120342255
Epoch 95, Batch 1, Critic Loss: -0.6251615881919861, Generator Loss: 0.17014989256858826
Epoch 95, Batch 2, Critic Loss: -0.619961142539978, Generator Loss: 0.13013096153736115
Epoch 95, Batch 3, Critic Loss: -0.14191682636737823, Generator Loss: 0.11073653399944305
Epoch 95, Batch 4, Cr

In [None]:
# Assuming the last column name in original dataframe represents the label
all_column_names = data_frame.columns.tolist()  # This should have 101 names if the label is included in data_frame

# Generate synthetic data
noise = tf.random.normal([num_samples_to_generate, 100])
synthetic_data = generator(noise, training=False)
synthetic_data_df = pd.DataFrame(synthetic_data.numpy(), columns=all_column_names)
# Set the label for all generated data to 1
synthetic_data_df[all_column_names[-1]] = 1

In [22]:
#synthetic_data

In [27]:
# Combine original and synthetic data
enhanced_df = pd.concat([data_frame, synthetic_data_df], axis=0).reset_index(drop=True)
file_name='enhanced_GAN_final_new_par_50_space_1.csv'
file_path = os.path.join(base_dir,'data','split',ltype, file_name)
output_path = file_path
enhanced_df.to_csv(output_path, index=False)