### Imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import numpy as np
import os
import json
import requests
from tqdm import tqdm
import time
import keras


import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers, models, backend as K

C:\Users\riskf\anaconda3\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll
C:\Users\riskf\anaconda3\lib\site-packages\numpy\.libs\libopenblas64__v0.3.23-246-g3d31191b-gcc_10_3_0.dll


In [2]:
# WGAN-GP Architecture
def make_generator_model(input_dim, output_dim):
    model = tf.keras.Sequential()
    model.add(layers.Dense(256, activation='relu', input_dim=input_dim))
    model.add(layers.Dense(512, activation='relu'))
    model.add(layers.Dense(1024, activation='relu'))
    model.add(layers.Dense(output_dim, activation='linear'))  # Linear activation for WGAN
    return model

def make_critic_model(input_dim):
    model = tf.keras.Sequential()
    model.add(layers.Dense(512, activation='relu', input_dim=input_dim))
    model.add(layers.Dropout(0.1))
    model.add(layers.Dense(256, activation='relu'))
    model.add(layers.Dense(1))  # No activation, linear output
    return model

generator = make_generator_model(100, 101)
critic = make_critic_model(101)

# Losses and training
def critic_loss(real_output, fake_output):
    return tf.reduce_mean(fake_output) - tf.reduce_mean(real_output)

def generator_loss(fake_output):
    return -tf.reduce_mean(fake_output)

def gradient_penalty(batch_size, real_images, fake_images, critic):
    epsilon = tf.random.normal([batch_size, 1], 0.0, 1.0)
    interpolated = epsilon * real_images + (1 - epsilon) * fake_images
    with tf.GradientTape() as tape:
        tape.watch(interpolated)
        pred = critic(interpolated, training=True)
    grads = tape.gradient(pred, [interpolated])[0]
    norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1]))
    gp = tf.reduce_mean((norm - 1.0) ** 2)
    return gp

def train_step(generator, critic, batch_size, generator_optimizer, critic_optimizer, real_features):
    # Append a label column to real_features to match the critic's input expectations
    labels = tf.ones((batch_size, 1))  # Assume label 1 for all positive samples
    real_data = tf.concat([real_features, labels], axis=1)
    
    noise = tf.random.normal([batch_size, generator.input_shape[1]])
    with tf.GradientTape() as gen_tape, tf.GradientTape() as crit_tape:
        generated_data = generator(noise, training=True)

        real_output = critic(real_data, training=True)
        fake_output = critic(generated_data, training=True)

        crit_loss = critic_loss(real_output, fake_output)
        gen_loss = generator_loss(fake_output)
        penalty = gradient_penalty(batch_size, real_data, generated_data, critic)
        crit_loss += 10 * penalty  # lambda for gradient penalty

    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_critic = crit_tape.gradient(crit_loss, critic.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    critic_optimizer.apply_gradients(zip(gradients_of_critic, critic.trainable_variables))

    return crit_loss, gen_loss




In [3]:
#relative paths. # Set directory paths for later use.
# Get the directory of the script file
base_dir = os.getcwd()
ligants_type = ['enzyme', 'GPCR', 'ion_channel', 'nuclear_receptor']
ltype = ligants_type[0]
file_name = 'final_new_par_LMF_50.csv'
file_path = os.path.join(base_dir, 'data', 'split', ltype, file_name)
data_frame = pd.read_csv(file_path, header=None, skiprows=1)
features = data_frame.iloc[:, :-1].values
labels = data_frame.iloc[:, -1].values
# Filter to get only the positive samples
positive_features = features[labels == 1]

In [4]:
print(positive_features.shape)
print(features.shape)
print(labels.shape)
len(positive_features)

(2926, 100)
(295480, 100)
(295480,)


2926

In [5]:
# Training parameters
num_samples_to_generate = 289628
epochs = 100
batch_size = 256
learning_rate=0.0001
beta_1=0.5
generator_optimizer = Adam(learning_rate=learning_rate, beta_1=beta_1)
critic_optimizer = Adam(learning_rate=learning_rate, beta_1=beta_1)

# Training loop
for epoch in range(epochs):
    for batch in range(0, len(positive_features), batch_size):
        real_data_batch = positive_features[batch:batch + batch_size]
        if real_data_batch.shape[0] != batch_size:  # Handle last batch which may be smaller
            continue  # Skip if the batch isn't full size
        crit_loss, gen_loss = train_step(generator, critic, batch_size, generator_optimizer, critic_optimizer, real_data_batch)
        print(f'Epoch {epoch}, Batch {batch // batch_size}, Critic Loss: {crit_loss.numpy()}, Generator Loss: {gen_loss.numpy()}')

Epoch 0, Batch 0, Critic Loss: 2.7145755290985107, Generator Loss: -0.10265226662158966
Epoch 0, Batch 1, Critic Loss: 2.660320520401001, Generator Loss: -0.18824177980422974
Epoch 0, Batch 2, Critic Loss: 2.5022830963134766, Generator Loss: -0.2909536361694336
Epoch 0, Batch 3, Critic Loss: 2.399735927581787, Generator Loss: -0.41935601830482483
Epoch 0, Batch 4, Critic Loss: 2.5749690532684326, Generator Loss: -0.5729066133499146
Epoch 0, Batch 5, Critic Loss: 2.569801092147827, Generator Loss: -0.7359496355056763
Epoch 0, Batch 6, Critic Loss: 2.5597827434539795, Generator Loss: -0.9202494025230408
Epoch 0, Batch 7, Critic Loss: 2.82185697555542, Generator Loss: -1.1134955883026123
Epoch 0, Batch 8, Critic Loss: 2.5998549461364746, Generator Loss: -1.3252294063568115
Epoch 0, Batch 9, Critic Loss: 2.8904953002929688, Generator Loss: -1.5906234979629517
Epoch 0, Batch 10, Critic Loss: 2.946277379989624, Generator Loss: -1.8839881420135498
Epoch 1, Batch 0, Critic Loss: 2.960505008697

Epoch 8, Batch 10, Critic Loss: 0.09875071048736572, Generator Loss: 1.8092916011810303
Epoch 9, Batch 0, Critic Loss: 0.14508378505706787, Generator Loss: 1.9221882820129395
Epoch 9, Batch 1, Critic Loss: 0.10613751411437988, Generator Loss: 2.0140042304992676
Epoch 9, Batch 2, Critic Loss: 0.07908105850219727, Generator Loss: 2.119980812072754
Epoch 9, Batch 3, Critic Loss: -0.36196595430374146, Generator Loss: 2.111020803451538
Epoch 9, Batch 4, Critic Loss: -0.35153359174728394, Generator Loss: 2.1422762870788574
Epoch 9, Batch 5, Critic Loss: -0.34757983684539795, Generator Loss: 2.1660122871398926
Epoch 9, Batch 6, Critic Loss: -0.281686007976532, Generator Loss: 2.153764247894287
Epoch 9, Batch 7, Critic Loss: 0.5341321229934692, Generator Loss: 2.1061794757843018
Epoch 9, Batch 8, Critic Loss: -0.19723403453826904, Generator Loss: 2.151184320449829
Epoch 9, Batch 9, Critic Loss: -0.457505464553833, Generator Loss: 2.1311707496643066
Epoch 9, Batch 10, Critic Loss: -0.4834947586

Epoch 17, Batch 7, Critic Loss: 0.585100531578064, Generator Loss: 1.0393208265304565
Epoch 17, Batch 8, Critic Loss: 0.9597650766372681, Generator Loss: 1.0855095386505127
Epoch 17, Batch 9, Critic Loss: 0.9281061291694641, Generator Loss: 1.0707123279571533
Epoch 17, Batch 10, Critic Loss: 0.9827624559402466, Generator Loss: 1.1110483407974243
Epoch 18, Batch 0, Critic Loss: 0.8332670331001282, Generator Loss: 1.124872088432312
Epoch 18, Batch 1, Critic Loss: 0.8946933150291443, Generator Loss: 1.1823087930679321
Epoch 18, Batch 2, Critic Loss: 0.8331544399261475, Generator Loss: 1.1518135070800781
Epoch 18, Batch 3, Critic Loss: 0.7187532782554626, Generator Loss: 1.145269751548767
Epoch 18, Batch 4, Critic Loss: 0.6126918792724609, Generator Loss: 1.1727294921875
Epoch 18, Batch 5, Critic Loss: 0.7154772281646729, Generator Loss: 1.159895658493042
Epoch 18, Batch 6, Critic Loss: 0.5929110050201416, Generator Loss: 1.1360753774642944
Epoch 18, Batch 7, Critic Loss: 0.446443557739257

Epoch 26, Batch 3, Critic Loss: -0.8227136731147766, Generator Loss: 0.507948637008667
Epoch 26, Batch 4, Critic Loss: -0.18445780873298645, Generator Loss: 0.6647941470146179
Epoch 26, Batch 5, Critic Loss: -0.12911099195480347, Generator Loss: 0.8045982718467712
Epoch 26, Batch 6, Critic Loss: -0.556490421295166, Generator Loss: 0.99300217628479
Epoch 26, Batch 7, Critic Loss: -0.03127896785736084, Generator Loss: 1.144178032875061
Epoch 26, Batch 8, Critic Loss: 0.0855872631072998, Generator Loss: 1.3320789337158203
Epoch 26, Batch 9, Critic Loss: -0.5016978979110718, Generator Loss: 1.4839081764221191
Epoch 26, Batch 10, Critic Loss: -0.2206312119960785, Generator Loss: 1.621110439300537
Epoch 27, Batch 0, Critic Loss: -0.07470077276229858, Generator Loss: 1.723634958267212
Epoch 27, Batch 1, Critic Loss: 0.6141234040260315, Generator Loss: 1.844082236289978
Epoch 27, Batch 2, Critic Loss: 0.6350258588790894, Generator Loss: 1.815362811088562
Epoch 27, Batch 3, Critic Loss: -1.7749

Epoch 34, Batch 10, Critic Loss: 1.512634038925171, Generator Loss: -0.7809609174728394
Epoch 35, Batch 0, Critic Loss: 1.023554801940918, Generator Loss: -0.8502629995346069
Epoch 35, Batch 1, Critic Loss: 0.3568652272224426, Generator Loss: -0.9296494722366333
Epoch 35, Batch 2, Critic Loss: 0.48655301332473755, Generator Loss: -0.9984908103942871
Epoch 35, Batch 3, Critic Loss: 1.8940761089324951, Generator Loss: -1.0585435628890991
Epoch 35, Batch 4, Critic Loss: 1.5051870346069336, Generator Loss: -1.0902421474456787
Epoch 35, Batch 5, Critic Loss: 1.6991592645645142, Generator Loss: -1.1676301956176758
Epoch 35, Batch 6, Critic Loss: 1.6420795917510986, Generator Loss: -1.1961300373077393
Epoch 35, Batch 7, Critic Loss: 1.0172537565231323, Generator Loss: -1.2475773096084595
Epoch 35, Batch 8, Critic Loss: 1.946016550064087, Generator Loss: -1.3297557830810547
Epoch 35, Batch 9, Critic Loss: 1.950516939163208, Generator Loss: -1.3533486127853394
Epoch 35, Batch 10, Critic Loss: 2

Epoch 43, Batch 7, Critic Loss: 1.1365187168121338, Generator Loss: -0.5911766290664673
Epoch 43, Batch 8, Critic Loss: 0.9472200870513916, Generator Loss: -0.5675468444824219
Epoch 43, Batch 9, Critic Loss: 0.11171650886535645, Generator Loss: -0.5600669980049133
Epoch 43, Batch 10, Critic Loss: 1.22652006149292, Generator Loss: -0.5387117862701416
Epoch 44, Batch 0, Critic Loss: 1.187409520149231, Generator Loss: -0.5478836894035339
Epoch 44, Batch 1, Critic Loss: 1.378711223602295, Generator Loss: -0.5631271600723267
Epoch 44, Batch 2, Critic Loss: 1.2812259197235107, Generator Loss: -0.49844640493392944
Epoch 44, Batch 3, Critic Loss: -1.1468406915664673, Generator Loss: -0.4296615719795227
Epoch 44, Batch 4, Critic Loss: 0.10771554708480835, Generator Loss: -0.4141906201839447
Epoch 44, Batch 5, Critic Loss: 0.3743566870689392, Generator Loss: -0.336897611618042
Epoch 44, Batch 6, Critic Loss: -0.18463635444641113, Generator Loss: -0.3024508059024811
Epoch 44, Batch 7, Critic Loss

Epoch 52, Batch 2, Critic Loss: 0.41300418972969055, Generator Loss: -0.16408947110176086
Epoch 52, Batch 3, Critic Loss: 0.18439513444900513, Generator Loss: -0.2432091385126114
Epoch 52, Batch 4, Critic Loss: -0.17932811379432678, Generator Loss: -0.21544969081878662
Epoch 52, Batch 5, Critic Loss: 0.025516122579574585, Generator Loss: -0.20388378202915192
Epoch 52, Batch 6, Critic Loss: -0.2208642214536667, Generator Loss: -0.14857906103134155
Epoch 52, Batch 7, Critic Loss: -1.6927001476287842, Generator Loss: -0.08445289731025696
Epoch 52, Batch 8, Critic Loss: -0.9435043334960938, Generator Loss: 0.013242178596556187
Epoch 52, Batch 9, Critic Loss: -0.9965274333953857, Generator Loss: 0.09760448336601257
Epoch 52, Batch 10, Critic Loss: -1.0264722108840942, Generator Loss: 0.19570386409759521
Epoch 53, Batch 0, Critic Loss: -0.4216083884239197, Generator Loss: 0.27483755350112915
Epoch 53, Batch 1, Critic Loss: 1.0029029846191406, Generator Loss: 0.3425951600074768
Epoch 53, Batc

Epoch 60, Batch 9, Critic Loss: -1.6789253950119019, Generator Loss: 0.37371397018432617
Epoch 60, Batch 10, Critic Loss: -0.11619728803634644, Generator Loss: 0.3123815655708313
Epoch 61, Batch 0, Critic Loss: -1.603347897529602, Generator Loss: 0.22383680939674377
Epoch 61, Batch 1, Critic Loss: -3.6801888942718506, Generator Loss: 0.15197551250457764
Epoch 61, Batch 2, Critic Loss: -3.0774405002593994, Generator Loss: 0.08705910295248032
Epoch 61, Batch 3, Critic Loss: -2.816777229309082, Generator Loss: -0.02790887840092182
Epoch 61, Batch 4, Critic Loss: -1.5701303482055664, Generator Loss: -0.10080848634243011
Epoch 61, Batch 5, Critic Loss: -0.9637666940689087, Generator Loss: -0.20835250616073608
Epoch 61, Batch 6, Critic Loss: -1.4563554525375366, Generator Loss: -0.2988784611225128
Epoch 61, Batch 7, Critic Loss: -1.8983827829360962, Generator Loss: -0.4078286290168762
Epoch 61, Batch 8, Critic Loss: 0.23172223567962646, Generator Loss: -0.5252687931060791
Epoch 61, Batch 9, 

Epoch 69, Batch 4, Critic Loss: -1.076282262802124, Generator Loss: -0.5580973625183105
Epoch 69, Batch 5, Critic Loss: -0.8481310606002808, Generator Loss: -0.5583338737487793
Epoch 69, Batch 6, Critic Loss: -0.9958440661430359, Generator Loss: -0.5702018737792969
Epoch 69, Batch 7, Critic Loss: -0.28688669204711914, Generator Loss: -0.5554359555244446
Epoch 69, Batch 8, Critic Loss: -0.49980753660202026, Generator Loss: -0.49275076389312744
Epoch 69, Batch 9, Critic Loss: -1.988195538520813, Generator Loss: -0.47291356325149536
Epoch 69, Batch 10, Critic Loss: -0.8981348276138306, Generator Loss: -0.40980759263038635
Epoch 70, Batch 0, Critic Loss: -0.5683048963546753, Generator Loss: -0.3178291618824005
Epoch 70, Batch 1, Critic Loss: -0.23878616094589233, Generator Loss: -0.26676368713378906
Epoch 70, Batch 2, Critic Loss: 0.011118173599243164, Generator Loss: -0.289591521024704
Epoch 70, Batch 3, Critic Loss: -2.354924201965332, Generator Loss: -0.36962366104125977
Epoch 70, Batch

Epoch 78, Batch 0, Critic Loss: -2.0582528114318848, Generator Loss: -0.05865052714943886
Epoch 78, Batch 1, Critic Loss: -3.678586721420288, Generator Loss: -0.09792996942996979
Epoch 78, Batch 2, Critic Loss: -3.0080108642578125, Generator Loss: -0.09498273581266403
Epoch 78, Batch 3, Critic Loss: -0.537288248538971, Generator Loss: -0.12426457554101944
Epoch 78, Batch 4, Critic Loss: -1.1643242835998535, Generator Loss: -0.118920236825943
Epoch 78, Batch 5, Critic Loss: -1.2586842775344849, Generator Loss: -0.1134677454829216
Epoch 78, Batch 6, Critic Loss: -0.6278986930847168, Generator Loss: -0.07857047766447067
Epoch 78, Batch 7, Critic Loss: 0.18107187747955322, Generator Loss: -0.0828729122877121
Epoch 78, Batch 8, Critic Loss: -1.3675423860549927, Generator Loss: -0.1810077428817749
Epoch 78, Batch 9, Critic Loss: -2.347475528717041, Generator Loss: -0.2130526602268219
Epoch 78, Batch 10, Critic Loss: -1.8621697425842285, Generator Loss: -0.18077252805233002
Epoch 79, Batch 0,

Epoch 86, Batch 8, Critic Loss: -2.3432717323303223, Generator Loss: -0.17081867158412933
Epoch 86, Batch 9, Critic Loss: -2.4512805938720703, Generator Loss: 0.04066689312458038
Epoch 86, Batch 10, Critic Loss: -0.850303590297699, Generator Loss: 0.2842405438423157
Epoch 87, Batch 0, Critic Loss: -3.4059791564941406, Generator Loss: 0.5751051902770996
Epoch 87, Batch 1, Critic Loss: -5.901891708374023, Generator Loss: 0.9306677579879761
Epoch 87, Batch 2, Critic Loss: -5.304632663726807, Generator Loss: 1.2274926900863647
Epoch 87, Batch 3, Critic Loss: -0.6129746437072754, Generator Loss: 1.3791805505752563
Epoch 87, Batch 4, Critic Loss: -2.0418074131011963, Generator Loss: 1.4529610872268677
Epoch 87, Batch 5, Critic Loss: -1.787848711013794, Generator Loss: 1.4836230278015137
Epoch 87, Batch 6, Critic Loss: -1.3124668598175049, Generator Loss: 1.5291156768798828
Epoch 87, Batch 7, Critic Loss: -4.265267848968506, Generator Loss: 1.4766478538513184
Epoch 87, Batch 8, Critic Loss: -

Epoch 95, Batch 4, Critic Loss: -4.010861396789551, Generator Loss: 3.594799041748047
Epoch 95, Batch 5, Critic Loss: -4.114710330963135, Generator Loss: 3.54506254196167
Epoch 95, Batch 6, Critic Loss: -3.4785866737365723, Generator Loss: 3.5617082118988037
Epoch 95, Batch 7, Critic Loss: -0.7118868827819824, Generator Loss: 3.4531917572021484
Epoch 95, Batch 8, Critic Loss: -3.1489670276641846, Generator Loss: 3.17053484916687
Epoch 95, Batch 9, Critic Loss: -4.064291000366211, Generator Loss: 3.01560640335083
Epoch 95, Batch 10, Critic Loss: -3.045576572418213, Generator Loss: 2.77986478805542
Epoch 96, Batch 0, Critic Loss: -3.8175950050354004, Generator Loss: 2.6413469314575195
Epoch 96, Batch 1, Critic Loss: -6.184364318847656, Generator Loss: 2.436890125274658
Epoch 96, Batch 2, Critic Loss: -5.3409600257873535, Generator Loss: 2.2966668605804443
Epoch 96, Batch 3, Critic Loss: -3.809201717376709, Generator Loss: 2.201850175857544
Epoch 96, Batch 4, Critic Loss: -2.6090900897979

In [None]:
# Assuming the last column name in original dataframe represents the label
all_column_names = data_frame.columns.tolist()  # This should have 101 names if the label is included in data_frame

# Generate synthetic data
noise = tf.random.normal([num_samples_to_generate, 100])
synthetic_data = generator(noise, training=False)
synthetic_data_df = pd.DataFrame(synthetic_data.numpy(), columns=all_column_names)
# Set the label for all generated data to 1
synthetic_data_df[all_column_names[-1]] = 1

In [7]:
#synthetic_data

In [8]:
# Combine original and synthetic data
enhanced_df = pd.concat([data_frame, synthetic_data_df], axis=0).reset_index(drop=True)
file_name='enhanced_GAN_final_new_par_50_LFM_space_3.csv'
file_path = os.path.join(base_dir,'data','split',ltype, file_name)
output_path = file_path
enhanced_df.to_csv(output_path, index=False)