### Imports

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns 
import numpy as np
import os
import json
import requests
from tqdm import tqdm
import time
import keras


import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers, models, backend as K

C:\Users\riskf\anaconda3\lib\site-packages\numpy\.libs\libopenblas64__v0.3.21-gcc_10_3_0.dll
C:\Users\riskf\anaconda3\lib\site-packages\numpy\.libs\libopenblas64__v0.3.23-246-g3d31191b-gcc_10_3_0.dll


In [2]:
# WGAN-GP Architecture
def make_generator_model(input_dim, output_dim):
    """Build the generator network that maps noise vectors to synthetic rows.

    Args:
        input_dim: Width of the noise vector accepted by the first layer.
        output_dim: Number of values produced for every generated row.

    Returns:
        A ``tf.keras.Sequential`` model made of three ReLU hidden layers
        (256, 512 and 1024 units) followed by a linear output layer with
        ``output_dim`` units.
    """
    hidden_stack = [
        layers.Dense(256, activation='relu', input_dim=input_dim),
        layers.Dense(512, activation='relu'),
        layers.Dense(1024, activation='relu'),
    ]
    # The output layer is left linear (no squashing), as used for WGAN.
    output_layer = layers.Dense(output_dim, activation='linear')
    return tf.keras.Sequential(hidden_stack + [output_layer])

def make_critic_model(input_dim):
    """Build the critic network that assigns one unbounded score per row.

    Args:
        input_dim: Width of each row fed to the critic.

    Returns:
        A ``tf.keras.Sequential`` model: Dense(512, ReLU), Dropout(0.1),
        Dense(256, ReLU) and a single-unit output layer with no activation.
    """
    critic_layers = [
        layers.Dense(512, activation='relu', input_dim=input_dim),
        layers.Dropout(0.1),
        layers.Dense(256, activation='relu'),
        # Single raw score per row: no activation on the output.
        layers.Dense(1),
    ]
    return tf.keras.Sequential(critic_layers)

# The generator turns 100-dimensional noise into 101-wide rows; the critic
# scores rows of that same width.
generator = make_generator_model(input_dim=100, output_dim=101)
critic = make_critic_model(input_dim=101)

# Losses and training
def critic_loss(real_output, fake_output):
    """Return the critic loss: mean score on fake rows minus mean score on real rows.

    Args:
        real_output: Critic scores for real samples.
        fake_output: Critic scores for generated samples.

    Returns:
        A scalar tensor; minimising it pushes real scores up and fake scores down.
    """
    mean_fake_score = tf.reduce_mean(fake_output)
    mean_real_score = tf.reduce_mean(real_output)
    return mean_fake_score - mean_real_score

def generator_loss(fake_output):
    """Return the generator loss: the negated mean critic score on generated rows.

    Args:
        fake_output: Critic scores for generated samples.

    Returns:
        A scalar tensor; minimising it raises the critic's score on fakes.
    """
    mean_fake_score = tf.reduce_mean(fake_output)
    return -mean_fake_score

def gradient_penalty(batch_size, real_images, fake_images, critic):
    """Compute the WGAN-GP gradient penalty on points between real and fake rows.

    Each interpolated row is ``epsilon * real + (1 - epsilon) * fake`` with one
    ``epsilon`` per row. The penalty is the batch mean of
    ``(||d critic / d interpolated||_2 - 1) ** 2``.

    Args:
        batch_size: Number of rows in ``real_images`` / ``fake_images``.
        real_images: Batch of real rows, 2-D (the norm is reduced over axis 1).
        fake_images: Batch of generated rows with the same shape as ``real_images``.
        critic: Callable model invoked as ``critic(x, training=True)``.

    Returns:
        A scalar tensor holding the gradient penalty (not yet scaled by lambda).
    """
    # The mixing weight must lie in [0, 1] so every interpolated point sits on
    # the straight segment between a real and a fake row. A normal draw (as used
    # previously) yields weights below 0 and above 1, i.e. extrapolated points.
    epsilon = tf.random.uniform([batch_size, 1], 0.0, 1.0)
    interpolated = epsilon * real_images + (1 - epsilon) * fake_images
    with tf.GradientTape() as tape:
        # `interpolated` is not a variable, so it has to be watched explicitly.
        tape.watch(interpolated)
        pred = critic(interpolated, training=True)
    grads = tape.gradient(pred, [interpolated])[0]
    # The small constant keeps the square root differentiable when a gradient
    # row is exactly zero (the derivative of sqrt at 0 is infinite).
    norm = tf.sqrt(tf.reduce_sum(tf.square(grads), axis=[1]) + 1e-12)
    gp = tf.reduce_mean((norm - 1.0) ** 2)
    return gp

def train_step(generator, critic, batch_size, generator_optimizer, critic_optimizer, real_features):
    """Run one simultaneous critic and generator update on a single batch.

    A column of ones is appended to ``real_features`` so the real rows have the
    same width as the generator output before both are scored by the critic.

    Args:
        generator: Keras model mapping noise to rows; its ``input_shape[1]``
            gives the noise width.
        critic: Keras model scoring rows.
        batch_size: Number of rows in ``real_features``. It must match exactly,
            because a ``(batch_size, 1)`` label column is concatenated to it.
        generator_optimizer: Optimizer applied to the generator variables.
        critic_optimizer: Optimizer applied to the critic variables.
        real_features: 2-D batch of real feature rows (without the label column).

    Returns:
        Tuple ``(crit_loss, gen_loss)`` of scalar tensors; ``crit_loss`` already
        includes the gradient penalty weighted by 10.
    """
    noise = tf.random.normal([batch_size, generator.input_shape[1]])

    # Cast explicitly so the concat below never mixes dtypes (e.g. a float64
    # NumPy batch with the float32 label column); relying on implicit
    # conversion is fragile.
    real_features = tf.cast(real_features, noise.dtype)

    # Append a label column to real_features to match the critic's input expectations
    labels = tf.ones((batch_size, 1), dtype=noise.dtype)  # Assume label 1 for all positive samples
    real_data = tf.concat([real_features, labels], axis=1)

    with tf.GradientTape() as gen_tape, tf.GradientTape() as crit_tape:
        generated_data = generator(noise, training=True)

        real_output = critic(real_data, training=True)
        fake_output = critic(generated_data, training=True)

        crit_loss = critic_loss(real_output, fake_output)
        gen_loss = generator_loss(fake_output)
        penalty = gradient_penalty(batch_size, real_data, generated_data, critic)
        crit_loss += 10 * penalty  # lambda for gradient penalty

    # Both gradients come from the same forward pass; each tape is used once.
    gradients_of_generator = gen_tape.gradient(gen_loss, generator.trainable_variables)
    gradients_of_critic = crit_tape.gradient(crit_loss, critic.trainable_variables)

    generator_optimizer.apply_gradients(zip(gradients_of_generator, generator.trainable_variables))
    critic_optimizer.apply_gradients(zip(gradients_of_critic, critic.trainable_variables))

    return crit_loss, gen_loss




In [3]:
# Build every path relative to the current working directory.
base_dir = os.getcwd()

# Available dataset families; the first one is the one processed here.
ligants_type = [
    'enzyme',
    'GPCR',
    'ion_channel',
    'nuclear_receptor',
]
ltype = ligants_type[0]

file_name = 'final_new_par_NNMF_50.csv'
file_path = os.path.join(base_dir, 'data', 'split', ltype, file_name)

# The first line of the file is skipped and pandas assigns integer column names.
data_frame = pd.read_csv(file_path, skiprows=1, header=None)

# Every column except the last holds features; the last column is the label.
labels = data_frame.iloc[:, -1].values
features = data_frame.iloc[:, :-1].values

# Keep only the rows whose label equals 1 (the positive samples).
positive_features = features[labels == 1]

In [4]:
# Show the shapes of the positive subset, the full feature matrix and the
# label vector (one per line), then the positive-row count as the cell output.
print(positive_features.shape, features.shape, labels.shape, sep='\n')
len(positive_features)

(2926, 100)
(295480, 100)
(295480,)


2926

In [5]:
# Training parameters
epochs = 100
batch_size = 256
learning_rate = 0.0001
beta_1 = 0.5

# Number of synthetic positives needed so that, after augmentation, rows with
# label 1 are as numerous as all other rows: (total - positives) - positives.
# For the enzyme file loaded above this is 295480 - 2 * 2926 = 289628, the value
# that used to be hard-coded; deriving it keeps other datasets balanced too.
num_positive_rows = int(np.sum(labels == 1))
num_samples_to_generate = max(len(labels) - 2 * num_positive_rows, 0)

generator_optimizer = Adam(learning_rate=learning_rate, beta_1=beta_1)
critic_optimizer = Adam(learning_rate=learning_rate, beta_1=beta_1)

# train_step needs exactly `batch_size` rows, so only full batches are used.
num_rows = len(positive_features)
usable_rows = (num_rows // batch_size) * batch_size

# Training loop
for epoch in range(epochs):
    # Reshuffle every epoch: without this the same trailing rows would fall in
    # the dropped partial batch each time and would never be trained on.
    row_order = np.random.permutation(num_rows)
    for batch in range(0, usable_rows, batch_size):
        real_data_batch = positive_features[row_order[batch:batch + batch_size]]
        crit_loss, gen_loss = train_step(generator, critic, batch_size, generator_optimizer, critic_optimizer, real_data_batch)
        print(f'Epoch {epoch}, Batch {batch // batch_size}, Critic Loss: {crit_loss.numpy()}, Generator Loss: {gen_loss.numpy()}')

Epoch 0, Batch 0, Critic Loss: 2.5831797122955322, Generator Loss: -0.13753053545951843
Epoch 0, Batch 1, Critic Loss: 2.5615339279174805, Generator Loss: -0.2339310646057129
Epoch 0, Batch 2, Critic Loss: 2.488785982131958, Generator Loss: -0.34682291746139526
Epoch 0, Batch 3, Critic Loss: 2.2835865020751953, Generator Loss: -0.4816269874572754
Epoch 0, Batch 4, Critic Loss: 2.375202178955078, Generator Loss: -0.6265969276428223
Epoch 0, Batch 5, Critic Loss: 2.3276145458221436, Generator Loss: -0.7966181635856628
Epoch 0, Batch 6, Critic Loss: 2.4180092811584473, Generator Loss: -1.0057199001312256
Epoch 0, Batch 7, Critic Loss: 2.393782615661621, Generator Loss: -1.1975915431976318
Epoch 0, Batch 8, Critic Loss: 2.347682476043701, Generator Loss: -1.458723545074463
Epoch 0, Batch 9, Critic Loss: 2.1378262042999268, Generator Loss: -1.708848237991333
Epoch 0, Batch 10, Critic Loss: 2.9464011192321777, Generator Loss: -2.0062437057495117
Epoch 1, Batch 0, Critic Loss: 2.9870843887329

Epoch 8, Batch 7, Critic Loss: -1.818004846572876, Generator Loss: 2.882669448852539
Epoch 8, Batch 8, Critic Loss: -1.2459869384765625, Generator Loss: 2.8429388999938965
Epoch 8, Batch 9, Critic Loss: -0.6385672092437744, Generator Loss: 2.7978830337524414
Epoch 8, Batch 10, Critic Loss: -1.4819207191467285, Generator Loss: 2.671189785003662
Epoch 9, Batch 0, Critic Loss: -1.256576418876648, Generator Loss: 2.680553913116455
Epoch 9, Batch 1, Critic Loss: -0.8771358728408813, Generator Loss: 2.5746283531188965
Epoch 9, Batch 2, Critic Loss: -0.9227097630500793, Generator Loss: 2.625418186187744
Epoch 9, Batch 3, Critic Loss: -1.1169153451919556, Generator Loss: 2.6768712997436523
Epoch 9, Batch 4, Critic Loss: -1.2380372285842896, Generator Loss: 2.6310250759124756
Epoch 9, Batch 5, Critic Loss: -1.2461861371994019, Generator Loss: 2.65565824508667
Epoch 9, Batch 6, Critic Loss: -1.2030699253082275, Generator Loss: 2.6436915397644043
Epoch 9, Batch 7, Critic Loss: -1.480072259902954,

Epoch 17, Batch 4, Critic Loss: 1.5802433490753174, Generator Loss: 0.45235228538513184
Epoch 17, Batch 5, Critic Loss: 1.702742338180542, Generator Loss: 0.4414292573928833
Epoch 17, Batch 6, Critic Loss: 1.6439911127090454, Generator Loss: 0.4212227165699005
Epoch 17, Batch 7, Critic Loss: 1.447505235671997, Generator Loss: 0.42768043279647827
Epoch 17, Batch 8, Critic Loss: 2.2462985515594482, Generator Loss: 0.4228368103504181
Epoch 17, Batch 9, Critic Loss: 3.3061182498931885, Generator Loss: 0.40616413950920105
Epoch 17, Batch 10, Critic Loss: 1.6457328796386719, Generator Loss: 0.4085942208766937
Epoch 18, Batch 0, Critic Loss: 2.0876119136810303, Generator Loss: 0.39319467544555664
Epoch 18, Batch 1, Critic Loss: 2.0985302925109863, Generator Loss: 0.3941887319087982
Epoch 18, Batch 2, Critic Loss: 2.0322678089141846, Generator Loss: 0.38816410303115845
Epoch 18, Batch 3, Critic Loss: 1.7845145463943481, Generator Loss: 0.39939987659454346
Epoch 18, Batch 4, Critic Loss: 1.7484

Epoch 26, Batch 1, Critic Loss: 1.290237307548523, Generator Loss: 0.4087753891944885
Epoch 26, Batch 2, Critic Loss: 1.2318155765533447, Generator Loss: 0.41180068254470825
Epoch 26, Batch 3, Critic Loss: 1.1578595638275146, Generator Loss: 0.3917742669582367
Epoch 26, Batch 4, Critic Loss: 1.0368882417678833, Generator Loss: 0.38016852736473083
Epoch 26, Batch 5, Critic Loss: 1.0967682600021362, Generator Loss: 0.3798505365848541
Epoch 26, Batch 6, Critic Loss: 1.0545461177825928, Generator Loss: 0.3660615086555481
Epoch 26, Batch 7, Critic Loss: 0.8171195387840271, Generator Loss: 0.3483710289001465
Epoch 26, Batch 8, Critic Loss: 1.7447670698165894, Generator Loss: 0.3512984812259674
Epoch 26, Batch 9, Critic Loss: 2.3037209510803223, Generator Loss: 0.33360597491264343
Epoch 26, Batch 10, Critic Loss: 1.0929040908813477, Generator Loss: 0.32569748163223267
Epoch 27, Batch 0, Critic Loss: 1.447236180305481, Generator Loss: 0.3136221766471863
Epoch 27, Batch 1, Critic Loss: 1.299795

Epoch 34, Batch 8, Critic Loss: 1.5638957023620605, Generator Loss: 0.2528236508369446
Epoch 34, Batch 9, Critic Loss: 1.97991943359375, Generator Loss: 0.2469206154346466
Epoch 34, Batch 10, Critic Loss: 1.1797829866409302, Generator Loss: 0.23373164236545563
Epoch 35, Batch 0, Critic Loss: 1.2197620868682861, Generator Loss: 0.23305535316467285
Epoch 35, Batch 1, Critic Loss: 0.9261850714683533, Generator Loss: 0.21741105616092682
Epoch 35, Batch 2, Critic Loss: 0.8030219674110413, Generator Loss: 0.2132861316204071
Epoch 35, Batch 3, Critic Loss: 1.2915083169937134, Generator Loss: 0.19773510098457336
Epoch 35, Batch 4, Critic Loss: 1.151574730873108, Generator Loss: 0.19842010736465454
Epoch 35, Batch 5, Critic Loss: 1.2639787197113037, Generator Loss: 0.18914151191711426
Epoch 35, Batch 6, Critic Loss: 1.2899669408798218, Generator Loss: 0.17631465196609497
Epoch 35, Batch 7, Critic Loss: 1.2376043796539307, Generator Loss: 0.17252326011657715
Epoch 35, Batch 8, Critic Loss: 1.606

Epoch 43, Batch 5, Critic Loss: 0.9725485444068909, Generator Loss: 0.6360499262809753
Epoch 43, Batch 6, Critic Loss: 0.9024379253387451, Generator Loss: 0.6154858469963074
Epoch 43, Batch 7, Critic Loss: 0.7880021929740906, Generator Loss: 0.5917288661003113
Epoch 43, Batch 8, Critic Loss: 1.2173337936401367, Generator Loss: 0.5730921030044556
Epoch 43, Batch 9, Critic Loss: 0.7165383100509644, Generator Loss: 0.553946852684021
Epoch 43, Batch 10, Critic Loss: 0.8938001394271851, Generator Loss: 0.5300343036651611
Epoch 44, Batch 0, Critic Loss: 1.3990206718444824, Generator Loss: 0.5070757865905762
Epoch 44, Batch 1, Critic Loss: 1.6617531776428223, Generator Loss: 0.4768027663230896
Epoch 44, Batch 2, Critic Loss: 1.8089388608932495, Generator Loss: 0.45659148693084717
Epoch 44, Batch 3, Critic Loss: 1.0568946599960327, Generator Loss: 0.41989022493362427
Epoch 44, Batch 4, Critic Loss: 1.049607276916504, Generator Loss: 0.3932226300239563
Epoch 44, Batch 5, Critic Loss: 1.15496206

Epoch 52, Batch 2, Critic Loss: 0.8805087804794312, Generator Loss: 0.8532065153121948
Epoch 52, Batch 3, Critic Loss: 0.5825631618499756, Generator Loss: 0.8558206558227539
Epoch 52, Batch 4, Critic Loss: 0.5717182159423828, Generator Loss: 0.8767198324203491
Epoch 52, Batch 5, Critic Loss: 0.7372341156005859, Generator Loss: 0.8597018122673035
Epoch 52, Batch 6, Critic Loss: 0.5545257329940796, Generator Loss: 0.8505809307098389
Epoch 52, Batch 7, Critic Loss: 0.4790986180305481, Generator Loss: 0.8553580045700073
Epoch 52, Batch 8, Critic Loss: 0.8635756373405457, Generator Loss: 0.8635259866714478
Epoch 52, Batch 9, Critic Loss: 1.4973225593566895, Generator Loss: 0.8277029991149902
Epoch 52, Batch 10, Critic Loss: 0.6144474744796753, Generator Loss: 0.7763741612434387
Epoch 53, Batch 0, Critic Loss: 0.8811724781990051, Generator Loss: 0.7562226057052612
Epoch 53, Batch 1, Critic Loss: 0.9268350601196289, Generator Loss: 0.7381801009178162
Epoch 53, Batch 2, Critic Loss: 1.01640009

Epoch 60, Batch 10, Critic Loss: 2.8769569396972656, Generator Loss: -0.34195223450660706
Epoch 61, Batch 0, Critic Loss: 2.3054046630859375, Generator Loss: -0.35924068093299866
Epoch 61, Batch 1, Critic Loss: 2.197849988937378, Generator Loss: -0.3802647292613983
Epoch 61, Batch 2, Critic Loss: 2.1128997802734375, Generator Loss: -0.39547520875930786
Epoch 61, Batch 3, Critic Loss: 2.490114688873291, Generator Loss: -0.41863197088241577
Epoch 61, Batch 4, Critic Loss: 2.6161160469055176, Generator Loss: -0.44794076681137085
Epoch 61, Batch 5, Critic Loss: 2.6421866416931152, Generator Loss: -0.4727897644042969
Epoch 61, Batch 6, Critic Loss: 2.559636116027832, Generator Loss: -0.5056437253952026
Epoch 61, Batch 7, Critic Loss: 2.573269844055176, Generator Loss: -0.5532490611076355
Epoch 61, Batch 8, Critic Loss: 2.6088175773620605, Generator Loss: -0.58050537109375
Epoch 61, Batch 9, Critic Loss: 2.2786202430725098, Generator Loss: -0.6306371688842773
Epoch 61, Batch 10, Critic Loss:

Epoch 69, Batch 6, Critic Loss: 1.5359113216400146, Generator Loss: 0.21246865391731262
Epoch 69, Batch 7, Critic Loss: 1.2688536643981934, Generator Loss: 0.20442208647727966
Epoch 69, Batch 8, Critic Loss: 1.128735065460205, Generator Loss: 0.1874890774488449
Epoch 69, Batch 9, Critic Loss: -0.3440173268318176, Generator Loss: 0.1741023063659668
Epoch 69, Batch 10, Critic Loss: 1.5803782939910889, Generator Loss: 0.16161686182022095
Epoch 70, Batch 0, Critic Loss: 1.334352970123291, Generator Loss: 0.15230393409729004
Epoch 70, Batch 1, Critic Loss: 0.8499468564987183, Generator Loss: 0.14204564690589905
Epoch 70, Batch 2, Critic Loss: 0.8298153877258301, Generator Loss: 0.12282714247703552
Epoch 70, Batch 3, Critic Loss: 1.8263697624206543, Generator Loss: 0.11456787586212158
Epoch 70, Batch 4, Critic Loss: 1.7076189517974854, Generator Loss: 0.10226714611053467
Epoch 70, Batch 5, Critic Loss: 1.9235224723815918, Generator Loss: 0.09497010707855225
Epoch 70, Batch 6, Critic Loss: 1.

Epoch 78, Batch 3, Critic Loss: -0.17740797996520996, Generator Loss: 1.253969669342041
Epoch 78, Batch 4, Critic Loss: -0.07361188530921936, Generator Loss: 1.228179693222046
Epoch 78, Batch 5, Critic Loss: 0.028970181941986084, Generator Loss: 1.1864644289016724
Epoch 78, Batch 6, Critic Loss: -0.06649041175842285, Generator Loss: 1.186837911605835
Epoch 78, Batch 7, Critic Loss: -0.3886755108833313, Generator Loss: 1.1403547525405884
Epoch 78, Batch 8, Critic Loss: -0.12732535600662231, Generator Loss: 1.0899324417114258
Epoch 78, Batch 9, Critic Loss: -0.8038866519927979, Generator Loss: 1.0318083763122559
Epoch 78, Batch 10, Critic Loss: -0.00858953595161438, Generator Loss: 0.9747709631919861
Epoch 79, Batch 0, Critic Loss: 0.5471897721290588, Generator Loss: 0.9514498710632324
Epoch 79, Batch 1, Critic Loss: 1.0189471244812012, Generator Loss: 0.9012115001678467
Epoch 79, Batch 2, Critic Loss: 1.163800835609436, Generator Loss: 0.8460484147071838
Epoch 79, Batch 3, Critic Loss: 

Epoch 86, Batch 8, Critic Loss: 0.3438710868358612, Generator Loss: 0.26580023765563965
Epoch 86, Batch 9, Critic Loss: 0.07725845277309418, Generator Loss: 0.3025714159011841
Epoch 86, Batch 10, Critic Loss: 0.5109696388244629, Generator Loss: 0.26234811544418335
Epoch 87, Batch 0, Critic Loss: 0.5126030445098877, Generator Loss: 0.26497650146484375
Epoch 87, Batch 1, Critic Loss: 0.4277745485305786, Generator Loss: 0.2876876890659332
Epoch 87, Batch 2, Critic Loss: 0.3726796805858612, Generator Loss: 0.3190107047557831
Epoch 87, Batch 3, Critic Loss: 0.14826156198978424, Generator Loss: 0.3494429588317871
Epoch 87, Batch 4, Critic Loss: 0.25887879729270935, Generator Loss: 0.3766906261444092
Epoch 87, Batch 5, Critic Loss: 0.3628346621990204, Generator Loss: 0.4158352315425873
Epoch 87, Batch 6, Critic Loss: 0.22763186693191528, Generator Loss: 0.4613569974899292
Epoch 87, Batch 7, Critic Loss: 0.1196628212928772, Generator Loss: 0.49636322259902954
Epoch 87, Batch 8, Critic Loss: 0.

Epoch 95, Batch 3, Critic Loss: 0.6870899200439453, Generator Loss: -0.6623505353927612
Epoch 95, Batch 4, Critic Loss: 1.1363000869750977, Generator Loss: -0.6929222941398621
Epoch 95, Batch 5, Critic Loss: 1.1684210300445557, Generator Loss: -0.731458306312561
Epoch 95, Batch 6, Critic Loss: 1.148705005645752, Generator Loss: -0.7548256516456604
Epoch 95, Batch 7, Critic Loss: 1.4484593868255615, Generator Loss: -0.780614972114563
Epoch 95, Batch 8, Critic Loss: 1.0403931140899658, Generator Loss: -0.8188709616661072
Epoch 95, Batch 9, Critic Loss: 1.136801838874817, Generator Loss: -0.8453922867774963
Epoch 95, Batch 10, Critic Loss: 1.5417500734329224, Generator Loss: -0.8660503625869751
Epoch 96, Batch 0, Critic Loss: 1.2211185693740845, Generator Loss: -0.8943337798118591
Epoch 96, Batch 1, Critic Loss: 0.8498655557632446, Generator Loss: -0.9302892684936523
Epoch 96, Batch 2, Critic Loss: 0.6829916834831238, Generator Loss: -0.9579414129257202
Epoch 96, Batch 3, Critic Loss: 0.9

In [6]:
# Column names of the loaded frame; the last one is treated as the label column.
all_column_names = data_frame.columns.tolist()  # This should have 101 names if the label is included in data_frame

# Generate synthetic data
# Read the noise width from the generator (the same lookup train_step uses)
# instead of repeating the literal 100, so the two cannot drift apart.
noise_dim = generator.input_shape[1]
noise = tf.random.normal([num_samples_to_generate, noise_dim])
synthetic_data = generator(noise, training=False)
synthetic_data_df = pd.DataFrame(synthetic_data.numpy(), columns=all_column_names)
# Set the label for all generated data to 1 (overwrites the generator's last output column)
synthetic_data_df[all_column_names[-1]] = 1

In [7]:
#synthetic_data

In [8]:
# Stack the generated rows underneath the original rows and renumber the index.
enhanced_df = pd.concat([data_frame, synthetic_data_df], ignore_index=True)

# Write the combined table into the same folder the input was read from.
file_name = 'enhanced_GAN_final_new_par_50_NNFM_space_2.csv'
file_path = os.path.join(base_dir, 'data', 'split', ltype, file_name)
output_path = file_path
enhanced_df.to_csv(output_path, index=False)