In [None]:
import tensorflow as tf
from tensorflow.keras import layers
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

In [None]:
# Define the generator model
def build_generator(latent_dim, output_shape):
    """Return an uncompiled, fully connected generator network.

    Args:
        latent_dim: Length of the noise vector the first layer accepts.
        output_shape: Number of units in the final layer (one per generated
            feature).

    Returns:
        A ``tf.keras.Sequential`` model: Dense(256, relu) -> Dense(512, relu)
        -> Dense(output_shape, sigmoid). The sigmoid keeps every output in
        [0, 1].
    """
    stack = [
        layers.Dense(256, input_dim=latent_dim, activation='relu'),
        layers.Dense(512, activation='relu'),
        layers.Dense(output_shape, activation='sigmoid'),
    ]
    return tf.keras.Sequential(stack)

In [None]:
# Define the discriminator model
def build_discriminator(input_shape):
    """Return an uncompiled, fully connected binary classifier.

    Args:
        input_shape: Number of features in each sample fed to the first layer.

    Returns:
        A ``tf.keras.Sequential`` model: Dense(512, relu) -> Dense(256, relu)
        -> Dense(1, sigmoid), i.e. a single score in [0, 1] per sample.
    """
    stack = [
        layers.Dense(512, input_shape=(input_shape,), activation='relu'),
        layers.Dense(256, activation='relu'),
        layers.Dense(1, activation='sigmoid'),
    ]
    return tf.keras.Sequential(stack)

In [None]:
# Combine the generator and discriminator into a GAN model
def build_gan(generator, discriminator):
    """Stack ``generator`` and ``discriminator`` into one uncompiled model.

    Side effect: sets ``discriminator.trainable = False`` on the object that
    was passed in (it is not copied), so the caller's discriminator is
    modified too.

    Returns:
        A ``tf.keras.Sequential`` that feeds the generator's output into the
        discriminator.
    """
    # Freeze discriminator during GAN training
    discriminator.trainable = False
    return tf.keras.Sequential([generator, discriminator])

In [None]:
# Function to train the GAN
def train_gan(generator, discriminator, gan, epochs, batch_size, latent_dim, real_data):
    """Alternate discriminator and generator updates for ``epochs`` iterations.

    Every iteration performs one discriminator update on real rows, one on
    generated rows, and one generator update through ``gan``. ``epochs``
    therefore counts training steps, not full passes over ``real_data``.

    Args:
        generator: Model exposing ``predict(noise, verbose=0)``.
        discriminator: Compiled model exposing ``train_on_batch(x, y)``.
        gan: Compiled stacked model exposing ``train_on_batch(x, y)``.
        epochs: Number of training iterations to run.
        batch_size: Number of noise vectors drawn per step, and the maximum
            number of real rows used per step.
        latent_dim: Length of each noise vector.
        real_data: 2-D array-like of real samples, one sample per row.

    Returns:
        None. Progress is printed every 10 iterations.
    """
    # Convert once so rows can be indexed positionally whatever the input type.
    real_array = np.asarray(real_data)
    n_real = real_array.shape[0]

    for epoch in range(epochs):
        # Generate synthetic data from random noise.
        # verbose=0 keeps predict() from printing a progress bar on every step.
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        generated_data = generator.predict(noise, verbose=0)

        # Pick the real rows for this step. Labels must have one entry per row,
        # so when more rows than batch_size are available a random mini-batch
        # is drawn; otherwise every row is used as-is.
        if n_real > batch_size:
            row_idx = np.random.randint(0, n_real, batch_size)
            real_batch = real_array[row_idx]
        else:
            real_batch = real_array

        # Labels for real and generated data
        labels_real = np.ones((real_batch.shape[0], 1))
        labels_fake = np.zeros((batch_size, 1))

        # Train discriminator on real and generated data separately
        d_loss_real = discriminator.train_on_batch(real_batch, labels_real)
        d_loss_fake = discriminator.train_on_batch(generated_data, labels_fake)
        d_loss = 0.5 * np.add(d_loss_real, d_loss_fake)

        # Train the generator via the GAN model: fresh noise labelled "real"
        noise = np.random.normal(0, 1, (batch_size, latent_dim))
        labels_gan = np.ones((batch_size, 1))
        g_loss = gan.train_on_batch(noise, labels_gan)

        # Print progress
        if epoch % 10 == 0:
            # train_on_batch returns a bare scalar when no metrics were
            # compiled and [loss, metric, ...] otherwise; atleast_1d handles both.
            print(f"Epoch {epoch}, D Loss: {np.atleast_1d(d_loss)[0]}, G Loss: {g_loss}")

In [None]:
# Training configuration
latent_dim = 100  # length of each noise vector fed to the generator
input_shape = 63857  # features per sample; presumably the column count of the dataset loaded below (gene expressions + 1 label column) — TODO confirm against real_data.shape[1]
epochs = 10000  # NOTE(review): the train_gan(...) call in the training cell passes a literal 100 instead of this value
batch_size = 601  # NOTE(review): presumably equal to the number of rows in the dataset — TODO confirm

In [None]:
# Load the input table: gene expressions joined with one ground truth column
# (cancerous tissue or non-cancerous). The 'Unnamed: 0' column is dropped.
real_data = (
    pd.read_csv("G:\\My Drive\\HATCH24\\allData.csv")
    .drop(columns=['Unnamed: 0'])
)

In [None]:
# Display the loaded table.
real_data

In [None]:
# Largest value anywhere in the table (maximum of the per-row maxima).
# The trailing call parentheses are required; without them the cell shows a bound method.
real_data.max(axis=1).max()

In [None]:
# Per-row minimum (axis=1 reduces across the columns of each row).
real_data.min(axis=1)

In [None]:
# (rows, columns) of the loaded table.
real_data.shape

In [None]:
# Show the table as a plain NumPy array (the result is displayed, not stored).
real_data.to_numpy()

In [None]:
from sklearn.preprocessing import MinMaxScaler

# Fit a min-max scaler on the float-cast table, then rescale the table with it.
# The result is wrapped in a fresh DataFrame, so its columns are labelled 0..N-1.
scaler_model = MinMaxScaler().fit(real_data.astype(float))
scaled_data = pd.DataFrame(scaler_model.transform(real_data))

In [None]:
# Display the rescaled table.
scaled_data

In [None]:
# Distinct values in column 63856 (presumably the ground truth column — TODO confirm).
set(scaled_data[63856])

In [None]:
# Build and compile the discriminator.
# Order matters: this compile() runs before build_gan() below sets
# discriminator.trainable = False. Presumably the intent is that the
# standalone discriminator keeps learning while the stacked GAN updates only
# the generator — TODO confirm this holds for the installed Keras version.
discriminator = build_discriminator(input_shape)
discriminator.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])

# Build and compile the generator.
# NOTE(review): train_gan only calls generator.predict(); the generator is
# updated through `gan`, so this compile() looks unnecessary — confirm.
generator = build_generator(latent_dim, input_shape)
generator.compile(loss='binary_crossentropy', optimizer='adam')

# Build and compile the stacked GAN model (generator followed by discriminator).
gan = build_gan(generator, discriminator)
gan.compile(loss='binary_crossentropy', optimizer='adam')

In [None]:
# Train the GAN on the min-max scaled table rather than the raw one: the
# generator ends in a sigmoid, so its samples lie in [0, 1], and they are
# later mapped back to the original range with scaler_model.inverse_transform.
# NOTE: this runs 100 iterations; the `epochs` configuration value is not used here.
train_gan(generator, discriminator, gan, 100, batch_size, latent_dim, scaled_data.to_numpy())

In [None]:
# Draw 2000 standard-normal noise vectors and run them through the generator.
noise_for_generation = np.random.normal(loc=0, scale=1, size=(2000, latent_dim))
generated_data = generator.predict(noise_for_generation)

In [None]:
# Smallest value in the generated array (a single reduction already yields a scalar).
generated_data.min()

In [None]:
# Largest value in the generated array (a single reduction already yields a scalar).
generated_data.max()


In [None]:
# Clip negatives to 0, round to the nearest integer and store as unsigned ints.
# NOTE(review): the generator ends in a sigmoid, so these raw outputs lie in
# [0, 1] and round to 0 or 1 — confirm that a binary table is intended here.
GeneratedDF = (
    pd.DataFrame(generated_data)
    .clip(lower=0)
    .round()
    .astype('uint')
)
GeneratedDF

In [None]:
# Per-row maximum of the rounded generated table.
GeneratedDF.max(axis=1)

In [None]:
# Map generator output back to the original value range, then floor negatives at 0.
unscaled_generated_data = np.clip(scaler_model.inverse_transform(generated_data), 0, None)
unscaled_generated_data

In [None]:
# Largest integer-truncated value in the unscaled generated data.
pd.DataFrame(unscaled_generated_data).astype(int).to_numpy().max()


In [None]:
# Smallest integer-truncated value in the unscaled generated data.
pd.DataFrame(unscaled_generated_data).astype(int).to_numpy().min()

In [None]:
# Write the unscaled samples to CSV as non-negative integers.
# NOTE(review): the file name says "1000Epochs" but the training cell runs 100 iterations — confirm.
(
    pd.DataFrame(unscaled_generated_data)
    .astype(int)
    .abs()
    .to_csv("G:\\My Drive\\HATCH24\\generatedDataGAN1000EpochsNormalizedv5.csv")
)

In [None]:
# Write the rounded raw generator output (GeneratedDF) to CSV.
GeneratedDF.to_csv("G:\\My Drive\\HATCH24\\generatedDataGAN1000EpochsNormalv3.csv")