In [None]:
# Enable IPython's autoreload extension; mode 2 re-imports edited modules
# before each cell runs, so changes to project code are picked up live.
%load_ext autoreload
%autoreload 2

In [None]:
from ganerator.data_loader import Loader
from ganerator.synthesizer import GANDataSynthesizer
from sklearn.preprocessing import MinMaxScaler

import pandas as pd

# Columns that encode_data expands into one-hot indicator columns via pd.get_dummies.
CATEGORIAL_COLUMNS = ["NAME_CONTRACT_TYPE", "CODE_GENDER", "FLAG_OWN_CAR", "FLAG_OWN_REALTY"]

In [None]:
def encode_data(df, columns=None):
    """One-hot encode categorical columns of ``df`` with ``pd.get_dummies``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to encode. It is not modified; a new frame is returned.
    columns : list-like of str, optional
        Names of the columns to expand into indicator columns. When omitted,
        the notebook-level ``CATEGORIAL_COLUMNS`` list is used, which keeps
        existing single-argument calls working unchanged.

    Returns
    -------
    pandas.DataFrame
        ``df`` with each selected column replaced by one
        ``"<column>_<category>"`` indicator column per category.
    """
    if columns is None:
        # Resolved at call time so edits to the notebook constant are honoured.
        columns = CATEGORIAL_COLUMNS
    return pd.get_dummies(df, columns=columns)

In [None]:
# Project-specific loader; see ganerator.data_loader for where the data comes from.
loader = Loader()

# Fetch the split identified by the key "test".
test_data = loader.load_data("test")

# One-hot encode the categorical columns, then rescale the encoded values with
# a MinMaxScaler fitted on this same data. The fitted scaler is kept so that
# generated samples can be mapped back with inverse_transform later on.
encoded_test_data = encode_data(test_data)
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(encoded_test_data.values)

In [None]:
# Configure the synthesizer: input and output widths both equal the number of
# columns in the scaled training matrix; the remaining values are
# hyperparameters passed straight to GANDataSynthesizer.
gan_synthesizer = GANDataSynthesizer(
    input_dim=scaled_data.shape[1],
    output_dim=scaled_data.shape[1],
    latent_dim=50,
    hidden_dim=128,
    num_epochs=5,
    batch_size=64,
    lr=0.0002,
)

# Fit the synthesizer on the scaled data.
gan_synthesizer.train(scaled_data)

In [None]:
# Request 1000 samples from the synthesizer and map them back to the original
# value ranges with the scaler that was fitted on the encoded data.
new_data = scaler.inverse_transform(gan_synthesizer.generate_samples(1000))

# Wrap the array in a frame labelled like the one-hot encoded input.
new_df = pd.DataFrame(data=new_data, columns=encoded_test_data.columns)

In [None]:
# Preview the first rows of the generated frame (columns are still one-hot encoded here).
new_df.head()

In [None]:
def decode_new_df(new_df, original_df):
    """Collapse one-hot indicator columns back into categorical columns.

    Every column of ``original_df`` that is absent from ``new_df`` is treated
    as a categorical column that ``pd.get_dummies`` expanded into
    ``"<column>_<category>"`` indicator columns. For each row, the category
    whose indicator holds the largest value is chosen. Comparing the whole
    group (instead of thresholding a single indicator) gives a sensible label
    for continuous generator outputs regardless of their value range.

    Parameters
    ----------
    new_df : pandas.DataFrame
        Generated data with one-hot indicator columns. It is NOT modified, so
        the calling cell can be re-run safely.
    original_df : pandas.DataFrame
        Frame whose column names and column order the result must match.

    Returns
    -------
    pandas.DataFrame
        A new frame with exactly ``original_df.columns`` in that order; the
        indicator columns are replaced by their decoded categorical column.

    Raises
    ------
    KeyError
        If a column of ``original_df`` is missing from ``new_df`` and no
        ``"<column>_..."`` indicator columns exist to rebuild it from.
    """
    original_columns = list(original_df.columns)
    known_columns = set(original_columns)
    decoded = new_df.copy()

    # Longest names first, so that a column whose name is a prefix of another
    # column's name cannot claim the other column's indicators.
    to_rebuild = sorted(
        (col for col in original_columns if col not in new_df.columns),
        key=len,
        reverse=True,
    )

    for col in to_rebuild:
        prefix = f"{col}_"
        indicator_cols = [
            name
            for name in decoded.columns
            if isinstance(name, str)
            and name.startswith(prefix)
            and name not in known_columns  # never consume a real original column
        ]
        if not indicator_cols:
            raise KeyError(
                f"Cannot decode column {col!r}: no one-hot columns starting with {prefix!r}"
            )

        # Map each indicator column name to the category label it stands for.
        labels = {name: name[len(prefix):] for name in indicator_cols}
        winners = decoded[indicator_cols].idxmax(axis=1)

        decoded = decoded.drop(columns=indicator_cols)
        decoded[col] = winners.map(labels)

    # Reorder columns to match the original dataframe's order.
    return decoded[original_columns]

In [None]:
# Turn the one-hot columns of the generated frame back into categorical labels,
# with columns ordered like test_data.
decoded_new = decode_new_df(new_df, test_data)

In [None]:
# Inspect the distinct CNT_CHILDREN values in the decoded synthetic data.
# NOTE(review): these come from inverse-scaled generator output and nothing in
# this notebook rounds them, so they may not be whole numbers — confirm.
decoded_new["CNT_CHILDREN"].unique()