import os.path
import shutil

import numpy as np
import pandas as pd

from ludwig.api import LudwigModel
from tests.integration_tests.utils import generate_data
from tests.integration_tests.utils import sequence_feature


def extract_train_metrics(epochs, train_results):
    # build a dataframe of per-epoch training metrics for the first output feature
    epoch = pd.DataFrame(1 + pd.Series(range(epochs)))
    df = pd.DataFrame(
        train_results['train'][list(train_results['train'].keys())[0]]
    )
    df_columns = df.columns.to_list()
    df = pd.concat([epoch, df], axis=1)
    df.columns = ['epoch'] + df_columns
    return df


###
# Main code
###

#
# setup
#
pd.set_option('display.width', 100)
pd.set_option('display.max_columns', 10)

shutil.rmtree('results', ignore_errors=True)
shutil.rmtree('data8', ignore_errors=True)
os.mkdir('data8')

input_features = [
    sequence_feature(
        min_len=5,
        max_len=10,
        encoder='rnn',
        cell_type='lstm',
        reduce_output=None
    )
]
output_features = [
    sequence_feature(
        min_len=5,
        max_len=10,
        decoder='generator',
        cell_type='lstm',
        attention='luong',  # alternative: 'bahdanau'
        reduce_input=None
    )
]

rel_path = generate_data(input_features, output_features,
                         './data8/train.csv', 500)

model_definition = {
    'input_features': input_features,
    'output_features': output_features,
    'combiner': {'type': 'concat'},
    'training': {
        'epochs': 5,
        'batch_size': 32,
        'early_stop': 0,
        'optimizer': {'type': 'adam'}
    }
}

#
# create a model with the LudwigModel API
# and train it for a few epochs
#
print('>>>> initial training <<<<<<')
model1 = LudwigModel(model_definition=model_definition)
train_results1 = model1.train(
    data_csv='./data8/train.csv',
    output_directory='results'
)

df1 = extract_train_metrics(model_definition['training']['epochs'],
                            train_results1)
print(df1[['epoch', 'loss']]
      .head(model_definition['training']['epochs']))

#
# collect weights and predictions
# (train() saved the model under results/api_experiment_run/model)
#
weights1 = model1.model.ecd.get_weights()
preds1 = model1.predict(
    data_csv='./data8/train.csv'
)

################################
#
# load the saved model
# collect predictions and weights
#
print(">>>>>>>>>>>>>> Model Restoration <<<<<<<<<<<<<<<<<<<<")
model2 = LudwigModel.load(
    os.path.join('results', 'api_experiment_run', 'model')
)
weights2_before_predict = model2.model.ecd.get_weights()
preds2 = model2.predict(
    data_csv='./data8/train.csv'
)
weights2_after_predict = model2.model.ecd.get_weights()

#
# compare the weights, they have to be the same
#
print("\n\nWeight Comparison:")
print("size of model1.weights", len(weights1))
print("size of model2.weights before predict()", len(weights2_before_predict))
print("size of model2.weights after predict()", len(weights2_after_predict))

print("compare model1.weights with model2.weights after predict()")
try:
    assert np.all([np.all(np.isclose(w1, w2))
                   for w1, w2 in zip(weights1, weights2_after_predict)])
    print('OK model weights match after predict()')
except AssertionError:
    print("FAILED weight comparison AFTER predictions")
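
#
# Optional sanity check: compare the predictions made before saving with the
# predictions made by the restored model. This is a minimal sketch that assumes
# LudwigModel.predict() returns a pandas DataFrame (Ludwig 0.2.x API); adjust
# the comparison if your Ludwig version returns a different structure.
#
print("\nPrediction Comparison:")
try:
    assert preds1.equals(preds2)
    print('OK predictions from the restored model match the original model')
except AssertionError:
    print('FAILED prediction comparison between original and restored model')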