import os.path
import shutil

import numpy as np
import pandas as pd

from ludwig.api import LudwigModel
from tests.integration_tests.utils import generate_data
from tests.integration_tests.utils import sequence_feature


def extract_train_metrics(epochs, train_results):
    # build a dataframe of per-epoch training metrics for the first output feature
    epoch = pd.DataFrame(1 + pd.Series(range(epochs)))
    df = pd.DataFrame(
        train_results['train'][list(train_results['train'].keys())[0]]
    )
    df_columns = df.columns.to_list()
    df = pd.concat([epoch, df], axis=1)
    df.columns = ['epoch'] + df_columns
    return df


###
# Main code
###

#
# setup
#
pd.set_option('display.width', 100)
pd.set_option('display.max_columns', 10)

shutil.rmtree('results', ignore_errors=True)
shutil.rmtree('data8', ignore_errors=True)
os.mkdir('data8')

input_features = [
    sequence_feature(
        min_len=5,
        max_len=10,
        encoder='rnn',
        cell_type='lstm',
        reduce_output=None
    )
]
output_features = [
    sequence_feature(
        min_len=5,
        max_len=10,
        decoder='generator',
        cell_type='lstm',
        attention='luong',  # alternative: 'bahdanau'
        reduce_input=None
    )
]

rel_path = generate_data(input_features, output_features,
                         './data8/train.csv', 500)

model_definition = {
    'input_features': input_features,
    'output_features': output_features,
    'combiner': {'type': 'concat'},
    'training': {
        'epochs': 5,
        'batch_size': 32,
        'early_stop': 0,
        'optimizer': {'type': 'adam'}
    }
}

#
# create a model with the LudwigModel API
# and train it for a few epochs
#
print('>>>> initial training <<<<<<')
model1 = LudwigModel(model_definition=model_definition)
train_results1 = model1.train(
    data_csv='./data8/train.csv',
    output_directory='results'
)

df1 = extract_train_metrics(model_definition['training']['epochs'],
                            train_results1)
print(df1[['epoch', 'loss']]
      .head(model_definition['training']['epochs']))

#
# collect weights and predictions
# (train() saved the model under results/api_experiment_run/model)
#
weights1 = model1.model.ecd.get_weights()
preds1 = model1.predict(
    data_csv='./data8/train.csv'
)

################################
#
# load the saved model
# collect predictions and weights
#
print(">>>>>>>>>>>>>> Model Restoration <<<<<<<<<<<<<<<<<<<<")
model2 = LudwigModel.load(
    os.path.join('results', 'api_experiment_run', 'model')
)
weights2_before_predict = model2.model.ecd.get_weights()
preds2 = model2.predict(
    data_csv='./data8/train.csv'
)
weights2_after_predict = model2.model.ecd.get_weights()

#
# compare the weights, they have to be the same
#
print("\n\nWeight Comparison:")
print("size of model1.weights", len(weights1))
print("size of model2.weights before predict()", len(weights2_before_predict))
print("size of model2.weights after predict()", len(weights2_after_predict))

print("compare model1.weights with model2.weights after predict()")
try:
    assert np.all([np.all(np.isclose(w1, w2))
                   for w1, w2 in zip(weights1, weights2_after_predict)])
    print('OK model weights match after predict()')
except AssertionError:
    print("FAILED weight comparison AFTER predictions")
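
#
# Optional sanity check: compare the predictions made before saving with the
# predictions made by the restored model. This is a minimal sketch that assumes
# LudwigModel.predict() returns a pandas DataFrame (Ludwig 0.2.x API); adjust
# the comparison if your Ludwig version returns a different structure.
#
print("\nPrediction Comparison:")
try:
    assert preds1.equals(preds2)
    print('OK predictions from the restored model match the original model')
except AssertionError:
    print('FAILED prediction comparison between original and restored model')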