
# Train the final models on all available data (train and test splits combined); these are the models used for application.

In [1]:
from autogluon.tabular import TabularDataset, TabularPredictor
import numpy as np
import os
import pandas as pd
from nutrient_utils.my_utils import *

In [2]:

def train_autogluon_final(train_data):
    """Train the final DIN and DIP AutoGluon models and plot their training fit.

    Both targets are log-transformed before fitting. One ``TabularPredictor``
    is trained per target on all columns except ``Id``, ``lon``, ``lat`` and
    the other target. The saved predictors are then reloaded from disk, used
    to predict on the training rows, and the back-transformed (``np.exp``)
    observations and predictions are drawn side by side.

    Args:
        train_data: pandas DataFrame (or ``TabularDataset``) containing at
            least the columns ``Id``, ``DIN``, ``DIP``, ``lon`` and ``lat``
            plus the feature columns. The frame is NOT modified; the log
            transform is applied to an internal copy.

    Returns:
        None.

    Side effects:
        * Writes the two models under ``./AutogluonModels/``.
        * Saves ``scatter_plots-autogl-final.jpg`` in the working directory
          and shows the figure.
    """
    id_col, label1, label2 = 'Id', 'DIN', 'DIP'

    add_drop_labels = ['lon', 'lat']
    log_label = [label1, label2]
    model_paths = {
        label1: './AutogluonModels/autogl-DIN-without-lon-lat-logoutput-final',
        label2: './AutogluonModels/autogl-DIP-without-lon-lat-logoutput-final',
    }

    # Work on a copy: assigning the log-transformed columns to the caller's
    # frame would make a second call (e.g. re-running the notebook cell)
    # train on log(log(y)).
    train_data = train_data.copy()

    # Log-transform the targets.
    # NOTE(review): np.log gives -inf/NaN for non-positive values; the labels
    # are assumed to be strictly positive - confirm against the input data.
    for label in log_label:
        train_data[label] = np.log(train_data[label])

    # Columns that are never used as model inputs (identifier + coordinates).
    non_feature_cols = [id_col] + add_drop_labels

    # Fit one model per target; the other target is dropped so it cannot be
    # used as a feature.
    for label, other_label in ((label1, label2), (label2, label1)):
        TabularPredictor(label=label, path=model_paths[label]).fit(
            train_data.drop(columns=non_feature_cols + [other_label]),
            hyperparameters='multimodal',
            num_stack_levels=1, num_bag_folds=5,
        )

    # Reload from disk so the predictions below come from the persisted
    # artifacts, i.e. exactly what will be loaded at application time.
    predictor = TabularPredictor.load(model_paths[label1])
    predictor2 = TabularPredictor.load(model_paths[label2])

    # Predict on the training rows (features only, both targets removed).
    features = train_data.drop(columns=non_feature_cols + [label1, label2])
    preds_train = predictor.predict(features)
    preds_train2 = predictor2.predict(features)

    # NOTE(review): `plt` and `plot_scatter` are not imported explicitly in
    # this file; presumably they come from the `nutrient_utils.my_utils`
    # star import - confirm.
    plt.rcParams['font.sans-serif'] = ['Arial']

    font_dic = {"size": 16,
                "family": "Arial"}

    # Two panels side by side: DIN on the left, DIP on the right. np.exp
    # undoes the log transform so both axes are in the original units.
    _, axs = plt.subplots(1, 2, figsize=(11, 5))

    # The last plot_scatter argument (1.4 / 0.14) is presumably an axis limit
    # in mg/L - verify against plot_scatter's signature.
    plot_scatter(axs[0], np.exp(train_data[label1]), np.exp(preds_train), 'training', 1.4)
    axs[0].set_title('DIN mg/L', fontdict=font_dic)

    plot_scatter(axs[1], np.exp(train_data[label2]), np.exp(preds_train2), 'training', 0.14)
    axs[1].set_title('DIP mg/L', fontdict=font_dic)

    plt.tight_layout()
    plt.savefig('scatter_plots-autogl-final.jpg', dpi=300, bbox_inches='tight', transparent=True)
    plt.show()


In [None]:
# Directory holding the input CSV files.
data_dir = '/Nutrients/match_Acolite_L2/NN_input/with_qc/'

# The final model is trained on every labelled sample, so the former train
# and test splits are loaded and merged into a single training set.
p1_train_data = TabularDataset(os.path.join(data_dir, 'train_autogl_mean_Baozhuyu.csv'))
p2_test_data = TabularDataset(os.path.join(data_dir, 'test_autogl_mean_Baozhuyu.csv'))

# ignore_index=True gives the merged frame a fresh 0..n-1 index; without it
# the row labels of the two files are kept and overlap, leaving duplicate
# index values.
train_data = pd.concat([p1_train_data, p2_test_data], axis=0, ignore_index=True)
print(len(train_data))

train_autogluon_final(train_data)