In [2]:
from autogluon.tabular import TabularDataset, TabularPredictor
import numpy as np
import os
import pandas as pd
from nutrient_utils.my_utils import *

In [28]:

def _fit_log_target_predictor(train_log, target, other_target, non_features, model_path):
    """Fit one AutoGluon predictor for `target` on an already log-transformed training frame.

    The identifier/coordinate columns in `non_features` and the other target column are
    removed before fitting, so the model only sees the remaining columns plus `target`.
    """
    return TabularPredictor(label=target, path=model_path).fit(
        train_log.drop(columns=non_features + [other_target]),
        hyperparameters='multimodal',
        num_stack_levels=1, num_bag_folds=5,
    )


def _save_true_vs_pred(true_values, log_preds, csv_path):
    """Write a two-column ('true', 'pred') CSV; predictions are mapped back from log space."""
    # axis=1 concat aligns the two series on their index before the columns are renamed.
    frame = pd.concat([true_values, np.exp(log_preds)], axis=1)
    frame.columns = ['true', 'pred']
    frame.to_csv(csv_path, index=False)


def train_autogluon(train_data, test_data, individual_buoy):
    """Train log-target AutoGluon models for the 'DIN' and 'DIP' columns and export results.

    One predictor per target is fitted on `train_data` with the target replaced by its
    natural log; the columns 'Id', 'lon', 'lat' and the other target are excluded from
    the features. Each predictor is then applied to the train and test features, and the
    predictions are exponentiated and written next to the observed values.

    Parameters
    ----------
    train_data, test_data : pandas.DataFrame-like
        Must contain the columns 'Id', 'lon', 'lat', 'DIN' and 'DIP'. Neither frame is
        modified: the log transform is applied to a copy of `train_data`.
    individual_buoy : str
        Tag inserted into the model directory names and the CSV file names.

    Side effects
    ------------
    * Models are saved under
      ``./AutogluonModels/autogl-<target>-without-lon-lat-logoutput-<individual_buoy>``.
    * ``<target>_train-<individual_buoy>.csv`` and ``<target>_test-<individual_buoy>.csv``
      are written to the current working directory for both targets, each with the
      columns 'true' (observed value as given in the input) and 'pred'.
    * The global matplotlib 'font.sans-serif' setting is set to Arial.

    Returns
    -------
    None
    """
    id_col, label1, label2 = 'Id', 'DIN', 'DIP'
    add_drop_labels = ['lon', 'lat']
    targets = [label1, label2]
    non_features = [id_col] + add_drop_labels

    # Log-transform the targets on a copy so the caller's frame keeps its raw values
    # and calling this function again on the same frame does not take the log twice.
    train_log = train_data.copy()
    for target in targets:
        train_log[target] = np.log(train_log[target])

    # Fit both models before exporting anything, so a failed fit leaves no partial CSVs.
    predictors = {}
    for target, other_target in ((label1, label2), (label2, label1)):
        model_path = f'./AutogluonModels/autogl-{target}-without-lon-lat-logoutput-{individual_buoy}'
        # fit() returns the fitted predictor, so it is used directly rather than reloaded.
        predictors[target] = _fit_log_target_predictor(
            train_log, target, other_target, non_features, model_path
        )

    # Both targets are dropped for prediction, so the same feature frames serve both models.
    train_features = train_data.drop(columns=non_features + targets)
    test_features = test_data.drop(columns=non_features + targets)

    # Session-wide font setting; it also affects figures drawn after this call.
    # NOTE(review): `plt` is not imported explicitly in the visible cells - presumably
    # it comes from the `nutrient_utils.my_utils` star import; confirm.
    plt.rcParams['font.sans-serif'] = ['Arial']

    for target in targets:
        predictor = predictors[target]
        _save_true_vs_pred(train_data[target], predictor.predict(train_features),
                           f'{target}_train-{individual_buoy}.csv')
        _save_true_vs_pred(test_data[target], predictor.predict(test_features),
                           f'{target}_test-{individual_buoy}.csv')


In [29]:
# Directory holding the per-buoy train/test CSV splits, and the buoys to process.
data_dir = '/Python/Nutrients/match_Acolite_L2/NN_input/with_qc'
individual_buoys = ['Baozhuyu']

# For every buoy: load its train and test tables, then train and export the results.
for individual_buoy in individual_buoys:
    train_csv = os.path.join(data_dir, f'train_autogl_mean_{individual_buoy}.csv')
    test_csv = os.path.join(data_dir, f'test_autogl_mean_{individual_buoy}.csv')
    train_data = TabularDataset(train_csv)
    test_data = TabularDataset(test_csv)
    train_autogluon(train_data, test_data, individual_buoy)