# Non-ionic surfactant
```
RM12/0002, RM12/0003, RM12/0005, RM12/0008, RM12/0012
```

In [1]:
%%capture
!pip install fredapi # To install additional package for Datalab

In [2]:
import preprocessor as pre
import forecastor as fc
import pandas as pd
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import RandomizedSearchCV
from sklearn.metrics import mean_absolute_percentage_error
import numpy as np

# Import data
gas_df = pre.get_Fred_data('PNGASEUUSDM',2014,2024)
crude_oil_df = pre.get_Fred_data('POILBREUSDM',2014,2024)
palm_oil_df = pre.get_Fred_data('PPOILUSDM',2014,2024)
ammonia_df = pre.get_Fred_data('WPU0652013A',2014,2024)
elec_df = pre.clean_elec_csv('Data_flat_files/ELECTRICITY_03_2024.csv',2014,2024)

df = pre.clean_pred_price_evo_csv("Data_flat_files/Dataset_Future_Predicting_Price_Evolutions_202403.csv",2014,2023)

target = 'non-ionic surfactant'.lower()
RM_codes = ['RM12/0002', 'RM12/0003', 'RM12/0005', 'RM12/0008', 'RM12/0012']

external_drivers = {
    "PNGASEUUSDM": gas_df,
    'POILBREUSDM': crude_oil_df,
    'PPOILUSDM': palm_oil_df,
    "WPU0652013A": ammonia_df,
    "Electricity": elec_df
}

test_periods = [
    ('2019-01-01', '2019-07-01'),
    ('2019-07-01', '2020-01-01'),
    ('2020-01-01', '2020-07-01'),
    ('2020-07-01', '2021-01-01'),
    ('2021-01-01', '2021-07-01'),
    ('2021-07-01', '2022-01-01'),
    ('2022-01-01', '2022-07-01'),
    ('2022-07-01', '2023-01-01'),
    ('2023-01-01', '2023-07-01'),
    ('2023-07-01', '2024-01-01')
]

lags = [1,3,6]

alpha_bottom = 0.01

In [3]:
# Impute raw data of target variables 
imputed_df, missing = pre.impute_pred_price_evo_csv(df)

# Feature engineering
dummy_df = pre.get_dummies_and_average_price(imputed_df,target,*RM_codes)
feature_df = pre.generate_features(1,12,dummy_df,missing,*RM_codes, **external_drivers)

if type(feature_df.Time) != "datetime64":
    feature_df['Time'] = pd.to_datetime(feature_df['Time'])

assert feature_df['Time'].dtype == "datetime64[ns]" , "df[Time] is not dataetime64."

feature_df = feature_df[feature_df.Year >= 2016]

In [4]:
# Persistent Naive
for code in RM_codes:
    for lag in lags:
        for period in test_periods:
            mape_values = list()
            result = fc.persistence_Naive_MAPE(feature_df,code,lag,period)
            mape_values.append(result)
            
        average_mape = np.mean(mape_values) 
        print(f"{target} {code}, {lag}-month lag, Naive, average MAPE: {average_mape:.3f}")

non-ionic surfactant RM12/0002, 1-month lag, Naive, average MAPE: 15.768
non-ionic surfactant RM12/0002, 3-month lag, Naive, average MAPE: 15.981
non-ionic surfactant RM12/0002, 6-month lag, Naive, average MAPE: 11.142
non-ionic surfactant RM12/0003, 1-month lag, Naive, average MAPE: 3.192
non-ionic surfactant RM12/0003, 3-month lag, Naive, average MAPE: 5.685
non-ionic surfactant RM12/0003, 6-month lag, Naive, average MAPE: 6.249
non-ionic surfactant RM12/0005, 1-month lag, Naive, average MAPE: 3.845
non-ionic surfactant RM12/0005, 3-month lag, Naive, average MAPE: 6.782
non-ionic surfactant RM12/0005, 6-month lag, Naive, average MAPE: 5.701
non-ionic surfactant RM12/0008, 1-month lag, Naive, average MAPE: 7.938
non-ionic surfactant RM12/0008, 3-month lag, Naive, average MAPE: 8.441
non-ionic surfactant RM12/0008, 6-month lag, Naive, average MAPE: 9.026
non-ionic surfactant RM12/0012, 1-month lag, Naive, average MAPE: 14.379
non-ionic surfactant RM12/0012, 3-month lag, Naive, average 

In [5]:
# Lasso with autoregression features only
for code in RM_codes:
    for lag in lags:
        for period in test_periods:
            mape_values = list()
            result = fc.train_model_AR(feature_df,code,lag,period,alpha_bottom)
            mape_values.append(result)
            
        average_mape = np.mean(mape_values) 
        print(f"{target} {code}, {lag}-month lag, AR, average MAPE: {average_mape:.3f}")

non-ionic surfactant RM12/0002, 1-month lag, AR, average MAPE: 12.362
non-ionic surfactant RM12/0002, 3-month lag, AR, average MAPE: 11.901
non-ionic surfactant RM12/0002, 6-month lag, AR, average MAPE: 8.200
non-ionic surfactant RM12/0003, 1-month lag, AR, average MAPE: 3.927
non-ionic surfactant RM12/0003, 3-month lag, AR, average MAPE: 5.970
non-ionic surfactant RM12/0003, 6-month lag, AR, average MAPE: 8.069
non-ionic surfactant RM12/0005, 1-month lag, AR, average MAPE: 5.821
non-ionic surfactant RM12/0005, 3-month lag, AR, average MAPE: 7.483
non-ionic surfactant RM12/0005, 6-month lag, AR, average MAPE: 5.079
non-ionic surfactant RM12/0008, 1-month lag, AR, average MAPE: 7.223
non-ionic surfactant RM12/0008, 3-month lag, AR, average MAPE: 7.254
non-ionic surfactant RM12/0008, 6-month lag, AR, average MAPE: 8.818
non-ionic surfactant RM12/0012, 1-month lag, AR, average MAPE: 17.678
non-ionic surfactant RM12/0012, 3-month lag, AR, average MAPE: 12.445
non-ionic surfactant RM12/0012

In [6]:
# Lasso with autoregression features and external price drivers
for code in RM_codes:
    for lag in lags:
        mape_values = list()
        for period in test_periods:
            result = fc.train_model_all_features(feature_df,code,lag,period,alpha_bottom)
            mape_values.append(result)
            
        average_mape = np.mean(mape_values) 
        print(f"{target} {code}, {lag}-month lag, all features, average MAPE: {average_mape:.3f}")

non-ionic surfactant RM12/0002, 1-month lag, all features, average MAPE: 12.971
non-ionic surfactant RM12/0002, 3-month lag, all features, average MAPE: 15.945
non-ionic surfactant RM12/0002, 6-month lag, all features, average MAPE: 14.231
non-ionic surfactant RM12/0003, 1-month lag, all features, average MAPE: 10.613
non-ionic surfactant RM12/0003, 3-month lag, all features, average MAPE: 11.299
non-ionic surfactant RM12/0003, 6-month lag, all features, average MAPE: 12.556
non-ionic surfactant RM12/0005, 1-month lag, all features, average MAPE: 12.122
non-ionic surfactant RM12/0005, 3-month lag, all features, average MAPE: 13.039
non-ionic surfactant RM12/0005, 6-month lag, all features, average MAPE: 12.558
non-ionic surfactant RM12/0008, 1-month lag, all features, average MAPE: 12.170
non-ionic surfactant RM12/0008, 3-month lag, all features, average MAPE: 12.428
non-ionic surfactant RM12/0008, 6-month lag, all features, average MAPE: 14.665
non-ionic surfactant RM12/0012, 1-month 