# Acid
```
'RM01/0001'
'RM01/0004'
'RM01/0006'
'RM01/0007'
```

In [None]:
import numpy as np
from src import (extract_and_clean as ec,
                 transform as ts,
                 feature_engineer as fe,
                 forecastor as fc,
                 format_handle)

# Preparations
# Load the inputs: three FRED series, a local electricity CSV and the
# target price-evolution CSV. The trailing integer arguments are presumably
# the first and last year to keep -- confirm against src.extract_and_clean.
# NOTE(review): the CSV locations below are absolute paths on one user's
# machine; anyone else running this notebook has to edit them.
gas_df, wheat_df, ammonia_df = (
    ec.get_fred_data(series_id, 2014, 2024)
    for series_id in ('PNGASEUUSDM', 'PWHEAMTUSDM', 'WPU0652013A')
)
elec_df = ec.clean_elec_csv(
    '/Users/barryhuang/Projects/Raw_Material_Price_Prediction/data/raw/ELECTRICITY_03_2024.csv',
    2014,
    2024,
)

df = ec.clean_pred_price_evo_csv(
    "/Users/barryhuang/Projects/Raw_Material_Price_Prediction/data/raw/Dataset_Future_Predicting_Price_Evolutions_202403.csv",
    2014,
    2023,
)

# Name of the raw-material group handled by this notebook. It is passed to
# the feature pipeline and used as the label in the printed results.
target = 'acid'

# Raw-material codes that are modelled for this group.
RM_codes = [
    'RM01/0001',
    'RM01/0004',
    'RM01/0006',
    'RM01/0007',
]

# External price drivers keyed by name. The mapping is later unpacked as
# keyword arguments into fe.generate_features, so insertion order and key
# spelling are kept exactly as before.
external_drivers = dict(
    zip(
        ("PNGASEUUSDM", "PWHEAMTUSDM", "WPU0652013A", "Electricity"),
        (gas_df, wheat_df, ammonia_df, elec_df),
    )
)

# Boundaries of the evaluation windows. Each window starts on one boundary
# and ends on the next, giving ten consecutive half-year (start, end) pairs
# from 2019-01-01 to 2024-01-01.
_period_boundaries = [
    '2019-01-01',
    '2019-07-01',
    '2020-01-01',
    '2020-07-01',
    '2021-01-01',
    '2021-07-01',
    '2022-01-01',
    '2022-07-01',
    '2023-01-01',
    '2023-07-01',
    '2024-01-01',
]
test_periods = list(zip(_period_boundaries[:-1], _period_boundaries[1:]))

# Lags to evaluate; the result lines report them as "<lag>-month lag".
lags = [1, 3, 6]

# Passed unchanged to the Lasso training helpers in src.forecastor.
# Presumably the lower bound of the alpha search -- confirm there.
alpha_bottom = 0.01


In [None]:
# Impute the raw target-variable data. The second return value (`missing`)
# is forwarded to feature generation below.
imputed_df, missing = ts.impute_pred_price_evo_csv(df)

# Feature engineering: first the dummy/average-price table for the selected
# codes, then the full feature table including the external drivers.
dummy_df = ts.get_dummies_and_average_price(
    imputed_df,
    target,
    *RM_codes,
)
# The leading 1 and 12 are presumably the lag range to generate -- confirm
# against fe.generate_features.
feature_df = fe.generate_features(
    1,
    12,
    dummy_df,
    missing,
    *RM_codes,
    **external_drivers,
)

# Disabled post-processing, kept for reference:
#
# if type(feature_df.Time) != "datetime64":
#     feature_df['Time'] = pd.to_datetime(feature_df['Time'])
#
# assert feature_df['Time'].dtype == "datetime64[ns]" , "df[Time] is not datetime64."
#
# feature_df = feature_df[feature_df.Year >= 2016]
#
# NOTE(review): if this is re-enabled, `type(...) != "datetime64"` compares a
# type object with a string and is therefore always true, and `pd` is not
# imported in the cells shown here.

In [None]:
# Persistent Naive
# For every raw-material code and lag, score each evaluation window with
# fc.persistence_Naive_MAPE and print the mean of the per-window scores.
for code in RM_codes:
    for lag in lags:
        mape_values = [
            fc.persistence_Naive_MAPE(feature_df, code, lag, period)
            for period in test_periods
        ]
        # Sanity check: exactly one score per evaluation window.
        assert len(mape_values) == len(test_periods), "len(mape_values)!=len(test_periods)"

        average_mape = np.mean(mape_values)
        print(f"{target} {code}, {lag}-month lag, Naive, average MAPE: {average_mape:.3f}")

In [None]:
# Lasso with autoregression features only
# For every raw-material code and lag, call fc.train_model_AR once per
# evaluation window and print the mean of the returned scores.
for code in RM_codes:
    for lag in lags:
        mape_values = [
            fc.train_model_AR(feature_df, code, lag, period, alpha_bottom)
            for period in test_periods
        ]
        # Sanity check: exactly one score per evaluation window.
        assert len(mape_values) == len(test_periods), "len(mape_values)!=len(test_periods)"

        average_mape = np.mean(mape_values)
        print(f"{target} {code}, {lag}-month lag, AR, average MAPE: {average_mape:.3f}")

In [None]:
# Lasso with autoregression features and external price drivers
# For every raw-material code and lag, call fc.train_model_all_features once
# per evaluation window and print the mean of the returned scores.
for code in RM_codes:
    for lag in lags:
        mape_values = [
            fc.train_model_all_features(feature_df, code, lag, period, alpha_bottom)
            for period in test_periods
        ]
        # Sanity check: exactly one score per evaluation window.
        assert len(mape_values) == len(test_periods), "len(mape_values)!=len(test_periods)"

        average_mape = np.mean(mape_values)
        print(f"{target} {code}, {lag}-month lag, all features, average MAPE: {average_mape:.3f}")