# About this notebook

This notebook uses PyCaret for model implementation.

This notebook does the following...
- Normalize and Transform Data
- Get the top 3 models according to RMSE
- Tune and Blend top 3 models and compare
- Evaluate the models
- Finalize the best model and output `irrice_final_model.pkl`

# Model Implementation for Irrigated Rice

In [3]:
# Import necessary libraries
import pandas as pd
import matplotlib.pyplot as plt
from pycaret.regression import *

### Load data

In [4]:
# Climate observations. header=0 together with names= replaces the header row
# already present in the CSV with the readable labels below.
climate_columns = ['Date', 'Rainfall', 'Max Temperature', 'Min Temperature', 'Relative Humidity', 'Wind Speed']
climate_df = pd.read_csv('./climateipynb_output.csv', header=0, names=climate_columns)

# Irrigated and rainfed palay (rice) production, relabelled the same way.
rice_columns = ['Date', 'Irrigated Palay Production', 'Rainfed Palay Production']
rice_df = pd.read_csv('./riceipynb_output1.csv', header=0, names=rice_columns)

### Merge Data

In [5]:
# Join climate and rice records on their shared Date column.
# An outer join keeps dates that appear in only one of the two files; the
# columns coming from the other file are NaN on those rows.
# NOTE(review): rows with a NaN production value reach the model setup below -
# confirm that is intended.
climate_rice_df = climate_df.merge(rice_df, on='Date', how='outer')

In [None]:
climate_rice_df.head(n=5)  # Preview the first five rows of the merged data

### Create 'Year' and 'Quarter' columns by separating Date

In [7]:
# Add Year and Quarter columns derived from Date, then remove Date.
# The guard keeps this cell re-runnable: once Date has been dropped, a second
# run would otherwise raise KeyError on climate_rice_df['Date'].
if 'Date' in climate_rice_df.columns:
    parsed_dates = pd.to_datetime(climate_rice_df['Date'])  # parse once, reuse for both columns
    # assign() appends Year and Quarter after the existing columns; rebinding the
    # name (instead of inplace=True) leaves the merged frame from the previous
    # cell untouched.
    climate_rice_df = climate_rice_df.assign(
        Year=parsed_dates.dt.year,
        Quarter=parsed_dates.dt.quarter,
    ).drop(columns='Date')

climate_rice_df.to_csv('combined_data.csv', index=False)  # Save data

# Last expression in the cell, so the notebook actually renders the preview.
climate_rice_df.head()

### Separate Irrigated Palay Production and Rainfed Palay Production

In [8]:
# Irrigated palay modelling frame: the five climate features plus the target column.
climate_features = ['Rainfall', 'Max Temperature', 'Min Temperature', 'Relative Humidity', 'Wind Speed']
irrigated_palay_df = climate_rice_df[climate_features + ['Irrigated Palay Production']]

# Rainfed palay frame (disabled; this notebook models irrigated production only)
# rainfed_palay_df = climate_rice_df[['Rainfall', 'Max Temperature', 'Min Temperature', 'Relative Humidity', 'Wind Speed', 'Rainfed Palay Production']]

In [None]:
irrigated_palay_df.head(n=5)  # Preview the first five rows of the modelling frame

In [None]:
# Initialise the PyCaret regression experiment for irrigated palay production:
# 5-fold cross-validation with feature normalization and transformation enabled.
# session_id is fixed so repeated runs use the same random seed.
exp_irrigated = setup(
    data=irrigated_palay_df,
    target='Irrigated Palay Production',
    session_id=123,
    fold=5,
    normalize=True,
    transformation=True,
)

In [11]:
# Metric used to rank models in compare_models and to optimize tuning/blending below.
metric = "RMSE"

In [None]:
# Compare the candidate regressors, rank them by `metric`, and keep the top three.
# With n_select=3, best_model is a list of three models (indexed as best_model[0] later).
best_model = exp_irrigated.compare_models(
    n_select=3,
    sort=metric,
)

In [13]:
# Normalization and transformation improve model performance

In [None]:
# Re-initialise the experiment quietly (verbose=False) with the same settings as
# the comparison run. The return value is intentionally not bound: the original
# assigned it to `ir_finalize` and overwrote it on the very next line.
setup(
    data=irrigated_palay_df,
    target='Irrigated Palay Production',
    session_id=123,
    verbose=False,
    fold=5,
    normalize=True,
    transformation=True,
)

# 'et' is PyCaret's model ID for the Extra Trees regressor.
# NOTE(review): the model is hard-coded rather than taken from best_model[0] -
# confirm 'et' is still the top-ranked model whenever the data changes.
ir_finalize = create_model('et')

# Finalize the model and persist it; per the notebook summary this produces
# `irrice_final_model.pkl`.
final_ir = finalize_model(ir_finalize)
save_model(final_ir, 'irrice_final_model')


In [None]:
# Feature Importance / Variable Coefficients
# Feature names come from the frame the model was trained on (minus the target)
# instead of the first five columns of climate_rice_df, so the labels stay
# correct if that wider frame's column order or the feature count changes.
# NOTE(review): assumes the importances are reported in the training frame's
# column order - confirm against the fitted pipeline.
feature_names = irrigated_palay_df.columns.drop('Irrigated Palay Production')
for feature_name, importance in zip(feature_names, final_ir.feature_importances_):
    print(feature_name, importance.round(4))


In [None]:
# Feature-importance plot for the top-ranked model from the comparison run.
exp_irrigated.plot_model(
    best_model[0],
    plot="feature",
)

In [None]:
# Open PyCaret's evaluation view for the finalized model.
evaluate_model(estimator=final_ir)

In [18]:
# Run only the cells above this point to save memory

## Stop here to save time

The code above this section saves the best model. <br>

The code below is exploratory code for tuning and blending the models, which most of the time does not produce better models. <br>
If you are running this notebook multiple times for testing, it's better to run only the code above (to save processing power and time), <br>
unless you are specifically trying something with tuning and blending.


In [None]:
# Put the model error into the context of the typical production volume.
# NOTE(review): the numeric literals look like metrics copied by hand from an
# earlier run - confirm they still match the current results.
mean_production = irrigated_palay_df["Irrigated Palay Production"].mean()
print(mean_production)  # Average volume of production
print(325272 / mean_production)  # Average margin of error
234833 / 154971  # RMSE / MAE

In [None]:
# NOTE(review): this cell repeats the computation of the cell directly above it;
# consider deleting one of the two.
production_mean = irrigated_palay_df["Irrigated Palay Production"].mean()
print(production_mean)  # Average volume of production
print(325272 / production_mean)  # Average margin of error
234833 / 154971  # RMSE / MAE

## Tune and Blend

In [None]:
# Tune each of the top models with 3-fold cross-validation, optimizing `metric`.
tuned_best_model = [
    tune_model(candidate, fold=3, optimize=metric)
    for candidate in best_model
]

In [None]:
# Blend the tuned models into a single ensemble, again with 3 folds and `metric`.
blend = blend_models(
    estimator_list=tuned_best_model,
    fold=3,
    optimize=metric,
)

In [23]:
# Tuned version of the first entry of best_model (tuned_best_model keeps best_model's order).
tuned_top_model = tuned_best_model[0]

In [None]:
# Evaluate the blended ensemble first, then the tuned top model.
for candidate_model in (blend, tuned_top_model):
    evaluate_model(candidate_model)