## ARIMA Model

In [2]:
import pandas as pd
from statsmodels.tsa.arima.model import ARIMA
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
import warnings
import os
import pandas as pd
import glob
warnings.filterwarnings('ignore')

In [3]:
df_data = []
model_name = 'arima'
for site in (glob.glob("./cleaned_data/*/*")):
    try:
        data = pd.read_csv(site)

        state = site.split('/')[2]
        city = site.split('/')[3].split('_')[0]
        _id = '_'.join(site.split('/')[3].split('_')[1:])[:-4]

        # Convert the date columns to a datetime format
        data['From Date'] = pd.to_datetime(data['From Date'])
        data['To Date'] = pd.to_datetime(data['To Date'])

        # Set the date columns as the index
        data.set_index('From Date', inplace=True)

        # Drop unnecessary columns
        data.drop(['To Date'], axis=1, inplace=True)

        # Split the data into training and testing sets
        train_size = int(len(data) * 0.8)
        train_data, test_data = data[:train_size], data[train_size:]

        # Train the ARIMA model
        rf = ARIMA(train_data['PM2.5 (ug/m3)'], order=(2, 1, 2))
        rf = rf.fit()

        # Make predictions on the test set
        predictions = rf.forecast(len(test_data)).values


        # Calculate RMSE and R-squared scores
        rmse = np.sqrt(mean_squared_error(test_data['PM2.5 (ug/m3)'], predictions))
        r2 = r2_score(test_data['PM2.5 (ug/m3)'], predictions)

        n = len(test_data)
        p = test_data.shape[1]
        adj_r2 = 1 - ((1 - r2) * (n - 1)) / (n - p - 1)

        print(site)
        df_data.append([state,city,_id, model_name ,rmse, adj_r2])
    except:
        pass


./cleaned_data/Karnataka/Belgaum_site_5432.csv
./cleaned_data/Karnataka/Mangalore_site_5421.csv
./cleaned_data/Karnataka/Mysuru_site_5125.csv
./cleaned_data/Karnataka/Chikkamagaluru_site_5265.csv
./cleaned_data/Karnataka/Shivamogga_site_5266.csv
./cleaned_data/Karnataka/Vijayapura_site_5267.csv
./cleaned_data/Karnataka/Ramanagara_site_5255.csv
./cleaned_data/Karnataka/Bengaluru_site_5686.csv
./cleaned_data/Karnataka/Madikeri_site_5391.csv
./cleaned_data/Karnataka/Haveri_site_5420.csv
./cleaned_data/Karnataka/Bengaluru_site_1558.csv
./cleaned_data/Karnataka/Hassan_site_5448.csv
./cleaned_data/Karnataka/Koppal_site_5415.csv
./cleaned_data/Karnataka/Bagalkot_site_5264.csv
./cleaned_data/Karnataka/Gadag_site_5419.csv
./cleaned_data/Karnataka/Chamarajanagar_site_5124.csv
./cleaned_data/Karnataka/Bengaluru_site_5678.csv
./cleaned_data/Karnataka/Raichur_site_5414.csv
./cleaned_data/Karnataka/Bengaluru_site_1553.csv
./cleaned_data/Karnataka/Kalaburagi_site_5679.csv
./cleaned_data/Karnataka/Yad

./cleaned_data/Haryana/Kurukshetra _site_5043.csv
./cleaned_data/Haryana/Gurugram_site_5345.csv
./cleaned_data/Haryana/Panchkula_site_161.csv
./cleaned_data/Haryana/Charkhi Dadri_site_5339.csv
./cleaned_data/Haryana/Panipat_site_5048.csv
./cleaned_data/Haryana/Faridabad_site_5341.csv
./cleaned_data/Haryana/Fatehabad_site_5054.csv
./cleaned_data/Haryana/Mandikhera_site_5055.csv
./cleaned_data/Haryana/Sonipat_site_5041.csv
./cleaned_data/Haryana/Karnal_site_5049.csv
./cleaned_data/Haryana/Manesar_site_5039.csv
./cleaned_data/Haryana/Sirsa_site_5051.csv
./cleaned_data/Haryana/Hisar_site_5057.csv
./cleaned_data/Haryana/Narnaul_site_5053.csv
./cleaned_data/Gujarat/Ankleshwar_site_5065.csv
./cleaned_data/Gujarat/Gandhinagar_site_5457.csv
./cleaned_data/Gujarat/Surat_site_5664.csv
./cleaned_data/Gujarat/Gandhinagar_site_5458.csv
./cleaned_data/Gujarat/Ahmedabad_site_5456.csv
./cleaned_data/Gujarat/Ahmedabad_site_5453.csv
./cleaned_data/Gujarat/Vapi_site_5071.csv
./cleaned_data/Gujarat/Ahmedab

In [4]:
stats_df = pd.DataFrame(df_data, columns=['state', 'city', 'site_id', 'model_name' ,'rmse', 'adj_r2'])
stats_df

Unnamed: 0,state,city,site_id,model_name,rmse,adj_r2
0,Karnataka,Belgaum,site_5432,arima,14.525141,-1.427326
1,Karnataka,Mangalore,site_5421,arima,35.605598,-1.389384
2,Karnataka,Mysuru,site_5125,arima,8.065923,-0.039327
3,Karnataka,Chikkamagaluru,site_5265,arima,14.381921,-0.213845
4,Karnataka,Shivamogga,site_5266,arima,4.692986,-0.137441
...,...,...,...,...,...,...
320,Delhi,Delhi,site_1422,arima,91.627634,-0.006887
321,Delhi,Delhi,site_1420,arima,88.267384,-0.004897
322,Delhi,Delhi,site_1431,arima,94.192686,-0.027938
323,Delhi,Delhi,site_1424,arima,80.502208,-0.018445


In [5]:
stats_df.to_csv("./feature_engineering_arima.csv", header=True, index=False)