<a href="https://colab.research.google.com/github/wolf1729/AQI_Prediction/blob/main/LSTM.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
!pip install keras
!pip install tensorflow

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_squared_error

In [3]:
# Read the data from CSV
link = "/content/delhi_aqi_day_wise.csv"
data = pd.read_csv(link)

In [4]:
# Check for missing values
print(data.isnull().sum())

# Drop rows with missing values
data.dropna(inplace=True)

City            0
Date            0
PM2.5           2
PM10           77
NO              2
NO2             2
NOx             0
NH3             9
CO              0
SO2           110
O3             84
Benzene         0
Toluene         0
AQI            10
AQI_Bucket     10
dtype: int64


In [5]:
# Convert date strings to datetime objects
data['Date'] = pd.to_datetime(data['Date'], format="%d-%m-%Y")

# Create a new column 'Season' based on the 'Date' column
data['Season'] = data['Date'].dt.month.map({1: 'Winter', 2: 'Winter', 3: 'Spring', 4: 'Spring', 5: 'Spring', 6: 'Summer', 7: 'Summer', 8: 'Summer', 9: 'Autumn', 10: 'Autumn', 11: 'Autumn', 12: 'Winter'})

# Convert datetime to numerical representation
data['Date'] = (data['Date'] - pd.to_datetime("2015-01-01")).dt.total_seconds() / (24 * 60 * 60)


In [6]:
# Select features for the model
features = ['Date','PM2.5', 'PM10', 'NO', 'NO2', 'NOx', 'NH3', 'CO', 'SO2', 'O3']
x = data[features]

# Select prediction target
y = data['AQI']

# Split the data into training and validation data
train_x, val_x, train_y, val_y = train_test_split(x, y, random_state=0)


In [7]:
# Normalize the data
scaler = MinMaxScaler()
train_x_scaled = scaler.fit_transform(train_x)
val_x_scaled = scaler.transform(val_x)

In [8]:
# Reshape the data
train_x_reshaped = np.reshape(train_x_scaled, (train_x_scaled.shape[0], 1, train_x_scaled.shape[1]))
val_x_reshaped = np.reshape(val_x_scaled, (val_x_scaled.shape[0], 1, val_x_scaled.shape[1]))


In [9]:
# Build the LSTM model
model = Sequential()
model.add(LSTM(units=100, activation='relu', input_shape=(1, train_x.shape[1])))
model.add(Dense(units=1))

In [10]:
# Compile and train the model
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(train_x_reshaped, train_y, epochs=10, batch_size=32)

# Evaluate the model
predictions = model.predict(val_x_reshaped)

print(mean_absolute_error(val_y, predictions))
print(mean_squared_error(val_y, predictions))
print(data['AQI'].head(), predictions[:5])

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
194.34963320673398
46831.33996293371
0    472.0
1    454.0
2    143.0
3    319.0
4    325.0
Name: AQI, dtype: float64 [[ 56.259365]
 [ 62.204548]
 [ 58.794773]
 [100.16777 ]
 [129.21089 ]]


In [11]:
# Calculate accuracy
threshold = 300 # Define a threshold to classify the predictions
predicted_classes = np.where(predictions > threshold, 1, 0)
actual_classes = np.where(val_y > threshold, 1, 0)
accuracy = np.mean(predicted_classes == actual_classes)
print("Accuracy:", accuracy)

Accuracy: 0.6093418259023354
