In [None]:
import getpass
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.layers import LSTM, Dense
from tensorflow.keras.models import Sequential

In [None]:
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders

def send_email(subject, body, to_email, from_email, from_password, file_path=None):
    """Send a plain-text email through Gmail's SMTP server, optionally with one attachment.

    Args:
        subject: Subject line of the message.
        body: Plain-text body of the message.
        to_email: Recipient address.
        from_email: Sender address; also used as the SMTP login name.
        from_password: Password (e.g. a Gmail app password) for ``from_email``.
        file_path: Optional path of a file to attach. Falsy values mean no attachment.

    Raises:
        OSError: If ``file_path`` is given but cannot be opened or read.

    Connection, authentication and delivery errors are not raised: they are
    reported on stdout and the function returns normally (best-effort send).
    """
    # Set up the email parameters
    msg = MIMEMultipart()
    msg['From'] = from_email
    msg['To'] = to_email
    msg['Subject'] = subject

    # Attach the email body text
    msg.attach(MIMEText(body, 'plain'))

    # Attach a file, if specified
    if file_path:
        # Read through a context manager so the handle is closed even if read() fails.
        with open(file_path, "rb") as attachment:
            payload = attachment.read()
        part = MIMEBase('application', 'octet-stream')
        part.set_payload(payload)
        encoders.encode_base64(part)
        # Advertise only the base name (not the local directory layout) and let the
        # email package quote/encode it, so names with spaces or non-ASCII survive.
        part.add_header('Content-Disposition', 'attachment',
                        filename=os.path.basename(file_path))
        msg.attach(part)

    # Connect to the Gmail SMTP server and send the email
    try:
        # The context manager issues QUIT and closes the socket on every path,
        # including a failed starttls/login/sendmail; the timeout prevents a
        # stalled connection from hanging the notebook indefinitely.
        with smtplib.SMTP('smtp.gmail.com', 587, timeout=30) as server:
            server.starttls()
            server.login(from_email, from_password)
            server.sendmail(from_email, to_email, msg.as_string())
        print("Email sent successfully.")
    except Exception as e:
        # Deliberately best-effort: report the failure instead of aborting the run.
        print(f"Error: {e}")


In [None]:
# Mount Google Drive into the Colab runtime so files under /content/drive
# can be read by later cells. This only works inside Google Colab.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Read the raw sensor log from Drive.
data = pd.read_csv('/content/drive/MyDrive/Data2.csv')

# Index the frame by the parsed 'time' column, exposed under the name 'timestamp'.
data = data.set_index(pd.to_datetime(data['time']).rename('timestamp'))

# Each 'value' string carries three readings; pull every one out with its own
# pattern and store it as a float column (rows that do not match become NaN).
reading_patterns = {
    'temperature': r'Temperature = ([\d.]+)',
    'humidity': r'Humidity = ([\d.]+)',
    'aqi': r'Air Quality = (\d+)',
}
for reading_name, reading_pattern in reading_patterns.items():
    data[reading_name] = data['value'].str.extract(reading_pattern).astype(float)

# The raw text columns are no longer needed once the readings are extracted.
data.drop(columns=['value', 'time'], inplace=True)

In [None]:
# Preview the first rows of the parsed readings.
data.head()

Unnamed: 0_level_0,temperature,humidity,aqi
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-10-29 09:56:14.647258456+00:00,30.8,71.0,195.0
2024-10-29 09:56:30.833291225+00:00,30.8,71.0,185.0
2024-10-29 09:57:01.988378378+00:00,30.8,71.0,195.0
2024-10-29 09:57:17.663395787+00:00,30.8,71.0,192.0
2024-10-29 09:57:33.628309926+00:00,30.8,71.0,194.0


In [None]:
# Check dtypes and non-null counts; rows whose 'value' text did not match the
# extraction patterns show up here as missing readings.
data.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 3548 entries, 2024-10-29 09:56:14.647258456+00:00 to 2024-10-31 11:23:50.898014347+00:00
Data columns (total 3 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   temperature  3544 non-null   float64
 1   humidity     3544 non-null   float64
 2   aqi          3544 non-null   float64
dtypes: float64(3)
memory usage: 110.9 KB


In [None]:
# Lag-feature builder
def create_lagged_features(df, lags=5):
    """Add lagged copies of the temperature, humidity and AQI columns to ``df``.

    For every lag ``k`` in ``1..lags`` the columns ``temp_t-k``, ``hum_t-k`` and
    ``aqi_t-k`` are written, each holding the source column shifted down by ``k``
    rows (so the first ``k`` rows of each new column are NaN).

    Args:
        df: DataFrame with 'temperature', 'humidity' and 'aqi' columns.
            It is modified in place.
        lags: Number of lag steps to generate.

    Returns:
        The same DataFrame object that was passed in, now with the lag columns.
    """
    # (column-name prefix, source column), in the order the columns are emitted per lag
    lag_sources = (('temp', 'temperature'), ('hum', 'humidity'), ('aqi', 'aqi'))
    for step in range(1, lags + 1):
        for prefix, source in lag_sources:
            df[f'{prefix}_t-{step}'] = df[source].shift(step)
    return df

# Apply lag feature creation (the helper writes the lag columns into `data`
# itself and returns the same frame).
data = create_lagged_features(data)

# Drop rows with NaN values due to lagging. This also removes any row with a
# missing reading or a missing lag of one.
# NOTE(review): this cell is not idempotent - re-running it recomputes the lags
# on the already shortened frame and drops another leading block of rows.
data.dropna(inplace=True)


In [None]:
# Preview the frame with its lag columns after the incomplete rows were dropped.
data.head(5)

Unnamed: 0_level_0,temperature,humidity,aqi,temp_t-1,hum_t-1,aqi_t-1,temp_t-2,hum_t-2,aqi_t-2,temp_t-3,hum_t-3,aqi_t-3,temp_t-4,hum_t-4,aqi_t-4,temp_t-5,hum_t-5,aqi_t-5
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1
2024-10-29 09:57:49.300079539+00:00,30.8,71.0,184.0,30.8,71.0,194.0,30.8,71.0,192.0,30.8,71.0,195.0,30.8,71.0,185.0,30.8,71.0,195.0
2024-10-29 09:58:05.273975800+00:00,30.8,71.0,190.0,30.8,71.0,184.0,30.8,71.0,194.0,30.8,71.0,192.0,30.8,71.0,195.0,30.8,71.0,185.0
2024-10-29 09:58:36.411568494+00:00,30.8,71.0,192.0,30.8,71.0,190.0,30.8,71.0,184.0,30.8,71.0,194.0,30.8,71.0,192.0,30.8,71.0,195.0
2024-10-29 09:58:52.068244169+00:00,30.8,71.0,190.0,30.8,71.0,192.0,30.8,71.0,190.0,30.8,71.0,184.0,30.8,71.0,194.0,30.8,71.0,192.0
2024-10-29 09:59:08.130321423+00:00,30.8,71.0,192.0,30.8,71.0,190.0,30.8,71.0,192.0,30.8,71.0,190.0,30.8,71.0,184.0,30.8,71.0,194.0


In [None]:
# Targets are the AQI, temperature and humidity readings one row ahead.
forecast_horizon = 1  # number of rows to look ahead

# target column -> source column, in the order the target columns are added
target_sources = {
    'aqi_t+1': 'aqi',
    'temp_t+1': 'temperature',
    'hum_t+1': 'humidity',
}
for target_name, source_name in target_sources.items():
    # A negative shift pulls the following row's value up onto the current row.
    data[target_name] = data[source_name].shift(-forecast_horizon)

# The trailing row has no following reading, so its targets are NaN.
data.dropna(inplace=True)
data.head()

Unnamed: 0_level_0,temperature,humidity,aqi,temp_t-1,hum_t-1,aqi_t-1,temp_t-2,hum_t-2,aqi_t-2,temp_t-3,...,aqi_t-3,temp_t-4,hum_t-4,aqi_t-4,temp_t-5,hum_t-5,aqi_t-5,aqi_t+1,temp_t+1,hum_t+1
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2024-10-29 09:57:49.300079539+00:00,30.8,71.0,184.0,30.8,71.0,194.0,30.8,71.0,192.0,30.8,...,195.0,30.8,71.0,185.0,30.8,71.0,195.0,190.0,30.8,71.0
2024-10-29 09:58:05.273975800+00:00,30.8,71.0,190.0,30.8,71.0,184.0,30.8,71.0,194.0,30.8,...,192.0,30.8,71.0,195.0,30.8,71.0,185.0,192.0,30.8,71.0
2024-10-29 09:58:36.411568494+00:00,30.8,71.0,192.0,30.8,71.0,190.0,30.8,71.0,184.0,30.8,...,194.0,30.8,71.0,192.0,30.8,71.0,195.0,190.0,30.8,71.0
2024-10-29 09:58:52.068244169+00:00,30.8,71.0,190.0,30.8,71.0,192.0,30.8,71.0,190.0,30.8,...,184.0,30.8,71.0,194.0,30.8,71.0,192.0,192.0,30.8,71.0
2024-10-29 09:59:08.130321423+00:00,30.8,71.0,192.0,30.8,71.0,190.0,30.8,71.0,192.0,30.8,...,190.0,30.8,71.0,184.0,30.8,71.0,194.0,182.0,30.8,71.0


In [None]:
# Targets (y) are the three look-ahead columns; features (X) are everything
# left once the current readings and the targets are removed, i.e. the lags.
target_columns = ['aqi_t+1', 'temp_t+1', 'hum_t+1']
current_reading_columns = ['aqi', 'temperature', 'humidity']

y = data[target_columns]
X = data.drop(columns=current_reading_columns + target_columns)

In [None]:
# Preview the feature matrix (lag columns only).
X.head()

Unnamed: 0_level_0,temp_t-1,hum_t-1,aqi_t-1,temp_t-2,hum_t-2,aqi_t-2,temp_t-3,hum_t-3,aqi_t-3,temp_t-4,hum_t-4,aqi_t-4,temp_t-5,hum_t-5,aqi_t-5
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2024-10-29 09:57:49.300079539+00:00,30.8,71.0,194.0,30.8,71.0,192.0,30.8,71.0,195.0,30.8,71.0,185.0,30.8,71.0,195.0
2024-10-29 09:58:05.273975800+00:00,30.8,71.0,184.0,30.8,71.0,194.0,30.8,71.0,192.0,30.8,71.0,195.0,30.8,71.0,185.0
2024-10-29 09:58:36.411568494+00:00,30.8,71.0,190.0,30.8,71.0,184.0,30.8,71.0,194.0,30.8,71.0,192.0,30.8,71.0,195.0
2024-10-29 09:58:52.068244169+00:00,30.8,71.0,192.0,30.8,71.0,190.0,30.8,71.0,184.0,30.8,71.0,194.0,30.8,71.0,192.0
2024-10-29 09:59:08.130321423+00:00,30.8,71.0,190.0,30.8,71.0,192.0,30.8,71.0,190.0,30.8,71.0,184.0,30.8,71.0,194.0


In [None]:
# Scale features and targets to [0, 1] using statistics from the training
# portion only. Fitting the scalers on the full dataset would leak the
# min/max of the held-out test rows into preprocessing.
#
# The row count below mirrors what train_test_split(test_size=0.2, shuffle=False)
# produces: the test set gets ceil(0.2 * n) trailing rows, the rest is training.
test_fraction = 0.2
n_train = len(X) - int(np.ceil(test_fraction * len(X)))

scaler_X = MinMaxScaler()
scaler_X.fit(X.iloc[:n_train])      # learn feature ranges from the training rows
X_scaled = scaler_X.transform(X)    # apply them to every row (test rows may fall outside [0, 1])

scaler_y = MinMaxScaler()
scaler_y.fit(y.iloc[:n_train])      # learn target ranges from the training rows
y_scaled = scaler_y.transform(y)    # apply them to every row



In [None]:
# Report the unscaled min/max each scaler learned when it was fitted.
feature_names = X.columns  # labels for the feature table
target_names = y.columns   # labels for the target table

original_X_min, original_X_max = scaler_X.data_min_, scaler_X.data_max_
original_y_min, original_y_max = scaler_y.data_min_, scaler_y.data_max_

# One table per scaler: name, learned minimum, learned maximum.
features_df = pd.DataFrame({
    'Feature': feature_names,
    'Original Min': original_X_min,
    'Original Max': original_X_max,
})
targets_df = pd.DataFrame({
    'Target Variable': target_names,
    'Original Min': original_y_min,
    'Original Max': original_y_max,
})

# Print each table under its heading.
for heading, table in (
    ("Original Feature Ranges:", features_df),
    ("\nOriginal Target Variable Ranges:", targets_df),
):
    print(heading)
    print(table)

Original Feature Ranges:
     Feature  Original Min  Original Max
0   temp_t-1          29.8          32.8
1    hum_t-1          68.0          91.0
2    aqi_t-1          50.0         436.0
3   temp_t-2          29.8          32.8
4    hum_t-2          68.0          91.0
5    aqi_t-2          50.0         436.0
6   temp_t-3          29.8          32.8
7    hum_t-3          68.0          91.0
8    aqi_t-3          50.0         436.0
9   temp_t-4          29.8          32.8
10   hum_t-4          68.0          91.0
11   aqi_t-4          50.0         436.0
12  temp_t-5          29.8          32.8
13   hum_t-5          68.0          91.0
14   aqi_t-5          50.0         436.0

Original Target Variable Ranges:
  Target Variable  Original Min  Original Max
0         aqi_t+1          50.0         436.0
1        temp_t+1          29.8          32.8
2         hum_t+1          68.0          91.0


In [None]:
# Inspect the first ten scaled feature rows.
X_scaled[:10]

array([[0.33333333, 0.13043478, 0.37305699, 0.33333333, 0.13043478,
        0.36787565, 0.33333333, 0.13043478, 0.37564767, 0.33333333,
        0.13043478, 0.34974093, 0.33333333, 0.13043478, 0.37564767],
       [0.33333333, 0.13043478, 0.34715026, 0.33333333, 0.13043478,
        0.37305699, 0.33333333, 0.13043478, 0.36787565, 0.33333333,
        0.13043478, 0.37564767, 0.33333333, 0.13043478, 0.34974093],
       [0.33333333, 0.13043478, 0.3626943 , 0.33333333, 0.13043478,
        0.34715026, 0.33333333, 0.13043478, 0.37305699, 0.33333333,
        0.13043478, 0.36787565, 0.33333333, 0.13043478, 0.37564767],
       [0.33333333, 0.13043478, 0.36787565, 0.33333333, 0.13043478,
        0.3626943 , 0.33333333, 0.13043478, 0.34715026, 0.33333333,
        0.13043478, 0.37305699, 0.33333333, 0.13043478, 0.36787565],
       [0.33333333, 0.13043478, 0.3626943 , 0.33333333, 0.13043478,
        0.36787565, 0.33333333, 0.13043478, 0.3626943 , 0.33333333,
        0.13043478, 0.34715026, 0.33333333, 

In [None]:
# Chronological hold-out: with shuffle=False the last 20% of rows form the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y_scaled, test_size=0.2, shuffle=False
)

# The LSTM expects [samples, timesteps, features]; insert a length-1 timestep
# axis so every row becomes a single-step sequence.
X_train = X_train[:, np.newaxis, :]
X_test = X_test[:, np.newaxis, :]

In [None]:
# Inspect the first three training samples in their 3D LSTM layout.
X_train[:3]

array([[[0.33333333, 0.13043478, 0.37305699, 0.33333333, 0.13043478,
         0.36787565, 0.33333333, 0.13043478, 0.37564767, 0.33333333,
         0.13043478, 0.34974093, 0.33333333, 0.13043478, 0.37564767]],

       [[0.33333333, 0.13043478, 0.34715026, 0.33333333, 0.13043478,
         0.37305699, 0.33333333, 0.13043478, 0.36787565, 0.33333333,
         0.13043478, 0.37564767, 0.33333333, 0.13043478, 0.34974093]],

       [[0.33333333, 0.13043478, 0.3626943 , 0.33333333, 0.13043478,
         0.34715026, 0.33333333, 0.13043478, 0.37305699, 0.33333333,
         0.13043478, 0.36787565, 0.33333333, 0.13043478, 0.37564767]]])

In [None]:
# Single-layer LSTM regressor with one linear output per target.
model = Sequential()
model.add(LSTM(50, activation='relu', input_shape=(X_train.shape[1], X_train.shape[2])))
model.add(Dense(3))  # Output layer has 3 neurons for AQI, temperature, and humidity predictions
model.compile(optimizer='adam', loss='mse')

# Early stopping to avoid overfitting. restore_best_weights=True rolls the model
# back to the epoch with the lowest validation loss; without it the model keeps
# the weights from `patience` epochs after the best one.
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)

# Train the model. Validation uses the last 10% of the training rows rather than
# the test set: choosing the stopping epoch on (X_test, y_test) would tune the
# model on the data it is later scored on and make the test RMSE optimistic.
history = model.fit(X_train, y_train, epochs=100, batch_size=32, validation_split=0.1,
                    callbacks=[early_stopping], verbose=1)

  super().__init__(**kwargs)


Epoch 1/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 22ms/step - loss: 0.0526 - val_loss: 0.0081
Epoch 2/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0039 - val_loss: 0.0017
Epoch 3/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0016 - val_loss: 0.0022
Epoch 4/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - loss: 0.0017 - val_loss: 0.0022
Epoch 5/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0011 - val_loss: 0.0026
Epoch 6/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0012 - val_loss: 0.0027
Epoch 7/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0014 - val_loss: 0.0024
Epoch 8/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.0012 - val_loss: 0.0034
Epoch 9/100
[1m89/89[0m [32m━━━━━━━━━━━━━━━━

In [None]:
# Predict on test data. The model was trained on scaled targets, so these
# predictions are in the scaled space too, one column per target in the order of `y`.
y_pred = model.predict(X_test)

[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 14ms/step


In [None]:
# Evaluate model's RMSE for each target variable, in each target's original units
from sklearn.metrics import mean_squared_error

# y_test and y_pred are min-max scaled; an RMSE computed on them is a unitless
# fraction of each target's range and cannot be compared across targets.
# Undo the scaling first so each error is in that target's own units.
y_test_original = scaler_y.inverse_transform(y_test)
y_pred_original = scaler_y.inverse_transform(y_pred)

# Column order follows `y`: AQI, temperature, humidity.
rmse_aqi = np.sqrt(mean_squared_error(y_test_original[:, 0], y_pred_original[:, 0]))
rmse_temp = np.sqrt(mean_squared_error(y_test_original[:, 1], y_pred_original[:, 1]))
rmse_hum = np.sqrt(mean_squared_error(y_test_original[:, 2], y_pred_original[:, 2]))

print(f"RMSE - AQI: {rmse_aqi}")
print(f"RMSE - Temperature: {rmse_temp}")
print(f"RMSE - Humidity: {rmse_hum}")

RMSE - AQI: 0.025147458913453654
RMSE - Temperature: 0.09612263380981581
RMSE - Humidity: 0.022165079121417817


In [None]:
# Forecast future AQI, temperature, and humidity for one step ahead (for demonstration)
future_input = X_scaled[-1].reshape((1, 1, X_scaled.shape[1]))  # Reshape last record in X_scaled
future_pred = model.predict(future_input)
print(f"Predicted AQI, Temperature, and Humidity for the next step: {future_pred}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 212ms/step
Predicted AQI, Temperature, and Humidity for the next step: [[0.10461558 0.38757354 0.06505439]]


In [None]:
# Convert the scaled forecast back to the original units of each target.
predicted_original = scaler_y.inverse_transform(future_pred)

# Extract the values for AQI, Temperature, and Humidity (column order follows `y`)
predicted_aqi = predicted_original[0, 0]
predicted_temperature = predicted_original[0, 1]
predicted_humidity = predicted_original[0, 2]

# Print the values (optional, for confirmation)
print(f"Predicted AQI (original scale): {predicted_aqi}")
print(f"Predicted Temperature (original scale): {predicted_temperature}")
print(f"Predicted Humidity (original scale): {predicted_humidity}")

# Prepare the email content
subject = "Predicted Values for Air Quality Monitoring"
body = (
    f"Here are the predicted values:\n\n"
    f"Predicted AQI (original scale): {predicted_aqi:.2f}\n"
    f"Predicted Temperature (original scale): {predicted_temperature:.2f}°C\n"
    f"Predicted Humidity (original scale): {predicted_humidity:.2f}%\n"
)

# Set email details
from_email = "aqiservicesalert@gmail.com"
to_email = "yogendirandev@gmail.com"

# Never store the sender's password in the notebook: anyone who can read the
# file (or its version history) could use the mailbox. Read it from the
# AQI_ALERT_EMAIL_PASSWORD environment variable, or prompt for it without echo
# when the variable is unset.
# NOTE(review): the app password that used to be hardcoded in this cell has
# been exposed and should be revoked in the Google account settings.
from_password = os.environ.get("AQI_ALERT_EMAIL_PASSWORD") or getpass.getpass(
    f"App password for {from_email}: "
)

# Call the email function with the new content
send_email(subject, body, to_email, from_email, from_password)


Predicted AQI (original scale): 90.3816146850586
Predicted Temperature (original scale): 30.962718963623047
Predicted Humidity (original scale): 69.49625396728516
Email sent successfully.
