In [2]:
import pandas as pd
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, accuracy_score, classification_report

# --- Configuration ----------------------------------------------------------
file_path = 'STATIONS.xlsx'
FEATURE_COLUMNS = ['PM2.5', 'PM10', 'NO2', 'NH3', 'SO2', 'CO', 'Ozone']
TARGET_COLUMNS = ['AQI', 'AQI VAL']
TEST_SIZE = 0.2   # 80% train / 20% test
SEED = 42         # shared by both splits and both trees so results are reproducible

# --- Load and clean ---------------------------------------------------------
# Keep the untouched frame under its own name so re-running this cell never
# depends on a previously mutated `data`.
raw_data = pd.read_excel(file_path)

# Fail early with a readable message if the workbook lacks a required column
# (the column selections below would raise KeyError anyway, just less clearly).
required_columns = FEATURE_COLUMNS + TARGET_COLUMNS
missing_columns = [col for col in required_columns if col not in raw_data.columns]
if missing_columns:
    raise KeyError(f"{file_path} is missing required columns: {missing_columns}")

# Only drop rows that lack a value the models actually use. A blanket
# dropna() would also discard rows whose only gap is in an unrelated column.
data = raw_data.dropna(subset=required_columns)
if data.empty:
    raise ValueError(f"No complete rows left in {file_path} after dropping missing values")

# --- Features and targets ---------------------------------------------------
features = data[FEATURE_COLUMNS]
target_aqi = data['AQI']          # label fed to the classifier
target_aqi_val = data['AQI VAL']  # value fed to the regressor

# --- Regression: AQI VAL ----------------------------------------------------
X_train_val, X_test_val, y_train_val, y_test_val = train_test_split(
    features, target_aqi_val, test_size=TEST_SIZE, random_state=SEED
)

dt_model_val = DecisionTreeRegressor(random_state=SEED)
dt_model_val.fit(X_train_val, y_train_val)
y_pred_val = dt_model_val.predict(X_test_val)

# Evaluate the regression model using Mean Squared Error (MSE) and R-squared
mse_val = mean_squared_error(y_test_val, y_pred_val)
r2_val = r2_score(y_test_val, y_pred_val)
print(f'Regression Model - Mean Squared Error: {mse_val}')
print(f'Regression Model - R-squared: {r2_val}')

# --- Classification: AQI ----------------------------------------------------
# Same seed and same number of rows as the regression split.
X_train_aqi, X_test_aqi, y_train_aqi, y_test_aqi = train_test_split(
    features, target_aqi, test_size=TEST_SIZE, random_state=SEED
)

dt_model_aqi = DecisionTreeClassifier(random_state=SEED)
dt_model_aqi.fit(X_train_aqi, y_train_aqi)
y_pred_aqi = dt_model_aqi.predict(X_test_aqi)

# Evaluate the classification model using accuracy score and classification report.
# zero_division=0 reports 0 (instead of warning) for classes that are never predicted.
accuracy_aqi = accuracy_score(y_test_aqi, y_pred_aqi)
classification_report_aqi = classification_report(y_test_aqi, y_pred_aqi, zero_division=0)
print(f'Classification Model - Accuracy: {accuracy_aqi}')
print('Classification Model - Classification Report:')
print(classification_report_aqi)

def predict_aqi_and_aqi_val(pm25, pm10, no2, nh3, so2, co, ozone):
    """Predict the AQI label and the AQI value for one set of pollutant readings.

    The seven readings are packed into a one-row DataFrame whose column names
    match the feature columns the module-level models were fitted on, then
    passed to ``dt_model_val`` (regressor) and ``dt_model_aqi`` (classifier).

    Args:
        pm25: Reading placed in the 'PM2.5' column.
        pm10: Reading placed in the 'PM10' column.
        no2: Reading placed in the 'NO2' column.
        nh3: Reading placed in the 'NH3' column.
        so2: Reading placed in the 'SO2' column.
        co: Reading placed in the 'CO' column.
        ozone: Reading placed in the 'Ozone' column.

    Returns:
        tuple: ``(predicted_aqi, predicted_aqi_val)`` -- the first (only)
        prediction of the classifier followed by that of the regressor.
    """
    column_names = ('PM2.5', 'PM10', 'NO2', 'NH3', 'SO2', 'CO', 'Ozone')
    readings = (pm25, pm10, no2, nh3, so2, co, ozone)

    # One single-element list per column -> a DataFrame with exactly one row.
    sample = pd.DataFrame(
        {column: [reading] for column, reading in zip(column_names, readings)}
    )

    # Regressor first, then classifier; each returns a length-1 array.
    value_estimate = dt_model_val.predict(sample)[0]
    category_estimate = dt_model_aqi.predict(sample)[0]

    return category_estimate, value_estimate




Regression Model - Mean Squared Error: 49.103004306783056
Regression Model - R-squared: 0.9741854345945753
Classification Model - Accuracy: 0.9981735159817352
Classification Model - Classification Report:
              precision    recall  f1-score   support

        Good       1.00      1.00      1.00       284
    Moderate       1.00      1.00      1.00       204
        Poor       1.00      1.00      1.00        14
Satisfactory       1.00      1.00      1.00       588
      Severe       0.00      0.00      0.00         1
   Very Poor       0.80      1.00      0.89         4

    accuracy                           1.00      1095
   macro avg       0.80      0.83      0.81      1095
weighted avg       1.00      1.00      1.00      1095



In [3]:
def _prompt_float(label):
    """Ask for ``"<label> value: "`` until the reply parses as a finite float.

    Args:
        label: Pollutant name shown in the prompt.

    Returns:
        float: The first reply that converts with ``float()`` and is neither
        NaN nor infinite.
    """
    import math  # local import keeps this cell self-contained

    while True:
        reply = input(f"{label} value: ")
        try:
            value = float(reply)
        except ValueError:
            # A typo should not abort the whole session; ask again instead.
            print(f"'{reply}' is not a valid number, please try again.")
            continue
        # float() happily accepts "nan" and "inf"; neither is a usable reading.
        if math.isfinite(value):
            return value
        print(f"'{reply}' is not a finite number, please try again.")


def main():
    """Interactively read the seven pollutant readings and print both predictions.

    Prompts for PM2.5, PM10, NO2, NH3, SO2, CO and Ozone (in that order),
    re-prompting on invalid input, then calls ``predict_aqi_and_aqi_val`` and
    prints the predicted AQI category and AQI value.
    """
    print("Enter pollutant values to predict AQI and AQI Value:")

    # Order matches the positional parameters of predict_aqi_and_aqi_val.
    labels = ("PM2.5", "PM10", "NO2", "NH3", "SO2", "CO", "Ozone")
    readings = [_prompt_float(label) for label in labels]

    # Predict AQI and AQI Value based on user input
    predicted_aqi, predicted_aqi_val = predict_aqi_and_aqi_val(*readings)

    print(f'\nPredicted AQI Category: {predicted_aqi}')
    print(f'Predicted AQI Value: {predicted_aqi_val}')

# Run the prompt when executed as a script or notebook cell, not on import.
if __name__ == "__main__":
    main()

Enter pollutant values to predict AQI and AQI Value:
PM2.5 value: 11.08
PM10 value: 26.47
NO2 value: 24.71
NH3 value: 27.47
SO2 value: 9.38
CO value: 0.6
Ozone value: 23.12

Predicted AQI Category: Good
Predicted AQI Value: 30.803298611111
