Import necessary libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
!pip install pmdarima
!pip install statsmodels
from statsmodels.tsa.arima.model import ARIMA
from pmdarima import auto_arima


Load CSV File

In [None]:
# Read the raw dataset from a CSV file resolved against the working directory.
csv_path = 'most-polluted-countries.csv'
data = pd.read_csv(csv_path)

Preprocess data

 Convert 'pollution_2023' column to datetime format

In [None]:
# Parse the 'pollution_2023' column into pandas datetime values.
# NOTE(review): the column name suggests it may hold a measurement rather than
# a date — confirm it really contains date-like values before relying on it.
data = data.assign(pollution_2023=pd.to_datetime(data['pollution_2023']))



Set 'pollution_2023' column as index



In [None]:
# Use the 'pollution_2023' values as the index of the frame.
# The membership check keeps the cell re-runnable: once the column has been
# moved into the index, a second set_index call would raise KeyError.
if 'pollution_2023' in data.columns:
    data = data.set_index('pollution_2023')
# Order rows by index so that positional access such as index[-1] further
# down the notebook refers to the latest entry.
data = data.sort_index()

# Task 1: Dataset Preparation

 Extract the 'mostPollutedCountries_particlePollution' column for time series analysis

In [None]:
# Pull out the single column that the rest of the notebook analyses as a series.
pollution_column = 'mostPollutedCountries_particlePollution'
pollution_data = data.loc[:, pollution_column]

Plot the time series

In [None]:
# Line chart of the series against its index, drawn through the Axes API.
series_fig, series_ax = plt.subplots(figsize=(10, 6))
series_ax.plot(pollution_data.index, pollution_data, label='Particle Pollution Levels')
series_ax.set(
    title='Time Series - Particle Pollution Levels',
    xlabel='Year',
    ylabel='Particle Pollution',
)
series_ax.legend()
plt.show()

# Task 2: Stationarity Testing

Augmented Dickey-Fuller Test

In [None]:
# Run the Augmented Dickey-Fuller test on the series and report the pieces of
# the returned tuple that this notebook uses: the statistic, the p-value and
# the critical-value table (positions 0, 1 and 4).
result = adfuller(pollution_data)
adf_statistic, adf_p_value, critical_values = result[0], result[1], result[4]
print('ADF Statistic:', adf_statistic)
print('p-value:', adf_p_value)
print('Critical Values:')
for level, threshold in critical_values.items():
    print(f'\t{level}: {threshold}')

# Task 3: ARIMA Model Identification

 ACF and PACF plots

In [None]:
# statsmodels only computes partial autocorrelations for lags below half the
# sample size, and autocorrelation lags cannot exceed the number of
# observations. The requested lag counts are therefore capped by the length of
# the series to avoid an error on short inputs; series that are long enough
# still get the original 50 / 24 lags.
n_obs = len(pollution_data)
acf_lags = min(50, n_obs - 1)
pacf_lags = min(24, n_obs // 2 - 1)

fig, ax = plt.subplots(2, 1, figsize=(10, 8))
plot_acf(pollution_data, ax=ax[0], lags=acf_lags)    # autocorrelation, top panel
plot_pacf(pollution_data, ax=ax[1], lags=pacf_lags)  # partial autocorrelation, bottom panel
fig.tight_layout()  # avoid overlap between the two stacked panels
plt.show()


# Task 4: ARIMA Model Fitting

 Auto-ARIMA for model identification

In [None]:
# Let pmdarima search for the model orders, with seasonal terms enabled and a
# seasonal period of 12.
# NOTE(review): m=12 presumes monthly observations with a yearly cycle — confirm.
model = auto_arima(pollution_data, m=12, seasonal=True)
chosen_order = model.order
chosen_seasonal_order = model.seasonal_order
print('ARIMA Model Parameters:', chosen_order, chosen_seasonal_order)

ARIMA Model Parameters: (3, 1, 1) (0, 0, 0, 12)


Fit ARIMA model

In [None]:
# Refit the orders selected by auto_arima using statsmodels' ARIMA class,
# then display the diagnostic plots of the fitted model.
arima_spec = {'order': model.order, 'seasonal_order': model.seasonal_order}
arima_model = ARIMA(pollution_data, **arima_spec)
arima_result = arima_model.fit()

arima_result.plot_diagnostics(figsize=(10, 8))
plt.show()


# Task 5: Forecasting

 Forecasting next 2 years

In [None]:
# Forecast 24 steps beyond the end of the fitted sample.
forecast_steps = 24
forecast = arima_result.forecast(steps=forecast_steps)

# Build month-end dates for the forecast horizon. One extra period is generated
# and the first entry dropped, so the dates begin after the last observed index
# value. An offset object is used instead of the 'M' string alias, which newer
# pandas releases deprecate in favour of 'ME'; the object works on both.
last_observed = pollution_data.index[-1]
month_end = pd.offsets.MonthEnd()
forecast_index = pd.date_range(start=last_observed, periods=forecast_steps + 1, freq=month_end)[1:]
forecast_df = pd.DataFrame({'Date': forecast_index, 'Forecast': forecast})

Plot Forecast

In [None]:
# Overlay the forecast (dashed line) on the historical series in a single Axes.
forecast_fig, forecast_ax = plt.subplots(figsize=(10, 6))
forecast_ax.plot(pollution_data.index, pollution_data, label='Historical Data')
forecast_ax.plot(forecast_df['Date'], forecast_df['Forecast'], linestyle='--', label='Forecast')
forecast_ax.set_title('Particle Pollution Levels Forecast')
forecast_ax.set_xlabel('Year')
forecast_ax.set_ylabel('Particle Pollution')
forecast_ax.legend()
plt.show()


# Task 6: Report and Insights

This analysis lets stakeholders anticipate future trends in particle pollution levels and plan ahead, which also supports pollution-mitigation efforts.
Policymakers and environmental agencies can create strategies to address environmental challenges and improve air quality by having a better understanding of the projected pollution levels.