In [1]:
# Import necessary libraries

import pandas as pd
import numpy as np

# Reading Data

In [4]:
# Load the gaseous and particle AQI tables (in that order); the first CSV
# column of each file is used as the DataFrame index.
df_gas, df_par = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in ('../Results/gaseous_aqi.csv', '../Results/particle_aqi.csv')
)

# Selecting Columns of Interest

In [7]:
# Columns kept from both the gaseous and the particle frame
columns_of_interest = [
    'arithmetic_mean',
    'first_max_value',
    'first_max_hour',
    'aqi',
    'date_local',
]

# Restrict each frame to those columns (all rows retained)
df_gas = df_gas.loc[:, columns_of_interest]
df_par = df_par.loc[:, columns_of_interest]

# Data Grouping

In [8]:
# Collapse each frame to one row per 'date_local'. The maximum is taken for
# every column independently, so the values in one output row may originate
# from different source rows of that day.
df_gas, df_par = (
    frame.groupby(by='date_local').max()
    for frame in (df_gas, df_par)
)

# Display the (rows, columns) shape of both grouped frames
(df_gas.shape, df_par.shape)

((6632, 4), (2940, 4))

# Merging Data

In [9]:
# Outer-merge the two daily frames on 'date_local' so dates present in only
# one source are kept; overlapping column names get the '_gaseous' /
# '_particle' suffixes. The result is then ordered by its index.
df_merged = pd.merge(
    df_gas,
    df_par,
    on='date_local',
    how='outer',
    suffixes=('_gaseous', '_particle'),
).sort_index()

# Creating AQI Column

In [10]:
# Overall AQI per row: the larger of the gaseous and particle AQI values.
# With skipna=True, a row that has a value on only one side (NaN on the other
# after the outer merge) still receives the available value.
df_merged['AQI'] = df_merged[['aqi_gaseous', 'aqi_particle']].max(axis=1, skipna=True)

# Reset the index so that 'date_local' (the merge key) is available as a
# regular column for the date parsing below.
df_merged = df_merged.reset_index()

# Create a new 'date_year' column that could be used for grouping later.
# pd.to_datetime is used instead of .astype(np.datetime64): pandas >= 2.0
# raises TypeError when asked to cast to a unit-less datetime64 dtype.
df_merged['date_year'] = pd.to_datetime(df_merged['date_local']).dt.year

# Save the Resulting DataFrame


In [11]:
# Persist the merged and processed frame as CSV. index=True is the to_csv
# default and is spelled out here: the frame's index is written as the
# first column of the file.
df_merged.to_csv(path_or_buf='../Results/yearly_aqi_data.csv', index=True)