# Comparative Analysis of Air Quality Variability Between Major and Peripheral Cities: A Case Study of Thailand

## Import libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind, mannwhitneyu, shapiro, levene
from itertools import product

## Import dataset

In [2]:
df = pd.read_csv("thai_air_data_cleaned.csv")

In [3]:
df.head()

Unnamed: 0,Timestamp,City,name,lat,lon,co,dew,h,no2,o3,p,pm10,pm25,r,so2,t,w,time
0,2025-02-19 18:11:33,Samut Prakan,"City Hall, Samut Prakan, Thailand (ศาลากลางจัง...",13.599149,100.597345,0.1,-5.9,8.4,9.3,13.4,1008.5,55.0,110.0,0.6,0.6,31.7,0.5,2025-02-20 00:00:00
1,2025-02-19 18:11:33,Nakhon Ratchasima,"Municipal Waste Water Pumping Station, Nakhon ...",14.979537,102.098335,,-4.6,9.2,11.0,23.7,985.9,,100.0,0.2,0.6,31.9,0.5,2025-02-20 00:00:00
2,2025-02-19 18:11:34,Surin,"Pyramid House, Surin, Thailand",14.793612,103.43056,,-4.6,9.2,,,993.9,,,,,32.0,0.5,
3,2025-02-19 18:11:34,Sakon Nakhon,"Meteorological stations, Sakon Nakhon, Thailan...",17.15662,104.133216,0.1,-4.9,9.9,,20.1,991.0,43.0,63.0,,1.1,30.2,1.5,2025-02-20 00:00:00
4,2025-02-19 18:11:35,Mueang Khon Kaen,"Khonkaen, Thailand (ส่วนอุทกวิทยา สำนักงานทรัพ...",16.445383,102.83525,,-5.2,9.0,,,988.0,,79.0,0.6,,31.5,0.5,2025-02-20 00:00:00


In [4]:
df.shape

(2146, 18)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2146 entries, 0 to 2145
Data columns (total 18 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Timestamp  2146 non-null   object 
 1   City       2146 non-null   object 
 2   name       2146 non-null   object 
 3   lat        2146 non-null   float64
 4   lon        2146 non-null   float64
 5   co         1375 non-null   float64
 6   dew        1811 non-null   float64
 7   h          2146 non-null   float64
 8   no2        1655 non-null   float64
 9   o3         1699 non-null   float64
 10  p          2146 non-null   float64
 11  pm10       1836 non-null   float64
 12  pm25       2084 non-null   float64
 13  r          1141 non-null   float64
 14  so2        1561 non-null   float64
 15  t          2146 non-null   float64
 16  w          2146 non-null   float64
 17  time       2084 non-null   object 
dtypes: float64(14), object(4)
memory usage: 301.9+ KB


In [6]:
df["time"] = pd.to_datetime(df["time"])

In [7]:
df.isna().sum()

Timestamp       0
City            0
name            0
lat             0
lon             0
co            771
dew           335
h               0
no2           491
o3            447
p               0
pm10          310
pm25           62
r            1005
so2           585
t               0
w               0
time           62
dtype: int64

In [8]:
(df.groupby(["City"]).apply(lambda x: x.isna().sum() / len(x) * 100, include_groups=False)
                     .drop(columns="City", errors="ignore").round(2))

Unnamed: 0_level_0,Timestamp,name,lat,lon,co,dew,h,no2,o3,p,pm10,pm25,r,so2,t,w,time
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Ayutthaya,0.0,0.0,0.0,0.0,0.0,14.75,0.0,0.0,0.0,0.0,0.0,0.0,54.1,0.0,0.0,0.0,0.0
Bangkok,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,21.31,0.0,0.0,0.0,0.0
Chiang Mai,0.0,0.0,0.0,0.0,100.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,63.93,0.0,0.0,0.0,0.0
Chonburi,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,31.67,0.0,0.0,0.0,0.0
Kamphaeng Phet,0.0,0.0,0.0,0.0,100.0,100.0,0.0,100.0,100.0,0.0,0.0,0.0,100.0,100.0,0.0,0.0,0.0
Kanchanaburi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,34.92,100.0,0.0,0.0,0.0
Krabi,0.0,0.0,0.0,0.0,100.0,0.0,0.0,100.0,100.0,0.0,100.0,0.0,100.0,100.0,0.0,0.0,0.0
Lampang,0.0,0.0,0.0,0.0,0.0,18.03,0.0,0.0,0.0,0.0,0.0,0.0,47.54,0.0,0.0,0.0,0.0
Lamphun,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,75.81,0.0,0.0,0.0,0.0
Mueang Chiang Rai,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,100.0,0.0,0.0,0.0


In [9]:
df.groupby(["City"]).size()

City
Ayutthaya              61
Bangkok                61
Chiang Mai             61
Chonburi               60
Kamphaeng Phet          1
Kanchanaburi           63
Krabi                   1
Lampang                61
Lamphun                62
Mueang Chiang Rai      61
Mueang Khon Kaen       60
Nakhon Pathom          60
Nakhon Ratchasima      63
Nakhon Sawan           63
Nan                    62
Narathiwat             60
Nong Khai              63
Nonthaburi             62
Pathum Thani           60
Pattani                 1
Phayao                 62
Phitsanulok            62
Phuket                 60
Prachuap Khiri Khan    61
Ratchaburi             60
Rayong                 60
Sakon Nakhon           62
Samut Prakan           63
Samut Sakhon           61
Saraburi               60
Satun                  60
Suphan Buri            63
Surin                  62
Trang                  60
Trat                   61
Ubon Ratchathani       61
Uthai Thani             1
Uttaradit              61
Yala   

In [10]:
# remove cities with only one record
df = df[~df["City"].isin(["Kamphaeng Phet", "Krabi", "Pattani", "Uthai Thani", "Surin"])]

df.groupby(["City"]).size()

City
Ayutthaya              61
Bangkok                61
Chiang Mai             61
Chonburi               60
Kanchanaburi           63
Lampang                61
Lamphun                62
Mueang Chiang Rai      61
Mueang Khon Kaen       60
Nakhon Pathom          60
Nakhon Ratchasima      63
Nakhon Sawan           63
Nan                    62
Narathiwat             60
Nong Khai              63
Nonthaburi             62
Pathum Thani           60
Phayao                 62
Phitsanulok            62
Phuket                 60
Prachuap Khiri Khan    61
Ratchaburi             60
Rayong                 60
Sakon Nakhon           62
Samut Prakan           63
Samut Sakhon           61
Saraburi               60
Satun                  60
Suphan Buri            63
Trang                  60
Trat                   61
Ubon Ratchathani       61
Uttaradit              61
Yala                   60
dtype: int64

## Data preprocessing

### Define city type (major/peripheral) and region (North, Northeast, East, Central, West, South)

In [11]:
# City type: major and peripheral
city_type = {
    'Samut Prakan': 'Peripheral',
    'Nakhon Ratchasima': 'Major',
    'Surin': 'Peripheral',
    'Sakon Nakhon': 'Peripheral',
    'Mueang Khon Kaen': 'Major',
    'Nong Khai': 'Peripheral',
    'Kanchanaburi': 'Major',
    'Suphan Buri': 'Peripheral',
    'Uthai Thani': 'Peripheral',
    'Nakhon Sawan': 'Peripheral',
    'Phitsanulok': 'Peripheral',
    'Kamphaeng Phet': 'Peripheral',
    'Lamphun': 'Peripheral',
    'Lampang': 'Peripheral',
    'Phayao': 'Peripheral',
    'Bangkok': 'Major',
    'Trat': 'Peripheral',
    'Nonthaburi': 'Peripheral',
    'Nan': 'Peripheral',
    'Ubon Ratchathani': 'Peripheral',
    'Samut Sakhon': 'Peripheral',
    'Chiang Mai': 'Major',
    'Mueang Chiang Rai': 'Peripheral',
    'Ayutthaya': 'Peripheral',
    'Uttaradit': 'Peripheral',
    'Rayong': 'Major',
    'Pathum Thani': 'Peripheral',
    'Chonburi': 'Major',
    'Nakhon Pathom': 'Peripheral',
    'Ratchaburi': 'Peripheral',
    'Phuket': 'Major',
    'Yala': 'Peripheral',
    'Krabi': 'Peripheral',
    'Trang': 'Peripheral',
    'Satun': 'Peripheral',
    'Pattani': 'Peripheral',
    'Narathiwat': 'Peripheral',
    'Prachuap Khiri Khan': 'Peripheral',
    'Saraburi': 'Peripheral',
}

df['City_type'] = df['City'].map(city_type)

In [12]:
# Region: north, northeast, central, south, west, east
city_region = {
    'Samut Prakan': 'Central',
    'Nakhon Ratchasima': 'Northeast',
    'Surin': 'Northeast',
    'Sakon Nakhon': 'Northeast',
    'Mueang Khon Kaen': 'Northeast',
    'Nong Khai': 'Northeast',
    'Kanchanaburi': 'West',
    'Suphan Buri': 'Central',
    'Uthai Thani': 'Central',
    'Nakhon Sawan': 'Central',
    'Phitsanulok': 'Central',
    'Kamphaeng Phet': 'Central',
    'Lamphun': 'North',
    'Lampang': 'North',
    'Phayao': 'North',
    'Bangkok': 'Central',
    'Trat': 'East',
    'Nonthaburi': 'Central',
    'Nan': 'North',
    'Ubon Ratchathani': 'Northeast',
    'Samut Sakhon': 'Central',
    'Chiang Mai': 'North',
    'Mueang Chiang Rai': 'North',
    'Ayutthaya': 'Central',
    'Uttaradit': 'North',
    'Rayong': 'East',
    'Pathum Thani': 'Central',
    'Chonburi': 'East',
    'Nakhon Pathom': 'Central',
    'Ratchaburi': 'West',
    'Phuket': 'South',
    'Yala': 'South',
    'Krabi': 'South',
    'Trang': 'South',
    'Satun': 'South',
    'Pattani': 'South',
    'Narathiwat': 'South',
    'Prachuap Khiri Khan': 'West',
    'Saraburi': 'Central',
}

df['City_region'] = df['City'].map(city_region)

In [13]:
df.groupby(["City_region", "City_type"])["City"].nunique()

City_region  City_type 
Central      Major          1
             Peripheral    10
East         Major          2
             Peripheral     1
North        Major          1
             Peripheral     6
Northeast    Major          2
             Peripheral     3
South        Major          1
             Peripheral     4
West         Major          1
             Peripheral     2
Name: City, dtype: int64

### Impute missing value

In [14]:
df_no_missing = df.sort_values(by=["City", "time"]).copy()
df_no_missing = df_no_missing.groupby(["City"]).apply(lambda g: g.ffill().bfill(), include_groups=False)
df_no_missing = df_no_missing.reset_index(drop=False)
df_no_missing = df_no_missing.drop(columns=["level_1"])
df_no_missing

Unnamed: 0,City,Timestamp,name,lat,lon,co,dew,h,no2,o3,p,pm10,pm25,r,so2,t,w,time,City_type,City_region
0,Ayutthaya,2025-02-19 18:11:46,"Ayutthaya Witthayalai School, Ayutthaya, Thail...",14.352220,100.565325,0.1,-5.9,9.5,3.5,5.2,1008.4,64.0,74.0,0.5,0.6,29.6,2.5,2025-02-20 00:00:00,Peripheral,Central
1,Ayutthaya,2025-02-20 12:00:25,"Ayutthaya Witthayalai School, Ayutthaya, Thail...",14.352220,100.565325,0.1,23.8,69.5,3.5,5.2,1013.9,64.0,135.0,0.5,0.6,30.0,2.5,2025-02-20 18:00:00,Peripheral,Central
2,Ayutthaya,2025-02-21 01:36:00,"Ayutthaya Witthayalai School, Ayutthaya, Thail...",14.352220,100.565325,0.1,25.3,84.8,3.5,5.2,1011.4,64.0,138.0,0.5,0.6,25.8,1.0,2025-02-21 08:00:00,Peripheral,Central
3,Ayutthaya,2025-02-22 01:53:38,"Ayutthaya Witthayalai School, Ayutthaya, Thail...",14.352220,100.565325,0.1,25.3,76.0,3.5,5.2,1012.9,64.0,95.0,0.5,0.6,27.2,0.5,2025-02-22 08:00:00,Peripheral,Central
4,Ayutthaya,2025-02-22 02:11:00,"Ayutthaya Witthayalai School, Ayutthaya, Thail...",14.352220,100.565325,0.1,25.3,76.0,3.5,5.2,1012.4,64.0,88.0,0.5,0.6,26.6,0.2,2025-02-22 09:00:00,Peripheral,Central
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2075,Yala,2025-04-15 17:33:18,"White Elephant Park, Yala, Thailand (สนามโรงพิ...",6.546205,101.283137,,25.1,63.0,,,1003.1,33.0,23.0,0.3,,33.1,2.0,2025-04-15 23:00:00,Peripheral,South
2076,Yala,2025-04-17 13:00:19,"White Elephant Park, Yala, Thailand (สนามโรงพิ...",6.546205,101.283137,,-6.8,7.4,,,1006.5,33.0,27.0,99.2,,32.7,2.0,2025-04-17 19:00:00,Peripheral,South
2077,Yala,2025-04-18 13:00:24,"White Elephant Park, Yala, Thailand (สนามโรงพิ...",6.546205,101.283137,,-6.5,7.5,,,1006.5,33.0,21.0,99.2,,33.0,2.0,2025-04-18 19:00:00,Peripheral,South
2078,Yala,2025-04-20 02:11:35,"White Elephant Park, Yala, Thailand (สนามโรงพิ...",6.546205,101.283137,,24.0,85.7,,,1008.4,33.0,19.0,99.2,,26.6,1.0,2025-04-20 05:00:00,Peripheral,South


### Resample data to day level

In [15]:
["co", "dew", "h", "no2", "o3", "p", "pm10", "pm25", "r", "so2", "t", "w"]

['co', 'dew', 'h', 'no2', 'o3', 'p', 'pm10', 'pm25', 'r', 'so2', 't', 'w']

In [16]:
df_no_missing.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2080 entries, 0 to 2079
Data columns (total 20 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   City         2080 non-null   object        
 1   Timestamp    2080 non-null   object        
 2   name         2080 non-null   object        
 3   lat          2080 non-null   float64       
 4   lon          2080 non-null   float64       
 5   co           1411 non-null   float64       
 6   dew          1837 non-null   float64       
 7   h            2080 non-null   float64       
 8   no2          1654 non-null   float64       
 9   o3           1779 non-null   float64       
 10  p            2080 non-null   float64       
 11  pm10         2017 non-null   float64       
 12  pm25         2080 non-null   float64       
 13  r            1896 non-null   float64       
 14  so2          1653 non-null   float64       
 15  t            2080 non-null   float64       
 16  w     

In [17]:
aqi_pivot = pd.pivot_table(data=df_no_missing, index="time", columns=["City"], values=["pm25", "pm10", "co", "dew", "h", "no2", "o3", "so2", "p", "r", "t", "w"], aggfunc="mean", fill_value=np.nan)
aqi_pivot

Unnamed: 0_level_0,co,co,co,co,co,co,co,co,co,co,...,w,w,w,w,w,w,w,w,w,w
City,Ayutthaya,Bangkok,Kanchanaburi,Lampang,Mueang Chiang Rai,Nakhon Sawan,Nan,Nong Khai,Nonthaburi,Pathum Thani,...,Samut Prakan,Samut Sakhon,Saraburi,Satun,Suphan Buri,Trang,Trat,Ubon Ratchathani,Uttaradit,Yala
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2025-02-19 12:00:00,,,,,,,,,,,...,,,,,,,,,1.0,
2025-02-20 00:00:00,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,0.5,2.0,0.5,2.0,2.5,5.1,2.5,1.5,,3.0
2025-02-20 01:00:00,,,,,,,,,,,...,,,,,,,,,,
2025-02-20 18:00:00,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,1.5,1.0,0.2,3.0,2.0,1.5,1.0,2.0,3.0,4.6
2025-02-21 04:00:00,,,,,,,,,,,...,,,,0.5,,,,,,1.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-20 08:00:00,,0.1,0.1,,0.1,,0.1,0.1,0.1,0.1,...,1.5,1.5,,,2.0,0.5,,1.0,1.0,
2025-04-20 09:00:00,0.1,,,,,,,,,,...,,,0.2,,,,1.0,,,
2025-04-21 14:00:00,,,,,,,,,,,...,,,,,,,,,,
2025-04-21 19:00:00,,,,,,,,,,,...,,,,1.5,,,1.0,,3.6,1.0


In [18]:
resampled_aqi = aqi_pivot.resample("D").mean()
resampled_aqi

Unnamed: 0_level_0,co,co,co,co,co,co,co,co,co,co,...,w,w,w,w,w,w,w,w,w,w
City,Ayutthaya,Bangkok,Kanchanaburi,Lampang,Mueang Chiang Rai,Nakhon Sawan,Nan,Nong Khai,Nonthaburi,Pathum Thani,...,Samut Prakan,Samut Sakhon,Saraburi,Satun,Suphan Buri,Trang,Trat,Ubon Ratchathani,Uttaradit,Yala
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2025-02-19,,,,,,,,,,,...,,,,,,,,,1.0,
2025-02-20,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,1.0,1.5,0.35,2.50,2.250000,3.30,1.75,1.75,3.0,3.80
2025-02-21,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,1.0,1.5,0.50,0.50,4.100000,6.10,0.50,3.60,1.0,1.50
2025-02-22,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,2.6,2.4,0.60,2.50,1.233333,1.00,1.75,3.05,1.0,3.05
2025-02-23,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,2.8,1.5,0.60,4.05,2.300000,2.25,1.00,4.60,2.0,4.30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-17,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,0.5,1.0,1.00,1.00,1.000000,2.00,2.50,2.50,3.6,2.00
2025-04-18,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,2.0,0.5,1.00,1.50,2.000000,1.00,1.00,4.10,2.0,2.00
2025-04-19,,,,,,,,,,,...,,,,,,,,,,
2025-04-20,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,1.5,1.5,0.20,0.50,2.000000,0.50,1.00,1.00,1.0,1.00


In [19]:
resampled_aqi.columns = resampled_aqi.columns.swaplevel(0, 1)
resampled_aqi = resampled_aqi.sort_index(axis=1, level=0)
resampled_aqi

City,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,...,Uttaradit,Uttaradit,Yala,Yala,Yala,Yala,Yala,Yala,Yala,Yala
Unnamed: 0_level_1,co,dew,h,no2,o3,p,pm10,pm25,r,so2,...,t,w,dew,h,p,pm10,pm25,r,t,w
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2025-02-19,,,,,,,,,,,...,23.000000,1.0,,,,,,,,
2025-02-20,0.1,8.950000,39.500000,3.5,5.2,1011.150000,64.0,104.500000,0.5,0.6,...,26.300000,3.0,-6.65,8.35,1006.55,33.0,44.0,0.2,30.85,3.80
2025-02-21,0.1,25.300000,84.800000,3.5,5.2,1011.400000,64.0,138.000000,0.5,0.6,...,25.700000,1.0,-8.20,9.20,1009.90,33.0,40.0,0.2,27.10,1.50
2025-02-22,0.1,15.066667,53.633333,3.5,5.2,1013.266667,64.0,82.666667,0.5,0.6,...,26.266667,1.0,8.30,46.95,1011.20,33.0,46.0,99.0,28.90,3.05
2025-02-23,0.1,9.600000,38.700000,3.5,5.2,1013.150000,64.0,92.500000,0.5,0.6,...,31.150000,2.0,-7.65,8.30,1009.45,33.0,44.0,99.0,29.70,4.30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-17,0.1,-6.500000,7.200000,3.5,5.2,1010.000000,64.0,76.000000,99.8,0.6,...,33.400000,3.6,-6.80,7.40,1006.50,33.0,27.0,99.2,32.70,2.00
2025-04-18,0.1,-5.600000,7.700000,3.5,5.2,1009.700000,64.0,36.000000,99.6,0.6,...,32.400000,2.0,-6.50,7.50,1006.50,33.0,21.0,99.2,33.00,2.00
2025-04-19,,,,,,,,,,,...,,,,,,,,,,
2025-04-20,0.1,24.700000,80.400000,3.5,5.2,1006.400000,64.0,52.000000,99.6,0.6,...,29.500000,1.0,24.00,85.70,1008.40,33.0,19.0,99.2,26.60,1.00


In [20]:
resampled_aqi = resampled_aqi.ffill().bfill()
resampled_aqi

City,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,...,Uttaradit,Uttaradit,Yala,Yala,Yala,Yala,Yala,Yala,Yala,Yala
Unnamed: 0_level_1,co,dew,h,no2,o3,p,pm10,pm25,r,so2,...,t,w,dew,h,p,pm10,pm25,r,t,w
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2025-02-19,0.1,8.950000,39.500000,3.5,5.2,1011.150000,64.0,104.500000,0.5,0.6,...,23.000000,1.0,-6.65,8.35,1006.55,33.0,44.0,0.2,30.85,3.80
2025-02-20,0.1,8.950000,39.500000,3.5,5.2,1011.150000,64.0,104.500000,0.5,0.6,...,26.300000,3.0,-6.65,8.35,1006.55,33.0,44.0,0.2,30.85,3.80
2025-02-21,0.1,25.300000,84.800000,3.5,5.2,1011.400000,64.0,138.000000,0.5,0.6,...,25.700000,1.0,-8.20,9.20,1009.90,33.0,40.0,0.2,27.10,1.50
2025-02-22,0.1,15.066667,53.633333,3.5,5.2,1013.266667,64.0,82.666667,0.5,0.6,...,26.266667,1.0,8.30,46.95,1011.20,33.0,46.0,99.0,28.90,3.05
2025-02-23,0.1,9.600000,38.700000,3.5,5.2,1013.150000,64.0,92.500000,0.5,0.6,...,31.150000,2.0,-7.65,8.30,1009.45,33.0,44.0,99.0,29.70,4.30
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-17,0.1,-6.500000,7.200000,3.5,5.2,1010.000000,64.0,76.000000,99.8,0.6,...,33.400000,3.6,-6.80,7.40,1006.50,33.0,27.0,99.2,32.70,2.00
2025-04-18,0.1,-5.600000,7.700000,3.5,5.2,1009.700000,64.0,36.000000,99.6,0.6,...,32.400000,2.0,-6.50,7.50,1006.50,33.0,21.0,99.2,33.00,2.00
2025-04-19,0.1,-5.600000,7.700000,3.5,5.2,1009.700000,64.0,36.000000,99.6,0.6,...,32.400000,2.0,-6.50,7.50,1006.50,33.0,21.0,99.2,33.00,2.00
2025-04-20,0.1,24.700000,80.400000,3.5,5.2,1006.400000,64.0,52.000000,99.6,0.6,...,29.500000,1.0,24.00,85.70,1008.40,33.0,19.0,99.2,26.60,1.00


In [21]:
summary_statistics = resampled_aqi.describe().transpose().reset_index()

for metric in summary_statistics["level_1"].unique():
    print(f"Summary statistics of {metric}")
    display(summary_statistics[summary_statistics["level_1"] == metric])

Summary statistics of co


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
0,Ayutthaya,co,62.0,0.1,4.223569e-17,0.1,0.1,0.1,0.1,0.1
12,Bangkok,co,62.0,0.1,4.1973230000000005e-17,0.1,0.1,0.1,0.1,0.1
44,Kanchanaburi,co,62.0,0.1,4.223569e-17,0.1,0.1,0.1,0.1,0.1
54,Lampang,co,62.0,0.1,4.1973230000000005e-17,0.1,0.1,0.1,0.1,0.1
77,Mueang Chiang Rai,co,62.0,0.1,4.1973230000000005e-17,0.1,0.1,0.1,0.1,0.1
114,Nakhon Sawan,co,62.0,0.1,4.223569e-17,0.1,0.1,0.1,0.1,0.1
126,Nan,co,62.0,0.1,4.223569e-17,0.1,0.1,0.1,0.1,0.1
146,Nong Khai,co,62.0,0.1,4.223569e-17,0.1,0.1,0.1,0.1,0.1
158,Nonthaburi,co,62.0,0.1,4.223569e-17,0.1,0.1,0.1,0.1,0.1
169,Pathum Thani,co,62.0,0.1,4.1973230000000005e-17,0.1,0.1,0.1,0.1,0.1


Summary statistics of dew


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
1,Ayutthaya,dew,62.0,2.340591,12.33635,-8.4,-6.5,-5.8,14.4,25.7
33,Chonburi,dew,62.0,4.866935,12.407916,-8.1,-6.3,-4.25,14.0,26.6
45,Kanchanaburi,dew,62.0,2.974731,11.564497,-8.2,-5.3375,-4.3,12.5875,24.9
55,Lampang,dew,62.0,2.079032,10.940823,-8.5,-5.1,-4.3,14.35,23.5
66,Lamphun,dew,62.0,12.743548,9.633882,-6.5,9.075,16.7,19.0,22.5
86,Mueang Khon Kaen,dew,62.0,6.560484,12.172537,-8.2,-5.375,6.75,18.0,25.1
94,Nakhon Pathom,dew,62.0,5.533871,13.20013,-8.8,-6.125,-5.05,20.15,26.5
104,Nakhon Ratchasima,dew,62.0,3.324194,12.578236,-8.1,-5.175,-4.4,15.2,26.5
115,Nakhon Sawan,dew,62.0,4.390323,12.676782,-8.9,-6.0,-4.5,19.025,26.8
127,Nan,dew,62.0,3.031452,11.25351,-8.8,-5.4,-4.8,14.4,23.9


Summary statistics of h


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
2,Ayutthaya,h,62.0,25.584409,23.579843,7.2,7.7,8.3,43.2,86.25
13,Bangkok,h,62.0,32.675806,28.701336,7.3,8.2,10.55,55.475,95.4
23,Chiang Mai,h,62.0,30.146774,20.110294,7.8,9.5,30.125,45.95,73.7
34,Chonburi,h,62.0,28.570161,23.970828,6.9,8.125,11.25,45.075,86.65
46,Kanchanaburi,h,62.0,22.698118,21.104105,7.1,7.825,9.05,35.6875,74.2
56,Lampang,h,62.0,29.804839,26.124268,7.2,8.425,11.35,53.025,96.5
67,Lamphun,h,62.0,39.764516,20.436325,7.9,30.4,41.0,50.925,79.4
78,Mueang Chiang Rai,h,62.0,60.450806,14.521032,33.3,50.075,58.0,67.1875,94.6
87,Mueang Khon Kaen,h,62.0,31.606452,25.077611,6.8,7.8,33.0,49.25,93.0
95,Nakhon Pathom,h,62.0,29.421774,25.16082,7.2,7.6,10.35,47.8875,88.8


Summary statistics of no2


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
3,Ayutthaya,no2,62.0,3.5,0.0,3.5,3.5,3.5,3.5,3.5
14,Bangkok,no2,62.0,1.312903,0.9129279,0.6,0.6,1.2,1.65,5.2
24,Chiang Mai,no2,62.0,7.5,0.0,7.5,7.5,7.5,7.5,7.5
35,Chonburi,no2,62.0,9.3,5.372574e-15,9.3,9.3,9.3,9.3,9.3
57,Lampang,no2,62.0,1.8,2.238572e-16,1.8,1.8,1.8,1.8,1.8
68,Lamphun,no2,62.0,3.493548,3.432624,0.6,0.6,0.6,7.5,7.5
79,Mueang Chiang Rai,no2,62.0,0.6,2.238572e-16,0.6,0.6,0.6,0.6,0.6
106,Nakhon Ratchasima,no2,62.0,6.605645,2.979095,2.9,4.1,5.8,8.1,14.4
117,Nakhon Sawan,no2,62.0,5.376344,2.331735,2.4,3.5,4.7,6.95,11.6
129,Nan,no2,62.0,1.000806,0.3803032,0.6,0.6,1.2,1.2,2.65


Summary statistics of o3


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
4,Ayutthaya,o3,62.0,5.2,1.790858e-15,5.2,5.2,5.2,5.2,5.2
15,Bangkok,o3,62.0,18.101613,6.250278,2.8,14.4,18.8,21.7,35.5
25,Chiang Mai,o3,62.0,14.6,1.790858e-15,14.6,14.6,14.6,14.6,14.6
36,Chonburi,o3,62.0,6.0,0.0,6.0,6.0,6.0,6.0,6.0
47,Kanchanaburi,o3,62.0,6.7,8.95429e-16,6.7,6.7,6.7,6.7,6.7
58,Lampang,o3,62.0,11.5,0.0,11.5,11.5,11.5,11.5,11.5
69,Lamphun,o3,62.0,4.912903,2.29177,1.6,2.8,4.8,6.2,9.9
80,Mueang Chiang Rai,o3,62.0,12.7,5.372574e-15,12.7,12.7,12.7,12.7,12.7
96,Nakhon Pathom,o3,62.0,6.0,0.0,6.0,6.0,6.0,6.0,6.0
107,Nakhon Ratchasima,o3,62.0,26.546774,8.275734,6.0,23.1,27.2,31.7,43.4


Summary statistics of p


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
5,Ayutthaya,p,62.0,1010.849462,2.851783,1005.0,1008.825,1010.75,1012.6,1017.1
16,Bangkok,p,62.0,1011.083871,3.064174,1004.7,1009.225,1010.85,1013.0375,1018.2
26,Chiang Mai,p,62.0,984.174194,17.350679,970.4,973.0125,975.175,991.825,1020.0
37,Chonburi,p,62.0,1009.433065,3.105336,1005.2,1007.375,1008.95,1010.9375,1018.5
48,Kanchanaburi,p,62.0,1007.847312,3.217534,1003.5,1005.0,1007.0,1009.8125,1015.7
59,Lampang,p,62.0,987.762903,10.709288,973.7,980.9,983.4,990.45,1015.6
70,Lamphun,p,62.0,997.872581,17.232767,970.6,977.9,1008.5,1011.875,1020.0
81,Mueang Chiang Rai,p,62.0,977.896774,3.513579,971.8,975.5,977.1,979.35,985.8
88,Mueang Khon Kaen,p,62.0,995.33871,10.903806,983.4,987.6,990.5,998.8,1022.0
97,Nakhon Pathom,p,62.0,1010.350806,3.185746,1004.0,1008.0625,1009.95,1012.15,1018.0


Summary statistics of pm10


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
6,Ayutthaya,pm10,62.0,64.0,0.0,64.0,64.0,64.0,64.0,64.0
17,Bangkok,pm10,62.0,47.387097,13.795467,23.0,40.0,46.5,53.875,86.0
27,Chiang Mai,pm10,62.0,82.0,0.0,82.0,82.0,82.0,82.0,82.0
38,Chonburi,pm10,62.0,33.0,0.0,33.0,33.0,33.0,33.0,33.0
49,Kanchanaburi,pm10,62.0,47.0,0.0,47.0,47.0,47.0,47.0,47.0
60,Lampang,pm10,62.0,75.0,0.0,75.0,75.0,75.0,75.0,75.0
71,Lamphun,pm10,62.0,82.0,0.0,82.0,82.0,82.0,82.0,82.0
82,Mueang Chiang Rai,pm10,62.0,50.612903,22.215675,8.5,37.125,49.5,69.125,92.0
89,Mueang Khon Kaen,pm10,62.0,40.0,0.0,40.0,40.0,40.0,40.0,40.0
98,Nakhon Pathom,pm10,62.0,42.427419,11.257364,14.5,30.75,51.0,51.0,60.0


Summary statistics of pm25


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
7,Ayutthaya,pm25,62.0,84.841398,35.103158,12.0,63.0,72.5,107.875,173.0
18,Bangkok,pm25,62.0,98.524194,29.405925,58.0,77.75,89.0,119.375,177.0
28,Chiang Mai,pm25,62.0,88.66129,34.088214,26.0,67.5,79.0,107.0,163.0
39,Chonburi,pm25,62.0,73.782258,33.240261,20.0,54.25,73.0,94.75,156.0
50,Kanchanaburi,pm25,62.0,74.427419,34.126554,6.0,57.0,70.0,88.875,169.0
61,Lampang,pm25,62.0,114.016129,42.120141,48.0,79.625,107.5,150.75,195.0
72,Lamphun,pm25,62.0,111.080645,44.547009,27.0,69.375,112.5,152.75,178.0
83,Mueang Chiang Rai,pm25,62.0,110.483871,48.817825,18.0,73.75,103.5,159.625,180.0
90,Mueang Khon Kaen,pm25,62.0,109.096774,37.465595,5.0,82.75,102.0,147.0,172.0
99,Nakhon Pathom,pm25,62.0,81.41129,34.639422,26.0,56.625,73.0,106.0,154.0


Summary statistics of r


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
8,Ayutthaya,r,62.0,13.141935,33.491997,0.1,0.2,0.3,0.5,99.8
19,Bangkok,r,62.0,15.391935,33.679015,0.1,0.425,1.1,2.8,99.4
29,Chiang Mai,r,62.0,67.86371,44.934107,0.2,2.8,99.0,99.3,99.3
40,Chonburi,r,62.0,25.375,42.278829,0.1,1.45,1.7,3.575,99.7
51,Kanchanaburi,r,62.0,40.293548,48.724774,0.1,0.3,0.3,99.0,99.3
62,Lampang,r,62.0,48.472581,48.61601,0.1,0.9,27.0,99.0,99.6
73,Lamphun,r,62.0,48.177419,49.888099,0.1,0.1,1.9,99.3,99.6
91,Mueang Khon Kaen,r,62.0,25.908065,43.571924,0.1,0.325,0.4,74.625,99.6
100,Nakhon Pathom,r,62.0,39.530645,48.261567,0.2,0.6,0.6,99.2,99.9
110,Nakhon Ratchasima,r,62.0,48.993548,49.017444,0.2,1.725,4.4,99.0,99.7


Summary statistics of so2


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
9,Ayutthaya,so2,62.0,0.6,2.238572e-16,0.6,0.6,0.6,0.6,0.6
20,Bangkok,so2,62.0,0.6,2.238572e-16,0.6,0.6,0.6,0.6,0.6
30,Chiang Mai,so2,62.0,0.6,2.238572e-16,0.6,0.6,0.6,0.6,0.6
41,Chonburi,so2,62.0,1.1,4.477145e-16,1.1,1.1,1.1,1.1,1.1
63,Lampang,so2,62.0,0.6,2.238572e-16,0.6,0.6,0.6,0.6,0.6
74,Lamphun,so2,62.0,0.6,2.238572e-16,0.6,0.6,0.6,0.6,0.6
101,Nakhon Pathom,so2,62.0,9.0,0.0,9.0,9.0,9.0,9.0,9.0
111,Nakhon Ratchasima,so2,62.0,0.6,2.238572e-16,0.6,0.6,0.6,0.6,0.6
123,Nakhon Sawan,so2,62.0,0.6,2.238572e-16,0.6,0.6,0.6,0.6,0.6
135,Nan,so2,62.0,0.6,2.238572e-16,0.6,0.6,0.6,0.6,0.6


Summary statistics of t


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
10,Ayutthaya,t,62.0,31.756452,2.770771,24.5,30.2125,32.25,33.4,37.6
21,Bangkok,t,62.0,33.033065,2.290643,26.0,31.85,33.725,34.5,36.7
31,Chiang Mai,t,62.0,31.715323,2.889673,24.8,30.125,32.0,33.8,36.4
42,Chonburi,t,62.0,31.612097,2.530562,23.7,29.65,32.7,33.475,36.2
52,Kanchanaburi,t,62.0,32.68172,3.301648,25.2,30.15,33.55,35.0,38.3
64,Lampang,t,62.0,30.531452,3.868528,21.9,27.725,31.025,33.625,38.2
75,Lamphun,t,62.0,30.966129,3.50283,22.0,28.725,31.3,33.8,38.2
84,Mueang Chiang Rai,t,62.0,30.083065,4.215457,17.7,27.625,30.8,33.5,35.7
92,Mueang Khon Kaen,t,62.0,30.740323,4.853416,19.0,26.1,30.85,35.0,37.0
102,Nakhon Pathom,t,62.0,31.962903,2.963562,23.3,30.8,32.725,33.8,36.0


Summary statistics of w


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
11,Ayutthaya,w,62.0,2.409677,1.27565,0.2,1.5,2.5,3.0,5.1
22,Bangkok,w,62.0,2.145161,1.178625,0.5,1.275,2.0,3.0,4.6
32,Chiang Mai,w,62.0,1.447581,0.751361,0.5,1.0,1.5,2.0,3.6
43,Chonburi,w,62.0,2.666935,1.336835,0.5,2.0,2.2,3.0,6.6
53,Kanchanaburi,w,62.0,1.973387,0.894506,0.5,1.5,2.0,2.5,5.1
65,Lampang,w,62.0,1.283871,0.896385,0.5,0.5,1.0,1.5,5.0
76,Lamphun,w,62.0,1.897581,0.978432,0.5,1.5,1.875,2.1125,5.35
85,Mueang Chiang Rai,w,62.0,1.616129,0.884927,0.1,1.0,1.625,2.0,3.6
93,Mueang Khon Kaen,w,62.0,1.778226,1.042248,0.5,1.0,1.5,2.375,5.6
103,Nakhon Pathom,w,62.0,2.256452,1.016724,0.5,1.5,2.375,3.0,4.6


## Hypothesis 1

In [22]:
# Get metadata
city_meta = df_no_missing[['City', 'City_region', 'City_type']].drop_duplicates().set_index("City")

results = []
feature = "pm25"

for region in city_meta["City_region"].unique():
    regional_cities = city_meta[city_meta["City_region"] == region]
    
    majors = regional_cities[regional_cities["City_type"] == "Major"].index
    peripherals = regional_cities[regional_cities["City_type"] == "Peripheral"].index

    for major_city, peripheral_city in product(majors, peripherals):
        try:
            series1 = resampled_aqi[(major_city, feature)].dropna()
            series2 = resampled_aqi[(peripheral_city, feature)].dropna()
        except KeyError:
            continue  # Skip if either city's data is missing

        # Align by common dates
        common_index = series1.index.intersection(series2.index)
        series1 = series1.loc[common_index]
        series2 = series2.loc[common_index]

        if len(series1) < 3 or len(series2) < 3:
            continue

        # Shapiro normality test
        shapiro1 = shapiro(series1)
        shapiro2 = shapiro(series2)

        if shapiro1.pvalue > 0.05 and shapiro2.pvalue > 0.05:
            levene_test = levene(series1, series2)
            ttest = ttest_ind(series1, series2, equal_var=(levene_test.pvalue > 0.05))

            results.append({
                "region": region,
                "major_city": major_city,
                "peripheral_city": peripheral_city,
                "test": "t-test",
                "p_value": ttest.pvalue,
                "shapiro1_p": shapiro1.pvalue,
                "shapiro2_p": shapiro2.pvalue,
                "levene_p": levene_test.pvalue,
            })
        else:
            u_test = mannwhitneyu(series1, series2, alternative="two-sided")
            results.append({
                "region": region,
                "major_city": major_city,
                "peripheral_city": peripheral_city,
                "test": "mannwhitneyu",
                "p_value": u_test.pvalue,
                "shapiro1_p": shapiro1.pvalue,
                "shapiro2_p": shapiro2.pvalue,
                "levene_p": None,
            })

# Wrap in DataFrame
comparison_results = pd.DataFrame(results)
significant_comparisons = comparison_results[comparison_results["p_value"] < 0.05]

In [23]:
comparison_results

Unnamed: 0,region,major_city,peripheral_city,test,p_value,shapiro1_p,shapiro2_p,levene_p
0,Central,Bangkok,Ayutthaya,mannwhitneyu,0.01218165,0.000739,0.066649,
1,Central,Bangkok,Nakhon Pathom,mannwhitneyu,0.001426929,0.000739,0.006449,
2,Central,Bangkok,Nakhon Sawan,mannwhitneyu,0.01372281,0.000739,0.00026,
3,Central,Bangkok,Nonthaburi,mannwhitneyu,0.0002364395,0.000739,0.077411,
4,Central,Bangkok,Pathum Thani,mannwhitneyu,0.004547958,0.000739,0.007828,
5,Central,Bangkok,Phitsanulok,mannwhitneyu,0.7528263,0.000739,0.012013,
6,Central,Bangkok,Samut Prakan,mannwhitneyu,0.6348721,0.000739,0.002519,
7,Central,Bangkok,Samut Sakhon,mannwhitneyu,1.51553e-05,0.000739,0.006973,
8,Central,Bangkok,Saraburi,mannwhitneyu,9.353249e-06,0.000739,0.003264,
9,Central,Bangkok,Suphan Buri,mannwhitneyu,1.661274e-07,0.000739,3e-06,


In [24]:
significant_comparisons

Unnamed: 0,region,major_city,peripheral_city,test,p_value,shapiro1_p,shapiro2_p,levene_p
0,Central,Bangkok,Ayutthaya,mannwhitneyu,0.01218165,0.000739,0.066649,
1,Central,Bangkok,Nakhon Pathom,mannwhitneyu,0.001426929,0.000739,0.006449,
2,Central,Bangkok,Nakhon Sawan,mannwhitneyu,0.01372281,0.000739,0.00026,
3,Central,Bangkok,Nonthaburi,mannwhitneyu,0.0002364395,0.000739,0.077411,
4,Central,Bangkok,Pathum Thani,mannwhitneyu,0.004547958,0.000739,0.007828,
7,Central,Bangkok,Samut Sakhon,mannwhitneyu,1.51553e-05,0.000739,0.006973,
8,Central,Bangkok,Saraburi,mannwhitneyu,9.353249e-06,0.000739,0.003264,
9,Central,Bangkok,Suphan Buri,mannwhitneyu,1.661274e-07,0.000739,3e-06,
10,North,Chiang Mai,Lampang,mannwhitneyu,0.0006826784,0.008207,0.016125,
11,North,Chiang Mai,Lamphun,mannwhitneyu,0.004634405,0.008207,0.007953,


In [None]:
# export train data for modeling
unstack_resampled = resampled_aqi.unstack().reset_index()
train_data = unstack_resampled.pivot_table(values=0, index=["City", "time"], columns=["level_1"]).reset_index()
# train_data.to_csv("air_data_train.csv")

In [None]:
# export test data (19 - 21 April, 2025)
unstack_resampled = resampled_aqi.unstack().reset_index()
test_data = unstack_resampled.pivot_table(values=0, index=["City", "time"], columns=["level_1"]).reset_index()
test_dates = pd.to_datetime(["2025-04-19", "2025-04-20", "2025-04-21"])
test_data = test_data[test_data["time"].isin(test_dates)]
# test_data.to_csv("air_quality_test.csv")