# Comparative Analysis of Air Quality Variability Between Major and Peripheral Cities: A Case Study of Thailand

## Import libraries

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import ttest_ind, mannwhitneyu, shapiro, levene
from itertools import product

## Import dataset

In [2]:
# Load the cleaned Thai air-quality readings; the path is relative to the working directory.
df = pd.read_csv(filepath_or_buffer="thai_air_data_cleaned.csv")

In [3]:
df.head()

Unnamed: 0,Timestamp,City,name,lat,lon,co,dew,h,no2,o3,p,pm10,pm25,r,so2,t,w,time
0,2025-02-19 18:11:33,Samut Prakan,"City Hall, Samut Prakan, Thailand (ศาลากลางจัง...",13.599149,100.597345,0.1,-5.9,8.4,9.3,13.4,1008.5,55.0,110.0,0.6,0.6,31.7,0.5,2025-02-20 00:00:00
1,2025-02-19 18:11:33,Nakhon Ratchasima,"Municipal Waste Water Pumping Station, Nakhon ...",14.979537,102.098335,,-4.6,9.2,11.0,23.7,985.9,,100.0,0.2,0.6,31.9,0.5,2025-02-20 00:00:00
2,2025-02-19 18:11:34,Surin,"Pyramid House, Surin, Thailand",14.793612,103.43056,,-4.6,9.2,,,993.9,,,,,32.0,0.5,
3,2025-02-19 18:11:34,Sakon Nakhon,"Meteorological stations, Sakon Nakhon, Thailan...",17.15662,104.133216,0.1,-4.9,9.9,,20.1,991.0,43.0,63.0,,1.1,30.2,1.5,2025-02-20 00:00:00
4,2025-02-19 18:11:35,Mueang Khon Kaen,"Khonkaen, Thailand (ส่วนอุทกวิทยา สำนักงานทรัพ...",16.445383,102.83525,,-5.2,9.0,,,988.0,,79.0,0.6,,31.5,0.5,2025-02-20 00:00:00


In [4]:
df.shape

(1866, 18)

In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1866 entries, 0 to 1865
Data columns (total 18 columns):
 #   Column     Non-Null Count  Dtype  
---  ------     --------------  -----  
 0   Timestamp  1866 non-null   object 
 1   City       1866 non-null   object 
 2   name       1866 non-null   object 
 3   lat        1866 non-null   float64
 4   lon        1866 non-null   float64
 5   co         1199 non-null   float64
 6   dew        1574 non-null   float64
 7   h          1866 non-null   float64
 8   no2        1439 non-null   float64
 9   o3         1484 non-null   float64
 10  p          1866 non-null   float64
 11  pm10       1594 non-null   float64
 12  pm25       1812 non-null   float64
 13  r          912 non-null    float64
 14  so2        1376 non-null   float64
 15  t          1866 non-null   float64
 16  w          1866 non-null   float64
 17  time       1812 non-null   object 
dtypes: float64(14), object(4)
memory usage: 262.5+ KB


In [6]:
# Parse the `time` strings into datetime64 values; missing entries become NaT.
# assign() replaces the existing column in place, so the column order is unchanged.
df = df.assign(time=pd.to_datetime(df["time"]))

In [7]:
df.isna().sum()

Timestamp      0
City           0
name           0
lat            0
lon            0
co           667
dew          292
h              0
no2          427
o3           382
p              0
pm10         272
pm25          54
r            954
so2          490
t              0
w              0
time          54
dtype: int64

In [8]:
# Percentage of missing values per column, broken down by city.
# The mean of the boolean "is missing" flags is the missing fraction within each city.
(
    df.drop(columns="City")
    .isna()
    .groupby(df["City"])
    .mean()
    .mul(100)
    .round(2)
)

Unnamed: 0_level_0,Timestamp,name,lat,lon,co,dew,h,no2,o3,p,pm10,pm25,r,so2,t,w,time
City,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
Ayutthaya,0.0,0.0,0.0,0.0,0.0,15.09,0.0,0.0,0.0,0.0,0.0,0.0,60.38,0.0,0.0,0.0,0.0
Bangkok,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,24.53,0.0,0.0,0.0,0.0
Chiang Mai,0.0,0.0,0.0,0.0,100.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,73.58,0.0,0.0,0.0,0.0
Chonburi,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,36.54,0.0,0.0,0.0,0.0
Kamphaeng Phet,0.0,0.0,0.0,0.0,100.0,100.0,0.0,100.0,100.0,0.0,0.0,0.0,100.0,100.0,0.0,0.0,0.0
Kanchanaburi,0.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,40.0,100.0,0.0,0.0,0.0
Krabi,0.0,0.0,0.0,0.0,100.0,0.0,0.0,100.0,100.0,0.0,100.0,0.0,100.0,100.0,0.0,0.0,0.0
Lampang,0.0,0.0,0.0,0.0,0.0,20.75,0.0,0.0,0.0,0.0,0.0,0.0,54.72,0.0,0.0,0.0,0.0
Lamphun,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,79.63,0.0,0.0,0.0,0.0
Mueang Chiang Rai,0.0,0.0,0.0,0.0,0.0,100.0,0.0,0.0,0.0,0.0,0.0,0.0,100.0,100.0,0.0,0.0,0.0


In [9]:
df.groupby(["City"]).size()

City
Ayutthaya              53
Bangkok                53
Chiang Mai             53
Chonburi               52
Kamphaeng Phet          1
Kanchanaburi           55
Krabi                   1
Lampang                53
Lamphun                54
Mueang Chiang Rai      53
Mueang Khon Kaen       52
Nakhon Pathom          52
Nakhon Ratchasima      55
Nakhon Sawan           55
Nan                    54
Narathiwat             52
Nong Khai              55
Nonthaburi             54
Pathum Thani           52
Pattani                 1
Phayao                 54
Phitsanulok            54
Phuket                 52
Prachuap Khiri Khan    53
Ratchaburi             52
Rayong                 52
Sakon Nakhon           54
Samut Prakan           55
Samut Sakhon           53
Saraburi               52
Satun                  52
Suphan Buri            55
Surin                  54
Trang                  52
Trat                   53
Ubon Ratchathani       53
Uthai Thani             1
Uttaradit              53
Yala   

In [10]:
# Drop cities that cannot support a per-city analysis:
#   * Kamphaeng Phet, Krabi, Pattani and Uthai Thani each have a single record
#     (see the per-city counts above);
#   * Surin has 54 records but is excluded as well.
#     NOTE(review): presumably because its pollutant readings and `time` are
#     missing (the Surin row shown by df.head() has neither) — confirm.
single_record_cities = ["Kamphaeng Phet", "Krabi", "Pattani", "Uthai Thani"]
other_excluded_cities = ["Surin"]
excluded_cities = single_record_cities + other_excluded_cities

# .copy() detaches the filtered frame from the unfiltered one, so the column
# assignments made in later cells act on an independent DataFrame and cannot
# raise SettingWithCopyWarning.
df = df[~df["City"].isin(excluded_cities)].copy()

# Remaining number of records per city.
df.groupby(["City"]).size()

City
Ayutthaya              53
Bangkok                53
Chiang Mai             53
Chonburi               52
Kanchanaburi           55
Lampang                53
Lamphun                54
Mueang Chiang Rai      53
Mueang Khon Kaen       52
Nakhon Pathom          52
Nakhon Ratchasima      55
Nakhon Sawan           55
Nan                    54
Narathiwat             52
Nong Khai              55
Nonthaburi             54
Pathum Thani           52
Phayao                 54
Phitsanulok            54
Phuket                 52
Prachuap Khiri Khan    53
Ratchaburi             52
Rayong                 52
Sakon Nakhon           54
Samut Prakan           55
Samut Sakhon           53
Saraburi               52
Satun                  52
Suphan Buri            55
Trang                  52
Trat                   53
Ubon Ratchathani       53
Uttaradit              53
Yala                   52
dtype: int64

## Data preprocessing

### Define city type (major/peripheral) and region (North, Northeast, East, Central, West, South)

In [11]:
# City type: major and peripheral
# Manual classification of every city in the raw dataset. Cities that were
# filtered out earlier are kept in the mapping; they simply never match.
city_type = {
    'Samut Prakan': 'Peripheral',
    'Nakhon Ratchasima': 'Major',
    'Surin': 'Peripheral',
    'Sakon Nakhon': 'Peripheral',
    'Mueang Khon Kaen': 'Major',
    'Nong Khai': 'Peripheral',
    'Kanchanaburi': 'Major',
    'Suphan Buri': 'Peripheral',
    'Uthai Thani': 'Peripheral',
    'Nakhon Sawan': 'Peripheral',
    'Phitsanulok': 'Peripheral',
    'Kamphaeng Phet': 'Peripheral',
    'Lamphun': 'Peripheral',
    'Lampang': 'Peripheral',
    'Phayao': 'Peripheral',
    'Bangkok': 'Major',
    'Trat': 'Peripheral',
    'Nonthaburi': 'Peripheral',
    'Nan': 'Peripheral',
    'Ubon Ratchathani': 'Peripheral',
    'Samut Sakhon': 'Peripheral',
    'Chiang Mai': 'Major',
    'Mueang Chiang Rai': 'Peripheral',
    'Ayutthaya': 'Peripheral',
    'Uttaradit': 'Peripheral',
    'Rayong': 'Major',
    'Pathum Thani': 'Peripheral',
    'Chonburi': 'Major',
    'Nakhon Pathom': 'Peripheral',
    'Ratchaburi': 'Peripheral',
    'Phuket': 'Major',
    'Yala': 'Peripheral',
    'Krabi': 'Peripheral',
    'Trang': 'Peripheral',
    'Satun': 'Peripheral',
    'Pattani': 'Peripheral',
    'Narathiwat': 'Peripheral',
    'Prachuap Khiri Khan': 'Peripheral',
    'Saraburi': 'Peripheral',
}

df['City_type'] = df['City'].map(city_type)

# Series.map leaves NaN for any city missing from the dictionary, and groupby
# would then silently drop those rows. Fail loudly instead.
unmapped_type_cities = df.loc[df['City_type'].isna(), 'City'].unique()
assert len(unmapped_type_cities) == 0, f"Cities without a City_type: {sorted(unmapped_type_cities)}"

In [12]:
# Region: north, northeast, central, south, west, east
# Manual assignment of every city in the raw dataset to one of six regions.
# Cities that were filtered out earlier are kept in the mapping; they simply
# never match.
city_region = {
    'Samut Prakan': 'Central',
    'Nakhon Ratchasima': 'Northeast',
    'Surin': 'Northeast',
    'Sakon Nakhon': 'Northeast',
    'Mueang Khon Kaen': 'Northeast',
    'Nong Khai': 'Northeast',
    'Kanchanaburi': 'West',
    'Suphan Buri': 'Central',
    'Uthai Thani': 'Central',
    'Nakhon Sawan': 'Central',
    'Phitsanulok': 'Central',
    'Kamphaeng Phet': 'Central',
    'Lamphun': 'North',
    'Lampang': 'North',
    'Phayao': 'North',
    'Bangkok': 'Central',
    'Trat': 'East',
    'Nonthaburi': 'Central',
    'Nan': 'North',
    'Ubon Ratchathani': 'Northeast',
    'Samut Sakhon': 'Central',
    'Chiang Mai': 'North',
    'Mueang Chiang Rai': 'North',
    'Ayutthaya': 'Central',
    'Uttaradit': 'North',
    'Rayong': 'East',
    'Pathum Thani': 'Central',
    'Chonburi': 'East',
    'Nakhon Pathom': 'Central',
    'Ratchaburi': 'West',
    'Phuket': 'South',
    'Yala': 'South',
    'Krabi': 'South',
    'Trang': 'South',
    'Satun': 'South',
    'Pattani': 'South',
    'Narathiwat': 'South',
    'Prachuap Khiri Khan': 'West',
    'Saraburi': 'Central',
}

df['City_region'] = df['City'].map(city_region)

# Series.map leaves NaN for any city missing from the dictionary, and groupby
# would then silently drop those rows. Fail loudly instead.
unmapped_region_cities = df.loc[df['City_region'].isna(), 'City'].unique()
assert len(unmapped_region_cities) == 0, f"Cities without a City_region: {sorted(unmapped_region_cities)}"

In [13]:
df.groupby(["City_region", "City_type"])["City"].nunique()

City_region  City_type 
Central      Major          1
             Peripheral    10
East         Major          2
             Peripheral     1
North        Major          1
             Peripheral     6
Northeast    Major          2
             Peripheral     3
South        Major          1
             Peripheral     4
West         Major          1
             Peripheral     2
Name: City, dtype: int64

### Impute missing values

In [14]:
# Impute gaps city by city: order each city's rows chronologically, carry the
# last known value forward, then back-fill whatever is still missing at the start.
# A column that a city never reports stays NaN for that city.
df_no_missing = (
    df.sort_values(by=["City", "time"])
    .groupby(["City"])
    .apply(lambda city_rows: city_rows.ffill().bfill(), include_groups=False)
    .droplevel(1)    # discard the original row labels kept as the inner index level
    .reset_index()   # bring "City" back as the first column
)
df_no_missing

Unnamed: 0,City,Timestamp,name,lat,lon,co,dew,h,no2,o3,p,pm10,pm25,r,so2,t,w,time,City_type,City_region
0,Ayutthaya,2025-02-19 18:11:46,"Ayutthaya Witthayalai School, Ayutthaya, Thail...",14.352220,100.565325,0.1,-5.9,9.5,3.5,5.2,1008.4,64.0,74.0,0.5,0.6,29.6,2.5,2025-02-20 00:00:00,Peripheral,Central
1,Ayutthaya,2025-02-20 12:00:25,"Ayutthaya Witthayalai School, Ayutthaya, Thail...",14.352220,100.565325,0.1,23.8,69.5,3.5,5.2,1013.9,64.0,135.0,0.5,0.6,30.0,2.5,2025-02-20 18:00:00,Peripheral,Central
2,Ayutthaya,2025-02-21 01:36:00,"Ayutthaya Witthayalai School, Ayutthaya, Thail...",14.352220,100.565325,0.1,25.3,84.8,3.5,5.2,1011.4,64.0,138.0,0.5,0.6,25.8,1.0,2025-02-21 08:00:00,Peripheral,Central
3,Ayutthaya,2025-02-22 01:53:38,"Ayutthaya Witthayalai School, Ayutthaya, Thail...",14.352220,100.565325,0.1,25.3,76.0,3.5,5.2,1012.9,64.0,95.0,0.5,0.6,27.2,0.5,2025-02-22 08:00:00,Peripheral,Central
4,Ayutthaya,2025-02-22 02:11:00,"Ayutthaya Witthayalai School, Ayutthaya, Thail...",14.352220,100.565325,0.1,25.3,76.0,3.5,5.2,1012.4,64.0,88.0,0.5,0.6,26.6,0.2,2025-02-22 09:00:00,Peripheral,Central
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1803,Yala,2025-04-07 14:01:14,"White Elephant Park, Yala, Thailand (สนามโรงพิ...",6.546205,101.283137,,-6.6,8.0,,,1006.7,33.0,40.0,0.2,,31.8,1.5,2025-04-07 19:00:00,Peripheral,South
1804,Yala,2025-04-08 15:09:19,"White Elephant Park, Yala, Thailand (สนามโรงพิ...",6.546205,101.283137,,24.4,66.5,,,1008.0,33.0,46.0,0.2,,31.4,3.6,2025-04-08 21:00:00,Peripheral,South
1805,Yala,2025-04-09 13:00:23,"White Elephant Park, Yala, Thailand (สนามโรงพิ...",6.546205,101.283137,,-6.7,7.9,,,1009.5,33.0,42.0,0.4,,31.8,3.0,2025-04-09 19:00:00,Peripheral,South
1806,Yala,2025-04-10 13:00:19,"White Elephant Park, Yala, Thailand (สนามโรงพิ...",6.546205,101.283137,,-6.7,7.5,,,1005.8,33.0,39.0,0.2,,32.6,3.6,2025-04-10 19:00:00,Peripheral,South


### Resample data to day level

In [15]:
# Names of the twelve measurement columns in the dataset.
# Bound to a name so the list can be reused instead of being a throw-away expression.
measurement_columns = ["co", "dew", "h", "no2", "o3", "p", "pm10", "pm25", "r", "so2", "t", "w"]
measurement_columns

['co', 'dew', 'h', 'no2', 'o3', 'p', 'pm10', 'pm25', 'r', 'so2', 't', 'w']

In [16]:
df_no_missing.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1808 entries, 0 to 1807
Data columns (total 20 columns):
 #   Column       Non-Null Count  Dtype         
---  ------       --------------  -----         
 0   City         1808 non-null   object        
 1   Timestamp    1808 non-null   object        
 2   name         1808 non-null   object        
 3   lat          1808 non-null   float64       
 4   lon          1808 non-null   float64       
 5   co           1227 non-null   float64       
 6   dew          1597 non-null   float64       
 7   h            1808 non-null   float64       
 8   no2          1438 non-null   float64       
 9   o3           1547 non-null   float64       
 10  p            1808 non-null   float64       
 11  pm10         1753 non-null   float64       
 12  pm25         1808 non-null   float64       
 13  r            1648 non-null   float64       
 14  so2          1437 non-null   float64       
 15  t            1808 non-null   float64       
 16  w     

In [17]:
# Wide table: one row per observation time, columns keyed by (metric, City).
# Readings that share the same time and city are averaged.
aqi_pivot = df_no_missing.pivot_table(
    index="time",
    columns=["City"],
    values=["pm25", "pm10", "co", "dew", "h", "no2", "o3", "so2", "p", "r", "t", "w"],
    aggfunc="mean",
    fill_value=np.nan,
)
aqi_pivot

Unnamed: 0_level_0,co,co,co,co,co,co,co,co,co,co,...,w,w,w,w,w,w,w,w,w,w
City,Ayutthaya,Bangkok,Kanchanaburi,Lampang,Mueang Chiang Rai,Nakhon Sawan,Nan,Nong Khai,Nonthaburi,Pathum Thani,...,Samut Prakan,Samut Sakhon,Saraburi,Satun,Suphan Buri,Trang,Trat,Ubon Ratchathani,Uttaradit,Yala
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2025-02-19 12:00:00,,,,,,,,,,,...,,,,,,,,,1.0,
2025-02-20 00:00:00,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,0.5,2.0,0.5,2.0,2.5,5.1,2.5,1.5,,3.0
2025-02-20 01:00:00,,,,,,,,,,,...,,,,,,,,,,
2025-02-20 18:00:00,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,1.5,1.0,0.2,3.0,2.0,1.5,1.0,2.0,3.0,4.6
2025-02-21 04:00:00,,,,,,,,,,,...,,,,0.5,,,,,,1.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-04-09 19:00:00,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,5.1,5.1,0.2,2.5,0.2,4.1,4.1,2.0,3.6,3.0
2025-04-10 06:00:00,,,,,,0.1,,,,,...,,,,,,,,,,
2025-04-10 18:00:00,,,,,,,,,,,...,,,,,,,,,,
2025-04-10 19:00:00,0.1,0.1,0.1,0.1,0.1,,0.1,0.1,0.1,0.1,...,1.0,1.5,1.0,0.5,1.0,1.0,1.0,1.5,2.0,3.6


In [18]:
# Collapse the irregular observation times into one row per calendar day by
# averaging every (metric, City) column within each day.
resampled_aqi = aqi_pivot.groupby(pd.Grouper(freq="D")).mean()
resampled_aqi

Unnamed: 0_level_0,co,co,co,co,co,co,co,co,co,co,...,w,w,w,w,w,w,w,w,w,w
City,Ayutthaya,Bangkok,Kanchanaburi,Lampang,Mueang Chiang Rai,Nakhon Sawan,Nan,Nong Khai,Nonthaburi,Pathum Thani,...,Samut Prakan,Samut Sakhon,Saraburi,Satun,Suphan Buri,Trang,Trat,Ubon Ratchathani,Uttaradit,Yala
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2025-02-19,,,,,,,,,,,...,,,,,,,,,1.0,
2025-02-20,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,1.0,1.5,0.35,2.5,2.25,3.3,1.75,1.75,3.0,3.8
2025-02-21,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,1.0,1.5,0.5,0.5,4.1,6.1,0.5,3.6,1.0,1.5
2025-02-22,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,2.6,2.4,0.6,2.5,1.233333,1.0,1.75,3.05,1.0,3.05
2025-02-23,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,2.8,1.5,0.6,4.05,2.3,2.25,1.0,4.6,2.0,4.3
2025-02-24,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,1.0,2.0,4.6,1.5,4.6,1.5,7.2,5.6,3.0,1.0
2025-02-25,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,1.5,1.5,2.5,2.0,3.6,2.0,4.1,3.6,2.5,3.0
2025-02-26,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,1.75,1.5,3.3,6.1,2.8,2.25,3.8,3.55,1.25,2.05
2025-02-27,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,3.6,3.6,3.0,7.2,3.0,3.6,4.6,1.5,2.0,4.6
2025-02-28,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,0.1,...,3.0,2.0,2.0,4.6,1.0,4.6,0.5,2.0,0.5,3.0


In [19]:
# Put the city on the outer column level so each city's metrics sit side by side.
# The "City" level is looked up by name instead of blindly swapping levels 0 and 1,
# which keeps this cell idempotent: re-running it no longer flips the levels back.
city_level = resampled_aqi.columns.names.index("City")
if city_level != 0:
    resampled_aqi = resampled_aqi.swaplevel(0, city_level, axis=1)

# Group all columns of the same city together, in alphabetical city order.
resampled_aqi = resampled_aqi.sort_index(axis=1, level=0)
resampled_aqi

City,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,...,Uttaradit,Uttaradit,Yala,Yala,Yala,Yala,Yala,Yala,Yala,Yala
Unnamed: 0_level_1,co,dew,h,no2,o3,p,pm10,pm25,r,so2,...,t,w,dew,h,p,pm10,pm25,r,t,w
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2025-02-19,,,,,,,,,,,...,23.0,1.0,,,,,,,,
2025-02-20,0.1,8.95,39.5,3.5,5.2,1011.15,64.0,104.5,0.5,0.6,...,26.3,3.0,-6.65,8.35,1006.55,33.0,44.0,0.2,30.85,3.8
2025-02-21,0.1,25.3,84.8,3.5,5.2,1011.4,64.0,138.0,0.5,0.6,...,25.7,1.0,-8.2,9.2,1009.9,33.0,40.0,0.2,27.1,1.5
2025-02-22,0.1,15.066667,53.633333,3.5,5.2,1013.266667,64.0,82.666667,0.5,0.6,...,26.266667,1.0,8.3,46.95,1011.2,33.0,46.0,99.0,28.9,3.05
2025-02-23,0.1,9.6,38.7,3.5,5.2,1013.15,64.0,92.5,0.5,0.6,...,31.15,2.0,-7.65,8.3,1009.45,33.0,44.0,99.0,29.7,4.3
2025-02-24,0.1,-6.1,10.0,3.5,5.2,1016.9,64.0,42.0,0.5,0.6,...,27.8,3.0,23.6,96.5,1009.2,33.0,38.0,0.5,24.2,1.0
2025-02-25,0.1,17.8,51.5,3.5,5.2,1016.4,64.0,52.0,0.5,0.6,...,22.5,2.5,25.1,95.9,1009.4,33.0,53.0,0.4,27.7,3.0
2025-02-26,0.1,5.85,33.15,3.5,5.2,1016.75,64.0,53.0,0.5,0.6,...,24.65,1.25,8.25,51.5,1011.75,33.0,47.0,0.35,27.45,2.05
2025-02-27,0.1,19.0,41.0,3.5,5.2,1014.0,64.0,73.0,0.5,0.6,...,31.2,2.0,-7.1,8.2,1007.5,33.0,51.0,99.5,30.5,4.6
2025-02-28,0.1,-6.1,7.9,3.5,5.2,1014.0,64.0,70.0,0.5,0.6,...,32.5,0.5,-7.2,8.0,1006.9,33.0,49.0,99.5,30.8,3.0


In [20]:
# Close the remaining gaps along the time axis.
resampled_aqi = (
    resampled_aqi
    .ffill(axis="index")   # carry the previous day's value forward
    .bfill(axis="index")   # back-fill leading gaps (e.g. the first day) from the next day
)
resampled_aqi

City,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,Ayutthaya,...,Uttaradit,Uttaradit,Yala,Yala,Yala,Yala,Yala,Yala,Yala,Yala
Unnamed: 0_level_1,co,dew,h,no2,o3,p,pm10,pm25,r,so2,...,t,w,dew,h,p,pm10,pm25,r,t,w
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
2025-02-19,0.1,8.95,39.5,3.5,5.2,1011.15,64.0,104.5,0.5,0.6,...,23.0,1.0,-6.65,8.35,1006.55,33.0,44.0,0.2,30.85,3.8
2025-02-20,0.1,8.95,39.5,3.5,5.2,1011.15,64.0,104.5,0.5,0.6,...,26.3,3.0,-6.65,8.35,1006.55,33.0,44.0,0.2,30.85,3.8
2025-02-21,0.1,25.3,84.8,3.5,5.2,1011.4,64.0,138.0,0.5,0.6,...,25.7,1.0,-8.2,9.2,1009.9,33.0,40.0,0.2,27.1,1.5
2025-02-22,0.1,15.066667,53.633333,3.5,5.2,1013.266667,64.0,82.666667,0.5,0.6,...,26.266667,1.0,8.3,46.95,1011.2,33.0,46.0,99.0,28.9,3.05
2025-02-23,0.1,9.6,38.7,3.5,5.2,1013.15,64.0,92.5,0.5,0.6,...,31.15,2.0,-7.65,8.3,1009.45,33.0,44.0,99.0,29.7,4.3
2025-02-24,0.1,-6.1,10.0,3.5,5.2,1016.9,64.0,42.0,0.5,0.6,...,27.8,3.0,23.6,96.5,1009.2,33.0,38.0,0.5,24.2,1.0
2025-02-25,0.1,17.8,51.5,3.5,5.2,1016.4,64.0,52.0,0.5,0.6,...,22.5,2.5,25.1,95.9,1009.4,33.0,53.0,0.4,27.7,3.0
2025-02-26,0.1,5.85,33.15,3.5,5.2,1016.75,64.0,53.0,0.5,0.6,...,24.65,1.25,8.25,51.5,1011.75,33.0,47.0,0.35,27.45,2.05
2025-02-27,0.1,19.0,41.0,3.5,5.2,1014.0,64.0,73.0,0.5,0.6,...,31.2,2.0,-7.1,8.2,1007.5,33.0,51.0,99.5,30.5,4.6
2025-02-28,0.1,-6.1,7.9,3.5,5.2,1014.0,64.0,70.0,0.5,0.6,...,32.5,0.5,-7.2,8.0,1006.9,33.0,49.0,99.5,30.8,3.0


In [21]:
# One row per (City, metric) pair, with the describe() statistics as columns.
# reset_index() names the unnamed metric level "level_1".
summary_statistics = resampled_aqi.describe().T.reset_index()

# Show one table per metric, in order of first appearance.
for metric, metric_stats in summary_statistics.groupby("level_1", sort=False):
    print(f"Summary statistics of {metric}")
    display(metric_stats)

Summary statistics of co


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
0,Ayutthaya,co,52.0,0.1,4.2352780000000005e-17,0.1,0.1,0.1,0.1,0.1
12,Bangkok,co,52.0,0.1,4.203955e-17,0.1,0.1,0.1,0.1,0.1
44,Kanchanaburi,co,52.0,0.1,4.2352780000000005e-17,0.1,0.1,0.1,0.1,0.1
54,Lampang,co,52.0,0.1,4.203955e-17,0.1,0.1,0.1,0.1,0.1
77,Mueang Chiang Rai,co,52.0,0.1,4.203955e-17,0.1,0.1,0.1,0.1,0.1
114,Nakhon Sawan,co,52.0,0.1,4.2352780000000005e-17,0.1,0.1,0.1,0.1,0.1
126,Nan,co,52.0,0.1,4.2352780000000005e-17,0.1,0.1,0.1,0.1,0.1
146,Nong Khai,co,52.0,0.1,4.2352780000000005e-17,0.1,0.1,0.1,0.1,0.1
158,Nonthaburi,co,52.0,0.1,4.2352780000000005e-17,0.1,0.1,0.1,0.1,0.1
169,Pathum Thani,co,52.0,0.1,4.203955e-17,0.1,0.1,0.1,0.1,0.1


Summary statistics of dew


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
1,Ayutthaya,dew,52.0,2.936859,12.268427,-7.4,-6.25,-5.75,14.4,25.7
33,Chonburi,dew,52.0,6.441346,12.238251,-8.0,-5.55,8.875,16.45,26.6
45,Kanchanaburi,dew,52.0,4.15641,11.690678,-8.2,-4.925,-4.05,15.6,24.9
55,Lampang,dew,52.0,3.1,10.985623,-8.0,-4.7,-3.8,16.2,21.5
66,Lamphun,dew,52.0,12.619231,8.863741,-6.4,11.025,16.5,19.0,21.3
86,Mueang Khon Kaen,dew,52.0,7.299038,11.886854,-8.2,-4.9,7.375,18.0,25.1
94,Nakhon Pathom,dew,52.0,6.667308,12.922578,-8.8,-5.8,8.75,20.2,25.5
104,Nakhon Ratchasima,dew,52.0,2.745192,11.883603,-8.1,-5.0,-4.35,8.75,26.5
115,Nakhon Sawan,dew,52.0,5.355769,12.715197,-8.9,-5.65,-3.8,20.525,26.8
127,Nan,dew,52.0,1.707692,10.132589,-8.8,-5.4,-4.9,8.95,21.3


Summary statistics of h


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
2,Ayutthaya,h,52.0,25.792949,22.849204,7.4,7.7,8.725,44.125,86.25
13,Bangkok,h,52.0,31.523077,26.20661,7.3,8.1875,18.45,53.075,90.5
23,Chiang Mai,h,52.0,30.218269,18.337578,8.2,10.575,31.0,45.85,66.0
34,Chonburi,h,52.0,31.414423,23.995368,6.9,8.3,33.175,47.6,86.65
46,Kanchanaburi,h,52.0,24.799679,21.93164,7.1,7.975,10.55,37.0125,74.2
56,Lampang,h,52.0,32.173077,25.391884,7.2,8.975,22.275,54.75,80.2
67,Lamphun,h,52.0,38.475,18.616471,8.0,30.4,38.725,47.625,79.4
78,Mueang Chiang Rai,h,52.0,58.343269,13.240098,33.3,50.0,56.5,65.725,84.6
87,Mueang Khon Kaen,h,52.0,33.230769,24.740812,6.9,8.3,33.5,50.175,93.0
95,Nakhon Pathom,h,52.0,30.891346,24.221184,7.2,7.6,29.425,48.775,88.8


Summary statistics of no2


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
3,Ayutthaya,no2,52.0,3.5,0.0,3.5,3.5,3.5,3.5,3.5
14,Bangkok,no2,52.0,1.347115,0.9565263,0.6,0.6,1.2,1.35,5.2
24,Chiang Mai,no2,52.0,7.5,0.0,7.5,7.5,7.5,7.5,7.5
35,Chonburi,no2,52.0,9.3,0.0,9.3,9.3,9.3,9.3,9.3
57,Lampang,no2,52.0,1.8,2.242109e-16,1.8,1.8,1.8,1.8,1.8
68,Lamphun,no2,52.0,2.723077,3.215686,0.6,0.6,0.6,7.5,7.5
79,Mueang Chiang Rai,no2,52.0,0.6,1.121055e-16,0.6,0.6,0.6,0.6,0.6
106,Nakhon Ratchasima,no2,52.0,6.573077,2.905327,2.9,4.1,5.525,8.25,13.6
117,Nakhon Sawan,no2,52.0,5.550641,2.405411,2.4,3.5,4.7,7.5,11.6
129,Nan,no2,52.0,1.077885,0.3680118,0.6,0.6,1.2,1.2,2.65


Summary statistics of o3


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
4,Ayutthaya,o3,52.0,5.2,8.968438e-16,5.2,5.2,5.2,5.2,5.2
15,Bangkok,o3,52.0,18.9,6.208557,4.4,15.3,19.0,22.5,35.5
25,Chiang Mai,o3,52.0,14.6,1.793688e-15,14.6,14.6,14.6,14.6,14.6
36,Chonburi,o3,52.0,6.0,0.0,6.0,6.0,6.0,6.0,6.0
47,Kanchanaburi,o3,52.0,6.7,8.968438e-16,6.7,6.7,6.7,6.7,6.7
58,Lampang,o3,52.0,11.5,0.0,11.5,11.5,11.5,11.5,11.5
69,Lamphun,o3,52.0,4.903846,2.504796,1.6,2.8,4.0,7.1625,9.9
80,Mueang Chiang Rai,o3,52.0,12.7,5.381063e-15,12.7,12.7,12.7,12.7,12.7
96,Nakhon Pathom,o3,52.0,6.0,0.0,6.0,6.0,6.0,6.0,6.0
107,Nakhon Ratchasima,o3,52.0,25.659615,8.651992,6.0,21.35,25.9,32.6125,43.4


Summary statistics of p


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
5,Ayutthaya,p,52.0,1011.324359,2.831361,1005.0,1009.3125,1011.175,1013.179167,1017.1
16,Bangkok,p,52.0,1011.663462,2.921105,1004.7,1009.9625,1011.2,1013.325,1018.2
26,Chiang Mai,p,52.0,986.332692,18.180017,970.4,973.575,975.9,1008.5,1020.0
37,Chonburi,p,52.0,1009.918269,3.116497,1005.2,1007.45,1009.85,1011.15,1018.5
48,Kanchanaburi,p,52.0,1008.312179,3.283565,1003.5,1006.05,1008.1,1010.308333,1015.7
59,Lampang,p,52.0,989.009615,11.218367,974.2,981.05,983.8,996.35,1015.6
70,Lamphun,p,52.0,999.6,16.792003,970.6,980.2,1010.0,1012.0,1020.0
81,Mueang Chiang Rai,p,52.0,978.288462,3.651734,971.8,976.525,977.35,980.025,985.8
88,Mueang Khon Kaen,p,52.0,996.590385,11.13475,984.5,988.0,991.0,1005.65,1022.0
97,Nakhon Pathom,p,52.0,1010.839423,3.209687,1004.0,1009.0875,1010.4,1013.0,1018.0


Summary statistics of pm10


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
6,Ayutthaya,pm10,52.0,64.0,0.0,64.0,64.0,64.0,64.0,64.0
17,Bangkok,pm10,52.0,50.471154,12.514076,29.0,42.75,50.0,57.0,86.0
27,Chiang Mai,pm10,52.0,82.0,0.0,82.0,82.0,82.0,82.0,82.0
38,Chonburi,pm10,52.0,33.0,0.0,33.0,33.0,33.0,33.0,33.0
49,Kanchanaburi,pm10,52.0,47.0,0.0,47.0,47.0,47.0,47.0,47.0
60,Lampang,pm10,52.0,75.0,0.0,75.0,75.0,75.0,75.0,75.0
71,Lamphun,pm10,52.0,82.0,0.0,82.0,82.0,82.0,82.0,82.0
82,Mueang Chiang Rai,pm10,52.0,55.173077,19.769724,12.5,43.0,51.5,72.0,92.0
89,Mueang Khon Kaen,pm10,52.0,40.0,0.0,40.0,40.0,40.0,40.0,40.0
98,Nakhon Pathom,pm10,52.0,40.778846,11.592938,14.5,29.875,40.0,51.0,60.0


Summary statistics of pm25


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
7,Ayutthaya,pm25,52.0,89.157051,35.971415,12.0,69.0,75.5,112.0,173.0
18,Bangkok,pm25,52.0,103.067308,29.145347,65.0,83.0,90.25,124.0,177.0
28,Chiang Mai,pm25,52.0,95.115385,32.436305,43.0,69.0,86.75,117.75,163.0
39,Chonburi,pm25,52.0,81.682692,29.688254,27.5,63.5,75.0,96.5,156.0
50,Kanchanaburi,pm25,52.0,79.971154,32.477355,6.0,57.75,73.5,93.875,169.0
61,Lampang,pm25,52.0,124.519231,36.955453,51.5,95.0,120.75,155.0,195.0
72,Lamphun,pm25,52.0,121.846154,38.567809,48.0,90.25,126.0,155.0,178.0
83,Mueang Chiang Rai,pm25,52.0,120.192308,43.018254,31.0,86.875,118.5,161.0,180.0
90,Mueang Khon Kaen,pm25,52.0,114.75,37.898407,5.0,86.0,118.75,151.0,172.0
99,Nakhon Pathom,pm25,52.0,88.644231,32.579667,38.0,66.0,80.5,109.75,154.0


Summary statistics of r


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
8,Ayutthaya,r,52.0,6.011538,23.267015,0.1,0.2,0.2,0.5,99.2
19,Bangkok,r,52.0,14.289423,32.412708,0.1,0.4,1.1,2.8,99.4
29,Chiang Mai,r,52.0,72.161538,43.368468,0.4,2.8,99.0,99.3,99.3
40,Chonburi,r,52.0,28.2625,43.723655,0.3,1.6375,1.7,99.25,99.7
51,Kanchanaburi,r,52.0,43.948077,49.560379,0.1,0.3,0.3,99.0,99.2
62,Lampang,r,52.0,49.915385,49.594799,0.1,0.9,49.95,99.0,99.4
73,Lamphun,r,52.0,49.773077,50.007451,0.1,0.1,50.55,99.3,99.6
91,Mueang Khon Kaen,r,52.0,27.032692,44.241307,0.1,0.4,0.4,99.0,99.6
100,Nakhon Pathom,r,52.0,35.613462,47.197757,0.3,0.6,0.6,99.0,99.3
110,Nakhon Ratchasima,r,52.0,43.103846,48.484697,0.2,1.55,4.4,99.0,99.7


Summary statistics of so2


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
9,Ayutthaya,so2,52.0,0.6,1.121055e-16,0.6,0.6,0.6,0.6,0.6
20,Bangkok,so2,52.0,0.6,1.121055e-16,0.6,0.6,0.6,0.6,0.6
30,Chiang Mai,so2,52.0,0.6,1.121055e-16,0.6,0.6,0.6,0.6,0.6
41,Chonburi,so2,52.0,1.1,0.0,1.1,1.1,1.1,1.1,1.1
63,Lampang,so2,52.0,0.6,1.121055e-16,0.6,0.6,0.6,0.6,0.6
74,Lamphun,so2,52.0,0.6,1.121055e-16,0.6,0.6,0.6,0.6,0.6
101,Nakhon Pathom,so2,52.0,9.0,0.0,9.0,9.0,9.0,9.0,9.0
111,Nakhon Ratchasima,so2,52.0,0.6,1.121055e-16,0.6,0.6,0.6,0.6,0.6
123,Nakhon Sawan,so2,52.0,0.6,1.121055e-16,0.6,0.6,0.6,0.6,0.6
135,Nan,so2,52.0,0.6,1.121055e-16,0.6,0.6,0.6,0.6,0.6


Summary statistics of t


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
10,Ayutthaya,t,52.0,31.698077,2.799947,24.5,30.1,32.3,33.4,36.6
21,Bangkok,t,52.0,32.896154,2.320177,26.0,31.5625,33.6,34.2,36.7
31,Chiang Mai,t,52.0,31.696154,2.985994,24.8,29.95,32.0,34.0,36.4
42,Chonburi,t,52.0,31.527885,2.667661,23.7,29.475,32.7,33.5,36.2
52,Kanchanaburi,t,52.0,32.674359,3.482126,25.2,30.075,34.0,35.0,38.3
64,Lampang,t,52.0,30.602885,3.912654,21.9,28.175,30.65,34.025,38.2
75,Lamphun,t,52.0,31.069231,3.64587,22.0,28.875,32.0,34.0,38.2
84,Mueang Chiang Rai,t,52.0,30.268269,4.065747,17.7,28.6375,30.8,33.15,35.6
92,Mueang Khon Kaen,t,52.0,30.323077,5.024913,19.0,26.0,30.375,35.05,37.0
102,Nakhon Pathom,t,52.0,32.015385,3.128718,23.3,30.8,32.775,34.35,36.0


Summary statistics of w


Unnamed: 0,City,level_1,count,mean,std,min,25%,50%,75%,max
11,Ayutthaya,w,52.0,2.415385,1.32208,0.2,1.5,2.5,3.0,5.1
22,Bangkok,w,52.0,2.375,1.120071,0.5,1.5,2.0,3.15,4.6
32,Chiang Mai,w,52.0,1.490385,0.767479,0.5,1.0,1.5,2.0,3.6
43,Chonburi,w,52.0,2.683654,1.231201,1.0,2.0,2.5,3.0,6.1
53,Kanchanaburi,w,52.0,1.975962,0.911446,0.5,1.5,2.0,2.5,5.1
65,Lampang,w,52.0,1.348077,0.929735,0.5,0.5,1.0,1.5,5.0
76,Lamphun,w,52.0,1.816346,0.787134,0.5,1.425,1.875,2.175,3.3
85,Mueang Chiang Rai,w,52.0,1.601923,0.851697,0.1,1.0,1.625,2.0,3.6
93,Mueang Khon Kaen,w,52.0,1.793269,1.076397,0.5,1.0,1.5,2.125,5.6
103,Nakhon Pathom,w,52.0,2.276923,1.033956,0.5,1.5,2.5,3.0,4.6


## Hypothesis 1

In [22]:
# Significance level used for the normality, variance and location tests below.
ALPHA = 0.05
# Minimum number of shared daily observations required before a city pair is tested.
MIN_OBSERVATIONS = 3
# Fixed column layout, so the result frame has these columns even when no pair is tested.
RESULT_COLUMNS = [
    "region", "major_city", "peripheral_city", "test",
    "p_value", "shapiro1_p", "shapiro2_p", "levene_p",
]


def _compare_two_samples(sample1, sample2, alpha=ALPHA):
    """Test whether two samples differ, choosing the test from their normality.

    Both samples are checked with the Shapiro-Wilk test. If neither rejects
    normality at ``alpha``, an independent-samples t-test is run, with
    ``equal_var`` decided by Levene's test. Otherwise a two-sided
    Mann-Whitney U test is used.

    Parameters
    ----------
    sample1, sample2 : array-like of float
        The observations to compare.
    alpha : float, default ALPHA
        Threshold applied to the Shapiro-Wilk and Levene p-values.

    Returns
    -------
    dict
        Keys ``test``, ``p_value``, ``shapiro1_p``, ``shapiro2_p`` and
        ``levene_p`` (``None`` when the Mann-Whitney U test was used).
    """
    shapiro1 = shapiro(sample1)
    shapiro2 = shapiro(sample2)
    outcome = {"shapiro1_p": shapiro1.pvalue, "shapiro2_p": shapiro2.pvalue}

    if shapiro1.pvalue > alpha and shapiro2.pvalue > alpha:
        levene_test = levene(sample1, sample2)
        # Fall back to Welch's t-test when Levene rejects equal variances.
        ttest = ttest_ind(sample1, sample2, equal_var=(levene_test.pvalue > alpha))
        outcome.update(test="t-test", p_value=ttest.pvalue, levene_p=levene_test.pvalue)
    else:
        u_test = mannwhitneyu(sample1, sample2, alternative="two-sided")
        outcome.update(test="mannwhitneyu", p_value=u_test.pvalue, levene_p=None)

    return outcome


# Get metadata: one row per city with its region and type.
city_meta = df_no_missing[['City', 'City_region', 'City_type']].drop_duplicates().set_index("City")

results = []
feature = "pm25"

# Compare every major city with every peripheral city of the same region.
for region in city_meta["City_region"].unique():
    regional_cities = city_meta[city_meta["City_region"] == region]

    majors = regional_cities[regional_cities["City_type"] == "Major"].index
    peripherals = regional_cities[regional_cities["City_type"] == "Peripheral"].index

    for major_city, peripheral_city in product(majors, peripherals):
        try:
            series1 = resampled_aqi[(major_city, feature)].dropna()
            series2 = resampled_aqi[(peripheral_city, feature)].dropna()
        except KeyError:
            continue  # Skip if either city has no column for this feature

        # Align by common dates
        common_index = series1.index.intersection(series2.index)
        series1 = series1.loc[common_index]
        series2 = series2.loc[common_index]

        if len(series1) < MIN_OBSERVATIONS or len(series2) < MIN_OBSERVATIONS:
            continue

        results.append({
            "region": region,
            "major_city": major_city,
            "peripheral_city": peripheral_city,
            **_compare_two_samples(series1, series2),
        })

# Wrap in DataFrame. Passing the columns explicitly keeps the "p_value" lookup
# below valid even when `results` is empty.
comparison_results = pd.DataFrame(results, columns=RESULT_COLUMNS)
significant_comparisons = comparison_results[comparison_results["p_value"] < ALPHA]

In [23]:
comparison_results

Unnamed: 0,region,major_city,peripheral_city,test,p_value,shapiro1_p,shapiro2_p,levene_p
0,Central,Bangkok,Ayutthaya,mannwhitneyu,0.02423012,0.000836,0.168525,
1,Central,Bangkok,Nakhon Pathom,mannwhitneyu,0.007740254,0.000836,0.009433,
2,Central,Bangkok,Nakhon Sawan,mannwhitneyu,0.02793078,0.000836,0.002398,
3,Central,Bangkok,Nonthaburi,mannwhitneyu,0.0007640886,0.000836,0.000274,
4,Central,Bangkok,Pathum Thani,mannwhitneyu,0.008762573,0.000836,0.00464,
5,Central,Bangkok,Phitsanulok,mannwhitneyu,0.8555075,0.000836,0.015437,
6,Central,Bangkok,Samut Prakan,mannwhitneyu,0.7230096,0.000836,0.00171,
7,Central,Bangkok,Samut Sakhon,mannwhitneyu,2.046516e-05,0.000836,0.013579,
8,Central,Bangkok,Saraburi,mannwhitneyu,7.775359e-05,0.000836,0.011795,
9,Central,Bangkok,Suphan Buri,mannwhitneyu,2.926852e-06,0.000836,5.3e-05,


In [24]:
significant_comparisons

Unnamed: 0,region,major_city,peripheral_city,test,p_value,shapiro1_p,shapiro2_p,levene_p
0,Central,Bangkok,Ayutthaya,mannwhitneyu,0.02423012,0.000836,0.168525,
1,Central,Bangkok,Nakhon Pathom,mannwhitneyu,0.007740254,0.000836,0.009433,
2,Central,Bangkok,Nakhon Sawan,mannwhitneyu,0.02793078,0.000836,0.002398,
3,Central,Bangkok,Nonthaburi,mannwhitneyu,0.0007640886,0.000836,0.000274,
4,Central,Bangkok,Pathum Thani,mannwhitneyu,0.008762573,0.000836,0.00464,
7,Central,Bangkok,Samut Sakhon,mannwhitneyu,2.046516e-05,0.000836,0.013579,
8,Central,Bangkok,Saraburi,mannwhitneyu,7.775359e-05,0.000836,0.011795,
9,Central,Bangkok,Suphan Buri,mannwhitneyu,2.926852e-06,0.000836,5.3e-05,
10,North,Chiang Mai,Lampang,mannwhitneyu,7.084714e-05,0.002839,0.167802,
11,North,Chiang Mai,Lamphun,mannwhitneyu,0.000554852,0.002839,0.013255,


In [25]:
# Export data for modeling: one row per (City, day) and one column per metric.
# Step 1: flatten the wide frame into long form; after reset_index() the metric
# name is in "level_1" and the value is in the column named 0.
unstack_resampled = resampled_aqi.unstack().reset_index()

# Step 2: spread the metrics back out into columns, keyed by city and day.
train_data = pd.pivot_table(
    unstack_resampled,
    values=0,
    index=["City", "time"],
    columns=["level_1"],
).reset_index()

# NOTE: to_csv keeps its default index=True, so the row index is written as an
# unnamed first column.
train_data.to_csv("air_data_train.csv")