# PM2.5 Data Analysis in Guangzhou (2020–2024)
This notebook analyzes PM2.5 air-quality data for Guangzhou from 2020 through 2024 to identify seasonal patterns and year-over-year trends.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read the PM2.5 records (parsing 'date' as datetimes) and tag every row
# with the calendar month it falls in, stored as a monthly Period.
df = pd.read_csv(
    '../data/guangzhou_pm25_2020_2024.csv',
    parse_dates=['date'],
).assign(month=lambda frame: frame['date'].dt.to_period('M'))

# Mean PM2.5 per month. The Period keys are converted back to timestamps
# so the plotting code receives an ordinary datetime column for its x-axis.
monthly_avg = (
    df.groupby('month')['pm25']
    .mean()
    .reset_index()
    .assign(month=lambda frame: frame['month'].dt.to_timestamp())
)

# Line chart of the monthly means, with a circular marker at each month.
plt.figure(figsize=(12, 6))
ax = sns.lineplot(x='month', y='pm25', data=monthly_avg, marker='o')

# Title and axis labels are set on the Axes that seaborn drew into.
ax.set_title('Monthly Average PM2.5 in Guangzhou (2020–2024)')
ax.set_xlabel('Month')
ax.set_ylabel('PM2.5 (μg/m³)')

# Slant the x tick labels, then tighten the layout before rendering.
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()