In [8]:
import requests
import config
import pandas as pd

# step 1 : send API requests

In [77]:
# 요청 확인하기
# api
airkorea_base_url = "https://apis.data.go.kr/B552584/ArpltnInforInqireSvc"
weather_base_url = "https://apis.data.go.kr/1360000/VilageFcstInfoService_2.0"

station_url = f"{airkorea_base_url}/getCtprvnRltmMesureDnsty"
params = {
    'serviceKey': config.AIRKOREA_API_KEY,
    'returnType': 'json',
    'numOfRows': '100',
    'pageNo': '1',
    'sidoName': '경기',
    'ver': '1.0'
}

# send request
res = requests.get(station_url, params=params)

print("응답 확인:", res.status_code)

print("응답 내용 미리보기:", res.text[:300])

응답 확인: 200
응답 내용 미리보기: {"response":{"body":{"totalCount":125,"items":[{"so2Grade":"1","coFlag":null,"khaiValue":"64","so2Value":"0.002","coValue":"0.4","pm25Flag":null,"pm10Flag":null,"o3Grade":"1","pm10Value":"28","khaiGrade":"2","pm25Value":"19","sidoName":"경기","no2Flag":null,"no2Grade":"1","o3Flag":null,"pm25Grade":"2"


# step 2 : export suwon data from json

In [78]:
# export suwon data from json
data = res.json()

# export data
items = data['response']['body']['items']

# change to pandas
df = pd.DataFrame(items)

# 수원시 관련 측정소만 보기
suwon_df = df[df['stationName'].str.contains("수원")]

# print only necessary columns
print(suwon_df[['stationName', 'dataTime', 'pm10Value', 'pm25Value']])

df

  stationName          dataTime pm10Value pm25Value
5   경수대로(동수원)  2025-06-08 05:00        33        19


Unnamed: 0,so2Grade,coFlag,khaiValue,so2Value,coValue,pm25Flag,pm10Flag,o3Grade,pm10Value,khaiGrade,...,no2Grade,o3Flag,pm25Grade,so2Flag,dataTime,coGrade,no2Value,stationName,pm10Grade,o3Value
0,1,,64,0.002,0.4,,,1,28,2,...,1,,2,,2025-06-08 05:00,1,0.004,신풍동,1,0.027
1,1,,69,0.002,0.4,,,1,28,2,...,1,,2,,2025-06-08 05:00,1,0.007,인계동,2,0.028
2,1,,72,0.002,0.5,,,1,33,2,...,1,,2,,2025-06-08 05:00,1,0.007,광교동,2,0.026
3,1,,69,0.001,0.4,,,2,23,2,...,1,,2,,2025-06-08 05:00,1,0.003,영통동,2,0.030
4,1,,59,0.002,0.4,,,2,22,2,...,1,,2,,2025-06-08 05:00,1,0.001,천천동,1,0.033
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,1,,66,0.003,0.2,,,2,26,2,...,1,,2,,2025-06-08 05:00,1,0.004,장현동,2,0.036
96,1,,54,0.002,0.3,,,2,15,2,...,1,,2,,2025-06-08 05:00,1,0.006,서해안로,1,0.031
97,1,,74,0.003,0.3,,,2,21,2,...,1,,2,,2025-06-08 05:00,1,0.005,배곧동,2,0.031
98,1,,-,0.001,0.2,통신장애,,1,34,,...,1,,,,2025-06-08 05:00,1,0.014,금곡동,2,0.011


# step 3 : collect weather data + merge with findust

goals
|item|desc|
|---|---|
|날씨 데이터 수집|기상청 API에서 기온/습도/풍속 받아오기|
|데이터 병합|미세먼지 + 날씨 데이터를 시간 기준으로 병합|

- **기온**(T1H) : 대기 상태 변동 영향
- **습도**(REH) : 입자 응결 → 미세먼지 농도 영향
- **풍속**(WSD) : 바람이 강하면 오염물질 확산

In [15]:
from datetime import datetime, timedelta

# current time
now = datetime.now()

# 30분 단위로 맞추기
minute = now.minute
if minute < 30:
  base_time = now.replace(minute=0)
else:
  base_time = now.replace(minute=30)

# 시각이 00:00보다 작아지면 날짜도 하루 전으로 보정해야 함
base_time = base_time - timedelta(minutes=10) # 데이터 생성 지연 고려

# base_date, base_time 선언
base_date = base_time.strftime("%Y%m%d")
base_time_str = base_time.strftime("%H%M")

weather_url = f"{weather_base_url}/getUltraSrtNcst"
# 기상청 api는 위도 경도 안 쓰고 이상한 그리드 씀
# 60, 120 = 수원 대표값
params = {
  'serviceKey' : config.WEATHER_API_KEY,
  'dataType' : 'json',
  'base_date' : base_date,
  'base_time' : base_time_str,
  'nx' : '60',
  'ny' : '120',
  'numOfRows' : '100',
  'pageNo' : '1'
}

# send request
res = requests.get(weather_url, params=params)

print(f"[DEBUG]응답 확인: {res.status_code}")
print(f"[DEBUG]응답 미리보기: {res.text}")

[DEBUG]응답 확인: 200
[DEBUG]응답 미리보기: {"response":{"header":{"resultCode":"00","resultMsg":"NORMAL_SERVICE"},"body":{"dataType":"JSON","items":{"item":[{"baseDate":"20250608","baseTime":"0400","category":"PTY","nx":60,"ny":120,"obsrValue":"0"},{"baseDate":"20250608","baseTime":"0400","category":"REH","nx":60,"ny":120,"obsrValue":"92"},{"baseDate":"20250608","baseTime":"0400","category":"RN1","nx":60,"ny":120,"obsrValue":"0"},{"baseDate":"20250608","baseTime":"0400","category":"T1H","nx":60,"ny":120,"obsrValue":"20.3"},{"baseDate":"20250608","baseTime":"0400","category":"UUU","nx":60,"ny":120,"obsrValue":"0.2"},{"baseDate":"20250608","baseTime":"0400","category":"VEC","nx":60,"ny":120,"obsrValue":"338"},{"baseDate":"20250608","baseTime":"0400","category":"VVV","nx":60,"ny":120,"obsrValue":"-0.4"},{"baseDate":"20250608","baseTime":"0400","category":"WSD","nx":60,"ny":120,"obsrValue":"0.5"}]},"pageNo":1,"numOfRows":100,"totalCount":8}}}


In [31]:
data = res.json()

# 1. take out item list from whole json response
items = data['response']['body']['items']['item']

# 2. make DataFrame from pd
df = pd.DataFrame(items)

# 3. filter needed items
df_filtered = df[df['category'].isin(['T1H', 'REH', 'WSD'])]

# 4. 데이터 정리 (pivot 형태로 가공)
weather_df = df_filtered.pivot_table(columns='category', values='obsrValue', aggfunc='first')

# 5. 컬럼 이름 정리
weather_df = weather_df[['T1H', 'REH', 'WSD']]

# 6. 값 타입 float로 변환
weather_df = weather_df.astype(float)

In [50]:
weather_df

wdf = weather_df.copy()

wdf['dateTime'] = datetime.now()

wdf['dateTime'] = pd.to_datetime(wdf['dateTime'])
wdf['dateTime'] = wdf['dateTime'].dt.strftime('%Y-%m-%d %H:%M')
wdf = wdf.rename(columns={'dateTime' : 'dataTime'})
wdf
# suwon_df

category,T1H,REH,WSD,dataTime
obsrValue,20.3,92.0,0.5,2025-06-08 05:38


In [65]:
# merge
merged_df = pd.merge(suwon_df, wdf, on=['dataTime'], how='inner')
merged_df.head()

Unnamed: 0,so2Grade,coFlag,khaiValue,so2Value,coValue,pm25Flag,pm10Flag,o3Grade,pm10Value,khaiGrade,...,so2Flag,dataTime,coGrade,no2Value,stationName,pm10Grade,o3Value,T1H,REH,WSD


In [64]:
# 엄
suwon_df['dataTime'] = pd.to_datetime(suwon_df['dataTime'])
wdf['dataTime'] = pd.to_datetime(wdf['dataTime'])

wdf['dataTime'].dtype


dtype('<M8[ns]')

In [66]:
suwon_df

Unnamed: 0,so2Grade,coFlag,khaiValue,so2Value,coValue,pm25Flag,pm10Flag,o3Grade,pm10Value,khaiGrade,...,no2Grade,o3Flag,pm25Grade,so2Flag,dataTime,coGrade,no2Value,stationName,pm10Grade,o3Value
40,1,,72,0.002,0.4,,,1,25,2,...,1,,2,,2025-06-08 03:00:00,1,0.005,경수대로(동수원),2,0.027
