# 프로젝트명: 데이터 수집 실습 2 (공공데이터포털 API로 받기) 

#### 📌 requests는 API를 호출할 때 주로 사용하는 라이브러리입니다. 
- 서버에 HTTP 요청을 보낼때 주로 사용하며, GET, POST, PUT, DELETE를 사용할 수 있습니다.
- 데이터 API의 요구사항에 맞게 요청해야하며, 보통은 "GET", "POST"를 주로 사용됩니다.

#### 🚨 주의사항
- 공공데이터포털에선 운영계정을 신청해서 **서비스키**를 받아야합니다.
- 자세한 내용은 보통 API 사용문서를 참고합니다. 

![dddd](https://firebasestorage.googleapis.com/v0/b/ls-storage-e452a.appspot.com/o/%E1%84%80%E1%85%A9%E1%86%BC%E1%84%80%E1%85%A9%E1%86%BC%E1%84%83%E1%85%A6%E1%84%8B%E1%85%B5%E1%84%90%E1%85%A5_api.png?alt=media&token=4f62a990-1274-4785-8fa7-8611e1bdc91b)

### 1일 차이의 사이 값 가져오기

In [1]:
from datetime import datetime, timedelta

start_date = datetime.strptime('2021-10-20', '%Y-%m-%d')
end_date = datetime.strptime('2021-10-31', '%Y-%m-%d')

date_range = []
for i in range((end_date - start_date).days + 1):
    res_date = start_date + timedelta(days=i)
    res_date = res_date.strftime('%Y-%m-%d')
    date_range.append(res_date)
    
print(date_range)

['2021-10-20', '2021-10-21', '2021-10-22', '2021-10-23', '2021-10-24', '2021-10-25', '2021-10-26', '2021-10-27', '2021-10-28', '2021-10-29', '2021-10-30', '2021-10-31']


### 1년 차이의 한달 간격 가져오기 

In [2]:
from datetime import datetime
from dateutil.relativedelta import relativedelta

start_date = datetime.strptime('2021-10-20', '%Y-%m-%d')
end_date = datetime.strptime('2021-10-31', '%Y-%m-%d')

date_range = []
current_date = start_date
while current_date < end_date:
    res_date = current_date.strftime('%Y-%m-%d')
    date_range.append(res_date)
    current_date += relativedelta(months=1)

print(date_range)

['2021-10-20']


### 기상청의 2023년 서울의 데이터 가져오기 (1년치 데이터 가져오기)

In [3]:
from datetime import datetime
from dateutil.relativedelta import relativedelta

start_date = datetime.strptime('2023-01-01', '%Y-%m-%d')
end_date = datetime.strptime('2024-01-01', '%Y-%m-%d')

date_range = []
current_date = start_date
while current_date < end_date:
    res_date = current_date.strftime('%Y%m%d')
    date_range.append(res_date)
    current_date += relativedelta(months=1)

print(date_range)

['20230101', '20230201', '20230301', '20230401', '20230501', '20230601', '20230701', '20230801', '20230901', '20231001', '20231101', '20231201']


In [4]:
start_end_date = []
for i in range(len(date_range) -1):
    start_end_date.append([date_range[i], date_range[i+1]])

In [5]:
from tqdm import tqdm
import requests
import pandas as pd
import time

url = 'http://apis.data.go.kr/1360000/AsosHourlyInfoService/getWthrDataList'
key = '''수정'''

dfs = []
for i in tqdm(start_end_date):
    print(i)

    params = {'serviceKey' : key, 
            'pageNo' : '1', 
            'numOfRows' : '999', 
            'dataType' : 'JSON', 
            'dataCd' : 'ASOS', ## 자료분류코드
            'dateCd' : 'HR', ## 날짜분류코드
            'startDt' : str(i[0]), ## 시작일
            'startHh' : '00', ## 시작시간
            'endDt' : str(i[1]), ## 종료일
            'endHh': '00', ## 종료시간
            'stnIds' : '131' ## 청주지역코드
            }

    response = requests.get(url, params=params,verify=False)
    result = response.json()
    df = pd.DataFrame(result['response']['body']['items']['item'])
    dfs.append(df)

final_df = pd.concat(dfs, axis=0, ignore_index=True)

  0%|          | 0/11 [00:00<?, ?it/s]

['20230101', '20230201']


  9%|▉         | 1/11 [00:01<00:16,  1.68s/it]

['20230201', '20230301']


 18%|█▊        | 2/11 [00:03<00:16,  1.82s/it]

['20230301', '20230401']


 27%|██▋       | 3/11 [00:04<00:12,  1.53s/it]

['20230401', '20230501']


 36%|███▋      | 4/11 [00:07<00:14,  2.13s/it]

['20230501', '20230601']


 45%|████▌     | 5/11 [00:08<00:10,  1.77s/it]

['20230601', '20230701']


 55%|█████▍    | 6/11 [00:10<00:08,  1.75s/it]

['20230701', '20230801']


 64%|██████▎   | 7/11 [00:15<00:11,  2.85s/it]

['20230801', '20230901']


 73%|███████▎  | 8/11 [00:16<00:06,  2.29s/it]

['20230901', '20231001']


 82%|████████▏ | 9/11 [00:18<00:03,  1.93s/it]

['20231001', '20231101']


 91%|█████████ | 10/11 [00:19<00:01,  1.69s/it]

['20231101', '20231201']


100%|██████████| 11/11 [00:20<00:00,  1.84s/it]


In [6]:
final_df

Unnamed: 0,tm,rnum,stnId,stnNm,ta,taQcflg,rn,rnQcflg,ws,wsQcflg,...,lcsCh,vs,gndSttCd,dmstMtphNo,ts,tsQcflg,m005Te,m01Te,m02Te,m03Te
0,2023-01-01 00:00,1,131,청주,-2.5,,,9,0.0,,...,,753,,19,-1.2,,,,,
1,2023-01-01 01:00,2,131,청주,-3.0,,,,0.0,,...,,703,,19,-1.7,,,,,
2,2023-01-01 02:00,3,131,청주,-3.3,,,,0.0,,...,,683,,19,-2.1,,,,,
3,2023-01-01 03:00,4,131,청주,-3.5,,,,0.0,,...,,666,,19,-2.5,,,,,
4,2023-01-01 04:00,5,131,청주,-3.5,,,,0.0,,...,10,726,,19,-2.8,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8022,2023-11-30 20:00,717,131,청주,-1.8,,,,0.6,,...,,4520,,,-1.6,,,,,
8023,2023-11-30 21:00,718,131,청주,-1.9,,,,1.6,,...,11,4668,,,-1.8,,,,,
8024,2023-11-30 22:00,719,131,청주,-2.1,,,,1.3,,...,10,4384,,,-1.8,,,,,
8025,2023-11-30 23:00,720,131,청주,-2.0,,,,1.4,,...,8,3855,,05,-1.0,,,,,


In [7]:
## final_df의 결측치 확인
final_df.isnull().sum()

tm            0
rnum          0
stnId         0
stnNm         0
ta            0
taQcflg       0
rn            0
rnQcflg       0
ws            0
wsQcflg       0
wd            0
wdQcflg       0
hm            0
hmQcflg       0
pv            0
td            0
pa            0
paQcflg       0
ps            0
psQcflg       0
ss            0
ssQcflg       0
icsr          0
dsnw          0
hr3Fhsc       0
dc10Tca       0
dc10LmcsCa    0
clfmAbbrCd    0
lcsCh         0
vs            0
gndSttCd      0
dmstMtphNo    0
ts            0
tsQcflg       0
m005Te        0
m01Te         0
m02Te         0
m03Te         0
dtype: int64

In [8]:
final_df['tm'] = pd.to_datetime(final_df['tm'])

In [9]:
final_df['date'] = final_df['tm'].dt.date
final_df['hour'] = final_df['tm'].dt.hour

In [10]:
final_df.groupby('date').size().reset_index(name='count').sort_values('count', ascending=False)

Unnamed: 0,date,count
212,2023-08-01,25
90,2023-04-01,25
59,2023-03-01,25
151,2023-06-01,25
243,2023-09-01,25
...,...,...
112,2023-04-23,24
111,2023-04-22,24
110,2023-04-21,24
109,2023-04-20,24


In [11]:
final_df[final_df['date'] == '2023-08-01']

Unnamed: 0,tm,rnum,stnId,stnNm,ta,taQcflg,rn,rnQcflg,ws,wsQcflg,...,gndSttCd,dmstMtphNo,ts,tsQcflg,m005Te,m01Te,m02Te,m03Te,date,hour


In [12]:
final_df['date'] = final_df['date'].astype(str)

In [13]:
final_df[final_df['date'] == '2023-08-01']

Unnamed: 0,tm,rnum,stnId,stnNm,ta,taQcflg,rn,rnQcflg,ws,wsQcflg,...,gndSttCd,dmstMtphNo,ts,tsQcflg,m005Te,m01Te,m02Te,m03Te,date,hour
5094,2023-08-01 00:00:00,745,131,청주,28.1,,,,2.0,,...,,,27.2,,,,,,2023-08-01,0
5095,2023-08-01 00:00:00,1,131,청주,28.1,,,,2.0,,...,,,27.2,,,,,,2023-08-01,0
5096,2023-08-01 01:00:00,2,131,청주,27.3,,,,1.9,,...,,,27.1,,,,,,2023-08-01,1
5097,2023-08-01 02:00:00,3,131,청주,26.6,,,,1.6,,...,,,26.6,,,,,,2023-08-01,2
5098,2023-08-01 03:00:00,4,131,청주,26.2,,,,1.9,,...,,,26.3,,,,,,2023-08-01,3
5099,2023-08-01 04:00:00,5,131,청주,26.1,,,,1.7,,...,,,26.2,,,,,,2023-08-01,4
5100,2023-08-01 05:00:00,6,131,청주,25.6,,,,1.3,,...,,,26.0,,,,,,2023-08-01,5
5101,2023-08-01 06:00:00,7,131,청주,25.3,,,,1.4,,...,,,26.0,,,,,,2023-08-01,6
5102,2023-08-01 07:00:00,8,131,청주,26.0,,,,1.3,,...,,,27.6,,,,,,2023-08-01,7
5103,2023-08-01 08:00:00,9,131,청주,27.6,,,,1.3,,...,,,30.9,,,,,,2023-08-01,8


In [14]:
final_df['stnNm'].unique()

array(['청주'], dtype=object)