In [1]:
import sys
sys.path.append('../')

In [2]:
import json
import time
import os
from datetime import datetime, timedelta

import pandas as pd
import requests

from src.config import REQUEST_PARAMS, URL
from src.utils import load_error_code, load_stations

In [3]:
# Path to the text file holding your private service key.
SERVICE_KEY_PATH = '../data/private_key'

# Requested period; both START_DATE and END_DATE are included.
START_DATE = '2020-01-01'
END_DATE = '2020-12-31'
# Number of days between two consecutive request boundaries.
DATE_INTERVAL = 40

# Directory the downloaded data is written to. The last path component is
# derived from the requested period so the folder name can never drift out
# of sync with START_DATE / END_DATE.
SAVE_ROOT = '/mnt/sdb1/data/asos'
SAVE_PATH = os.path.join(
    SAVE_ROOT,
    '{}_{}'.format(START_DATE.replace('-', ''), END_DATE.replace('-', '')),
)
os.makedirs(SAVE_PATH, exist_ok=True)

## Define utility functions

In [4]:
def get_base_url(service_key_path):
    """Build the request URL with the private service key filled in.

    Args:
        service_key_path: path to a text file containing the service key.

    Returns:
        ``URL`` with its ``service_key`` placeholder replaced by the key.

    Raises:
        ValueError: if the key file is empty or holds only whitespace.
    """
    with open(service_key_path, 'r') as f:
        # Strip every surrounding whitespace character rather than a single
        # trailing newline: a CRLF line ending or a stray space would
        # otherwise end up inside the URL.
        service_key = f.read().strip()

    if not service_key:
        raise ValueError(
            'service key file is empty: {}'.format(service_key_path))

    return URL.format(service_key=service_key)


def create_date_sequence(start_date, end_date, interval):
    """Return the boundary dates that split a period into request windows.

    The sequence starts at ``start_date``, advances ``interval`` days at a
    time and always ends with ``end_date`` (appended when the stepping does
    not land on it exactly).

    Args:
        start_date: first day of the period, formatted ``'YYYY-MM-DD'``.
        end_date: last day of the period, formatted ``'YYYY-MM-DD'``.
        interval: number of days between consecutive boundaries (>= 1).
            The maximum number of rows per request should be less than
            1000, so keep this small enough for the rows requested per
            window.

    Returns:
        List of ``datetime.date`` objects in ascending order.

    Raises:
        ValueError: if ``interval`` is smaller than 1, if ``end_date`` is
            earlier than ``start_date``, or if a date string is malformed.
    """
    # NOTE: the argument used to be overwritten with a hard-coded 40 here,
    # which silently ignored whatever the caller passed in.
    if interval < 1:
        raise ValueError('interval must be at least 1 day, got {}'.format(interval))

    first_day = datetime.strptime(start_date, '%Y-%m-%d').date()
    last_day = datetime.strptime(end_date, '%Y-%m-%d').date()
    n_days = (last_day - first_day).days
    if n_days < 0:
        raise ValueError(
            'end_date ({}) is earlier than start_date ({})'.format(end_date, start_date))

    # Step straight through the period instead of materialising every day
    # and slicing afterwards.
    date_seq = [
        first_day + timedelta(days=offset)
        for offset in range(0, n_days + 1, interval)
    ]
    if date_seq[-1] != last_day:
        date_seq.append(last_day)

    return date_seq


def update_params(**kwargs):
    """Return a fresh dict of ``REQUEST_PARAMS`` overlaid with ``kwargs``.

    ``REQUEST_PARAMS`` itself is left untouched; any key passed as a keyword
    argument replaces the default of the same name or is added to the result.
    """
    merged = dict(REQUEST_PARAMS.items())
    merged.update(kwargs)
    return merged

In [5]:
# Boundary dates of the request windows: each pair of consecutive entries
# is later used as the (start, end) of one request.
date_list = create_date_sequence(START_DATE, END_DATE, DATE_INTERVAL)
print(date_list)

[datetime.date(2020, 1, 1), datetime.date(2020, 2, 10), datetime.date(2020, 3, 21), datetime.date(2020, 4, 30), datetime.date(2020, 6, 9), datetime.date(2020, 7, 19), datetime.date(2020, 8, 28), datetime.date(2020, 10, 7), datetime.date(2020, 11, 16), datetime.date(2020, 12, 26), datetime.date(2020, 12, 31)]


## load metadata

In [6]:
stations = load_stations()
# parse both date columns from 'YYYY-MM-DD' text into datetimes
for date_col in ('start_date', 'end_date'):
    stations[date_col] = pd.to_datetime(stations[date_col], format='%Y-%m-%d')

# keep only stations that are still operating after the requested period:
# either no end date at all, or an end date later than the last boundary
last_requested_day = date_list[-1]
has_no_end_date = stations['end_date'].isnull()
ends_after_period = stations['end_date'].dt.date > last_requested_day
stations = stations[has_no_end_date | ends_after_period]
stations.head()

Unnamed: 0,stn_id,start_date,end_date,stn_name,stn_address,department,latitude,longitude,altitude,pressure_height,temperature_height,wind_speed_height,precipitation_height
0,90,1968-01-01,NaT,속초,강원도 고성군토성면 봉포5길9 속초자동기상관측소,속초기상대(90),38.2509,128.5647,17.53,18.73,1.7,10.0,1.4
1,93,2016-10-01,NaT,북춘천,강원도 춘천시신북읍 산천리264(장본1길 12) 춘천기상대,춘천기상대(101),37.9474,127.7544,95.78,96.78,1.5,10.0,1.4
2,95,1988-01-01,NaT,철원,강원도 철원군갈말읍 명성로179번길 26 철원자동기상관측소,춘천기상대(101),38.1479,127.3042,155.48,156.98,1.8,13.0,1.5
3,98,1998-02-01,NaT,동두천,경기도 동두천시방죽로 16-47동두천서비스센터,수도권기상청(119),37.9019,127.0607,115.62,116.74,1.7,10.0,1.0
4,99,2013-10-22,NaT,파주,경기도 파주시문산읍 마정로46-29(파주기상대),수도권기상청(119),37.8859,126.7665,30.59,31.99,1.7,10.0,1.0


In [7]:
# Lookup table of API error codes (error_code, error_message, description).
err_cd = load_error_code()
err_cd.head()

Unnamed: 0,error_code,error_message,description
0,0,NORMAL_SERVICE,정상
1,1,APPLICATION_ERROR,어플리케이션 에러
2,2,DB_ERROR,데이터베이스 에러
3,3,NODATA_ERROR,데이터없음 에러
4,4,HTTP_ERROR,HTTP 에러


## Request data for every station

In [8]:
# Request URL with the private service key already substituted in.
base_url = get_base_url(SERVICE_KEY_PATH)

In [9]:
# Failed requests are appended to this frame, one row per failure:
# the result code, the error message and the request parameters used.
log_columns = ['result_code', 'error_message', 'params']
log_data = pd.DataFrame(columns=log_columns)

In [10]:
# Ids of the stations to download, in ascending order.
stn_ids = sorted(stations.stn_id.tolist())

In [11]:
# Seconds to wait for the server on a single request, so that one stalled
# connection cannot hang the whole download.
REQUEST_TIMEOUT = 60


def _header_result_code(response, default='999'):
    """Return the API header's resultCode as a string, or `default` if absent."""
    try:
        return str(response['header']['resultCode'])
    except (TypeError, KeyError):
        return default


# Consecutive boundary dates form one request window. A one-day period yields
# a single boundary, which still has to be requested once.
date_windows = list(zip(date_list[:-1], date_list[1:])) or [(date_list[0], date_list[0])]

start_time = time.time()
for stn_id in stn_ids:
    file_name = os.path.join(SAVE_PATH, 'stn_{}.csv'.format(stn_id))
    # One frame per successful request, concatenated once per station.
    # DataFrame.append was removed in pandas 2.0 and re-copied all
    # accumulated rows on every call.
    station_frames = []

    print('====='*5)
    print('stn_id: {}'.format(stn_id))
    print('====='*5)

    for start, end in date_windows:
        num_days = (end - start).days

        param_dict = {
            # both boundary dates are requested, hence num_days + 1 days
            'numOfRows': (num_days+1)*24,
            'startDt': start.strftime('%Y%m%d'),
            'endDt': end.strftime('%Y%m%d'),
            'stnIds': stn_id,
        }
        cur_params = update_params(**param_dict)
        response = None
        try:
            result = requests.get(base_url, params=cur_params, timeout=REQUEST_TIMEOUT)
            response = json.loads(result.text)['response']
            data_list = response.get('body').get('items').get('item')
        except Exception as ex:
            # Best effort: record the failure and continue with the next
            # window. Use the API's own result code when the response got far
            # enough to carry one, otherwise fall back to '999'.
            result_cd = _header_result_code(response)
            error_log = [result_cd, str(ex), cur_params]
            log_data = pd.concat(
                [log_data, pd.DataFrame([error_log], columns=log_columns)],
                ignore_index=True,
            )
        else:
            station_frames.append(pd.DataFrame(data_list))
        finally:
            print('time elapsed: {} sec ({}~{})'.
                  format(time.time() - start_time, start, end))

    if station_frames:
        # Neighbouring windows share their boundary date, so rows for that day
        # are returned twice; keep only the first copy of each identical row.
        # NOTE(review): assumes the returned items are flat records of
        # hashable values - confirm against the API response.
        weather_data = pd.concat(station_frames, ignore_index=True).drop_duplicates()
    else:
        weather_data = pd.DataFrame()

    weather_data.to_csv(file_name, index=False)

stn_id: 90
time elapsed: 2.41677188873291 sec (2020-01-01~2020-02-10)
time elapsed: 2.93686842918396 sec (2020-02-10~2020-03-21)
time elapsed: 3.347696304321289 sec (2020-03-21~2020-04-30)
time elapsed: 3.957880973815918 sec (2020-04-30~2020-06-09)
time elapsed: 4.443737983703613 sec (2020-06-09~2020-07-19)
time elapsed: 4.949816465377808 sec (2020-07-19~2020-08-28)
time elapsed: 5.515632152557373 sec (2020-08-28~2020-10-07)
time elapsed: 5.970910549163818 sec (2020-10-07~2020-11-16)
time elapsed: 6.361437082290649 sec (2020-11-16~2020-12-26)
time elapsed: 6.50253438949585 sec (2020-12-26~2020-12-31)
stn_id: 93
time elapsed: 6.973740339279175 sec (2020-01-01~2020-02-10)
time elapsed: 7.424501895904541 sec (2020-02-10~2020-03-21)
time elapsed: 7.90342116355896 sec (2020-03-21~2020-04-30)
time elapsed: 8.322833776473999 sec (2020-04-30~2020-06-09)
time elapsed: 8.873376369476318 sec (2020-06-09~2020-07-19)
time elapsed: 9.371147155761719 sec (2020-07-19~2020-08-28)
time elapsed: 9.828564

time elapsed: 191.84810829162598 sec (2020-03-21~2020-04-30)
time elapsed: 192.34483981132507 sec (2020-04-30~2020-06-09)
time elapsed: 192.78108716011047 sec (2020-06-09~2020-07-19)
time elapsed: 193.3084273338318 sec (2020-07-19~2020-08-28)
time elapsed: 193.7564172744751 sec (2020-08-28~2020-10-07)
time elapsed: 194.24037313461304 sec (2020-10-07~2020-11-16)
time elapsed: 194.74392676353455 sec (2020-11-16~2020-12-26)
time elapsed: 194.92923951148987 sec (2020-12-26~2020-12-31)
stn_id: 114
time elapsed: 202.12309217453003 sec (2020-01-01~2020-02-10)
time elapsed: 202.7182800769806 sec (2020-02-10~2020-03-21)
time elapsed: 203.21311783790588 sec (2020-03-21~2020-04-30)
time elapsed: 203.63981819152832 sec (2020-04-30~2020-06-09)
time elapsed: 204.1671814918518 sec (2020-06-09~2020-07-19)
time elapsed: 205.6334969997406 sec (2020-07-19~2020-08-28)
time elapsed: 223.6415286064148 sec (2020-08-28~2020-10-07)
time elapsed: 224.13395524024963 sec (2020-10-07~2020-11-16)
time elapsed: 224.

time elapsed: 420.8239245414734 sec (2020-07-19~2020-08-28)
time elapsed: 421.31437397003174 sec (2020-08-28~2020-10-07)
time elapsed: 421.8466606140137 sec (2020-10-07~2020-11-16)
time elapsed: 422.34704780578613 sec (2020-11-16~2020-12-26)
time elapsed: 422.5401747226715 sec (2020-12-26~2020-12-31)
stn_id: 138
time elapsed: 423.0890510082245 sec (2020-01-01~2020-02-10)
time elapsed: 423.6201956272125 sec (2020-02-10~2020-03-21)
time elapsed: 424.16641664505005 sec (2020-03-21~2020-04-30)
time elapsed: 424.6526641845703 sec (2020-04-30~2020-06-09)
time elapsed: 425.25682950019836 sec (2020-06-09~2020-07-19)
time elapsed: 425.7730174064636 sec (2020-07-19~2020-08-28)
time elapsed: 426.32909417152405 sec (2020-08-28~2020-10-07)
time elapsed: 426.8391275405884 sec (2020-10-07~2020-11-16)
time elapsed: 429.3251082897186 sec (2020-11-16~2020-12-26)
time elapsed: 429.51460456848145 sec (2020-12-26~2020-12-31)
stn_id: 140
time elapsed: 443.0833387374878 sec (2020-01-01~2020-02-10)
time elaps

time elapsed: 625.1176993846893 sec (2020-12-26~2020-12-31)
stn_id: 170
time elapsed: 625.6071219444275 sec (2020-01-01~2020-02-10)
time elapsed: 627.1344528198242 sec (2020-02-10~2020-03-21)
time elapsed: 644.497091293335 sec (2020-03-21~2020-04-30)
time elapsed: 645.0018904209137 sec (2020-04-30~2020-06-09)
time elapsed: 645.5536062717438 sec (2020-06-09~2020-07-19)
time elapsed: 646.0475170612335 sec (2020-07-19~2020-08-28)
time elapsed: 646.6215093135834 sec (2020-08-28~2020-10-07)
time elapsed: 647.1248381137848 sec (2020-10-07~2020-11-16)
time elapsed: 647.5922813415527 sec (2020-11-16~2020-12-26)
time elapsed: 647.7638549804688 sec (2020-12-26~2020-12-31)
stn_id: 172
time elapsed: 648.3428010940552 sec (2020-01-01~2020-02-10)
time elapsed: 648.9059331417084 sec (2020-02-10~2020-03-21)
time elapsed: 649.3902485370636 sec (2020-03-21~2020-04-30)
time elapsed: 649.9197580814362 sec (2020-04-30~2020-06-09)
time elapsed: 650.3915579319 sec (2020-06-09~2020-07-19)
time elapsed: 650.93

time elapsed: 801.7423725128174 sec (2020-03-21~2020-04-30)
time elapsed: 802.1774561405182 sec (2020-04-30~2020-06-09)
time elapsed: 802.5921647548676 sec (2020-06-09~2020-07-19)
time elapsed: 803.0332252979279 sec (2020-07-19~2020-08-28)
time elapsed: 803.5678284168243 sec (2020-08-28~2020-10-07)
time elapsed: 804.0055296421051 sec (2020-10-07~2020-11-16)
time elapsed: 804.4328286647797 sec (2020-11-16~2020-12-26)
time elapsed: 804.6076967716217 sec (2020-12-26~2020-12-31)
stn_id: 212
time elapsed: 805.1006872653961 sec (2020-01-01~2020-02-10)
time elapsed: 805.6031789779663 sec (2020-02-10~2020-03-21)
time elapsed: 806.1069457530975 sec (2020-03-21~2020-04-30)
time elapsed: 806.5986173152924 sec (2020-04-30~2020-06-09)
time elapsed: 807.1456236839294 sec (2020-06-09~2020-07-19)
time elapsed: 807.6820816993713 sec (2020-07-19~2020-08-28)
time elapsed: 809.2042806148529 sec (2020-08-28~2020-10-07)
time elapsed: 827.7409913539886 sec (2020-10-07~2020-11-16)
time elapsed: 828.1831150054

time elapsed: 1009.6609349250793 sec (2020-08-28~2020-10-07)
time elapsed: 1010.1179554462433 sec (2020-10-07~2020-11-16)
time elapsed: 1010.6752460002899 sec (2020-11-16~2020-12-26)
time elapsed: 1010.8856899738312 sec (2020-12-26~2020-12-31)
stn_id: 245
time elapsed: 1011.3549547195435 sec (2020-01-01~2020-02-10)
time elapsed: 1011.8537664413452 sec (2020-02-10~2020-03-21)
time elapsed: 1012.26313829422 sec (2020-03-21~2020-04-30)
time elapsed: 1012.7436695098877 sec (2020-04-30~2020-06-09)
time elapsed: 1013.2399110794067 sec (2020-06-09~2020-07-19)
time elapsed: 1013.7551753520966 sec (2020-07-19~2020-08-28)
time elapsed: 1014.2549452781677 sec (2020-08-28~2020-10-07)
time elapsed: 1014.706773519516 sec (2020-10-07~2020-11-16)
time elapsed: 1015.1817190647125 sec (2020-11-16~2020-12-26)
time elapsed: 1015.3483891487122 sec (2020-12-26~2020-12-31)
stn_id: 247
time elapsed: 1015.8163139820099 sec (2020-01-01~2020-02-10)
time elapsed: 1016.2459318637848 sec (2020-02-10~2020-03-21)
tim

stn_id: 261
time elapsed: 1223.5214092731476 sec (2020-01-01~2020-02-10)
time elapsed: 1224.0500614643097 sec (2020-02-10~2020-03-21)
time elapsed: 1224.6458368301392 sec (2020-03-21~2020-04-30)
time elapsed: 1225.1358664035797 sec (2020-04-30~2020-06-09)
time elapsed: 1225.5918819904327 sec (2020-06-09~2020-07-19)
time elapsed: 1226.0665335655212 sec (2020-07-19~2020-08-28)
time elapsed: 1226.6072454452515 sec (2020-08-28~2020-10-07)
time elapsed: 1227.0277426242828 sec (2020-10-07~2020-11-16)
time elapsed: 1227.449543952942 sec (2020-11-16~2020-12-26)
time elapsed: 1227.587998867035 sec (2020-12-26~2020-12-31)
stn_id: 262
time elapsed: 1228.041439294815 sec (2020-01-01~2020-02-10)
time elapsed: 1228.4945735931396 sec (2020-02-10~2020-03-21)
time elapsed: 1229.0136282444 sec (2020-03-21~2020-04-30)
time elapsed: 1229.4945409297943 sec (2020-04-30~2020-06-09)
time elapsed: 1229.9272511005402 sec (2020-06-09~2020-07-19)
time elapsed: 1230.3783779144287 sec (2020-07-19~2020-08-28)
time e

time elapsed: 1331.6390960216522 sec (2020-03-21~2020-04-30)
time elapsed: 1332.0994048118591 sec (2020-04-30~2020-06-09)
time elapsed: 1333.549885749817 sec (2020-06-09~2020-07-19)
time elapsed: 1333.9922578334808 sec (2020-07-19~2020-08-28)
time elapsed: 1334.4950668811798 sec (2020-08-28~2020-10-07)
time elapsed: 1334.9049668312073 sec (2020-10-07~2020-11-16)
time elapsed: 1335.3208031654358 sec (2020-11-16~2020-12-26)
time elapsed: 1350.2048745155334 sec (2020-12-26~2020-12-31)
stn_id: 281
time elapsed: 1350.691901922226 sec (2020-01-01~2020-02-10)
time elapsed: 1351.1025445461273 sec (2020-02-10~2020-03-21)
time elapsed: 1351.5187783241272 sec (2020-03-21~2020-04-30)
time elapsed: 1351.9494895935059 sec (2020-04-30~2020-06-09)
time elapsed: 1352.3822951316833 sec (2020-06-09~2020-07-19)
time elapsed: 1352.7861320972443 sec (2020-07-19~2020-08-28)
time elapsed: 1353.249272108078 sec (2020-08-28~2020-10-07)
time elapsed: 1353.6660025119781 sec (2020-10-07~2020-11-16)
time elapsed: 1

In [12]:
# Write the failed-request log into the same directory as the station CSVs.
log_data.to_csv(os.path.join(SAVE_PATH, 'error_log.csv'), index=False)