In [1]:
# Miscellaneous operating system interfaces
import os

# Mathematical functions
import math

# The fundamental package for scientific computing with Python.
import numpy as np

# Flexible and powerful data analysis / manipulation library for Python, providing labeled data structures similar to R data.frame objects, statistical functions, and much more
import pandas as pd

# Basic date and time types
from datetime import datetime

# A simple, yet elegant HTTP library.
import requests

# Standard-library helpers for parsing, quoting and building URLs.
from urllib import parse

# Beautiful Soup is a Python library for pulling data out of HTML and XML files.
from bs4 import BeautifulSoup

# Service key appended to every request URL as the ``ServiceKey`` query value.
# Credentials should not live in the notebook: set DATA_GO_KR_API_KEY in the
# environment to override. The literal is kept only as a backward-compatible
# fallback and should be rotated/removed once the environment variable is in use.
API_KEY = os.environ.get(
    'DATA_GO_KR_API_KEY',
    'GTKlVJwR2gbaUtOZT5szlqG7vyVdeSIvkvoiFGYSQafhIC0FZUQMFAMXmoBtdQ2YiUeyaQxzTxIBgtpHi8JZIw%3D%3D',
)

In [2]:
def _month_windows(start, end):
    """Split the inclusive ``YYYYMMDD`` range into one ``(start, end)`` pair per calendar month.

    The first window begins on ``start`` and the last one ends on ``end``;
    every other boundary is the first or last day of its month.
    """
    first = datetime.strptime(start, '%Y%m%d')
    last = datetime.strptime(end, '%Y%m%d')
    lower = first.strftime('%Y%m%d')
    upper = last.strftime('%Y%m%d')

    windows = []
    for month in pd.period_range(start=first, end=last, freq='M'):
        month_start = month.strftime('%Y%m') + '01'
        month_end = month.end_time.strftime('%Y%m%d')
        # Zero-padded YYYYMMDD strings sort in the same order as the dates they encode.
        windows.append((max(lower, month_start), min(upper, month_end)))
    return windows


def openapi(request):
    """Fetch every ``<item>`` record for a date range and return them as JSON.

    Parameters
    ----------
    request : dict
        Must contain ``'endpoint'`` (the service URL). Every other key is sent
        as a query parameter and overrides the defaults ``pageNo``,
        ``numOfRows``, ``startCreateDt`` and ``endCreateDt``.

    Returns
    -------
    str
        ``DataFrame.to_json(orient='columns', force_ascii=False)`` of all
        collected records (one column per child tag of ``<item>``).

    Notes
    -----
    * The range is queried one calendar month at a time. The windows are
      computed once from the requested dates, so the final window always ends
      on the requested ``endCreateDt`` and no month outside the range is hit.
    * The page count comes from the initial (whole-range) response and is
      reused for every month window.
    * A failing page is reported with ``print`` and skipped (best effort).
    """
    # API_KEY is concatenated into the URL (not passed through ``params``), as the original did.
    endpoint = request.get('endpoint') + '?ServiceKey=' + API_KEY

    params = {
        'pageNo': 1,
        'numOfRows': 10,
        'startCreateDt': '20200410',
        'endCreateDt': '20200410',
    }
    params.update({k: v for k, v in request.items() if k != 'endpoint'})

    # Probe request: only used to read the paging information.
    response = requests.get(endpoint, params=params)
    soup = BeautifulSoup(response.text, 'xml')
    num_of_rows = int(soup.select_one('numOfRows').get_text(strip=True))
    total_count = int(soup.select_one('totalCount').get_text(strip=True))
    total_page = math.ceil(total_count / num_of_rows)

    data = []
    windows = _month_windows(params.get('startCreateDt'), params.get('endCreateDt'))

    for start_date, end_date in windows:
        for page_no in range(1, total_page + 1):
            try:
                # Build a fresh dict per request so the requested range is never overwritten.
                page_params = dict(
                    params,
                    startCreateDt=start_date,
                    endCreateDt=end_date,
                    pageNo=page_no,
                )
                r = requests.get(endpoint, params=page_params)

                # parsing
                page = BeautifulSoup(r.text, 'xml')
                for row in page.select('item'):
                    # Children without a tag name (e.g. text nodes) are skipped
                    # so they cannot produce a ``None`` column.
                    data.append({
                        col.name: col.get_text(strip=True)
                        for col in row
                        if getattr(col, 'name', None)
                    })
            except Exception as e:
                print(page_no, e)

    # dataframe
    return pd.DataFrame(data).to_json(orient='columns', force_ascii=False)


In [3]:
# Miscellaneous operating system interfaces
import os

# JSON encoder and decoder
import json

# Basic date and time types
from datetime import date, datetime, timedelta

# The fundamental package for scientific computing with Python.
import numpy as np

# Flexible and powerful data analysis / manipulation library for Python, providing labeled data structures similar to R data.frame objects, statistical functions, and much more
import pandas as pd

# Import a custom module file.
# import modules.openapi as openapi

# Set the absolute directory path.
# os.path.abspath('') resolves the empty path against the current working
# directory, so BASE_PATH depends on where the kernel was started.
BASE_PATH = os.path.abspath('')
# Parent directory of BASE_PATH.
dirs = os.path.dirname(BASE_PATH)

In [None]:
def middleware(request):
    """Fetch the records via ``openapi`` and shape them into table head/body data.

    Parameters
    ----------
    request : dict
        Forwarded unchanged to ``openapi``. A falsy value short-circuits.

    Returns
    -------
    list or dict
        On success, the first body row (a list of cell values in display
        column order). If ``request`` is falsy, or no row could be produced
        (any failure, or an empty result), the status dict
        ``{'iserror': ..., 'errmsg': ..., ...}`` is returned instead of
        raising ``KeyError``/``IndexError``.
    """
    data = {
        'iserror': False, 'errmsg': '',
    }

    if not request:
        return data

    try:
        # json.loads() no longer accepts ``encoding`` (removed in Python 3.9).
        df = pd.DataFrame(json.loads(openapi(request)))

        df = df.rename(columns={
            'accDefRate': '누적 환진률',
            'accExamCnt': '누적 검사 수',
            'accExamCompCnt': '누적 검사 완료 수',
            'careCnt': '치료중 환자 수',
            'clearCnt': '격리해제 수',
            'createDt': '등록일시분초',
            'deathCnt': '사망자 수',
            'decideCnt': '확진자 수',
            'examCnt': '검사진행 수',
            'resutlNegCnt': '결과 음성 수',
            'seq': '게시글번호(감염현황 고유값)',
            'stateDt': '기준일',
            'stateTime': '기준시간',
            'updateDt': '수정일시분초',
        })

        # Change the default format of the data series.
        # Strip the fractional seconds so the fixed format below matches.
        df['등록일시분초'] = df['등록일시분초'].replace(r'\.\d+$', '', regex=True)
        df['등록일시분초'] = pd.to_datetime(df['등록일시분초'], format='%Y-%m-%d %H:%M:%S')
        df['등록일시분초'] = df['등록일시분초'].dt.strftime('%Y-%m-%d %H:%M:%S')

        df['기준일'] = pd.to_datetime(df['기준일'], format='%Y%m%d')
        df['기준일'] = df['기준일'].dt.strftime('%Y-%m-%d')

        nans = ['누적 환진률', '누적 검사 수', '누적 검사 완료 수', '치료중 환자 수', '격리해제 수', '사망자 수', '확진자 수', '검사진행 수', '결과 음성 수', '게시글번호(감염현황 고유값)']

        for nan in nans:
            try:
                # Cells containing '-' are replaced by 0. Numeric conversion with
                # pd.to_numeric was left disabled by the author
                # ("TypeError: Object of type int64 is not JSON serializable").
                df[nan] = df[nan].replace('-', 0, regex=True)
            except Exception as e:
                # Store text, not the exception object, so ``data`` stays serialisable.
                data.update({'iserror': True, 'errmsg': str(e)})

        df = df[['기준일', '누적 환진률', '누적 검사 수', '누적 검사 완료 수', '치료중 환자 수', '격리해제 수', '사망자 수', '확진자 수', '검사진행 수', '결과 음성 수', '등록일시분초']]

        # head
        data.update({ 'cols': [{ 'title': column } for column in df.columns] })

        # body
        data.update({ 'rows': df.values.tolist() })

    except Exception as e:
        data.update({'iserror': True, 'errmsg': str(e)})

    # 'rows' is missing when the pipeline failed and empty when nothing was fetched.
    rows = data.get('rows')
    return rows[0] if rows else data

# Example query: daily status records created between 2020-12-19 and 2020-12-22.
request = dict(
    endpoint='http://openapi.data.go.kr/openapi/service/rest/Covid19/getCovid19InfStateJson',
    pageNo=1,
    numOfRows=10,
    startCreateDt='20201219',
    endCreateDt='20201222',
)
middleware(request)

In [67]:
def middleware(request):
    """Fetch the records via ``openapi`` and return them as a de-duplicated DataFrame.

    Parameters
    ----------
    request : dict
        Forwarded unchanged to ``openapi``. A falsy value short-circuits and
        returns the status dict ``{'iserror': False, 'errmsg': ''}``.

    Returns
    -------
    pandas.DataFrame
        Renamed and reformatted records with one row per '기준일' (the first
        row in fetch order is kept) and a fresh 0..n-1 index. If fetching
        fails, a one-row placeholder frame with columns ``A``/``B`` is
        returned and the error is printed rather than silently dropped.
    """
    data = {
        'iserror': False, 'errmsg': '',
    }

    if not request:
        return data

    # Placeholder returned when nothing could be fetched.
    df = pd.DataFrame(columns=['A', 'B'], index=np.arange(1))

    try:
        # json.loads() no longer accepts ``encoding`` (removed in Python 3.9).
        df = pd.DataFrame(json.loads(openapi(request)))

        df = df.rename(columns={
            'accDefRate': '누적 환진률',
            'accExamCnt': '누적 검사 수',
            'accExamCompCnt': '누적 검사 완료 수',
            'careCnt': '치료중 환자 수',
            'clearCnt': '격리해제 수',
            'createDt': '등록일시분초',
            'deathCnt': '사망자 수',
            'decideCnt': '확진자 수',
            'examCnt': '검사진행 수',
            'resutlNegCnt': '결과 음성 수',
            'seq': '게시글번호(감염현황 고유값)',
            'stateDt': '기준일',
            'stateTime': '기준시간',
            'updateDt': '수정일시분초',
        })

        # Change the default format of the data series.
        # Strip the fractional seconds so the fixed format below matches.
        df['등록일시분초'] = df['등록일시분초'].replace(r'\.\d+$', '', regex=True)
        df['등록일시분초'] = pd.to_datetime(df['등록일시분초'], format='%Y-%m-%d %H:%M:%S')
        df['등록일시분초'] = df['등록일시분초'].dt.strftime('%Y-%m-%d %H:%M:%S')

        df['기준일'] = pd.to_datetime(df['기준일'], format='%Y%m%d')
        df['기준일'] = df['기준일'].dt.strftime('%Y-%m-%d')

        # Remove duplicate data: keep the first row of each date by position.
        # The index labels come from JSON keys (strings), so picking min(label)
        # would compare lexicographically ('10' < '2') and could keep a later row.
        df = df.drop_duplicates(subset='기준일', keep='first').reset_index(drop=True)

    except Exception as e:
        data.update({'iserror': True, 'errmsg': str(e)})
        # ``data`` is not returned on this path, so surface the failure explicitly.
        print('middleware:', e)

    return df

# Example query: daily status records created between 2020-12-19 and 2020-12-22.
request = dict(
    endpoint='http://openapi.data.go.kr/openapi/service/rest/Covid19/getCovid19InfStateJson',
    pageNo=1,
    numOfRows=10,
    startCreateDt='20201219',
    endCreateDt='20201222',
)
results = middleware(request)
results

Unnamed: 0,누적 환진률,누적 검사 수,누적 검사 완료 수,치료중 환자 수,격리해제 수,등록일시분초,사망자 수,확진자 수,검사진행 수,결과 음성 수,게시글번호(감염현황 고유값),기준일,기준시간,수정일시분초
0,1.421035467,3772432,3621303,14810,35928,2020-12-22 09:35:08,722,51460,151129,3569843,362,2020-12-22,00:00,
1,1.4181385274,3713861,3567423,14738,35155,2020-12-21 09:35:07,698,50591,146438,3516832,361,2020-12-21,00:00,
2,1.4015332912,3683094,3543619,14269,34722,2020-12-20 09:32:54,674,49665,139475,3493954,360,2020-12-20,00:00,
3,1.3798240575,3646247,3520014,13577,34334,2020-12-19 10:32:55,659,48570,126233,3471444,359,2020-12-19,00:00,


In [53]:
# Collect the index labels of rows whose '기준일' occurs more than once,
# sparing the smallest label of each date, then show them as plain ints.
denies = []
date_counts = results['기준일'].value_counts()
for day, count in date_counts.items():
    if int(count) != 1:
        labels = results.index[results['기준일'] == day]
        keep = min(labels)
        denies += [label for label in labels if label != keep]
denies = [int(label) for label in denies]
print(denies)


[4]
