In [43]:
!pip install requests boto3
!pip install python-dotenv



In [44]:
import requests
import boto3
import pandas as pd
from io import BytesIO
import os
from dotenv import load_dotenv

In [47]:
load_dotenv()
api_key = os.getenv("API_KEY")
base_url = os.getenv("BASE_URL")
aws_access_key_id=os.getenv('S3_AWS_ACCESS_KEY_ID')
aws_secret_access_key=os.getenv('S3_AWS_SECRET_ACCESS_KEY')
region_name=os.getenv('S3_REGION')
bucket_name = os.getenv('S3_BUCKET_NAME')

In [49]:
s3_client = boto3.client(
    's3',
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
    region_name=region_name
)

bucket_name = bucket_name

In [57]:
def fetch_and_save_data(api_endpoints, output_file):
    """
    여러 API 엔드포인트에서 데이터를 가져와 하나의 CSV 파일로 저장 후 S3에 업로드합니다.

    Parameters:
    - api_endpoints (dict): 데이터를 가져올 API 엔드포인트와 날짜의 딕셔너리.
    - output_file (str): S3에 저장할 파일 이름.
    """
    # 모든 데이터를 누적할 DataFrame 초기화
    all_data = pd.DataFrame()

    # 각 API 엔드포인트에서 데이터 가져오기
    for endpoint, date in api_endpoints.items():
        page = 1
        per_page = 1000

        while True:
            url = f"{base_url}{endpoint}&serviceKey={api_key}"

            response = requests.get(url)
            if response.status_code == 200:
                data = response.json()
                if 'data' in data and data['data']:
                    df = pd.DataFrame(data['data'])
                    df['date'] = date
                    # 컬럼 이름 통일하기
                    df.rename(columns={
                        **{f'{month}월': f'2019-{month:02}' for month in range(1, 13)},
                        **{f'2020년{month}월': f'2020-{month:02}' for month in range(1, 13)},
                        **{f'2021년{month}월': f'2021-{month:02}' for month in range(1, 13)},
                        '상하구분': 'direction',
                        '구분': 'direction',
                        '출발역': 'departure_station',
                        '역명': 'departure_station',
                        '요일구분': 'day_type',
                        '조사일자': 'day_type',
                        '역번호': 'station_number',
                        '고유역번호': 'station_number',
                        '호선': 'line',
                        '수송연월': 'transportation_date',
                        '승하차인원수': 'passenger_count'
                    }, inplace=True)

                    if '연번' in df.columns:
                        df.drop(columns=['연번'], inplace=True)
                    all_data = pd.concat([all_data, df], ignore_index=True)
                    print(f"{endpoint} - Page {page} data fetched and added.")
                else:
                    print(f"No more data to fetch for {endpoint}.")
                    break
            else:
                print(f"Failed to fetch data from {endpoint} on page {page}: {response.status_code}")
                break

            page += 1

    # 최종 데이터를 CSV로 변환하고 S3에 업로드
    """
    csv_buffer = BytesIO()
    all_data.to_csv(csv_buffer, index=False, encoding='utf-8')
    csv_buffer.seek(0)

    try:
        s3_client.upload_fileobj(csv_buffer, bucket_name, output_file)
        print(f"모든 데이터가 {output_file} 파일로 S3에 성공적으로 업로드되었습니다.")
    except Exception as e:
        print(f"Failed to upload combined data to S3: {e}")
"""
# api_endpoints를 딕셔너리 형태로 수정
api_endpoints = {
    "/15044249/v1/uddi:2a73166e-6fde-4c5e-97b4-92f20ffd4282?page=1&perPage=1000": "20191231",
    "/15044249/v1/uddi:42c11f4c-fc38-4b38-bf66-8dc19ad1bca4?page=1&perPage=1000": "20201231",
    "/15044249/v1/uddi:38a25fdf-bf0b-4745-bc2c-db3e7a517997?page=1&perPage=1000": "20211231",
    "/15044249/v1/uddi:c2b72a04-63da-4d1c-8613-dfbd799b1a95?page=1&perPage=1000": "20221231",
    "/15044249/v1/uddi:796901f7-428f-4a5e-b8cd-b0a05603e3ce?page=1&perPage=1000": "20231231"
}

output_file = "prod_data/서울교통공사_월별_승하차인원_전체데이터.csv"
fetch_and_save_data(api_endpoints, output_file)

/15044249/v1/uddi:2a73166e-6fde-4c5e-97b4-92f20ffd4282?page=1&perPage=1000 - Page 1 data fetched and added.
/15044249/v1/uddi:2a73166e-6fde-4c5e-97b4-92f20ffd4282?page=1&perPage=1000 - Page 2 data fetched and added.
/15044249/v1/uddi:2a73166e-6fde-4c5e-97b4-92f20ffd4282?page=1&perPage=1000 - Page 3 data fetched and added.
/15044249/v1/uddi:2a73166e-6fde-4c5e-97b4-92f20ffd4282?page=1&perPage=1000 - Page 4 data fetched and added.
/15044249/v1/uddi:2a73166e-6fde-4c5e-97b4-92f20ffd4282?page=1&perPage=1000 - Page 5 data fetched and added.
/15044249/v1/uddi:2a73166e-6fde-4c5e-97b4-92f20ffd4282?page=1&perPage=1000 - Page 6 data fetched and added.
/15044249/v1/uddi:2a73166e-6fde-4c5e-97b4-92f20ffd4282?page=1&perPage=1000 - Page 7 data fetched and added.
/15044249/v1/uddi:2a73166e-6fde-4c5e-97b4-92f20ffd4282?page=1&perPage=1000 - Page 8 data fetched and added.
/15044249/v1/uddi:2a73166e-6fde-4c5e-97b4-92f20ffd4282?page=1&perPage=1000 - Page 9 data fetched and added.
/15044249/v1/uddi:2a73166e-6

KeyboardInterrupt: 