In [8]:
!pip install requests boto3
!pip install python-dotenv


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [9]:
import requests
import boto3
import pandas as pd
from io import BytesIO
import os 
from dotenv import load_dotenv

In [10]:
# Load settings from a local .env file (if present) into the process environment.
load_dotenv()

# Open-API credentials and base URL used to build every request URL.
api_key = os.getenv("API_KEY")
base_url = os.getenv("BASE_URL")

# S3 connection settings; these three are passed as-is to boto3.client and may be None.
aws_access_key_id = os.getenv('S3_AWS_ACCESS_KEY_ID')
aws_secret_access_key = os.getenv('S3_AWS_SECRET_ACCESS_KEY')
region_name = os.getenv('S3_REGION')

# Destination bucket for the combined CSV.
bucket_name = os.getenv('S3_BUCKET_NAME')

# Fail fast: without these the request URL would start with the text "None"
# and the upload would have no destination bucket.
_required_settings = {
    "API_KEY": api_key,
    "BASE_URL": base_url,
    "S3_BUCKET_NAME": bucket_name,
}
_missing_settings = [name for name, value in _required_settings.items() if not value]
if _missing_settings:
    raise RuntimeError(
        f"Missing required environment variable(s): {', '.join(_missing_settings)}"
    )

In [11]:
# S3 client used by the upload step, built from the settings read from the environment.
s3_client = boto3.client(
    's3',
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
    region_name=region_name
)

In [12]:
def fetch_and_save_data(api_endpoints, output_file, per_page=1000, timeout=30):
    """
    Fetch every page from several API endpoints, merge the rows into one CSV
    and upload that CSV to S3.

    Pages are requested starting at 1 until an endpoint returns a response
    without rows. If a request fails (network error, non-200 status or an
    undecodable body), the remaining pages of that endpoint are skipped and
    processing continues with the next endpoint, so the uploaded file can be
    partial. Nothing is uploaded when no rows were fetched at all.

    Relies on the module-level names `base_url`, `api_key`, `s3_client` and
    `bucket_name`.

    Parameters:
    - api_endpoints (dict): maps an endpoint path (appended to `base_url`) to
      the value written into the '날짜' column of every row from that endpoint.
    - output_file (str): object key of the CSV file to write in the S3 bucket.
    - per_page (int): value sent as the `perPage` query parameter.
    - timeout (float): seconds to wait for each HTTP request before giving up.

    Returns:
    - None. Progress and failures are reported with print().
    """
    # Harmonise column names that differ between the yearly datasets.
    column_renames = {
        '24시00분': '00시00분',
        '24시30분': '00시30분',
        '구분': '상하구분',
        '조사일자': '요일구분',
        '역명': '출발역'
    }

    # Collect one frame per page and concatenate once at the end; growing a
    # DataFrame with concat inside the loop re-copies all earlier rows each time.
    frames = []

    for endpoint, date in api_endpoints.items():
        page = 1

        while True:
            url = f"{base_url}{endpoint}?page={page}&perPage={per_page}&serviceKey={api_key}"

            try:
                response = requests.get(url, timeout=timeout)
            except requests.RequestException as e:
                # Report only the exception type: the message may contain the
                # full URL, which includes the service key.
                print(f"Failed to fetch data from {endpoint} on page {page}: {type(e).__name__}")
                break

            if response.status_code != 200:
                print(f"Failed to fetch data from {endpoint} on page {page}: {response.status_code}")
                break

            try:
                data = response.json()
            except ValueError:
                print(f"Failed to fetch data from {endpoint} on page {page}: invalid JSON in response")
                break

            rows = data.get('data') if isinstance(data, dict) else None
            if not rows:
                print(f"No more data to fetch for {endpoint}.")
                break

            df = (
                pd.DataFrame(rows)
                .assign(**{'날짜': date})
                .rename(columns=column_renames)
                # '연번' is a serial-number column that is not needed downstream.
                .drop(columns=['연번'], errors='ignore')
            )
            frames.append(df)
            print(f"{endpoint} - Page {page} data fetched and added.")

            page += 1

    if not frames:
        # Avoid overwriting an existing object with an empty file.
        print("No data was fetched from any endpoint; skipping S3 upload.")
        return

    all_data = pd.concat(frames, ignore_index=True)

    # Serialise the combined data to CSV in memory and upload it to S3.
    csv_buffer = BytesIO()
    all_data.to_csv(csv_buffer, index=False, encoding='utf-8')
    csv_buffer.seek(0)

    try:
        s3_client.upload_fileobj(csv_buffer, bucket_name, output_file)
        print(f"모든 데이터가 {output_file} 파일로 S3에 성공적으로 업로드되었습니다.")
    except Exception as e:
        print(f"Failed to upload combined data to S3: {e}")

# Endpoint path -> value stored in the '날짜' column for rows from that endpoint.
# Endpoints are fetched in the order listed here.
api_endpoints = {
    "/15071311/v1/uddi:70e3a3d3-0872-4828-8234-f0bca459b44f": "20191231",
    "/15071311/v1/uddi:b3803d43-ffe3-4d17-9024-fd6cfa37c284": "20211231",
    "/15071311/v1/uddi:75461a18-17a3-42fe-9322-a51148003b69": "20221231",
    "/15071311/v1/uddi:99771417-a036-46f1-8ad5-8edf4591c2ee": "20201231",
    "/15071311/v1/uddi:e477f1d9-2c3a-4dc8-b147-a55584583fa2": "20231231",
    "/15071311/v1/uddi:c87b6af0-0ef7-4182-b172-fd2680a79d6f": "20240331",
    "/15071311/v1/uddi:9aff0ee6-26e7-42c4-af0c-84bf31680ca9": "20240630",
    "/15071311/v1/uddi:da7cd08f-94f0-4dba-b33d-d02dcb35b57b": "20240930",
}

# S3 object key for the combined CSV.
output_file = "prod_data/서울교통공사_지하철혼잡도_전체데이터.csv"

# Fetch all endpoints and upload the merged result.
fetch_and_save_data(api_endpoints=api_endpoints, output_file=output_file)

/15071311/v1/uddi:70e3a3d3-0872-4828-8234-f0bca459b44f - Page 1 data fetched and added.
/15071311/v1/uddi:70e3a3d3-0872-4828-8234-f0bca459b44f - Page 2 data fetched and added.
No more data to fetch for /15071311/v1/uddi:70e3a3d3-0872-4828-8234-f0bca459b44f.
/15071311/v1/uddi:b3803d43-ffe3-4d17-9024-fd6cfa37c284 - Page 1 data fetched and added.
/15071311/v1/uddi:b3803d43-ffe3-4d17-9024-fd6cfa37c284 - Page 2 data fetched and added.
No more data to fetch for /15071311/v1/uddi:b3803d43-ffe3-4d17-9024-fd6cfa37c284.
/15071311/v1/uddi:75461a18-17a3-42fe-9322-a51148003b69 - Page 1 data fetched and added.
/15071311/v1/uddi:75461a18-17a3-42fe-9322-a51148003b69 - Page 2 data fetched and added.
No more data to fetch for /15071311/v1/uddi:75461a18-17a3-42fe-9322-a51148003b69.
/15071311/v1/uddi:99771417-a036-46f1-8ad5-8edf4591c2ee - Page 1 data fetched and added.
/15071311/v1/uddi:99771417-a036-46f1-8ad5-8edf4591c2ee - Page 2 data fetched and added.
No more data to fetch for /15071311/v1/uddi:997714