In [49]:
!pip install requests boto3
!pip install python-dotenv


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.2[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [50]:
import requests
import boto3
import pandas as pd
from io import BytesIO
import os 
from dotenv import load_dotenv

In [51]:
# Load variables from a .env file into the process environment before any
# of the lookups below run.
load_dotenv()

# API access settings (each is None when the variable is not set).
api_key = os.environ.get("API_KEY")
base_url = os.environ.get("BASE_URL")

# S3 credentials, region and target bucket (each is None when not set).
aws_access_key_id = os.environ.get("S3_AWS_ACCESS_KEY_ID")
aws_secret_access_key = os.environ.get("S3_AWS_SECRET_ACCESS_KEY")
region_name = os.environ.get("S3_REGION")
bucket_name = os.environ.get("S3_BUCKET_NAME")

In [52]:
# Shared S3 client, built from the credentials and region read from the
# environment in the configuration cell.
s3_client = boto3.client(
    service_name="s3",
    region_name=region_name,
    aws_access_key_id=aws_access_key_id,
    aws_secret_access_key=aws_secret_access_key,
)

In [53]:
# Maps the Korean field names found in the API records to English column
# names. Entries are stored in sorted-key order.
column_mapping = dict(sorted({
    # Hourly buckets: "06-07시간대" ... "23-24시간대" -> time_06_07 ... time_23_24.
    **{
        f"{start:02d}-{start + 1:02d}시간대": f"time_{start:02d}_{start + 1:02d}"
        for start in range(6, 24)
    },
    # Open-ended buckets at both ends of the day.
    '06시이전': 'before_06',
    '24시이후': 'after_24',
    # Non-hourly descriptive fields.
    '수송일자': 'transport_date',
    '승하차구분': 'boarding_type',
    '역명': 'station_name',
    '역번호': 'station_id',
    '호선': 'line',
}.items()))

In [55]:
def fetch_hourly_subway_passengers(endpoint: str, per_page: int = 1000, timeout: float = 30.0) -> BytesIO:
    """
    Fetch every page of an API endpoint and return the combined rows as an in-memory CSV.

    Pages are requested starting at page 1 until the API answers with a
    non-200 status or a response without a non-empty 'data' entry. If a later
    page fails, the rows collected so far are still returned. This function
    only builds the CSV; it does not upload anything.

    :param endpoint (str): API endpoint path appended to the module-level base_url.
    :param per_page (int): Number of records requested per page.
    :param timeout (float): Seconds to wait for each HTTP request before requests raises.
    :return (BytesIO): UTF-8 encoded CSV positioned at offset 0. Contains no
        data rows when nothing was fetched.
    :raises requests.exceptions.RequestException: on connection errors or timeouts.
    """
    page_frames = []
    page = 1
    while True:
        # The URL is assembled by hand: sending the key through ``params=``
        # would make requests URL-encode it, which changes what is sent if the
        # configured key is already percent-encoded.
        url = f"{base_url}{endpoint}?page={page}&perPage={per_page}&serviceKey={api_key}"
        # Without a timeout a stalled connection would block the notebook forever.
        response = requests.get(url, timeout=timeout)
        if response.status_code != 200:
            print(f"Failed to fetch data from {endpoint} on page {page}: {response.status_code}")
            break

        payload = response.json()
        if 'data' not in payload or not payload['data']:
            print(f"No more data to fetch for {endpoint}.")
            break

        page_df = pd.DataFrame(payload['data']).rename(columns=column_mapping)
        if 'transport_date' in page_df.columns:
            page_df['transport_date'] = pd.to_datetime(page_df['transport_date'])
            # pandas dayofweek: Monday is 0, Sunday is 6.
            page_df['weekday'] = page_df['transport_date'].dt.dayofweek
        # '연번' is dropped when present; errors='ignore' covers pages without it.
        page_df = page_df.drop(columns=['연번'], errors='ignore')

        page_frames.append(page_df)
        print(f"{endpoint} - Page {page} data fetched and added.")
        page += 1

    # Concatenate once at the end instead of re-copying the accumulated frame
    # on every page; pd.concat rejects an empty list, hence the fallback.
    all_data = pd.concat(page_frames, ignore_index=True) if page_frames else pd.DataFrame()

    # Serialize the combined frame to CSV and rewind so callers read from the start.
    csv_buffer = BytesIO()
    all_data.to_csv(csv_buffer, index=False, encoding='utf-8')
    csv_buffer.seek(0)
    return csv_buffer

def save_to_s3(csv_buffer : BytesIO, bucket_name : str, output_file : str) -> bool:
    """
    Upload an in-memory CSV to S3 using the module-level s3_client.

    Upload errors are reported with print() rather than raised; the return
    value tells the caller whether the upload succeeded.

    :param csv_buffer (BytesIO): Binary buffer holding the CSV content to upload.
    :param bucket_name (str): Name of the destination S3 bucket.
    :param output_file (str): Object key (file name) to store the CSV under.
    :return (bool): True when the upload completed, False when it raised.
    """
    try:
        s3_client.upload_fileobj(csv_buffer, bucket_name, output_file)
    except Exception as e:
        # Best-effort: keep the notebook running, but surface the failure
        # to the caller through the return value.
        print(f"Failed to upload combined data to S3: {e}")
        return False
    print(f"모든 데이터가 {output_file} 파일로 S3에 성공적으로 업로드되었습니다.")
    return True
        
def fetch_and_save_data(endpoint: str, bucket_name: str, output_file: str) -> None:
    """
    Fetch the endpoint's data with fetch_hourly_subway_passengers and store it in S3 with save_to_s3.

    When the fetch produced no content at all, the upload is skipped so that an
    existing object under output_file is not overwritten with an empty file.

    :param endpoint (str): API endpoint to fetch data from.
    :param bucket_name (str): Name of the destination S3 bucket.
    :param output_file (str): Object key (file name) to store the CSV under.
    :return: None.
    """
    csv_buffer = fetch_hourly_subway_passengers(endpoint)
    # A fetch that collected no pages serializes to nothing but a line
    # terminator; getvalue() inspects the content without moving the position.
    if not csv_buffer.getvalue().strip():
        print(f"No data fetched from {endpoint}; skipping upload of {output_file}.")
        return
    save_to_s3(csv_buffer, bucket_name, output_file)

# Endpoint path (appended to base_url when the request URL is built) and the
# S3 object key under which the combined CSV is stored.
api_endpoint = "/15048032/v1/uddi:bff5665b-6c0b-4d2f-80ff-9af01761c355"
output_file = "prod_data/서울교통공사_역별_일별_시간대별_승하차인원정보_전체데이터.csv"
# NOTE(review): fetch_and_save_data does not return a value, so all_data is
# bound to None here; the fetched rows only exist in the uploaded CSV.
all_data = fetch_and_save_data(api_endpoint, bucket_name, output_file)

/15048032/v1/uddi:bff5665b-6c0b-4d2f-80ff-9af01761c355 - Page 1 data fetched and added.
/15048032/v1/uddi:bff5665b-6c0b-4d2f-80ff-9af01761c355 - Page 2 data fetched and added.
/15048032/v1/uddi:bff5665b-6c0b-4d2f-80ff-9af01761c355 - Page 3 data fetched and added.
/15048032/v1/uddi:bff5665b-6c0b-4d2f-80ff-9af01761c355 - Page 4 data fetched and added.
/15048032/v1/uddi:bff5665b-6c0b-4d2f-80ff-9af01761c355 - Page 5 data fetched and added.
/15048032/v1/uddi:bff5665b-6c0b-4d2f-80ff-9af01761c355 - Page 6 data fetched and added.
/15048032/v1/uddi:bff5665b-6c0b-4d2f-80ff-9af01761c355 - Page 7 data fetched and added.
/15048032/v1/uddi:bff5665b-6c0b-4d2f-80ff-9af01761c355 - Page 8 data fetched and added.
/15048032/v1/uddi:bff5665b-6c0b-4d2f-80ff-9af01761c355 - Page 9 data fetched and added.
/15048032/v1/uddi:bff5665b-6c0b-4d2f-80ff-9af01761c355 - Page 10 data fetched and added.
/15048032/v1/uddi:bff5665b-6c0b-4d2f-80ff-9af01761c355 - Page 11 data fetched and added.
/15048032/v1/uddi:bff5665b-6c0