In [None]:
import requests

def get_upbit_krw_tickers():
    url = "https://api.upbit.com/v1/market/all"
    headers = {"Accept": "application/json"}

    response = requests.get(url, headers=headers)
    
    if response.status_code == 200:
        markets = response.json()
        krw_tickers = [market['market'][4:] for market in markets if market['market'].startswith('KRW-')]
        return krw_tickers
    else:
        return None


import os
import re

def get_txt_tickers():
    path = './txt'

    if os.path.exists(path) and os.path.isdir(path):
        files = os.listdir(path)
        pattern = re.compile(r'^(.*?)\swhitepapers\s-\swhitepaper\.io\.txt$')
        extracted_last_words = [pattern.match(file).group(1).split()[-1] for file in files]
        return extracted_last_words
    
    else:
        print(f"The directory {path} does not exist.")
        return None


def sort_tickers():
    tickers_upbit = get_upbit_krw_tickers() 
    tickers_txt = get_txt_tickers()

    if tickers_upbit is not None:
        set_upbit = set(tickers_upbit)
        set_txt = set(tickers_txt)
        
        both = set_txt & set_upbit
        only_txt = set_txt - set_upbit
        only_upbit = set_upbit - set_txt

        both_list = list(both)
        only_txt_list = list(only_txt)
        only_upbit_list = list(only_upbit)

        result = {
            'both': both_list,
            'txt_only': only_txt_list,
            'upbit_only': only_upbit_list
        }

        return result
    else:
        return None
    

import requests
import json
from datetime import datetime, timedelta
import time

def get_hourly_price_data(ticker, to_datetime, count=200, max_retries=5):
    url = f"https://api.upbit.com/v1/candles/minutes/60"
    headers = {"Accept": "application/json"}
    params = {
        'market': ticker,
        'to': to_datetime,
        'count': count
    }
    
    for attempt in range(max_retries):
        response = requests.get(url, headers=headers, params=params)
        if response.status_code == 200:
            return response.json()
        else:
            time.sleep(1)  # api호출 터지는거 방지하기 위해서 1초 대기
    return None

def collect_data(tickers, start_date, end_date, output_file):
    with open(output_file, 'w') as f:
        f.write('[\n')

    for ticker in tickers:
        to_datetime = end_date.strftime("%Y-%m-%dT%H:%M:%S")
        while True:
            candles = get_hourly_price_data(f'KRW-{ticker}', to_datetime)
            if not candles:
                break

            # JSON에 일단 정보 저장 (메모리 문제)
            with open(output_file, 'a') as f:
                for candle in candles:
                    json.dump({
                        'market': ticker,
                        'candle_date_time_kst': candle['candle_date_time_kst'],
                        'opening_price': candle['opening_price'],
                        'high_price': candle['high_price'],
                        'low_price': candle['low_price'],
                        'trade_price': candle['trade_price'],
                        'timestamp': candle['timestamp'],
                        'candle_acc_trade_price': candle['candle_acc_trade_price'],
                        'candle_acc_trade_volume': candle['candle_acc_trade_volume']
                    }, f)
                    f.write(',\n')

            last_candle_time = datetime.strptime(candles[-1]['candle_date_time_utc'], "%Y-%m-%dT%H:%M:%S")
            if last_candle_time < start_date:
                break
            # 다음 시간대로 
            to_datetime = (last_candle_time - timedelta(minutes=1)).strftime("%Y-%m-%dT%H:%M:%S")
            time.sleep(0.1)  

    # json 열고서
    with open(output_file, 'rb+') as f:
        f.seek(-2, 2)  
        f.truncate()  # 마지막 콤마 제거
        f.write(b'\n]')


# 특정 날짜 범위 설정
start_date = datetime(2021, 1, 1)
end_date = datetime(2024, 5, 29)

# 티커 목록 정의
result = sort_tickers()
tickers = result['both']

# 데이터 수집 및 JSON 파일로 저장
output_file = 'upbit_2years_hourly_data.json'
collect_data(tickers, start_date, end_date, output_file)
print(f"Data saved to {output_file}")


In [1]:
import pandas as pd
import json

# JSON 파일을 읽어들이기
with open('upbit_2years_hourly_data.json', 'r') as f:
    data = json.load(f)

# pandas DataFrame으로 변환
df = pd.DataFrame(data)

# CSV 파일로 저장
df.to_csv('upbit_2years_hourly_data.csv', index=False)


JSON 데이터를 CSV 파일로 변환하여 저장했습니다.


In [2]:
result_df = pd.read_csv('upbit_2years_hourly_data.csv')

result_df

Unnamed: 0,market,candle_date_time_kst,opening_price,high_price,low_price,trade_price,timestamp,candle_acc_trade_price,candle_acc_trade_volume
0,ONT,2024-05-29T08:00:00,429.1,430.9,428.0,429.9,1716940797363,5.486395e+07,127820.222686
1,ONT,2024-05-29T07:00:00,431.4,431.5,429.4,429.4,1716937151068,3.157835e+07,73356.602316
2,ONT,2024-05-29T06:00:00,431.3,433.0,430.4,431.2,1716933573856,2.647460e+07,61293.638047
3,ONT,2024-05-29T05:00:00,433.3,433.7,431.4,431.4,1716929923280,2.289108e+07,52890.772610
4,ONT,2024-05-29T04:00:00,429.4,433.4,428.7,433.1,1716926380519,4.381985e+07,101605.923116
...,...,...,...,...,...,...,...,...,...
2146737,MLK,2021-01-01T07:00:00,160.0,161.0,160.0,161.0,1609455491377,4.046527e+06,25198.948690
2146738,MLK,2021-01-01T06:00:00,161.0,162.0,160.0,160.0,1609451758427,6.384262e+06,39584.680988
2146739,MLK,2021-01-01T05:00:00,161.0,162.0,160.0,161.0,1609448398265,1.684971e+06,10465.125294
2146740,MLK,2021-01-01T04:00:00,161.0,161.0,160.0,161.0,1609444642718,2.567751e+06,15958.360656


In [4]:
len(set(list(result_df['market'])))

77