# In [1]:
import os
import datetime
import urllib.request
import zipfile
import json
from concurrent.futures import ThreadPoolExecutor
import logging

# Directory that receives the log file configured further down; it is created
# here (if missing) before anything tries to write into it.
logs_dir = 'logs'
os.makedirs(name=logs_dir, exist_ok=True)


# In [2]:
# Route INFO-and-above records from the root logger to a file inside logs_dir,
# each line formatted as "<timestamp> - <level> - <message>".
logging.basicConfig(
    filename=os.path.join(logs_dir, 'download_metrics_log.log'),
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

def download_metrics_data(symbol, date, data_dir, file_period, timeout=30):
    """Download one metrics zip archive, extract it, and delete the archive.

    The archive ``{symbol}-metrics-{date}.zip`` is fetched from
    data.binance.vision into ``data_dir/symbol`` and extracted in place.
    Every failure (HTTP error, network error, timeout, bad archive, ...) is
    logged and swallowed so that one missing file does not abort the caller's
    loop; a partially written archive is removed before returning.

    Args:
        symbol: Symbol name; used both in the URL and as the sub-directory
            name under ``data_dir``.
        date: Date string embedded verbatim in the file name (callers in this
            file pass ``YYYY-MM-DD``).
        data_dir: Root directory under which the per-symbol directory is
            created.
        file_period: Path segment of the URL selecting the archive period
            (callers in this file pass ``'daily'``).
        timeout: Seconds to wait on blocking network operations before giving
            up on this file.

    Returns:
        None. Success and failure are reported through the log only.
    """
    symbol_path = os.path.join(data_dir, symbol)
    os.makedirs(symbol_path, exist_ok=True)

    file_name = f"{symbol}-metrics-{date}.zip"
    url = f"https://data.binance.vision/data/futures/um/{file_period}/metrics/{symbol}/{file_name}"
    file_path = os.path.join(symbol_path, file_name)

    try:
        # urlretrieve() offers no timeout, so a stalled connection would block
        # this worker forever. Stream through urlopen() instead. The output
        # file is opened only after the request succeeded, so an HTTP error
        # (e.g. 404 for a date with no data) leaves no empty file behind.
        with urllib.request.urlopen(url, timeout=timeout) as response:
            with open(file_path, 'wb') as out_file:
                for chunk in iter(lambda: response.read(64 * 1024), b''):
                    out_file.write(chunk)
        logging.info(f"Successfully downloaded {file_path}")

        # A truncated download surfaces here as a zipfile error and is handled
        # by the except branch below.
        with zipfile.ZipFile(file_path, 'r') as zip_ref:
            zip_ref.extractall(symbol_path)
            logging.info(f"Successfully extracted {file_name} to {symbol_path}")

        os.remove(file_path)
        logging.info(f"Deleted the zip file: {file_path}")
    except Exception as e:
        # Deliberate best-effort boundary: log and continue with the next file.
        logging.error(f"Error handling {file_name}: {e}")
        if os.path.exists(file_path):
            os.remove(file_path)
            logging.info(f"Cleanup incomplete download {file_path}")

def download_for_symbol(symbol, start_date, end_date, data_dir, file_period):
    """Fetch the metrics archive of every day in a date range for one symbol.

    Steps forward from ``start_date`` one day at a time and calls
    ``download_metrics_data`` for each day that is strictly earlier than
    ``end_date``; the day is passed on formatted as ``YYYY-MM-DD``.

    Args:
        symbol: Forwarded unchanged to ``download_metrics_data``.
        start_date: First day to fetch (inclusive).
        end_date: Upper bound of the range (exclusive).
        data_dir: Forwarded unchanged to ``download_metrics_data``.
        file_period: Forwarded unchanged to ``download_metrics_data``.
    """
    one_day = datetime.timedelta(days=1)

    def iter_days():
        # Yield start_date, start_date + 1 day, ... while still before end_date.
        day = start_date
        while day < end_date:
            yield day
            day = day + one_day

    for day in iter_days():
        download_metrics_data(symbol, day.strftime('%Y-%m-%d'), data_dir, file_period)

def main():
    """Download daily futures metrics for every symbol in the config file.

    Loads the symbol list from the JSON config, makes sure the output
    directory exists, and submits one ``download_for_symbol`` task per symbol
    to a pool of four worker threads, covering 2020-01-01 up to the current
    time. Any exception that escapes a task is logged with its symbol rather
    than being silently discarded together with its future.
    """
    with open('/Users/tedting/Documents/FactorTradingSystem/data_center/config/top_futures_symbol_list.json') as f:
        futures_symbol_list = json.load(f)

    data_dir = '/Users/tedting/Documents/FactorTradingSystem/data_m/metrics'
    os.makedirs(data_dir, exist_ok=True)
    start_time = datetime.datetime.strptime('2020-01-01', '%Y-%m-%d')
    end_time = datetime.datetime.now()

    # Use ThreadPoolExecutor to download data in parallel
    with ThreadPoolExecutor(max_workers=4) as executor:
        futures = {
            executor.submit(download_for_symbol, symbol, start_time, end_time, data_dir, 'daily'): symbol
            for symbol in futures_symbol_list
        }

    # Leaving the ``with`` block waits for all tasks, so every future is
    # finished here and exception() returns immediately.
    for future, symbol in futures.items():
        error = future.exception()
        if error is not None:
            logging.error(f"Download task for {symbol} failed: {error}")

# Start the download only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()