In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from clob_client import PolymarketClient, timestamp_to_datetime
from concurrent.futures import ThreadPoolExecutor
from functools import lru_cache
import time

# Initialize the PolymarketClient that get_price_history uses for its requests
client = PolymarketClient()

# Cached accessor for a market's price history
@lru_cache(maxsize=100)
def get_price_history(condition_id, outcome):
    """Return the 'history' entry of ``client.get_price(condition_id, outcome)``.

    The result is memoised by ``lru_cache`` (up to 100 distinct argument
    pairs), so repeated calls with the same pair reuse the first response.
    """
    return client.get_price(condition_id, outcome)['history']

def get_price_at_date_cached(condition_id, outcome, target_date_str):
    """Return the price whose timestamp is nearest to ``target_date_str``.

    :param condition_id: market identifier forwarded to get_price_history
    :param outcome: outcome label forwarded to get_price_history
    :param target_date_str: UTC instant formatted as 'YYYY-MM-DDTHH:MM:SSZ'
    :return: the 'p' value of the history point whose 't' is closest to the
        target (the first such point in list order on ties), or None when
        the history is empty
    """
    # Function-scope import: the file-level import only brings in
    # datetime and timedelta.
    from datetime import timezone

    price_history = get_price_history(condition_id, outcome)

    # The trailing 'Z' marks the string as UTC, but strptime yields a naive
    # datetime and a naive .timestamp() is interpreted as local time, which
    # would shift the target by the machine's UTC offset. Pin the zone.
    # NOTE(review): assumes 't' values are Unix epoch seconds -- confirm.
    target_date = datetime.strptime(target_date_str, '%Y-%m-%dT%H:%M:%SZ')
    target_ts = int(target_date.replace(tzinfo=timezone.utc).timestamp())

    closest_point = min(
        price_history,
        key=lambda point: abs(point['t'] - target_ts),
        default=None,
    )
    closest_price = None if closest_point is None else closest_point['p']
    time.sleep(0.5)  # pause 0.5 s after each lookup
    return closest_price

def fetch_volatility_multi(condition_id, base_date_str, day_list):
    """
    Compute the volatility (sample standard deviation of daily 'Yes' returns)
    over several windows starting at the base date, rounded to 4 decimals.

    :param condition_id: market identifier forwarded to get_price_at_date_cached
    :param base_date_str: base date formatted as 'YYYY-MM-DDTHH:MM:SSZ'
    :param day_list: window lengths in days; an empty list yields an empty dict
    :return: {'volatility_1d': float or None, 'volatility_3d': ..., ...};
        a value is None when its window has fewer than two usable returns
    """
    base_date = datetime.strptime(base_date_str, '%Y-%m-%dT%H:%M:%SZ')
    if not day_list:
        # max() of an empty sequence raises ValueError; no window was
        # requested, so there is nothing to compute.
        return {}
    max_day = max(day_list)

    # Collect one price per day for Day0 .. DayN
    daily_prices = []
    for offset in range(max_day + 1):
        check_date = base_date + timedelta(days=offset)
        check_date_str = check_date.strftime('%Y-%m-%dT%H:%M:%SZ')
        daily_prices.append(
            get_price_at_date_cached(condition_id, 'Yes', check_date_str)
        )
        time.sleep(0.5)  # extra 0.5 s pause between successive lookups

    # Day-over-day returns; None marks a pair with a missing price or a
    # zero denominator.
    daily_returns = [
        (p2 - p1) / p1
        if p1 is not None and p2 is not None and p1 != 0
        else None
        for p1, p2 in zip(daily_prices, daily_prices[1:])
    ]

    # Volatility for each requested window (rounded to 4 decimals)
    result = {}
    for day in day_list:
        valid_returns = [r for r in daily_returns[:day] if r is not None]
        if len(valid_returns) > 1:
            # ddof=1 -> sample standard deviation
            result[f'volatility_{day}d'] = round(np.std(valid_returns, ddof=1), 4)
        else:
            result[f'volatility_{day}d'] = None
    return result

# Main logic: compute volatility for every row of the input CSV
df = pd.read_csv('combined_labeled_data_updated.csv')
questions = df['matching_questions'].tolist()
dates = df['upload_date'].tolist()
condition_ids = df['condition_id'].tolist()

# Window lengths (in days) to measure
day_list = [3]

# One slot per input row, so the output keeps the input row order regardless
# of which rows are skipped and which are computed.
results = [None] * len(df)

# Submit one task per usable row to a pool of 5 worker threads
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = []
    for i, (condition_id, date_str) in enumerate(zip(condition_ids, dates)):
        # pandas represents a missing CSV cell as NaN, never None, so an
        # `is None` test would let missing ids through; pd.isnull covers both.
        if pd.isnull(condition_id) or pd.isnull(date_str):
            results[i] = {
                'question': questions[i],
                'date': date_str,
                **{f'volatility_{d}d': None for d in day_list},
            }
            continue

        future = executor.submit(fetch_volatility_multi, condition_id, date_str, day_list)
        futures.append((i, future))

    # Collect in submission order; future.result() re-raises any exception
    # raised inside the worker.
    for i, future in futures:
        results[i] = {
            'question': questions[i],
            'date': dates[i],
            **future.result(),
        }

# Save the results
result_df = pd.DataFrame(results)
result_df.to_csv('question_volatility.csv', index=False)

In [None]:
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
from clob_client import PolymarketClient, timestamp_to_datetime
from concurrent.futures import ThreadPoolExecutor
from functools import lru_cache
import time

# Initialize the PolymarketClient that get_price_history uses for its requests
client = PolymarketClient()

# Cached accessor for a market's price history
@lru_cache(maxsize=100)
def get_price_history(condition_id, outcome):
    """Return the 'history' entry of ``client.get_price(condition_id, outcome)``.

    The result is memoised by ``lru_cache`` (up to 100 distinct argument
    pairs), so repeated calls with the same pair reuse the first response.
    """
    return client.get_price(condition_id, outcome)['history']

def get_price_at_date_cached(condition_id, outcome, target_date_str):
    """Return the price whose timestamp is nearest to ``target_date_str``.

    :param condition_id: market identifier forwarded to get_price_history
    :param outcome: outcome label forwarded to get_price_history
    :param target_date_str: UTC instant formatted as 'YYYY-MM-DDTHH:MM:SSZ'
    :return: the 'p' value of the history point whose 't' is closest to the
        target (the first such point in list order on ties), or None when
        the history is empty
    """
    # Function-scope import: the file-level import only brings in
    # datetime and timedelta.
    from datetime import timezone

    price_history = get_price_history(condition_id, outcome)

    # The trailing 'Z' marks the string as UTC, but strptime yields a naive
    # datetime and a naive .timestamp() is interpreted as local time, which
    # would shift the target by the machine's UTC offset. Pin the zone.
    # NOTE(review): assumes 't' values are Unix epoch seconds -- confirm.
    target_date = datetime.strptime(target_date_str, '%Y-%m-%dT%H:%M:%SZ')
    target_ts = int(target_date.replace(tzinfo=timezone.utc).timestamp())

    closest_point = min(
        price_history,
        key=lambda point: abs(point['t'] - target_ts),
        default=None,
    )
    closest_price = None if closest_point is None else closest_point['p']
    time.sleep(0.5)  # pause 0.5 s after each lookup
    return closest_price

def find_closest_price(price_history, target_date):
    """Locate the history point nearest in time to ``target_date``.

    :param price_history: iterable of points, each indexable by 't' and 'p'
    :param target_date: datetime; its ``.timestamp()`` truncated to int is
        the instant being searched for
    :return: ``(price, date)`` -- the 'p' value of the nearest point and its
        't' passed through timestamp_to_datetime; ``(None, None)`` when
        there are no points
    """
    wanted_ts = int(target_date.timestamp())
    best_price, best_date = None, None
    smallest_gap = float('inf')
    for point in price_history:
        gap = abs(point['t'] - wanted_ts)
        if not gap < smallest_gap:
            # Not strictly closer than the best so far: keep the earlier point
            continue
        smallest_gap = gap
        best_price = point['p']
        best_date = timestamp_to_datetime(point['t'])
    return best_price, best_date

def fetch_volatility_multi(condition_id, base_date_str, past_days, future_days):
    """
    Compute the volatility (sample standard deviation of daily 'Yes' returns)
    before and after a base date, each rounded to 4 decimals.

    :param condition_id: market identifier forwarded to the price lookups
    :param base_date_str: base date in UTC, formatted as 'YYYY-MM-DDTHH:MM:SSZ'
    :param past_days: number of days before the base date to sample
    :param future_days: number of days after the base date to sample
    :return: {'volatility_past': float or None,
              'volatility_future': float or None,
              'replaced_dates': list of date strings whose price came from
              the fallback search}
    """
    # Function-scope import: the file-level import only brings in
    # datetime and timedelta.
    from datetime import timezone

    def _sample_std(returns):
        """Rounded sample std of the non-None returns; None if fewer than two."""
        usable = [r for r in returns if r is not None]
        if len(usable) > 1:
            return round(np.std(usable, ddof=1), 4)
        return None

    # The trailing 'Z' means UTC. Attach the zone so that the
    # .timestamp() call in find_closest_price resolves the intended instant
    # instead of treating the naive value as local time. strftime below
    # still produces the same 'Z'-suffixed strings.
    base_date = datetime.strptime(
        base_date_str, '%Y-%m-%dT%H:%M:%SZ'
    ).replace(tzinfo=timezone.utc)

    # Sample dates: past days (oldest first), the base date, then future days
    past_dates = [base_date - timedelta(days=i) for i in range(past_days, 0, -1)]
    future_dates = [base_date + timedelta(days=i) for i in range(1, future_days + 1)]
    all_dates = past_dates + [base_date] + future_dates

    # Collect one price per sample date
    daily_prices = []
    replaced_dates = []
    price_history = get_price_history(condition_id, 'Yes')
    for date in all_dates:
        date_str = date.strftime('%Y-%m-%dT%H:%M:%SZ')
        price = get_price_at_date_cached(condition_id, 'Yes', date_str)
        if price is None:
            # No price found: fall back to the nearest point in the history.
            # NOTE(review): this fallback applies the same nearest-timestamp
            # rule to the same history, so it looks unlikely to find a price
            # the first lookup missed -- confirm the intended behaviour.
            closest_price, _closest_date = find_closest_price(price_history, date)
            if closest_price is not None:
                replaced_dates.append(date_str)
            daily_prices.append(closest_price)
        else:
            daily_prices.append(price)
        time.sleep(0.5)  # extra 0.5 s pause between successive lookups

    # Day-over-day returns; None marks a pair with a missing price or a
    # zero denominator.
    daily_returns = [
        (p2 - p1) / p1
        if p1 is not None and p2 is not None and p1 != 0
        else None
        for p1, p2 in zip(daily_prices, daily_prices[1:])
    ]

    # The first `past_days` returns lead up to the base date; the rest follow it
    return {
        'volatility_past': _sample_std(daily_returns[:past_days]),
        'volatility_future': _sample_std(daily_returns[past_days:]),
        'replaced_dates': replaced_dates,
    }

# Main logic: compute past/future volatility for every row of the input CSV
df = pd.read_csv('combined_labeled_data_updated.csv')
questions = df['matching_questions'].tolist()
dates = df['upload_date'].tolist()
condition_ids = df['condition_id'].tolist()

# Window settings (easy to change)
past_days = 3  # 3 days before the base date
future_days = 3  # 3 days after the base date

# One slot per input row, so the output keeps the input row order regardless
# of which rows are skipped and which are computed.
results = [None] * len(df)

# Submit one task per usable row to a pool of 5 worker threads
with ThreadPoolExecutor(max_workers=5) as executor:
    futures = []
    for i, (condition_id, date_str) in enumerate(zip(condition_ids, dates)):
        # pandas represents a missing CSV cell as NaN, never None, so an
        # `is None` test would let missing ids through; pd.isnull covers both.
        if pd.isnull(condition_id) or pd.isnull(date_str):
            results[i] = {
                'question': questions[i],
                'date': date_str,
                'volatility_past': None,
                'volatility_future': None,
                'replaced_dates': [],
            }
            continue

        future = executor.submit(fetch_volatility_multi, condition_id, date_str, past_days, future_days)
        futures.append((i, future))

    # Collect in submission order; future.result() re-raises any exception
    # raised inside the worker.
    for i, future in futures:
        results[i] = {
            'question': questions[i],
            'date': dates[i],
            **future.result(),
        }

# Save the results
result_df = pd.DataFrame(results)
result_df.to_csv('question_volatility_with_past.csv', index=False)

0.0002828427124746198
