In [None]:
import pandas as pd
from datetime import datetime, timedelta
from clob_client import PolymarketClient,timestamp_to_datetime


In [None]:

# Create the Polymarket client.
client = PolymarketClient()

# Load the input rows (e.g. matching_questions_rollcall_updated.csv).
matching_df = pd.read_csv('matching_questions_rollcall_updated.csv')
questions = matching_df['matching_questions'].tolist()
dates = matching_df['date'].tolist()  # the 'date' column holds "YYYY-MM-DD" strings

# Resolve each question to a condition_id (e.g. from closed_trump_questions_description.csv).
# The first exactly matching row wins; questions without a match get None.
closed_trump_df = pd.read_csv('closed_trump_questions_description.csv')
condition_ids = []
for question in questions:
    matched_row = closed_trump_df.loc[closed_trump_df['question'] == question]
    condition_ids.append(None if matched_row.empty else matched_row.iloc[0]['condition_id'])

# Accumulator for the output rows.
results = []

# Look-ahead horizons (1, 3 and 5 days later) for which a price change is labelled.
offsets = [1, 3, 5]


In [9]:
# Process every (condition id, date) pair; rows without a condition id are skipped.
for i, (condition_id, date_str) in enumerate(zip(condition_ids, dates)):
    if condition_id is None:
        continue

    # Parse the reference day so the look-ahead days can be derived from it.
    target_date = datetime.strptime(date_str, '%Y-%m-%d')

    # "Yes" price at 00:00:00Z of the reference day.
    target_yes_price = client.get_price_at_date(condition_id, 'Yes', f"{date_str}T00:00:00Z")

    # One change_<N>d label per offset: 1 = up, -1 = down, 0 = unchanged,
    # None = at least one of the two prices is missing.
    change_labels = {}
    for offset in offsets:
        future_date_str = (target_date + timedelta(days=offset)).strftime('%Y-%m-%d') + 'T00:00:00Z'
        future_yes_price = client.get_price_at_date(condition_id, 'Yes', future_date_str)

        label_key = f'change_{offset}d'
        if target_yes_price is None or future_yes_price is None:
            change_labels[label_key] = None
        elif future_yes_price > target_yes_price:
            change_labels[label_key] = 1
        elif future_yes_price < target_yes_price:
            change_labels[label_key] = -1
        else:
            change_labels[label_key] = 0

    # Store the row: base columns first, then the change labels.
    results.append(dict(
        question=questions[i],
        date=date_str,
        yes_price=target_yes_price,
        **change_labels,
    ))

# Turn the collected rows into a DataFrame and write it out as CSV.
result_df = pd.DataFrame(results)
result_df.to_csv('question_prices_with_changes.csv', index=False)
print("가격 변동 라벨이 'question_prices_with_changes.csv'에 저장되었습니다.")

KeyboardInterrupt: 

In [11]:
import pandas as pd
from datetime import datetime, timedelta
from clob_client import PolymarketClient,timestamp_to_datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import lru_cache

# Initialise the PolymarketClient
client = PolymarketClient()

# Memoised accessor for a market's price history.
@lru_cache(maxsize=100)
def get_price_history(condition_id, outcome):
    """Return the 'history' entry of ``client.get_price(condition_id, outcome)``.

    The result is memoised by ``lru_cache`` (at most 100 distinct argument
    pairs), so repeated lookups for the same pair reuse the first response
    instead of calling the client again.
    """
    # Assumes a single call returns the whole price history
    # (adjust to the actual API implementation if it does not).
    return client.get_price(condition_id, outcome)['history']

def get_price_at_date_cached(condition_id, outcome, target_date_str):
    """Return the price of the history point closest in time to ``target_date_str``.

    Args:
        condition_id: Market identifier, forwarded to ``get_price_history``.
        outcome: Outcome label (e.g. 'Yes'), forwarded to ``get_price_history``.
        target_date_str: Timestamp formatted as ``YYYY-MM-DDTHH:MM:SSZ``. The
            trailing ``Z`` denotes UTC, so the value is interpreted as UTC.

    Returns:
        The ``'p'`` value of the history point whose ``'t'`` is nearest to the
        target timestamp (the earliest such point on a tie), or ``None`` when
        the history is empty.

    Raises:
        ValueError: If ``target_date_str`` does not match the expected format.
    """
    # Local import: the notebook's import cell only brings in datetime/timedelta.
    from datetime import timezone

    price_history = get_price_history(condition_id, outcome)

    # strptime yields a naive datetime, and naive.timestamp() is interpreted in
    # the machine's local zone. Attach UTC explicitly so the result does not
    # shift with the local UTC offset.
    target_date = datetime.strptime(target_date_str, '%Y-%m-%dT%H:%M:%SZ').replace(tzinfo=timezone.utc)
    target_ts = int(target_date.timestamp())

    # Assumes every point is a mapping with 't' (presumably epoch seconds) and
    # 'p' (price) -- TODO confirm against the client response.
    # min() keeps the first of several equally close points.
    closest_point = min(
        price_history,
        key=lambda point: abs(point['t'] - target_ts),
        default=None,
    )
    return None if closest_point is None else closest_point['p']

# Worker function used for the parallel processing.
def fetch_price_change(condition_id, date_str, offset):
    """Label how the 'Yes' price moved ``offset`` days after ``date_str``.

    Both prices are looked up at 00:00:00Z via ``get_price_at_date_cached``.

    Args:
        condition_id: Market identifier passed through to the price lookup.
        date_str: Reference day formatted as ``YYYY-MM-DD``.
        offset: Number of days after the reference day to compare against.

    Returns:
        1 if the later price is higher, -1 if it is lower, 0 if both are
        equal, and None when either price is unavailable.
    """
    base_stamp = f"{date_str}T00:00:00Z"
    later_day = datetime.strptime(date_str, '%Y-%m-%d') + timedelta(days=offset)
    later_stamp = later_day.strftime('%Y-%m-%d') + 'T00:00:00Z'

    base_price = get_price_at_date_cached(condition_id, 'Yes', base_stamp)
    later_price = get_price_at_date_cached(condition_id, 'Yes', later_stamp)

    # No label without both prices.
    if base_price is None or later_price is None:
        return None
    if later_price > base_price:
        return 1  # up
    if later_price < base_price:
        return -1  # down
    return 0  # unchanged

# Main logic: load the matched questions and their reference dates
# (the dates are parsed downstream with the "%Y-%m-%d" format).
matching_df = pd.read_csv('matching_questions_rollcall_updated.csv')
questions = matching_df['matching_questions'].tolist()
dates = matching_df['date'].tolist()

# Resolve each question to the condition_id of its first exactly matching row;
# questions without a match get None and are skipped below.
closed_trump_df = pd.read_csv('closed_trump_questions_description.csv')
condition_ids = []
for question in questions:
    matched_row = closed_trump_df[closed_trump_df['question'] == question]
    condition_ids.append(matched_row.iloc[0]['condition_id'] if not matched_row.empty else None)

results = []
offsets = [1, 3, 5]  # look-ahead horizons in days

# Label the price changes in a thread pool.
with ThreadPoolExecutor(max_workers=10) as executor:
    futures = []
    for i, (condition_id, date_str) in enumerate(zip(condition_ids, dates)):
        if condition_id is None:
            continue

        # "Yes" price on the reference date. This lookup runs in the calling
        # thread, so the memoised history for this condition id is normally
        # already cached by the time the worker tasks below execute.
        target_yes_price = get_price_at_date_cached(condition_id, 'Yes', f"{date_str}T00:00:00Z")

        row = {
            'question': questions[i],
            'date': date_str,
            'yes_price': target_yes_price,
        }
        results.append(row)

        # One task per look-ahead offset. The row dict itself travels with the
        # future: `results` only contains rows that were not skipped, so the
        # enumerate index `i` is not a valid position in `results` once any
        # question lacks a condition id.
        for offset in offsets:
            future = executor.submit(fetch_price_change, condition_id, date_str, offset)
            futures.append((row, offset, future))

    # Collect the labels; each one is written only into the row it belongs to.
    for row, offset, future in futures:
        row[f'change_{offset}d'] = future.result()

# Turn the collected rows into a DataFrame and write it out as CSV.
result_df = pd.DataFrame(results)
result_df.to_csv('question_prices_with_changes.csv', index=False)
print("가격 변동 라벨이 'question_prices_with_changes.csv'에 저장되었습니다.")

KeyboardInterrupt: 