In [1]:
import pandas as pd
from datetime import datetime, timedelta
from datetime import datetime, timedelta
from clob_client import PolymarketClient,timestamp_to_datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import lru_cache


In [2]:

# PolymarketClient 초기화
client = PolymarketClient()

# 캐싱을 위한 함수 정의
@lru_cache(maxsize=100)
def get_price_history(condition_id, outcome):
    resp = client.get_price(condition_id, outcome)
    return resp['history']

def get_price_at_date_cached(condition_id, outcome, target_date_str):
    price_history = get_price_history(condition_id, outcome)
    target_date = datetime.strptime(target_date_str, '%Y-%m-%dT%H:%M:%SZ')
    target_ts = int(target_date.timestamp())
    
    closest_price = None
    min_time_diff = float('inf')
    for price_point in price_history:
        time_diff = abs(price_point['t'] - target_ts)
        if time_diff < min_time_diff:
            min_time_diff = time_diff
            closest_price = price_point['p']
    return closest_price

# 병렬 처리를 위한 함수
def fetch_price_change(condition_id, date_str, offset):
    target_date = datetime.strptime(date_str, '%Y-%m-%d')
    future_date = target_date + timedelta(days=offset)
    future_date_str = future_date.strftime('%Y-%m-%d') + 'T00:00:00Z'
    
    target_yes_price = get_price_at_date_cached(condition_id, 'Yes', f"{date_str}T00:00:00Z")
    future_yes_price = get_price_at_date_cached(condition_id, 'Yes', future_date_str)
    
    if target_yes_price is not None and future_yes_price is not None:
        if future_yes_price > target_yes_price:
            return 1  # 상승
        elif future_yes_price < target_yes_price:
            return -1  # 하락
        else:
            return 0  # 중립
    return None


In [3]:

# 1. matching_questions_cnn_953.csv에서 데이터 로드
matching_df = pd.read_csv('matching_questions_cnn_953.csv')
questions = matching_df['matching_questions'].tolist()
titles = matching_df['title'].tolist()  # title 열이 있다고 가정

# 2. data/cnn.csv에서 title과 upload_date 매칭
cnn_df = pd.read_csv('data/cnn.csv')
upload_dates = []
for title in titles:
    matched_row = cnn_df[cnn_df['title'] == title]
    if not matched_row.empty:
        upload_date = matched_row.iloc[0]['upload_date']  # 예: "2025-03-19T19:30:00Z"
        # "YYYY-MM-DDTHH:MM:SSZ" -> "YYYY-MM-DD"로 변환
        upload_date_str = upload_date.split('T')[0]
        upload_dates.append(upload_date_str)
    else:
        upload_dates.append(None)
        print(f"일치하는 title을 찾을 수 없습니다: {title}")

# 3. condition_id 매칭 (closed_trump_questions_description.csv 사용)
closed_trump_df = pd.read_csv('closed_trump_questions_description.csv')
condition_ids = []
for question in questions:
    matched_row = closed_trump_df[closed_trump_df['question'] == question]
    condition_ids.append(matched_row.iloc[0]['condition_id'] if not matched_row.empty else None)

# 결과 저장을 위한 리스트
results = []
offsets = [1, 3, 5]

# 4. 병렬 처리로 가격 변동 계산
with ThreadPoolExecutor(max_workers=10) as executor:
    futures = []
    for i, (condition_id, date_str) in enumerate(zip(condition_ids, upload_dates)):
        if condition_id is None or date_str is None:
            results.append({
                'question': questions[i],
                'date': date_str,
                'yes_price': None,
                'change_1d': None,
                'change_3d': None,
                'change_5d': None
            })
            continue
        
        # 타겟 날짜의 Yes 가격 조회
        target_yes_price = get_price_at_date_cached(condition_id, 'Yes', f"{date_str}T00:00:00Z")
        
        # 미래 날짜 가격 변동 계산 (병렬 처리)
        change_labels = {}
        for offset in offsets:
            future = executor.submit(fetch_price_change, condition_id, date_str, offset)
            futures.append((i, offset, future))
        
        results.append({
            'question': questions[i],
            'date': date_str,
            'yes_price': target_yes_price,
            'change_1d': None,
            'change_3d': None,
            'change_5d': None
        })
    
    # 병렬 처리 결과 수집
    for i, offset, future in futures:
        results[i][f'change_{offset}d'] = future.result()

# 5. 결과를 DataFrame으로 변환하고 CSV로 저장
result_df = pd.DataFrame(results)
result_df.to_csv('question_prices_with_changes_cnn.csv', index=False)
print("가격 변동 라벨이 'question_prices_with_changes_cnn.csv'에 저장되었습니다.")

가격 변동 라벨이 'question_prices_with_changes_cnn.csv'에 저장되었습니다.


In [5]:
import pandas as pd
from datetime import datetime, timedelta

from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import lru_cache

# PolymarketClient 초기화
client = PolymarketClient()

# 캐싱을 위한 함수 정의
@lru_cache(maxsize=100)
def get_price_history(condition_id, outcome):
    resp = client.get_price(condition_id, outcome)
    return resp['history']

def get_price_at_date_cached(condition_id, outcome, target_date_str):
    price_history = get_price_history(condition_id, outcome)
    target_date = datetime.strptime(target_date_str, '%Y-%m-%dT%H:%M:%SZ')
    target_ts = int(target_date.timestamp())
    
    closest_price = None
    min_time_diff = float('inf')
    for price_point in price_history:
        time_diff = abs(price_point['t'] - target_ts)
        if time_diff < min_time_diff:
            min_time_diff = time_diff
            closest_price = price_point['p']
    return closest_price

# 병렬 처리를 위한 함수
def fetch_price_change(condition_id, date_str, offset):
    target_date = datetime.strptime(date_str, '%Y-%m-%d')
    future_date = target_date + timedelta(days=offset)
    future_date_str = future_date.strftime('%Y-%m-%d') + 'T00:00:00Z'
    
    target_yes_price = get_price_at_date_cached(condition_id, 'Yes', f"{date_str}T00:00:00Z")
    future_yes_price = get_price_at_date_cached(condition_id, 'Yes', future_date_str)
    
    if target_yes_price is not None and future_yes_price is not None:
        if future_yes_price > target_yes_price:
            return 1  # 상승
        elif future_yes_price < target_yes_price:
            return -1  # 하락
        else:
            return 0  # 중립
    return None

# 1. matching_questions_cnn_953.csv에서 데이터 로드
matching_df = pd.read_csv('matching_questions_cnn_953.csv')
questions = matching_df['matching_questions'].tolist()
titles = matching_df['title'].tolist()  # title 열이 있다고 가정

# 2. data/cnn.csv에서 title과 upload_date, url 매칭
cnn_df = pd.read_csv('data/cnn.csv')
upload_dates = []
urls = []
for title in titles:
    matched_row = cnn_df[cnn_df['title'] == title]
    if not matched_row.empty:
        upload_date = matched_row.iloc[0]['upload_date']  # 예: "2025-03-19T19:30:00Z"
        upload_date_str = upload_date.split('T')[0]  # "YYYY-MM-DD"로 변환
        url = matched_row.iloc[0]['url']  # url 열에서 링크 가져오기
        upload_dates.append(upload_date_str)
        urls.append(url)
    else:
        upload_dates.append(None)
        urls.append(None)
        print(f"일치하는 title을 찾을 수 없습니다: {title}")

# 3. condition_id 매칭 (closed_trump_questions_description.csv 사용)
closed_trump_df = pd.read_csv('closed_trump_questions_description.csv')
condition_ids = []
for question in questions:
    matched_row = closed_trump_df[closed_trump_df['question'] == question]
    condition_ids.append(matched_row.iloc[0]['condition_id'] if not matched_row.empty else None)

# 결과 저장을 위한 리스트
results = []
offsets = [1, 3, 5]

# 4. 병렬 처리로 가격 변동 계산
with ThreadPoolExecutor(max_workers=10) as executor:
    futures = []
    for i, (condition_id, date_str, title, url) in enumerate(zip(condition_ids, upload_dates, titles, urls)):
        if condition_id is None or date_str is None:
            results.append({
                'question': questions[i],
                'title': title,
                'url': url,
                'date': date_str,
                'yes_price': None,
                'change_1d': None,
                'change_3d': None,
                'change_5d': None
            })
            continue
        
        # 타겟 날짜의 Yes 가격 조회
        target_yes_price = get_price_at_date_cached(condition_id, 'Yes', f"{date_str}T00:00:00Z")
        
        # 미래 날짜 가격 변동 계산 (병렬 처리)
        change_labels = {}
        for offset in offsets:
            future = executor.submit(fetch_price_change, condition_id, date_str, offset)
            futures.append((i, offset, future))
        
        results.append({
            'question': questions[i],
            'title': title,
            'url': url,
            'date': date_str,
            'yes_price': target_yes_price,
            'change_1d': None,
            'change_3d': None,
            'change_5d': None
        })
    
    # 병렬 처리 결과 수집
    for i, offset, future in futures:
        results[i][f'change_{offset}d'] = future.result()

# 5. 결과를 DataFrame으로 변환하고 CSV로 저장
result_df = pd.DataFrame(results)
result_df.to_csv('question_prices_with_changes_cnn.csv', index=False)
print("가격 변동 라벨이 'question_prices_with_changes_cnn.csv'에 저장되었습니다.")

KeyError: 'url'