# Polymarket Crypto Arbitrage Analysis

This notebook demonstrates how to use the Polymarket analysis framework to:
1. Collect market data from Polymarket
2. Process and clean the data
3. Detect arbitrage opportunities
4. Backtest trading strategies
5. Visualize results

In [None]:
# Import required libraries
import pandas as pd
import numpy as np
import asyncio  # NOTE(review): not referenced in the cells visible here
from datetime import datetime, timedelta
import warnings
# Blanket filter: silences every warning category for the rest of the session,
# including deprecation warnings from pandas/numpy. Narrow this if a library
# upgrade starts misbehaving silently.
warnings.filterwarnings('ignore')

# Import our analysis modules
import sys
# Relative entry: resolved against the kernel's current working directory, so
# the project imports below only work when the notebook is launched from a
# directory that has the package sources at ../src.
sys.path.append('../src')

from polymarket_analysis.data.data_collector import DataCollector
from polymarket_analysis.data.data_processor import DataProcessor
from polymarket_analysis.strategies.arbitrage_detector import ArbitrageDetector
from polymarket_analysis.strategies.strategy_backtester import StrategyBacktester
from polymarket_analysis.visualization.dashboard import PolymarketVisualizer
from polymarket_analysis.utils.config import config  # NOTE(review): not referenced in the cells visible here
from polymarket_analysis.utils.logger import get_default_logger

# Set up logging
# `logger` is kept as a notebook-level global; the visible cells report
# progress with print() rather than through this logger.
logger = get_default_logger()
print("Polymarket Analysis Framework Loaded Successfully!")

## 1. Data Collection

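First, let's collect crypto market data from Polymarket and reference prices from Yahoo Finance. If the collection step raises an error (for example, when the APIs are unreachable), the cell falls back to randomly generated mock data so the rest of the notebook can still run; results produced from mock data are illustrative only.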

In [None]:
# Initialize data collector
collector = DataCollector()

# Collect complete dataset
# Note: This will make API calls to Polymarket and Yahoo Finance
print("Starting data collection...")
print("This may take a few minutes depending on API rate limits...")

try:
    markets, histories, crypto_prices = await collector.collect_complete_dataset(
        days_back=7,  # Start with 7 days for demo
        min_volume=500,  # Lower threshold for demo
        crypto_symbols=['BTC-USD', 'ETH-USD', 'SOL-USD']
    )
    
    print(f"Collected {len(markets)} markets")
    print(f"Collected price histories for {len(histories)} markets")
    print(f"Collected reference prices for {crypto_prices.shape[1]} crypto symbols")
    
except Exception as e:
    print(f"Error during data collection: {e}")
    print("Using mock data for demonstration...")
    
    # Create mock data for demonstration
    import random
    from polymarket_analysis.api.polymarket_client import Market, PricePoint
    
    # Mock markets
    markets = [
        Market(
            id=f"market_{i}",
            question=f"Will Bitcoin reach ${40000 + i*1000} by end of 2025?",
            description="Bitcoin price prediction market",
            end_date=datetime.now() + timedelta(days=30),
            outcome_prices={"Yes": 0.6 + random.uniform(-0.2, 0.2), "No": 0.4 + random.uniform(-0.2, 0.2)},
            volume=random.uniform(1000, 10000),
            liquidity=random.uniform(500, 5000),
            active=True,
            tags=["crypto", "bitcoin"]
        )
        for i in range(5)
    ]
    
    # Mock price histories
    histories = {}
    base_time = datetime.now() - timedelta(days=7)
    
    for market in markets:
        price_points = []
        current_price = 0.5
        
        for hour in range(168):  # 7 days * 24 hours
            timestamp = base_time + timedelta(hours=hour)
            current_price += random.uniform(-0.05, 0.05)
            current_price = max(0.01, min(0.99, current_price))
            
            price_points.append(PricePoint(
                market_id=market.id,
                outcome="Yes",
                price=current_price,
                timestamp=timestamp,
                volume=random.uniform(10, 100)
            ))
        
        histories[market.id] = price_points
    
    # Mock crypto prices
    timestamps = pd.date_range(base_time, periods=168, freq='H')
    crypto_prices = pd.DataFrame({
        'BTC-USD': np.cumsum(np.random.randn(168) * 100) + 50000,
        'ETH-USD': np.cumsum(np.random.randn(168) * 50) + 3000,
        'SOL-USD': np.cumsum(np.random.randn(168) * 5) + 100
    }, index=timestamps)
    
    print("Mock data created successfully!")

## 2. Data Processing

Now let's process the collected data and prepare it for analysis.

In [None]:
# Processor used by this and the following cells
processor = DataProcessor()

# Tabular views of the collected objects; price rows are requested with
# outcome_filter="Yes"
markets_df = processor.create_market_dataframe(markets)
price_df = processor.create_price_dataframe(histories, outcome_filter="Yes")

# Preview: heading + first rows of each frame, one item per line
print("Market Data:", markets_df.head(), sep="\n")
print(f"\nPrice Data Shape: {price_df.shape}", price_df.head(), sep="\n")

In [None]:
# Feature frame computed from the raw price frame
price_features_df = processor.calculate_price_features(price_df)

# Feature frame aligned against the reference crypto prices
aligned_df = processor.align_crypto_prices(price_features_df, crypto_prices)

print(f"Features added. New shape: {price_features_df.shape}")
print(f"Aligned data shape: {aligned_df.shape}")
print("\nNew columns:")
# Columns present after feature calculation but absent from the raw frame,
# kept in the feature frame's own column order
print(list(filter(lambda col: col not in price_df.columns, price_features_df.columns)))

In [None]:
# Candidate arbitrage rows derived from the market frame
arbitrage_df = processor.detect_arbitrage_opportunities(markets_df)

print(f"Found {len(arbitrage_df)} potential arbitrage opportunities:")
if not arbitrage_df.empty:
    # Show only the columns needed to eyeball each candidate pair
    print(
        arbitrage_df.loc[
            :, ['market1_question', 'market2_question', 'price_diff', 'opportunity_score']
        ].head()
    )

## 3. Arbitrage Signal Detection

Let's use our arbitrage detector to find trading opportunities.

In [None]:
# Detector shared by the signal-detection cells below
detector = ArbitrageDetector(
    min_return_threshold=0.02,
    confidence_threshold=0.7,
    lookback_window=24,
)

# Echo the settings back from the instance
print("\n".join([
    "Arbitrage Detector initialized with:",
    f"- Lookback window: {detector.lookback_window} hours",
    f"- Confidence threshold: {detector.confidence_threshold}",
    f"- Min return threshold: {detector.min_return_threshold:.1%}",
]))

In [None]:
# Run each detector in turn and report how many signals it produced
print("Detecting arbitrage signals...")

# 1) Mean reversion
mean_reversion_signals = detector.detect_mean_reversion_opportunities(price_features_df)
print(f"Mean reversion signals: {len(mean_reversion_signals)}")

# 2) Momentum
momentum_signals = detector.detect_momentum_opportunities(price_features_df)
print(f"Momentum signals: {len(momentum_signals)}")

# 3) Price divergence — needs the crypto-aligned frame, so skip when it is empty
if aligned_df.empty:
    divergence_signals = []
    print("Price divergence signals: 0 (no aligned data)")
else:
    divergence_signals = detector.detect_price_divergence_opportunities(
        aligned_df, crypto_prices
    )
    print(f"Price divergence signals: {len(divergence_signals)}")

# Single combined list consumed by the later cells
all_signals = mean_reversion_signals + momentum_signals + divergence_signals
print(f"\nTotal signals detected: {len(all_signals)}")

In [None]:
# Per-signal-type summary of confidence, expected return and risk
if not all_signals:
    print("No signals to analyze")
else:
    # Attributes copied from each signal into one row of the summary frame
    signal_fields = (
        'market_id',
        'signal_type',
        'timestamp',
        'confidence',
        'potential_return',
        'risk_score',
    )
    signal_df = pd.DataFrame([
        {field: getattr(s, field) for field in signal_fields}
        for s in all_signals
    ])

    print("Signal Summary:")
    per_type_stats = signal_df.groupby('signal_type').agg({
        'confidence': ['count', 'mean'],
        'potential_return': 'mean',
        'risk_score': 'mean'
    })
    print(per_type_stats.round(3))

## 4. Strategy Backtesting

Let's backtest our arbitrage strategies to evaluate their performance.

In [None]:
# Backtester configuration for the demo run
backtester = StrategyBacktester(
    initial_capital=10000,
    position_sizing='proportional',
    max_position_size=0.1,   # 10% max per trade
    transaction_cost=0.01,   # 1%
    max_holding_period=12,   # 12 hours max
    stop_loss=0.05,          # 5%
    take_profit=0.10,        # 10%
)

# Echo the key settings back from the instance
print("\n".join([
    "Strategy Backtester initialized with:",
    f"- Initial capital: ${backtester.initial_capital:,.0f}",
    f"- Max position size: {backtester.max_position_size:.1%}",
    f"- Transaction cost: {backtester.transaction_cost:.1%}",
    f"- Max holding period: {backtester.max_holding_period} hours",
]))

In [None]:
# Backtest the combined signal list; `performance` stays None when there is
# nothing to test so later cells can branch on it
if not all_signals:
    print("No signals available for backtesting")
    performance = None
else:
    print("Running strategy backtest...")

    performance = backtester.backtest_strategy(
        signals=all_signals,
        price_data=price_features_df,
    )

    print("\n=== BACKTEST RESULTS ===")
    print(f"Total Trades: {performance.total_trades}")
    print(f"Win Rate: {performance.win_rate:.1%}")
    print(f"Total Return: {performance.total_return:.1%}")
    print(f"Total P&L: ${performance.total_pnl:.2f}")
    print(f"Sharpe Ratio: {performance.sharpe_ratio:.2f}")
    print(f"Max Drawdown: {performance.max_drawdown:.1%}")
    print(f"Average Trade Duration: {performance.avg_trade_duration:.1f} hours")

    # Win/loss breakdown is shown only when both kinds of trade occurred
    has_wins_and_losses = performance.winning_trades > 0 and performance.losing_trades > 0
    if has_wins_and_losses:
        print(f"Average Win: ${performance.avg_win:.2f}")
        print(f"Average Loss: ${performance.avg_loss:.2f}")
        print(f"Profit Factor: {performance.profit_factor:.2f}")

## 5. Visualization

Let's create comprehensive visualizations of our analysis results.

In [None]:
# Initialize visualizer
# One visualizer instance is reused by every plotting cell below and by the
# final dashboard export.
# NOTE(review): 'plotly_dark' is presumably a Plotly template name — confirm
# against PolymarketVisualizer.
visualizer = PolymarketVisualizer(style='plotly_dark')

print("Creating visualizations...")

In [None]:
# Market overview
# Built from the market-level frame; the figure object is kept in a global so
# it can be inspected or re-shown later.
market_overview_fig = visualizer.plot_market_overview(markets_df)
market_overview_fig.show()

In [None]:
# Price history
# Uses the raw price frame (price_df), not the feature-augmented one.
price_history_fig = visualizer.plot_price_history(price_df)
price_history_fig.show()

In [None]:
# Overlay detected signals on the raw price frame (skipped when none exist)
if not all_signals:
    print("No signals to visualize")
else:
    signals_fig = visualizer.plot_arbitrage_signals(all_signals, price_df)
    signals_fig.show()

In [None]:
# Plot backtest performance only when a backtest ran and produced trades
if not (performance and performance.total_trades > 0):
    print("No performance data to visualize")
else:
    performance_fig = visualizer.plot_strategy_performance(performance)
    performance_fig.show()

In [None]:
# Correlation view needs the crypto-aligned frame; bail out when it is empty
if aligned_df.empty:
    print("No aligned data for correlation analysis")
else:
    correlation_fig = visualizer.plot_correlation_analysis(aligned_df)
    correlation_fig.show()

## 6. Advanced Analysis

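Let's perform some advanced analysis. This section runs parameter optimization over the backtester settings; walk-forward testing is not implemented in this notebook yet and is left as a follow-up.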

In [None]:
# Grid search over backtester settings; only attempted with more than 10 signals
if not (all_signals and len(all_signals) > 10):
    print("Not enough signals for parameter optimization")
else:
    print("Running parameter optimization...")

    # Candidate values for each tunable backtester setting
    parameter_ranges = dict(
        max_position_size=[0.05, 0.10, 0.15],
        stop_loss=[0.03, 0.05, 0.07],
        take_profit=[0.08, 0.10, 0.12],
        max_holding_period=[8, 12, 16],
    )

    optimization_result = backtester.optimize_parameters(
        signals=all_signals,
        price_data=price_features_df,
        parameter_ranges=parameter_ranges,
    )

    print("\n=== OPTIMIZATION RESULTS ===")
    print(f"Best Parameters: {optimization_result['best_parameters']}")
    # NOTE(review): the label assumes the optimizer's objective is the Sharpe
    # ratio — confirm against StrategyBacktester.optimize_parameters.
    print(f"Best Sharpe Ratio: {optimization_result['optimization_value']:.3f}")

    # Headline metrics of the winning configuration, when one was returned
    best_performance = optimization_result['best_performance']
    if best_performance:
        print(f"Optimized Return: {best_performance.total_return:.1%}")
        print(f"Optimized Win Rate: {best_performance.win_rate:.1%}")

## 7. Summary and Next Steps

Let's summarize our analysis and suggest next steps.

In [None]:
# Recap of what was collected, detected and backtested
print("=== POLYMARKET ARBITRAGE ANALYSIS SUMMARY ===")
print(f"Data Collection: {len(markets)} markets, {len(histories)} price histories")
print(f"Signal Detection: {len(all_signals)} total signals")

if all_signals:
    # Count of signals per type, most frequent first
    signal_types = pd.Series([s.signal_type for s in all_signals]).value_counts()
    print("Signal breakdown:")
    for signal_type, count in signal_types.items():
        print(f"  - {signal_type}: {count}")

if performance:
    print("\nBacktest Performance:")
    print(f"  - Total Return: {performance.total_return:.1%}")
    print(f"  - Win Rate: {performance.win_rate:.1%}")
    print(f"  - Sharpe Ratio: {performance.sharpe_ratio:.2f}")

# Static follow-up list, printed one item per line
print("\n=== NEXT STEPS ===")
for next_step in (
    "1. Collect more historical data for robust analysis",
    "2. Implement more sophisticated signal detection algorithms",
    "3. Add real-time data streaming capabilities",
    "4. Integrate with actual trading APIs for live execution",
    "5. Implement risk management and position sizing optimization",
    "6. Add market microstructure analysis for better execution",
    "7. Create automated monitoring and alerting systems",
):
    print(next_step)

In [None]:
# Persist the processed frames and export the dashboard
print("Saving analysis results...")

try:
    # Empty optional frames are passed as None rather than as empty frames
    aligned_to_save = None if aligned_df.empty else aligned_df
    arbitrage_to_save = None if arbitrage_df.empty else arbitrage_df

    processor.save_processed_data(
        market_df=markets_df,
        price_df=price_features_df,
        aligned_df=aligned_to_save,
        arbitrage_df=arbitrage_to_save,
    )

    # Dashboard is built from the raw price frame plus signals and performance
    dashboard_figures = visualizer.create_dashboard(
        markets_df=markets_df,
        price_df=price_df,
        signals=all_signals,
        performance=performance,
    )

    print("Analysis results saved successfully!")
    print("Check the 'data/processed' directory for CSV files")
    print("Check the 'data/processed/visualizations' directory for HTML plots")

except Exception as e:
    # Report the failure without aborting the notebook run
    print(f"Error saving results: {e}")