# Polymarket API Guide

This notebook demonstrates how to use Polymarket's APIs to fetch trading and user data.

## API Overview

Polymarket provides several APIs:
- **Gamma API**: Market metadata, events, and general market information
- **Data API**: Trading data, user activity, and positions
- **CLOB API**: Central Limit Order Book for placing orders (requires authentication)
- **Subgraph**: GraphQL interface for complex queries

Documentation: https://docs.polymarket.com/


In [67]:
# Install required packages
# Run this in terminal: pip install requests pandas polymarket-apis

import requests
import pandas as pd
import numpy as np
from datetime import datetime
import json


## 1. Gamma API - Market Data

The Gamma API provides market metadata including titles, descriptions, volumes, and more.
Base URL: https://gamma-api.polymarket.com


In [107]:
# Fetch all markets
def get_events(active=None, closed=None, limit=100, offset=0, sort_by='start_date'):
    """
    Fetch events from Gamma API and sort by recency
    
    Parameters:
    - active: True for active markets, False for inactive
    - closed: True for closed markets, False for open
    - limit: Number of markets to return
    - offset: Pagination offset
    - sort_by: Field to sort by ('start_date', 'end_date', 'created_at', 'volume')
    """
    url = "https://gamma-api.polymarket.com/events"
    params = {
        "limit": limit,
        "offset": offset,
        "ascending": False
    }
    
    if active is not None:
        params["active"] = str(active).lower()
    if closed is not None:
        params["closed"] = str(closed).lower()
    
    response = requests.get(url, params=params)
    events = response.json()
    
    # Sort by the specified field (most recent first)
    if isinstance(events, list) and events:
        # Try to sort by different date fields that might exist
        date_fields = ['start_date_iso', 'end_date_iso', 'created_at', 'start_date']
        
        if sort_by == 'start_date':
            # Sort by start date (most recent first)
            if 'start_date_iso' in events[0]:
                events.sort(key=lambda x: x.get('start_date_iso', ''), reverse=True)
            elif 'start_date' in events[0]:
                events.sort(key=lambda x: x.get('start_date', 0), reverse=True)
        elif sort_by == 'end_date':
            if 'end_date_iso' in events[0]:
                events.sort(key=lambda x: x.get('end_date_iso', ''), reverse=True)
        elif sort_by == 'volume':
            events.sort(key=lambda x: x.get('volume', 0), reverse=True)
    
    return events


# Example: Get active markets
events = get_events(active=True, closed=False, limit=100)
print(f"Retrieved {len(events)} events")

# Display first market
# if markets:
#     for market in markets:
#         print(f"\nExample Market:")
#         print(f"ID: {market.get('condition_id', 'N/A')}")
#         print(f"Title: {market.get('question', 'N/A')}")
#         print(f"Description: {market.get('description', 'N/A')[:100]}...")
        #print(f"Volume: ${first_market.get('volume', 0):,.2f}")
        #print(f"Liquidity: ${first_market.get('liquidity', 0):,.2f}")


Retrieved 100 events


In [124]:
events[2]

{'id': '16092',
 'ticker': 'us-recession-in-2025',
 'slug': 'us-recession-in-2025',
 'title': 'US recession in 2025?',
 'description': 'This market will resolve to “Yes”, if either of the following conditions are met:\n\n1.  The National Bureau of Economic Research (NBER) publicly announces that a recession has occurred in the United States, at any point in 2025, with the announcement made by December 31, 2025, 11:59 PM ET.\n\n2.  The seasonally adjusted annualized percent change in quarterly U.S. real GDP from the previous quarter is less than 0.0 for two consecutive quarters between Q4 2024 and Q4 2025 (inclusive), as reported by the Bureau of Economic Analysis (BEA). \n\nOtherwise, this market will resolve to "No". \n\nNote that advance estimates will be considered. For example, if upon release, the advance estimate for Q2 2025 was negative, and the Q1 2025\'s most recent, up-to-date estimate was also negative, this market would resolve to "Yes". If on December 31, 2025 the latest e

In [126]:
markets_df.columns

Index(['id', 'question', 'conditionId', 'slug', 'resolutionSource', 'endDate',
       'liquidity', 'startDate', 'image', 'icon', 'description', 'outcomes',
       'outcomePrices', 'volume', 'active', 'closed', 'marketMakerAddress',
       'createdAt', 'updatedAt', 'new', 'featured', 'submitted_by', 'archived',
       'resolvedBy', 'restricted', 'groupItemTitle', 'groupItemThreshold',
       'questionID', 'enableOrderBook', 'orderPriceMinTickSize',
       'orderMinSize', 'volumeNum', 'liquidityNum', 'endDateIso',
       'startDateIso', 'hasReviewedDates', 'volume24hr', 'volume1wk',
       'volume1mo', 'volume1yr', 'clobTokenIds', 'umaBond', 'umaReward',
       'volume24hrClob', 'volume1wkClob', 'volume1moClob', 'volume1yrClob',
       'volumeClob', 'liquidityClob', 'acceptingOrders', 'negRisk', 'events',
       'ready', 'funded', 'acceptingOrdersTimestamp', 'cyom', 'competitive',
       'pagerDutyNotificationEnabled', 'approved', 'rewardsMinSize',
       'rewardsMaxSpread', 'spread', 'o

In [122]:
event = events[1]

print("title:", event['title'])
print("number of markets:", len(event['markets']))

title: Largest Company end of 2025?
number of markets: 18


In [68]:
# Fetch all markets
def get_markets(active=None, closed=None, limit=100, offset=0, sort_by='start_date'):
    """
    Fetch markets from Gamma API and sort by recency
    
    Parameters:
    - active: True for active markets, False for inactive
    - closed: True for closed markets, False for open
    - limit: Number of markets to return
    - offset: Pagination offset
    - sort_by: Field to sort by ('start_date', 'end_date', 'created_at', 'volume')
    """
    url = "https://gamma-api.polymarket.com/markets"
    params = {
        "limit": limit,
        "offset": offset,
        "ascending": False
    }
    
    if active is not None:
        params["active"] = str(active).lower()
    if closed is not None:
        params["closed"] = str(closed).lower()
    
    response = requests.get(url, params=params)
    markets = response.json()
    
    # Sort by the specified field (most recent first)
    if isinstance(markets, list) and markets:
        # Try to sort by different date fields that might exist
        date_fields = ['start_date_iso', 'end_date_iso', 'created_at', 'start_date']
        
        if sort_by == 'start_date':
            # Sort by start date (most recent first)
            if 'start_date_iso' in markets[0]:
                markets.sort(key=lambda x: x.get('start_date_iso', ''), reverse=True)
            elif 'start_date' in markets[0]:
                markets.sort(key=lambda x: x.get('start_date', 0), reverse=True)
        elif sort_by == 'end_date':
            if 'end_date_iso' in markets[0]:
                markets.sort(key=lambda x: x.get('end_date_iso', ''), reverse=True)
        elif sort_by == 'volume':
            markets.sort(key=lambda x: x.get('volume', 0), reverse=True)
    
    return markets


# Example: Get active markets
markets = get_markets(active=True, closed=False, limit=100)
print(f"Retrieved {len(markets)} markets")

# Display first market
# if markets:
#     for market in markets:
#         print(f"\nExample Market:")
#         print(f"ID: {market.get('condition_id', 'N/A')}")
#         print(f"Title: {market.get('question', 'N/A')}")
#         print(f"Description: {market.get('description', 'N/A')[:100]}...")
        #print(f"Volume: ${first_market.get('volume', 0):,.2f}")
        #print(f"Liquidity: ${first_market.get('liquidity', 0):,.2f}")


Retrieved 100 markets


In [69]:
# Convert markets to DataFrame for analysis
markets_df = pd.DataFrame(markets)

# mass change numeric columns to float
numeric_columns = ['volume', 'liquidity', 'volumeNum', 'liquidityNum']
markets_df[numeric_columns] = markets_df[numeric_columns].astype(float)

# date columns to datetime
date_columns = ['startDate', 'endDate', 'createdAt', 'updatedAt']
for col in date_columns:    
    markets_df[col] = pd.to_datetime(markets_df[col])

# remove columns with more than 50% na
markets_df = markets_df.dropna(axis=1, thresh=0.5*len(markets_df))

# Display key columns
if not markets_df.empty:
    key_columns = ['condition_id', 'question', 'volume', 'liquidity', 'end_date_iso']
    available_columns = [col for col in key_columns if col in markets_df.columns]
    
    print(markets_df[available_columns].head())
    
    # Summary statistics
    if 'volume' in markets_df.columns:
        print(f"\nTotal Volume: ${markets_df['volume'].sum():,.2f}")
        print(f"Average Volume: ${markets_df['volume'].mean():,.2f}")
        print(f"Max Volume: ${markets_df['volume'].max():,.2f}")


                          question        volume     liquidity
0           Fed rate hike in 2025?  1.168881e+06   38409.32452
1            US recession in 2025?  1.063633e+07   95149.02162
2  Fed emergency rate cut in 2025?  1.301590e+06   30394.51806
3        Tether insolvent in 2025?  4.525979e+05   30793.97432
4              USDT depeg in 2025?  1.481766e+06  113604.92075

Total Volume: $427,827,208.26
Average Volume: $4,278,272.08
Max Volume: $51,672,807.51


In [56]:
# Get specific event with all its markets
def get_event(event_slug):
    """
    Fetch a specific event by slug
    
    Parameters:
    - event_slug: The event slug identifier
    """
    url = f"https://gamma-api.polymarket.com/events/{event_slug}"
    response = requests.get(url)
    return response.json()

# Example: Search for events
def search_markets(query):
    """
    Search markets by query string
    """
    url = "https://gamma-api.polymarket.com/search"
    params = {"q": query}
    response = requests.get(url, params=params)
    return response.json()

# Example search
# search_results = search_markets("bitcoin")
# print(f"Found {len(search_results)} results for 'bitcoin'")

# example get event
event__results = get_event(markets_df.iloc[0]['slug'])
event__results


{'type': 'validation error', 'error': '"id" has a wrong value'}

## 2. Data API - Trading Data

The Data API provides access to trades, user activity, and positions.
Base URL: https://data-api.polymarket.com

Reference: https://docs.polymarket.com/developers/CLOB/trades/trades-data-api


In [82]:
# Fetch trades data
def get_trades(user=None, market=None, event_id=None, side=None, limit=100, offset=0):
    """
    Fetch trades from Data API
    
    Parameters:
    - user: User wallet address (0x-prefixed)
    - market: Condition ID(s) - comma-separated for multiple
    - event_id: Event ID(s) - comma-separated for multiple
    - side: Trade side - 'BUY' or 'SELL'
    - limit: Number of records (max 10,000)
    - offset: Pagination offset (max 10,000)
    """
    url = "https://data-api.polymarket.com/trades"
    params = {
        "limit": limit,
        "offset": offset
    }
    
    if user:
        params["user"] = user
    if market:
        params["market"] = market
    if event_id:
        params["eventId"] = event_id
    if side:
        params["side"] = side
    
    response = requests.get(url, params=params)
    return response.json()

# Example: Get recent trades (general)
recent_trades = get_trades(limit=100000, offset=10000)
print(f"Retrieved {len(recent_trades)} recent trades")

# Convert to DataFrame
if recent_trades:
    trades_df = pd.DataFrame(recent_trades)
    print("\nRecent Trades:")
    display_cols = ['timestamp', 'side', 'size', 'price', 'title', 'outcome']
    available_cols = [col for col in display_cols if col in trades_df.columns]
    if available_cols:
        # Convert timestamp if present
        if 'timestamp' in trades_df.columns:
            trades_df['datetime'] = pd.to_datetime(trades_df['timestamp'], unit='s')
            available_cols = ['datetime'] + [c for c in available_cols if c != 'timestamp']
        print(trades_df[available_cols].head())


Retrieved 500 recent trades

Recent Trades:
             datetime  side        size     price  \
0 2025-12-10 19:00:06   BUY  242.939333  0.972177   
1 2025-12-10 19:00:06  SELL   61.420000  0.002141   
2 2025-12-10 19:00:06   BUY   96.268666  0.997000   
3 2025-12-10 19:00:06   BUY  117.920000  0.987000   
4 2025-12-10 19:00:06   BUY  200.000000  0.580000   

                                               title outcome  
0               Will 2 Fed rate cuts happen in 2025?      No  
1  Will there be at least 1900 measles cases in t...      No  
2               Will 4 Fed rate cuts happen in 2025?      No  
3     HumidiFi FDV above $200M one day after launch?     Yes  
4  Bitcoin Up or Down - December 10, 2:00PM-2:15P...      Up  


In [88]:
markets_df.head()

Unnamed: 0,id,question,conditionId,slug,resolutionSource,endDate,liquidity,startDate,image,icon,...,negRiskOther,umaResolutionStatuses,pendingDeployment,deploying,rfqEnabled,holdingRewardsEnabled,feesEnabled,seriesColor,showGmpSeries,showGmpOutcome
0,516706,Fed rate hike in 2025?,0x4319532e181605cb15b1bd677759a3bc7f7394b2fdf1...,fed-rate-hike-in-2025,,2025-12-10 12:00:00+00:00,38409.32452,2024-12-29 22:50:33.584839+00:00,https://polymarket-upload.s3.us-east-2.amazona...,https://polymarket-upload.s3.us-east-2.amazona...,...,False,[],False,False,False,False,False,,,
1,516710,US recession in 2025?,0xfa48a99317daef1654d5b03e30557c4222f276657275...,us-recession-in-2025,,2026-02-28 12:00:00+00:00,95149.02162,2025-01-08 01:33:54.924000+00:00,https://polymarket-upload.s3.us-east-2.amazona...,https://polymarket-upload.s3.us-east-2.amazona...,...,False,[],False,False,True,False,False,,,
2,516711,Fed emergency rate cut in 2025?,0xcddb4f52babca280f8e5fc4b9146f2520f0bfb88e134...,fed-emergency-rate-cut-in-2025,,2025-12-31 12:00:00+00:00,30394.51806,2024-12-29 22:41:48.240000+00:00,https://polymarket-upload.s3.us-east-2.amazona...,https://polymarket-upload.s3.us-east-2.amazona...,...,False,[],False,False,False,False,False,,,
3,516712,Tether insolvent in 2025?,0x1bbdc459d73e6972f5ba1f687d739fde70a9774f4875...,tether-insolvent-in-2025,,2025-12-31 12:00:00+00:00,30793.97432,2024-12-29 22:41:19.741000+00:00,https://polymarket-upload.s3.us-east-2.amazona...,https://polymarket-upload.s3.us-east-2.amazona...,...,False,[],False,False,False,False,False,,,
4,516713,USDT depeg in 2025?,0x416316490efec1038ce09ec0184f82f1f7921876ee05...,usdt-depeg-in-2025,,2025-12-31 12:00:00+00:00,113604.92075,2024-12-29 22:40:52.581438+00:00,https://polymarket-upload.s3.us-east-2.amazona...,https://polymarket-upload.s3.us-east-2.amazona...,...,False,[],False,False,False,False,False,,,


## 3. User Activity and Positions

Fetch on-chain activity and current positions for specific users.


In [93]:
# Fetch user activity
def get_user_activity(user_address, market=None, event_id=None, activity_type=None, 
                      limit=100, offset=0, sort_by="TIMESTAMP", sort_direction="DESC"):
    """
    Fetch on-chain activity for a user
    
    Parameters:
    - user_address: User wallet address (0x-prefixed) - REQUIRED
    - market: Condition ID(s) - comma-separated
    - event_id: Event ID(s) - comma-separated
    - activity_type: List of types - TRADE, SPLIT, MERGE, REDEEM, etc.
    - limit: Number of records (max 500)
    - offset: Pagination offset (max 10,000)
    - sort_by: TIMESTAMP, TOKENS, or CASH
    - sort_direction: ASC or DESC
    """
    url = "https://data-api.polymarket.com/activity"
    params = {
        "user": user_address,
        "limit": limit,
        "offset": offset,
        "sortBy": sort_by,
        "sortDirection": sort_direction
    }
    
    if market:
        params["market"] = market
    if event_id:
        params["eventId"] = event_id
    if activity_type:
        params["type"] = activity_type
    
    response = requests.get(url, params=params)
    return response.json()

# Example: Get activity for a specific user
# Replace with an actual user address to test
example_user = "0x1f4c45517fac61b0673f2904e8a2f5ba4f20de22" 

try:
    user_activity = get_user_activity(example_user, limit=10)
    print(f"Retrieved {len(user_activity)} activity records for user")
    
    if user_activity:
        activity_df = pd.DataFrame(user_activity)
        print("\nUser Activity:")
        display_cols = ['timestamp', 'type', 'side', 'size', 'price', 'title', 'outcome']
        available_cols = [col for col in display_cols if col in activity_df.columns]
        
        if 'timestamp' in activity_df.columns:
            activity_df['datetime'] = pd.to_datetime(activity_df['timestamp'], unit='s')
            available_cols = ['datetime'] + [c for c in available_cols if c != 'timestamp']
        
        if available_cols:
            print(activity_df[available_cols].head())
except Exception as e:
    print(f"Note: Replace example_user with a real address. Error: {e}")


Retrieved 10 activity records for user

User Activity:
             datetime   type  side   size     price  \
0 2025-12-10 19:01:40  TRADE   BUY  20.00  0.540000   
1 2025-12-10 19:00:06  TRADE  SELL  61.42  0.002141   
2 2025-12-10 18:59:50  TRADE  SELL  12.95  0.002000   
3 2025-12-10 18:59:44  TRADE  SELL  12.95  0.003000   
4 2025-12-10 18:59:30  TRADE  SELL  12.68  0.003000   

                                               title outcome  
0  Will there be at least 2000 measles cases in t...     Yes  
1  Will there be at least 1900 measles cases in t...      No  
2  Will there be at least 1900 measles cases in t...      No  
3  Will there be at least 1900 measles cases in t...      No  
4  Will there be at least 1900 measles cases in t...      No  


In [None]:
# Fetch user positions
def get_user_positions(user_address, market=None, event_id=None, 
                       size_threshold=1, redeemable=False, mergeable=False,
                       limit=100, offset=0, sort_by="TOKENS", sort_direction="DESC"):
    """
    Fetch current positions for a user
    
    Parameters:
    - user_address: User wallet address (0x-prefixed) - REQUIRED
    - market: Condition ID(s) - comma-separated
    - event_id: Event ID(s) - comma-separated
    - size_threshold: Minimum position size (default: 1)
    - redeemable: Filter redeemable positions
    - mergeable: Filter mergeable positions
    - limit: Number of records (max 500)
    - offset: Pagination offset (max 10,000)
    - sort_by: CURRENT, INITIAL, TOKENS, CASHPNL, PERCENTPNL, etc.
    - sort_direction: ASC or DESC
    """
    url = "https://data-api.polymarket.com/positions"
    params = {
        "user": user_address,
        "sizeThreshold": size_threshold,
        "redeemable": str(redeemable).lower(),
        "mergeable": str(mergeable).lower(),
        "limit": limit,
        "offset": offset,
        "sortBy": sort_by,
        "sortDirection": sort_direction
    }
    
    if market:
        params["market"] = market
    if event_id:
        params["eventId"] = event_id
    
    response = requests.get(url, params=params)
    return response.json()

# Example: Get positions for a user
try:
    user_positions = get_user_positions(example_user, limit=10)
    print(f"Retrieved {len(user_positions)} positions for user")
    
    if user_positions:
        positions_df = pd.DataFrame(user_positions)
        print("\nUser Positions:")
        display_cols = ['title', 'outcome', 'size', 'avgPrice', 'curPrice', 
                       'cashPnl', 'percentPnl', 'redeemable']
        available_cols = [col for col in display_cols if col in positions_df.columns]
        
        if available_cols:
            print(positions_df[available_cols].head())
            
        # Summary stats
        if 'cashPnl' in positions_df.columns:
            print(f"\nTotal P&L: ${positions_df['cashPnl'].sum():,.2f}")
            print(f"Average P&L: ${positions_df['cashPnl'].mean():,.2f}")
except Exception as e:
    print(f"Note: Replace example_user with a real address. Error: {e}")


## Overall flow to get user data
1. set up supabase
2. get all recent markets -> store to `markets` table
3. pull recent 10M trades -> store to `trades` table
4. identify frequent users (those making min 10 trades in last month etc.) ->  store to `users` table

In [101]:
# bets that require multiple bets and synthetize

In [105]:
trades_df.columns

Index(['proxyWallet', 'side', 'asset', 'conditionId', 'size', 'price',
       'timestamp', 'title', 'slug', 'icon', 'eventSlug', 'outcome',
       'outcomeIndex', 'name', 'pseudonym', 'bio', 'profileImage',
       'profileImageOptimized', 'transactionHash', 'datetime'],
      dtype='object')

## Data pipeline testing

In [1]:
#from database.load_data_to_db import *
from database.init_data import *

In [None]:
# out = load_events_with_markets(limit=10, active_only=True)

out = init_load_all_events_with_markets(batch_size=100, start_offset=800)

TypeError: init_load_all_events_with_markets() got an unexpected keyword argument 'offset'