### Imports

In [51]:
import pandas as pd
pd.set_option('display.max_columns', None)
import pytz
import duckdb
import time

import requests as rq
import json
from datetime import datetime
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import os
from dotenv import load_dotenv

import asyncio
import json
import websockets
from urllib.parse import urlparse, parse_qs

### Keys

In [65]:
# Loads variables from .env file into environment
load_dotenv()

CG_DEMO_API_KEY = os.getenv("CG_DEMO_API_KEY")
if not CG_DEMO_API_KEY:
    raise RuntimeError("Missing Demo API key in the environment")

CG_PRO_API_KEY = os.getenv("CG_PRO_API_KEY")
if not CG_PRO_API_KEY:
    raise RuntimeError("Missing Pro API key in the environment")

CG_ANALYST_API_KEY = os.getenv("CG_ANALYST_API_KEY")
if not CG_ANALYST_API_KEY:
    raise RuntimeError("Missing Analyst API key in the environment")

### API status

In [7]:
PUB_URL = "https://api.coingecko.com/api/v3"
PRO_URL = "https://pro-api.coingecko.com/api/v3"

In [8]:
def get_response(endpoint, headers, params, URL):

    url = "".join((URL, endpoint))
    response = rq.get(url, headers=headers, params=params)

    try:
        data = response.json()
    except ValueError:
        print("Invalid JSON response")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data, status code {response.status_code}")

    return data

In [10]:
use_demo = {
           "accept": "application/json",
           "x-cg-demo-api-key" : CG_DEMO_API_KEY
}

use_pro = {
         "accept": "application/json",
         "x-cg-pro-api-key" : CG_PRO_API_KEY
}

In [11]:
get_response("/ping", use_demo, "", PUB_URL)

{'gecko_says': '(V3) To the Moon!'}

### Get new pools

In [14]:
def safe_get(d, path, default=None):
    """Safely get a nested dictionary value."""
    for key in path:
        if isinstance(d, dict) and key in d:
            d = d[key]
        else:
            return default
    return d

def collect_response(list_response):

    response_all = []

    for response in list_response.get("data", []):
        
        all_attributes = response.get("attributes", {})
        rel = response.get("relationships", {})
        
        base_token_add = safe_get(rel, ["base_token", "data", "id"], "NA")
        
        # If token_add exists, split it.
        token_add = base_token_add.split("_")[1] if base_token_add != "NA" and "_" in base_token_add else "NA"
        
        temp_dict = dict(
            pair = safe_get(all_attributes, ["name"], "NA"),
            pool_created_at = safe_get(all_attributes, ["pool_created_at"], "NA"),
            dex = safe_get(rel, ["dex", "data", "id"], "NA"),
            network = safe_get(rel, ["network", "data", "id"], "NA"),
            token_add = token_add,
            pool_add = safe_get(all_attributes, ["address"], "NA"),
            fdv_usd = safe_get(all_attributes, ["fdv_usd"], "NA"),
            market_cap_usd = safe_get(all_attributes, ["market_cap_usd"], "NA"),
            daily_volume = safe_get(all_attributes, ["volume_usd", "h24"], "NA"),
            daily_price_change = safe_get(all_attributes, ["price_change_percentage", "h24"], "NA"),
        )
        
        response_all.append(temp_dict)

    return response_all

In [None]:
def get_new_pools(network, sort_by_col, max_pages=None):
    
    endpoint = f"/onchain/networks/{network}/new_pools"
    params = {}
    newpools_all = []
    page_count = 0

    # Follow pagination via the response links.next and collect across pages, with an optional max_pages cap.
    while True:
        pools_list_response = get_response(endpoint, use_pro, params, PRO_URL)
        if not pools_list_response:
            break

        newpools_all.extend(collect_response(pools_list_response))
        page_count += 1

        if max_pages is not None and page_count >= max_pages:
            break

        links = pools_list_response.get("links", {})
        next_link = links.get("next") if isinstance(links, dict) else None
        if not next_link:
            break

        parsed = urlparse(next_link)
        endpoint = parsed.path
        params = {k: v[0] for k, v in parse_qs(parsed.query).items()}

    df_new_pools = pd.DataFrame(newpools_all)

    # Change to local timezone
    df_new_pools["pool_created_at"] = pd.to_datetime(df_new_pools["pool_created_at"], utc=True)
    df_new_pools["pool_created_at"] = df_new_pools["pool_created_at"].dt.tz_convert("Europe/Berlin")

    return df_new_pools[df_new_pools["dex"] == "pump-fun"].sort_values(
        by=[f"{sort_by_col}"], ascending=False
    )


In [100]:
#get_new_pools("solana", "pool_created_at", max_pages = 5).head(50)

### Filter profitable pools

In [None]:
def collect_pool_response(list_response):

    response = list_response.get("data", {})
    all_attributes = response.get("attributes", {})
    daily_tx = all_attributes["transactions"]["h24"]
    rel = response["relationships"]
    
    # Safely extract launchpad_details or default to empty dict
    launchpad_details = all_attributes.get("launchpad_details", {})
        
    response_dict = dict(
        pair = all_attributes["name"],
        dex = rel["dex"]["data"]["id"],
        token_add = rel["base_token"]["data"]["id"].split("_")[1],
        pool_add = all_attributes["address"],
        pool_created_at = all_attributes["pool_created_at"],
        fdv_usd = all_attributes["fdv_usd"],
        market_cap_usd = all_attributes["market_cap_usd"],
        daily_volume = all_attributes["volume_usd"]["h24"],
        daily_price_change = all_attributes["price_change_percentage"]["h24"],
        daily_buys = daily_tx["buys"],
        daily_sells = daily_tx["sells"],
        daily_buyers = daily_tx["buyers"],
        daily_sellers = daily_tx["sellers"],
        grad_pert = (
            launchpad_details.get("graduation_percentage")
            if launchpad_details else 0
        ),
        completed = launchpad_details.get("completed", False),
        completed_at = launchpad_details.get("completed_at", None),
        dest_pool = launchpad_details.get("migrated_destination_pool_address", None)
    )

    return response_dict

In [None]:
def get_pool_data(network, pool_address):

    target_url = f"/onchain/networks/{network}/pools/{pool_address}"

    pool_list_response = get_response(target_url,
                                      use_pro,
                                      "",
                                      PRO_URL)

    pool_all = collect_pool_response(pool_list_response)

    return pool_all

In [91]:
target_url = f"/onchain/networks/solana/pools/3KbAXjXrHs71yrcjW5CU1h5uiPVzd5cyAKF8XFY6481X"

pool_list_response = get_response(target_url,
                                    use_pro,
                                    "",
                                    PRO_URL)

In [96]:
pool_list_response["data"]

{'id': 'solana_3KbAXjXrHs71yrcjW5CU1h5uiPVzd5cyAKF8XFY6481X',
 'type': 'pool',
 'attributes': {'base_token_price_usd': '0.00000360673286231391700413473199114159005808897775622945856056670222',
  'base_token_price_native_currency': '0.0000000284062003202499',
  'quote_token_price_usd': '126.969915780773576305035479353251020822703954254',
  'quote_token_price_native_currency': '1.0',
  'base_token_price_quote_token': '0.00000002840620032',
  'quote_token_price_base_token': '35203581.9196533',
  'address': '3KbAXjXrHs71yrcjW5CU1h5uiPVzd5cyAKF8XFY6481X',
  'name': 'Startup / SOL',
  'pool_name': 'Startup / SOL',
  'pool_fee_percentage': None,
  'pool_created_at': '2026-01-24T21:40:33Z',
  'fdv_usd': '3606.732862',
  'market_cap_usd': None,
  'price_change_percentage': {'m5': '0',
   'm15': '-6.15',
   'm30': '-6.15',
   'h1': '-6.15',
   'h6': '-6.15',
   'h24': '-6.15'},
  'transactions': {'m5': {'buys': 0, 'sells': 0, 'buyers': 0, 'sellers': 0},
   'm15': {'buys': 11, 'sells': 20, 'buyer

In [56]:
def collect_pool_data(network, num_rows, max_pages):

    df_new_pools = get_new_pools(network, "pool_created_at", max_pages).head(num_rows)
    
    all_pool_data = []

    for pool_add in df_new_pools["pool_add"]:
        pool_data = get_pool_data(network, pool_add)
        all_pool_data.append(pool_data)

    df = pd.DataFrame(all_pool_data)

    df = df.astype({
        "pair": "string",
        "dex": "string",
        "add": "string",
        "daily_buys": "Int64",
        "daily_sells": "Int64",
        "daily_buyers": "Int64",
        "daily_sellers": "Int64",
        "completed": "boolean",
        "dest_pool": "string",
    })

    # Numeric columns (coerce invalids to NaN)
    for col in ["fdv_usd", "market_cap_usd", "daily_volume", "daily_price_change", "grad_pert"]:
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # Timestamps
    df["pool_created_at"] = pd.to_datetime(df["pool_created_at"], utc=True, errors="coerce")
    df["completed_at"] = pd.to_datetime(df["completed_at"], utc=True, errors="coerce")

    return df

In [82]:
def analyze_pools(network, num_rows, max_pages=5):

    df_pool_data = collect_pool_data(network, num_rows, max_pages)

    # Inspect key metrics such as Fully Diluted Volume (FDV) and age of the pool. We want
    # to filter out pools which are older than 10 minutes and have FDV less than $5000.
    cutoff = pd.Timestamp.now(tz="UTC") - pd.Timedelta(minutes=10)

    df_filtered = df_pool_data[
                        (df_pool_data["pool_created_at"] >= cutoff) &
                        (df_pool_data["fdv_usd"] > 4000)
                        ].copy()

    # Convert to local timezone
    df_filtered["pool_created_at"] = df_filtered["pool_created_at"].dt.tz_convert("Europe/Berlin")
    
    return df_filtered

In [83]:
analyze_pools("solana", num_rows=200)

Unnamed: 0,pair,dex,add,pool_created_at,fdv_usd,market_cap_usd,daily_volume,daily_price_change,daily_buys,daily_sells,daily_buyers,daily_sellers,grad_pert,completed,completed_at,dest_pool
4,Startup / SOL,pump-fun,3KbAXjXrHs71yrcjW5CU1h5uiPVzd5cyAKF8XFY6481X,2026-01-24 22:40:33+01:00,4649.201073,,968.195684,20.97,7,13,7,11,17.05,False,NaT,
7,comput / SOL,pump-fun,CbBrgjhfjNvNXg4g7sFPPbst7Zwo7gA5oX65qzrcuYL4,2026-01-24 22:40:27+01:00,7382.566557,,1498.100761,83.79,9,3,9,3,41.57,False,NaT,


### Monitor real-time price

In [101]:
WS_URL = f"wss://stream.coingecko.com/v1?x_cg_pro_api_key={CG_ANALYST_API_KEY}"

TOKEN_ADDRESS = "solana_8m4qFQP91BFAXf6Mb8FjwXtM6J2kdJ8FeoyfDEkUpump"
NETWORK_ID = "solana"

In [106]:
rename_map = {
    "c": "channel_type",
    "n": "network_id",
    "ta": "token_address",
    "p": "usd_price",
    "pp": "usd_price_24h_change_percentage",
    "m": "usd_market_cap",
    "v": "usd_24h_vol",
    "t": "last_updated_at",
}

async def stream_token_price():
    
    async with websockets.connect(WS_URL) as ws:
        # 1) Subscribe
        subscribe_msg = {
            "command": "subscribe",
            "identifier": json.dumps({"channel": "OnchainSimpleTokenPrice"})
        }
        await ws.send(json.dumps(subscribe_msg))

        # 2) Send message to set tokens
        data_payload = {
            f"{NETWORK_ID}:token_addresses": [TOKEN_ADDRESS],
            "action": "set_tokens"
        }
        message_msg = {
            "command": "message",
            "identifier": json.dumps({"channel": "OnchainSimpleTokenPrice"}),
            "data": json.dumps(data_payload)
        }
        await ws.send(json.dumps(message_msg))

        # # Stream data
        # while True:
        #     msg = await ws.recv()
        #     payload = json.loads(msg)

        #     # Unwrap if needed
        #     if isinstance(payload, dict) and "message" in payload:
        #         data = payload["message"]
        #     else:
        #         data = payload

        #     # Only process if we get valid data
        #     if isinstance(data, dict) and "c" in data: 

        #         row = {rename_map[k]: payload.get(k) for k in rename_map}
        #         df = pd.DataFrame([row])

        #         # Convert UNIX seconds to CET/CEST
        #         df["last_updated_at"] = pd.to_datetime(df["last_updated_at"], unit="s", utc=True)
        #         df["last_updated_at"] = df["last_updated_at"].dt.tz_convert("Europe/Berlin")

        #         print(df)

        #     else:
        #         print(data)

In [107]:
#await stream_token_price()