### Imports

In [43]:
import pandas as pd
pd.set_option('display.max_columns', None)
import pytz
import duckdb
import time

import requests as rq
import json
from datetime import datetime
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import os
from dotenv import load_dotenv

import asyncio
import json
import websockets

### Keys

In [None]:
# Populate the process environment from the .env file
load_dotenv()

# Demo key: abort early if it is unset or blank
CG_DEMO_API_KEY = os.environ.get("CG_DEMO_API_KEY")
if CG_DEMO_API_KEY is None or CG_DEMO_API_KEY == "":
    raise RuntimeError("Missing Demo API key in the environment")

# Pro key: same requirement as the demo key
CG_PRO_API_KEY = os.environ.get("CG_PRO_API_KEY")
if CG_PRO_API_KEY is None or CG_PRO_API_KEY == "":
    raise RuntimeError("Missing Pro API key in the environment")

### API status

In [7]:
# Base URL of the public API host; paired with the demo-key headers for the /ping check
PUB_URL = "https://api.coingecko.com/api/v3"
# Base URL of the Pro API host; paired with the pro-key headers for the on-chain endpoints
PRO_URL = "https://pro-api.coingecko.com/api/v3"

In [8]:
def get_response(endpoint, headers, params, URL, timeout=30):
    """Send a GET request to ``URL + endpoint`` and return the decoded JSON body.

    Parameters
    ----------
    endpoint : str
        Path appended verbatim to ``URL`` (for example ``"/ping"``).
    headers : dict
        HTTP headers sent with the request (the API-key header dicts).
    params : dict or str
        Query parameters forwarded to ``requests.get``; callers in this
        notebook pass ``""`` when there are none.
    URL : str
        Base URL the endpoint is appended to.
    timeout : float or tuple, default 30
        Seconds to wait for the server. Without a timeout ``requests`` waits
        indefinitely, which would freeze the notebook on a stalled connection.

    Returns
    -------
    object or None
        The parsed JSON body. ``None`` when the body is not valid JSON.
        A non-200 status is reported on stdout, but the parsed body is
        still returned so the caller can inspect the error payload.

    Raises
    ------
    requests.exceptions.RequestException
        On connection problems, including ``Timeout`` once ``timeout`` elapses.
    """
    url = f"{URL}{endpoint}"
    response = rq.get(url, headers=headers, params=params, timeout=timeout)

    try:
        data = response.json()
    except ValueError:
        print("Invalid JSON response")
        return None

    if response.status_code != 200:
        print(f"Failed to fetch data, status code {response.status_code}")

    return data

In [10]:
# Request headers carrying the demo API key
use_demo = {
           "accept": "application/json",
           "x-cg-demo-api-key" : CG_DEMO_API_KEY
}

# Request headers carrying the pro API key
use_pro = {
         "accept": "application/json",
         "x-cg-pro-api-key" : CG_PRO_API_KEY
}

In [11]:
# Connectivity check: call the /ping endpoint on the public host with the demo-key headers
get_response("/ping", use_demo, "", PUB_URL)

{'gecko_says': '(V3) To the Moon!'}

### Get new pools

In [14]:
def safe_get(d, path, default=None):
    """Walk ``path`` through nested dicts and return the value found there.

    ``path`` is an iterable of keys applied one after another, starting at
    ``d``. As soon as the current value is not a dict, or the next key is
    absent, ``default`` is returned instead. A key that is present with a
    ``None`` value is returned as ``None``, not as ``default``.
    """
    current = d
    for step in path:
        # Bail out on the first level that cannot be descended into
        if not isinstance(current, dict) or step not in current:
            return default
        current = current[step]
    return current

def collect_response(list_response):
    """Flatten a pool-list payload into one plain dict per pool.

    Parameters
    ----------
    list_response : dict or None
        Parsed JSON as returned by ``get_response``. Pools are read from its
        ``"data"`` list.

    Returns
    -------
    list of dict
        One dict per pool with the keys ``pair``, ``pool_created_at``, ``dex``,
        ``network``, ``token_add``, ``pool_add``, ``fdv_usd``,
        ``market_cap_usd``, ``daily_volume`` and ``daily_price_change``.
        A field missing from the payload becomes ``"NA"``; a field present
        with a null value stays ``None``. A payload that is not a dict, or
        whose ``"data"`` is not a list, yields an empty list.
    """
    # get_response returns None for a non-JSON body; treat that as "no pools"
    if not isinstance(list_response, dict):
        return []

    # "data" can be absent or null (e.g. in an error payload)
    pools = list_response.get("data")
    if not isinstance(pools, list):
        return []

    response_all = []

    for response in pools:

        # Skip malformed entries instead of failing on .get()
        if not isinstance(response, dict):
            continue

        all_attributes = safe_get(response, ["attributes"], {})
        rel = safe_get(response, ["relationships"], {})

        base_token_add = safe_get(rel, ["base_token", "data", "id"], "NA")

        # The id is treated as "<prefix>_<token address>". Keep everything
        # after the FIRST underscore so an address that itself contains
        # underscores is not truncated. The isinstance check covers an id
        # that is present but null.
        # NOTE(review): a prefix that itself contains an underscore would still
        # leak into the address - confirm the id format against the API docs.
        if isinstance(base_token_add, str) and "_" in base_token_add:
            token_add = base_token_add.partition("_")[2]
        else:
            token_add = "NA"

        temp_dict = dict(
            pair = safe_get(all_attributes, ["name"], "NA"),
            pool_created_at = safe_get(all_attributes, ["pool_created_at"], "NA"),
            dex = safe_get(rel, ["dex", "data", "id"], "NA"),
            network = safe_get(rel, ["network", "data", "id"], "NA"),
            token_add = token_add,
            pool_add = safe_get(all_attributes, ["address"], "NA"),
            fdv_usd = safe_get(all_attributes, ["fdv_usd"], "NA"),
            market_cap_usd = safe_get(all_attributes, ["market_cap_usd"], "NA"),
            daily_volume = safe_get(all_attributes, ["volume_usd", "h24"], "NA"),
            daily_price_change = safe_get(all_attributes, ["price_change_percentage", "h24"], "NA"),
        )

        response_all.append(temp_dict)

    return response_all

In [21]:
def get_new_pools(network, sort_by_col, dex="pump-fun", tz="Europe/Berlin"):
    """Fetch the new-pools list for ``network`` and return it as a sorted DataFrame.

    Parameters
    ----------
    network : str
        Network id inserted into the ``/onchain/networks/{network}/new_pools`` path.
    sort_by_col : str
        Column to sort by, in descending order.
    dex : str or None, default "pump-fun"
        Keep only rows whose ``dex`` column equals this value. Pass ``None``
        to keep every DEX.
    tz : str, default "Europe/Berlin"
        Timezone ``pool_created_at`` is converted to.

    Returns
    -------
    pandas.DataFrame
        One row per pool with the columns produced by ``collect_response``.
        The frame is empty, but still has those columns, when the request
        returned nothing usable.
    """
    pools_list_response = get_response(f"/onchain/networks/{network}/new_pools",
                                       use_pro,
                                       "",
                                       PRO_URL)

    # get_response returns None for a non-JSON body and error payloads may lack
    # a "data" list; only hand well-formed payloads to the parser.
    has_pools = (isinstance(pools_list_response, dict)
                 and isinstance(pools_list_response.get("data"), list))
    newpools_all = collect_response(pools_list_response) if has_pools else []

    # Naming the columns keeps the frame well-formed when no rows came back;
    # otherwise the column lookups below would raise KeyError.
    columns = ["pair", "pool_created_at", "dex", "network", "token_add", "pool_add",
               "fdv_usd", "market_cap_usd", "daily_volume", "daily_price_change"]
    df_new_pools = pd.DataFrame(newpools_all, columns=columns)

    # Parse as UTC, then change to the requested timezone. Unparseable values
    # such as the "NA" placeholder become NaT instead of raising.
    df_new_pools["pool_created_at"] = pd.to_datetime(df_new_pools["pool_created_at"],
                                                     utc=True, errors="coerce")
    df_new_pools["pool_created_at"] = df_new_pools["pool_created_at"].dt.tz_convert(tz)

    if dex is not None:
        df_new_pools = df_new_pools[df_new_pools["dex"] == dex]

    return df_new_pools.sort_values(by=sort_by_col, ascending=False)


In [22]:
# Preview the five most recently created pump-fun pools returned for Solana
get_new_pools("solana", "pool_created_at").head(5)

Unnamed: 0,pair,pool_created_at,dex,network,token_add,pool_add,fdv_usd,market_cap_usd,daily_volume,daily_price_change
0,kissed / SOL,2026-01-18 21:20:52+01:00,pump-fun,,Fwe6BcjWTi42gFwicYhPRQDj5CpgyqKfMKQ1FA2upump,GTv8LJfsU8qwcxt8ztKYsJSHSMmK9htK6p7ikvQht56i,,,0.1420985557,0.0
2,SNP100 / SOL,2026-01-18 21:20:45+01:00,pump-fun,,EnaSzbCHq5PvG9xtW5q5sL77ZtLv3VYKHtxCkMPspump,BMmZhZDRU7UohHgB298euUxr9uyrR2nw5Shr3gv743kr,,,1.4247486918,0.0
3,DOG / SOL,2026-01-18 21:20:45+01:00,pump-fun,,A6nggivGgitM3yRKAPRnFiU2reZixufxRYtGmBRrpump,4yeMrnfHBrnRtjGzComh49WPKeDrZiJbc2rjN8dM98wg,,,56.0811122696,0.0
4,MONEYBACK / SOL,2026-01-18 21:20:44+01:00,pump-fun,,3YB9Wixaxbguq1a4mny6a9Vy9hF19KEhnCS8YRJcpump,GTFR3iSEk5g8BcYiUU5PaNkTkfT81c2spPnmvuwg16NA,3973.0864520045,,0.1402045347,0.0
5,TROUVE / SOL,2026-01-18 21:20:38+01:00,pump-fun,,3z5zyJKwZndGVjhQgSr8XJe9CfKLPbA4BvJmtzTrpump,Cr7BzrFiQtk2K8m28SMxPcTpe53jqXM3pX881QEXEYJx,3974.4409182299,,6701.4764205011,27.357


### Filter profitable pools

In [29]:
def _as_dict(value):
    """Return ``value`` when it is a dict, otherwise an empty dict."""
    return value if isinstance(value, dict) else {}


def collect_pool_response(list_response):
    """Flatten a single-pool payload into one plain dict.

    Parameters
    ----------
    list_response : dict or None
        Parsed JSON as returned by ``get_response``. The pool is read from
        its ``"data"`` object.

    Returns
    -------
    dict
        Keys: ``pair``, ``dex``, ``add``, ``pool_created_at``, ``fdv_usd``,
        ``market_cap_usd``, ``daily_volume``, ``daily_price_change``,
        ``daily_buys``, ``daily_sells``, ``daily_buyers``, ``daily_sellers``,
        ``grad_pert``, ``completed``, ``completed_at``, ``dest_pool``.
        Any field that is missing or null in the payload is ``None``.
        When there are no launchpad details, ``grad_pert`` is ``0`` and
        ``completed`` is ``False``.
    """
    # Every level may be missing OR explicitly null (error payloads, pools
    # without launchpad data), so each one is normalised to a dict first.
    response = _as_dict(_as_dict(list_response).get("data"))
    all_attributes = _as_dict(response.get("attributes"))
    rel = _as_dict(response.get("relationships"))

    daily_tx = _as_dict(_as_dict(all_attributes.get("transactions")).get("h24"))
    volume_usd = _as_dict(all_attributes.get("volume_usd"))
    price_change = _as_dict(all_attributes.get("price_change_percentage"))
    dex_data = _as_dict(_as_dict(rel.get("dex")).get("data"))

    # .get(key, {}) alone does not cover a key that is present with a null
    # value; _as_dict does.
    launchpad_details = _as_dict(all_attributes.get("launchpad_details"))

    response_dict = dict(
        pair = all_attributes.get("name"),
        dex = dex_data.get("id"),
        add = all_attributes.get("address"),
        pool_created_at = all_attributes.get("pool_created_at"),
        fdv_usd = all_attributes.get("fdv_usd"),
        market_cap_usd = all_attributes.get("market_cap_usd"),
        daily_volume = volume_usd.get("h24"),
        daily_price_change = price_change.get("h24"),
        daily_buys = daily_tx.get("buys"),
        daily_sells = daily_tx.get("sells"),
        daily_buyers = daily_tx.get("buyers"),
        daily_sellers = daily_tx.get("sellers"),
        # No launchpad details at all -> report 0 percent
        grad_pert = (
            launchpad_details.get("graduation_percentage")
            if launchpad_details else 0
        ),
        completed = launchpad_details.get("completed", False),
        completed_at = launchpad_details.get("completed_at", None),
        dest_pool = launchpad_details.get("migrated_destination_pool_address", None)
    )

    return response_dict

In [None]:
def get_pool_data(network, pool_address):
    """Fetch one pool from the Pro API and return its flattened attributes.

    The pool is requested from ``/onchain/networks/{network}/pools/{pool_address}``
    with the pro-key headers, and the parsed payload is passed through
    ``collect_pool_response``.
    """
    raw_pool = get_response(
        f"/onchain/networks/{network}/pools/{pool_address}",
        use_pro,
        "",
        PRO_URL,
    )
    return collect_pool_response(raw_pool)

In [39]:
def collect_pool_data(network, num_rows):
    """Fetch detailed data for the newest pools and return it as a typed DataFrame.

    Parameters
    ----------
    network : str
        Network id passed to ``get_new_pools`` and ``get_pool_data``.
    num_rows : int
        How many of the most recently created pools to look up. One request
        is issued per pool, sequentially.

    Returns
    -------
    pandas.DataFrame
        One row per pool. Text columns use the ``string`` dtype, counts use
        nullable ``Int64``, ``completed`` is nullable ``boolean``, monetary and
        percentage columns are numeric (invalid values become NaN), and both
        timestamp columns are UTC datetimes (invalid values become NaT).
        The frame is empty, with the same columns, when there are no pools.
    """
    df_new_pools = get_new_pools(network, "pool_created_at").head(num_rows)

    all_pool_data = [get_pool_data(network, pool_add)
                     for pool_add in df_new_pools["pool_add"]]

    # Naming the columns keeps the frame well-formed when no pool was fetched
    # (e.g. nothing survived the DEX filter); astype() below would otherwise
    # raise KeyError on a frame without columns.
    columns = ["pair", "dex", "add", "pool_created_at", "fdv_usd", "market_cap_usd",
               "daily_volume", "daily_price_change", "daily_buys", "daily_sells",
               "daily_buyers", "daily_sellers", "grad_pert", "completed",
               "completed_at", "dest_pool"]
    df = pd.DataFrame(all_pool_data, columns=columns)

    df = df.astype({
        "pair": "string",
        "dex": "string",
        "add": "string",
        "daily_buys": "Int64",
        "daily_sells": "Int64",
        "daily_buyers": "Int64",
        "daily_sellers": "Int64",
        "completed": "boolean",
        "dest_pool": "string",
    })

    # Numeric columns (coerce invalids to NaN)
    for col in ["fdv_usd", "market_cap_usd", "daily_volume", "daily_price_change", "grad_pert"]:
        df[col] = pd.to_numeric(df[col], errors="coerce")

    # Timestamps
    df["pool_created_at"] = pd.to_datetime(df["pool_created_at"], utc=True, errors="coerce")
    df["completed_at"] = pd.to_datetime(df["completed_at"], utc=True, errors="coerce")

    return df

In [40]:
def analyze_pools(network, num_rows=5, max_age_minutes=10, min_fdv_usd=5000, tz="Europe/Berlin"):
    """Return the newest pools that are both recent and above an FDV threshold.

    Parameters
    ----------
    network : str
        Network id passed on to ``collect_pool_data``.
    num_rows : int, default 5
        How many of the newest pools to fetch before filtering.
    max_age_minutes : int or float, default 10
        Keep only pools created within this many minutes of now.
    min_fdv_usd : int or float, default 5000
        Keep only pools whose ``fdv_usd`` is strictly greater than this value.
    tz : str, default "Europe/Berlin"
        Timezone ``pool_created_at`` is converted to in the result.

    Returns
    -------
    pandas.DataFrame
        The rows of ``collect_pool_data`` that pass both filters. Rows with a
        missing creation time or FDV never pass.
    """
    df_pool_data = collect_pool_data(network, num_rows)

    # Inspect key metrics such as Fully Diluted Valuation (FDV) and age of the pool.
    # Pools older than `max_age_minutes`, or with an FDV of at most `min_fdv_usd`,
    # are dropped.
    cutoff = pd.Timestamp.now(tz="UTC") - pd.Timedelta(minutes=max_age_minutes)

    is_recent = df_pool_data["pool_created_at"] >= cutoff
    is_large_enough = df_pool_data["fdv_usd"] > min_fdv_usd

    # .copy() so the column assignment below does not touch df_pool_data
    df_filtered = df_pool_data[is_recent & is_large_enough].copy()

    # Convert to local timezone
    df_filtered["pool_created_at"] = df_filtered["pool_created_at"].dt.tz_convert(tz)

    return df_filtered

In [42]:
# Check the 50 newest Solana pools and show the ones that pass the age and FDV filters
analyze_pools("solana", num_rows=50)

Unnamed: 0,pair,dex,add,pool_created_at,fdv_usd,market_cap_usd,daily_volume,daily_price_change,daily_buys,daily_sells,daily_buyers,daily_sellers,grad_pert,completed,completed_at,dest_pool
6,LEGOSIAN / SOL,pump-fun,4zFWcWq5Pyc4UmSsxNDFhRwTAoAZsBvUhKKwPHpmTEV,2026-01-18 21:53:32+01:00,10470.57399,,3012.689067,9.25,14,7,14,7,,False,NaT,


### Monitor real-time price

In [48]:
# Disabled (commented-out) sketch of a live price monitor. When uncommented it
# opens a WebSocket to WS_URL with the pro API key, subscribes to the
# "OnchainSimpleTokenPrice" channel for POOL_ADDRESS and prints every message
# it receives, forever. POOL_ADDRESS is the pool shown in the analyze_pools
# output above.

# POOL_ADDRESS = "4zFWcWq5Pyc4UmSsxNDFhRwTAoAZsBvUhKKwPHpmTEV"

# WS_URL = "wss://stream.coingecko.com/v1"

# async def main():
#     async with websockets.connect(
#         WS_URL,
#         extra_headers={"x-cg-pro-api-key": CG_PRO_API_KEY}
#     ) as ws:
#         # Subscription payload
#         sub_msg = {
#             "type": "subscribe",
#             "channel": "OnchainSimpleTokenPrice",
#             "pool_address": POOL_ADDRESS
#         }
#         await ws.send(json.dumps(sub_msg))

#         while True:
#             msg = await ws.recv()
#             print(msg)

# await main()