# Idea

Find times of the quarter with big movements in price and watch the drift of the asset for several periods after. Trade based on that (either hypothesizing momentum or reversal).

Example signal: long or short assets for days or a couple of weeks right after earnings according to size and direction of 1-day, post-event returns (similar to "post-earnings announcement drift").

# Imports

In [18]:
import pandas as pd

from binance.client import Client as bnb_client
from datetime import datetime

# Get Historical Data For Cryptocurrencies

In [19]:
# The universe is based on this snapshot from December 20, 2020: https://coinmarketcap.com/historical/20201220/

univ = [
    "BTCUSDT", "ETHUSDT", "ADAUSDT", "BNBUSDT", "XRPUSDT", "DOTUSDT", "MATICUSDT", "LTCUSDT", "BCHUSDT",
    "LINKUSDT", "XLMUSDT", "USDCUSDT", "EOSUSDT", "TRXUSDT", "XTZUSDT", "FILUSDT", "NEOUSDT", "DAIUSDT",
    "DASHUSDT", "VETUSDT", "ATOMUSDT", "AAVEUSDT", "UNIUSDT", "GRTUSDT", "THETAUSDT", "IOTAUSDT", "BUSDUSDT",
    "ZECUSDT", "YFIUSDT", "ETCUSDT", "WAVESUSDT", "COMPUSDT", "SNXUSDT", "DOGEUSDT", "MKRUSDT", "ZILUSDT",
    "SUSHIUSDT", "KSMUSDT", "OMGUSDT", "ONTUSDT", "ALGOUSDT", "EGLDUSDT", "BATUSDT", "DGBUSDT", "ZRXUSDT",
    "TUSDUSDT", "QTUMUSDT", "ICXUSDT", "AVAXUSDT", "RENUSDT", "HBARUSDT", "NEARUSDT", "LRCUSDT", "CELOUSDT",
    "KNCUSDT", "LSKUSDT", "OCEANUSDT", "QNTUSDT", "USTUSDT", "BANDUSDT", "MANAUSDT", "ENJUSDT", "ANTUSDT",
    "BNTUSDT", "ZENUSDT", "NMRUSDT", "RVNUSDT", "IOSTUSDT", "OXTUSDT", "CRVUSDT", "MATICUSDT", "HNTUSDT",
    "BALUSDT", "CHZUSDT"
]

should_download_stock_data = False

In [20]:
client = bnb_client(tld='US')

def get_binance_px(symbol, freq, start_ts = '2020-12-20'):
    data = client.get_historical_klines(symbol, freq, start_ts)
    columns = ['open_time', 'open', 'high', 'low', 'close', 'volume', 'close_time', 'quote_volume',
               'num_trades', 'taker_base_volume', 'taker_quote_volume', 'ignore']
    data = pd.DataFrame(data, columns = columns)
    
    # Convert from POSIX timestamp (number of millisecond since jan 1, 1970)
    data['open_time'] = data['open_time'].map(lambda x: datetime.utcfromtimestamp(x/1000))
    data['close_time'] = data['close_time'].map(lambda x: datetime.utcfromtimestamp(x/1000))
    return data 


if should_download_stock_data:
    freq = '4h'
    px = {}
    for x in univ:
        print(f"Downloading data for symbol {x}")
        data = get_binance_px(x, freq)
        px[x] = data.set_index('open_time')['close']

    px = pd.DataFrame(px).astype(float)
    px.to_csv('./class_project_input_prices.csv')
else:
    px = pd.read_csv('./class_project_input_prices.csv')
    date_format = "%Y-%m-%d %H:%M:%S"
    px['open_time'] = px['open_time'].apply(lambda t:  datetime.strptime(t, date_format))
    px.set_index('open_time', inplace=True)

# Data up to 2025-08-31 08:00:00
px

Unnamed: 0_level_0,BTCUSDT,ETHUSDT,ADAUSDT,BNBUSDT,XRPUSDT,DOTUSDT,MATICUSDT,LTCUSDT,BCHUSDT,LINKUSDT,...,BNTUSDT,ZENUSDT,NMRUSDT,RVNUSDT,IOSTUSDT,OXTUSDT,CRVUSDT,HNTUSDT,BALUSDT,CHZUSDT
open_time,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2020-12-20 00:00:00,23353.97,646.62,0.16284,32.9681,0.56944,,,117.34,313.46,,...,,12.056,,,,0.2492,,1.41095,,
2020-12-20 04:00:00,23604.24,655.23,0.16638,33.6559,0.57916,,,121.30,340.00,,...,,12.107,,,,0.2477,,1.43156,,
2020-12-20 08:00:00,23549.50,652.88,0.16463,34.8228,0.57948,,,118.09,349.70,,...,,12.237,,,,0.2477,,1.44273,,
2020-12-20 12:00:00,23880.85,653.24,0.16542,35.0120,0.57798,,,119.10,361.21,,...,,12.074,,,,0.2533,,1.47130,,
2020-12-20 16:00:00,23932.71,649.82,0.16502,34.7042,0.57306,,,116.60,357.09,,...,,12.008,,,,0.2503,,1.43083,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-08-30 16:00:00,108921.64,4352.10,0.81950,856.7600,2.80720,3.816,,110.23,542.00,23.36,...,0.725,7.240,16.62,0.01323,0.00342,0.0535,0.7646,,,0.03830
2025-08-30 20:00:00,108569.75,4374.56,0.82170,862.6200,2.81940,3.773,,110.77,552.90,23.47,...,0.725,7.240,15.16,0.01323,0.00342,0.0535,0.7646,,,0.03830
2025-08-31 00:00:00,109155.73,4486.79,0.83590,863.4900,2.85180,3.867,,111.67,551.30,23.90,...,0.764,7.240,15.21,0.01353,0.00342,0.0535,0.7936,,,0.04045
2025-08-31 04:00:00,108660.63,4451.64,0.82760,858.7700,2.82920,3.823,,111.23,549.80,23.76,...,0.764,7.240,14.87,0.01331,0.00342,0.0535,0.7838,,,0.04045


In [None]:
# Split into train and test data
# Look at returns over different periods in train data: 4h, 8h, 12h, 1d, 2d
# Try following trend for unusual coins

# Fetching News Data

In [11]:
from polygon import RESTClient
from polygon.rest.models import (
    TickerNews,
)

client = RESTClient("Z18nRpCOazbM35uu5cazvg25oTp1XS25")

news = []
for n in client.list_ticker_news(
    ticker="GM",
    published_utc_lte="2025-10-01",
    order="asc",
    limit="4",
    sort="published_utc",
):
    news.append(n)

#print(news)

# print date + title
for index, item in enumerate(news):
    # verify this is an agg
    if isinstance(item, TickerNews):
        print("{:<25}{:<15}".format(item.published_utc, item.title))
        
        if index == 20:
            break

MaxRetryError: HTTPSConnectionPool(host='api.polygon.io', port=443): Max retries exceeded with url: /v2/reference/news?cursor=YXA9MjAyMS0wNC0yN1QxNiUzQTAzJTNBMDBaJmFzPVk4VzlsekRQX3pXTkNtZTZGd19VZzNrOGU4cXd3T2hfcGIwSnl2c0J4WWMmbGltaXQ9NCZvcmRlcj1hc2NlbmRpbmcmcHVibGlzaGVkX3V0Yy5sdGU9MjAyNS0xMC0wMSZ0aWNrZXI9R00 (Caused by ResponseError('too many 429 error responses'))

In [7]:
# client = RESTClient(api_key="Z18nRpCOazbM35uu5cazvg25oTp1XS25")
# trades = [t for t in client.list_trades(ticker="TSLA", limit=100)]

In [5]:
import requests
from bs4 import BeautifulSoup

# Step 1: Define the URL of the website to scrape
# url = 'http://www.webscraper-testsite.com/news/'
url = 'https://www.cnn.com/'

# Step 2: Send a GET request to the website
try:
    response = requests.get(url)
    # Raise an exception for bad status codes (4xx or 5xx)
    response.raise_for_status()
    
    print(response.text)

    # Step 3: Parse the HTML content using Beautiful Soup
    soup = BeautifulSoup(response.text, 'html.parser')

    # Step 4: Find and extract the data
    # Inspect the website's HTML to find the correct tags and classes
    # For this site, headlines are within <h2> tags with class 'story-headline'
    headlines = soup.find_all('h2', class_='story-headline')

    # Step 5: Print the extracted data
    if headlines:
        print("Latest News Headlines:")
        for index, headline in enumerate(headlines, 1):
            print(f"{index}. {headline.text.strip()}")
    else:
        print("No headlines found.")

except requests.exceptions.RequestException as e:
    print(f"Error fetching the URL: {e}")

IOPub data rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_data_rate_limit`.

Current values:
NotebookApp.iopub_data_rate_limit=1000000.0 (bytes/sec)
NotebookApp.rate_limit_window=3.0 (secs)

