In [3]:
import pandas as pd
import yfinance as yf
import ccxt
from datetime import datetime, timezone, timedelta
from mom import influxDB_utils as influx
from mom import Mandelbrot


In [19]:
# Config
ASSETS = ["BTC", "ETH"]
CURRENCIES = ["EUR"]
GRANULARITIES = {
    "Day": "1d",
    "Hour": "1h",
    "Minute": "1m",
    # "Week": not directly supported in yfinance → use 1d and resample
}

In [20]:
client = influx.get_client()
query_api = client.query_api()

In [29]:
def get_last_timestamp(measurement: str, asset: str, currency: str) -> datetime:
    """
    Query InfluxDB to get the most recent timestamp for a measurement/asset/currency.
    """
    query = f"""
    from(bucket: "{influx.INFLUX_BUCKET}")
      |> range(start: 0)
      |> filter(fn: (r) => r._measurement == "{measurement}")
      |> filter(fn: (r) => r.asset == "{asset}")
      |> filter(fn: (r) => r.currency == "{currency}")
      |> filter(fn: (r) => r._field == "high")
      |> last()
    """
    tables = query_api.query(org=influx.INFLUX_ORG, query=query)
    if not tables or not tables[0].records:
        return None
    return tables[0].records[0].get_time()


def fetch_data(asset: str,currency: str, interval: str, start: datetime) -> pd.DataFrame:
    """
    Fetch OHLC data from yfinance from 'start' until now.
    """
    ticker = yf.Ticker(f"{asset}-{currency}")
    print(GRANULARITIES.get(interval))
    df = ticker.history(interval=GRANULARITIES.get(interval), start=start)
    if df.empty:
        return df
    
    df = Mandelbrot.reset_index(df)
    df = Mandelbrot.process_raw_DF(df)
    df = df.set_index("date")
    df["asset"] = asset
    df["currency"] = currency
    df["interval"] = interval
    for i in ["volume", "high","low","delta","delta_log","return","return_log"]:
        df[i] = df[i].astype(float)
    #print(df.head())
    return df


def update():
    for curr in CURRENCIES:
        for asset in ASSETS:
            for interval, gran in GRANULARITIES.items():
                measurement = f"{interval}"
                print(gran, interval)
                last_time = get_last_timestamp(measurement, asset, curr)
                start = last_time + timedelta(seconds=1) if last_time else datetime(2015, 1, 1, tzinfo=timezone.utc)

                print(f"Updating {measurement} for {asset} since {start}")

                df = fetch_data(asset,curr , interval, start)
                if df.empty:
                    print(f"No new data for {asset} ({interval})")
                    continue

                influx.write_dataframe(df)
                print(f"Updated {asset} ({interval}) with {len(df)} new rows")


if __name__ == "__main__":
    update()

1d Day
Updating Day for BTC since 2025-08-29 00:00:01+00:00
1d
Wrote 3 rows to InfluxDB bucket 'CryptoPrices'
Updated BTC (Day) with 3 new rows
1h Hour
Updating Hour for BTC since 2025-08-29 00:00:01+00:00
1h
Wrote 60 rows to InfluxDB bucket 'CryptoPrices'
Updated BTC (Hour) with 60 new rows
1m Minute
Updating Minute for BTC since 2025-08-29 00:00:01+00:00
1m
Wrote 3090 rows to InfluxDB bucket 'CryptoPrices'
Updated BTC (Minute) with 3090 new rows
1d Day
Updating Day for ETH since 2025-08-29 00:00:01+00:00
1d
Wrote 3 rows to InfluxDB bucket 'CryptoPrices'
Updated ETH (Day) with 3 new rows
1h Hour
Updating Hour for ETH since 2025-08-29 00:00:01+00:00
1h
Wrote 60 rows to InfluxDB bucket 'CryptoPrices'
Updated ETH (Hour) with 60 new rows
1m Minute
Updating Minute for ETH since 2025-08-29 00:00:01+00:00
1m
Wrote 3090 rows to InfluxDB bucket 'CryptoPrices'
Updated ETH (Minute) with 3090 new rows


In [22]:
GRANULARITIES.get(interval)

NameError: name 'interval' is not defined

In [30]:
last_time = get_last_timestamp("Day", "BTC", "EUR")
print(last_time)

2025-08-31 00:00:00+00:00


In [6]:
start = last_time + timedelta(seconds=1) if last_time else datetime(2015, 1, 1, tzinfo=timezone.utc)
print(start)

2025-08-29 00:00:01+00:00


In [16]:
df = fetch_data("BTC","EUR","Day", start)

Index(['unix', 'volume', 'high', 'low', 'delta', 'delta_log', 'return',
       'return_log'],
      dtype='object')
                  unix       volume          high           low        delta  \
date                                                                           
2025-08-29  1756425600  66615328407  96446.390625  92045.328125          NaN   
2025-08-30  1756512000  44054324942  93201.585938  91946.765625 -3244.804688   
2025-08-31  1756598400  38736064512  93657.445312  92745.210938   455.859375   

            delta_log    return  return_log asset currency interval  
date                                                                 
2025-08-29        NaN       NaN         NaN   BTC      EUR      Day  
2025-08-30  -3.511189 -0.033644   -0.033644   BTC      EUR      Day  
2025-08-31   2.658831  0.004891    0.004891   BTC      EUR      Day  


In [14]:
df.head()

Unnamed: 0,unix,date,volume,high,low,delta,delta_log,return,return_log,asset,currency,interval
0,1756425600,2025-08-29,66615328407,96446.390625,92045.328125,,,,,BTC,EUR,Day
1,1756512000,2025-08-30,44054324942,93201.585938,91946.765625,-3244.804688,-3.511189,-0.033644,-0.033644,BTC,EUR,Day
2,1756598400,2025-08-31,38760181760,93657.445312,92745.210938,455.859375,2.658831,0.004891,0.004891,BTC,EUR,Day


In [17]:
ticker = yf.Ticker("BTC")
df = ticker.history(interval="1d", start=start)

In [15]:
x = ["high_x","high_y","high"]

if "high_x" in x:
    print("yes")
else:
    print("no")

yes


In [13]:
import requests
import pandas as pd
import time
from datetime import datetime

# Binance Kline endpoint
BASE_URL = "https://api.binance.com/api/v3/klines"

def fetch_klines(symbol="BTCEUR", interval="1m", start_time=None, end_time=None, limit=1000):
    """
    Fetch candlestick data from Binance API.
    """
    params = {
        "symbol": symbol,
        "interval": interval,
        "limit": limit
    }
    if start_time:
        params["startTime"] = start_time
    if end_time:
        params["endTime"] = end_time

    response = requests.get(BASE_URL, params=params)
    data = response.json()

    if "code" in data:  # error
        raise Exception(data)

    return data

def get_historical_klines(symbol="BTCEUR", interval="1m", start_date="2019-01-01", save_csv="BTC_EUR_m_2019.csv"):
    """
    Download all historical klines from start_date until now.
    """
    end = "2019-12-31"
    # convert start date to milliseconds
    start_time = int(datetime.strptime(start_date, "%Y-%m-%d").timestamp() * 1000)
    end_time = int(datetime.strptime(end, "%Y-%m-%d").timestamp() * 1000)
    #end_time = int(time.time() * 1000)

    all_data = []
    while start_time < end_time:
        klines = fetch_klines(symbol, interval, start_time=start_time, limit=1000)
        if not klines:
            break

        all_data.extend(klines)

        # move to next batch (last candle's close time + 1 ms)
        start_time = klines[-1][6] + 1  

        print(f"Fetched {len(all_data)} rows so far...")

        time.sleep(0.2)  # avoid hitting API rate limits

    # Convert to DataFrame
    df = pd.DataFrame(all_data, columns=[
        "open_time", "open", "high", "low", "close", "volume",
        "close_time", "quote_asset_volume", "num_trades",
        "taker_buy_base", "taker_buy_quote", "ignore"
    ])

    # Convert timestamp to datetime
    df["open_time"] = pd.to_datetime(df["open_time"], unit="ms")
    df["close_time"] = pd.to_datetime(df["close_time"], unit="ms")

    # Save CSV
    df.to_csv(save_csv, index=False)
    print(f"Saved {len(df)} rows to {save_csv}")

    return df



In [14]:
df = get_historical_klines()

Fetched 1000 rows so far...
Saved 1000 rows to BTC_EUR_m_2019.csv


In [5]:
df.head()
#df.to_csv("BTC_EUR_m_2022-2025.csv")

Unnamed: 0,open_time,open,high,low,close,volume,close_time,quote_asset_volume,num_trades,taker_buy_base,taker_buy_quote,ignore
0,2021-12-31 23:00:00,40698.13,40751.92,40693.92,40745.48,0.87253,2021-12-31 23:00:59.999,35531.8449275,25,0.83561,34027.5111239,0
1,2021-12-31 23:01:00,40752.66,40763.74,40752.66,40763.74,0.73071,2021-12-31 23:01:59.999,29782.9692948,14,0.73071,29782.9692948,0
2,2021-12-31 23:02:00,40771.0,40785.53,40770.86,40785.53,0.28062,2021-12-31 23:02:59.999,11442.4995692,6,0.25855,10542.6857566,0
3,2021-12-31 23:03:00,40785.53,40794.58,40781.93,40781.93,0.04652,2021-12-31 23:03:59.999,1897.2354511,7,0.01326,540.8249063,0
4,2021-12-31 23:04:00,40781.93,40800.77,40780.2,40795.85,0.07778,2021-12-31 23:04:59.999,3173.2944316,16,0.06675,2723.3849677,0


In [40]:
DF.dtypes

high          float64
volume        float64
asset          object
currency       object
delta         float64
delta_log     float64
return        float64
return_log    float64
interval       object
dtype: object

In [15]:
DF = df
DF.columns = map(str.lower, DF.columns)
DF["currency"] = "EUR"
DF["asset"] = "BTC"
#delta etc
DF["high"] = DF["high"].astype(float)
DF["delta"] = DF["high"].diff()
DF["delta_log"] = Mandelbrot.log10(DF["delta"])
DF["return"] = Mandelbrot.compute_returns(DF["high"])[0]
DF["return_log"] = Mandelbrot.compute_returns(DF["high"])[1]

DF['date'] = pd.to_datetime(DF['open_time'], unit='s')
DF = DF.set_index("date")
DF["interval"] = "Minute"
co =['high','volume','asset', 'currency', 'delta', 'delta_log', 'return','return_log', 'interval']
DF = DF[co]

 # Ensure numeric columns are floats
for col in ["high","low","volume", "delta", "delta_log", "return", "return_log"]:
    if col in DF.columns:
        DF[col] = pd.to_numeric(DF[col], errors="coerce")
# Ensure tag columns are strings
for col in ["asset", "currency", "interval"]:
    if col in DF.columns:
        DF[col] = DF[col].astype(str)



DF.head()


Unnamed: 0_level_0,high,volume,asset,currency,delta,delta_log,return,return_log,interval
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2020-01-03 08:00:00,6244.33,0.002,BTC,EUR,,,,,Minute
2020-01-03 08:01:00,6437.82,0.064656,BTC,EUR,193.49,2.286659,0.030987,0.030516,Minute
2020-01-03 08:02:00,6433.99,0.0,BTC,EUR,-3.83,-0.583199,-0.000595,-0.000595,Minute
2020-01-03 08:03:00,6478.91,0.064122,BTC,EUR,44.92,1.65244,0.006982,0.006957,Minute
2020-01-03 08:04:00,6478.91,0.0,BTC,EUR,0.0,0.0,0.0,0.0,Minute


In [34]:
DF.head()

Unnamed: 0_level_0,high,volume,asset,currency,delta,delta_log,return,return_log,interval
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
2024-12-31 23:00:00,88795.64,0.18342,BTC,EUR,,,,,Minute
2024-12-31 23:01:00,89013.48,0.75052,BTC,EUR,217.84,2.338138,0.002453,0.00245,Minute
2024-12-31 23:02:00,89203.2,0.09794,BTC,EUR,189.72,2.278113,0.002131,0.002129,Minute
2024-12-31 23:03:00,89756.53,4.6311,BTC,EUR,553.33,2.742984,0.006203,0.006184,Minute
2024-12-31 23:04:00,89788.8,1.53803,BTC,EUR,32.27,1.508799,0.00036,0.000359,Minute


In [16]:
influx.write_dataframe(DF, measurement="Minute")

Writing to InfluxDB bucket 'CryptoPrices-Minute'
Wrote 1000 rows to InfluxDB bucket 'CryptoPrices-Minute'


In [None]:
#combine with csv data



def old_csv(c,g,year,interv):

        
    DF = pd.read_csv(f"historical_data/{year}_{g}_{c}_to_EUR.csv")
    DF.columns = map(str.lower, DF.columns)
    DF["currency"] = DF["curr"]
    #delta etc
    DF["delta"] = DF["high"].diff()
    DF["delta_log"] = Mandelbrot.log10(DF["delta"])
    DF["return"] = Mandelbrot.compute_returns(DF["high"])[0]
    DF["return_log"] = Mandelbrot.compute_returns(DF["high"])[1]

    #datetype M
    DF['date'] = pd.to_datetime(DF['unix timestamp'], unit='s')
    rfc3339 = []
    for i in DF["unix timestamp"]:
        dt = datetime.fromtimestamp(i, tz= timezone.utc)
        date = dt.isoformat().replace("+00:00","Z")
        rfc3339.append(date)
    DF["date_rfc"] = rfc3339

    DF = DF.set_index("date")
    DF["interval"] = interv
    co =['high','volume','asset', 'currency', 'delta', 'delta_log', 'return',
           'return_log', 'interval']
    DF = DF[co]

    # Ensure numeric columns are floats
    for col in ["high","low","volume", "delta", "delta_log", "return", "return_log"]:
        if col in DF.columns:
            DF[col] = pd.to_numeric(DF[col], errors="coerce")

    # Ensure tag columns are strings
for col in ["asset", "currency", "interval"]:
    if col in DF.columns:
        DF[col] = DF[col].astype(str)


DF.index = pd.to_datetime(DF.index, utc=True