In [2]:
import requests
import pandas as pd

In [3]:
def fetch(cryptoId= 'bitcoin', vsCurrency='usd'):
    url = f'https://api.coingecko.com/api/v3/coins/{cryptoId}/market_chart'
    params = {
        'vs_currency': vsCurrency,
        'days': '30',  # Fetch data for the last 30 days
        # 'interval': 'hourly'  # Use 'daily' interval for daily data
    }
    response = requests.get(url, params=params)
    if response.status_code != 200:
        raise Exception(f"API request failed with status code {response.status_code}: {response.text}")
    
    data = response.json()
    prices = data['prices']  # List of [timestamp, price]
    df = pd.DataFrame(prices, columns=["timestamp", "price"] )
    df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')
    return df
    

In [4]:
df =fetch()

In [5]:
df.tail()

Unnamed: 0,timestamp,price
715,2024-08-15 18:09:40.880,58148.516633
716,2024-08-15 19:18:57.013,57182.569052
717,2024-08-15 20:18:24.902,57182.199562
718,2024-08-15 21:08:06.000,56765.926472
719,2024-08-15 22:10:02.000,57487.554571


In [6]:
df.set_index('timestamp', inplace=True)
df.tail()

Unnamed: 0_level_0,price
timestamp,Unnamed: 1_level_1
2024-08-15 18:09:40.880,58148.516633
2024-08-15 19:18:57.013,57182.569052
2024-08-15 20:18:24.902,57182.199562
2024-08-15 21:08:06.000,56765.926472
2024-08-15 22:10:02.000,57487.554571


In [7]:
ohlc = df['price'].resample('30min').ohlc()
ohlc["return"] = ohlc['close'].pct_change(fill_method=None)
ohlc.dropna(inplace=True)
ohlc.tail()

Unnamed: 0_level_0,open,high,low,close,return
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2024-08-06 13:00:00,54822.418107,54822.418107,54822.418107,54822.418107,-0.007327
2024-08-06 17:00:00,56943.221647,56943.221647,56943.221647,56943.221647,0.010164
2024-08-07 03:00:00,56768.594557,56768.594557,56768.594557,56768.594557,-0.008676
2024-08-07 10:00:00,57356.352899,57356.352899,57356.352899,57356.352899,-0.002724
2024-08-07 17:00:00,56228.692331,56228.692331,56228.692331,56228.692331,-2.5e-05


In [8]:
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split

In [9]:
X = ohlc[["open", 'high', 'low', 'return']].values
y = ohlc['close'].shift(-1).dropna().values

X = X[:-1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)





In [10]:
model = LinearRegression()
model.fit(X_train, y_train)
print(f'Model R^2 score: {model.score(X_test, y_test)}')

Model R^2 score: 0.9413871083390957


In [11]:
from xgboost import XGBRegressor

model2 = XGBRegressor(objective='reg:squarederror',  n_estimators=100, learning_rate=0.05)
model2.fit(X_train, y_train)
print(f'Model R^2 score: {model2.score(X_test, y_test)}')


Model R^2 score: 0.9611165308889182


In [12]:
import joblib
def save_model(model, filepath='crypto_model2.pkl'):
    joblib.dump(model, filepath)

save_model(model2)

In [13]:
import ccxt

In [18]:
# exchange = ccxt.bybit()
# ohlv = exchange.fetch_ohlcv('BTC/USDT', timeframe='1d', limit=1)

In [15]:
import yfinance as yf

In [30]:
btc_data = yf.download(tickers='BTC-USD', start='2024-07-01', end='2024-08-15', interval='30m')

[*********************100%%**********************]  1 of 1 completed

                             Open          High           Low         Close  \
Datetime                                                                      
2024-07-01 00:00:00  62673.605469  62675.726562  62510.226562  62648.875000   
2024-07-01 00:30:00  62622.617188  62820.148438  62611.332031  62820.148438   
2024-07-01 01:00:00  62837.519531  63556.449219  62836.957031  63395.187500   
2024-07-01 01:30:00  63390.902344  63567.218750  63390.902344  63531.328125   
2024-07-01 02:00:00  63529.867188  63700.453125  63449.398438  63463.726562   
...                           ...           ...           ...           ...   
2024-08-14 21:30:00  59112.960938  59112.960938  58906.050781  58906.050781   
2024-08-14 22:00:00  58924.429688  58945.265625  58584.503906  58902.156250   
2024-08-14 22:30:00  58916.050781  59014.472656  58847.753906  58981.777344   
2024-08-14 23:00:00  58987.371094  59010.570312  58939.402344  58939.402344   
2024-08-14 23:30:00  58954.160156  58954.160156  587




In [32]:
btc_data.tail()

Unnamed: 0_level_0,Open,High,Low,Close,Adj Close,Volume
Datetime,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2024-08-14 21:30:00,59112.960938,59112.960938,58906.050781,58906.050781,58906.050781,10428416
2024-08-14 22:00:00,58924.429688,58945.265625,58584.503906,58902.15625,58902.15625,189612032
2024-08-14 22:30:00,58916.050781,59014.472656,58847.753906,58981.777344,58981.777344,30433280
2024-08-14 23:00:00,58987.371094,59010.570312,58939.402344,58939.402344,58939.402344,55711744
2024-08-14 23:30:00,58954.160156,58954.160156,58732.335938,58749.320312,58749.320312,30574592


In [34]:
btc_data['Return'] = btc_data['Close'].pct_change()
btc_data['Open-Close'] = btc_data['Open'] - btc_data['Close']  # Open-Close difference
btc_data['High-Low'] = btc_data['High'] - btc_data['Low'] 

btc_data.dropna(inplace=True)

X = btc_data[['Open', 'High', 'Low', 'Close', 'Volume', 'Return', 'Open-Close', 'High-Low']].values
y = btc_data['Close'].shift(-1).dropna().values

# Align X with y after shift
X = X[:-1]

In [35]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [36]:
from sklearn.metrics import mean_squared_error

model = LinearRegression()
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
print(f'Model Mean Squared Error: {mse}')
print(f'Model R^2 score: {model.score(X_test, y_test)}')

Model Mean Squared Error: 105406.36062630369
Model R^2 score: 0.9940980118608796


In [45]:
import numpy as np
open_price = 56635.60
high_price = 57100.00
low_price = 56569.80

X = np.array([open_price, high_price, low_price]).reshape(1, -1)



In [49]:
# predicted_close =model.predict(X)[0]
# print(predicted_close)

In [61]:
data = {
    'Open':[59112.960938, 58924.429688, 58916.050781, 58987.371094, 58954.160156],
    'High': [59112.960938, 58945.265625, 59014.472656, 59010.570312, 58954.160156],
    'Low': [58906.050781, 58584.503906, 58847.753906, 58939.402344, 58732.335938],
    'Close': [58906.050781, 58902.156250, 58981.777344, 58939.402344, 58749.320312],
    'Adj Close': [58906.050781, 58902.156250, 58981.777344, 58939.402344, 58749.320312],
    'Volume': [10428416, 189612032, 30433280, 55711744, 30574592]
}

df = pd.DataFrame(data)
print(df)

           Open          High           Low         Close     Adj Close  \
0  59112.960938  59112.960938  58906.050781  58906.050781  58906.050781   
1  58924.429688  58945.265625  58584.503906  58902.156250  58902.156250   
2  58916.050781  59014.472656  58847.753906  58981.777344  58981.777344   
3  58987.371094  59010.570312  58939.402344  58939.402344  58939.402344   
4  58954.160156  58954.160156  58732.335938  58749.320312  58749.320312   

      Volume  
0   10428416  
1  189612032  
2   30433280  
3   55711744  
4   30574592  


In [62]:
def calculate_return(previous_close, current_close):
    return (current_close - previous_close) / previous_close 

# Calculate the return for the last row
previous_close = df['Close'].iloc[-2]
current_close = df['Close'].iloc[-1]
return_value = calculate_return(previous_close, current_close)

df['Return'] = return_value
df['Open-Close'] = df['Open'].iloc[-1] - df['Close'].iloc[-1]   # Open-Close difference
df['High-Low'] = df['High'].iloc[-1]  - df['Low'].iloc[-1]  

In [63]:

# Prepare the input features
last_row = df.iloc[-1]
X = np.array([last_row['Open'], last_row['High'], last_row['Low'],  last_row['Return'],  last_row['Open-Close'],  last_row['High-Low'], last_row['Adj Close'], last_row['Volume'], ]).reshape(1, -1)
print(X)

[[ 5.89541602e+04  5.89541602e+04  5.87323359e+04 -3.22504173e-03
   2.04839844e+02  2.21824218e+02  5.87493203e+04  3.05745920e+07]]


In [65]:
predicted_close =model.predict(X)
print(predicted_close)

[-116979.25584602]
