In [1]:
import pandas as pd 
import numpy as np  
import datetime as dt 

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import MinMaxScaler

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from tensorflow.keras.layers import LSTM

from itertools import cycle
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots

import ccxt

In [2]:
exchange = ccxt.binance({'enableRateLimit': True})

symbol = 'ETH/USDT'
timeframe = '1d'
limit = 1000  

data = exchange.fetch_ohlcv(symbol, timeframe=timeframe, limit=limit)

df = pd.DataFrame(data, columns=[
    'timestamp', 'open', 'high', 'low', 'close', 'volume'
])

df['timestamp'] = pd.to_datetime(df['timestamp'], unit='ms')

df.head()

Unnamed: 0,timestamp,open,high,low,close,volume
0,2023-03-07,1565.84,1584.49,1536.14,1561.96,437947.9
1,2023-03-08,1561.95,1570.89,1523.61,1532.38,460101.8
2,2023-03-09,1532.38,1546.45,1408.0,1437.32,718485.1
3,2023-03-10,1437.32,1439.43,1368.39,1426.44,960262.5
4,2023-03-11,1426.43,1478.07,1408.4,1471.97,1374860.0


In [3]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 6 columns):
 #   Column     Non-Null Count  Dtype         
---  ------     --------------  -----         
 0   timestamp  1000 non-null   datetime64[ns]
 1   open       1000 non-null   float64       
 2   high       1000 non-null   float64       
 3   low        1000 non-null   float64       
 4   close      1000 non-null   float64       
 5   volume     1000 non-null   float64       
dtypes: datetime64[ns](1), float64(5)
memory usage: 47.0 KB


In [4]:
df.describe()

Unnamed: 0,timestamp,open,high,low,close,volume
count,1000,1000.0,1000.0,1000.0,1000.0,1000.0
mean,2024-07-18 12:00:00,2693.25042,2758.94721,2623.32112,2694.69473,452406.0
min,2023-03-07 00:00:00,1426.43,1439.43,1368.39,1426.44,43982.32
25%,2023-11-11 18:00:00,1890.6975,1923.6825,1860.8525,1891.4275,253536.4
50%,2024-07-18 12:00:00,2543.235,2623.67,2476.88,2545.89,378572.8
75%,2025-03-25 06:00:00,3351.8775,3457.33,3251.25,3351.875,569793.8
max,2025-11-30 00:00:00,4832.07,4956.78,4711.0,4832.07,2807979.0
std,,830.029236,853.875376,803.084175,829.321944,291119.4


In [5]:
start_date = '2023-11-28'
end_date   = '2025-11-29'

df = df[(df['timestamp'] >= start_date) & (df['timestamp'] <= end_date)]

print("New Starting Date:", df['timestamp'].min())
print("New Ending Date:", df['timestamp'].max())

New Starting Date: 2023-11-28 00:00:00
New Ending Date: 2025-11-29 00:00:00


In [6]:
df['MonthYear'] = df['timestamp'].dt.to_period('M').astype(str)

monthwise = (
    df.groupby('MonthYear')[['open', 'close']]
    .mean()
    .reset_index()
)

monthwise['MonthYear'] = pd.to_datetime(monthwise['MonthYear'])
monthwise = monthwise.sort_values('MonthYear')

monthwise['MonthYear'] = monthwise['MonthYear'].dt.strftime('%b %Y')

# Plotly chart 
fig = go.Figure()

fig.add_trace(go.Bar(
    x=monthwise['MonthYear'],
    y=monthwise['open'],
    name='Open Price',
    marker_color='crimson'
))

fig.add_trace(go.Bar(
    x=monthwise['MonthYear'],
    y=monthwise['close'],
    name='Close Price',
    marker_color='lightsalmon'
))

fig.update_layout(
    barmode='group',
    xaxis_tickangle=-45,
    title='Monthly Open vs Close Price (2023–2025)',
    xaxis_title='Month-Year',
    yaxis_title='Price',
    width=1200
)

fig.show()

In [7]:
df['MonthYear'] = df['timestamp'].dt.to_period('M').astype(str)

monthwise = (
    df.groupby('MonthYear')[['high', 'low']]
    .mean()
    .reset_index()
)

monthwise['MonthYear'] = pd.to_datetime(monthwise['MonthYear'])
monthwise = monthwise.sort_values('MonthYear')

monthwise['MonthYear'] = monthwise['MonthYear'].dt.strftime('%b %Y')

# Plotly chart
fig = go.Figure()

fig.add_trace(go.Bar(
    x=monthwise['MonthYear'],
    y=monthwise['high'],
    name='High Price',
    marker_color='rgb(0, 153, 204)'   
))

fig.add_trace(go.Bar(
    x=monthwise['MonthYear'],
    y=monthwise['low'],
    name='Low Price',
    marker_color='rgb(255, 128, 0)'   
))

fig.update_layout(
    barmode='group',
    xaxis_tickangle=-45,
    title='Monthly High vs Low Price (2023–2025)',
    xaxis_title='Month-Year',
    yaxis_title='Price',
    width=1200
)

fig.show()

In [8]:
names = cycle(['Open Price', 'Close Price', 'High Price', 'Low Price'])

fig = px.line(
    df,
    x=df['timestamp'],
    y=[df['open'], df['close'], df['high'], df['low']],
    labels={'timestamp': 'Date', 'value': 'Price'}
)

fig.update_layout(
    title='Price Analysis Chart (Open, Close, High, Low)',
    font_size=15,
    font_color='black',
    legend_title_text='Price Type'
)

fig.for_each_trace(lambda t: t.update(name=next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()

In [9]:
closedf = df[['timestamp','close']]

In [10]:
closedf.head()

Unnamed: 0,timestamp,close
266,2023-11-28,2048.14
267,2023-11-29,2028.81
268,2023-11-30,2051.96
269,2023-12-01,2087.24
270,2023-12-02,2164.74


In [11]:
fig = px.line(
    closedf,
    x='timestamp',
    y='close',
    title='Close Price Over Time',
    labels={'timestamp': 'Date', 'close': 'Close Price'}
)

fig.update_layout(
    font_size=15,
    font_color='black',
    xaxis_title='Date',
    yaxis_title='Close Price'
)

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()

In [12]:
del closedf['timestamp']
scaler=MinMaxScaler(feature_range=(0,1))
closedf=scaler.fit_transform(np.array(closedf).reshape(-1,1))
print(closedf.shape)

(733, 1)


In [13]:
training_size=int(len(closedf)*0.80)
test_size=len(closedf)-training_size
train_data,test_data=closedf[0:training_size,:],closedf[training_size:len(closedf),:1]
print("train_data: ", train_data.shape)
print("test_data: ", test_data.shape)

train_data:  (586, 1)
test_data:  (147, 1)


In [14]:
def create_dataset(dataset, time_step=1):
    dataX, dataY = [], []
    for i in range(len(dataset)-time_step-1):
        a = dataset[i:(i+time_step), 0]
        dataX.append(a)
        dataY.append(dataset[i + time_step, 0])
    return np.array(dataX), np.array(dataY)


In [15]:
time_step = 60

X_train, y_train = create_dataset(train_data, time_step)
X_test, y_test   = create_dataset(test_data, time_step)
print("X_train shape: ", X_train.shape)
print("y_train shape: ", y_train.shape)
print("X_test shape: ", X_test.shape)
print("y_test shape: ", y_test.shape)

X_train shape:  (525, 60)
y_train shape:  (525,)
X_test shape:  (86, 60)
y_test shape:  (86,)


In [16]:
X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
X_test  = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
print("X_train reshaped: ", X_train.shape)
print("X_test reshaped: ", X_test.shape)

X_train reshaped:  (525, 60, 1)
X_test reshaped:  (86, 60, 1)


In [17]:
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(time_step, 1), activation='relu'))
model.add(LSTM(50, return_sequences=False))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mean_squared_error')

history = model.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=200,
    batch_size=32,
    verbose=1
)


Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



Epoch 1/200
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 51ms/step - loss: 0.0537 - val_loss: 0.0182
Epoch 2/200
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 32ms/step - loss: 0.0094 - val_loss: 0.0066
Epoch 3/200
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 31ms/step - loss: 0.0054 - val_loss: 0.0058
Epoch 4/200
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - loss: 0.0046 - val_loss: 0.0054
Epoch 5/200
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step - loss: 0.0044 - val_loss: 0.0055
Epoch 6/200
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 36ms/step - loss: 0.0039 - val_loss: 0.0052
Epoch 7/200
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - loss: 0.0039 - val_loss: 0.0077
Epoch 8/200
[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 37ms/step - loss: 0.0038 - val_loss: 0.0105
Epoch 9/200
[1m17/17[0m [32m━━━━━━━━━

In [18]:
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = list(range(1, len(loss) + 1))

fig = go.Figure()

# Training Loss curve
fig.add_trace(go.Scatter(
    x=epochs,
    y=loss,
    mode='lines',
    name='Training Loss',
    line=dict(width=3)
))

# Validation Loss curve
fig.add_trace(go.Scatter(
    x=epochs,
    y=val_loss,
    mode='lines',
    name='Validation Loss',
    line=dict(width=3)
))

fig.update_layout(
    title='Training vs Validation Loss Curve',
    xaxis_title='Epoch',
    yaxis_title='Loss',
    font=dict(size=15),
    legend_title='Metrics',
    template='plotly_white'
)

fig.show()

In [19]:
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

[1m17/17[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 23ms/step
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 18ms/step


In [20]:
train_predict = scaler.inverse_transform(train_predict)
test_predict  = scaler.inverse_transform(test_predict)

y_train_actual = scaler.inverse_transform(y_train.reshape(-1,1))
y_test_actual  = scaler.inverse_transform(y_test.reshape(-1,1))

In [21]:
rmse = np.sqrt(mean_squared_error(y_test_actual, test_predict))
mae = mean_absolute_error(y_test_actual, test_predict)
mape = np.mean(np.abs((y_test_actual - test_predict) / y_test_actual)) * 100
r2 = r2_score(y_test_actual, test_predict)

print("RMSE:", rmse)
print("MAE :", mae)
print("MAPE:", mape)
print("R² Score:", r2)

RMSE: 354.80524258115537
MAE : 299.76259413608295
MAPE: 7.227958181524931
R² Score: 0.5749955110499754


In [22]:
look_back = time_step

trainPredictPlot = np.empty_like(closedf)
trainPredictPlot[:] = np.nan
trainPredictPlot[look_back:len(train_predict) + look_back, :] = train_predict

testPredictPlot = np.empty_like(closedf)
testPredictPlot[:] = np.nan
testPredictPlot[len(train_predict) + (look_back * 2) + 1 : len(closedf) - 1, :] = test_predict


In [23]:
plotdf = pd.DataFrame({
    'date': df['timestamp'],
    'original_close': scaler.inverse_transform(closedf).reshape(-1),
    'train_predicted_close': trainPredictPlot.reshape(-1),
    'test_predicted_close': testPredictPlot.reshape(-1)
})

In [24]:
names = cycle([
    'Original Close Price',
    'Train Predicted Close Price',
    'Test Predicted Close Price'
])

fig = px.line(
    plotdf,
    x='date',
    y=['original_close', 'train_predicted_close', 'test_predicted_close'],
    labels={'value': 'ETH Price', 'date': 'Date'}
)

fig.update_layout(
    title='Original vs Train Prediction vs Test Prediction (ETH Close Price)',
    font_size=15,
    font_color='black',
    plot_bgcolor='white',
    legend_title_text='Price Type'
)

fig.for_each_trace(lambda t: t.update(name=next(names)))

fig.update_xaxes(showgrid=False)
fig.update_yaxes(showgrid=False)

fig.show()

In [25]:
# Predict next 15 days
n_future_days = 15

x_input = test_data[len(test_data)-time_step:].reshape(1, -1)
temp_input = list(x_input[0])

lst_output = []
i = 0

while i < n_future_days:
    if len(temp_input) > time_step:
        x_input = np.array(temp_input[-time_step:])
        x_input = x_input.reshape((1, time_step, 1))
        yhat = model.predict(x_input, verbose=0)

        temp_input.append(yhat[0][0])
        lst_output.append(yhat[0][0])
        i += 1
    else:
        x_input = np.array(temp_input).reshape((1, time_step, 1))
        yhat = model.predict(x_input, verbose=0)

        temp_input.append(yhat[0][0])
        lst_output.append(yhat[0][0])
        i += 1

print("Predicted next 15 days:", len(lst_output))

Predicted next 15 days: 15


In [26]:
future_predictions = scaler.inverse_transform(np.array(lst_output).reshape(-1, 1))

In [27]:
last_15_actual = df['close'].tail(15).values

In [28]:
plot_future = pd.DataFrame({
    'date': pd.date_range(start=df['timestamp'].iloc[-1], periods=16, freq='D')[1:],  
    'predicted_price': future_predictions.flatten()
})

plot_actual = pd.DataFrame({
    'date': df['timestamp'].tail(15),
    'actual_price': last_15_actual
})


In [29]:
# Last 15 actual
fig.add_trace(go.Scatter(
    x=plot_actual['date'],
    y=plot_actual['actual_price'],
    mode='lines+markers',
    name='Last 15 Days Actual',
    line=dict(width=3),
))

# Next 15 predicted
fig.add_trace(go.Scatter(
    x=plot_future['date'],
    y=plot_future['predicted_price'],
    mode='lines+markers',
    name='Next 15 Days Predicted',
    line=dict(width=3, dash='dash'),
))

fig.update_layout(
    title="ETH Price Prediction (Last 15 Actual vs Next 15 Predicted)",
    xaxis_title="Date",
    yaxis_title="ETH Price (USD)",
    plot_bgcolor='white',
    font=dict(size=14)
)

fig.show()

In [30]:
print("Predicted ETH Prices for Next 15 Days:\n")

for i, price in enumerate(future_predictions.flatten(), start=1):
    print(f"Day {i}: {price:.2f} USD")

Predicted ETH Prices for Next 15 Days:

Day 1: 2940.28 USD
Day 2: 2890.52 USD
Day 3: 2837.77 USD
Day 4: 2782.41 USD
Day 5: 2724.64 USD
Day 6: 2664.68 USD
Day 7: 2602.83 USD
Day 8: 2539.72 USD
Day 9: 2476.14 USD
Day 10: 2413.03 USD
Day 11: 2351.44 USD
Day 12: 2292.31 USD
Day 13: 2236.29 USD
Day 14: 2184.05 USD
Day 15: 2136.23 USD
