#Step 1 – Dataset Selection

In [None]:
import yfinance as yf
import pandas as pd

# Download dataset (XRP-USD daily data)
df = yf.download("XRP-USD", start="2017-01-01", end="2025-10-01", progress=False)
df.reset_index(inplace=True)

# Rename Adj Close
df.rename(columns={'Adj Close': 'Adj_Close'}, inplace=True)

print("Shape:", df.shape)
print(df.head())

#Step 2 – Data Understanding & Preprocessing


*   Clean the data (fix column names & duplicates)
*   Add technical indicators (MA7, MA21, RSI, MACD, Bollinger Bands, etc.)
* Check missing values, duplicates, and summary statistics

In [None]:
import numpy as np

# --- Fix MultiIndex columns (caused by Yahoo Finance) ---
df.columns = ['Date', 'Close', 'High', 'Low', 'Open', 'Volume']

# --- Remove duplicates & sort by date ---
df.drop_duplicates(subset=['Date'], inplace=True)
df.sort_values('Date', inplace=True)
df.reset_index(drop=True, inplace=True)

# --- Check for missing values ---
print("Missing values per column:\n", df.isnull().sum())

# --- Handle missing values (if any) ---
df.fillna(method='ffill', inplace=True)

# --- Add technical indicators ---
df['MA7'] = df['Close'].rolling(7).mean()
df['MA21'] = df['Close'].rolling(21).mean()
df['MA50'] = df['Close'].rolling(50).mean()
df['Return'] = df['Close'].pct_change()
df['Volatility21'] = df['Return'].rolling(21).std()

# RSI (Relative Strength Index)
delta = df['Close'].diff()
gain = np.where(delta > 0, delta, 0)
loss = np.where(delta < 0, -delta, 0)
avg_gain = pd.Series(gain).rolling(14).mean()
avg_loss = pd.Series(loss).rolling(14).mean()
rs = avg_gain / avg_loss
df['RSI'] = 100 - (100 / (1 + rs))

# MACD (12-day EMA - 26-day EMA)
ema12 = df['Close'].ewm(span=12, adjust=False).mean()
ema26 = df['Close'].ewm(span=26, adjust=False).mean()
df['MACD'] = ema12 - ema26
df['MACD_signal'] = df['MACD'].ewm(span=9, adjust=False).mean()

# Bollinger Bands
df['BB_upper'] = df['MA21'] + 2*df['Close'].rolling(21).std()
df['BB_lower'] = df['MA21'] - 2*df['Close'].rolling(21).std()

# --- Final clean-up ---
df.fillna(method='bfill', inplace=True)

print("✅ Data cleaned and enriched with technical indicators!")
print("Shape:", df.shape)
print("Columns:", df.columns.tolist())
print(df.head())

#Step 3 – Data Visualization

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
plt.style.use('seaborn-v0_8-darkgrid')

# 1️⃣ Closing Price Trend
plt.figure(figsize=(12,6))
plt.plot(df['Date'], df['Close'], label='XRP Close Price', color='blue')
plt.title('XRP Closing Price Over Time')
plt.xlabel('Date'); plt.ylabel('Price (USD)')
plt.legend()
plt.show()

print("Insight 1️⃣: Shows XRP price fluctuations over years — clear volatility and long-term trends visible.")

# 2️⃣ Correlation Heatmap
plt.figure(figsize=(10,6))
sns.heatmap(df[['Close','High','Low','Open','Volume','RSI','MACD','MA21','Volatility21']].corr(), annot=True, cmap='coolwarm')
plt.title('Feature Correlation Heatmap')
plt.show()

print("Insight 2️⃣: Close, High, Low, Open are strongly correlated (>0.9). Volume and RSI show weaker relationships, indicating market momentum effects.")

# 3️⃣ Distribution of Daily Returns
plt.figure(figsize=(8,5))
sns.histplot(df['Return'], bins=50, kde=True, color='purple')
plt.title('Distribution of Daily Returns')
plt.xlabel('Daily Return')
plt.ylabel('Frequency')
plt.show()

print("Insight 3️⃣: Daily returns are mostly centered near zero, with a few extreme outliers — highlighting XRP's high volatility nature.")

# 4️⃣ RSI vs Price (Overbought/Oversold Analysis)
plt.figure(figsize=(12,6))
plt.plot(df['Date'], df['RSI'], label='RSI', color='orange')
plt.axhline(70, color='red', linestyle='--', label='Overbought (70)')
plt.axhline(30, color='green', linestyle='--', label='Oversold (30)')
plt.title('Relative Strength Index (RSI) Over Time')
plt.xlabel('Date'); plt.ylabel('RSI Value')
plt.legend()
plt.show()

print("Insight 4️⃣: RSI fluctuates between 30–70; spikes above 70 indicate overbought zones followed by price corrections.")

# 5️⃣ Bollinger Bands (Volatility Analysis)
plt.figure(figsize=(12,6))
plt.plot(df['Date'], df['Close'], label='Close Price', color='blue')
plt.plot(df['Date'], df['BB_upper'], label='Upper Band', color='red', linestyle='--')
plt.plot(df['Date'], df['BB_lower'], label='Lower Band', color='green', linestyle='--')
plt.title('Bollinger Bands – Price Volatility')
plt.xlabel('Date'); plt.ylabel('Price (USD)')
plt.legend()
plt.show()

print("Insight 5️⃣: When price touches or crosses the upper/lower Bollinger Bands, it indicates strong market volatility and potential reversals.")


#Step 4 – Model Building: LSTM for XRP Price Prediction

Step 4.1 – Data Preparation for Model

In [None]:
from sklearn.preprocessing import MinMaxScaler
import numpy as np

# Select features for model
features = ['Close','High','Low','Open','Volume','MA7','MA21','MA50','RSI','MACD','Volatility21']
data = df[features].values

# Normalize
scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(data)

# Sequence creation (60 days history → next day prediction)
def create_sequences(data, seq_length=60):
    X, y = [], []
    for i in range(seq_length, len(data)):
        X.append(data[i-seq_length:i])
        y.append(data[i, 0])  # Predict next day's Close
    return np.array(X), np.array(y)

X, y = create_sequences(scaled_data, 60)

# Split
split = int(0.8 * len(X))
X_train, X_test = X[:split], X[split:]
y_train, y_test = y[:split], y[split:]

print("✅ Sequences created successfully!")
print(f"X_train shape: {X_train.shape}")
print(f"y_train shape: {y_train.shape}")
print(f"X_test shape: {X_test.shape}")
print(f"y_test shape: {y_test.shape}")

Step 4.2 – Build and Train the LSTM Model

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

model = Sequential([
    LSTM(64, return_sequences=True, input_shape=(X_train.shape[1], X_train.shape[2])),
    Dropout(0.2),
    LSTM(32),
    Dense(1)
])

model.compile(optimizer='adam', loss='mean_squared_error')
history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.1, verbose=1)


#Step 5 – Model Evaluation & Visualization

We’ll visualize:
* Loss vs Epoch (training curve)

* Actual vs Predicted price (performance plot)

* Error distribution

In [None]:
import matplotlib.pyplot as plt

predicted = model.predict(X_test)
predicted_prices = scaler.inverse_transform(np.concatenate([predicted, np.zeros((len(predicted), data.shape[1]-1))], axis=1))[:,0]
actual_prices = scaler.inverse_transform(np.concatenate([y_test.reshape(-1,1), np.zeros((len(y_test), data.shape[1]-1))], axis=1))[:,0]

# Compare visually
plt.figure(figsize=(10,6))
plt.plot(actual_prices, label='Actual Price')
plt.plot(predicted_prices, label='Predicted Price')
plt.title('XRP Price Prediction (LSTM)')
plt.xlabel('Days'); plt.ylabel('Price (USD)')
plt.legend()
plt.show()

plt.figure(figsize=(8,5))
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss vs Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

import seaborn as sns

errors = actual_prices - predicted_prices
plt.figure(figsize=(8,5))
sns.histplot(errors, bins=30, kde=True)
plt.title('Prediction Error Distribution')
plt.xlabel('Prediction Error (Actual - Predicted)')
plt.show()

for i in range(5):
    print(f"Day {i+1} → Actual: {actual_prices[i]:.4f}, Predicted: {predicted_prices[i]:.4f}")

#Step 6 – Conclusion & Future Scope

Conclusion:

The project applied Exploratory Data Analysis (EDA) and Deep Learning (LSTM) for predicting the future price of XRP cryptocurrency using real-world historical data (2017–2025).
The dataset contained six core features — Open, High, Low, Close, Volume, Date — and was enhanced with ten derived technical indicators such as moving averages (MA7, MA21, MA50), RSI, MACD, Volatility, and Bollinger Bands to provide stronger predictive signals.

After preprocessing and scaling, the data was used to train an LSTM-based time series model that captures sequential patterns across 60 previous trading days.
The model achieved low validation loss (≈ 5×10⁻⁵) and visually demonstrated strong alignment between predicted and actual price trends.

Key Learnings

* EDA revealed clear volatility cycles and price momentum patterns.

* RSI and MACD proved influential in detecting uptrends and corrections.

* LSTM networks effectively captured temporal dependencies compared to linear models.

* Proper normalization and feature engineering significantly improved convergence.


Challenges Faced

* High variance in crypto data caused early overfitting without dropout layers.

* Scaling back the predictions required careful inverse transformation handling.

* Handling missing and noisy data before 2017 was critical to avoid distortions.

Future Scope

* Use Transformer-based architectures (e.g., Temporal Fusion Transformer or BERT for time series) to improve long-term forecasting.

* Integrate sentiment analysis from Twitter or Reddit to account for social signals.

* Build a real-time dashboard using Streamlit or Power BI for live prediction updates.

* Incorporate multiple cryptocurrencies (BTC, ETH, ADA) for multi-asset modeling.