In [2]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
import plotly.graph_objects as go
import webbrowser

In [None]:
# Import Data

"""merged_path = r"C:\Users\yulig\Desktop\Class Project\Load Data\Step 3 Merged Features CSVs"
symbols = ['BTCUSDT', 'ETHUSDT', 'BNBUSDT', 'SOLUSDT', 'XRPUSDT']

merged_data = {}
for symbol in symbols:
    file_path = os.path.join(merged_path, f"merged_{symbol}_4h.csv")
    if os.path.exists(file_path):
        df = pd.read_csv(file_path, index_col=0, parse_dates=True)
        merged_data[symbol] = df
    else:
        print(f"File not found: {file_path}")"""

In [3]:
# BLOCK 1 – Load and scale data

symbols = ['BTCUSDT', 'ETHUSDT', 'BNBUSDT', 'SOLUSDT', 'XRPUSDT']
features = [
    'return_1', 'return_5', 'return_20',     # Directional signal
    'rsi',                                   # Momentum tension
    'bb_width',                              # Volatility expansion
    'vol_short', 'vol_medium', 'vol_long',   # Only one volatility term
    'histogram'                              # MACD Diff
]

input_path = r"C:\Users\yulig\Desktop\Class Project\Clustering Data"

data = {}

for symbol in symbols:
    file_path = os.path.join(input_path, f"rr_merged_{symbol}_4h.csv")
    df = pd.read_csv(file_path, index_col=0, parse_dates=True)
    X = df[features].dropna()
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    data[symbol] = {
        "df": df,
        "X": X,
        "X_scaled": X_scaled
    }

In [4]:
# BLOCK 2 – Apply KMeans
# Primary Clustering

n_clusters = 3

for symbol in symbols:
    X_scaled = data[symbol]['X_scaled']
    kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10)
    labels = kmeans.fit_predict(X_scaled)
    
    # Store cluster labels
    data[symbol]['df'].loc[data[symbol]['X'].index, 'cluster'] = labels
    data[symbol]['cluster_model'] = kmeans

# Bullish Clustering

bullish_subclusters = 2

for symbol in symbols:
    df = data[symbol]['df']
    model = data[symbol]['cluster_model']
    X = data[symbol]['X']
    
    # Determine bullish cluster (highest mean return_5)
    cluster_returns = df.groupby('cluster')['return_5'].mean()
    bullish_cluster = cluster_returns.idxmax()
    
    # Get subset of data and features
    bullish_df = df[df['cluster'] == bullish_cluster].copy()
    X_bullish = X.loc[bullish_df.index]
    
    # Rescale and fit sub-KMeans
    scaler_bullish = StandardScaler()
    X_bullish_scaled = scaler_bullish.fit_transform(X_bullish)

    subkmeans_bull = KMeans(n_clusters=bullish_subclusters, random_state=42, n_init=10)
    bull_labels = subkmeans_bull.fit_predict(X_bullish_scaled)
    
    # Store labels and model
    df.loc[bullish_df.index, 'bullish_phase'] = bull_labels
    data[symbol]['bullish_model'] = subkmeans_bull
    data[symbol]['bullish_scaler'] = scaler_bullish

# Bearish Clustering

bearish_subclusters = 2

for symbol in symbols:
    df = data[symbol]['df']
    model = data[symbol]['cluster_model']
    X = data[symbol]['X']
    
    # Determine bearish cluster (lowest mean return_5)
    cluster_returns = df.groupby('cluster')['return_5'].mean()
    bearish_cluster = cluster_returns.idxmin()
    
    # Get subset
    bearish_df = df[df['cluster'] == bearish_cluster].copy()
    X_bearish = X.loc[bearish_df.index]
    
    # Rescale and fit sub-KMeans
    scaler_bearish = StandardScaler()
    X_bearish_scaled = scaler_bearish.fit_transform(X_bearish)

    subkmeans_bear = KMeans(n_clusters=bearish_subclusters, random_state=42, n_init=10)
    bear_labels = subkmeans_bear.fit_predict(X_bearish_scaled)
    
    # Store labels and model
    df.loc[bearish_df.index, 'bearish_phase'] = bear_labels
    data[symbol]['bearish_model'] = subkmeans_bear
    data[symbol]['bearish_scaler'] = scaler_bearish

# Neutral Clustering

neutral_subclusters = 2

for symbol in symbols:
    df = data[symbol]['df']
    model = data[symbol]['cluster_model']
    X = data[symbol]['X']

    # Find neutral cluster: the one with return_5 closest to zero
    cluster_returns = df.groupby('cluster')['return_5'].mean()
    neutral_cluster = cluster_returns.sub(0).abs().idxmin()

    neutral_df = df[df['cluster'] == neutral_cluster].copy()
    X_neutral = X.loc[neutral_df.index]

    # Rescale and fit KMeans
    scaler_neutral = StandardScaler()
    X_neutral_scaled = scaler_neutral.fit_transform(X_neutral)

    subkmeans_neutral = KMeans(n_clusters=neutral_subclusters, random_state=42, n_init=10)
    neutral_labels = subkmeans_neutral.fit_predict(X_neutral_scaled)

    # Save to df
    df.loc[neutral_df.index, 'neutral_phase'] = neutral_labels

    # Store models
    data[symbol]['neutral_model'] = subkmeans_neutral
    data[symbol]['neutral_scaler'] = scaler_neutral
    data[symbol]['df'] = df  # update

In [14]:
# Block 3 - Sanity Check

for symbol, content in data.items():
    df = content['df']
    print(f"\n🔍 {symbol}:")

    # Check main clustering
    if 'cluster' in df.columns:
        print("  🟡 Primary Cluster Counts:")
        print(df['cluster'].value_counts())
    else:
        print("  ⚠️ No primary clusters found.")

    # Check bullish subclustering
    if 'bullish_phase' in df.columns:
        print("  🟢 Bullish Subcluster Counts:")
        print(df['bullish_phase'].dropna().value_counts())
    else:
        print("  ⚠️ No bullish subclusters found.")

    # Check bearish subclustering
    if 'bearish_phase' in df.columns:
        print("  🔴 Bearish Subcluster Counts:")
        print(df['bearish_phase'].dropna().value_counts())
    else:
        print("  ⚠️ No bearish subclusters found.")



🔍 BTCUSDT:
  🟡 Primary Cluster Counts:
cluster
0.0    7188
1.0    2764
2.0    1785
Name: count, dtype: int64
  🟢 Bullish Subcluster Counts:
bullish_phase
0.0    1577
1.0    1187
Name: count, dtype: int64
  🔴 Bearish Subcluster Counts:
bearish_phase
0.0    1431
1.0     354
Name: count, dtype: int64

🔍 ETHUSDT:
  🟡 Primary Cluster Counts:
cluster
1.0    7060
0.0    2936
2.0    1741
Name: count, dtype: int64
  🟢 Bullish Subcluster Counts:
bullish_phase
1.0    2293
0.0     643
Name: count, dtype: int64
  🔴 Bearish Subcluster Counts:
bearish_phase
1.0    1499
0.0     242
Name: count, dtype: int64

🔍 BNBUSDT:
  🟡 Primary Cluster Counts:
cluster
1.0    8095
2.0    2520
0.0    1122
Name: count, dtype: int64
  🟢 Bullish Subcluster Counts:
bullish_phase
1.0    2349
0.0     171
Name: count, dtype: int64
  🔴 Bearish Subcluster Counts:
bearish_phase
0.0    790
1.0    332
Name: count, dtype: int64

🔍 SOLUSDT:
  🟡 Primary Cluster Counts:
cluster
1.0    4476
0.0    4246
2.0    1450
Name: count, dtype

In [15]:
# Block 4 - Primary Cluster Labeling for BTC and ETH

cluster_label_mappings = {
    'BTCUSDT': {
        0: 'Neutral',
        1: 'Bullish',
        2: 'Bearish'
    },
    'ETHUSDT': {
        0: 'Bullish',
        1: 'Neutral',
        2: 'Bearish'
    }
}

# Assign primary labels to each asset
for symbol in ['BTCUSDT', 'ETHUSDT']:
    df = data[symbol]['df']
    mapping = cluster_label_mappings[symbol]
    
    # Assign primary label based on cluster
    df['primary_label'] = df['cluster'].map(mapping)

# Primary Cluster Labeling Check

print(data.keys())
print(data['BTCUSDT'].keys())
print(data['ETHUSDT'].keys())

print(data['BTCUSDT']['df'][['cluster', 'primary_label']].dropna().tail(10))
print(data['ETHUSDT']['df'][['cluster', 'primary_label']].dropna().head(10))

print(data['BTCUSDT']['df']['primary_label'].value_counts())
print(data['ETHUSDT']['df']['primary_label'].value_counts())


dict_keys(['BTCUSDT', 'ETHUSDT', 'BNBUSDT', 'SOLUSDT', 'XRPUSDT'])
dict_keys(['df', 'X', 'X_scaled', 'cluster_model', 'bullish_model', 'bullish_scaler', 'bearish_model', 'bearish_scaler', 'neutral_model', 'neutral_scaler'])
dict_keys(['df', 'X', 'X_scaled', 'cluster_model', 'bullish_model', 'bullish_scaler', 'bearish_model', 'bearish_scaler', 'neutral_model', 'neutral_scaler'])
                     cluster primary_label
open_time                                 
2025-05-30 12:00:00      0.0       Neutral
2025-05-30 16:00:00      0.0       Neutral
2025-05-30 20:00:00      0.0       Neutral
2025-05-31 00:00:00      0.0       Neutral
2025-05-31 04:00:00      0.0       Neutral
2025-05-31 08:00:00      0.0       Neutral
2025-05-31 12:00:00      0.0       Neutral
2025-05-31 16:00:00      0.0       Neutral
2025-05-31 20:00:00      0.0       Neutral
2025-06-01 00:00:00      0.0       Neutral
                     cluster primary_label
open_time                                 
2020-01-22 00:00:

In [16]:
# Block 5 - BTC Subcluster Label Creation

# Get subcluster model for bearish phase
subkmeans_bearish = data['BTCUSDT']['bearish_model']

# Extract original (unscaled) feature data for bearish points
bearish_df = data['BTCUSDT']['df'][data['BTCUSDT']['df']['cluster'] == bearish_cluster]
X_bearish = bearish_df[features].dropna()

# Inverse transform the cluster centers
scaler_bearish = StandardScaler().fit(X_bearish)
centroids_bearish = subkmeans_bearish.cluster_centers_
real_centroids_bearish = scaler_bearish.inverse_transform(centroids_bearish)

# Make a readable DataFrame of subcluster averages
bearish_centroid_df = pd.DataFrame(real_centroids_bearish, columns=features)
print("Bearish Subcluster Centroids:")
display(bearish_centroid_df)

# Count how many data points fall into each bearish subcluster
bearish_counts = pd.Series(data['BTCUSDT']['df'].loc[bearish_df.index, 'bearish_phase']).value_counts()
print("\nBearish Subcluster Counts:")
display(bearish_counts)

symbol = 'BTCUSDT'
bullish_cluster = 1.0  # this is already known for BTC

# Extract model and data
subkmeans_bullish = data[symbol]['bullish_model']
bullish_df = data[symbol]['df'][data[symbol]['df']['cluster'] == bullish_cluster]
X_bullish = bullish_df[features].dropna()

# Get inverse-transformed centroids
scaler_bullish = StandardScaler().fit(X_bullish)
centroids_bullish = subkmeans_bullish.cluster_centers_
real_centroids_bullish = scaler_bullish.inverse_transform(centroids_bullish)

# Centroid summary
bullish_centroid_df = pd.DataFrame(real_centroids_bullish, columns=features)
print("📈 Bullish Subcluster Centroids:")
display(bullish_centroid_df)

# Count data points in each subcluster
bullish_counts = pd.Series(data[symbol]['df'].loc[bullish_df.index, 'bullish_phase']).value_counts()
print("\n📊 Bullish Subcluster Counts:")
display(bullish_counts)

# BTC Bearish Subcluster Labels (2 clusters)
bearish_labels = {
    1.0: 'Strong Downtrend',
    0.0: 'Weak Downtrend'
}

# BTC Bullish Subcluster Labels (2 clusters)
bullish_labels = {
    0.0: 'Strong Uptrend',
    1.0: 'Weak Uptrend'
}

# Apply to BTC DataFrame
symbol = 'BTCUSDT'
df = data[symbol]['df']
df['bearish_label'] = df['bearish_phase'].map(bearish_labels)
df['bullish_label'] = df['bullish_phase'].map(bullish_labels)
data[symbol]['df'] = df

# Label counts
print(f"\n✅ {symbol} Bullish Label Counts:")
print(df['bullish_label'].value_counts(dropna=False))
print(f"\n✅ {symbol} Bearish Label Counts:")
print(df['bearish_label'].value_counts(dropna=False))



Bearish Subcluster Centroids:


Unnamed: 0,return_1,return_5,return_20,rsi,bb_width,vol_short,vol_medium,vol_long,histogram
0,-0.006239,-0.025775,-0.062248,32.052447,0.125759,0.016242,0.014867,0.014431,-519.134417
1,-0.004823,-0.035115,-0.113174,35.364284,0.241832,0.031363,0.028009,0.023924,-594.105003



Bearish Subcluster Counts:


bearish_phase
0.0    1431
1.0     354
Name: count, dtype: int64

📈 Bullish Subcluster Centroids:


Unnamed: 0,return_1,return_5,return_20,rsi,bb_width,vol_short,vol_medium,vol_long,histogram
0,0.004726,0.026838,0.089948,76.67172,0.123646,0.01259,0.012702,0.012697,595.670048
1,0.006045,0.023061,0.046282,59.806075,0.105035,0.017395,0.019635,0.01955,260.540713



📊 Bullish Subcluster Counts:


bullish_phase
0.0    1577
1.0    1187
Name: count, dtype: int64


✅ BTCUSDT Bullish Label Counts:
bullish_label
NaN               9099
Strong Uptrend    1577
Weak Uptrend      1187
Name: count, dtype: int64

✅ BTCUSDT Bearish Label Counts:
bearish_label
NaN                 10078
Weak Downtrend       1431
Strong Downtrend      354
Name: count, dtype: int64


In [17]:
# Block 6 - ETH Subcluster Label Creation

symbol = 'ETHUSDT'
bearish_cluster = 2.0  # confirmed earlier for ETH

# Get subcluster model for bearish phase
subkmeans_bearish = data[symbol]['bearish_model']

# Extract original (unscaled) feature data for bearish points
bearish_df = data[symbol]['df'][data[symbol]['df']['cluster'] == bearish_cluster]
X_bearish = bearish_df[features].dropna()

# Inverse transform the cluster centers
scaler_bearish = StandardScaler().fit(X_bearish)
centroids_bearish = subkmeans_bearish.cluster_centers_
real_centroids_bearish = scaler_bearish.inverse_transform(centroids_bearish)

# Make a readable DataFrame of subcluster averages
bearish_centroid_df = pd.DataFrame(real_centroids_bearish, columns=features)
print("🔻 Bearish Subcluster Centroids (ETH):")
display(bearish_centroid_df)

# Count how many data points fall into each bearish subcluster
bearish_counts = pd.Series(data[symbol]['df'].loc[bearish_df.index, 'bearish_phase']).value_counts()
print("\n📊 Bearish Subcluster Counts (ETH):")
display(bearish_counts)

symbol = 'ETHUSDT'
bullish_cluster = 0.0  # confirmed earlier for ETH

# Extract model and data
subkmeans_bullish = data[symbol]['bullish_model']
bullish_df = data[symbol]['df'][data[symbol]['df']['cluster'] == bullish_cluster]
X_bullish = bullish_df[features].dropna()

# Get inverse-transformed centroids
scaler_bullish = StandardScaler().fit(X_bullish)
centroids_bullish = subkmeans_bullish.cluster_centers_
real_centroids_bullish = scaler_bullish.inverse_transform(centroids_bullish)

# Centroid summary
bullish_centroid_df = pd.DataFrame(real_centroids_bullish, columns=features)
print("🟢 Bullish Subcluster Centroids (ETH):")
display(bullish_centroid_df)

# Count data points in each subcluster
bullish_counts = pd.Series(data[symbol]['df'].loc[bullish_df.index, 'bullish_phase']).value_counts()
print("\n📈 Bullish Subcluster Counts (ETH):")
display(bullish_counts)

# ETH Bullish Subcluster Labels (2 clusters)
bullish_labels = {
    0.0: 'Strong Uptrend',
    1.0: 'Weak Uptrend'
}

# ETH Bearish Subcluster Labels (2 clusters)
bearish_labels = {
    0.0: 'Strong Downtrend',
    1.0: 'Weak Downtrend'
}

# Apply to ETH DataFrame
symbol = 'ETHUSDT'
df = data[symbol]['df']
df['bullish_label'] = df['bullish_phase'].map(bullish_labels)
df['bearish_label'] = df['bearish_phase'].map(bearish_labels)
data[symbol]['df'] = df

# Label counts
print(f"\n✅ {symbol} Bullish Label Counts:")
print(df['bullish_label'].value_counts(dropna=False))
print(f"\n✅ {symbol} Bearish Label Counts:")
print(df['bearish_label'].value_counts(dropna=False))



🔻 Bearish Subcluster Centroids (ETH):


Unnamed: 0,return_1,return_5,return_20,rsi,bb_width,vol_short,vol_medium,vol_long,histogram
0,-0.005153,-0.044548,-0.191541,33.186696,0.399843,0.046449,0.038332,0.028641,-60.250439
1,-0.007564,-0.033616,-0.083504,32.039584,0.173623,0.022088,0.020258,0.020162,-34.559434



📊 Bearish Subcluster Counts (ETH):


bearish_phase
1.0    1499
0.0     242
Name: count, dtype: int64

🟢 Bullish Subcluster Centroids (ETH):


Unnamed: 0,return_1,return_5,return_20,rsi,bb_width,vol_short,vol_medium,vol_long,histogram
0,0.009645,0.050698,0.160854,71.686626,0.217745,0.02735,0.027078,0.024497,45.092835
1,0.006174,0.026738,0.068022,67.240511,0.120476,0.016132,0.018131,0.018635,21.437555



📈 Bullish Subcluster Counts (ETH):


bullish_phase
1.0    2293
0.0     643
Name: count, dtype: int64


✅ ETHUSDT Bullish Label Counts:
bullish_label
NaN               8927
Weak Uptrend      2293
Strong Uptrend     643
Name: count, dtype: int64

✅ ETHUSDT Bearish Label Counts:
bearish_label
NaN                 10122
Weak Downtrend       1499
Strong Downtrend      242
Name: count, dtype: int64


In [7]:
# Block 6C – BTC Neutral Subcluster Label Creation

symbol = 'BTCUSDT'
neutral_cluster = 0.0  # Confirmed earlier as the 'neutral' cluster in main clustering

# Get subcluster model for neutral phase
subkmeans_neutral = data[symbol]['neutral_model']

# Extract original (unscaled) feature data for neutral points
neutral_df = data[symbol]['df'][data[symbol]['df']['cluster'] == neutral_cluster]
X_neutral = neutral_df[features].dropna()

# Inverse transform the centroids
scaler_neutral = StandardScaler().fit(X_neutral)
centroids_neutral = subkmeans_neutral.cluster_centers_
real_centroids_neutral = scaler_neutral.inverse_transform(centroids_neutral)

# Make a readable DataFrame
neutral_centroid_df = pd.DataFrame(real_centroids_neutral, columns=features)
print("📊 Neutral Subcluster Centroids (BTCUSDT):")
display(neutral_centroid_df)

# Count how many data points fall into each neutral subcluster
neutral_counts = pd.Series(data[symbol]['df'].loc[neutral_df.index, 'neutral_phase']).value_counts()
print("\n✅ Neutral Subcluster Counts (BTCUSDT):")
display(neutral_counts)

# Assign human-readable labels
neutral_labels = {
    0.0: 'Neutral Drift Up',     # Positive returns, lower volatility
    1.0: 'Neutral Drift Down'    # Negative returns, higher volatility
}

# Apply to BTC DataFrame
df = data[symbol]['df']
df['neutral_label'] = df['neutral_phase'].map(neutral_labels)
data[symbol]['df'] = df

# Show label distribution
print(f"\n✅ {symbol} Neutral Label Counts:")
print(df['neutral_label'].value_counts(dropna=False))


📊 Neutral Subcluster Centroids (BTCUSDT):


Unnamed: 0,return_1,return_5,return_20,rsi,bb_width,vol_short,vol_medium,vol_long,histogram
0,0.000957,0.004152,0.012819,55.736428,0.040583,0.005834,0.007725,0.00881,39.800464
1,-0.00145,-0.006509,-0.016445,41.512518,0.062202,0.009712,0.011234,0.011898,-153.860692



✅ Neutral Subcluster Counts (BTCUSDT):


neutral_phase
0.0    4146
1.0    3042
Name: count, dtype: int64


✅ BTCUSDT Neutral Label Counts:
neutral_label
NaN                   4675
Neutral Drift Up      4146
Neutral Drift Down    3042
Name: count, dtype: int64


In [8]:
# Block 6B – ETH Neutral Subcluster Label Creation

symbol = 'ETHUSDT'
neutral_cluster = 1.0  # Confirmed earlier as the 'neutral' cluster in main clustering

# Get subcluster model for neutral phase
subkmeans_neutral = data[symbol]['neutral_model']

# Extract original (unscaled) feature data for neutral points
neutral_df = data[symbol]['df'][data[symbol]['df']['cluster'] == neutral_cluster]
X_neutral = neutral_df[features].dropna()

# Inverse transform the centroids
scaler_neutral = StandardScaler().fit(X_neutral)
centroids_neutral = subkmeans_neutral.cluster_centers_
real_centroids_neutral = scaler_neutral.inverse_transform(centroids_neutral)

# Make a readable DataFrame
neutral_centroid_df = pd.DataFrame(real_centroids_neutral, columns=features)
print("📊 Neutral Subcluster Centroids (ETH):")
display(neutral_centroid_df)

# Count how many data points fall into each neutral subcluster
neutral_counts = pd.Series(data[symbol]['df'].loc[neutral_df.index, 'neutral_phase']).value_counts()
print("\n✅ Neutral Subcluster Counts (ETH):")
display(neutral_counts)

# Assign human-readable labels
neutral_labels = {
    1.0: 'Neutral Drift Up',     # positive returns, lower vol
    0.0: 'Neutral Drift Down'    # negative returns, higher vol
}

# Apply to ETH DataFrame
df = data[symbol]['df']
df['neutral_label'] = df['neutral_phase'].map(neutral_labels)
data[symbol]['df'] = df

# Show label distribution
print(f"\n✅ {symbol} Neutral Label Counts:")
print(df['neutral_label'].value_counts(dropna=False))


📊 Neutral Subcluster Centroids (ETH):


Unnamed: 0,return_1,return_5,return_20,rsi,bb_width,vol_short,vol_medium,vol_long,histogram
0,-0.002682,-0.010259,-0.019442,41.621976,0.080585,0.012415,0.014401,0.015397,-9.588477
1,0.001192,0.005155,0.015337,55.406522,0.052125,0.007623,0.00968,0.011081,4.314439



✅ Neutral Subcluster Counts (ETH):


neutral_phase
1.0    4035
0.0    3025
Name: count, dtype: int64


✅ ETHUSDT Neutral Label Counts:
neutral_label
NaN                   4803
Neutral Drift Up      4035
Neutral Drift Down    3025
Name: count, dtype: int64


In [11]:
# Block 7 - Primary Subcluster Plot

import plotly.graph_objects as go
import os, webbrowser

def plot_primary_clusters(symbol, n_clusters=3):
    df = data[symbol]['df'].copy()
    df = df[df['cluster'].notna()]

    fig = go.Figure()

    for cluster_id in sorted(df['cluster'].dropna().unique()):
        cluster_df = df[df['cluster'] == cluster_id]
        fig.add_trace(go.Scatter(
            x=cluster_df.index,
            y=cluster_df['close'],
            mode='markers',
            name=f"Cluster {int(cluster_id)} ({len(cluster_df)})",
            marker=dict(size=6, symbol='circle'),
            opacity=0.75
        ))

    # Overlay price line
    fig.add_trace(go.Scatter(
        x=df.index,
        y=df['close'],
        mode='lines',
        name='Price',
        line=dict(color='gray', width=1),
        opacity=0.5
    ))

    fig.update_layout(
        title=f"{symbol} – Primary Clustering Labels",
        xaxis_title="Time",
        yaxis_title="Price",
        hovermode='x unified',
        template='plotly_dark'
    )

    filename = f"{symbol.lower()}_primary_clusters.html"
    fig.write_html(filename)
    webbrowser.open("file://" + os.path.abspath(filename))

# Example usage
plot_primary_clusters('BTCUSDT')
plot_primary_clusters('ETHUSDT')


In [18]:
# Block 8 - BTC & ETH Subcluster Graphs

import plotly.graph_objects as go
import os, webbrowser

def plot_price_with_subclusters(symbol):
    df = data[symbol]['df'].copy()

    fig = go.Figure()

    # Plot bullish labeled points
    if 'bullish_label' in df.columns:
        for label in sorted(df['bullish_label'].dropna().unique()):
            sub_df = df[df['bullish_label'] == label]
            fig.add_trace(go.Scatter(
                x=sub_df.index,
                y=sub_df['close'],
                mode='markers',
                name=f'🟢 {label}',
                marker=dict(size=6, symbol='circle'),
                opacity=0.75
            ))

    # Plot bearish labeled points
    if 'bearish_label' in df.columns:
        for label in sorted(df['bearish_label'].dropna().unique()):
            sub_df = df[df['bearish_label'] == label]
            fig.add_trace(go.Scatter(
                x=sub_df.index,
                y=sub_df['close'],
                mode='markers',
                name=f'🔴 {label}',
                marker=dict(size=6, symbol='x'),
                opacity=0.75
            ))

    # Overlay price line
    fig.add_trace(go.Scatter(
        x=df.index,
        y=df['close'],
        mode='lines',
        name='Price',
        line=dict(color='gray', width=1),
        opacity=0.5
    ))

    fig.update_layout(
        title=f"{symbol} – Bullish & Bearish Subcluster Labels",
        xaxis_title="Time",
        yaxis_title="Price",
        hovermode='x unified',
        template='plotly_dark'
    )

    filename = f"{symbol.lower()}_labeled_subclusters.html"
    fig.write_html(filename)
    webbrowser.open("file://" + os.path.abspath(filename))

# Run for BTC and ETH
plot_price_with_subclusters('BTCUSDT')
plot_price_with_subclusters('ETHUSDT')


In [13]:
# Block 9 - Neutral Split Plot

def plot_primary_clusters_with_neutral_split(symbol):
    df = data[symbol]['df'].copy()
    df = df[df['cluster'].notna()]

    # You must define this manually per asset
    if symbol == 'BTCUSDT':
        bullish_cluster = 2.0
        bearish_cluster = 1.0
        neutral_cluster = 0.0
    elif symbol == 'ETHUSDT':
        bullish_cluster = 0.0
        bearish_cluster = 2.0
        neutral_cluster = 1.0
    else:
        print(f"Symbol {symbol} not configured.")
        return

    fig = go.Figure()

    # Bullish cluster
    if 'bullish_label' in df.columns:
        bull_df = df[df['cluster'] == bullish_cluster]
        for label in sorted(bull_df['bullish_label'].dropna().unique()):
            sub_df = bull_df[bull_df['bullish_label'] == label]
            fig.add_trace(go.Scatter(
                x=sub_df.index,
                y=sub_df['close'],
                mode='markers',
                name=f"🟢 {label} ({len(sub_df)})",
                marker=dict(size=6, symbol='circle'),
                opacity=0.75
            ))

    # Bearish cluster
    if 'bearish_label' in df.columns:
        bear_df = df[df['cluster'] == bearish_cluster]
        for label in sorted(bear_df['bearish_label'].dropna().unique()):
            sub_df = bear_df[bear_df['bearish_label'] == label]
            fig.add_trace(go.Scatter(
                x=sub_df.index,
                y=sub_df['close'],
                mode='markers',
                name=f"🔴 {label} ({len(sub_df)})",
                marker=dict(size=6, symbol='x'),
                opacity=0.75
            ))

    # Neutral cluster split into two labels
    if 'neutral_label' in df.columns:
        neut_df = df[df['cluster'] == neutral_cluster]
        for label in sorted(neut_df['neutral_label'].dropna().unique()):
            sub_df = neut_df[neut_df['neutral_label'] == label]
            fig.add_trace(go.Scatter(
                x=sub_df.index,
                y=sub_df['close'],
                mode='markers',
                name=f"⚪ {label} ({len(sub_df)})",
                marker=dict(size=6, symbol='square'),
                opacity=0.75
            ))

    # Price line overlay
    fig.add_trace(go.Scatter(
        x=df.index,
        y=df['close'],
        mode='lines',
        name='Price',
        line=dict(color='gray', width=1),
        opacity=0.5
    ))

    fig.update_layout(
        title=f"{symbol} – Clusters with Subcluster Labels (Neutral Split)",
        xaxis_title="Time",
        yaxis_title="Price",
        hovermode='x unified',
        template='plotly_dark'
    )

    filename = f"{symbol.lower()}_clusters_neutral_split.html"
    fig.write_html(filename)
    webbrowser.open("file://" + os.path.abspath(filename))

# Run for both
plot_primary_clusters_with_neutral_split('BTCUSDT')
plot_primary_clusters_with_neutral_split('ETHUSDT')


In [None]:
# Make sure data directory exists
import os
os.makedirs("clustered_output", exist_ok=True)

# Save BTC clustered DataFrame
btc_df = data['BTCUSDT']['df']
btc_df.to_csv("clustered_output/btcusdt_clustered.csv")

# Save ETH clustered DataFrame
eth_df = data['ETHUSDT']['df']
eth_df.to_csv("clustered_output/ethusdt_clustered.csv")

print("✅ BTC and ETH clustered data saved to 'clustered_output/' folder.")
