In [3]:
import requests
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
import matplotlib.pyplot as plt

API_KEY = '2acb79dd-f8ca-48b5-b10c-bbee2290ed94'
BASE_URL = 'https://api.tokenterminal.com/v2'

def fetch_projects():
    url = f'{BASE_URL}/projects'
    headers = {
        'Authorization': f'Bearer {API_KEY}'
    }
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    return response.json()

def fetch_metrics():
    url = f'{BASE_URL}/metrics'
    headers = {
        'Authorization': f'Bearer {API_KEY}'
    }
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    return response.json()

def fetch_time_series_data(metric_ids, project_ids, start_date, end_date):
    cache = {project_id: [] for project_id in project_ids}
    project_ids_str = ','.join(project_ids)
    
    for metric_id in metric_ids:
        url = f'{BASE_URL}/metrics/{metric_id}'
        headers = {
            'Authorization': f'Bearer {API_KEY}'
        }
        params = {
            'project_ids': project_ids_str,
            'start': start_date,
            'end': end_date
        }
        response = requests.get(url, headers=headers, params=params)
        if response.status_code == 404:
            print(f"Data for projects or metric ID '{metric_id}' not found.")
            continue
        response.raise_for_status()
        data = response.json()['data']
        
        for entry in data:
            cache[entry['project_id']].append(entry)
    
    for project_id in cache:
        if cache[project_id]:
            df = pd.DataFrame(cache[project_id])
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            df = df.pivot(index='timestamp', columns='metric_id', values='value')
            df.reset_index(inplace=True)
            cache[project_id] = df
        else:
            cache[project_id] = pd.DataFrame()
    
    return cache

projects = fetch_projects()
metrics = fetch_metrics()

projects_df = pd.DataFrame(projects['data'])
metrics_df = pd.DataFrame(metrics['data'])

metric_ids = ['market_cap_fully_diluted', 'earnings', 'tvl']
project_ids = ['synthetix', 'lyra', 'mux']
start_date = '2023-01-01'
end_date = '2024-05-28'

time_series_data = fetch_time_series_data(metric_ids, project_ids, start_date, end_date)

# Aligning data
def align_data(data_dict):
    all_timestamps = pd.date_range(start=start_date, end=end_date, freq='D')
    for project_id, df in data_dict.items():
        df.set_index('timestamp', inplace=True)
        df = df.reindex(all_timestamps)
        df.fillna(method='ffill', inplace=True)
        df.fillna(method='bfill', inplace=True)
        data_dict[project_id] = df
    return data_dict

time_series_data = align_data(time_series_data)

# Extracting relevant data
synthetix_data = time_series_data['synthetix'][['market_cap_fully_diluted']].copy()
other_projects_data = pd.concat([
    time_series_data['lyra'][['earnings', 'tvl']],
    time_series_data['mux'][['earnings', 'tvl']]
], axis=1)

# Debugging: print data shapes
print("Synthetix Data Shape:", synthetix_data.shape)
print("Other Projects Data Shape:", other_projects_data.shape)

# Combining into a single DataFrame
combined_data = pd.concat([synthetix_data, other_projects_data], axis=1)
combined_data.dropna(inplace=True)

# Debugging: print combined data shape and head
print("Combined Data Shape:", combined_data.shape)
print(combined_data.head())

# Preprocessing
if combined_data.empty:
    raise ValueError("Combined data is empty. Check the data preparation steps.")

scaler = MinMaxScaler()
scaled_data = scaler.fit_transform(combined_data)

# Preparing the data for the LSTM model
def create_dataset(data, time_step=1):
    X, Y = [], []
    for i in range(len(data) - time_step - 1):
        a = data[i:(i + time_step), 1:]  # TVL and Earnings of other projects
        X.append(a)
        Y.append(data[i + time_step, 0])  # Market Cap of Synthetix
    return np.array(X), np.array(Y)

time_step = 60  # You can adjust this
X, Y = create_dataset(scaled_data, time_step)

# Reshaping the data to fit the LSTM model (samples, time steps, features)
X = X.reshape(X.shape[0], X.shape[1], X.shape[2])

# Splitting the data into training and test sets
train_size = int(len(X) * 0.8)
test_size = len(X) - train_size
X_train, X_test = X[0:train_size], X[train_size:len(X)]
Y_train, Y_test = Y[0:train_size], Y[train_size:len(Y)]

# Building the LSTM model
model = Sequential()
model.add(LSTM(50, return_sequences=True, input_shape=(time_step, X.shape[2])))
model.add(Dropout(0.2))
model.add(LSTM(50, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(1))

model.compile(optimizer='adam', loss='mean_squared_error')

# Training the model
model.fit(X_train, Y_train, epochs=50, batch_size=32, validation_data=(X_test, Y_test), verbose=1)

# Predicting and inverse transforming the results
train_predict = model.predict(X_train)
test_predict = model.predict(X_test)

# Inverting the predictions
train_predict_full = np.concatenate((train_predict, X_train[:, -1, :]), axis=1)
test_predict_full = np.concatenate((test_predict, X_test[:, -1, :]), axis=1)

train_predict_full = scaler.inverse_transform(np.concatenate((train_predict, np.zeros((train_predict.shape[0], scaled_data.shape[1]-1))), axis=1))[:, 0]
test_predict_full = scaler.inverse_transform(np.concatenate((test_predict, np.zeros((test_predict.shape[0], scaled_data.shape[1]-1))), axis=1))[:, 0]

# Evaluating the model
plt.figure(figsize=(14, 5))
plt.plot(combined_data.index, combined_data['market_cap_fully_diluted'], label='Actual Market Cap')
plt.plot(combined_data.index[time_step + 1:train_size + time_step + 1], train_predict_full, label='Train Predict')
plt.plot(combined_data.index[train_size + time_step + 1:], test_predict_full, label='Test Predict')
plt.legend()
plt.show()


Synthetix Data Shape: (514, 1)
Other Projects Data Shape: (514, 4)
Combined Data Shape: (0, 5)
Empty DataFrame
Columns: [market_cap_fully_diluted, earnings, tvl, earnings, tvl]
Index: []


ValueError: Combined data is empty. Check the data preparation steps.