In [1]:
import sys
import zmq
import json
import pandas as pd
from pandas_gbq import read_gbq
import plotly.express as px
import plotly.offline as pyo
import plotly.io as pio

pio.renderers.default = 'notebook'  # or 'jupyterlab' if using JupyterLab


In [2]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import OneHotEncoder
from sklearn.metrics import accuracy_score

## Libraries

In [3]:
NODE_DOMAIN_MAP = {
	"GABMKJM6I25XI4K7U6XWMULOUQIQ27BCTMLS6BYYSOWKTBUXVRJSXHYQ": "Stellar Development Foundation",
	"GCGB2S2KGYARPVIA37HYZXVRM2YZUEXA6S33ZU5BUDC6THSB62LZSTYH": "Stellar Development Foundation",
	"GCM6QMP3DLRPTAZW2UZPCPX2LF3SXWXKPMP3GKFZBDSF3QZGV2G5QSTK": "Stellar Development Foundation",
	"GAK6Z5UVGUVSEK6PEOCAYJISTT5EJBB34PN3NOLEQG2SUKXRVV2F6HZY": "SatoshiPay",
	"GBJQUIXUO4XSNPAUT6ODLZUJRV2NPXYASKUBY4G5MYP3M47PCVI55MNT": "SatoshiPay",
	"GC5SXLNAM3C4NMGK2PXK4R34B5GNZ47FYQ24ZIBFDFOCU6D4KBN4POAE": "SatoshiPay",
	"GCFONE23AB7Y6C5YZOMKUKGETPIAJA4QOYLS5VNS4JHBGKRZCPYHDLW7": "LOBSTR",
	"GCB2VSADESRV2DDTIVTFLBDI562K6KE3KMKILBHUHUWFXCUBHGQDI7VL": "LOBSTR",
	"GD5QWEVV4GZZTQP46BRXV5CUMMMLP4JTGFD7FWYJJWRL54CELY6JGQ63": "LOBSTR",
	"GA7TEPCBDQKI7JQLQ34ZURRMK44DVYCIGVXQQWNSWAEQR6KB4FMCBT7J": "LOBSTR",
	"GA5STBMV6QDXFDGD62MEHLLHZTPDI77U3PFOD2SELU5RJDHQWBR5NNK7": "LOBSTR",
	"GAAV2GCVFLNN522ORUYFV33E76VPC22E72S75AQ6MBR5V45Z5DWVPWEU": "Blockdaemon Inc.",
	"GAVXB7SBJRYHSG6KSQHY74N7JAFRL4PFVZCNWW2ARI6ZEKNBJSMSKW7C": "Blockdaemon Inc.",
	"GAYXZ4PZ7P6QOX7EBHPIZXNWY4KCOBYWJCA4WKWRKC7XIUS3UJPT6EZ4": "Blockdaemon Inc.",
	"GBLJNN3AVZZPG2FYAYTYQKECNWTQYYUUY2KVFN2OUKZKBULXIXBZ4FCT": "Public Node",
	"GCIXVKNFPKWVMKJKVK2V4NK7D4TC6W3BUMXSIJ365QUAXWBRPPJXIR2Z": "Public Node",
	"GCVJ4Z6TI6Z2SOGENSPXDQ2U4RKH3CNQKYUHNSSPYFPNWTLGS6EBH7I2": "Public Node",
	"GA7DV63PBUUWNUFAF4GAZVXU2OZMYRATDLKTC7VTCG7AU4XUPN5VRX4A": "Franklin Templeton",
	"GARYGQ5F2IJEBCZJCBNPWNWVDOFK7IBOHLJKKSG2TMHDQKEEC6P4PE4V": "Franklin Templeton",
	"GCMSM2VFZGRPTZKPH5OABHGH4F3AVS6XTNJXDGCZ3MKCOSUBH3FL6DOB": "Franklin Templeton",
	"GD6SZQV3WEJUH352NTVLKEV2JM2RH266VPEM7EH5QLLI7ZZAALMLNUVN": "Whalestack LLC",
	"GADLA6BJK6VK33EM2IDQM37L5KGVCY5MSHSHVJA4SCNGNUIEOTCR6J5T": "Whalestack LLC",
	"GAZ437J46SCFPZEDLVGDMKZPLFO77XJ4QVAURSJVRZK2T5S7XUFHXI2Z": "Whalestack LLC",
}

## Full history

In [4]:
nodes = NODE_DOMAIN_MAP.keys()
nodes_string = ",".join(f"'{item}'" for item in nodes)
query = f"""
  SELECT
    hl.node_id as node_id,
    hl.closed_at AS close_at
  FROM crypto-stellar.crypto_stellar.history_ledgers AS hl
  WHERE hl.closed_at BETWEEN '2024-01-01 00:00:00 UTC' AND '2025-01-01 00:00:00 UTC'
  AND hl.node_id in ({nodes_string})
"""
full_df = read_gbq(query, project_id='crypto-stellar')
full_df['close_at'] = full_df['close_at'].dt.tz_localize(None)


Downloading: 100%|[32m█████████████████████████████████████████████████████████████████[0m|[0m


In [5]:
full_df

Unnamed: 0,node_id,close_at
0,GA5STBMV6QDXFDGD62MEHLLHZTPDI77U3PFOD2SELU5RJD...,2024-10-01 00:28:02
1,GA5STBMV6QDXFDGD62MEHLLHZTPDI77U3PFOD2SELU5RJD...,2024-10-01 00:36:55
2,GA5STBMV6QDXFDGD62MEHLLHZTPDI77U3PFOD2SELU5RJD...,2024-10-01 00:41:00
3,GA5STBMV6QDXFDGD62MEHLLHZTPDI77U3PFOD2SELU5RJD...,2024-10-01 01:03:24
4,GA5STBMV6QDXFDGD62MEHLLHZTPDI77U3PFOD2SELU5RJD...,2024-10-01 01:52:20
...,...,...
4231681,GD6SZQV3WEJUH352NTVLKEV2JM2RH266VPEM7EH5QLLI7Z...,2024-07-28 02:08:32
4231682,GD6SZQV3WEJUH352NTVLKEV2JM2RH266VPEM7EH5QLLI7Z...,2024-07-28 02:10:11
4231683,GD6SZQV3WEJUH352NTVLKEV2JM2RH266VPEM7EH5QLLI7Z...,2024-07-28 02:17:53
4231684,GD6SZQV3WEJUH352NTVLKEV2JM2RH266VPEM7EH5QLLI7Z...,2024-07-31 22:04:03


In [69]:
import numpy as np
import pandas as pd
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.utils import to_categorical
from sklearn.preprocessing import LabelEncoder

full_df.sort_values('close_at', inplace=True)
df = full_df.tail(50000).copy()

# Prepare the target variable
label_encoder = LabelEncoder()
df['node_id'] = label_encoder.fit_transform(df['node_id'])

data = pd.Series(df['node_id']).tolist()

characters = sorted(set(data))  # Unique characters

seq_length = 200  # Length of input sequences
X = []
y = []

for i in range(len(data) - seq_length):
    sequence = data[i:i + seq_length]
    label = data[i + seq_length:i + seq_length + 1]
    X.append(sequence)
    y.append(label)

X = np.array(X)
y = to_categorical(y, num_classes=len(characters))  # One-hot encoding

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = Sequential()
model.add(Dense(256, activation='relu', input_shape=(X_train.shape[1],)))
model.add(Dense(y_train.shape[1], activation='softmax'))  # Assuming y_train is one-hot encoded
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

model.fit(X_train, y_train, batch_size=256, epochs=100)

predictions = model.predict(X_test)

predicted_classes = np.argmax(predictions, axis=1)
actual_classes = np.argmax(y_test, axis=1)

accuracy = np.mean(predicted_classes == actual_classes)
print(f'Accuracy: {accuracy * 100:.2f}%')

Epoch 1/100



Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.



[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 732us/step - accuracy: 0.0448 - loss: 7.8590
Epoch 2/100
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 774us/step - accuracy: 0.0480 - loss: 3.1371
Epoch 3/100
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 787us/step - accuracy: 0.0496 - loss: 3.1296
Epoch 4/100
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 788us/step - accuracy: 0.0513 - loss: 3.1256
Epoch 5/100
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 760us/step - accuracy: 0.0508 - loss: 3.1221
Epoch 6/100
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 771us/step - accuracy: 0.0506 - loss: 3.1214
Epoch 7/100
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 791us/step - accuracy: 0.0516 - loss: 3.1175
Epoch 8/100
[1m156/156[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 795us/step - accuracy: 0.0507 - loss: 3.1173
Epoch 9/100
[1m156/156[0m 