In [5]:
import json
import torch
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
import plotly.graph_objects as go
from torch_geometric.data import Data
from torch_geometric.utils.convert import from_networkx, to_networkx

In [6]:
# Read the JSON manifest; only the parsing needs the file handle to be open.
with open("stocks.json") as json_file:
    stock_paths = json.load(json_file)

# 'predict' holds a single CSV path, 'other' holds a list of CSV paths.
predict_path = stock_paths['predict']
other_paths = stock_paths['other']

# Every manifest entry is resolved relative to ../data/.
predictee = pd.read_csv(f'../data/{predict_path}')
stocks = [pd.read_csv(f'../data/{csv_path}') for csv_path in other_paths]


def clean_stock(df):
    """Return a cleaned copy of a raw stock frame, indexed by publication date.

    Steps: drop the 'status' column, replace missing 'per_change' values with 0,
    parse 'published_date' as datetimes and make it the index.

    Args:
        df: raw frame containing at least the 'status', 'per_change' and
            'published_date' columns.

    Returns:
        A new DataFrame; the frame passed in is left untouched.

    Raises:
        KeyError: if one of the required columns is missing.
    """
    # drop() returns a new frame, so the column assignments below never touch `df`.
    cleaned = df.drop(columns=['status'])
    cleaned['per_change'] = cleaned['per_change'].fillna(0)
    cleaned['published_date'] = pd.to_datetime(cleaned['published_date'])
    return cleaned.set_index('published_date')


def clean_stocks(df, start_date):
    """Clean a raw stock frame and keep only rows dated on/after `start_date`.

    Delegates to `clean_stock` so both entry points always apply the same
    cleaning steps; the only difference is the date filter applied here.

    Args:
        df: raw frame, same requirements as for `clean_stock`.
        start_date: lower bound (inclusive) compared against the datetime index.

    Returns:
        A new DataFrame restricted to index values >= `start_date`.
    """
    cleaned = clean_stock(df)
    return cleaned[cleaned.index >= start_date]


# Clean the series to predict first: its first index entry is the cut-off date
# handed to clean_stocks for every other stock.
predictee = clean_stock(predictee)
start_date = predictee.index[0]
stocks = [clean_stocks(raw_stock, start_date) for raw_stock in stocks]

In [7]:
# Windowing parameters: rows per window, and rows between consecutive window starts.
window_size, step_size = 30, 20
# Column whose values are turned into visibility graphs.
vis_col = 'close'
# Number of rows in the cleaned predictee frame.
l = len(predictee)

In [8]:
# Slide a window of `window_size` rows over `predictee` in steps of `step_size`.
# For every window this records:
#   * the predictee slice itself                      -> predict_frames
#   * the predictee 'close' value right after it      -> targets
#   * each other stock restricted to the window dates -> stocks_frames
predict_frames = []
predict_dates = predictee.index
predict_values = predictee['close'].values
cols = predictee.columns
stocks_frames = []
targets = []

# One window needs `window_size` rows plus one following row for the target.
# Fail with an explicit message instead of an opaque IndexError further down.
if 0 < l <= window_size:
    raise ValueError(
        f'need at least {window_size + 1} rows to build one window and its target, got {l}'
    )

for i in range(0, l, step_size):
    end = i + window_size

    # `>=` rather than `>`: when end == l there is no row at position `end`
    # to serve as the target, so that case must use the tail window as well.
    is_tail = end >= l
    if is_tail:
        # Last window: the final `window_size` rows before the very last row,
        # which becomes the target.
        lo, hi = l - window_size - 1, l - 1
    else:
        lo, hi = i, end

    predict_frames.append(predictee[lo:hi])
    targets.append(predict_values[hi])

    # Inclusive date range covered by this window.
    start_date = predict_dates[lo]
    end_date = predict_dates[hi - 1]

    stocks_frames.append([
        stock[(stock.index >= start_date) & (stock.index <= end_date)]
        for stock in stocks
    ])

    if is_tail:
        break

In [9]:
def _visibility_edge_index(frame):
    """Edge index of the visibility graph built from the frame's `vis_col` column."""
    return from_networkx(nx.visibility_graph(frame[vis_col])).edge_index


def _date_ids(frame):
    """The frame's index formatted as '%Y%m%d' and converted to a list of ints."""
    return frame.index.strftime('%Y%m%d').astype(int).tolist()


# One entry per predictee window.
predict_edge_indexes = [_visibility_edge_index(window) for window in predict_frames]
predict_frame_dates = [_date_ids(window) for window in predict_frames]

# One list per window, holding one entry per stock frame in that window.
stocks_edge_indexes = [
    [_visibility_edge_index(stock_window) for stock_window in window_group]
    for window_group in stocks_frames
]
stocks_frames_dates = [
    [_date_ids(stock_window) for stock_window in window_group]
    for window_group in stocks_frames
]

In [10]:
# Build one merged graph per window.
#
# Node layout inside a merged graph: the predictee window's rows come first,
# followed by the rows of each stock frame in order. `offset` is the index of
# the first node of the stock currently being appended.
#
# Edges, in order: the predictee's own edges, then for every stock its own
# edges (shifted by `offset`) followed by cross edges linking each predictee
# node to the stock nodes that carry the same date id.
#
# NOTE(review): cross edges are stored in one direction only
# (predictee node -> stock node); confirm this is what the downstream model expects.
graphs = []
for i, predict_frame in enumerate(predict_frames):
    predict_x = torch.tensor(predict_frame.values)
    # Explicit long dtype: an empty Python list would otherwise become a float tensor.
    # A distinct name is used so the `predict_dates` index defined in the
    # windowing cell is not overwritten here.
    predict_date_ids = torch.tensor(predict_frame_dates[i], dtype=torch.long)

    # Collect the pieces and concatenate once per graph.
    x_parts = [predict_x]
    edge_parts = [predict_edge_indexes[i]]
    offset = predict_x.size(0)

    for j, stock_frame in enumerate(stocks_frames[i]):
        stock_x = torch.tensor(stock_frame.values)
        stock_date_ids = torch.tensor(stocks_frames_dates[i][j], dtype=torch.long)

        # same_date[p, s] is True when predictee node p and stock node s share a date id.
        same_date = predict_date_ids.unsqueeze(1) == stock_date_ids.unsqueeze(0)
        predict_nodes, stock_nodes = same_date.nonzero(as_tuple=True)
        # Always shaped (2, K) with long dtype, including K == 0 (no common dates),
        # and each matching pair appears exactly once even if a date id repeats.
        cross_edge_index = torch.stack([predict_nodes, stock_nodes + offset])

        x_parts.append(stock_x)
        edge_parts.append(stocks_edge_indexes[i][j] + offset)
        edge_parts.append(cross_edge_index)
        offset += stock_x.size(0)

    graphs.append(Data(
        x=torch.cat(x_parts, dim=0),
        edge_index=torch.cat(edge_parts, dim=1),
        y=torch.tensor(targets[i]),
    ))

In [18]:
# Summarise the dataset and draw the first merged graph as an interactive Plotly figure.
print(f'total graphs: {len(graphs)}')
print(f'first graph: {graphs[0]}')
G = to_networkx(graphs[0])

# Fixed seed so the layout (and therefore the figure) is the same on every run.
pos = nx.spring_layout(G, seed=42)

x_coords = [pos[node][0] for node in G.nodes()]
y_coords = [pos[node][1] for node in G.nodes()]

# Each edge contributes (start, end, None); the None breaks the line so that
# all edges can be drawn by a single Scatter trace.
edge_x = []
edge_y = []
for src, dst in G.edges():
    edge_x.extend([pos[src][0], pos[dst][0], None])
    edge_y.extend([pos[src][1], pos[dst][1], None])

edge_trace = go.Scatter(
    x=edge_x, y=edge_y,
    line=dict(width=1, color='#888'),
    hoverinfo='none',
    mode='lines')

node_trace = go.Scatter(
    x=x_coords, y=y_coords,
    mode='markers',
    hoverinfo='text',
    text=[f"Node {node}" for node in G.nodes()],
    marker=dict(
        size=10,
    )
)

fig = go.Figure(data=[edge_trace, node_trace],
                layout=go.Layout(
                    # title.font replaces the deprecated `titlefont` shortcut,
                    # which newer Plotly releases no longer accept.
                    title=dict(text='Interactive Graph Visualization', font=dict(size=16)),
                    showlegend=False,
                    hovermode='closest',
                    margin=dict(b=0, l=0, r=0, t=40),
                    xaxis=dict(showgrid=False, zeroline=False),
                    yaxis=dict(showgrid=False, zeroline=False)
                ))

fig.show()

total graphs: 156
first graph: Data(x=[697, 7], edge_index=[2, 5651], y=1140.0)


In [25]:
# Serialise the whole list of graphs into graphs.pt (path relative to the working directory).
torch.save(obj=graphs, f='graphs.pt')