In [2]:
import torch
import torch_geometric
import torch_geometric.utils
from torch_geometric.data import Data
import networkx as nx
import numpy as np

### Traditional graph with large features
Node features include the audio features and the artist features (genres).

In [3]:
# Load the adjacency matrix from disk and build a NetworkX graph from it.
A = np.load("../components/A.npy")
# nx.from_numpy_array records each nonzero entry as a `weight` edge attribute.
G = nx.from_numpy_array(A)

In [4]:
# Sanity check: how many nodes the graph built from A contains.
G.number_of_nodes()

2894

In [5]:
# Load the two feature matrices from disk.
Xaudiofeatures = np.load("../components/Xaudiofeatures.npy")
Xartist = np.load("../components/Xartist.npy")
# Join them column-wise (axis=1): audio-feature columns first, then the
# artist columns. Both arrays must therefore have the same number of rows.
# NOTE(review): presumably row i of each array describes node i of G -- confirm.
Xcomplete = np.concatenate((Xaudiofeatures, Xartist), axis=1)

In [6]:
# Convert the NetworkX graph into a PyTorch Geometric data object.
complete_dataset = torch_geometric.utils.convert.from_networkx(G)

In [7]:
# Wrap the combined feature matrix as a tensor. torch.from_numpy shares memory
# with the NumPy array and keeps its dtype.
# NOTE(review): if Xcomplete is float64 this yields a double tensor; confirm
# that downstream models expect that, or cast explicitly -- TODO confirm.
Xcomplete_tensor = torch.from_numpy(Xcomplete)

In [8]:
# Clear the `weight` attribute on the data object (the repr displayed in the
# following cell lists only edge_index, num_nodes and x).
complete_dataset.weight = None
# Attach the combined feature tensor as the node feature matrix `x`.
complete_dataset.x = Xcomplete_tensor

In [9]:
# Display the assembled data object to check its fields and shapes.
complete_dataset

Data(edge_index=[2, 45740], num_nodes=2894, x=[2894, 1358])

In [10]:
# Persist the assembled data object to disk.
# NOTE(review): assumes the ../datasets/complete/ directory already exists.
torch.save(complete_dataset, '../datasets/complete/data.pt')

In [11]:
# Reload the file that was just written.
# NOTE(review): the reloaded object is not compared against complete_dataset
# anywhere in the cells visible here -- add a check if verification is intended.
complete_verify = torch.load('../datasets/complete/data.pt')

### Small graph with only audio features, popularity, and followers
Can be used for classification.

In [19]:
# Keep every Xartist column from index 2 onward; these are treated as the
# genre columns below.
# NOTE(review): presumably columns 0 and 1 are popularity and followers -- confirm.
Xgenres = Xartist[:, 2:]

In [21]:
# Column-wise totals: one value per genre column, summed over all rows.
sums = Xgenres.sum(axis=0)

In [28]:
# Pair every genre column index with its total so the index survives sorting.
index_sums = [(column, total) for column, total in enumerate(sums)]

In [31]:
# Order the (column, total) pairs from largest total to smallest. The sort is
# stable, so pairs with equal totals keep their original column order.
sorted_index_sums = list(index_sums)
sorted_index_sums.sort(key=lambda pair: -pair[1])

In [68]:
# Keep the 15 highest-ranked (column, total) pairs ...
sorted_15_index_sums = sorted_index_sums[:15]
# ... and retain only their column indices.
indices = [pair[0] for pair in sorted_15_index_sums]

In [69]:
# Display the column indices of the 15 largest genre totals.
indices

[592, 532, 163, 759, 930, 888, 1139, 743, 38, 429, 1315, 1317, 1077, 904, 537]

In [64]:
# Display the current contents of `indices` again.
indices

[592, 532, 163, 759, 930, 888, 1139, 743, 38, 429, 1315, 1317, 1077, 904, 537]

In [None]:
# 592, 743, 537

In [94]:
# Manual override: replace the computed list with five hand-picked genre
# columns. Each of them also appears in the top-15 list displayed earlier.
indices = [759, 743, 537, 1139, 1315]

In [95]:
# Restrict the genre matrix to the columns listed in `indices`
# (one output column per entry, in the same order).
Xgenres_slimmed = Xgenres[:, indices]
Xgenres_slimmed

array([[0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       ...,
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0.]])

In [96]:
# criteria: belong to EXACTLY 1 of the selected genres, i.e. the columns
# listed in `indices` (five hand-picked columns in the recorded run, not six).
# np.isclose compares each row sum with 1 using a floating-point tolerance.
fit_criteria = np.isclose(np.sum(Xgenres_slimmed, axis=1), 1)
# Per-genre totals over only the rows that satisfy the criteria.
np.sum(Xgenres_slimmed[fit_criteria], axis=0)

array([166., 137., 110., 102.,  87.])

In [44]:
# Number of rows that satisfy the criteria (True counts as 1).
fit_criteria.sum()

468

In [37]:
# Boolean mask over rows: True where at least one of the selected genre
# columns holds a nonzero value.
non_zero_rows = (Xgenres[:, indices] != 0).any(axis=1)

In [38]:
# Display the row mask computed in the previous cell.
non_zero_rows

array([ True,  True,  True, ..., False,  True, False])

In [25]:
# List every genre column total from largest to smallest (sorting on the
# negated value gives descending order).
# NOTE(review): this prints one line per genre column; consider slicing the
# result if only the head of the ranking is needed.
sorted(sums, key=np.negative)

[400.0,
 312.0,
 255.0,
 178.0,
 171.0,
 143.0,
 143.0,
 137.0,
 131.0,
 131.0,
 129.0,
 126.0,
 116.0,
 115.0,
 111.0,
 105.0,
 104.0,
 101.0,
 93.0,
 92.0,
 89.0,
 82.0,
 82.0,
 79.0,
 75.0,
 74.0,
 73.0,
 73.0,
 71.0,
 67.0,
 65.0,
 64.0,
 61.0,
 60.0,
 60.0,
 59.0,
 58.0,
 56.0,
 53.0,
 53.0,
 52.0,
 52.0,
 51.0,
 50.0,
 49.0,
 47.0,
 45.0,
 44.0,
 42.0,
 42.0,
 41.0,
 41.0,
 40.0,
 40.0,
 40.0,
 40.0,
 39.0,
 39.0,
 38.0,
 37.0,
 35.0,
 34.0,
 34.0,
 33.0,
 32.0,
 32.0,
 32.0,
 32.0,
 31.0,
 29.0,
 28.0,
 28.0,
 28.0,
 28.0,
 28.0,
 27.0,
 27.0,
 26.0,
 26.0,
 25.0,
 25.0,
 25.0,
 25.0,
 25.0,
 25.0,
 24.0,
 24.0,
 24.0,
 24.0,
 24.0,
 24.0,
 24.0,
 23.0,
 23.0,
 23.0,
 23.0,
 22.0,
 22.0,
 22.0,
 22.0,
 22.0,
 22.0,
 21.0,
 21.0,
 21.0,
 20.0,
 20.0,
 19.0,
 19.0,
 19.0,
 19.0,
 19.0,
 19.0,
 19.0,
 18.0,
 18.0,
 18.0,
 18.0,
 18.0,
 17.0,
 17.0,
 17.0,
 17.0,
 17.0,
 17.0,
 17.0,
 17.0,
 16.0,
 16.0,
 16.0,
 16.0,
 16.0,
 15.0,
 15.0,
 15.0,
 15.0,
 15.0,
 15.0,
 14.0,
 14.0,
 1