# Deep Graph Library (dgl) サンプルデータの取得

In [1]:
!pip install  dgl -f https://data.dgl.ai/wheels/torch-2.1/repo.html

Looking in links: https://data.dgl.ai/wheels/torch-2.1/repo.html
Collecting dgl
  Downloading https://data.dgl.ai/wheels/torch-2.1/dgl-2.4.0-cp310-cp310-manylinux1_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m19.7 MB/s[0m eta [36m0:00:00[0m
Collecting torch<=2.4.0 (from dgl)
  Downloading torch-2.4.0-cp310-cp310-manylinux1_x86_64.whl.metadata (26 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch<=2.4.0->dgl)
  Downloading nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch<=2.4.0->dgl)
  Downloading nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch<=2.4.0->dgl)
  Downloading nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch<=2.4.0->dgl)
  Downloading nvidia_cudn

In [25]:
import pandas as pd

In [3]:
import os

# Select PyTorch as DGL's backend through the DGLBACKEND environment variable.
# NOTE(review): the assignment is placed before `import dgl`, presumably because
# the backend choice is read when the package is imported — confirm against the
# DGL documentation before reordering these lines.
os.environ["DGLBACKEND"] = "pytorch"
import dgl

## AmazonCoBuyComputerDataset
https://docs.dgl.ai/generated/dgl.data.AmazonCoBuyComputerDataset.html#dgl.data.AmazonCoBuyComputerDataset

In [5]:
# Instantiate AmazonCoBuyComputerDataset. In the recorded run this downloaded
# amazon_co_buy_computer.zip into /root/.dgl and extracted it there.
data = dgl.data.AmazonCoBuyComputerDataset()

Downloading /root/.dgl/amazon_co_buy_computer.zip from https://data.dgl.ai/dataset/amazon_co_buy_computer.zip...


/root/.dgl/amazon_co_buy_computer.zip:   0%|          | 0.00/3.42M [00:00<?, ?B/s]

Extracting file to /root/.dgl/amazon_co_buy_computer_b5999b2e


In [6]:
# Display the dataset object's repr (dataset name, number of graphs, save path).
data

Dataset("amazon_co_buy_computer", num_graphs=1, save_path=/root/.dgl/amazon_co_buy_computer_b5999b2e)

In [7]:
# Inspect the concrete Python class of the dataset object.
type(data)

In [8]:
# Number of items in the dataset; the recorded run printed 1.
len(data)

1

In [9]:
# Index 0 yields the dataset's graph; its repr lists the node/edge counts and
# the schemes of the node data (ndata) and edge data (edata).
data[0]

Graph(num_nodes=13752, num_edges=491722,
      ndata_schemes={'feat': Scheme(shape=(767,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'__orig__': Scheme(shape=(), dtype=torch.int64)})

In [11]:
# Plain-text print of the same graph; it carries the same information as the
# repr shown when `data[0]` is evaluated as a bare expression.
print(data[0])

Graph(num_nodes=13752, num_edges=491722,
      ndata_schemes={'feat': Scheme(shape=(767,), dtype=torch.float32), 'label': Scheme(shape=(), dtype=torch.int64)}
      edata_schemes={'__orig__': Scheme(shape=(), dtype=torch.int64)})


In [12]:
# Keep a handle on the dataset's single graph (len(data) is 1) for the cells
# that follow.
graph = data[0]

In [15]:
# Inspect the concrete Python class of the graph object.
type(graph)

In [16]:
# Node-level data stored on the graph; in the recorded run it holds the 'feat'
# and 'label' tensors.
graph.ndata

{'feat': tensor([[0., 1., 0.,  ..., 0., 0., 1.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 1., 0., 0.],
        ...,
        [0., 0., 1.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]]), 'label': tensor([8, 8, 1,  ..., 4, 8, 4])}

In [18]:
# Names of the node-level fields stored on the graph.
graph.ndata.keys()

dict_keys(['feat', 'label'])

In [19]:
# Edge endpoints, returned as a pair of tensors (unpacked as `src, dst` in the
# extraction cell further down).
graph.edges()

(tensor([    1,     2,     3,  ..., 13468, 13470, 13469]),
 tensor([    0,     0,     0,  ..., 13469, 13469, 13470]))

In [20]:
# 1. Get the node features
features = graph.ndata['feat'].numpy()  # convert the 'feat' tensor to a NumPy array
labels = graph.ndata['label'].numpy()   # node labels, also as a NumPy array

# 2. Get the edge list
# graph.edges() returns two tensors that are unpacked here; they are exported
# later as the 'src' and 'dst' columns of the edge list.
src, dst = graph.edges()

In [21]:
# Peek at the feature matrix; NumPy abbreviates the repr of an array this large.
features

array([[0., 1., 0., ..., 0., 0., 1.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 0., ..., 1., 0., 0.],
       ...,
       [0., 0., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]], dtype=float32)

In [22]:
# Shape of the feature matrix; the recorded run shows (13752, 767), i.e. one row
# per node of the graph (num_nodes=13752).
features.shape

(13752, 767)

In [23]:
# Peek at the label array.
labels

array([8, 8, 1, ..., 4, 8, 4])

In [24]:
# Shape of the label array; the recorded run shows (13752,), one entry per node.
labels.shape

(13752,)

In [28]:
# Mount Google Drive at /content/drive so the export cell can write its CSV
# files under drive/MyDrive.
# NOTE(review): `google.colab` is expected to exist only inside a Colab runtime,
# so this cell will not run elsewhere — confirm if the notebook must be portable.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [40]:
# List the export folder on the mounted Drive. The path is relative, so it
# presumably relies on the working directory being /content (the parent of the
# mount point used above) — TODO confirm.
!ls drive/MyDrive/tmp/amazon_data

In [41]:
# 3. Build DataFrames and export them as CSV files on the mounted Drive.
# All three files go to the same folder, so the folder is defined once instead
# of being repeated in every path. The path is relative and therefore resolves
# against the kernel's current working directory, exactly as before.
OUTPUT_DIR = 'drive/MyDrive/tmp/amazon_data'
# Create the folder when it does not exist yet; without this, to_csv raises
# on a Drive where the folder was never created by hand.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Save the node features (one row per node, one column per feature dimension).
features_df = pd.DataFrame(features)
features_df.to_csv(os.path.join(OUTPUT_DIR, 'AmazonCoBuyComputerDataset_node_features.csv'), index=False)

# Save the node labels as a single 'label' column.
labels_df = pd.DataFrame(labels, columns=['label'])
labels_df.to_csv(os.path.join(OUTPUT_DIR, 'AmazonCoBuyComputerDataset_node_labels.csv'), index=False)

# Save the edge list (src: source node, dst: destination node).
edges_df = pd.DataFrame({'src': src.numpy(), 'dst': dst.numpy()})
edges_df.to_csv(os.path.join(OUTPUT_DIR, 'AmazonCoBuyComputerDataset_edge_list.csv'), index=False)

In [27]:
# Shapes of the three exported frames; the recorded run shows
# (13752, 767) for features, (13752, 1) for labels and (491722, 2) for edges.
# NOTE(review): this cell's execution count (27) is lower than that of the
# export cell before it (41), so it was run out of order — re-run the notebook
# top to bottom to confirm the output still matches.
features_df.shape, labels_df.shape, edges_df.shape

((13752, 767), (13752, 1), (491722, 2))