# Graph classification

**Authors:** Olympio Hacquard and Vadim Lebovici

**Date:** March 2023

-----

TODO: explain where to find the functions on graphs

In [None]:
import sys
# Add the parent directory to the module search path — presumably so that the
# `eulearning` package imported in the cells below resolves when the notebook
# is run from its own directory (TODO confirm against the repository layout).
sys.path.append('../')

# Computing multi-parameter filtrations

We load the dataset `DHFR` (other datasets can be selected by changing the `dataset` variable below) and compute its vectorized simplex trees associated with the combination of all filtrations available in this repository (for some choice of parameters).

**TODO**: describe filtrations

In [None]:
from eulearning.datasets import load_graph_dataset

# Dataset to classify. The other datasets 'MUTAG', 'COX2', 'PROTEINS', 'NCI1',
# 'NCI109', 'IMDB-BINARY' and 'IMDB-MULTI' are available on the Perslay
# repository https://github.com/MathieuCarriere/perslay.
dataset = 'DHFR'
path_to_dataset = f'../data/{dataset}/'

# One entry per filtration; together they define the multi-parameter
# filtration. NOTE(review): the meaning of each name and of its numeric
# suffixes is defined by `load_graph_dataset` — confirm there.
name_filtrations = [
    'hks_10.0',
    'ricci_0.5_0',
    'forman',
    'laplacian_2',
    'edge_betweenness',
    'centrality',
    'func_3',
]
n_params = len(name_filtrations)  # number of filtrations combined

# `vec_sts` holds the vectorized simplex trees and `y` the labels used for
# classification further down.
vec_sts, y = load_graph_dataset(dataset, path_to_dataset, name_filtrations)

# Computing Euler characteristic descriptors
We compute the Euler characteristic profiles of the above multi-filtrations, as well as their Radon transforms and hybrid transforms.

### Euler characteristic profiles

In [None]:
from eulearning.descriptors import EulerCharacteristicProfile

# Use the same resolution (10) and the same quantile pair (0, 1) for each of
# the n_params filtrations.
ecp_resolution = (10,) * n_params
ecp_quantiles = [(0, 1)] * n_params

# ECPs are flattened by default to fit with sklearn classifiers.
# Set flatten=False to unflatten them.
euler_profile = EulerCharacteristicProfile(resolution=ecp_resolution, quantiles=ecp_quantiles)
ecps = euler_profile.fit_transform(vec_sts)

### Radon transforms

In [None]:
from eulearning.descriptors import RadonTransform

# One value per filtration: 10 for the first positional argument
# (NOTE(review): presumably the resolution — confirm against RadonTransform's
# signature) and 0 for `quantiles`.
rdn_grid = (10,) * n_params
rdn_quantiles = [0 for _ in range(n_params)]

radon_transform = RadonTransform(rdn_grid, quantiles=rdn_quantiles)
rdns = radon_transform.fit_transform(vec_sts)

### Hybrid transforms

In [None]:
from eulearning.descriptors import HybridTransform

# One value per filtration: 10 for the first positional argument
# (NOTE(review): presumably the resolution — confirm against HybridTransform's
# signature) and 0 for `quantiles`. The kernel is selected by name.
ht_grid = (10,) * n_params
ht_quantiles = [0 for _ in range(n_params)]

hybrid_transform = HybridTransform(ht_grid, quantiles=ht_quantiles, kernel_name='exp_4')
hts = hybrid_transform.fit_transform(vec_sts)

# Classifying dataset

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier

# Seed shared by the three splits below. Without it every call to
# train_test_split draws a different random 10% test set, so the three scores
# are measured on different graphs and change from run to run. With the same
# random_state (and the same number of samples, which is assumed here since
# all descriptors are computed from `vec_sts`), the three splits select the
# same train/test graphs, making the scores reproducible and comparable.
split_seed = 0

# The same estimator object is refitted for each descriptor in turn.
clf = XGBClassifier()

# Classifying using Euler characteristic profiles
ecps_train, ecps_test, y_train, y_test = train_test_split(
    ecps, y, test_size=0.1, random_state=split_seed
)
clf.fit(ecps_train, y_train)
ecps_score = clf.score(ecps_test, y_test)
print('ECPs score:', np.round(ecps_score*100, decimals=2), '%')

# Classifying using Radon transforms
rdns_train, rdns_test, y_train, y_test = train_test_split(
    rdns, y, test_size=0.1, random_state=split_seed
)
clf.fit(rdns_train, y_train)
rdns_score = clf.score(rdns_test, y_test)
print(' RTs score:', np.round(rdns_score*100, decimals=2), '%')

# Classifying using hybrid transforms
hts_train, hts_test, y_train, y_test = train_test_split(
    hts, y, test_size=0.1, random_state=split_seed
)
clf.fit(hts_train, y_train)
hts_score = clf.score(hts_test, y_test)
print(' HTs score:', np.round(hts_score*100, decimals=2), '%')