In [None]:
import pandas as pd
import networkx as nx
import community as community_louvain
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
from sklearn import cluster

# **1. DATA ANALYSIS**

In [None]:
# Read the book-1 edge list and discard the 'Type' and 'book' columns,
# which are not used by any later cell in this notebook.
book1 = pd.read_csv(
    '/content/drive/MyDrive/network/data/asoiaf-book1-edges.csv'
).drop(['Type', 'book'], axis=1)

**1.1 EDGES CHECKING**

In [None]:
# DataFrame.shape is (rows, columns); the row count is reported as the
# number of edges.
r, c = book1.shape
print(f'NUMBER OF EDGES: {r}')

**So there are *684* EDGES in our network. But are there any DUPLICATE EDGES?**

**1.2 DUPLICATE EDGES CHECKING**

In [None]:
# Count rows that are exact copies of an earlier row (all columns equal).
# NOTE(review): a reversed pair (B, A) of an existing (A, B) edge would not
# be counted here — confirm whether that matters for this dataset.
duplicateRowCount = book1.duplicated().sum()
print('Duplicate rows of dataFrame:', duplicateRowCount)

**NO DUPLICATES. Let's check how many NODES there are in our network.**

**1.3 NODES CHECKING**

In [None]:
# Extract both endpoint columns of the edge list and give each Series the
# common name 'Node'.
# Passing a scalar to Series.rename sets the Series name. The previous code
# passed a dict, which pandas interprets as an index-label mapping, so the
# rename silently did nothing.
nodeSource = book1['Source'].rename('Node')
nodeTarget = book1['Target'].rename('Node')

In [None]:
# Stack the source and target endpoints into one Series, keep the first
# occurrence of every character and renumber the result 0..n-1.
# pd.concat is used because Series.append was removed in pandas 2.0.
nodeChecking = (
    pd.concat([nodeSource, nodeTarget])
    .drop_duplicates()
    .reset_index(drop=True)
)
# A Series has a one-element shape tuple: (number_of_rows,).
r, = nodeChecking.shape
print('NUMBER OF NODES: {}'.format(r))

**There are *187* NODES in total in our network. But are there any NULL VALUES in our dataframe?**

**1.4 CHECKING NULL VALUES**

In [None]:
 book1.isnull().sum() > 0

**NO NULL VALUES in our dataframe. PERFECT! Let's move on to the next part.**

# **2. NETWORK BUILDING**

In [None]:
# Build an undirected graph with one edge per row of the edge list; the
# 'weight' column is stored as the edge attribute of the same name.
Graph = nx.Graph()
edgeColumns = zip(book1['Source'], book1['Target'], book1['weight'])
for sourceName, targetName, edgeWeight in edgeColumns:
    Graph.add_edge(sourceName, targetName, weight=edgeWeight)

**2.1 GEOMETRIC MEASURES**
*   Degree Centrality
*   Closeness Centrality

 **2.1.1 Degree Centrality**

In [None]:
# Degree centrality for every node of Graph, tabulated as
# (Character, score) rows and ordered from highest to lowest score.
degreeCen = nx.degree_centrality(Graph)
dfDegreeCen = pd.DataFrame(
    list(degreeCen.items()),
    columns=['Character', 'Degree Centrality'],
).sort_values("Degree Centrality", axis=0, ascending=False)
dfDegreeCen.head(5)

In [None]:
# Write the degree-centrality ranking to CSV without the index column.
dfDegreeCen.to_csv(
    r'/content/drive/MyDrive/network/DegreeCentrality.csv',
    index=False,
)

**2.1.2 Closeness Centrality**

In [None]:
# Closeness centrality for every node of Graph, tabulated as
# (Character, score) rows and ordered from highest to lowest score.
closenessCen = nx.closeness_centrality(Graph)
closenessRows = list(closenessCen.items())
dfClosenessCen = pd.DataFrame(closenessRows, columns=['Character', 'Closeness Centrality'])
dfClosenessCen = dfClosenessCen.sort_values("Closeness Centrality", axis=0, ascending=False)
dfClosenessCen.head(5)

In [None]:
# Write the closeness-centrality ranking to CSV without the index column.
dfClosenessCen.to_csv(
    r'/content/drive/MyDrive/network/ClosenessCentrality.csv',
    index=False,
)

In [None]:
**2.2 SPECTRAL MEASURES**

*   Pagerank
*   Eigenvector Centrality

In [None]:
**2.2.1 PageRank**

In [None]:
# PageRank score for every node of Graph, tabulated as (Character, score)
# rows and ordered from highest to lowest score.
# NOTE(review): unlike the other centrality tables, this one is never
# exported to CSV in the visible cells — confirm whether that is intended.
pageRank = nx.pagerank(Graph)
dfPageRank = pd.DataFrame(
    list(pageRank.items()),
    columns=['Character', 'PageRank'],
).sort_values("PageRank", axis=0, ascending=False)
dfPageRank.head(5)

In [None]:
**2.2.2 Eigenvector Centrality**

In [None]:
# Eigenvector centrality for every node of Graph, tabulated as
# (Character, score) rows and ordered from highest to lowest score.
eigenCen = nx.eigenvector_centrality(Graph)
eigenRows = list(eigenCen.items())
dfEigenCen = pd.DataFrame(eigenRows, columns=['Character', 'Eigenvector Centrality'])
dfEigenCen = dfEigenCen.sort_values("Eigenvector Centrality", axis=0, ascending=False)
dfEigenCen.head(5)

In [None]:
# Write the eigenvector-centrality ranking to CSV without the index column.
dfEigenCen.to_csv(
    r'/content/drive/MyDrive/network/EigenvectorCentrality.csv',
    index=False,
)

In [None]:
**2.3 PATH-BASED MEASURES**

*   Betweenness Centrality

In [None]:
# Betweenness centrality for every node of Graph, tabulated as
# (Character, score) rows and ordered from highest to lowest score.
betweenCen = nx.betweenness_centrality(Graph)
dfBetweenCen = pd.DataFrame(
    list(betweenCen.items()),
    columns=['Character', 'Betweenness Centrality'],
).sort_values("Betweenness Centrality", axis=0, ascending=False)
dfBetweenCen.head(5)

In [None]:
# Write the betweenness-centrality ranking to CSV without the index column.
dfBetweenCen.to_csv(
    r'/content/drive/MyDrive/network/BetweennessCentrality.csv',
    index=False,
)

In [None]:
## **Let's summarize our network Centrality**

In [None]:
# Side-by-side ranking table: each column lists the characters in the row
# order of the corresponding centrality DataFrame.
# NOTE(review): the variable name `sum` shadows Python's built-in sum() for
# the rest of the session; it is kept because a later cell exports `sum`.
rankingLabels = [
    'Degree Centrality',
    'Closeness Centrality',
    'Pagerank',
    'Eigenvector Centrality',
    'Betweenness Centrality',
]
rankedCharacters = (
    dfDegreeCen['Character'].values,
    dfClosenessCen['Character'].values,
    dfPageRank['Character'].values,
    dfEigenCen['Character'].values,
    dfBetweenCen['Character'].values,
)
# One row per measure, then transposed so that each measure becomes a column.
sum = pd.DataFrame(rankedCharacters, index=rankingLabels).transpose()
sum.head()

In [None]:
# Write the combined ranking table to CSV without the index column.
sum.to_csv(
    r'/content/drive/MyDrive/network/sumary.csv',
    index=False,
)

In [None]:
**2.4  COMMUNITY DETECTION**


*   Modularity Clustering
*   K-means Clustering

In [None]:
**2.4.1 Modularity Clustering**

In [None]:
# Louvain modularity clustering: best_partition returns a dict mapping each
# node to a community id.
# The first line of this cell had been truncated to `on(Graph,weight='None')`,
# which raised NameError and left `modulCluster` undefined; the full call is
# restored here.
# NOTE(review): weight='None' is the *string* 'None', kept exactly as written.
# Presumably it names an edge attribute that does not exist, so edge weights
# are ignored — confirm this is the intent (vs. weight='weight').
# NOTE(review): the partition can vary between runs unless a random_state is
# supplied — consider pinning one for reproducible exports.
modulCluster = community_louvain.best_partition(Graph, weight='None')
dfModulCluster = pd.DataFrame(
    list(modulCluster.items()),
    columns=['Character', 'Community'],
).sort_values("Character")
dfModulCluster.head(5)

In [None]:
# Write the modularity community assignment to CSV without the index column.
dfModulCluster.to_csv(
    r'/content/drive/MyDrive/network/ModularityClustering.csv',
    index=False,
)

In [None]:
# Number of characters assigned to each modularity community, as a
# two-column table ordered by community id.
modulCommunitySizes = dfModulCluster["Community"].value_counts()
modulCommunity = pd.DataFrame(
    list(modulCommunitySizes.items()),
    columns=['Community', 'Number of Character'],
).sort_values("Community")
modulCommunity.head(10)

In [None]:
**2.4.2 K-means Clustering**

In [None]:
def graph_to_edge_matrix(G):
    """Return a square 0/1 integer matrix describing the edges of ``G``.

    Node labels are used directly as row/column indices, so they must be
    integers valid for a ``len(G)`` x ``len(G)`` array. Entry ``[u][v]`` is 1
    when ``v`` is reported by ``G.neighbors(u)``; every node's diagonal entry
    is also set to 1.
    """
    size = len(G)
    adjacency = np.zeros((size, size), dtype=int)
    for u in G:
        row = adjacency[u]  # view into the matrix, so writes land in `adjacency`
        row[u] = 1
        for v in G.neighbors(u):
            row[v] = 1
    return adjacency

In [None]:
# Renumber the unique-character Series 0..n-1, then build the lookup
# character name -> integer position.
nodeChecking = nodeChecking.reset_index(drop=True)
data_dict = {name: position for position, name in nodeChecking.to_dict().items()}

In [None]:
# Copy of the edge list without the 'weight' column, with the character
# names in both endpoint columns replaced by their integer ids from data_dict.
X = book1.drop(['weight'], axis=1)
for endpointColumn in ("Source", "Target"):
    X = X.replace({endpointColumn: data_dict})

In [None]:
# Unweighted graph over the integer-labelled edge list, followed by its
# 0/1 edge matrix (rows and columns are indexed by the integer node labels).
G = nx.Graph()
for _, row in X.iterrows():
    sourceId, targetId = row['Source'], row['Target']
    G.add_edge(sourceId, targetId)

edge_mat = graph_to_edge_matrix(G)

In [None]:
# Cluster the rows of the edge matrix into 7 groups with K-means.
# random_state pins the centroid initialisation so that the labels (and the
# CSV exported from them) are reproducible across runs; n_init is set
# explicitly because its default differs between scikit-learn releases.
kmeans = cluster.KMeans(n_clusters=7, n_init=10, random_state=42).fit(edge_mat)
# labels_[i] is the cluster assigned to row i of edge_mat.
resultKmeans = kmeans.labels_

In [None]:
# Pair every unique character (in nodeChecking order) with the K-means label
# at the same position in resultKmeans.
orderedNames = dict.fromkeys(nodeChecking.to_dict().values())
results = {
    name: resultKmeans[position]
    for position, name in enumerate(orderedNames)
}

In [None]:
# K-means assignment as a (Character, Community) table, ordered
# alphabetically by character name.
kmeansRows = list(results.items())
dfKmeansCluster = pd.DataFrame(kmeansRows, columns=['Character', 'Community'])
dfKmeansCluster = dfKmeansCluster.sort_values("Character")
dfKmeansCluster.head(5)

In [None]:
# Write the K-means community assignment to CSV without the index column.
dfKmeansCluster.to_csv(
    r'/content/drive/MyDrive/network/K-meansClustering.csv',
    index=False,
)

In [None]:
# Number of characters assigned to each K-means cluster, as a two-column
# table ordered by cluster id.
kmeansCommunitySizes = dfKmeansCluster["Community"].value_counts()
kmeansCommunity = pd.DataFrame(
    list(kmeansCommunitySizes.items()),
    columns=['Community', 'Number of Character'],
).sort_values("Community")
kmeansCommunity.head(10)