In [None]:
import numpy as np
import pandas as pd
import networkx as nx
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns

# Apply seaborn's 'white' style to the figures drawn after this call.
sns.set_style('white')

In [None]:
# Load the comma-separated IRIS file. header=None makes pandas label the
# columns with integers; later cells read the first four columns as features
# and column 4 as the class label.
df_iris = pd.read_csv('iris.txt', header=None, sep=',')
df_iris.head()

# Calculating similarities between samples

## Defining a function for calculating similarity

In [None]:
def calculate_similarity(A, B, std):
    '''Return the Gaussian similarity between two samples.

    The value is ``exp(-||A - B||**2 / (2 * std**2))``, where ``||.||`` is
    ``np.linalg.norm`` with its default settings (the Euclidean norm for
    1-D input).

    Parameters
    ----------
    A, B : array-like
        Samples to compare; they are subtracted element-wise.
    std : float
        Width of the Gaussian kernel.

    Returns
    -------
    float
        1.0 when the samples are identical, approaching 0 as their
        distance grows.
    '''
    difference = np.subtract(A, B)
    squared_distance = np.linalg.norm(difference) ** 2
    exponent = np.divide(squared_distance, -2 * std**2)
    return np.exp(exponent)


## Calculating IRIS standard deviation

In [None]:
# Kernel width passed as `std` to calculate_similarity; the value is hard-coded.
# NOTE(review): the section heading says this is calculated from IRIS, but no
# computation from df_iris appears here — confirm where 0.73 comes from.
std = 0.73

## Calculating similarities

In [None]:
# Pairwise similarities are stored in the strict upper triangle only; the
# diagonal and the lower triangle stay NaN, so NaN-aware statistics over this
# matrix see each unordered pair of samples exactly once.
n_samples = len(df_iris)

# Extract the four feature columns once as a float array instead of building
# a pandas row Series twice per pair inside the O(n^2) loop. Rows are taken
# positionally, which matches the default integer index produced by read_csv.
features = df_iris.iloc[:, :4].to_numpy(dtype=float)

similarity_values = np.full((n_samples, n_samples), np.nan)
for i in range(n_samples):
    for j in range(i + 1, n_samples):
        similarity_values[i, j] = np.round(calculate_similarity(features[i], features[j], std), 3)

## Calculating mean & standard deviation of similarities

In [None]:
# Summary statistics over the filled entries only; NaN cells are skipped by
# the nan-aware reductions.
similarity_mean = np.nanmean(similarity_values).round(3)
similarity_std = np.nanstd(similarity_values).round(3)
print(f'Similarity mean: {similarity_mean}, std: {similarity_std}')

# Calculating the adjacency matrix based on similarities

## Defining the threshold

In [None]:
# The threshold sits two standard deviations above the mean similarity.
n_sigmas = 2
threshold = np.round(similarity_mean + n_sigmas * similarity_std, 3)
threshold

## Producing the adjacency matrix based on the threshold value

In [None]:
# Binary adjacency: 1 where the similarity reaches the threshold, 0 elsewhere.
# NaN entries of similarity_values compare False against the threshold, so
# they already become 0 here; np.where returns a new integer array, so neither
# a defensive copy nor a separate NaN clean-up is needed.
adjacent = np.where(similarity_values >= threshold, 1, 0)
print('Number of edges: ', adjacent.sum())

# Graph analysis

In [None]:
# Add the matrix to its transpose so that every edge appears in both
# directions of the resulting frame.
adjacent_df = pd.DataFrame(adjacent + adjacent.T)
# Nodes without any edge are kept; the filter below would drop them:
# adjacent_df = adjacent_df.loc[adjacent_df.sum() != 0, adjacent_df.sum() != 0 ]
adjacent_df.shape

## Calculating degree of nodes

In [None]:
# Degree of a node = sum of its column in the adjacency frame.
degree_of_nodes = adjacent_df.sum().to_dict()

# Average over the actual number of nodes rather than a hard-coded 150, so the
# mean stays correct if the data set size changes.
mean_of_degree = np.sum(list(degree_of_nodes.values())) / len(degree_of_nodes)
print('Mean of nodes degree: ', np.round(mean_of_degree, 3))

plt.bar(list(degree_of_nodes.keys()), list(degree_of_nodes.values()))
plt.title('The Degree of Nodes in IRIS Similarity Graph')
plt.xlabel('Nodes')
plt.ylabel('Degree')
plt.grid()
plt.show()

## Representing degree sequence

In [None]:
# Order the nodes by degree, highest first. The sort is stable, so nodes with
# equal degree keep their original relative order.
nodes_by_degree = sorted(degree_of_nodes, key=degree_of_nodes.get, reverse=True)
degree_sequence = {node: degree_of_nodes[node] for node in nodes_by_degree}
print(list(degree_sequence.values()))

## Representing degree distribution

In [None]:
# Map each degree value to the number of nodes that have it.
degree_distribution = Counter(degree_sequence.values())

# x-axis: the degree values (keys); bar height: how many nodes have that
# degree (values). Passing the frequencies for both axes would plot the
# counts against themselves instead of against the degrees.
plt.bar(list(degree_distribution.keys()), list(degree_distribution.values()))
plt.title('The Degree Distribution of IRIS Similarity Graph')
plt.xlabel('Degree of Nodes')
plt.ylabel('Degree Frequencies')
plt.grid()
plt.show()

## Representing degree probability mass function (PMF)

In [None]:
# Normalise the degree counts by the total number of counted nodes instead of
# a hard-coded 150, so the probabilities sum to 1 (up to rounding) for any
# data set size.
total_nodes = sum(degree_distribution.values())
pmf = {d: np.round(nk / total_nodes, 3) for d, nk in degree_distribution.items()}

plt.bar(list(pmf.keys()), list(pmf.values()))
plt.title('The PMF of Degree of the Nodes of IRIS Similarity Graph ')
plt.xlabel('Degree')
plt.ylabel('Degree Probabilities')
plt.grid()
plt.show()

## Calculating the average path length of the graph

In [None]:
# Build a directed graph from the adjacency frame. The frame is symmetric
# (matrix plus its transpose), so every edge is present in both directions.
G = nx.from_pandas_adjacency(adjacent_df, create_using=nx.DiGraph)
n_strong_components = nx.number_strongly_connected_components(G)
print('Number of strongly connected components in the IRIS graph', n_strong_components)

### Extracting Components

In [None]:
# Condense G: each node of `graphs` stands for one strongly connected
# component of G and lists the original nodes under its 'members' attribute.
graphs = nx.condensation(G)

# NOTE(review): only the components numbered 0 and 1 are inspected, which
# assumes the graph has at least two components — confirm against the
# component count printed for G.
component_0 = graphs.nodes[0]['members']
component_1 = graphs.nodes[1]['members']
G0, G1 = G.subgraph(component_0), G.subgraph(component_1)

print('Diameter of component_0: ', nx.diameter(G0), end= '')
print(' Radius of component_0: ', nx.radius(G0))
print('Diameter of component_1: ', nx.diameter(G1), end= '')
print('  Radius of component_1: ', nx.radius(G1))

### Average shortest path for each component

In [None]:
# Mean shortest-path length inside each extracted component, rounded to two
# decimals for display.
path_average_0 = nx.average_shortest_path_length(G0)
path_average_1 = nx.average_shortest_path_length(G1)
print('Path average for component_0: ', np.round(path_average_0, 2))
print('Path average for component_1: ', np.round(path_average_1, 2))

# Exercises

## Exercise1: Draw the calculated adjacency matrix (graph) with the networkx library

In [None]:
from decimal import *
# Undirected view of the similarity graph, used for drawing.
g = nx.from_pandas_adjacency(adjacent_df)

# Place the nodes on a fixed grid: 15 nodes per row, x running from 1.0 down
# to -0.4 in steps of 0.1, and y starting at 1.0 and dropping by 0.1 per row.
# The positions are computed directly from each node's rank, so no random
# layout is generated only to be overwritten, and the grid covers however
# many nodes the graph actually has.
grid_columns = 15
grid_step = 0.1
pos = {
    node: [
        round(1 - grid_step * (rank % grid_columns), 1),
        round(1 - grid_step * (rank // grid_columns), 1),
    ]
    for rank, node in enumerate(g.nodes)
}
nx.draw(g, pos=pos, alpha=0.9, width=0.8, with_labels=True)

## Exercise2: Draw each IRIS class with different node shapes from the calculated graph

In [None]:
# Node ids per IRIS class. Column 4 holds the class label; selecting on the
# whole column avoids a hard-coded sample count and repeated per-row lookups.
# Node ids equal the row labels of df_iris (the default integer index).
class_labels = df_iris[4]
list_1 = df_iris.index[class_labels == 'Iris-setosa'].tolist()
list_2 = df_iris.index[class_labels == 'Iris-virginica'].tolist()
list_3 = df_iris.index[class_labels == 'Iris-versicolor'].tolist()

# NOTE(review): each class is drawn as an induced subgraph, so edges joining
# nodes of different classes are not rendered — confirm this is intended.
subgraph_1 = nx.subgraph(g, list_1)
subgraph_2 = nx.subgraph(g, list_2)
subgraph_3 = nx.subgraph(g, list_3)

# One marker shape and colour per class, all drawn with the shared layout.
nx.draw(subgraph_1, pos=pos, node_shape='o', node_color='red', alpha=0.9, width=0.8, with_labels=True)
nx.draw(subgraph_2, pos=pos, node_shape='s', node_color='green', alpha=0.9, width=0.8, with_labels=True)
nx.draw(subgraph_3, pos=pos, node_shape='p', node_color='orange', alpha=0.9, width=0.8, with_labels=True)

## Exercise3: Draw degree distribution of the calculated graph