In [None]:
from chemocommons import * # https://github.com/dqwei-lab/ATC
import scipy.io as scio
import scipy.stats as ss
from skmultilearn.cluster.networkx import NetworkXLabelGraphClusterer
from skmultilearn.cluster import LabelCooccurrenceGraphBuilder
from skmultilearn.ensemble import LabelSpacePartitioningClassifier
from skmultilearn.problem_transform import ClassifierChain, LabelPowerset
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.model_selection import LeaveOneOut
from sklearn.metrics import jaccard_score
import pandas as pd
import numpy as np
# import community as community_louvain
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import networkx as nx
import community_louvain # https://github.com/taynaud/python-louvain
import seaborn as sns; sns.set()

In [None]:
# Load the label table and drop its first column by position
# (presumably an id/index column — confirm against the CSV).
data = pd.read_csv('../input/Function of Protein.csv').iloc[:, 1:]
data

In [None]:
# Square placeholder: one row and one column per column of `data`.
correlation_matrix = np.zeros((data.shape[1],) * 2)
correlation_matrix.shape

In [None]:
def cramers_corrected_stat(confusion_matrix):
    """Return the bias-corrected Cramér's V of a two-way contingency table.

    Measures categorical-categorical association using the correction from
    W. Bergsma, "A bias-correction for Cramér's V and Tschuprow's T",
    Journal of the Korean Statistical Society 42 (2013): 323-328.

    Parameters
    ----------
    confusion_matrix : 2-D array-like of observed counts
        For example the output of ``pd.crosstab`` or a NumPy array.

    Returns
    -------
    float
        The corrected statistic in ``[0, 1]``. ``0.0`` is returned for
        degenerate tables (fewer than two rows or columns, or too few
        observations for the correction to be defined), where no
        association can be measured.

    Notes
    -----
    ``scipy.stats.chi2_contingency`` is called with its defaults, so Yates'
    continuity correction is applied to 2x2 tables. As a consequence the
    value for a binary variable against itself is slightly below 1.
    """
    # Work on a plain float array so that `.sum()` is the grand total for
    # DataFrame and ndarray inputs alike (DataFrame.sum() is per column).
    observed = np.asarray(confusion_matrix, dtype=float)
    r, k = observed.shape
    n = observed.sum()

    # Degenerate tables carry no association information and would lead
    # to a division by zero below.
    if min(r, k) < 2 or n <= 1:
        return 0.0

    chi2 = ss.chi2_contingency(observed)[0]   # chi-square test statistic
    phi2 = chi2 / n

    # Bias-corrected phi^2, clipped at zero.
    phi2corr = max(0.0, phi2 - (k - 1) * (r - 1) / (n - 1))

    rcorr = r - ((r - 1) ** 2) / (n - 1)    # corrected number of rows
    kcorr = k - ((k - 1) ** 2) / (n - 1)    # corrected number of columns
    denominator = min(rcorr, kcorr) - 1
    if denominator <= 0:
        # Happens when n does not exceed the smaller table dimension.
        return 0.0

    return float(np.sqrt(phi2corr / denominator))

In [None]:
# Pairwise association between every pair of columns of `data`.
# The size is taken from `data` instead of a hard-coded 24 so the cell
# keeps working if the number of columns changes.
n_labels = data.shape[1]
correlation_matrix = np.zeros(shape=(n_labels, n_labels))
for i in range(n_labels):
    for j in range(n_labels):
        confusion_matrix = pd.crosstab(data.iloc[:, i], data.iloc[:, j])
        correlation_matrix[i, j] = cramers_corrected_stat(confusion_matrix)

# Scale column i by its own self-association so the diagonal becomes 1.
# Columns whose self-association is 0 are left untouched to avoid a
# division by zero (which would fill the column with inf/nan).
# NOTE: because each column is scaled by a different factor, the result
# is only symmetric when all diagonal values are equal.
diagonal = np.diag(correlation_matrix).copy()
scalable = diagonal != 0
correlation_matrix[:, scalable] /= diagonal[scalable]

In [None]:
# Wrap the matrix in a DataFrame and label both axes with the column
# names of `data`.
correlation_matrix = pd.DataFrame(correlation_matrix)
correlation_matrix.index = correlation_matrix.columns = data.columns
# Uncomment to persist the matrix; a later cell reads this same path.
# correlation_matrix.to_csv('../output/Crames_V.csv')

In [None]:
# Annotated heatmap of the association matrix; the colour scale is
# capped at 0.4.
plt.figure(figsize=(20, 20))
sns.heatmap(
    correlation_matrix,
    annot=True,
    linewidths=3,
    vmax=0.4,
)

In [None]:
# Reload the saved association matrix from disk.
# `index_col=0` restores the row labels that `DataFrame.to_csv` writes as
# the first column by default. Without it that column is parsed as data,
# which makes the matrix non-square, puts strings into `.values`, and
# shifts every column index by one in the cells below.
# NOTE(review): assumes the file was produced by the commented-out
# `to_csv` call in the earlier cell (default `index=True`) — confirm.
# The file must already exist: nothing in the visible cells writes it.
correlation_matrix = pd.read_csv('../output/Crames_V.csv', index_col=0)

In [None]:
# Integer ids 1..24; not referenced by the other visible cells (note that
# the graph nodes added later are the strings '1', '2', ...).
all_member = set(range(1, 25))

# Empty undirected graph; the keyword sets the graph's `name` attribute.
G = nx.Graph(name='Mouse Protein')

In [None]:
# Take an explicit copy of the matrix values before zeroing the diagonal.
# Writing through `.values` either silently mutates `correlation_matrix`
# (when the array is a view) or fails on the read-only array returned
# under pandas Copy-on-Write; a copy behaves the same in both cases and
# leaves the DataFrame shown earlier unchanged.
gramerVdat = correlation_matrix.to_numpy(copy=True)

# Remove self-associations so they never become graph edges.
np.fill_diagonal(gramerVdat, 0)
gramerVdat

In [None]:
# Add one weighted edge per non-zero entry strictly above the diagonal.
# Nodes are named by 1-based position, as strings.
node_count = gramerVdat.shape[0]
upper_pairs = (
    (row, col)
    for row in range(node_count)
    for col in range(row + 1, node_count)
)
for row, col in upper_pairs:
    edge_weight = gramerVdat[row, col]
    if edge_weight == 0:
        continue
    G.add_edge(str(row + 1), str(col + 1), weight=edge_weight)

In [None]:
# Louvain community detection on the weighted graph.
# The algorithm visits nodes in random order, so a fixed `random_state`
# is passed to make the partition reproducible across kernel restarts.
partition = community_louvain.best_partition(G, random_state=42)
print('final_partion:',partition)