In [1]:
%matplotlib  inline
import pandas as pd
import numpy as np
from snap import *


def getGraph(nodes, edges):
    """Build a ``TUNGraph`` from explicit node ids and edge pairs.

    NOTE(review): a later cell in this notebook defines
    ``getGraph(filename, U=True)``, which rebinds the same name; once that
    cell has run, this two-argument version is no longer reachable.

    :param nodes: iterable of node ids; each one is passed to ``AddNode``.
    :param edges: iterable of 2-item pairs; each pair is passed to ``AddEdge``.
    :return: the populated graph.
    """
    graph = TUNGraph.New()
    # Add the nodes
    for node_id in nodes:
        graph.AddNode(node_id)
    # Add the edges
    for src, dst in edges:
        graph.AddEdge(src, dst)
    return graph


### Compute in-degree and out-degree
def getInOutDegrees(UGraph=None):
    """Tabulate the out-degree and in-degree of every node in ``UGraph``.

    :param UGraph: graph object exposing ``Nodes()``; each yielded node must
        provide ``GetId()``, ``GetOutDeg()`` and ``GetInDeg()``.
    :return: ``pandas.DataFrame`` indexed by ``id`` with the columns
        ``OutDeg`` and ``InDeg``, one row per node in iteration order.
    """
    rows = [
        [node.GetId(), node.GetOutDeg(), node.GetInDeg()]
        for node in UGraph.Nodes()
    ]
    degree_table = pd.DataFrame(data=rows, columns=['id', 'OutDeg', 'InDeg'])
    return degree_table.set_index('id')


##Uses the Clauset-Newman-Moore community detection method for large networks. 
# At every step of the algorithm two communities that contribute maximum positive value 
# to global modularity are merged. Fills CmtyV with all the communities detected and 
# returns the modularity of the network.
def getCNMCommDetect(UGraph=None):
    """Run Clauset-Newman-Moore community detection on ``UGraph``.

    :param UGraph: graph handed to ``CommunityCNM``.
    :return: tuple ``(comm, modularity)``; ``comm`` is a DataFrame with one
        row per detected community (the row holds that community's members),
        ``modularity`` is the value returned by ``CommunityCNM``.
    """
    communities = TCnComV()
    score = CommunityCNM(UGraph, communities)
    members = [list(community) for community in list(communities)]
    return pd.DataFrame(members), score

## Uses the Girvan-Newman community detection algorithm based on betweenness centrality on Graph.
def getGNCommDetect(UGraph=None):
    """Run Girvan-Newman community detection on ``UGraph``.

    :param UGraph: graph handed to ``CommunityGirvanNewman``.
    :return: tuple ``(comm, modularity)``; ``comm`` is a DataFrame with one
        row per detected community (the row holds that community's members),
        ``modularity`` is the value returned by ``CommunityGirvanNewman``.
    """
    CmtyV = TCnComV()
    # The notebook's first cell only does ``from snap import *``, so the
    # module name ``snap`` is not bound there; call the function by its bare
    # name, as getCNMCommDetect does.
    modularity = CommunityGirvanNewman(UGraph, CmtyV)
    comm = pd.DataFrame([list(line) for line in list(CmtyV)])
    return comm, modularity

## Get the subgraph made up of the given nodes of the graph
def getSubGraph(Graph, nodes):
    """Return the subgraph of ``Graph`` built from the given node ids.

    :param Graph: graph handed to ``GetSubGraph``.
    :param nodes: iterable of node ids; each is added to a ``TIntV``.
    :return: the value returned by ``GetSubGraph(Graph, NIdV)``.
    """
    # The notebook's first cell only does ``from snap import *``, so the
    # module name ``snap`` is not bound there; use the bare names like the
    # other helpers in that cell.
    NIdV = TIntV()
    for node_id in nodes:
        NIdV.Add(node_id)
    return GetSubGraph(Graph, NIdV)

def GetStats(Graph=None):
    """Compute whole-graph summary statistics and return them.

    The previous version computed the three values and discarded them, and
    could only read a module-level ``Graph``.

    :param Graph: graph to measure. When omitted, the module-level ``Graph``
        is used, which is what the zero-argument version read.
    :return: tuple ``(diam, GraphClustCoeff, NumTriads)``.
    :raises NameError: if no graph is passed and no module-level ``Graph``
        exists (same exception type as before).
    """
    if Graph is None:
        try:
            Graph = globals()['Graph']
        except KeyError:
            raise NameError("name 'Graph' is not defined")
    # Arguments 100 / -1 / 50 are kept exactly as in the original calls;
    # see the SNAP reference for their meaning (sample sizes — TODO confirm).
    diam = GetBfsFullDiam(Graph, 100, False)   # network diameter
    GraphClustCoeff = GetClustCf(Graph, -1)    # clustering coefficient
    NumTriads = GetTriads(Graph, 50)           # number of closed triads
    return diam, GraphClustCoeff, NumTriads

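# GetEdgesInOut(Graph, Nodes)  ## Edges inside the community vs. edges leaving it; Graph may be directed or undirected. Nodes holds the ids of nodes in the graph.
# snap.GetModularity(Graph, Nodes, 1000)  ## Modularity of the community formed by Nodes. GEdges is an optional argument; if it differs from the actual edge count, the returned modularity is wrong.
# # convert undirected graph to directed
# GOut = snap.ConvertGraph(snap.PNGraph, GIn)  ## Converts between graph types (here: undirected to directed).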

def apply_packed_function_for_map(packed):
    """
    Unpack dumped function as target function and call it with arguments.

    The single argument is unpacked inside the body instead of in the
    signature: tuple parameters were removed in Python 3 (PEP 3113), while
    this form works on both Python 2 and Python 3. Callers still pass one
    4-tuple positionally, exactly as ``Pool.map`` does.

    :param packed:
        a tuple ``(dumped_function, item, args, kwargs)`` as produced by
        ``pack_function_for_map``
    :return:
        result of ``target_function(item, *args, **kwargs)``
    """
    # Imported here so the helper does not depend on a later notebook cell
    # having run ``import dill`` first.
    import dill

    dumped_function, item, args, kwargs = packed
    # NOTE: dill.loads executes arbitrary code from the payload; only feed it
    # payloads created by pack_function_for_map, never external data.
    target_function = dill.loads(dumped_function)
    return target_function(item, *args, **kwargs)


def pack_function_for_map(target_function, items, *args, **kwargs):
    """
    Pack a function and its arguments into objects that can be sent from one
    multiprocessing.Process to another. The main problem is:
        ``multiprocessing.Pool.map*`` or ``apply*``
        cannot use class methods or closures.
    This helper solves that with ``dill``: it dumps the target function and
    returns the dumped function together with the target function's
    arguments. For performance only the target function itself is dumped,
    not its arguments.
    How to use (pseudo-code):

        ~>>> import multiprocessing
        ~>>> images = [...]
        ~>>> pool = multiprocessing.Pool(100500)
        ~>>> features = pool.map(
        ~...     *pack_function_for_map(
        ~...         super(Extractor, self).extract_features,
        ~...         images,
        ~...         type='png',
        ~...         **options
        ~...     )
        ~... )
        ~>>>

    :param target_function:
        function that you want to execute like
        target_function(item, *args, **kwargs).
    :param items:
        iterable of items for map
    :param args:
        positional arguments for target_function(item, *args, **kwargs)
    :param kwargs:
        named arguments for target_function(item, *args, **kwargs)
    :return: tuple(function_wrapper, dumped_items)
        It returns a tuple with
            * the function wrapper that unpacks and calls the target function;
            * a list of ``(dumped_function, item, args, kwargs)`` tuples,
              one per item.
    """
    # Imported here so the helper does not depend on a later notebook cell
    # having run ``import dill`` first.
    import dill

    # Serialize the function once and share the same payload across items.
    dumped_function = dill.dumps(target_function)
    dumped_items = [(dumped_function, item, args, kwargs) for item in items]
    return apply_packed_function_for_map, dumped_items

def getNodeEdge(filename):
    """Load an edge list from CSV and return its node ids and edge pairs.

    The CSV is read with its first column as the index and must contain the
    columns ``cust_a`` and ``cust_b``. Duplicate ``(cust_a, cust_b)`` rows
    and self-loops (``cust_a == cust_b``) are dropped.

    :param filename: path of the CSV file.
    :return: tuple ``(nodes, edges)``; ``nodes`` is the sorted array of
        distinct ids appearing in either column, ``edges`` is an ``(n, 2)``
        array of ``[cust_a, cust_b]`` rows.
    """
    df = pd.read_csv(filename, index_col=0)
    pairs = df.drop_duplicates(['cust_a', 'cust_b'])
    pairs = pairs[pairs.cust_a != pairs.cust_b]
    nodes = np.union1d(pairs.cust_a.unique(), pairs.cust_b.unique())
    # Keep only the two endpoint columns: callers unpack each row as
    # ``for A, B in edges``, which breaks if the CSV has any extra column.
    edges = pairs[['cust_a', 'cust_b']].values
    return nodes, edges

def getGraph(filename, U=True):
    """Load the edge-list CSV at ``filename`` into a SNAP graph.

    Nodes and edges come from ``getNodeEdge(filename)``.

    :param filename: path of the CSV file handed to ``getNodeEdge``.
    :param U: when truthy a ``TUNGraph`` is created, otherwise a ``TNGraph``.
    :return: the populated graph.
    """
    nodes, edges = getNodeEdge(filename)
    graph_type = TUNGraph if U else TNGraph
    graph = graph_type.New()
    # Add the nodes
    for node_id in nodes:
        graph.AddNode(node_id)
    # Add the edges
    for src, dst in edges:
        graph.AddEdge(src, dst)
    return graph

In [3]:
# Build the call-record graph from its CSV using getGraph's default U=True.
# The two commented-out lines load other CSVs the same way.
# app_conatcts=getGraph('chapter1data/app_contacts.csv')
# sms=getGraph('chapter1data/sms_net.csv')
call_record=getGraph('chapter1data/call_record_net.csv')
# Disabled: time getNodeStats on the graph and write the result to CSV.
# %time getNodeStats(call_record).to_csv('chapter1data/call_record_nodes_stats.csv')


In [2]:
def getNodeStats(UGraph):
    """Build a per-node statistics table for ``UGraph``.

    Starts from the degree table returned by ``getInOutDegrees`` and adds
    one column per metric: betweenness, degree, closeness and farness
    centrality, eccentricity, clustering coefficient and triad count.

    :param UGraph: graph to analyse.
    :return: DataFrame indexed by node id with the degree columns plus
        ``BetweennessCentr``, ``DegreeCentr``, ``ClosenessCentr``,
        ``FarnessCentr``, ``NodeEcc``, ``NodeClustCf`` and ``NodeTriads``.
    """
    stats_df = getInOutDegrees(UGraph)
    # Collect the ids once, in the same iteration order getInOutDegrees used
    # for the rows, instead of re-walking the graph for every column.
    node_ids = [NI.GetId() for NI in UGraph.Nodes()]

    Nodes = TIntFltH()
    Edges = TIntPrFltH()
    GetBetweennessCentr(UGraph, Nodes, Edges, 1.0)
    # Look each value up by node id. The previous code listed the values in
    # the hash table's own iteration order, which is not guaranteed to match
    # the row order of stats_df.
    stats_df['BetweennessCentr'] = [Nodes[node_id] for node_id in node_ids]

    per_node_metrics = [
        ('DegreeCentr', GetDegreeCentr),
        ('ClosenessCentr', GetClosenessCentr),
        ('FarnessCentr', GetFarnessCentr),
        ('NodeEcc', GetNodeEcc),
        ('NodeClustCf', GetNodeClustCf),
        ('NodeTriads', GetNodeTriads),
    ]
    for column, metric in per_node_metrics:
        stats_df[column] = [metric(UGraph, node_id) for node_id in node_ids]
    return stats_df
    

In [57]:
# Load the call-record CSV as a DataFrame, using its first column as the index.
CDRs=pd.read_csv('chapter1data/call_record_net.csv',index_col=0)

In [68]:
%%time
import dill
import multiprocessing
# All distinct ids that appear in either endpoint column.
CDRs_un=np.union1d(CDRs.cust_a.unique(),CDRs.cust_b.unique())
# Sample 20000 ids by random position (with replacement, unseeded).
nodes=CDRs_un[np.random.randint(0,CDRs_un.shape[0],20000)]
# nodes=CDRs.cust_a.values[:10000]
pool = multiprocessing.Pool(400)
try:
    # Per-node features, in this order:
    #   GetNodeEcc       - longest shortest path from the node
    #   GetNodeClustCf   - clustering coefficient of the node
    #   GetNodeTriads    - number of closed triads the node takes part in
    #   GetFarnessCentr  - average path length from the node
    features = pool.map(
        *pack_function_for_map(
            lambda NI: [
                GetNodeEcc(call_record, NI),
                GetNodeClustCf(call_record, NI),
                GetNodeTriads(call_record, NI),
                GetFarnessCentr(call_record, NI),
            ],
            nodes,
        )
    )
finally:
    # Always release the worker processes; the original never shut the pool
    # down. terminate() is what Pool's own context manager does on exit.
    pool.terminate()
    pool.join()
# NOTE(review): the saved rows carry no node id; row i corresponds to
# nodes[i], which is not written to the file.
pd.DataFrame(features,columns=['NodeEcc','ClustCf','NodeTriads','FarnessCentr']).to_csv('chapter1data/CDRs_NodeEcc.csv')

CPU times: user 3min 34s, sys: 42min 11s, total: 45min 45s
Wall time: 1h 41min 35s


In [None]:
%%time
import dill
import multiprocessing
filename='chapter1data/sms_MxWcc.out'
sample_num=10*10000

save_fileanme='chapter1data/sms_MxWcc.csv'
call_record=LoadPajek_PUNGraph(filename)
# Fixed: this read ``call_records`` (undefined here) instead of the graph
# loaded on the line above.
CDRs=[NI.GetId() for NI in call_record.Nodes()]
# Single-argument print(...) produces the same text on Python 2 and 3.
print('number of Nodes: %d' % len(CDRs))
# Fixed: the lower bound was 1, so the first node could never be sampled.
nodes=np.array(CDRs)[np.random.randint(0,len(CDRs),sample_num)]
pool = multiprocessing.Pool(500)
try:
    # Per-node features, in the same order as the column names below:
    #   GetDeg           - degree of the node
    #   GetNodeEcc       - longest shortest path from the node
    #   GetNodeClustCf   - clustering coefficient of the node
    #   GetNodeTriads    - number of closed triads the node takes part in
    #   GetFarnessCentr  - average path length from the node
    # GetFarnessCentr was missing from the list although 'FarnessCentr' is
    # a declared column, so the DataFrame construction could not succeed.
    features = pool.map(
        *pack_function_for_map(
            lambda NI: [
                call_record.GetNI(NI).GetDeg(),
                GetNodeEcc(call_record, NI),
                GetNodeClustCf(call_record, NI),
                GetNodeTriads(call_record, NI),
                GetFarnessCentr(call_record, NI),
            ],
            nodes,
        )
    )
finally:
    # Always release the worker processes; the original never shut the pool
    # down. terminate() is what Pool's own context manager does on exit.
    pool.terminate()
    pool.join()
pd.DataFrame(features,columns=['Deg','NodeEcc','ClustCf','NodeTriads','FarnessCentr']).to_csv(save_fileanme)

number of Nodes: 1012668


In [98]:
# Scratch arithmetic; presumably a candidate sample size — TODO confirm or delete this cell.
5*10000

50000

In [97]:
# Display the number of entries in CDRs.
len(CDRs)

3870431

In [58]:
# All distinct ids that appear in either endpoint column.
CDRs_un=np.union1d(CDRs.cust_a.unique(),CDRs.cust_b.unique())
# Draw 100 random positions (with replacement) and map them to the ids at
# those positions. The previous line stored the positions themselves, which
# are not node ids; the larger sampling cell indexes CDRs_un the same way.
nodes=CDRs_un[np.random.randint(0,CDRs_un.shape[0],100)]

In [71]:
# Wrap the sampled feature rows in a DataFrame with three named columns.
# NOTE(review): the sampling cells in this notebook build rows with four or
# more values; three column names only fit a `features` list from a run that
# returned three values per node — confirm which run produced `features`.
cc=pd.DataFrame(features,columns=['NodeEcc','ClustCf','NodeTriads'])

In [86]:
# Summary statistics of the feature columns in cc.
cc.describe()

Unnamed: 0,NodeEcc,ClustCf,NodeTriads
count,20000.0,20000.0,20000.0
mean,14.53795,0.069282,1.80545
std,2.125767,0.1942,12.767512
min,1.0,0.0,0.0
25%,14.0,0.0,0.0
50%,15.0,0.0,0.0
75%,15.0,0.018182,1.0
max,20.0,1.0,1062.0


In [None]:
import snap

UGraph = sms
# Example of walking the nodes one at a time (degree centrality):
# for NI in UGraph.Nodes():
#     DegCentr = GetDegreeCentr(UGraph, NI.GetId())
#     print "node: %d centrality: %f" % (NI.GetId(), DegCentr)

# One column per metric, filled in the listed order; each metric makes its
# own pass over the nodes of UGraph.
for column_name, node_metric in [
    ('ClosenessCentr', GetClosenessCentr),
    ('FarnessCentr', GetFarnessCentr),
    ('NodeEcc', GetNodeEcc),
    ('NodeClustCf', GetNodeClustCf),
    ('NodeTriads', GetNodeTriads),
]:
    sms_df[column_name] = [node_metric(UGraph, NI.GetId()) for NI in UGraph.Nodes()]

In [None]:
import snap

# Disabled alternative input: a random graph instead of UGraph.
# Graph = snap.GenRndGnm(snap.PNGraph, 100, 1000)
# Containers passed to GetBetweennessCentr, which fills them in place.
Nodes = snap.TIntFltH()
Edges = snap.TIntPrFltH()
snap.GetBetweennessCentr(UGraph, Nodes, Edges, 1.0)
# Disabled: print each key of Nodes with its value.
# for node in Nodes:
#     print "node: %d centrality: %f" % (node, Nodes[node])
    
# Display (value, key) pairs for every key in Nodes.
[(Nodes[node],node) for node in Nodes]

In [None]:
# Prune G2 by degree first; the (1, 0) and (0, 1) arguments are passed
# straight to DelDegKNodes.
DelDegKNodes(G2, 1, 0)
DelDegKNodes(G2, 0, 1)

# One [id, out-degree, in-degree] row per remaining node.
nodes_stats = [
    [NI.GetId(), NI.GetOutDeg(), NI.GetInDeg()]
    for NI in G2.Nodes()
]
degree_columns = ['id', 'OutDeg', 'InDeg']
call_net_nodes = pd.DataFrame(data=nodes_stats, columns=degree_columns).set_index('id')

In [14]:
import snap

# Generate a random PNGraph with GenRndGnm(…, 100, 1000) and save it in Pajek format.
Graph = snap.GenRndGnm(snap.PNGraph, 100, 1000)
snap.SavePajek(Graph, "Pajek_Graph1.out")

In [64]:
from igraph   import Graph 
# Read_Pajek builds and returns the loaded graph, so it is called on the
# class directly rather than on a throwaway empty instance.
g = Graph.Read_Pajek("Pajek_Graph1.out")

In [None]:
# Detect communities in g with igraph's label-propagation method.
comm=g.community_label_propagation()


In [65]:
# Display igraph's one-line summary of g.
g.summary()

'IGRAPH U-W- 732038 1679813 -- \n+ attr: color (v), fontsize (v), id (v), color (e), weight (e)'

In [59]:
# Print every community in comm, one per line.
# print(x) with a single argument behaves the same on Python 2 and Python 3;
# the bare ``print x`` statement is a SyntaxError on Python 3.
for x in comm:
    print(x)

<igraph.clustering.VertexClustering at 0x7fffbf940650>

In [3]:
import snap

# Generate a random PNGraph and collect its (degree, node count) pairs.
Graph = snap.GenRndGnm(snap.PNGraph, 100, 1000)
DegToCntV = snap.TIntPrV()
snap.GetDegCnt(Graph, DegToCntV)
for item in DegToCntV:
    # Val1 is printed as the degree and Val2 as the number of nodes.
    # The parenthesised single-argument print produces the same text on
    # Python 2 and Python 3; the bare print statement fails on Python 3.
    print("%d nodes with degree %d" % (item.GetVal2(), item.GetVal1()))



2 nodes with degree 11
1 nodes with degree 12
2 nodes with degree 13
1 nodes with degree 14
7 nodes with degree 15
4 nodes with degree 16
8 nodes with degree 17
7 nodes with degree 18
12 nodes with degree 19
13 nodes with degree 20
11 nodes with degree 21
6 nodes with degree 22
10 nodes with degree 23
6 nodes with degree 24
4 nodes with degree 25
3 nodes with degree 27
1 nodes with degree 28
1 nodes with degree 29
1 nodes with degree 32
