In [207]:
import pandas as pd
import numpy as np
from igraph import *
import scipy.sparse as sparse
from datetime import date

In [180]:
data = pd.read_csv('../ucu_sentiment-master/data/train_set.csv', usecols=range(1,11),  parse_dates=['timestamp', 'thread_timestamp'])

In [181]:
#select top 40 channels by message count

channels_best = list(data.channel.value_counts()[:40].index)
data = data[data["channel"].isin(channels_best)]

In [182]:
#split data by years

date_2017 = date(2017, 1, 1)
date_2016 = date(2016, 1, 1)
data_train_2015 = data[data['timestamp'] < date_2016]
data_train_2016 = data[(data['timestamp'] < date_2017) & (data['timestamp'] >= date_2016)]
data_train_2017 = data[data['timestamp'] >= date_2017]

In [183]:
#create table rows - user, column - channels, value - message count, only 2017

data_table = pd.DataFrame(data_train_2017.groupby(['user_id', 'channel'])['msg_id'].count()).unstack(fill_value=0)

In [186]:
# for reducing count of edges with low weight - update values with < 20 to 0

for i, rows in data_table.head().iterrows():
    for j, item in rows.iteritems():
        if item < 20:
            data_table.loc[i,j] = 0

In [None]:
# make natrix
A = data_table.as_matrix()

In [None]:
# transpose matrix
A_persons = np.dot(A, A.T)
A_channel = np.dot(A.T, A)

In [209]:
# exclude value in diagonal 
np.fill_diagonal(A_channel, 0)

In [253]:
# for reducing count of edges with low weight - update values with < 10000 to 0

for (x,y), value in np.ndenumerate(A_channel):
    if value < 10000:
        A_channel[x, y] = 0

In [255]:
# sparse matrix for graf
A_channel_sparce = sparse.coo_matrix(A_channel)

In [259]:
# create graf
g = Graph()
g.add_vertices(A_channel_sparce.shape[0])

# saved names of channels
g.vs.set_attribute_values('name', data_table.columns.levels[1].tolist())

for i,j,v in zip(A_channel_sparce.row, A_channel_sparce.col, A_channel_sparce.data):
        g.add_edge(i,j,weight=v)

In [263]:
g.summary()

'IGRAPH UNW- 40 1022 -- \n+ attr: name (v), weight (e)'

In [264]:
#saved graphml file for Gephi
g.save('channels2017_2.graphml')