In [None]:
# Colab-only setup: attach Google Drive to the runtime file system so the
# notebook can read its data files from there.
from google.colab import drive
# Mount point is /content/gdrive; force_remount=True is passed so the mount is
# redone on every run of this cell.
drive.mount('/content/gdrive', force_remount=True)

In [None]:
# Switch the working directory to the project folder on the mounted Drive;
# later cells open "nytimes.pkl" by relative path.
%cd /content/gdrive/MyDrive/DL/Facebook/fbscraper/nytimes

In [None]:
import pickle as pkl
import numpy as np
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
from matplotlib import colors
from tqdm import tqdm
from pprint import pprint

In [None]:
# Tallies filled in by the counting cell further down.
# author name -> number of top-level comments that author wrote
comment_count = {}
# author name -> number of direct replies received on that author's top-level comments
reply_count = {}
# (comment author, reply author) -> number of times the second replied to the first
interaction = {}

In [None]:
# "nytimes.pkl" holds a sequence of pickled objects written back to back;
# read them one at a time until the end of the file is reached.
# NOTE(review): unpickling can execute arbitrary code, so only load files
# produced by a trusted source.
posts = []

with open("nytimes.pkl", "rb") as pickle_file:
    while True:
        try:
            record = pkl.load(pickle_file)
        except EOFError:
            # No more pickled objects in the stream.
            break
        posts.append(record)

In [None]:
# Tally, over every loaded post:
#   comment_count[a]    - top-level comments written by author a
#   reply_count[a]      - direct replies received on a's top-level comments
#   interaction[(a, b)] - number of replies b wrote under a's comments
#
# The dictionaries are created in an earlier cell, so empty them first:
# otherwise re-running this cell would add every post a second time.
comment_count.clear()
reply_count.clear()
interaction.clear()

for post in posts:
    for comment in post["comments"]:
        author = comment["author"]["name"]
        # dict.get with a default replaces the bare try/except that was used
        # as a "key missing" test and would also have hidden unrelated errors.
        comment_count[author] = comment_count.get(author, 0) + 1

        replies = comment["replies"]
        reply_count[author] = reply_count.get(author, 0) + len(replies)

        for reply in replies:
            pair = (author, reply["author"]["name"])
            interaction[pair] = interaction.get(pair, 0) + 1

In [None]:
def build_graph(n1=0, n2=0):
    """Create network of users for link farming detection.

    Reads the module-level tallies ``comment_count``, ``reply_count`` and
    ``interaction`` and keeps only the authors that pass both thresholds.

    Parameters
    ----------
    n1 : int, optional
        Minimum number of level-1 comments an author must have written.
    n2 : int, optional
        Minimum number of direct replies an author must have received.

    Returns
    -------
    matrix : list[list[int]]
        ``author_count`` x ``author_count`` table; ``matrix[i][j]`` is
        ``interaction[(author_i, author_j)]`` or 0 when that pair never
        interacted.
    ntwrk : networkx.DiGraph
        Directed graph with one weighted edge ``i -> j`` per non-zero matrix
        entry. Nodes are created only through edges, so a retained author
        with no interaction inside the retained set is not a node.
    author_map : dict
        Author name -> integer id (row/column index and node label).
    rev_map : list
        Integer id -> author name.
    author_count : int
        Number of retained authors.
    """
    # Retained authors, in comment_count's iteration order.
    A = []
    for author, n_comments in comment_count.items():
        if n_comments < n1:
            continue
        n_replies = reply_count.get(author)
        # An author missing from reply_count is skipped rather than treated
        # as having zero replies.
        if n_replies is not None and n_replies >= n2:
            A.append(author)

    author_count = len(A)
    rev_map = list(A)  # id -> name
    author_map = {name: idx for idx, name in enumerate(A)}  # name -> id

    matrix = [[0] * author_count for _ in range(author_count)]

    # One pass over the recorded interactions instead of probing all
    # author_count**2 pairs (most of which do not exist).
    edges = []
    for (src, dst), weight in interaction.items():
        i = author_map.get(src)
        j = author_map.get(dst)
        if i is None or j is None:
            continue  # at least one endpoint was filtered out
        matrix[i][j] = weight
        if weight != 0:
            edges.append((i, j, weight))

    # Row-major (i, j) order, i.e. the order a scan of the matrix would give,
    # so node and edge insertion order in the graph is deterministic.
    edges.sort()

    ntwrk = nx.DiGraph()
    ntwrk.add_weighted_edges_from(edges)

    return matrix, ntwrk, author_map, rev_map, author_count

In [None]:
# Threshold grid for the sweep. Each lambda is passed to build_graph as n1
# (minimum level-1 comments written) and each rho as n2 (minimum direct
# replies received).
lambdas = [1, 2, 5, 10, 20, 50]
rhos = [1, 2, 5, 10, 20, 50, 100]

# String versions of the thresholds, used as tick labels by the plots.
lambdas_str = [str(value) for value in lambdas]
rhos_str = [str(value) for value in rhos]

L = len(lambdas)
R = len(rhos)

# One L x R table per statistic; rows follow lambdas, columns follow rhos.
count = [[0 for j in range(R)] for i in range(L)]
reciprocity = [[0 for j in range(R)] for i in range(L)]
n_scc = [[0 for j in range(R)] for i in range(L)]

for i in range(L):
    for j in range(R):
        matrix, ntwrk, author_map, rev_map, author_count = build_graph(lambdas[i], rhos[j])
        count[i][j] = author_count
        # networkx raises NetworkXError ("Not defined for empty graphs") when
        # the graph has no edges, which strict thresholds can produce. Record
        # NaN for those cells instead of aborting the whole sweep.
        if ntwrk.number_of_edges() > 0:
            reciprocity[i][j] = nx.reciprocity(ntwrk)
        else:
            reciprocity[i][j] = float("nan")
        n_scc[i][j] = nx.number_strongly_connected_components(ntwrk)

In [None]:
# Heatmap of the number of retained authors for every (lambda, rho) pair,
# drawn on a logarithmic colour scale. Rows are lambda, columns are rho.
fig, ax = plt.subplots()
cax = ax.matshow(count, interpolation='nearest', norm=colors.LogNorm())
fig.colorbar(cax, label="number of authors")
ax.set_xticks(np.arange(R))
ax.set_yticks(np.arange(L))
ax.set_xticklabels(rhos_str)
ax.set_yticklabels(lambdas_str)
# Raw strings: "\l" is not a valid escape sequence in a normal string literal
# and triggers a warning on recent Python versions.
ax.set_ylabel(r"$\lambda$", rotation='horizontal')
ax.set_xlabel(r"$\rho$")
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()

In [None]:
# Heatmap of graph reciprocity for every (lambda, rho) pair, drawn on a
# logarithmic colour scale. Rows are lambda, columns are rho.
fig, ax = plt.subplots()
cax = ax.matshow(reciprocity, interpolation='nearest', norm=colors.LogNorm())
fig.colorbar(cax, label="reciprocity")
ax.set_xticks(np.arange(R))
ax.set_yticks(np.arange(L))
ax.set_xticklabels(rhos_str)
ax.set_yticklabels(lambdas_str)
# Raw strings: "\l" is not a valid escape sequence in a normal string literal
# and triggers a warning on recent Python versions.
ax.set_ylabel(r"$\lambda$", rotation='horizontal')
ax.set_xlabel(r"$\rho$")
plt.setp(ax.get_xticklabels(), rotation=90)
plt.show()