Code to (re)produce results in the paper 
"Manipulating the Online Marketplace of Ideas" 
by Xiaodan Lou, Alessandro Flammini, and Filippo Menczer
https://arxiv.org/abs/1907.06130

Notes:
* Requires Python 3.6 or later; e.g., `module load python/3.6.6`
* Remember that a link's direction denotes following, which is the opposite of the direction in which information spreads!

In [None]:
import os
import networkx as nx
import random
import numpy
import numpy as np
import math
import statistics
import csv
import matplotlib.pyplot as plt
from scipy import stats
from operator import itemgetter
from collections import defaultdict
import sys
import fcntl
import time
import pickle

In [None]:
# parameters and utility globals

n_humans = 1000 # 10k for paper
beta = 0.1 # bots/humans ratio; 0.1 for paper
p = 0.5 # for network clustering; 0.5 for paper
k_out = 3 # average no. friends within humans & bots; 3 for paper
alpha = 15 # depth of feed; 15 for paper
mu = 0.75 # average prob of new meme vs retweet; 0.75 for paper or draw from empirical distribution
# phi = 1 # bot deception >= 1: meme fitness higher than quality
# gamma = 0.1 # infiltration: probability that a human follows each bot
epsilon = 0.01 # threshold used to check for steady-state convergence
n_runs = 10 # number of simulations to average results
cvsfile = 'results.csv' # to save results for plotting

phis = [1, 5, 10] # bot deception >= 1: meme fitness higher than quality 
gammas = [0.001, 0.005, 0.01, 0.05, 0.1, 0.5, 1.0] # infiltration: probability that a human follows each bot

# If called with exactly one command-line argument, read it as gamma.
# The range is checked with an explicit exception rather than `assert`,
# because assertions are stripped when Python runs with -O.
# NOTE(review): the experiment cells loop over `gammas` and rebind `gamma`,
# so this override only matters to code that runs before those loops.
if len(sys.argv) == 2:
    gamma = float(sys.argv[1])
    if not 0 <= gamma <= 1:
        raise ValueError('gamma must be in [0, 1], got {}'.format(gamma))

Above are definitions

---

Below is main experiment

In [None]:
# experiment with random attachment; results are pickled per (phi, gamma)
# this is slow for large n_humans; better to run in parallel 
# on a server or cluster, eg, one process per gamma value
#
# Relies on `simulation`, `track_memes` and `select_time`, which are not
# defined in this cell.
# NOTE(review): `simulation` receives neither phi nor gamma as arguments, so
# it presumably reads the module-level `phi` and `gamma` bound by the loops
# below -- confirm against its definition.
save_dir = "results/random"
os.makedirs(save_dir, exist_ok=True)

q_random_all = {}
for phi in phis:
    for gamma in gammas:
        q_random = []
        valid_tracked_memes_random_all = []
        bad_memes_selected_time_random_all = {}
        avg_quality_random_all = []
        avg_diversity_random_all = []
        for sim in range(n_runs):
            print('Running Simulation ', sim, ' for phi = ', phi, ', gamma = ', gamma, ' ...', flush=True)

            # simulation start
            qr, qr_net = simulation(False, True, True, True, True) # random attach
            q_random.append(qr)
            q_random_all.setdefault((phi, gamma), []).append(qr)

            #### statistics for the current run ####
            ## tracked memes ##
            # A tracked meme is kept only if it no longer appears in any
            # agent's feed at the end of the run; it is stored as
            # (meme[0], tracked count).
            valid_tracked_memes = [
                (meme[0], track_memes.tracked_memes[meme])
                for meme in track_memes.tracked_memes
                if not any(meme == m
                           for agent in qr_net.nodes
                           for m in qr_net.nodes[agent]['feed'])
            ]
            valid_tracked_memes_random_all.extend(valid_tracked_memes)
            ## end tracked memes ##

            ## bad meme selections ##
            # Accumulate the two per-meme counters, keyed by meme[1], across runs.
            for meme, selected_time in select_time.bad_memes_selected_time.items():
                totals = bad_memes_selected_time_random_all.setdefault(meme[1], [0, 0])
                totals[0] += selected_time[0]
                totals[1] += selected_time[1]
            ## end bad meme selections ##

            ## avg quality ##
            avg_quality_random_all.append(qr)
            ## end avg quality ##

            ## avg diversity ##
            # Per agent: Shannon entropy of the distribution of m[0] values
            # (collected as "qualities") over the memes in the agent's feed.
            for agent in qr_net.nodes:
                qualities = [m[0] for m in qr_net.nodes[agent]['feed']]
                unique_qua, unique_qua_cnt = np.unique(qualities, return_counts=True)
                portion_of_qua = unique_qua_cnt / np.sum(unique_qua_cnt)
                diversity = - np.sum(portion_of_qua * np.log(portion_of_qua))
                avg_diversity_random_all.append(diversity)
            ## end avg diversity ##
            #### end statistics for the current run ####

        # turn the accumulated counters into per-run averages
        for totals in bad_memes_selected_time_random_all.values():
            totals[0] /= n_runs
            totals[1] /= n_runs

        # save tracked memes
        with open("{}/tracked_memes_random_phi{}_gamma{}.pkl".format(save_dir, phi, gamma), "wb") as fp:
            pickle.dump(valid_tracked_memes_random_all, fp)

        # save bad meme selected times
        with open("{}/bad_memes_selected_time_random_phi{}_gamma{}.pkl".format(save_dir, phi, gamma), "wb") as fp:
            pickle.dump(bad_memes_selected_time_random_all, fp)

        # save avg_quality
        with open("{}/avg_quality_random_phi{}_gamma{}.pkl".format(save_dir, phi, gamma), "wb") as fp:
            pickle.dump(np.mean(avg_quality_random_all), fp)

        # save avg_diversity
        with open("{}/avg_diversity_random_phi{}_gamma{}.pkl".format(save_dir, phi, gamma), "wb") as fp:
            pickle.dump(np.mean(avg_diversity_random_all), fp)

        # save kendall: rank correlation between meme[0] and the tracked count
        if valid_tracked_memes_random_all:
            quality, number_selected = zip(*valid_tracked_memes_random_all)
            kendall_tau, _ = stats.kendalltau(quality, number_selected)
        else:
            # nothing to correlate; store NaN instead of failing on the unpack
            kendall_tau = float('nan')
        with open("{}/kendall_random_phi{}_gamma{}.pkl".format(save_dir, phi, gamma), "wb") as fp:
            pickle.dump(kendall_tau, fp)

In [None]:
# experiment with preferential attachment; results are pickled per (phi, gamma)
#
# Relies on `simulation`, `track_memes` and `select_time`, which are not
# defined in this cell.
# NOTE(review): `simulation` receives neither phi nor gamma as arguments, so
# it presumably reads the module-level `phi` and `gamma` bound by the loops
# below -- confirm against its definition.
save_dir = "results/prefer"
os.makedirs(save_dir, exist_ok=True)

q_prefer_all = {}
for phi in phis:
    for gamma in gammas:
        q_prefer = []
        valid_tracked_memes_prefer_all = []
        bad_memes_selected_time_prefer_all = {}
        avg_quality_prefer_all = []
        avg_diversity_prefer_all = []
        for sim in range(n_runs):
            print('Running Simulation ', sim, ' for phi = ', phi, ', gamma = ', gamma, ' ...', flush=True)

            # simulation start
            qp, qp_net = simulation(True, True, True, True, True) # preferential attach
            q_prefer.append(qp)
            q_prefer_all.setdefault((phi, gamma), []).append(qp)

            #### statistics for the current run ####
            ## tracked memes ##
            # A tracked meme is kept only if it no longer appears in any
            # agent's feed of *this* run's network (qp_net); it is stored as
            # (meme[0], tracked count).
            valid_tracked_memes = [
                (meme[0], track_memes.tracked_memes[meme])
                for meme in track_memes.tracked_memes
                if not any(meme == m
                           for agent in qp_net.nodes
                           for m in qp_net.nodes[agent]['feed'])
            ]
            valid_tracked_memes_prefer_all.extend(valid_tracked_memes)
            ## end tracked memes ##

            ## bad meme selections ##
            # Accumulate the two per-meme counters, keyed by meme[1], across runs.
            for meme, selected_time in select_time.bad_memes_selected_time.items():
                totals = bad_memes_selected_time_prefer_all.setdefault(meme[1], [0, 0])
                totals[0] += selected_time[0]
                totals[1] += selected_time[1]
            ## end bad meme selections ##

            ## avg quality ##
            avg_quality_prefer_all.append(qp)
            ## end avg quality ##

            ## avg diversity ##
            # Per agent: Shannon entropy of the distribution of m[0] values
            # (collected as "qualities") over the memes in the agent's feed.
            for agent in qp_net.nodes:
                qualities = [m[0] for m in qp_net.nodes[agent]['feed']]
                unique_qua, unique_qua_cnt = np.unique(qualities, return_counts=True)
                portion_of_qua = unique_qua_cnt / np.sum(unique_qua_cnt)
                diversity = - np.sum(portion_of_qua * np.log(portion_of_qua))
                avg_diversity_prefer_all.append(diversity)
            ## end avg diversity ##
            #### end statistics for the current run ####

        # turn the accumulated counters into per-run averages
        for totals in bad_memes_selected_time_prefer_all.values():
            totals[0] /= n_runs
            totals[1] /= n_runs

        # save tracked memes
        with open("{}/tracked_memes_prefer_phi{}_gamma{}.pkl".format(save_dir, phi, gamma), "wb") as fp:
            pickle.dump(valid_tracked_memes_prefer_all, fp)

        # save bad meme selected times
        with open("{}/bad_memes_selected_time_prefer_phi{}_gamma{}.pkl".format(save_dir, phi, gamma), "wb") as fp:
            pickle.dump(bad_memes_selected_time_prefer_all, fp)

        # save avg_quality
        with open("{}/avg_quality_prefer_phi{}_gamma{}.pkl".format(save_dir, phi, gamma), "wb") as fp:
            pickle.dump(np.mean(avg_quality_prefer_all), fp)

        # save avg_diversity
        with open("{}/avg_diversity_prefer_phi{}_gamma{}.pkl".format(save_dir, phi, gamma), "wb") as fp:
            pickle.dump(np.mean(avg_diversity_prefer_all), fp)

        # save kendall: rank correlation between meme[0] and the tracked count
        if valid_tracked_memes_prefer_all:
            quality, number_selected = zip(*valid_tracked_memes_prefer_all)
            kendall_tau, _ = stats.kendalltau(quality, number_selected)
        else:
            # nothing to correlate; store NaN instead of failing on the unpack
            kendall_tau = float('nan')
        with open("{}/kendall_prefer_phi{}_gamma{}.pkl".format(save_dir, phi, gamma), "wb") as fp:
            pickle.dump(kendall_tau, fp)

In [None]:
q_ratio = []

# For every (phi, gamma) pair, write one CSV row holding gamma followed by
# the mean and the standard error (stdev / sqrt(n_runs)) of:
#   the random-attachment quality, the preferential-attachment quality,
#   and their per-run ratio (preferential / random).
# NOTE(review): the row carries gamma but not phi, so rows produced for
# different phi values cannot be told apart from the row alone.
for phi in phis:
    for gamma in gammas:
        q_random = q_random_all[(phi, gamma)]
        q_preferential = q_prefer_all[(phi, gamma)]
        q_ratio = np.divide(np.array(q_preferential), np.array(q_random)).tolist()

        # save results to CSV file
        row = [gamma]
        for series in (q_random, q_preferential, q_ratio):
            row.append(statistics.mean(series))
            row.append(statistics.stdev(series) / math.sqrt(n_runs))
        save_csv(row)

In [None]:
# plot data from CSV file: mean quality ratio with a +/- one standard error band

(q_mean_random, q_stderr_random,
 q_mean_preferential, q_stderr_preferential,
 q_mean_ratio, q_stderr_ratio) = read_csv('results.csv')

# lower and upper edge of the band, in the key order of q_mean_ratio
# (keys are presumably gamma values, since they are plotted on the gamma axis)
ymin = [q_mean_ratio[key] - q_stderr_ratio[key] for key in q_mean_ratio.keys()]
ymax = [q_mean_ratio[key] + q_stderr_ratio[key] for key in q_mean_ratio.keys()]

plt.xlabel(r'$\gamma$', fontsize=16)
plt.ylabel('Average Quality Ratio', fontsize=16)
plt.xscale('log')
# reference line: ratio 1 means both attachment strategies give equal quality
plt.axhline(y=1, lw=0.5, color='black')
plt.plot(
    list(q_mean_ratio.keys()),
    list(q_mean_ratio.values()),
)
plt.fill_between(
    list(q_mean_ratio.keys()),
    ymax,
    ymin,
    alpha=0.2,
)

In [None]:
# plot from files for different values of mu
# Each curve is the mean quality ratio with a +/- two standard error band.
# NOTE(review): both curves are read from the same 'results.csv'; they only
# differ if the file names are edited to point at per-mu result files.

plt.subplots(figsize=plt.figaspect(1.5))

# curve labelled mu = 0.75
_, _, _, _, ratio_mu75, stderr_mu75 = read_csv('results.csv')
ymin_mu75 = [ratio_mu75[key] - 2*stderr_mu75[key] for key in ratio_mu75.keys()]
ymax_mu75 = [ratio_mu75[key] + 2*stderr_mu75[key] for key in ratio_mu75.keys()]
plt.plot(
    list(ratio_mu75.keys()),
    list(ratio_mu75.values()),
    label=r'$\mu=0.75$',
)
plt.fill_between(list(ratio_mu75.keys()), ymax_mu75, ymin_mu75, alpha=0.2)

# curve labelled mu = 0.25
_, _, _, _, ratio_mu25, stderr_mu25 = read_csv('results.csv')
ymin_mu25 = [ratio_mu25[key] - 2*stderr_mu25[key] for key in ratio_mu25.keys()]
ymax_mu25 = [ratio_mu25[key] + 2*stderr_mu25[key] for key in ratio_mu25.keys()]
plt.plot(
    list(ratio_mu25.keys()),
    list(ratio_mu25.values()),
    label=r'$\mu=0.25$',
)
plt.fill_between(list(ratio_mu25.keys()), ymax_mu25, ymin_mu25, alpha=0.2)

# axes cosmetics
plt.xlabel(r'$\gamma$', fontsize=16)
plt.ylabel('Average Quality Ratio', fontsize=16)
plt.xticks(fontsize=14)
plt.yticks(fontsize=14)
plt.xscale('log')
for side in ('top', 'right'):
    plt.gca().spines[side].set_visible(False)
# reference line at ratio 1
plt.axhline(y=1, lw=0.5, color='black')
plt.legend(fontsize=14, loc='upper center')
plt.tight_layout()
plt.savefig('fig_targeting.pdf')

Above is main experiment

---

Everything below is supplementary testing and analyses

In [None]:
# plot Fig3
# A 3x2 figure: one row per metric (average quality, diversity, Kendall tau).
# Left column: metric vs gamma, one line per phi.  Right column: heatmap of
# the same metric over (gamma, phi), drawn by `draw_heatmap` (defined
# elsewhere in the notebook).

xs = phis
ys = gammas
phis1 = phis
phis2 = phis
wires = gammas
new_wires = gammas
cmap = None
xlabel = '$\\phi$'
ylabel = '$\\gamma$'

kendall_pic_title = 'Discriminative power'
avg_quality_pic_title = 'Average Quality'
diversity_pic_title = 'Diversity'

figure = plt.figure(figsize=(13, 15), facecolor='w')
markers = ["o", "s", "^"]

save_dir = "results/prefer"

# Result files are named after the attachment mode that produced them; the
# same mode is used for all three metrics.
mode = "random" if save_dir == "results/random" else "prefer"

# (file stem, y-label of the line plot, heatmap title, draw a legend?)
panels = [
    ("avg_quality", 'Average quality', avg_quality_pic_title, True),
    ("avg_diversity", 'Diversity', diversity_pic_title, False),
    ("kendall", 'Discriminative power', kendall_pic_title, False),
]

for row, (stem, line_ylabel, heatmap_title, with_legend) in enumerate(panels):
    file_template = "{}/" + stem + "_" + mode + "_phi{}_gamma{}.pkl"

    # distr plot: one line per phi, metric as a function of gamma
    ax = figure.add_subplot(3, 2, 2 * row + 1)
    for idx, phi in enumerate(phis1):
        values = []
        for gamma in wires:
            with open(file_template.format(save_dir, phi, gamma), "rb") as fp:
                values.append(np.mean(pickle.load(fp)))
        # cycle through the markers so more phi values than markers still plot
        ax.plot(new_wires, values, marker=markers[idx % len(markers)],
                label='$\\phi$:' + str(phi))

    ax.set_xlabel('$\\gamma$', fontsize=14)
    ax.set_ylabel(line_ylabel, fontsize=14)
    ax.set_xscale('log')
    ax.set_xlim((new_wires[0], new_wires[-1]))
    if with_legend:
        ax.legend(loc='upper right', fontsize=14)

    # heatmap plot: grid[i, j] is the metric for gamma = wires[i], phi = phis2[j]
    ax = figure.add_subplot(3, 2, 2 * row + 2)
    grid = np.zeros((len(wires), len(phis2)))
    for i, gamma in enumerate(wires):
        for j, phi in enumerate(phis2):
            with open(file_template.format(save_dir, phi, gamma), "rb") as fp:
                grid[i, j] = np.mean(pickle.load(fp))
    draw_heatmap(ax, grid, xs, ys, xlabel, ylabel, cmap, heatmap_title, vmin=None, vmax=None)

### save plot ###
plt.subplots_adjust(left=0.1, right=0.95, top=0.95, bottom=0.05, wspace=0.3, hspace=0.3)
plt.savefig(save_dir + "/all_distr_heatmap.png")

In [None]:
# plot Fig5
# Popularity distributions of tracked memes, split into high quality
# (first tuple element > 0) and low quality (otherwise), on a 2x3 grid:
# one row per phi, one column per gamma.
# `get_count`, `get_distr` and `getbins` are defined elsewhere in the notebook.

save_dir = "results/random"
if save_dir == "results/random":
    file_template = "{}/tracked_memes_random_phi{}_gamma{}.pkl"
else:
    file_template = "{}/tracked_memes_prefer_phi{}_gamma{}.pkl"


def _binned_popularity(popularities):
    """Return (mids, heights) of the binned distribution of *popularities*.

    Runs the get_count -> get_distr -> getbins pipeline, averages the heights
    of bins that share the same mid point, and returns both sequences sorted
    by mid point.  Returns two empty tuples when there are no bins.
    """
    count = get_count(popularities)
    distr, sum_ = get_distr(count)
    mids, heights = getbins(distr, sum_)

    heights_by_mid = defaultdict(list)
    for mid, height in zip(mids, heights):
        heights_by_mid[mid].append(height)

    averaged = sorted(
        ((mid, np.mean(values)) for mid, values in heights_by_mid.items()),
        key=lambda pair: pair[0],
    )
    if not averaged:
        return (), ()
    return tuple(zip(*averaged))


fig, axs = plt.subplots(2, 3, figsize=(14, 8))
for i, phi in enumerate([1, 10]):
    for j, gamma in enumerate([0.001, 0.005, 0.01]):
        fname = file_template.format(save_dir, phi, gamma)
        with open(fname, "rb") as fp:
            data = pickle.load(fp)

        # data is a list of (quality, number of times selected) pairs
        quality, number_selected = zip(*data)

        high_quality_pop = [pop for qua, pop in zip(quality, number_selected) if qua > 0]
        low_quality_pop = [pop for qua, pop in zip(quality, number_selected) if not qua > 0]

        h_mids, h_heights = _binned_popularity(high_quality_pop)
        l_mids, l_heights = _binned_popularity(low_quality_pop)

        ax = axs[i][j]
        ax.loglog(h_mids, h_heights, marker='s', label='high quality')
        ax.loglog(l_mids, l_heights, marker='^', label='low quality')
        ax.set_xlabel('popularity', fontsize=14)
        ax.set_ylabel('P(popularity)', fontsize=14)
        ax.tick_params(labelsize=14)
        ax.annotate('$\\gamma={}$\n$\\phi={}$'.format(gamma, phi), xy=(0.05, 0.05), xycoords='axes fraction', fontsize=12)
        if i == 0 and j == 0:
            ax.legend(loc="upper right", fontsize=15)

plt.subplots_adjust(left=0.08, right=0.92, top=0.92, wspace=0.3, hspace=0.3)
# Save before showing: after show() the figure may already be released, and
# savefig would then write an empty image.  The file goes inside save_dir.
fig.savefig(os.path.join(save_dir, "meme_quality_random_distr.png"))
plt.show()
plt.close(fig)


In [None]:
# plot Fig6
# Left panel: binned relation between the two per-meme counters stored in
# the "bad_memes_selected_time" pickles (x: value[1], labelled bot posts;
# y: value[0], labelled human posts), on log-log axes with a y = x guide.
# Right panel: log(height) / log(mid) for each bin, labelled exponent eta.
# `get_distr` and `getbins` are defined elsewhere in the notebook.

save_dir = "results/random"
if save_dir == "results/random":
    file_template = "{}/bad_memes_selected_time_random_phi{}_gamma{}.pkl"
else:
    file_template = "{}/bad_memes_selected_time_prefer_phi{}_gamma{}.pkl"

for i, phi in enumerate([1]):
    fig = plt.figure(figsize=(10, 5))
    for j, gamma in enumerate([0.001]): #[0.5]
        fname = file_template.format(save_dir, phi, gamma)
        with open(fname, "rb") as fp:
            data = pickle.load(fp)

        # keep only entries where both counters are positive (needed for the
        # log-log plot)
        good_selected = []
        bad_selected = []
        for value in data.values():
            if value[0] <= 0 or value[1] <= 0:
                continue
            good_selected.append(value[0])
            bad_selected.append(value[1])

        # map bot-post count -> human-post count; a later entry with the same
        # bot-post count overwrites an earlier one
        count = dict(zip(bad_selected, good_selected))
        distr_x, distr_y = get_distr(count)
        mids, heights = getbins(distr_x, distr_y)
        # NOTE(review): a bin with mid == 1 makes log(mid) zero -- confirm
        # such bins cannot occur or are acceptable as inf/nan.
        ratios = [np.log(height_)/np.log(mid_) for height_, mid_ in zip(heights, mids)]

        plt.subplot(121)
        plt.loglog(mids, heights, marker='o', label='$\\gamma$:'+str(gamma))
        plt.subplot(122)
        plt.plot(mids, ratios, marker='o', label='$\\gamma$:'+str(gamma))
        plt.xscale('log')

    # decorate both panels; the y = x guide spans the last curve's bin range
    plt.subplot(121)
    plt.loglog([min(mids), max(mids)], [min(mids), max(mids)], '--')
    plt.xlabel("Bot posts per meme", fontsize=14)
    plt.ylabel("Human posts per meme", fontsize=14)
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)
    plt.margins(0.1)
    plt.legend(loc='best', fontsize=14)

    plt.subplot(122)
    plt.xlabel("Bot posts per meme", fontsize=14)
    plt.ylabel("Exponent $\\eta$", fontsize=14)
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)
    plt.margins(0.1)
    plt.legend(loc='best', fontsize=14)

    plt.subplots_adjust(left=0.1, bottom=0.14, wspace=0.4)
    # Save before showing: after show() the figure may already be released,
    # and savefig would then write an empty image.  The file goes inside
    # save_dir.
    fig.savefig(os.path.join(save_dir, "bad_meme_selected_random_distr_{}".format(phi)))
    plt.show()
    plt.close(fig)