In [None]:
#from laserembeddings import Laser
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans

from sklearn.feature_extraction.text import TfidfVectorizer

from sklearn.decomposition import PCA
import pickle
import csv
import re

import tensorflow as tf
import traja
from tslearn.metrics import dtw, dtw_path
from datetime import datetime, timedelta
from holoviews import opts, dim
from matplotlib import pyplot as plt

In [None]:
# Show which GPU devices TensorFlow can see (an empty list means none).
tf.config.list_physical_devices('GPU')

In [None]:
# Load the two keyword-filtered CSV exports from the working directory.
# NOTE(review): neither `fb` nor `big_df` is referenced again in the cells
# visible here — confirm they are still needed.
fb = pd.read_csv ("sv_keywords_filtered_NEW2.csv")
big_df = pd.read_csv("big_df_keywords_filtered_NEW.csv")

In [None]:
# Main input table. The cells below read its `Group.Name`, `Year`, `X1`,
# `X2`, `denial_binary` and `fasttext` columns.
tsne_scores_df = pd.read_csv("tsne_scores_df2.csv")

# BUILD PLOTS

In [None]:
# One row per actor: summed denial flags, mean X1/X2 coordinates and the
# number of non-null `fasttext` entries.
sv_actors_summary = (
    tsne_scores_df
    .groupby('Group.Name', as_index=False)
    .agg({
        'denial_binary': 'sum',
        'X1': 'mean',
        'X2': 'mean',
        'fasttext': 'count',
    })
)
sv_actors_summary

In [None]:
# Same aggregation as the per-actor summary, but split by year as well.
sv_actors_years = (
    tsne_scores_df
    .groupby(['Year', 'Group.Name'], as_index=False)
    .agg({
        'denial_binary': 'sum',
        'X1': 'mean',
        'X2': 'mean',
        'fasttext': 'count',
    })
)

# Pack each row's (X1, X2) pair into a single tuple-valued column.
sv_actors_years['coord'] = sv_actors_years[['X1', 'X2']].apply(tuple, axis=1)

In [None]:
# Count the yearly rows available for each actor and keep only actors with
# more than one of them.
sv_actors_count = (
    sv_actors_years
    .groupby('Group.Name', as_index=False)
    .agg({'fasttext': 'count'})
    .loc[lambda counts: counts['fasttext'] > 1]
    .reset_index(drop=True)
)
active_actors = sv_actors_count['Group.Name'].unique().tolist()

# Years are taken from the unfiltered per-year table.
unique_years = sv_actors_years['Year'].unique().tolist()
len(active_actors)

In [None]:
# Restrict both the per-year and the per-actor tables to the active actors.
sv_actors_years = (
    sv_actors_years
    .loc[lambda rows: rows["Group.Name"].isin(active_actors)]
    .reset_index(drop=True)
)

sv_actors_summary = (
    sv_actors_summary
    .loc[lambda rows: rows["Group.Name"].isin(active_actors)]
    .reset_index(drop=True)
)

In [None]:
#IDENTIFY ACTORS SHARING CLIMATE CHANGE DENIAL CONTENT

# Per actor: number of counted messages, number flagged as denial, and the
# ratio between the two. An actor is kept when it has more than one flagged
# message, a ratio above 0.05, and is one of the active actors.
denial_df = (
    tsne_scores_df
    .groupby('Group.Name', as_index=False)
    .agg({'fasttext': 'count', 'denial_binary': 'sum'})
    .sort_values(by='denial_binary', ascending=False)
    .assign(denial_ratio=lambda d: d["denial_binary"] / d["fasttext"])
    .loc[lambda d: (d["denial_binary"] > 1) & (d["denial_ratio"] > 0.05)]
    .loc[lambda d: d["Group.Name"].isin(active_actors)]
    .reset_index(drop=True)
)
denial_names = denial_df['Group.Name'].tolist()
len(denial_names)

In [None]:
# Print the mean, then the median, of the denial ratio over the retained actors.
for stat in (np.mean, np.median):
    print(stat(denial_df.denial_ratio))

# FIND TRAJECTORIES

In [None]:
# Build one trajectory per active actor: the `coord` values of every year in
# which the actor has a row, visited in the order of `unique_years`.
# NOTE(review): neither `coord_list` nor `year_list` is read again in the
# cells visible here — the distance matrix used later is loaded from disk.
coord_list = []  # one coordinate array per actor, same order as active_actors
year_list=[]  # the years that contributed a point, per actor

for actor in active_actors:
    arr = []
    yr = []
    for year in unique_years:
        # `coord` entries for this (year, actor) pair; empty when the actor has no row that year.
        coor = sv_actors_years.coord[(sv_actors_years.Year == year)&(sv_actors_years['Group.Name'] == actor)]
        if coor.shape != (0,):
            arr.append(coor.values)
            yr.append(year)
        
    # Flatten the per-year arrays of tuples into one sequence, then rebuild a
    # numeric array from the tuples.
    # NOTE(review): presumably ends up with one (X1, X2) row per year — confirm.
    # np.concatenate raises if `arr` is empty, i.e. if an actor had no rows at all.
    arr = np.concatenate(np.array(arr))
    arr = np.vstack(np.array(arr.tolist()))
    coord_list.append(arr)
    year_list.append(yr)

# DYNAMIC TIME WARPING + SMOOTHING + CLUSTERING

In [None]:
# Restore the object stored in the local file 'ar_smoothed'; it is passed
# below as a precomputed distance matrix.
# NOTE(security): pickle.load can execute arbitrary code — only load files
# that were produced by a trusted run of this project.
with open('ar_smoothed', 'rb') as pickle_fh:
    ar_smoothed = pickle.load(pickle_fh)

In [None]:
from sklearn.cluster import AgglomerativeClustering
import warnings
# NOTE(review): this silences every warning for the rest of the session,
# including pandas indexing warnings in later cells.
warnings.filterwarnings("ignore")

# Complete-linkage agglomerative clustering on the precomputed distance
# matrix, cut into 54 clusters. Label i belongs to row i of `ar_smoothed`.
# The former `with tf.device('/gpu:0')` wrapper was dropped: scikit-learn
# does not run TensorFlow ops, so the device context had no effect here.
clust_smoothed = AgglomerativeClustering(
    metric='precomputed',
    linkage='complete',
    n_clusters=54,
).fit_predict(ar_smoothed)
    

In [None]:
from sklearn.metrics import silhouette_score
# Silhouette score of the cluster labels, computed on the same precomputed
# distance matrix that was used for the clustering.
sil_score = silhouette_score(ar_smoothed, clust_smoothed, metric = 'precomputed')
sil_score

In [None]:
# Attach each actor's cluster label to the per-actor summary.
# `clust_smoothed[i]` is taken to be the label of `active_actors[i]`, so both
# sequences must have the same length.
assert len(active_actors) == len(clust_smoothed), (
    "cluster labels and active_actors differ in length"
)

# The column starts as empty strings, so it stays object-typed, as before.
sv_actors_summary['cluster_smoothed'] = ""

for actor, cluster_label in zip(active_actors, clust_smoothed):
    # Single .loc[rows, column] assignment: the previous chained form
    # (df[col].loc[mask] = value) may write to a temporary copy and does
    # nothing under pandas Copy-on-Write.
    sv_actors_summary.loc[sv_actors_summary['Group.Name'] == actor, 'cluster_smoothed'] = cluster_label

In [None]:
# Attach each actor's cluster label to every per-year row of that actor.
# `clust_smoothed[i]` is taken to be the label of `active_actors[i]`, so both
# sequences must have the same length.
assert len(active_actors) == len(clust_smoothed), (
    "cluster labels and active_actors differ in length"
)

# The column starts as empty strings, so it stays object-typed, as before.
sv_actors_years['cluster_smoothed'] = ""

for actor, cluster_label in zip(active_actors, clust_smoothed):
    # Single .loc[rows, column] assignment: the previous chained form
    # (df[col].loc[mask] = value) may write to a temporary copy and does
    # nothing under pandas Copy-on-Write.
    sv_actors_years.loc[sv_actors_years['Group.Name'] == actor, 'cluster_smoothed'] = cluster_label

sv_actors_years

In [None]:
# One facet per cluster (three facets per row); within each facet, draw every
# actor's yearly (X1, X2) positions as a semi-transparent line.
g = sns.FacetGrid(sv_actors_years, col = "cluster_smoothed", col_wrap=3)#, margin_titles=True)
g.map_dataframe(sns.lineplot, x="X1", y="X2", hue = "Group.Name", alpha = 0.3)
# Same fixed axis window for all facets.
g.set(xlim=(-100, 100), ylim=(-100, 100))
g.tight_layout()

In [None]:
# Snapshot of the per-actor summary (index included as the first CSV column).
# The same file name is written again further down, after more columns are added.
sv_actors_summary.to_csv("sv_actors_summary.csv")

# FIND AVERAGE PATHS FOR EACH CLUSTER

In [None]:
# Average yearly position of each cluster: for every cluster label, take its
# members' per-year rows and average X1 and X2 within each year.
clusters_smoothed = sv_actors_years['cluster_smoothed'].unique()

mean_clusters_years = pd.concat([
    sv_actors_years[sv_actors_years['cluster_smoothed'] == cluster]
    .groupby('Year', as_index=False)
    .agg({'X1': 'mean', 'X2': 'mean'})
    .assign(cluster_smoothed=cluster)
    for cluster in clusters_smoothed
])

In [None]:
# Summary rows of the denial-sharing actors, and the distinct clusters they fall in.
dat_denial = (
    sv_actors_summary
    .loc[lambda rows: rows["Group.Name"].isin(denial_names)]
    .reset_index(drop=True)
)
denial_clusters = dat_denial['cluster_smoothed'].unique().tolist()
len(denial_clusters)

In [None]:
# Mean yearly paths of only those clusters that contain a denial-sharing actor.
mean_clusters_denial = mean_clusters_years.loc[
    mean_clusters_years['cluster_smoothed'].isin(denial_clusters)
]
mean_clusters_denial

In [None]:
# Share of each actor's counted messages that are flagged as denial.
sv_actors_summary ["denial_ratio"] = sv_actors_summary["denial_binary"]/sv_actors_summary["fasttext"]

In [None]:
# Scatter of every actor's mean (X1, X2) position, coloured by denial ratio
# on the continuous "flare" colour map.
sns.set(rc={'figure.figsize':(8, 10)})
p5= sns.scatterplot(x="X1", y="X2",
                    palette=sns.color_palette("flare", as_cmap=True),
                    hue="denial_ratio", 
                    data=sv_actors_summary, alpha = 1)

# Reposition the legend and give it a descriptive two-line title.
sns.move_legend(p5, "lower left",
                title='Ratio of climate change\ndenial-related messages shared', alignment = "left")


In [None]:
# Summary rows of the denial-sharing actors only. The explicit .copy() makes
# this an independent frame, so the column assignment below does not write
# into a slice of sv_actors_summary.
denial_actors_summary = sv_actors_summary[sv_actors_summary["Group.Name"].isin(denial_names)].copy()
denial_actors_summary["denial_ratio"] = denial_actors_summary["denial_binary"] / denial_actors_summary["fasttext"]

# Distribution of the denial ratio among these actors.
plt.hist(denial_actors_summary["denial_ratio"], bins=100)
plt.show()

# MEAN DISTANCES

In [None]:
#ACTORS SHARING CLIMATE CHANGE DENIAL CONTENT

# Trajectory metrics for each denial-sharing actor, appended in the order of
# `denial_names`.
# NOTE(review): confirm that traja.calc_displacement returns a single number
# per trajectory; if it returns per-step values, the 'displacement' entries
# collected here are sequences, not scalars.
denial_lengths = []
denial_distances = []
denial_displacements = []

for denial_name in denial_names:
    actor_rows = sv_actors_years["Group.Name"] == denial_name
    df = (
        sv_actors_years
        .loc[actor_rows, ["X1", "X2", "Year"]]
        .reset_index(drop=True)
        .rename(columns={"X1": 'x', "X2": 'y', "Year": "time"})
    )
    # traja works on its own DataFrame subclass with x / y / time columns.
    df = traja.TrajaDataFrame(df)
    denial_lengths.append(traja.length(df))
    denial_distances.append(traja.distance(df))
    denial_displacements.append(traja.calc_displacement(df))

In [None]:
# Collect the per-actor trajectory metrics into one table, keyed by actor
# name; all four lists follow the order of `denial_names`.
denial_actors_traja = pd.DataFrame ({'length': denial_lengths, 
                                  'distance': denial_distances, 
                                  'displacement': denial_displacements,
                                 'Group.Name': denial_names})

denial_actors_traja 

In [None]:
#ALL ACTORS

# Same trajectory metrics as for the denial-sharing actors, now for every
# active actor, appended in the order of `active_actors`.
lengths = []
distances = []
displacements = []

for actor_name in active_actors:
    actor_rows = sv_actors_years["Group.Name"] == actor_name
    df = (
        sv_actors_years
        .loc[actor_rows, ["X1", "X2", "Year"]]
        .reset_index(drop=True)
        .rename(columns={"X1": 'x', "X2": 'y', "Year": "time"})
    )
    # traja works on its own DataFrame subclass with x / y / time columns.
    df = traja.TrajaDataFrame(df)
    lengths.append(traja.length(df))
    distances.append(traja.distance(df))
    displacements.append(traja.calc_displacement(df))

In [None]:
# Per-actor trajectory metrics, keyed by actor name (lists follow the order
# of `active_actors`).
all_actors_traja = pd.DataFrame({'length': lengths,
                                 'distance': distances,
                                 'displacement': displacements,
                                 'Group.Name': active_actors})

# Drop any metric columns left over from a previous run of this cell before
# merging; otherwise a re-run would produce suffixed duplicates such as
# length_x / length_y. On a first run nothing is dropped.
metric_columns = all_actors_traja.columns.difference(['Group.Name'])
sv_actors_summary = (
    sv_actors_summary
    .drop(columns=metric_columns, errors='ignore')
    .merge(all_actors_traja, on='Group.Name')
)
sv_actors_summary['denial_ratio'] = sv_actors_summary['denial_binary'] / sv_actors_summary['fasttext']

# Overwrites the snapshot written earlier, now including the metric columns.
sv_actors_summary.to_csv("sv_actors_summary.csv")

In [None]:
# Pairwise correlations between the numeric summary columns, shown as a
# colour-graded table.
sv_actors_summary.corr(numeric_only=True).style.background_gradient(cmap='coolwarm')

In [None]:
# Distinct cluster labels, in the order they appear in the mean-path table.
clusters_smoothed = mean_clusters_years['cluster_smoothed'].unique()
clusters_smoothed

In [None]:
#ALL CLUSTERS

# For every cluster: compute trajectory metrics of its mean yearly path and
# draw the smoothed path in its own panel, labelled with first and last year.
lengths = []
distances = []
displacements = []
clusters = []

# Size the grid from the number of clusters (54 clusters -> 14 x 4, as before)
# instead of hard-coding it.
n_cols = 4
n_rows = max(1, int(np.ceil(len(clusters_smoothed) / n_cols)))

plt.rcParams['figure.figsize'] = [12, 50]
fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, sharex=True, sharey=True, squeeze=False)
custom_xlim = (-100, 100)
custom_ylim = (-100, 100)

plt.setp(axes, xlim=custom_xlim, ylim=custom_ylim)
axes = axes.ravel()

for i in range(0, len(clusters_smoothed)):
    df = mean_clusters_years[["X1", "X2", "Year"]][mean_clusters_years.cluster_smoothed == clusters_smoothed[i]].reset_index(drop = True)
    df = df.rename(columns = {"X1": 'x', "X2": 'y', "Year": "time"})
    df = traja.TrajaDataFrame(df)

    # Metrics are taken from the unsmoothed mean path.
    lengths.append(traja.length(df))
    distances.append(traja.distance(df))
    displacements.append(traja.calc_displacement(df))
    clusters.append(clusters_smoothed[i])

    # Smoothing is applied for plotting only.
    df = traja.smooth_sg(df, w = 3, p=1)

    plot_title = "Cluster " + str (clusters_smoothed[i])
    axes[i].plot(df.x, df.y, '-o')
    axes[i].title.set_text(plot_title)
    # Label the first point (slightly offset) and the last point with their year.
    axes[i].text(df.x[0]-1, df.y[0]+1, df.time[0])

    l = len(df)-1
    axes[i].text(df.x[l], df.y[l], df.time[l])

# Remove only the panels that received no cluster. Previously a fixed panel
# (row 12, column 1 = flat index 49) was deleted before plotting, which hid
# the 50th cluster and left the real spare panels empty.
for unused_ax in axes[len(clusters_smoothed):]:
    fig.delaxes(unused_ax)

plt.show()


In [None]:

# Mean yearly path of cluster 52, smoothed and drawn on its own figure.
df = (
    mean_clusters_years
    .loc[mean_clusters_years.cluster_smoothed == 52, ["X1", "X2", "Year"]]
    .reset_index(drop=True)
    .rename(columns={"X1": 'x', "X2": 'y', "Year": "time"})
)
df = traja.smooth_sg(traja.TrajaDataFrame(df), w=3, p=1)

plt.rcParams['figure.figsize'] = [12, 50]
fig, axes = plt.subplots(nrows=1, ncols=1, sharex=True, sharey=True)

# Same fixed axis window as the other trajectory plots.
custom_xlim = (-100, 100)
custom_ylim = (-100, 100)
plt.setp(axes, xlim=custom_xlim, ylim=custom_ylim)

plot_title = "Cluster 52"
axes.plot(df.x, df.y, '-o')
axes.title.set_text(plot_title)

# Label the first and the last point of the path with their year.
l = len(df) - 1
axes.text(df.x[0], df.y[0], df.time[0])
axes.text(df.x[l], df.y[l], df.time[l])
plt.show()
   

In [None]:
## DIFFERENT DIRECTIONS

# Hand-picked groups of three cluster labels; each group is drawn as one
# row of three panels and saved to its own PNG.
cluster_directions = [
    [18, 9, 6],
    [25, 33, 52],
    [29, 34, 20],
    [7, 49, 32],
    [0, 26, 43],
]

for j, direction_group in enumerate(cluster_directions):
    fig, axes = plt.subplots(nrows=1, ncols=3, sharex=True, sharey=True, figsize=[14, 4])

    custom_xlim = (-100, 100)
    custom_ylim = (-100, 100)
    plt.setp(axes, xlim=custom_xlim, ylim=custom_ylim)

    for i, cluster_id in enumerate(direction_group):
        in_cluster = mean_clusters_years.cluster_smoothed == cluster_id
        df = (
            mean_clusters_years
            .loc[in_cluster, ["X1", "X2", "Year"]]
            .reset_index(drop=True)
            .rename(columns={"X1": 'x', "X2": 'y', "Year": "time"})
        )
        df = traja.smooth_sg(traja.TrajaDataFrame(df), w=3, p=1)

        plot_title = "Cluster " + str(cluster_id)
        panel = axes[i]
        panel.plot(df.x, df.y, '-o')
        panel.title.set_text(plot_title)

        # Label the first and the last point of the path with their year.
        l = len(df) - 1
        panel.text(df.x[0], df.y[0], df.time[0])
        panel.text(df.x[l], df.y[l], df.time[l])

    # Save before showing so the file contains the drawn figure.
    plt.savefig('cluster_directions' + str(j) + '.png')
    plt.show()

In [None]:
# One row per cluster: path length, distance and the cluster label, exported
# to Excel. zip() pairs the three lists positionally.
traja_rows = list(zip(lengths, distances, clusters))
mean_clus_traja_info = pd.DataFrame(traja_rows, columns=['lengths', 'distances', 'clusters'])
mean_clus_traja_info.to_excel("mean_clus_traja_info.xlsx")

In [None]:
# Number of denial-sharing actors in each cluster.
denial_actors_summary.groupby('cluster_smoothed', as_index = False).agg({'Group.Name': 'count'})

In [None]:
#CLUSTERS OF ACTORS SHARING CLIMATE CHANGE DENIAL CONTENT

# Hand-picked cluster labels to display, one panel each (4 x 2 grid).
denial_clusters_max = [4, 14, 0, 29, 48, 39, 10, 36]

fig, axes = plt.subplots(nrows=4, ncols=2, sharex=True, sharey=True, figsize=(8, 12))

custom_xlim = (-100, 100)
custom_ylim = (-100, 100)

plt.setp(axes, xlim=custom_xlim, ylim=custom_ylim)
axes = axes.ravel()

for i in range(0, len(denial_clusters_max)):
    df = mean_clusters_years[["X1", "X2", "Year"]][mean_clusters_years.cluster_smoothed == denial_clusters_max[i]].reset_index(drop = True)
    df = df.rename(columns = {"X1": 'x', "X2": 'y', "Year": "time"})
    df = traja.TrajaDataFrame(df)
    df = traja.smooth_sg(df, w = 3, p=1)

    plot_title = "Cluster " + str(denial_clusters_max [i])
    axes[i].plot(df.x, df.y, '-o')
    axes[i].title.set_text(plot_title)
    # Label the first and the last point of the path with their year.
    axes[i].text(df.x[0], df.y[0], df.time[0])

    l = len(df)-1
    axes[i].text(df.x[l], df.y[l], df.time[l])

# The file name used to be built from `j`, a loop variable left over from a
# different cell, so this cell failed on a fresh kernel unless that cell had
# run first. A single figure is saved here, so a fixed name is used instead.
# NOTE(review): earlier runs wrote 'denial_cluster_directions4.png'; update
# any references to that file.
plt.savefig('denial_cluster_directions.png')

In [None]:

# Mean yearly position per selected cluster, averaged over the
# denial-sharing actors of that cluster only.
mean_denial_clusters_years = pd.concat([
    sv_actors_years[
        (sv_actors_years['cluster_smoothed'] == cluster)
        & (sv_actors_years['Group.Name'].isin(denial_names))
    ]
    .groupby('Year', as_index=False)
    .agg({'X1': 'mean', 'X2': 'mean'})
    .assign(cluster_smoothed=cluster)
    for cluster in denial_clusters_max
])
mean_denial_clusters_years

In [None]:
# Cluster-level trajectory metrics (from the mean paths), one row per cluster.
traj_stats = pd.DataFrame(dict(
    cluster_smoothed=clusters,
    length=lengths,
    distance=distances,
    displacement=displacements,
))

# Keep the clusters that contain a denial-sharing actor and attach their
# metrics to each of those actors.
denial_traj_stats = traj_stats.loc[traj_stats['cluster_smoothed'].isin(denial_clusters)]
denial_actors_summary = denial_actors_summary.merge(denial_traj_stats, on='cluster_smoothed')

denial_share = denial_actors_summary['denial_binary'] / denial_actors_summary['fasttext']
denial_actors_summary['Ratio of climate change denial messages'] = denial_share


In [None]:
# Attach a second cluster label to each denial-sharing actor.
# NOTE(review): `denial_clust_smoothed` is not defined in any cell visible in
# this notebook, so this cell raises NameError on a fresh kernel. Restore the
# cell that computes it (presumably one label per entry of `denial_names` —
# confirm) before running this.
denial_actors_summary['denial_cluster_smoothed'] = ""

for denial_name, denial_label in zip(denial_names, denial_clust_smoothed):
    # Single .loc[rows, column] assignment: the previous chained form
    # (df[col].loc[mask] = value) may write to a temporary copy and does
    # nothing under pandas Copy-on-Write.
    denial_actors_summary.loc[denial_actors_summary['Group.Name'] == denial_name, 'denial_cluster_smoothed'] = denial_label

In [None]:
# Replace the cluster-level metric columns with the actor-level ones, then
# export the table in both Excel and CSV form.
denial_actors_summary = (
    denial_actors_summary
    .drop(columns=['length', 'distance', 'displacement'])
    .merge(denial_actors_traja, on='Group.Name')
)

denial_actors_summary.to_excel("denial_actors_summary.xlsx")
denial_actors_summary.to_csv("denial_actors_summary.csv")

In [None]:
# Number of denial-sharing actors per `denial_cluster_smoothed` label.
denial_actors_summary.groupby ('denial_cluster_smoothed', as_index = False).agg ({'Group.Name': 'count'})

In [None]:
# Pairwise correlations between the numeric columns of the denial-actor
# table, shown as a colour-graded table.
denial_actors_summary.corr(numeric_only=True).style.background_gradient(cmap='coolwarm')

In [None]:
# Number of denial-sharing actors per `cluster_smoothed` label.
denial_actors_summary.groupby ('cluster_smoothed', as_index = False).agg ({'Group.Name': 'count'})

In [None]:
# Median denial ratio among the denial-sharing actors.
np.median(denial_actors_summary.denial_ratio)

In [None]:
# Export first, then end the cell with the styled correlation table so it is
# actually displayed. Previously the table was built and discarded because
# the export was the cell's last statement.
sv_actors_summary.to_excel("sv_actors_summary.xlsx")
sv_actors_summary.corr(numeric_only=True).style.background_gradient(cmap='coolwarm')

In [None]:
# Cluster-level summary: mean actor position, summed denial and message
# counts, the resulting denial ratio, and the cluster's trajectory metrics.
sv_mean_clusters = (
    sv_actors_summary
    .groupby('cluster_smoothed', as_index=False)
    .agg({
        'X1': 'mean',
        'X2': 'mean',
        'denial_binary': 'sum',
        'fasttext': 'sum',
    })
    .assign(denial_ratio=lambda d: d['denial_binary'] / d['fasttext'])
    .merge(traj_stats, on='cluster_smoothed')
)
sv_mean_clusters.to_excel("sv_mean_clusters_summary.xlsx")

In [None]:
# Scatter of the denial-sharing actors that belong to the hand-picked
# clusters in `denial_clusters_max`, coloured by cluster label.
sns.set(rc={'figure.figsize':(10,15)})
p5= sns.scatterplot(x="X1", y="X2",
                palette=sns.color_palette("tab20"), 
                hue = 'cluster_smoothed',
                data= denial_actors_summary[denial_actors_summary.cluster_smoothed.isin(denial_clusters_max)]
                   )
# Same fixed axis window as the trajectory plots.
p5.set(xlim=(-100, 100), ylim=(-100, 100))
sns.move_legend(p5, "lower right",
                title='Cluster assignment', alignment = "left")
plt.show()

# CLIMATE CHANGE DENIAL VS MAINSTREAM

In [None]:
# Mainstream-media actor list: drop the stray index column from the export
# and tag every row with its category.
media_list_mm = (
    pd.read_excel("media_list_facebook.xlsx")
    .drop(columns="Unnamed: 0")
    .assign(**{"Group category": "Mainstream media"})
)

In [None]:
# Names of the actors whose denial ratio exceeds 0.05, tagged with their
# category. The explicit .copy() makes this an independent frame, so the
# column assignment does not write into a slice of denial_actors_summary.
denial_act = denial_actors_summary.loc[denial_actors_summary.denial_ratio > 0.05, ["Group.Name"]].copy()
denial_act["Group category"] = "Groups with climate change denial-related narratives"

denial_act

In [None]:
# Political parties to look up among the active actors.
party_names = [
    "Vänsterpartiet",
    "Centerpartiet",
    "Kristdemokraterna",
    "Liberalerna",
    "Moderaterna",
    "Miljöpartiet de gröna",
    "Sverigedemokraterna",
    "Socialdemokraterna",
]

# Keep only the parties that are present in the summary, then tag them.
polit_act = (
    sv_actors_summary
    .loc[sv_actors_summary["Group.Name"].isin(party_names), ["Group.Name"]]
    .reset_index(drop=True)
)
polit_act["Group category"] = "Political parties"



In [None]:
# Stack the three actor categories, then attach each actor's mean (X1, X2)
# position; actors missing from the summary drop out in the merge.
act_df = pd.concat([denial_act, media_list_mm, polit_act], ignore_index=True)

b = (
    sv_actors_summary
    .loc[sv_actors_summary["Group.Name"].isin(act_df["Group.Name"]), ["Group.Name", "X1", "X2"]]
    .reset_index(drop=True)
)
act_df = act_df.merge(b, on='Group.Name')
act_df

In [None]:
# Scatter of denial-sharing groups, mainstream media and political parties
# at their mean (X1, X2) position, coloured by category.
sns.set(rc={'figure.figsize': (8, 10)})
p5 = sns.scatterplot(x="X1", y="X2",
                     palette=sns.color_palette(),
                     hue="Group category",
                     data=act_df, s=50)

p5.set_ylim(-100, 100)
p5.set_xlim(-100, 100)

# Save BEFORE showing: once plt.show() has run, the current figure is no
# longer the drawn one and savefig would write an empty image. plt.show()
# also takes no Axes argument (its positional parameter is `block`).
plt.savefig("cc_vs_mm.png")
plt.show()