We first used Wikidata to create a dataset of countries and their demonyms.

In [None]:
"""
QUERY FOR COUNTRY NAMES, CAPITALS, AND DEMONYNMS 

#List of present-day countries and capital(s)
SELECT DISTINCT ?country ?countryLabel ?capital ?capitalLabel ?demonym
WHERE
{
  ?country wdt:P31 wd:Q3624078 .
  #not a former country
  FILTER NOT EXISTS {?country wdt:P31 wd:Q3024240}
  #and no an ancient civilisation (needed to exclude ancient Egypt)
  FILTER NOT EXISTS {?country wdt:P31 wd:Q28171280}
  ?language wdt:P218 'sv'.
  OPTIONAL { ?country wdt:P36 ?capital }
  ?country wdt:P1549 ?demonym FILTER (lang(?demonym) = "en").
  SERVICE wikibase:label { bd:serviceParam wikibase:language "en". }
}
ORDER BY ?countryLabel"""

In [None]:
import pandas as pd
import numpy as np
import os
from sklearn.metrics import pairwise


os.chdir('C:/Users/Venia/Desktop/political-manifestos')
text=pd.read_csv("data/english_text.csv")
core = pd.read_csv("data/english_core.csv")
metadata = pd.read_csv("data/english_metadata.csv")

metadata['language'] = metadata['language'].fillna("english")
french_keys = metadata[metadata['language']=='french']['key'].values

words_to_remove = ['canada','canadians','america','americans','britain']
country_list = ['Canada','United Kingdom','United States']

df = pd.merge(text,core, on = 'key')
df = df[~df['key'].isin(french_keys)]

In [92]:
from string import punctuation, digits
import re

def clean_text(x):
    x = x.lower()
    x = x.replace("\n", " ")
    x = x.replace('\t', ' ')
    x = x.replace("\'", '')
    x=x.strip()
    remove_digits = str.maketrans('', '', digits)
    regex = re.compile('[%s]' % re.escape(punctuation))
    x = regex.sub('', x)
    x = x.translate(remove_digits)
    return x 
    

In [93]:
df['text_clean'] = df['text'].apply(lambda x: clean_text(x))

In [105]:

country_names = pd.read_csv("country_names.csv")
country_names = country_names[['countryLabel','capitalLabel','demonym']]
countries = country_names[['countryLabel','demonym']].drop_duplicates().reset_index()
demonyms = countries.groupby('countryLabel')['demonym'].apply(list).reset_index()
demonyms['list'] = demonyms.apply(lambda x: [x['countryLabel']] + x['demonym'], axis =1)
demonyms['list'] = demonyms['list'].apply(lambda x: [k.lower() for k in x])

import nltk
from nltk.corpus import stopwords
stop = stopwords.words('english')
pat = r'\b(?:{})\b'.format('|'.join(stop))
df['text_clean_no_stop'] = df['text_clean'].str.replace(pat,'')
df['text_clean_no_stop'] = df['text_clean_no_stop'].str.replace(' +', ' ')
df['text_clean_no_stop'] = df['text_clean_no_stop'].apply(lambda x: x[1:] if x[0] == ' ' else x)
df['tokenized']=df['text_clean_no_stop'].apply(lambda x: x.split(' '))
countries = demonyms['list'].apply(lambda x: x[0]).values


In [107]:
for row in demonyms['list']:
    df[row[0]] = 0 
for row in demonyms['list']:
    country = row[0]
    for val in row:
        df[country] += df['tokenized'].apply(lambda x: x.count(val))

In [108]:
df['year'] = df['date'].apply(lambda x: str(x)[0:4])
country= df[df['countryname'] == 'United States']

In [194]:
def context_windows(country_df, country_name, n=4):
    list_of_instances = []
    metadata_list = []
    for i,j in country[['tokenized','countryname','date','partyname']].iterrows():
        metadata = j[['countryname','date','partyname']].values
        indices = [k for k, x in enumerate(j['tokenized']) if x == country_name]
        for i in indices:
            list_of_instances.append([k for k in j['tokenized'][i-n:i+n] if k != country_name])
            metadata_list.append(list(metadata))
    list_exploded = [k for j in list_of_instances for k in j]
    return metadata_list, list_of_instances, list_exploded

In [219]:
metadata, list_of_contexts, list_exploded = context_windows(country,'iraq')

In [220]:
def top_words(list_exploded):
    word_count  = {}
    for val in list_exploded:
        if val in word_count.keys():
            word_count[val] += 1
        else:
            word_count.update({val: 1})
    sorted_dict = dict(sorted(word_count.items(), key = lambda item:item[1], reverse=True))
    return sorted_dict

In [221]:
country_words = top_words(list_exploded)

In [222]:
country_contexts = pd.DataFrame(metadata,columns = ['country','date','party'])
country_contexts['contexts'] = list_of_contexts

In [223]:
election_words = country_contexts.groupby(['date', 'party'])['contexts'].apply(sum)
# election_words = country_contexts.groupby(['party'])['contexts'].apply(sum)

In [224]:
from collections import Counter
election_words = election_words.apply(lambda x: Counter(x))

In [225]:
election_words = election_words.apply(lambda x: dict(sorted(x.items(), key = lambda item:item[1], reverse=True)))

In [226]:
from itertools import islice
top_election_words = election_words.apply(lambda x: [k[0] for k in list(islice(x.items(), 4))])

In [227]:
top_election_words.reset_index()

Unnamed: 0,date,party,contexts
0,199211,Republican Party,"[president, bush, quagmire, indefinite]"
1,199611,Democratic Party,"[iran, delivery, nuclear, weapons]"
2,199611,Republican Party,"[iran, syria, libya, north]"
3,200011,Democratic Party,"[mass, destruction, delivery, systems]"
4,200011,Republican Party,"[friends, community, long, also]"
5,200411,Democratic Party,"[administration, war, afghanistan, challenges]"
6,200411,Republican Party,"[afghanistan, nations, america, forces]"
7,200811,Democratic Party,"[war, ending, end, responsibly]"
8,200811,Republican Party,"[offer, special, circumstances, conflict]"
9,201211,Democratic Party,"[war, responsibly, forces, charting]"


## Sentiment analysis
Understand which election was most positive towards which countries

In [120]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sid = SentimentIntensityAnalyzer()
sentiment = top_election_words.explode().apply(sid.polarity_scores).apply(lambda x: x['compound'])

In [125]:
df = pd.read_csv("for_sentiment_analysis.csv")
sentiment_scores = pd.read_excel("sentiment_values_with_topics.xlsx")

In [126]:
sentiment = sentiment.reset_index().groupby(['date','party'])['contexts'].mean()

In [127]:
sentiment = df['tokenized'].apply(sid.polarity_scores).apply(lambda x: x['compound'])

In [137]:
topic_list = []
for i in range(22):
    topic_list.append({'topic_id': i+1, 'topic': ""})

In [138]:
topic_info = [{'topic_id': 1, 'topic': 'care, health, patient'},
 {'topic_id': 2, 'topic': 'nuclear, weapon, security'},
 {'topic_id': 3, 'topic': 'right, marriage, state'},
 {'topic_id': 4, 'topic': 'energies, technology,climate'},
 {'topic_id': 5, 'topic': 'work, new, job'},
 {'topic_id': 6, 'topic': 'business, small, work'},
 {'topic_id': 7, 'topic': 'education, student, law'},
 {'topic_id': 8, 'topic': 'secur, support, israel'},
 {'topic_id': 9, 'topic': 'right, vote, constitute'},
 {'topic_id': 10, 'topic': 'new, today, families'},
 {'topic_id': 11, 'topic': 'security, iraq, people'},
 {'topic_id': 12, 'topic': 'tax, family, right'},
 {'topic_id': 13, 'topic': 'right, constitute, individual'},
 {'topic_id': 14, 'topic': 'govern, program, spend'},
 {'topic_id': 15, 'topic': 'energies, nation, trade'},
 {'topic_id': 16, 'topic': 'human, global, nation'},
 {'topic_id': 17, 'topic': 'veteran, nation, military'},
 {'topic_id': 18, 'topic': 'security, nation, system'},
 {'topic_id': 19, 'topic': 'govern, support, care'},
 {'topic_id': 20, 'topic': 'school, education, student'},
 {'topic_id': 21, 'topic': 'community, nation, support'},
 {'topic_id': 22, 'topic': 'security, nation, system'}]

In [139]:
topic_df = pd.DataFrame(topic_info)
merged = pd.merge(sentiment_scores, topic_df, left_on = 'Topic',right_on = 'topic_id')

In [175]:
df

Unnamed: 0.1,Unnamed: 0,key,countryname,edate,tokenized,key2,partyname
0,0,61320_200811,United States,04/11/2008,preamble,61320_200811_0,Democratic Party
1,0,61320_200811,United States,04/11/2008,come,61320_200811_0,Democratic Party
2,0,61320_200811,United States,04/11/2008,together,61320_200811_0,Democratic Party
3,0,61320_200811,United States,04/11/2008,defining,61320_200811_0,Democratic Party
4,0,61320_200811,United States,04/11/2008,moment,61320_200811_0,Democratic Party
...,...,...,...,...,...,...,...
27825,27,61620_200811,United States,04/11/2008,care,61620_200811_27,Republican Party
27826,27,61620_200811,United States,04/11/2008,respect,61620_200811_27,Republican Party
27827,27,61620_200811,United States,04/11/2008,earned,61620_200811_27,Republican Party
27828,27,61620_200811,United States,04/11/2008,service,61620_200811_27,Republican Party


In [181]:
merged['key3'] = merged['Key2'].apply(lambda x: int(x.split('_')[-1])) 

merged2 = pd.merge(df,merged, left_on = 'Unnamed: 0',right_on = 'key3')

In [183]:
merged2 = merged2[['key2','countryname','edate','partyname','topic','Sentiment Value']].drop_duplicates()

In [186]:
ok = merged2[['edate','topic','partyname','Sentiment Value']].sort_values(by = 'Sentiment Value', ascending = False)

In [189]:
republican = ok[ok['partyname'] == "Republican Party"]
democratic = ok[ok['partyname'] == "Democratic Party"]

In [193]:
democratic[['edate','topic','Sentiment Value']]

Unnamed: 0,edate,topic,Sentiment Value
4141,04/11/2008,"secur, support, israel",0.047219
8147,04/11/2008,"right, vote, constitute",0.042842
2942,04/11/2008,"nuclear, weapon, security",0.040035
1234,04/11/2008,"tax, family, right",0.038371
10681,04/11/2008,"govern, support, care",0.037287
5316,04/11/2008,"human, global, nation",0.03508
613,04/11/2008,"work, new, job",0.031206
2113,04/11/2008,"security, iraq, people",0.028138
12209,04/11/2008,"school, education, student",0.027365
0,04/11/2008,"new, today, families",0.025764
