In [3]:
#Special module written for this class
#This provides access to data and to helper functions from previous weeks
#Make sure you update it before starting this notebook
import lucem_illud #pip install -U git+git://github.com/Computational-Content-Analysis-2018/lucem_illud.git

#All these packages need to be installed from pip
import gensim#For word2vec, etc
import requests #For downloading our datasets
import nltk #For stop words and stemmers
import numpy as np #For arrays
import pandas #Gives us DataFrames
import matplotlib.pyplot as plt #For graphics
import seaborn #Makes the graphics look nicer
import sklearn.metrics.pairwise #For cosine similarity
import sklearn.manifold #For T-SNE
import sklearn.decomposition #For PCA
import sklearn.feature_extraction
from nltk.corpus import stopwords #For stopwords

#gensim uses a couple of deprecated features
#we can't do anything about them so lets ignore them 
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) 

#This 'magic' command makes the plots work better
#in the notebook, don't use it outside of a notebook.
#Also you can ignore the warning
%matplotlib inline

import os #For looking through files
import os.path #For managing file paths

#This will be doing most of the work
import networkx as nx

In [14]:
lucem_illud.setupStanfordNLP()

Starting downloads, this will take 5-10 minutes
../stanford-NLP/parser already exists, skipping download
../stanford-NLP/ner already exists, skipping download
../stanford-NLP/postagger already exists, skipping download
../stanford-NLP/core already exists, skipping download
[100%]Done setting up the Stanford NLP collection


In [15]:
import lucem_illud.stanford as stanford

The StanfordTokenizer will be deprecated in version 3.2.5.
Please use [91mnltk.tag.corenlp.CoreNLPPOSTagger[0m or [91mnltk.tag.corenlp.CoreNLPNERTagger[0m instead.
  super(StanfordNERTagger, self).__init__(*args, **kwargs)
The StanfordTokenizer will be deprecated in version 3.2.5.
Please use [91mnltk.tag.corenlp.CoreNLPPOSTagger[0m or [91mnltk.tag.corenlp.CoreNLPNERTagger[0m instead.
  super(StanfordPOSTagger, self).__init__(*args, **kwargs)


Getting our corpora.

In [4]:
newsDF = pandas.read_csv('nu_simple.csv', encoding='utf-8')
len(newsDF)

1724

In [7]:
newsDF2 = pandas.read_csv('party_simple.csv', encoding='latin-1')
len(newsDF2)

25129

<h1>Parties</h1>

In [8]:
news_P = newsDF2
#Break the articles into sentences.
news_P['sentences'] = news_P['Article'].apply(lambda x: [nltk.word_tokenize(s) for s in nltk.sent_tokenize(x)])
news_P.index = range(len(news_P) - 1, -1,-1) #Reindex to make things nice in the future

In [10]:
#Filter for articles from 2008-2009.
news_P_08 = news_P.loc[news_P['Year'] == 2008]
news_P_09 = news_P.loc[news_P['Year'] == 2009]
news_P_0809 = pandas.concat([news_P_08, news_P_09])

#Filter for articles from 2013-2014.
news_P_13 = news_P.loc[news_P['Year'] == 2013]
news_P_14 = news_P.loc[news_P['Year'] == 2014]
news_P_1314 = pandas.concat([news_P_13, news_P_14])

#Filter for articles from 2016-2017.
news_P_16 = news_P.loc[news_P['Year'] == 2016]
news_P_17 = news_P.loc[news_P['Year'] == 2017]
news_P_1617 = pandas.concat([news_P_16, news_P_17])

In [11]:
#Sample a random 100 articles from 2008-2009.
random_P_0809 = news_P_0809.sample(n=100)

#Sample a random 100 articles from 2013-2014.
random_P_1314 = news_P_1314.sample(n=100)

#Sample a random 100 articles from 2016-2017.
random_P_1617 = news_P_1617.sample(n=100)

In [16]:
#Apply POS tags for 2008-2009.
random_P_0809['POS_sents'] = random_P_0809['sentences'].apply(lambda x: stanford.postTagger.tag_sents(x))

#Apply POS tags for 2013-2014.
random_P_1314['POS_sents'] = random_P_1314['sentences'].apply(lambda x: stanford.postTagger.tag_sents(x))

#Apply POS tags for 2016-2017.
random_P_1617['POS_sents'] = random_P_1617['sentences'].apply(lambda x: stanford.postTagger.tag_sents(x))

**2008-2009**

In [18]:
random_P_0809['POS_sents']

17347    [[(Max, NNP), (Moein, NNP), (,, ,), (a, DT), (...
17482    [[(LOCK, NN), (AND, CC), (KEY, NN), (:, :), (A...
16091    [[(As, IN), (a, DT), (newly, RB), (established...
17628    [[(``, ``), (So, RB), (long, RB), (,, ,), (it,...
17243    [[(The, DT), (Golkar, NNP), (Party, NNP), ('s,...
15325    [[(Many, JJ), (pollsters, NNS), (published, VB...
17169    [[(Lately, RB), (there, EX), (have, VBP), (bee...
17015    [[(Halida, NNP), (Hatta, NNP), ((, NNP), (left...
17414    [[(With, IN), (the, DT), (House, NNP), (of, IN...
14863    [[(The, DT), (names, NNS), (of, IN), (politica...
17517    [[(Lawmaker, NNP), (Agus, NNP), (Condro, NNP),...
16910    [[(Candidates, NNS), (in, IN), (the, DT), (sec...
18094    [[(Golkar, NNP), (Party, NNP), (members, NNS),...
17715    [[(Your, PRP$), (comments, NNS), (on, IN), (th...
18021    [[(The, DT), (banking, NN), (sector, NN), (wil...
15759    [[(Legal, JJ), (experts, NNS), (,, ,), (resear...
18426    [[(For, IN), (Thai, NNP), (King, NNP), (Bhumib.

In [19]:
countTarget = 'NNP'
targetCounts = {}
for entry in random_P_0809['POS_sents']:
    for sentence in entry:
        for ent, kind in sentence:
            if kind != countTarget:
                continue
            elif ent in targetCounts:
                targetCounts[ent] += 1
            else:
                targetCounts[ent] = 1
sortedTargets = sorted(targetCounts.items(), key = lambda x: x[1], reverse = True)
sortedTargets[:20]

[('Party', 236),
 ('(', 188),
 (')', 155),
 ('House', 125),
 ('Yudhoyono', 97),
 ('Jakarta', 94),
 ('Indonesia', 91),
 ('Golkar', 85),
 ('President', 82),
 ('Kalla', 75),
 ('Democratic', 68),
 ('Court', 67),
 ('Indonesian', 57),
 ('Commission', 56),
 ('National', 54),
 ('Java', 53),
 ('KPK', 45),
 ('KPU', 42),
 ('Bambang', 39),
 ('East', 36)]

In [20]:
countTarget = 'NN'
targetCounts = {}
for entry in random_P_0809['POS_sents']:
    for sentence in entry:
        for ent, kind in sentence:
            if kind != countTarget:
                continue
            elif ent in targetCounts:
                targetCounts[ent] += 1
            else:
                targetCounts[ent] = 1
sortedTargets = sorted(targetCounts.items(), key = lambda x: x[1], reverse = True)
sortedTargets[:20]

[('party', 238),
 ('election', 163),
 ('percent', 116),
 (')', 100),
 ('campaign', 84),
 ('government', 68),
 ('Rp', 64),
 ('court', 50),
 ('country', 49),
 ('state', 48),
 ('chairman', 48),
 ('law', 47),
 ('year', 47),
 ('bill', 47),
 ('money', 46),
 ('candidate', 45),
 ('office', 45),
 ('support', 44),
 ('president', 44),
 ('minister', 43)]

In [21]:
NTarget = 'JJ'
Word = 'coalition'
NResults = set()
for entry in random_P_0809['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'so-called', 'personal', 'other', 'political'}


In [107]:
NTarget = 'JJ'
Word = 'muslim'
NResults = set()
for entry in random_P_0809['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'traditional', 'conservative', 'other'}


In [109]:
NTarget = 'JJ'
Word = 'organization'
NResults = set()
for entry in random_P_0809['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'regional', 'intergovernmental', 'Muslim', 'religion-based'}


**2013-2014**

In [22]:
random_P_1314['POS_sents']

8554     [[(There, EX), (are, VBP), (moments, NNS), (in...
9496     [[(Exercising, VBG), (the, DT), (franchise, NN...
21244    [[(Former, JJ), (party, NN), (chairman, NN), (...
4729     [[(The, DT), (Election, NNP), (Supervisory, NN...
6846     [[(An, DT), (interesting, JJ), (picture, NN), ...
9329     [[(The, DT), (camps, NNS), (of, IN), (two, CD)...
6004     [[(Environmentally, RB), (friendly, JJ), (team...
3806     [[(When, WRB), (former, JJ), (Solo, NNP), (may...
6412     [[(Presidential, JJ), (candidate, NN), (Prabow...
9663     [[(Neneng, NNP), (Sri, NNP), (Wahyuni, NNP), (...
6002     [[(The, DT), (Corruption, NNP), (Eradication, ...
7646     [[(Following, VBG), (criticism, NN), (on, IN),...
9326     [[(ÒThis, NN), (is, VBZ), (a, DT), (business, ...
5496     [[(An, DT), (increase, NN), (of, IN), (vote, N...
9803     [[(Prosecutors, NNS), (are, VBP), (seeking, VB...
6807     [[(The, DT), (Corruption, NNP), (Eradication, ...
9808     [[(JAKARTA, NNP), (:, :), (The, DT), (central,.

In [23]:
countTarget = 'NNP'
targetCounts = {}
for entry in random_P_1314['POS_sents']:
    for sentence in entry:
        for ent, kind in sentence:
            if kind != countTarget:
                continue
            elif ent in targetCounts:
                targetCounts[ent] += 1
            else:
                targetCounts[ent] = 1
sortedTargets = sorted(targetCounts.items(), key = lambda x: x[1], reverse = True)
sortedTargets[:20]

[('Party', 228),
 ('(', 207),
 (')', 174),
 ('Jakarta', 110),
 ('Indonesia', 95),
 ("'\x80\x9d", 89),
 ('Democratic', 75),
 ('House', 67),
 ('KPK', 62),
 ('Court', 58),
 ('Golkar', 58),
 ('Jokowi', 58),
 ('President', 54),
 ('Commission', 53),
 ('Indonesian', 52),
 ('National', 51),
 ('Java', 48),
 ('Tuesday', 43),
 ('Ministry', 42),
 ('Anas', 40)]

In [24]:
countTarget = 'NN'
targetCounts = {}
for entry in random_P_1314['POS_sents']:
    for sentence in entry:
        for ent, kind in sentence:
            if kind != countTarget:
                continue
            elif ent in targetCounts:
                targetCounts[ent] += 1
            else:
                targetCounts[ent] = 1
sortedTargets = sorted(targetCounts.items(), key = lambda x: x[1], reverse = True)
sortedTargets[:20]

[('party', 176),
 ('election', 134),
 (')', 92),
 ('government', 73),
 ('candidate', 71),
 ('chairman', 69),
 ('case', 64),
 ('Rp', 59),
 ('percent', 58),
 ('year', 58),
 ('campaign', 57),
 ('coalition', 56),
 ('court', 55),
 ('president', 54),
 ('country', 54),
 ('money', 54),
 ('state', 51),
 ('law', 51),
 ('power', 48),
 ('graft', 46)]

In [94]:
NTarget = 'JJ'
Word = 'coalition'
NResults = set()
for entry in random_P_1314['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'international', 'official', 'other', "Golkar'\x80\x99s", 'original', "Jokowi'\x80\x99s", 'ongoing'}


In [95]:
NTarget = 'JJ'
Word = 'organization'
NResults = set()
for entry in random_P_1314['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'affiliated', 'Islamic', 'non-governmental', 'humanitarian', 'Muslim'}


In [101]:
NTarget = 'JJ'
Word = 'muslim'
NResults = set()
for entry in random_P_1314['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'Non-party', 'dedicated', 'respected', 'Egyptian', 'mainstream', 'second-largest'}


**2016-2017**

In [25]:
random_P_1617['POS_sents']

20448    [[(The, DT), (Corruption, NNP), (Eradication, ...
24283    [[(The, DT), (mayor, NN), (of, IN), (Surakarta...
23871    [[(South, NNP), (Korean, NNP), (President, NNP...
24131    [[(In, IN), (the, DT), (last, JJ), (official, ...
23310    [[(The, DT), (House, NNP), (of, IN), (Represen...
24664    [[(Democratic, JJ), (Party, NNP), (chairman, N...
22885    [[(Ukraine, NNP), ('s, POS), (President, NNP),...
20527    [[(In, IN), (view, NN), (of, IN), (a, DT), (re...
23259    [[(The, DT), (House, NNP), (of, IN), (Represen...
22189    [[(Vietnamese, NNP), (Foreign, NNP), (Minister...
24237    [[(The, DT), (Saudi, JJ), (kingÕs, NN), (speec...
22138    [[(The, DT), (Corruption, NNP), (Eradication, ...
19667    [[(The, DT), (Agriculture, NNP), (Ministry, NN...
23741    [[(South, NNP), (Korea, NNP), ('s, POS), (pres...
25008    [[(Pope, NNP), (Francis, NNP), ('s, POS), (emb...
19804    [[(It, PRP), (appears, VBZ), (that, IN), (Golk...
19369    [[(The, DT), (South, NNP), (Sulawesi, NNP), (P.

In [26]:
countTarget = 'NNP'
targetCounts = {}
for entry in random_P_1617['POS_sents']:
    for sentence in entry:
        for ent, kind in sentence:
            if kind != countTarget:
                continue
            elif ent in targetCounts:
                targetCounts[ent] += 1
            else:
                targetCounts[ent] = 1
sortedTargets = sorted(targetCounts.items(), key = lambda x: x[1], reverse = True)
sortedTargets[:20]

[('(', 177),
 ('Party', 169),
 ('Jakarta', 137),
 (')', 134),
 ('House', 116),
 ('Ó', 103),
 ('Indonesia', 70),
 ('Golkar', 69),
 ('US', 57),
 ('Ahok', 51),
 ('Jokowi', 49),
 ('President', 47),
 ('National', 47),
 ('Commission', 46),
 ('KPK', 46),
 ('Indonesian', 42),
 ('China', 42),
 ('Trump', 42),
 ('Setya', 40),
 ('Monday', 39)]

In [27]:
countTarget = 'NN'
targetCounts = {}
for entry in random_P_1617['POS_sents']:
    for sentence in entry:
        for ent, kind in sentence:
            if kind != countTarget:
                continue
            elif ent in targetCounts:
                targetCounts[ent] += 1
            else:
                targetCounts[ent] = 1
sortedTargets = sorted(targetCounts.items(), key = lambda x: x[1], reverse = True)
sortedTargets[:20]

[('party', 127),
 ('election', 121),
 ('government', 89),
 (')', 85),
 ('percent', 79),
 ('case', 64),
 ('meeting', 62),
 ('Rp', 58),
 ('Ó', 57),
 ('chairman', 55),
 ('decision', 53),
 ('year', 52),
 ('law', 51),
 ('state', 50),
 ('candidate', 47),
 ('graft', 42),
 ('president', 42),
 ('member', 42),
 ('country', 39),
 ('campaign', 38)]

In [102]:
NTarget = 'JJ'
Word = 'muslim'
NResults = set()
for entry in random_P_1617['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'Indonesian', 'Êstateless', 'JakartaÕs', 'Hijab-wearing'}


In [103]:
NTarget = 'JJ'
Word = 'organization'
NResults = set()
for entry in random_P_1617['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'Muslim', 'Islamic'}


In [106]:
NTarget = 'NN'
Word = 'organization'
NResults = set()
for entry in random_P_1617['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'society', 'mass'}


<h1>SMO</h1>

In [28]:
news_SMO = newsDF
#Break the articles into sentences.
news_SMO['sentences'] = news_SMO['Article'].apply(lambda x: [nltk.word_tokenize(s) for s in nltk.sent_tokenize(x)])
news_SMO.index = range(len(news_SMO) - 1, -1,-1) #Reindex to make things nice in the future

In [29]:
#Filter for articles from 2008-2009.
news_SMO_08 = news_SMO.loc[news_SMO['Year'] == 2008]
news_SMO_09 = news_SMO.loc[news_SMO['Year'] == 2009]
news_SMO_0809 = pandas.concat([news_SMO_08, news_SMO_09])

#Filter for articles from 2013-2014.
news_SMO_13 = news_SMO.loc[news_SMO['Year'] == 2013]
news_SMO_14 = news_SMO.loc[news_SMO['Year'] == 2014]
news_SMO_1314 = pandas.concat([news_SMO_13, news_SMO_14])

#Filter for articles from 2016-2017.
news_SMO_16 = news_SMO.loc[news_SMO['Year'] == 2016]
news_SMO_17 = news_SMO.loc[news_SMO['Year'] == 2017]
news_SMO_1617 = pandas.concat([news_SMO_16, news_SMO_17])

In [30]:
#Sample a random 100 articles from 2008-2009.
random_SMO_0809 = news_SMO_0809.sample(n=100)

#Sample a random 100 articles from 2013-2014.
random_SMO_1314 = news_SMO_1314.sample(n=100)

#Sample a random 100 articles from 2016-2017.
random_SMO_1617 = news_SMO_1617.sample(n=100)

In [32]:
#Apply POS tags for 2008-2009.
random_SMO_0809['POS_sents'] = random_SMO_0809['sentences'].apply(lambda x: stanford.postTagger.tag_sents(x))

#Apply POS tags for 2013-2014.
random_SMO_1314['POS_sents'] = random_SMO_1314['sentences'].apply(lambda x: stanford.postTagger.tag_sents(x))

#Apply POS tags for 2016-2017.
random_SMO_1617['POS_sents'] = random_SMO_1617['sentences'].apply(lambda x: stanford.postTagger.tag_sents(x))

**2008-2009**

In [44]:
random_SMO_0809['POS_sents']

1412    [[(Islamic, JJ), (scholars, NNS), (blamed, VBD...
1430    [[(Leaders, NNS), (of, IN), (Indonesia, NNP), ...
1315    [[(A, DT), (friend, NN), (sent, VBD), (a, DT),...
496     [[(Threatened, VBN), (heritage, NN), (:, :), (...
1396    [[(Unlike, IN), (the, DT), (majority, NN), (,,...
1354    [[(For, IN), (peaceful, JJ), (election, NN), (...
587     [[(The, DT), (local, JJ), (chapter, NN), (of, ...
1407    [[(The, DT), (two, CD), (contending, VBG), (pa...
1422    [[(It, PRP), (has, VBZ), (been, VBN), (said, V...
1416    [[(Heading, VBG), (into, IN), (Wednesday, NNP)...
1369    [[(Residents, NNS), (of, IN), (Kemang, NNP), (...
1359    [[(After, IN), (a, DT), (series, NN), (of, IN)...
1370    [[(The, DT), (Indonesian, NNP), (Ulema, NNP), ...
580     [[(Human, JJ), (rights, NNS), (advocates, NNS)...
1353    [[(High-profile, JJ), (meet, VBP), (:, :), (Pr...
497     [[(Religious, JJ), (intolerance, NN), (is, VBZ...
1424    [[(Post, NNP), (Sept., NNP), (11, CD), (,, ,),...
493     [[(Lut

In [45]:
countTarget = 'NNP'
targetCounts = {}
for entry in random_SMO_0809['POS_sents']:
    for sentence in entry:
        for ent, kind in sentence:
            if kind != countTarget:
                continue
            elif ent in targetCounts:
                targetCounts[ent] += 1
            else:
                targetCounts[ent] = 1
sortedTargets = sorted(targetCounts.items(), key = lambda x: x[1], reverse = True)
sortedTargets[:20]

[('(', 255),
 (')', 243),
 ('Islam', 214),
 ('NU', 213),
 ('Jakarta', 202),
 ('FPI', 169),
 ('Indonesia', 158),
 ('Java', 96),
 ('National', 89),
 ('Ahmadiyah', 87),
 ('Party', 86),
 ('Muhammadiyah', 68),
 ('Ulama', 65),
 ('Dur', 64),
 ('Yudhoyono', 61),
 ('Gus', 60),
 ('President', 58),
 ('West', 56),
 ('MUI', 53),
 ('Central', 52)]

In [111]:
countTarget = 'NN'
targetCounts = {}
for entry in random_SMO_0809['POS_sents']:
    for sentence in entry:
        for ent, kind in sentence:
            if kind != countTarget:
                continue
            elif ent in targetCounts:
                targetCounts[ent] += 1
            else:
                targetCounts[ent] = 1
sortedTargets = sorted(targetCounts.items(), key = lambda x: x[1], reverse = True)
sortedTargets[:20]

[('government', 215),
 ('police', 137),
 ('country', 111),
 (')', 111),
 ('violence', 90),
 ('organization', 73),
 ('group', 73),
 ('law', 71),
 ('chairman', 70),
 ('attack', 67),
 ('city', 61),
 ('time', 57),
 ('religion', 57),
 ('world', 55),
 ('state', 55),
 ('election', 53),
 ('leader', 50),
 ('president', 49),
 ('percent', 49),
 ('support', 48)]

In [47]:
NTarget = 'NNP'
Word = 'violence'
NResults = set()
for entry in random_SMO_0809['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'Jakarta', 'Monas'}


In [48]:
NTarget = 'JJ'
Word = 'organization'
NResults = set()
for entry in random_SMO_0809['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'violent', 'complex', 'Muslim', 'Islamic'}


In [76]:
NTarget = 'JJ'
Word = 'voters'
NResults = set()
for entry in random_SMO_0809['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'female', 'potential', 'ineligible', 'eligible', 'underage', 'unregistered', 'Javanese', 'multiple-registered', 'Indonesian', 'many'}


In [77]:
NTarget = 'JJ'
Word = 'political'
NResults = set()
for entry in random_SMO_0809['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'Islamic-based', 'corrupt', 'clever', 'Muslim', 'personal', 'unclear', 'own', 'local', 'short-term', 'social', 'Islamist', 'Major', 'powerful', 'aforementioned', 'moderate', 'different', 'global', 'major', 'legitimate', 'coherent', 'Other', 'certain', 'current'}


In [78]:
NTarget = 'JJ'
Word = 'election'
NResults = set()
for entry in random_SMO_0809['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'legislative', 'peaceful', 'presidential', 'fair', 'local', 'general', 'other', 'near', 'gubernatorial', 'possible', 'recent', 'pronounced'}


In [79]:
NTarget = 'JJ'
Word = 'protest'
NResults = set()
for entry in random_SMO_0809['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'strong', 'similar'}


In [80]:
NTarget = 'JJ'
Word = 'radical'
NResults = set()
for entry in random_SMO_0809['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'few'}


In [93]:
NTarget = 'NNP'
Word = 'party'
NResults = set()
for entry in random_SMO_0809['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'Awakening', 'Patriot', 'Golkar', 'Communist', 'Movement', 'NU', 'Conscience', 'Hanura', 'PKS', 'Sun', 'Justice', 'Development', 'Mandate', 'Democratic', ')'}


**2013-2014**

In [49]:
random_SMO_1314['POS_sents']

748     [[(A, DT), (group, NN), (of, IN), (academics, ...
199     [[(Two, CD), (groups, NNS), (in, IN), (Jakarta...
40      [[(Pros, NNS), (and, CC), (cons, NNS), (over, ...
101     [[(The, DT), (Jakarta, NNP), (Police, NNP), (a...
36      [[(Cabinet, NNP), (Secretary, NNP), (Dipo, NNP...
38      [[(Chairman, NN), (of, IN), (the, DT), (hard-l...
1067    [[(Nahdlatul, NNP), (Ulama, NNP), (,, ,), (the...
188     [[(March, NNP), (24, CD), (,, ,), (Online, NNP...
181     [[(Jay, NNP), (Rayner, NNP), ('s, POS), (amusi...
1100    [[(West, NNP), (Java, NNP), (and, CC), (East, ...
1049    [[(Rusdi, NNP), (Kirana, NNP), ('s, POS), (cla...
65      [[(The, DT), (ongoing, JJ), (friction, NN), (b...
1075    [[(The, DT), (marketing, NN), (arm, NN), (of, ...
83      [[(Police, NNS), (have, VBP), (arrested, VBN),...
110     [[(Not, RB), (gon, VBG), (na, TO), (take, VB),...
1082    [[(The, DT), (country, NN), ('s, POS), (larges...
1099    [[(Voting, NNP), (for, IN), (number, NN), (2, ...
70      [[(Res

In [50]:
countTarget = 'NNP'
targetCounts = {}
for entry in random_SMO_1314['POS_sents']:
    for sentence in entry:
        for ent, kind in sentence:
            if kind != countTarget:
                continue
            elif ent in targetCounts:
                targetCounts[ent] += 1
            else:
                targetCounts[ent] = 1
sortedTargets = sorted(targetCounts.items(), key = lambda x: x[1], reverse = True)
sortedTargets[:20]

[('FPI', 283),
 ('Jakarta', 216),
 ('(', 212),
 (')', 192),
 ('NU', 175),
 ('Islam', 141),
 ('Indonesia', 136),
 ('Java', 132),
 ('Party', 131),
 ('Jokowi', 104),
 ('Central', 101),
 ('Prabowo', 86),
 ('Ahok', 85),
 ('Police', 65),
 ('National', 61),
 ('Yudhoyono', 56),
 ('President', 52),
 ('Friday', 51),
 ('Thursday', 44),
 ('Ulama', 44)]

In [51]:
countTarget = 'NN'
targetCounts = {}
for entry in random_SMO_1314['POS_sents']:
    for sentence in entry:
        for ent, kind in sentence:
            if kind != countTarget:
                continue
            elif ent in targetCounts:
                targetCounts[ent] += 1
            else:
                targetCounts[ent] = 1
sortedTargets = sorted(targetCounts.items(), key = lambda x: x[1], reverse = True)
sortedTargets[:20]

[('chairman', 105),
 ('group', 102),
 ('country', 102),
 ('party', 99),
 ('organization', 93),
 ('police', 93),
 ('election', 86),
 (')', 80),
 ('support', 80),
 ('percent', 72),
 ('law', 67),
 ('governor', 65),
 ('Front', 64),
 ('violence', 64),
 ('government', 60),
 ('time', 52),
 ('campaign', 52),
 ('state', 48),
 ('candidate', 48),
 ('head', 46)]

In [52]:
NTarget = 'JJ'
Word = 'radical'
NResults = set()
for entry in random_SMO_1314['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'such', 'further', 'other', 'Yogyakarta-based', 'many'}


In [53]:
NTarget = 'JJ'
Word = 'violence'
NResults = set()
for entry in random_SMO_1314['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'vigilante', 'based', 'religious', 'obvious', 'unnecessary', 'religious-based', 'senseless', 'sexual', 'equal', 'sectarian', 'National', 'recent', 'religion-based'}


In [54]:
NTarget = 'JJ'
Word = 'vigilante'
NResults = set()
for entry in random_SMO_1314['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'Muslim'}


In [63]:
NTarget = 'NNP'
Word = 'violence'
NResults = set()
for entry in random_SMO_1314['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'FPI'}


In [64]:
NTarget = 'NNP'
Word = 'organization'
NResults = set()
for entry in random_SMO_1314['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'Mass', ')'}


In [65]:
NTarget = 'JJ'
Word = 'organization'
NResults = set()
for entry in random_SMO_1314['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'modernist', 'Islamic', 'religious', 'radical', 'legal', 'other', 'credible', 'hard-line', 'social', 'non-profit', 'Muslim'}


In [74]:
NTarget = 'JJ'
Word = 'election'
NResults = set()
for entry in random_SMO_1314['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'legislative', 'direct', 'presidential', 'general', 'previous', 'effective', 'upcoming', 'current'}


In [75]:
NTarget = 'JJ'
Word = 'voters'
NResults = set()
for entry in random_SMO_1314['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'registered', 'undecided', 'young', 'religious', 'new', 'potential', 'eligible', 'Disillusioned', 'Muslim'}


In [92]:
NTarget = 'NNP'
Word = 'party'
NResults = set()
for entry in random_SMO_1314['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'Awakening', 'Star', 'Golkar', 'Communist', 'Unity', 'Hanura', 'Masyumi', 'Gerindra', 'Justice', 'Mandate', 'NasDem', 'Development', 'Democratic'}


**2016-2017**

In [55]:
random_SMO_1617['POS_sents']

766     [[(Taman, NNP), (Ismail, NNP), (Marzuki, NNP),...
759     [[(The, DT), (leader, NN), (of, IN), (the, DT)...
1479    [[(The, DT), (country, NN), ('s, POS), (two, C...
723     [[(On, IN), (the, DT), (fray, NN), (:, :), (St...
1653    [[(Thousands, NNS), (of, IN), (police, NN), (a...
1719    [[(The, DT), (two, CD), (largest, JJS), (Islam...
1522    [[(The, DT), (Jakarta, NNP), (gubernatorial, J...
1645    [[(Indonesia, NNP), ('s, POS), (largest, JJS),...
1619    [[((, FW), (JP/Hans, FW), (David, NNP), (Tampu...
924     [[(With, IN), (social, JJ), (media, NNS), (inc...
728     [[(The, DT), (Islam, NNP), (Defenders, NNS), (...
1696    [[(With, IN), (local, JJ), (hard-line, JJ), (I...
650     [[(Face, NNP), (to, TO), (face, VB), (:, :), (...
787     [[(Pre-existing, JJ), (inter-ethnic, JJ), (ten...
627     [[(Children, NNS), (are, VBP), (precious, JJ),...
1566    [[(Thousands, NNS), (of, IN), (police, NN), (a...
1470    [[(At, IN), (ease, NN), (:, :), (Hasan, NNP), ...
614     [[(The

In [56]:
countTarget = 'NNP'
targetCounts = {}
for entry in random_SMO_1617['POS_sents']:
    for sentence in entry:
        for ent, kind in sentence:
            if kind != countTarget:
                continue
            elif ent in targetCounts:
                targetCounts[ent] += 1
            else:
                targetCounts[ent] = 1
sortedTargets = sorted(targetCounts.items(), key = lambda x: x[1], reverse = True)
sortedTargets[:20]

[('Jakarta', 291),
 ('(', 216),
 (')', 199),
 ('NU', 182),
 ('Indonesia', 156),
 ('Islam', 148),
 ('Ahok', 140),
 ('FPI', 128),
 ('Police', 128),
 ('Rizieq', 121),
 ('National', 85),
 ('Java', 74),
 ('Ulama', 55),
 ('Nahdlatul', 52),
 ('East', 50),
 ('West', 49),
 ('Jokowi', 46),
 ('President', 45),
 ('Muhammadiyah', 43),
 ('Party', 43)]

In [57]:
countTarget = 'NN'
targetCounts = {}
for entry in random_SMO_1617['POS_sents']:
    for sentence in entry:
        for ent, kind in sentence:
            if kind != countTarget:
                continue
            elif ent in targetCounts:
                targetCounts[ent] += 1
            else:
                targetCounts[ent] = 1
sortedTargets = sorted(targetCounts.items(), key = lambda x: x[1], reverse = True)
sortedTargets[:20]

[('police', 139),
 ('country', 93),
 (')', 92),
 ('government', 90),
 ('rally', 90),
 ('election', 86),
 ('group', 72),
 ('blasphemy', 69),
 ('organization', 68),
 ('Front', 64),
 ('leader', 62),
 ('case', 59),
 ('governor', 55),
 ('chairman', 52),
 ('state', 51),
 ('mass', 50),
 (']', 50),
 ('religion', 46),
 ('family', 46),
 ('(', 45)]

In [58]:
NTarget = 'JJ'
Word = 'organization'
NResults = set()
for entry in random_SMO_1617['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'rival', 'only', 'Islamic', 'bulky', 'Muslim'}


In [59]:
NTarget = 'JJ'
Word = 'voters'
NResults = set()
for entry in random_SMO_1617['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'eligible', 'Muslim', 'intimidating'}


In [66]:
NTarget = 'JJ'
Word = 'organization'
NResults = set()
for entry in random_SMO_1617['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'rival', 'only', 'Islamic', 'bulky', 'Muslim'}


In [67]:
NTarget = 'JJ'
Word = 'political'
NResults = set()
for entry in random_SMO_1617['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'current', 'Islamic', 'recent', 'open', 'particular', 'sure', 'national', 'Contentious', 'Unpad', 'hungry', 'enormous', 'global', 'upcoming', 'certain', 'liberal', ']', 'Muslim', 'leftist'}


In [69]:
NTarget = 'VB'
Word = 'mass'
NResults = set()
for entry in random_SMO_1617['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'perform'}


In [70]:
NTarget = 'JJ'
Word = 'mass'
NResults = set()
for entry in random_SMO_1617['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'large', 'Islamic', 'similar', 'other', 'several', 'violent', 'anti-Pancasila'}


In [71]:
NTarget = 'VB'
Word = 'violence'
NResults = set()
for entry in random_SMO_1617['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'justify'}


In [73]:
NTarget = 'JJ'
Word = 'election'
NResults = set()
for entry in random_SMO_1617['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'polarizing', 'own', 'presidential', 'general', 'hypothetical', 'democratic', 'gubernatorial'}


In [82]:
NTarget = 'NN'
Word = 'radical'
NResults = set()
for entry in random_SMO_1617['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

set()


In [88]:
NTarget = 'JJ'
Word = 'vigilante'
NResults = set()
for entry in random_SMO_1617['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'Islamic'}


In [89]:
NTarget = 'JJ'
Word = 'party'
NResults = set()
for entry in random_SMO_1617['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'only', 'third', 'political', 'particular', 'active', 'Nationalist', 'Democratic'}


In [117]:
NTarget = 'NNP'
Word = 'party'
NResults = set()
for entry in random_SMO_1617['POS_sents']:
    for sentence in entry:
        for (ent1, kind1),(ent2,kind2) in zip(sentence[:-1], sentence[1:]):
            if (kind1,ent2.lower())==(NTarget,Word):
                NResults.add(ent1)
            else:
                continue

print(NResults)

{'Awakening', 'Golkar', 'Communist', 'Hanura', 'Gerindra', 'Justice', 'Development', 'NasDem', 'Mandate', 'Berkarya', 'Democratic'}
