In [1]:
import os
# Move the working directory three levels up; later cells build their paths
# from os.getcwd().
# NOTE: the path is relative, so re-running this cell in the same kernel moves
# up another three levels.
os.chdir("../../../")

import pandas as pd
import numpy as np

import re
import nltk
from nltk.corpus import stopwords
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from nltk import tokenize

# Gensim
import gensim
import gensim.corpora as corpora
from gensim.utils import simple_preprocess
from gensim.models import CoherenceModel 
import spacy

In [2]:
# Path to post_courier_news.csv, built from the current working directory.
target = f"{os.getcwd()}/data/text/png/post_courier_news.csv"
# Read the CSV and discard its "Unnamed: 0" column.
news = pd.read_csv(target).drop(columns="Unnamed: 0")

In [3]:
# Load the 'en_core_web_sm' spaCy pipeline; only its default stop words are used here.
en = spacy.load('en_core_web_sm')
# Copy the stop words into a list: a later cell extends it with NLTK's words.
spacy_stopwords = list(en.Defaults.stop_words)

In [4]:
# English stop word list from the NLTK stopwords corpus.
nltk_stopwords = stopwords.words("english")
# Show how many entries the NLTK list has.
print(len(nltk_stopwords))

179


In [5]:
# NLTK stop words that are not already present in the spaCy list.
nltk_unique = [word for word in nltk_stopwords if word not in spacy_stopwords]
# Grow the spaCy list in place so `spacy_stopwords` holds the union of both sources.
spacy_stopwords += nltk_unique

In [6]:
# Article bodies from the `news` column as a plain Python list.
data = news["news"].values.tolist()
# Remove new line characters: every run of whitespace collapses to one space.
# The pattern is a raw string because '\s' in a normal literal is an invalid
# escape sequence (a warning today, slated to become an error).
data = [re.sub(r'\s+', ' ', sent) for sent in data]
# Remove distracting single quotes (ASCII apostrophe only).
data = [re.sub("'", "", sent) for sent in data]
# Peek at the first cleaned document.
print(data[:1])

[' BY ANCILLA ILAVE A GROUP of students at the Legal Training Institute have been told that the most important part of their job is to get the facts right in court. Chief Justice Sir Gibbs Salika, who opened discussions at a week-long workshop in Port Moresby this week, told the students that getting facts right is important. “Know your facts well to present to the court, and actively participate in discussions,” Sir Gibbs said. He said this during the launch of the workshop on civil and criminal advocacy attended by LTI in collaboration with Australia’s Victorian Bar. The workshop was supported by the Australian Government through the PNG-Australia Partnership. The students were engaged in a range of activities, including interactive sessions and practical courtroom scenarios with accomplished trainers and legal practitioners. The experience provided participants with invaluable insights into case analysis, evidence presentation, effective communication, and persuasive techniques. Tho

In [7]:
def sent_to_words(sentences):
    """Lazily tokenize documents with ``gensim.utils.simple_preprocess``.

    Each item of ``sentences`` is converted with ``str()`` and tokenized with
    ``deacc=True``. This is a generator: it yields one token list per input
    item, in input order.
    """
    yield from (
        gensim.utils.simple_preprocess(str(text), deacc=True)
        for text in sentences
    )

# Materialize the generator: one token list per cleaned document.
data_words = list(sent_to_words(data))
# Show the tokens of the first document.
print(data_words[0])

['by', 'ancilla', 'ilave', 'group', 'of', 'students', 'at', 'the', 'legal', 'training', 'institute', 'have', 'been', 'told', 'that', 'the', 'most', 'important', 'part', 'of', 'their', 'job', 'is', 'to', 'get', 'the', 'facts', 'right', 'in', 'court', 'chief', 'justice', 'sir', 'gibbs', 'salika', 'who', 'opened', 'discussions', 'at', 'week', 'long', 'workshop', 'in', 'port', 'moresby', 'this', 'week', 'told', 'the', 'students', 'that', 'getting', 'facts', 'right', 'is', 'important', 'know', 'your', 'facts', 'well', 'to', 'present', 'to', 'the', 'court', 'and', 'actively', 'participate', 'in', 'discussions', 'sir', 'gibbs', 'said', 'he', 'said', 'this', 'during', 'the', 'launch', 'of', 'the', 'workshop', 'on', 'civil', 'and', 'criminal', 'advocacy', 'attended', 'by', 'lti', 'in', 'collaboration', 'with', 'australia', 'victorian', 'bar', 'the', 'workshop', 'was', 'supported', 'by', 'the', 'australian', 'government', 'through', 'the', 'png', 'australia', 'partnership', 'the', 'students', 'w

In [12]:
# Phrase model trained on the tokenized documents (min_count=5, threshold=100).
bigram = gensim.models.Phrases(data_words, min_count=5, threshold=100)
# Phraser wrapper around the trained model; make_bigrams()/make_trigrams() index into it.
bigram_mod = gensim.models.phrases.Phraser(bigram)

# Second phrase model, trained on the output of `bigram[data_words]` with the same settings.
trigram = gensim.models.Phrases(bigram[data_words], min_count=5, threshold=100)
# Phraser wrapper used by make_trigrams().
trigram_mod = gensim.models.phrases.Phraser(trigram)

In [13]:
def remove_stopwords(texts):
    """Re-tokenize each document and drop tokens found in ``spacy_stopwords``.

    Parameters
    ----------
    texts : iterable
        Documents to filter. Each one is passed through ``str()`` and then
        ``gensim.utils.simple_preprocess``; in this notebook it is called
        with lists of tokens.

    Returns
    -------
    list of list of str
        The kept tokens of every document, in input order.
    """
    # Snapshot the module-level stop word list as a set once per call. The
    # membership test runs for every token, and against a list it is linear
    # in the list's length.
    stop_set = set(spacy_stopwords)
    return [[
        word for word in gensim.utils.simple_preprocess(str(doc))
        if word not in stop_set
    ] for doc in texts]


def make_bigrams(texts):
    """Index ``bigram_mod`` with every document and collect the results in a list."""
    merged = []
    for tokens in texts:
        merged.append(bigram_mod[tokens])
    return merged


def make_trigrams(texts):
    """Index ``bigram_mod`` and then ``trigram_mod`` with every document.

    Returns a list with one transformed document per input document.
    """
    def _bigram_then_trigram(tokens):
        return trigram_mod[bigram_mod[tokens]]

    return list(map(_bigram_then_trigram, texts))


def lemmatization(texts, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV']):
    """Lemmatize tokenized documents, keeping only tokens with allowed POS tags.

    Each document's tokens are joined with single spaces and passed to the
    module-level ``nlp`` pipeline. For every resulting token whose ``pos_``
    is in ``allowed_postags``, its ``lemma_`` is kept.

    Returns a list with one list of lemmas per input document.
    """
    def _kept_lemmas(tokens):
        parsed = nlp(" ".join(tokens))
        return [tok.lemma_ for tok in parsed if tok.pos_ in allowed_postags]

    return [_kept_lemmas(tokens) for tokens in texts]

In [14]:
# Drop stop words, then apply the bigram Phraser to each document.
data_words_nostops = remove_stopwords(data_words)
data_words_bigrams = make_bigrams(data_words_nostops)

# `nlp` is the global pipeline that lemmatization() reads, so it must be
# assigned before the call below; it is loaded with 'parser' and 'ner' disabled.
nlp = spacy.load('en_core_web_sm', disable=['parser', 'ner'])

# Do lemmatization keeping only noun, adj, vb, adv
data_lemmatized = lemmatization(data_words_bigrams, allowed_postags=['NOUN', 'ADJ', 'VERB', 'ADV'])

# Show the lemmas kept for the first document.
print(data_lemmatized[0])

['group', 'student', 'legal', 'tell', 'important', 'job', 'fact', 'right', 'court', 'chief', 'open', 'discussion', 'week', 'long', 'week', 'tell', 'student', 'get', 'fact', 'right', 'important', 'know', 'fact', 'present', 'court', 'discussion', 'say', 'say', 'civil', 'criminal', 'advocacy', 'attend', 'support', 'australian', 'government', 'partnership', 'student', 'engage', 'range', 'activity', 'include', 'interactive', 'session', 'practical', 'courtroom', 'scenario', 'accomplish', 'trainer', 'legal', 'practitioner', 'experience', 'provide', 'participant', 'invaluable', 'insight', 'case', 'evidence', 'presentation', 'effective', 'communication', 'persuasive', 'technique', 'important', 'workshop', 'provide', 'mock', 'exercise', 'law', 'student', 'experience', 'actually', 'happen', 'court', 'room']


In [15]:
# Build the gensim dictionary from the lemmatized documents.
id2word = corpora.Dictionary(data_lemmatized)
# `texts` is a second name for the same lemmatized documents.
texts = data_lemmatized
# DTM: one bag-of-words entry per document.
corpus = list(map(id2word.doc2bow, texts))

In [17]:
# NOTE(review): this import sits mid-notebook rather than in the import cell, and
# `gensim.models.wrappers` is presumably only available in gensim < 4.0 — confirm
# the installed version.
from gensim.models.wrappers import LdaMallet
# Path to the Mallet launcher, one directory above the working directory.
mallet_path = os.getcwd() + '/../mallet-2.0.8/bin/mallet'
# Fit a single 20-topic model on the bag-of-words corpus.
ldamallet = LdaMallet(mallet_path, corpus=corpus, num_topics=20, id2word=id2word)

Mallet LDA: 20 topics, 5 topic bits, 11111 topic mask
Data loaded.
max tokens: 1100
total tokens: 1462820
<10> LL/token: -10.2866
<20> LL/token: -9.36058
<30> LL/token: -8.78036
<40> LL/token: -8.54515

0	2.5	court law case order state process decision act issue land matter office rule lawyer justice legal application public petition chief 
1	2.5	public information country medium report include important book citizen region system protect national issue corruption write give publish communication note 
2	2.5	community people woman child support church youth life change live young violence family leader work program girl good bring member 
3	2.5	public city road water vehicle area travel proper clean resident place authority main town transport safety work carry highway drive 
4	2.5	event group visit year host culture local include hold support tourism promote company present traditional activity cultural day week sponsor 
5	2.5	people village affect area house fire goroka lose disaster

<160> LL/token: -8.18146
<170> LL/token: -8.17394
<180> LL/token: -8.17059
<190> LL/token: -8.16543

0	2.5	court law case process decision state order matter rule lawyer justice legal issue chief act application petition national file evidence 
1	2.5	system report information public medium provide citizen important agency country national set corruption include record book point practice understand conference 
2	2.5	community woman child church support youth life young change live family violence work program awareness girl group social society activity 
3	2.5	city water road public town vehicle place area travel clean resident transport safety main carry proper drive authority safe traffic 
4	2.5	event visit year host include hold culture group week promote tourism day support local attend present traditional cultural international meeting 
5	2.5	village area affect community assist fire assistance local people week team disaster house province island destroy appeal include early dama

<310> LL/token: -8.14909
<320> LL/token: -8.14724
<330> LL/token: -8.14598
<340> LL/token: -8.14655

0	2.5	court law case order state decision process matter rule issue lawyer justice legal national petition act office application file charge 
1	2.5	system report information public medium provide important citizen agency set corruption include national register process country require record state account 
2	2.5	woman community child church support youth life young violence live work family awareness program change group girl encourage society social 
3	2.5	city water public road town vehicle place area travel goroka clean resident transport proper main safe carry authority drive safety 
4	2.5	event visit host year culture include hold group promote day tourism present week local support traditional international stage cultural conference 
5	2.5	village area affect community week fire assist assistance team local disaster include house island appeal destroy province damage ground villa

<460> LL/token: -8.14396
<470> LL/token: -8.14559
<480> LL/token: -8.14434
<490> LL/token: -8.14404

0	2.5	court law case order state decision process matter issue rule lawyer justice legal petition national application act charge file evidence 
1	2.5	system report information public medium provide citizen include register process number agency set national important corruption record country base publish 
2	2.5	community woman child church support youth life young violence family awareness live change work group girl program society social man 
3	2.5	city public water road town vehicle place area market travel clean resident main transport goroka proper safety carry authority safe 
4	2.5	event visit year host culture day hold include group tourism promote week support present traditional international local stage cultural conference 
5	2.5	village area affect fire community local team week assistance province assist disaster water house include island destroy region month damage 
6	2.

<610> LL/token: -8.14474
<620> LL/token: -8.14554
<630> LL/token: -8.1435
<640> LL/token: -8.14499

0	2.5	court law case order state decision process matter rule lawyer justice legal national issue petition application file act evidence charge 
1	2.5	system report information public medium provide important country number register agency process set corruption citizen include record add ensure national 
2	2.5	community woman church support youth child life young violence awareness work change family group live girl society program social activity 
3	2.5	city public market road town place vehicle area water goroka travel clean resident proper main safety safe carry transport authority 
4	2.5	event visit year host day culture hold include tourism present group promote week local support traditional stage international cultural art 
5	2.5	village area affect water fire community assistance assist week team disaster local island region province include destroy house yesterday damage 
6	2.5

<760> LL/token: -8.14345
<770> LL/token: -8.14312
<780> LL/token: -8.14209
<790> LL/token: -8.14332

0	2.5	court law case order state decision process matter rule lawyer issue justice legal national petition application act file evidence charge 
1	2.5	system report information medium public provide agency important register number record national process corruption include base set place require citizen 
2	2.5	community woman church youth support child life young violence work awareness family live group change girl society social man activity 
3	2.5	city public market road town place area vehicle goroka main clean travel resident authority proper safe carry transport safety drive 
4	2.5	event visit year host day culture include tourism hold present promote group local stage traditional week international cultural support attend 
5	2.5	village area water affect fire community assist local assistance disaster team week island include destroy visit house damage appeal villager 
6	2.5	peo

<910> LL/token: -8.14192
<920> LL/token: -8.14142
<930> LL/token: -8.14164
<940> LL/token: -8.14167

0	2.5	court law case order state decision process rule matter lawyer justice legal issue petition charge application claim file national act 
1	2.5	system report information medium public provide process register important record number national agency corruption set ensure include require base publish 
2	2.5	community woman church support youth life young work violence awareness child live group change family society girl activity social man 
3	2.5	city public market town road area vehicle place travel goroka clean resident main proper safety safe transport ensure authority drive 
4	2.5	event visit year host day culture include hold tourism group local promote week traditional present international stage support cultural celebrate 
5	2.5	village water area affect fire community disaster assist week local assistance island team include month destroy house villager border damage 
6	2.5	p

In [18]:
def compute_coherence_values(dictionary,
                             corpus,
                             texts,
                             limit,
                             start=2,
                             step=1):
    """Train one Mallet LDA model per topic count and score each with c_v coherence.

    Parameters
    ----------
    dictionary : gensim dictionary
        Id-to-word mapping used both to train each model and to compute its
        coherence.
    corpus : list
        Bag-of-words corpus handed to ``LdaMallet``.
    texts : list of list of str
        Tokenized documents handed to ``CoherenceModel``.
    limit : int
        Exclusive upper bound of the topic counts tried.
    start : int, optional
        First topic count tried.
    step : int, optional
        Increment between successive topic counts.

    Returns
    -------
    model_list : list
        Trained models, in the order of ``range(start, limit, step)``.
    coherence_values : list
        Coherence score of each model, aligned with ``model_list``.

    Notes
    -----
    The Mallet executable is located through the module-level ``mallet_path``.
    """
    coherence_values = []
    model_list = []
    for num_topics in range(start, limit, step):
        # Train with the `dictionary` argument. The previous version passed the
        # global `id2word` here, silently ignoring the caller's dictionary
        # during training while still using it for the coherence score.
        model = gensim.models.wrappers.LdaMallet(mallet_path,
                                                 corpus=corpus,
                                                 num_topics=num_topics,
                                                 id2word=dictionary)
        model_list.append(model)
        coherencemodel = CoherenceModel(model=model,
                                        texts=texts,
                                        dictionary=dictionary,
                                        coherence='c_v')
        coherence_values.append(coherencemodel.get_coherence())

    return model_list, coherence_values

In [19]:
# Sweep the topic counts in range(2, 20, 1): one Mallet model and one
# coherence score per count.
model_list, coherence_values = compute_coherence_values(
    dictionary=id2word,
    corpus=corpus,
    texts=data_lemmatized,
    limit=20,
    start=2,
    step=1,
)

Mallet LDA: 2 topics, 1 topic bits, 1 topic mask
Data loaded.
max tokens: 1100
total tokens: 1462820
<10> LL/token: -8.40038
<20> LL/token: -8.21858
<30> LL/token: -8.03796
<40> LL/token: -7.96905

0	25	people police election issue leader time officer public member provincial law day province report court yesterday week national order man 
1	25	government year school student work country support service project program community people district development health include provide child education provincial 

<50> LL/token: -7.93687
<60> LL/token: -7.91964
<70> LL/token: -7.90863
<80> LL/token: -7.90017
<90> LL/token: -7.89452

0	25	people police election issue leader time officer public member provincial day province law report court yesterday national family week order 
1	25	government year school country student work support service community project program district people development health include provide woman child education 

<100> LL/token: -7.89005
<110> LL/token: -7.88844
<12

<960> LL/token: -7.87017
<970> LL/token: -7.87006
<980> LL/token: -7.87061
<990> LL/token: -7.87024

0	25	people police election issue leader time officer public provincial member province law national report court yesterday day week order family 
1	25	government year school country student work support service community project program district woman development health people provide include child education 

<1000> LL/token: -7.87032

Total time: 42 seconds
Mallet LDA: 3 topics, 2 topic bits, 11 topic mask
Data loaded.
max tokens: 1100
total tokens: 1462820
<10> LL/token: -8.76873
<20> LL/token: -8.44997
<30> LL/token: -8.1906
<40> LL/token: -8.09005

0	16.66667	government school year student country program project support development education fund work include training district plan teacher high provide policy 
1	16.66667	people work service health community area road year child time family live local village province woman district life good day 
2	16.66667	police election issue 

<660> LL/token: -7.94256
<670> LL/token: -7.94251
<680> LL/token: -7.94303
<690> LL/token: -7.94456

0	16.66667	government school year student country support project program district service development work education fund include provide training plan high teacher 
1	16.66667	people woman health year community child area road time work family village live day life local good market church hospital 
2	16.66667	police people election issue leader provincial officer member public province government law report court national yesterday order case land candidate 

<700> LL/token: -7.94366
<710> LL/token: -7.94344
<720> LL/token: -7.9428
<730> LL/token: -7.94206
<740> LL/token: -7.94177

0	16.66667	government school year country student support project program district service development work education fund provide include training plan high provincial 
1	16.66667	people health woman year community child area road work time family village live day life local good market church hospital 
2

<310> LL/token: -7.99104
<320> LL/token: -7.98942
<330> LL/token: -7.98929
<340> LL/token: -7.99012

0	12.5	school government student project program service district year development support education work fund provide training plan health high provincial build 
1	12.5	people year country woman community work child good time leader support change life live church day visit family event bring 
2	12.5	police officer area village market hospital people man team operation health city water road week report vehicle accord month station 
3	12.5	government election issue provincial national law court people public member land candidate province process state leader office order governor pay 

<350> LL/token: -7.9908
<360> LL/token: -7.98908
<370> LL/token: -7.98905
<380> LL/token: -7.98943
<390> LL/token: -7.99036

0	12.5	school government student service project program year district development support education fund provide work training plan health high include build 
1	12.5	people year 

<910> LL/token: -7.99083
<920> LL/token: -7.99127
<930> LL/token: -7.99221
<940> LL/token: -7.99102

0	12.5	school government student service project program year district development support education fund provide work provincial health training plan high province 
1	12.5	people year country woman community work child good time support life change live church family visit day leader event bring 
2	12.5	police officer area people village market hospital man operation city road water team vehicle week report health accord month station 
3	12.5	government election issue people provincial national law court leader member public land candidate process state province office order governor pay 

<950> LL/token: -7.99212
<960> LL/token: -7.99148
<970> LL/token: -7.99113
<980> LL/token: -7.99007
<990> LL/token: -7.99101

0	12.5	government school student service project program year district development education support fund work provide provincial training plan health high include 
1	12.5	peo

<410> LL/token: -8.03119
<420> LL/token: -8.0312
<430> LL/token: -8.03055
<440> LL/token: -8.0312

0	10	school year student woman child education program family training life teacher community event work attend high young youth good time 
1	10	people business land area company market time local village city water money live landowner place good small day food travel 
2	10	government country national development include system policy issue plan public leader provide address important ensure level management increase sector support 
3	10	police election officer law court issue case leader candidate order report yesterday man public process vehicle office member security party 
4	10	people government provincial service province project work district health community support road fund year build area program church provide team 

<450> LL/token: -8.03045
<460> LL/token: -8.02989
<470> LL/token: -8.02977
<480> LL/token: -8.02841
<490> LL/token: -8.02641

0	10	school year student woman child

<910> LL/token: -8.02501
<920> LL/token: -8.0263
<930> LL/token: -8.02676
<940> LL/token: -8.02673

0	10	school student year woman child education program training life teacher community family church event good work attend time young youth 
1	10	people business land area time company market village local city water live place landowner day money affect home start food 
2	10	country government development include national system policy plan issue leader level important provide develop address increase sector support region ensure 
3	10	police election officer law issue court leader report case candidate order yesterday man public process conduct office member vehicle party 
4	10	people government provincial service province project district work health year support community fund road provide build program area team deliver 

<950> LL/token: -8.02738
<960> LL/token: -8.02677
<970> LL/token: -8.02569
<980> LL/token: -8.0262
<990> LL/token: -8.02642

0	10	school year student woman child 


0	8.33333	year school student program child education training high teacher work receive week staff give support start attend provide parent institution 
1	8.33333	country woman community support church event work youth group important opportunity world change visit young good include business activity papua 
2	8.33333	police officer issue law report court order public case man act security state yesterday vehicle station charge operation office follow 
3	8.33333	village people health family market area hospital city water live time home day medical affect care team mother food travel 
4	8.33333	government service project district development provincial fund work road plan area support provide include policy company funding national health sector 
5	8.33333	people election leader member province provincial government national time candidate open governor leadership team party vote issue official parliament good 

<350> LL/token: -8.04551
<360> LL/token: -8.04446
<370> LL/token: -8.043

<710> LL/token: -8.04861
<720> LL/token: -8.04838
<730> LL/token: -8.04896
<740> LL/token: -8.04941

0	8.33333	school year student program education child training high teacher work receive staff week give support start provide attend parent learn 
1	8.33333	woman country community support church work event year youth group change visit world important opportunity good young life include day 
2	8.33333	police officer issue law report court order case public man security state act vehicle yesterday station charge office operation follow 
3	8.33333	health people village area market hospital family city water live time home medical day affect care team travel community mother 
4	8.33333	government service project district development fund provincial work plan road support policy provide include area national funding company sector province 
5	8.33333	people election leader member province provincial government time candidate national governor open leadership party vote process team issue 

<110> LL/token: -8.14291
<120> LL/token: -8.1349
<130> LL/token: -8.12798
<140> LL/token: -8.12322

0	7.14286	school student year education program teacher high event week attend child give youth parent learn institution primary receive study skill 
1	7.14286	government project provincial district people province fund year development land area build funding local company road benefit infrastructure business money 
2	7.14286	police election officer law court candidate order report issue process case vote yesterday charge security polling conduct act office return 
3	7.14286	market area road work city water vehicle place people time travel start village local public week town house sell yesterday 
4	7.14286	service support work provide community program health include system plan ensure training improve facility deliver management public staff partner department 
5	7.14286	country people leader government member issue policy church change national leadership hold good party parliament m

<460> LL/token: -8.09024
<470> LL/token: -8.08932
<480> LL/token: -8.09032
<490> LL/token: -8.09021

0	7.14286	school student year education teacher high program event week attend parent give child learn institution youth primary start study grade 
1	7.14286	government provincial project district province people fund year land work development road funding company build benefit infrastructure area money local 
2	7.14286	police election officer law court order candidate report issue process case yesterday act security office charge conduct vote public polling 
3	7.14286	area market city village water vehicle people place road local travel public house town time sell start affect food business 
4	7.14286	service support provide program community work include training plan system ensure development improve facility health management policy level partner base 
5	7.14286	people country leader member government issue church change good leadership national party lead hold parliament time meet

<810> LL/token: -8.08662
<820> LL/token: -8.08681
<830> LL/token: -8.08801
<840> LL/token: -8.08882

0	7.14286	school student year education teacher program high event week attend parent learn youth give child start primary institution study grade 
1	7.14286	government provincial project district province people fund service year work road land build area development funding pay benefit infrastructure company 
2	7.14286	police election officer law court issue report order process case public yesterday office act conduct charge security polling national return 
3	7.14286	area market village city water vehicle people local place travel road affect town sell business time public food main start 
4	7.14286	provide support program service development include system plan community work training ensure policy improve management sector level important develop partner 
5	7.14286	people country leader member government church change time good issue leadership lead party hold candidate parliament

<160> LL/token: -8.13287
<170> LL/token: -8.13182
<180> LL/token: -8.1322
<190> LL/token: -8.12953

0	6.25	road people market area village city business local travel vehicle food place water town sell start good public live transport 
1	6.25	police election officer law candidate issue order process leader party vote report public conduct security yesterday polling official office station 
2	6.25	service work health support community provide area team build facility include hospital continue staff rural program deliver local centre building 
3	6.25	country government development policy include plan system sector national issue address economic develop level region increase important public economy business 
4	6.25	government people provincial project province district fund year land member development national governor funding landowner pay benefit give work infrastructure 
5	6.25	people woman country church leader change year event life community work good time youth young group live d

java.lang.ArrayIndexOutOfBoundsException: 2
	at cc.mallet.topics.WorkerRunnable.sampleTopicsForOneDoc(WorkerRunnable.java:552)
	at cc.mallet.topics.WorkerRunnable.run(WorkerRunnable.java:275)
	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
	at java.lang.Thread.run(Thread.java:750)
java.lang.ArrayIndexOutOfBoundsException: 1
	at cc.mallet.topics.WorkerRunnable.sampleTopicsForOneDoc(WorkerRunnable.java:552)
	at cc.mallet.topics.WorkerRunnable.run(WorkerRunnable.java:275)
	at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511)
	at java.util.concurrent.FutureTask.run(FutureTask.java:266)
	at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
	at java.util.concurrent.ThreadPoolExecutor$Work

<510> LL/token: -8.11626
<520> LL/token: -8.11545
<530> LL/token: -8.11547
<540> LL/token: -8.11637

0	6.25	road market area people village city water local business vehicle travel food town sell place start live transport public money 
1	6.25	police election officer law issue candidate leader process order public party vote security conduct yesterday national polling official station office 
2	6.25	service health support community work provide hospital team include area facility build program rural people staff deliver medical continue care 
3	6.25	country development government policy plan system include sector economic national address develop important issue region increase level business improve economy 
4	6.25	government provincial people project district province fund year land member governor national development funding work pay landowner benefit give budget 
5	6.25	people woman year church country good life time leader live event change day youth group young community work br

<860> LL/token: -8.09903
<870> LL/token: -8.09803
<880> LL/token: -8.09683
<890> LL/token: -8.09727

0	6.25	road area market village people local city water vehicle business company food travel work town sell place start main transport 
1	6.25	police election officer leader law people candidate issue process member security party order vote conduct national open official station polling 
2	6.25	health community service support provide hospital people team facility include work area medical rural care staff program awareness centre assist 
3	6.25	country development policy government plan system sector include develop economic support important national business improve address level increase region provide 
4	6.25	government provincial project district people province year fund work land service governor funding development national pay member build infrastructure landowner 
5	6.25	woman people year family life church time good event country change day live work youth young community g

<110> LL/token: -8.14911
<120> LL/token: -8.14319
<130> LL/token: -8.13904
<140> LL/token: -8.13685

0	5.55556	family time year man live people child life village home day leave mother late find call fight lose death fire 
1	5.55556	woman church year country community event work change youth group people good visit world support young host day hold great 
2	5.55556	government provincial district project province fund year service land work pay funding people governor landowner national budget member benefit give 
3	5.55556	health service provide team hospital support community facility include medical staff care awareness people continue patient centre operation covid rural 
4	5.55556	country development plan policy support government improve sector system include develop provide economic program partnership level partner important economy focus 
5	5.55556	police officer law court order report issue public case act charge station state office security matter deal vehicle arrest yesterd

<410> LL/token: -8.11666
<420> LL/token: -8.11518
<430> LL/token: -8.11406
<440> LL/token: -8.11407

0	5.55556	family people year time live man child life village home day leave mother find late call fight lose bring death 
1	5.55556	woman community church work year event youth country support good group change people visit young day hold world host culture 
2	5.55556	government provincial district project province fund year service people work land funding pay development landowner national governor infrastructure benefit budget 
3	5.55556	health service provide hospital community team support facility medical include staff care patient awareness centre people operation continue covid worker 
4	5.55556	country development policy plan government system sector improve include develop support provide economic partnership important address level partner region economy 
5	5.55556	police officer law court order issue report public case act charge station office state security vehicle matter

<710> LL/token: -8.11175
<720> LL/token: -8.11208
<730> LL/token: -8.11167
<740> LL/token: -8.11214

0	5.55556	people family time year man live child life home village day leave mother late find call fight lose death thing 
1	5.55556	woman community church work event year youth change group country support good people visit young day hold world culture member 
2	5.55556	government provincial project district province fund year service people work land funding development pay landowner national infrastructure benefit budget receive 
3	5.55556	health service provide support hospital team community facility medical include staff care awareness patient centre continue operation covid people worker 
4	5.55556	country development policy government plan system sector improve include provide support economic develop level partnership important address partner business economy 
5	5.55556	police officer law court report order issue public case act charge station office security vehicle state mat

Mallet LDA: 10 topics, 4 topic bits, 1111 topic mask
Data loaded.
max tokens: 1100
total tokens: 1462820
<10> LL/token: -9.78914
<20> LL/token: -9.1749
<30> LL/token: -8.6135
<40> LL/token: -8.39994

0	5	woman family live village life child community man city home day time people mother young violence leave late youth thing 
1	5	police officer court law report order case issue charge land act station state public matter vehicle arrest suspect deal investigation 
2	5	health service provide hospital market water facility system medical team care include patient area support management centre covid worker supply 
3	5	school student program education training year teacher high child community parent institution learn support skill primary study train grade secondary 
4	5	election candidate national process vote conduct open security team party official week polling return yesterday campaign electorate period general time 
5	5	year work pay company give time start week receive continue mone

<280> LL/token: -8.11509
<290> LL/token: -8.11541

0	5	woman family live child life village time year city youth home day community man mother young violence leave late people 
1	5	police officer court law order report case vehicle issue public charge station act man matter arrest state suspect deal investigation 
2	5	health service hospital provide water facility community team support medical include staff care patient awareness centre area covid worker food 
3	5	school student year program education training teacher high child parent institution learn skill train primary study grade week attend receive 
4	5	election candidate national process issue party vote parliament time decision yesterday official polling open return conduct week team period general 
5	5	pay company business market money local travel area sell time week start town yesterday fire goroka small month operation farmer 
6	5	country government policy development plan issue system sector address national level economi

<560> LL/token: -8.11167
<570> LL/token: -8.11105
<580> LL/token: -8.11103
<590> LL/token: -8.1101

0	5	woman family year time child life live youth village home day man young mother leave people late violence thing community 
1	5	police officer court law order report case public charge station issue vehicle man act matter arrest deal suspect security force 
2	5	health service hospital community provide support team facility medical include staff care centre awareness patient covid worker month accord treatment 
3	5	school student program year education training teacher high parent institution child learn skill week train primary study attend grade receive 
4	5	election candidate process national issue decision party vote parliament time yesterday polling official return open conduct week period count seat 
5	5	company market business area water local city sell food travel town money small fire main affect farmer produce village goroka 
6	5	country government development policy plan sy

<860> LL/token: -8.10659
<870> LL/token: -8.10718
<880> LL/token: -8.10748
<890> LL/token: -8.10698

0	5	woman family year child live life time youth home day village young man work leave late mother violence thing people 
1	5	police officer court law order report case station charge public issue vehicle man act security arrest deal yesterday suspect operation 
2	5	health service hospital provide community support team facility medical include staff care awareness patient centre covid worker month accord continue 
3	5	school student program year education training teacher high parent week institution skill primary train learn study grade child fee attend 
4	5	election candidate issue national process decision party vote time parliament polling official yesterday open conduct return period count public seat 
5	5	market area business water city company local food village town sell money travel fire place main small goroka farmer transport 
6	5	country government development policy plan s

<110> LL/token: -8.16324
<120> LL/token: -8.15818
<130> LL/token: -8.14993
<140> LL/token: -8.14675

0	4.54545	school student education year program training teacher high institution parent week learn skill attend train primary study grade fee secondary 
1	4.54545	people provincial province government leader member governor leadership serve good country open change peace urge call work time political administrator 
2	4.54545	public people area city place road issue problem youth market town travel affect safe time live proper work goroka stop 
3	4.54545	election court candidate law process national party vote issue polling conduct decision official order public return count seat period campaign 
4	4.54545	country development policy support important sector include economic partner plan region develop address economy challenge opportunity level improve organisation key 
5	4.54545	district project service work community support build area people program development road deliver infrastru

<360> LL/token: -8.12537
<370> LL/token: -8.12578
<380> LL/token: -8.12782
<390> LL/token: -8.12688

0	4.54545	school student year education program training teacher high institution parent learn skill primary week study train grade attend fee receive 
1	4.54545	people provincial province government leader member governor leadership good country serve work year urge change lead time call bring political 
2	4.54545	area people city water public place road market problem town affect safe travel time village issue main live concern stop 
3	4.54545	election court candidate national process law vote issue party official polling conduct decision return order count seat time case yesterday 
4	4.54545	country development policy important plan support sector develop economic system include region challenge economy partner level address improve provide key 
5	4.54545	project district service work community government fund build support program area road development funding infrastructure deliver

<610> LL/token: -8.13247
<620> LL/token: -8.13095
<630> LL/token: -8.13102
<640> LL/token: -8.13258

0	4.54545	school student year education program training teacher high parent learn institution child skill primary study train week grade fee attend 
1	4.54545	people government province provincial leader member governor country leadership good serve urge work change bring year political time continue call 
2	4.54545	area people city water public road place village market problem affect town travel safe time live proper main home concern 
3	4.54545	election court candidate national process law vote official polling conduct issue return party decision public open order time count seat 
4	4.54545	country development policy plan important sector support system develop economic include partner provide economy challenge improve level address region key 
5	4.54545	project district service work government community fund build program area support road development funding deliver year infrastru

<860> LL/token: -8.13607
<870> LL/token: -8.13728
<880> LL/token: -8.13693
<890> LL/token: -8.13658

0	4.54545	school student year education program training teacher high parent learn institution week skill primary child study train attend grade fee 
1	4.54545	people government province leader provincial member governor country leadership good serve work year urge political change bring lead parliament continue 
2	4.54545	area city people water village market public road place problem town affect travel time live main vehicle home clean proper 
3	4.54545	election court candidate national process law vote official polling issue conduct decision party order public return time office yesterday count 
4	4.54545	country development policy plan important support system sector economic develop economy provide improve include partner level region address challenge opportunity 
5	4.54545	project district service government work fund community build program road development provincial support ar

<110> LL/token: -8.19426
<120> LL/token: -8.18705
<130> LL/token: -8.17994
<140> LL/token: -8.17483

0	4.16667	school student year education teacher high program parent institution child primary study grade learn fee receive secondary week attend teach 
1	4.16667	pay water road work affect authority yesterday issue public office week travel cost company time continue power month payment problem 
2	4.16667	election court candidate process vote national law polling official order office public conduct open case return count seat decision yesterday 
3	4.16667	community support church event youth visit group people member year program work present hold host day culture continue activity organisation 
4	4.16667	business training local company program provide work industry support agriculture train produce small opportunity skill include project grow australian manager 
5	4.16667	country development policy system plan sector economic level region develop economy important improve increase ke

<360> LL/token: -8.15747
<370> LL/token: -8.15593
<380> LL/token: -8.15577
<390> LL/token: -8.15786

0	4.16667	school student year education teacher program high parent child institution primary study learn grade fee week attend secondary receive teach 
1	4.16667	pay water road affect work travel yesterday office cost week issue authority month transport continue power payment operation disaster time 
2	4.16667	election court candidate process national vote law polling public official order conduct decision office case open count return seat issue 
3	4.16667	community support church event visit youth group year work people present day member hold host continue culture activity program include 
4	4.16667	business training local company provide program support industry project agriculture produce small train skill work grow opportunity farmer participant manager 
5	4.16667	country development policy plan system sector level economic region economy improve develop challenge key increase i

<610> LL/token: -8.15809
<620> LL/token: -8.15811
<630> LL/token: -8.15878
<640> LL/token: -8.15779

0	4.16667	school student year education teacher program high child parent training institution primary study learn grade fee secondary attend receive week 
1	4.16667	pay water road affect travel work cost yesterday office week transport issue continue month payment operation power authority disaster state 
2	4.16667	election court candidate process national vote law polling public official issue office conduct order case count decision seat open return 
3	4.16667	community support church event visit year group youth work present member host day hold culture program include continue people activity 
4	4.16667	business local company training project program provide industry support small agriculture produce grow farmer good opportunity tourism manager participant food 
5	4.16667	country development policy plan system level sector improve economic region economy partner important develop c

<860> LL/token: -8.1493
<870> LL/token: -8.15048
<880> LL/token: -8.15101
<890> LL/token: -8.14966

0	4.16667	school student education year training program teacher high parent child learn primary institution skill study train grade fee secondary week 
1	4.16667	pay road water affect travel week work yesterday continue transport cost operation office payment month close disaster time authority power 
2	4.16667	election court candidate process vote national law polling official public conduct office order decision return count seat yesterday issue open 
3	4.16667	community support church event visit group youth year program work present hold day include host member culture continue activity attend 
4	4.16667	business company local project year industry small agriculture provide increase grow produce good benefit farmer food tourism product coffee include 
5	4.16667	country development policy system plan improve important level include develop challenge sector partner provide region info

<60> LL/token: -8.30347
<70> LL/token: -8.26682
<80> LL/token: -8.2443
<90> LL/token: -8.22577

0	3.84615	school student year education road district teacher build high fund project funding complete building infrastructure receive primary grade fee parent 
1	3.84615	development country plan policy government sector economic develop increase improve economy include resource key region partner important investment level business 
2	3.84615	people government leader member country province governor leadership good change work party parliament serve political year deliver urge service bring 
3	3.84615	health service hospital facility team medical provide care patient centre covid worker staff country case include support treatment receive province 
4	3.84615	water city area affect town public place fire home situation safe problem clean house disaster resident carry supply main safety 
5	3.84615	program training support work provide train skill base organisation information australian oppor

<260> LL/token: -8.15347
<270> LL/token: -8.15112
<280> LL/token: -8.15151
<290> LL/token: -8.15037

0	3.84615	school student year education teacher high receive institution complete parent building primary study grade build fee week secondary child give 
1	3.84615	development country plan policy sector government economic develop increase economy region resource include key partner improve address infrastructure investment level 
2	3.84615	people leader member government country good leadership governor change time province serve political year continue parliament urge peace lead bring 
3	3.84615	health hospital service facility medical team patient care provide covid worker centre include case support treatment equipment awareness staff month 
4	3.84615	road city water area affect town public fire place work transport travel house clean disaster main resident home highway problem 
5	3.84615	program training work support provide information base train skill organisation management imp

<460> LL/token: -8.1447
<470> LL/token: -8.1433
<480> LL/token: -8.14145
<490> LL/token: -8.14147

0	3.84615	school student year education teacher high receive parent institution primary study child grade fee secondary week attend teach learn building 
1	3.84615	country development policy plan economic sector improve develop economy increase resource government include region key address partner investment challenge focus 
2	3.84615	people leader member government country good leadership change governor serve parliament political peace time urge bring continue lead call nation 
3	3.84615	health hospital service medical team facility care patient centre covid worker provide case receive treatment equipment include staff month accord 
4	3.84615	road city water area affect public town work fire transport travel place clean disaster resident highway main safe problem contractor 
5	3.84615	program work training support provide base information train management skill important organisation o

<660> LL/token: -8.14739
<670> LL/token: -8.14704
<680> LL/token: -8.14644
<690> LL/token: -8.14546

0	3.84615	school student year education teacher high institution parent receive primary study grade child fee secondary building week attend teach graduate 
1	3.84615	country development policy plan economic sector develop government resource increase economy improve include address key region partner investment national focus 
2	3.84615	people leader member government country good leadership change governor serve parliament political time peace urge bring lead continue call nation 
3	3.84615	health hospital service medical facility team care patient centre covid worker provide case treatment equipment include staff accord emergency receive 
4	3.84615	road city water area town work vehicle public affect transport place fire travel clean disaster resident highway carry safe house 
5	3.84615	program support work training provide community base management information train skill important 

<860> LL/token: -8.14389
<870> LL/token: -8.14372
<880> LL/token: -8.14245
<890> LL/token: -8.1439

0	3.84615	school student year education teacher high child parent institution primary study grade fee receive secondary week learn building teach graduate 
1	3.84615	country development policy plan economic government sector develop economy resource region increase address key improve include level challenge partner investment 
2	3.84615	people leader member government country good leadership change governor time serve political parliament peace urge public bring lead continue nation 
3	3.84615	health hospital medical service facility team care patient covid centre worker provide case staff accord treatment include equipment month general 
4	3.84615	road city water work area vehicle town public travel affect transport fire place house clean disaster resident highway construction time 
5	3.84615	program support training work community provide base information management train skill organi

<60> LL/token: -8.27645
<70> LL/token: -8.24412
<80> LL/token: -8.2264
<90> LL/token: -8.20827

0	3.57143	court law public order issue act state case process decision office matter national rule chief lawyer justice legal question petition 
1	3.57143	people election leader candidate member national vote open party parliament leadership government electorate governor time political seat campaign province general 
2	3.57143	police officer vehicle man station charge arrest report suspect fire investigation commander order force crime unit law incident victim kill 
3	3.57143	health service support provide hospital facility staff medical team centre care patient work worker receive deliver rural treatment general equipment 
4	3.57143	year event day time culture give tourism host celebrate book local present week group visit support promote hold raise today 
5	3.57143	country church people member government leader nation continue great world year region papua visit hold good independence med

<260> LL/token: -8.15925
<270> LL/token: -8.15841
<280> LL/token: -8.1576
<290> LL/token: -8.15676

0	3.57143	court law public order act case decision office issue process state matter national rule chief question lawyer justice legal system 
1	3.57143	people election leader candidate member government national vote open party parliament leadership electorate province political governor seat time campaign general 
2	3.57143	police officer vehicle man station arrest charge suspect fire report force order commander investigation law unit operation crime security incident 
3	3.57143	health service support provide hospital facility team staff medical centre care patient community worker deliver include treatment receive work equipment 
4	3.57143	year event day visit host culture world local tourism week celebrate include promote group support hold present time book traditional 
5	3.57143	country people church member work year leader continue good nation serve time late great independence s

<460> LL/token: -8.14738
<470> LL/token: -8.14633
<480> LL/token: -8.14572
<490> LL/token: -8.14586

0	3.57143	court law public order act decision case office process national state matter issue question chief rule lawyer justice system legal 
1	3.57143	election people candidate leader vote party national open member parliament polling official electorate political government province count seat time return 
2	3.57143	police officer vehicle station man security arrest report charge suspect fire force operation order personnel commander investigation unit crime law 
3	3.57143	health service support provide hospital team facility staff medical centre community patient care work worker include receive assistance treatment equipment 
4	3.57143	year event day visit host culture local world hold group tourism week celebrate promote present support include open traditional celebration 
5	3.57143	people country leader member work church year good serve nation time continue late leadership grea

<660> LL/token: -8.14515
<670> LL/token: -8.14338
<680> LL/token: -8.1452
<690> LL/token: -8.14318

0	3.57143	court law public order act decision case office national process matter state chief issue question rule lawyer justice legal system 
1	3.57143	election candidate people open vote party national parliament polling official leader electorate political member return count seat time campaign team 
2	3.57143	police officer vehicle man station security arrest force charge report suspect fire personnel operation investigation commander order unit law crime 
3	3.57143	health service support provide hospital facility team staff medical community centre patient care receive worker assistance treatment work equipment general 
4	3.57143	event year visit day host world culture week group local tourism hold include promote present celebrate opportunity support traditional celebration 
5	3.57143	people leader country member church work year good serve leadership time continue bring change lat

<860> LL/token: -8.14402
<870> LL/token: -8.14411
<880> LL/token: -8.14453
<890> LL/token: -8.14326

0	3.57143	court law public order decision act case office national process matter issue state chief question rule lawyer justice legal system 
1	3.57143	election candidate national vote party open polling parliament official return electorate people count time province seat team campaign yesterday leader 
2	3.57143	police officer man vehicle station security arrest report suspect fire operation force charge order commander investigation unit crime incident kill 
3	3.57143	health service support provide hospital facility team community staff medical centre patient care worker receive work treatment include assistance equipment 
4	3.57143	event year visit day world host culture tourism hold include group promote celebrate local week present support traditional celebration stage 
5	3.57143	people leader member country church work good year serve leadership change time bring late nation con

<60> LL/token: -8.28103
<70> LL/token: -8.24049
<80> LL/token: -8.21517
<90> LL/token: -8.19763

0	3.33333	business market company local money food sell small produce good agriculture farmer buy industry product coffee supply grow plant activity 
1	3.33333	woman community people work church change leader youth support country good violence nation challenge young role life live leadership girl 
2	3.33333	court law state process pay landowner order decision case issue national claim matter office rule payment lawyer justice legal chief 
3	3.33333	work road travel build cost complete open building transport construction start include operation service site carry highway vehicle major main 
4	3.33333	project district service fund support program government year development funding deliver people community rural receive infrastructure area build provide total 
5	3.33333	year family child time life day mother late leave live give young long home serve good heart feel thing lose 
6	3.33333	he

<260> LL/token: -8.14695
<270> LL/token: -8.14538
<280> LL/token: -8.14586
<290> LL/token: -8.14634

0	3.33333	business market company local money food sell small good agriculture produce farmer industry buy product supply coffee grow start plant 
1	3.33333	woman community people work church change youth leader support good young program violence life live group challenge country role organisation 
2	3.33333	court law state pay process landowner order decision case issue claim matter rule payment lawyer justice legal office national act 
3	3.33333	work road build travel complete cost open building transport construction start site include vehicle highway service carry area operation maintain 
4	3.33333	project district government service fund support year program development funding people deliver rural receive infrastructure area provide build benefit community 
5	3.33333	year family child time life day mother late leave give live long home serve thing heart feel good lose pass 
6	3.3

<460> LL/token: -8.13983
<470> LL/token: -8.14044
<480> LL/token: -8.14043
<490> LL/token: -8.14103

0	3.33333	business market company money local food sell small good agriculture produce industry farmer buy product coffee grow start plant supply 
1	3.33333	woman community people work church change support youth leader program good violence young life group live organisation society activity role 
2	3.33333	court law land state process landowner pay order decision issue case claim matter national rule payment lawyer justice legal petition 
3	3.33333	work road build complete cost open building travel transport construction start site area vehicle highway include time maintain contractor carry 
4	3.33333	project government district service fund support year program development people funding deliver rural infrastructure receive budget area provide plan money 
5	3.33333	year family time child life late day leave mother live long thing good home give find feel lose bring young 
6	3.33333	h

<660> LL/token: -8.13988
<670> LL/token: -8.14121
<680> LL/token: -8.13994
<690> LL/token: -8.14152

0	3.33333	business market company local money food sell small good agriculture industry produce farmer buy grow product coffee increase plant start 
1	3.33333	woman community people church work change support youth leader program life violence live young good group member organisation peace society 
2	3.33333	court land law state pay landowner issue process order decision case claim matter rule payment lawyer justice legal national petition 
3	3.33333	road work build building complete cost travel open start transport construction area include vehicle highway site service time maintain access 
4	3.33333	government project district service fund year support program development people funding deliver rural receive infrastructure budget area provide national benefit 
5	3.33333	year family time child life day late mother leave home live thing long good give find feel walk lose hard 
6	3.3333

<860> LL/token: -8.14097
<870> LL/token: -8.14232
<880> LL/token: -8.139
<890> LL/token: -8.1391

0	3.33333	business market company local money sell food good small agriculture industry produce farmer buy grow product coffee increase plant supply 
1	3.33333	woman community people church work support change youth leader program violence life young group good live member organisation peace society 
2	3.33333	court land state law issue landowner pay decision order process case claim matter national rule lawyer justice legal office payment 
3	3.33333	road work build complete building cost travel open transport construction include start area site highway vehicle time access service contractor 
4	3.33333	government project district service fund year support people development program funding deliver rural receive infrastructure budget plan national area benefit 
5	3.33333	year family time child life day late mother leave live home long good thing feel bring lose find walk give 
6	3.33333	he

<60> LL/token: -8.33251
<70> LL/token: -8.29041
<80> LL/token: -8.26443
<90> LL/token: -8.24431

0	3.125	police officer man report station charge arrest law order suspect fire investigation commander vehicle force unit victim crime incident criminal 
1	3.125	election candidate vote open party national polling leader electorate return official conduct count seat period team process general campaign people 
2	3.125	child year family mother landowner accord cent call life give home month live day time cancer find age affect raise 
3	3.125	time year work medium information country job disaster lot team island day report today experience week give record story big 
4	3.125	program training opportunity provide support work train skill base partnership improve develop participant tourism business knowledge australian organisation technology research 
5	3.125	market company business water local area sell food money town small produce farmer good buy clean supply village product agriculture 
6	

<260> LL/token: -8.15954
<270> LL/token: -8.16115
<280> LL/token: -8.16122
<290> LL/token: -8.15982

0	3.125	police officer man report station arrest charge order law suspect fire investigation force commander security operation unit victim crime incident 
1	3.125	election candidate vote open party national polling electorate parliament official return period conduct count seat leader general process team yesterday 
2	3.125	child family year life live home day mother time leave lose die walk cancer find accord body month face problem 
3	3.125	time year medium work thing give lot disaster report week today add start island long story country information end team 
4	3.125	program training work provide support partnership train skill improve base opportunity officer australian participant develop knowledge job management director experience 
5	3.125	market local company business water village sell food area money agriculture produce people town industry farmer good buy clean small 
6	3.12

<460> LL/token: -8.1489
<470> LL/token: -8.14458
<480> LL/token: -8.14554
<490> LL/token: -8.14463

0	3.125	police officer man report station order arrest law suspect fire charge force security investigation commander unit vehicle operation crime victim 
1	3.125	election candidate vote national party open polling parliament official period electorate return count seat process team campaign general leader member 
2	3.125	child family year life live home time day mother leave lose village late walk die person young body find man 
3	3.125	time report medium year give affect disaster happen information thing face add lot week island experience situation yesterday story end 
4	3.125	program training work support provide community partnership train skill management base officer improve organisation participant knowledge staff opportunity job director 
5	3.125	market business water local company village sell food people agriculture industry town money area produce farmer buy small grow produc

<660> LL/token: -8.14388
<670> LL/token: -8.14374
<680> LL/token: -8.14407
<690> LL/token: -8.14418

0	3.125	police officer man station report arrest order law suspect fire charge security force operation investigation commander vehicle unit victim crime 
1	3.125	election candidate vote open party polling parliament national official team electorate count seat return period campaign process general yesterday week 
2	3.125	child family year life time live home day mother late leave village lose man young die walk body person death 
3	3.125	time report medium affect information yesterday disaster give face remain year happen week add island situation month find end concern 
4	3.125	program training work support provide community train partnership skill base management officer organisation participant job knowledge service staff conduct improve 
5	3.125	market business water local company village sell food people industry agriculture money produce area small farmer buy good grow product 


<860> LL/token: -8.14503
<870> LL/token: -8.14425
<880> LL/token: -8.14618
<890> LL/token: -8.14556

0	3.125	police officer man station report arrest law order security suspect fire charge investigation commander force operation vehicle unit victim crime 
1	3.125	election candidate vote open party national polling parliament official period electorate count seat process team return general campaign yesterday start 
2	3.125	family year child time life live home day mother late leave village lose man die long walk young body start 
3	3.125	report time medium affect information yesterday face happen disaster add concern week situation give island raise remain point early country 
4	3.125	program work training support community provide train officer skill base management service staff organisation partnership manager participant knowledge job conduct 
5	3.125	market business water local company village food sell area industry agriculture people small money produce farmer buy grow product c

<60> LL/token: -8.32535
<70> LL/token: -8.28564
<80> LL/token: -8.25784
<90> LL/token: -8.23646

0	2.94118	training hospital team staff officer medical work patient train week management provide operation general skill treatment manager participant month doctor 
1	2.94118	community people issue youth problem address awareness violence activity change peace live medium life good group order fight bring create 
2	2.94118	court law case public order act office decision matter report process national state rule lawyer justice system legal chief issue 
3	2.94118	people government leader provincial province member governor party parliament leadership country political serve good urge administrator national elect term contest 
4	2.94118	country high covid number increase accord risk remain affect response include result disease island reduce spread measure region cent low 
5	2.94118	service district health support program provincial fund province facility community provide deliver area rural 

<210> LL/token: -8.17547
<220> LL/token: -8.17412
<230> LL/token: -8.17156
<240> LL/token: -8.16968

0	2.94118	health training hospital team staff medical officer provide service patient train care worker management facility skill treatment general week equipment 
1	2.94118	people community issue youth address problem awareness activity violence change peace live good order medium fight group life bring end 
2	2.94118	court law case public order act office decision matter state process rule lawyer chief question justice legal national issue petition 
3	2.94118	people government provincial leader province member governor party parliament national leadership political serve administrator term country urge contest call public_servant 
4	2.94118	country high report number increase covid affect result accord include cent response risk disaster disease recent remain reduce low measure 
5	2.94118	district service support program fund government community deliver area rural funding provide pro

<360> LL/token: -8.15473
<370> LL/token: -8.15462
<380> LL/token: -8.15325
<390> LL/token: -8.15078

0	2.94118	health training hospital service team staff medical facility provide officer patient train care worker centre treatment week equipment work skill 
1	2.94118	people community issue youth address problem awareness change leader violence activity peace live medium fight group good order bring call 
2	2.94118	court law public order case decision office act matter process state national rule chief lawyer question justice legal issue petition 
3	2.94118	government provincial people province leader member governor parliament national leadership political serve administrator term party central public_servant office urge year 
4	2.94118	country report high affect increase case number covid accord result include disaster cent response risk disease confirm remain measure recent 
5	2.94118	district service support program fund government funding area deliver community rural year developme

<510> LL/token: -8.14707
<520> LL/token: -8.14682
<530> LL/token: -8.14645
<540> LL/token: -8.14695

0	2.94118	health training hospital service team staff provide medical facility officer patient train care worker centre treatment equipment general week doctor 
1	2.94118	people community issue youth address problem leader violence awareness good live peace activity change medium order fight bring happen group 
2	2.94118	court law public case order decision act matter process state office rule issue national lawyer chief question justice legal petition 
3	2.94118	government provincial province people member leader governor national parliament leadership serve administrator office work political term administration public_servant urge central 
4	2.94118	country report high affect increase include case number covid accord cent result response disaster risk reduce low confirm recent measure 
5	2.94118	district service support program fund community government funding year area deliver rura

<660> LL/token: -8.14481
<670> LL/token: -8.144
<680> LL/token: -8.14459
<690> LL/token: -8.14548

0	2.94118	health training hospital team service staff provide medical facility patient officer care train worker centre treatment equipment general doctor cancer 
1	2.94118	people community issue leader youth problem address activity live violence change good peace awareness medium fight happen bring order call 
2	2.94118	court law public order case decision issue act matter office process state rule national lawyer question chief justice legal petition 
3	2.94118	government provincial province people member leader governor national serve leadership parliament work administrator term office year political central urge public_servant 
4	2.94118	country report affect high increase case number covid accord cent result include response disaster risk recent remain reduce confirm measure 
5	2.94118	district service support program fund community people area year funding government deliver rural

<810> LL/token: -8.14266
<820> LL/token: -8.14362
<830> LL/token: -8.14461
<840> LL/token: -8.14413

0	2.94118	health training hospital service team staff provide medical facility patient officer care train worker centre treatment equipment include work general 
1	2.94118	people community leader issue youth problem address change good violence live peace activity fight awareness bring call medium order village 
2	2.94118	court law public order case decision process matter act state office rule chief issue national lawyer question justice legal petition 
3	2.94118	government provincial province member people leader governor national work office leadership serve administrator parliament year administration central urge public_servant term 
4	2.94118	report country affect high increase covid case include number result cent accord disaster response risk confirm total remain recent measure 
5	2.94118	district service support program fund community area year funding government deliver people

<960> LL/token: -8.14133
<970> LL/token: -8.14193
<980> LL/token: -8.14354
<990> LL/token: -8.14202

0	2.94118	health training hospital team service staff provide medical facility patient officer train care centre worker treatment equipment general work doctor 
1	2.94118	people community leader issue youth problem address live change good violence peace order awareness call law fight village bring activity 
2	2.94118	court law public case order decision process matter state national issue office chief rule act question lawyer justice legal petition 
3	2.94118	government provincial province member people governor leader work national office serve leadership year administrator parliament administration public_servant urge central position 
4	2.94118	report country affect increase high case covid number accord include cent result disaster response risk recent effort measure remain total 
5	2.94118	district service support program fund community area government year funding deliver rural p

<110> LL/token: -8.21234
<120> LL/token: -8.20321
<130> LL/token: -8.19829
<140> LL/token: -8.19254

0	2.77778	event year visit day host culture hold group include tourism world celebrate today promote traditional present celebration cultural attend share 
1	2.77778	school student education year teacher high parent child institution primary study grade fee secondary teach university graduate learn book college 
2	2.77778	government development plan policy project sector resource national economic economy benefit include agreement cent develop investment implement key sign impact 
3	2.77778	people village church local live area money community food year start farmer province bring produce good small coffee agriculture time 
4	2.77778	government provincial district province service fund funding year project rural deliver development infrastructure governor national administration budget administrator receive program 
5	2.77778	court law order case matter rule state charge decision nation

<260> LL/token: -8.17079
<270> LL/token: -8.16985
<280> LL/token: -8.16746
<290> LL/token: -8.16495

0	2.77778	event year visit day host culture group tourism hold celebrate promote today include world traditional celebration cultural present stage play 
1	2.77778	school student education year teacher high parent institution primary study child grade fee secondary learn teach university graduate book college 
2	2.77778	government development plan policy project sector resource national economy improve develop economic include benefit agreement key cent implement partner sign 
3	2.77778	people church village local money area live community start food business year small produce farmer bring agriculture coffee good grow 
4	2.77778	government provincial district province service fund year project funding development governor deliver rural national budget infrastructure administrator administration receive program 
5	2.77778	court law order case matter decision act office rule process chie

<410> LL/token: -8.16635
<420> LL/token: -8.16626
<430> LL/token: -8.16451
<440> LL/token: -8.16559

0	2.77778	event year visit day culture host tourism celebrate group hold promote today traditional celebration world include cultural stage play week 
1	2.77778	school student education year teacher high parent institution primary child study grade fee secondary teach university learn college book class 
2	2.77778	government development plan policy project sector resource improve national develop economic key cent include implement partner infrastructure investment focus impact 
3	2.77778	people church local money business village food start year small produce agriculture bring live farmer good area coffee grow community 
4	2.77778	government provincial district province fund project service year funding rural development national governor deliver infrastructure budget program administrator administration total 
5	2.77778	court law order case act matter decision process rule chief natio

<560> LL/token: -8.15238
<570> LL/token: -8.15278
<580> LL/token: -8.15384
<590> LL/token: -8.15298

0	2.77778	event year visit day culture host tourism hold celebrate promote today group celebration traditional include cultural present play week stage 
1	2.77778	school student education year teacher high parent institution primary study child grade fee secondary learn university teach college attend book 
2	2.77778	development government plan policy sector project improve resource develop national include key partner implement focus economic support ensure stakeholder infrastructure 
3	2.77778	people church business local money food small start agriculture produce good village farmer grow year coffee product bring industry market 
4	2.77778	government provincial district province project fund service year funding rural governor budget national development infrastructure deliver program administrator administration total 
5	2.77778	court law order case act decision matter office proces

<710> LL/token: -8.14978
<720> LL/token: -8.14862
<730> LL/token: -8.14904
<740> LL/token: -8.14827

0	2.77778	event year day visit culture tourism celebrate host hold promote include group today celebration traditional play stage cultural time world 
1	2.77778	school student education year teacher high parent primary study institution grade child fee secondary teach university book college attend learn 
2	2.77778	development plan policy government sector improve project develop resource key national include partner implement support ensure focus economic stakeholder partnership 
3	2.77778	church people business local money food small good agriculture produce start farmer grow market year product coffee sell buy industry 
4	2.77778	government provincial district province project fund service year funding budget rural development governor deliver national infrastructure program administration total administrator 
5	2.77778	court law order case act decision matter process chief office ru

<860> LL/token: -8.14784
<870> LL/token: -8.14921
<880> LL/token: -8.14968
<890> LL/token: -8.1498

0	2.77778	event year day visit culture tourism hold celebrate promote host group today traditional celebration include stage cultural play world time 
1	2.77778	school student education year teacher high parent primary study institution grade child fee secondary university teach book college learn attend 
2	2.77778	development plan policy government sector improve develop resource national project include key partner implement focus support stakeholder economic partnership ensure 
3	2.77778	church business people local money food small start good sell produce market agriculture farmer year grow product coffee buy industry 
4	2.77778	government provincial district province project fund service year funding budget development rural national governor deliver infrastructure program administration total administrator 
5	2.77778	court law order case act decision matter office chief rule proces

Mallet LDA: 19 topics, 5 topic bits, 11111 topic mask
Data loaded.
max tokens: 1100
total tokens: 1462820
<10> LL/token: -10.25453
<20> LL/token: -9.29165
<30> LL/token: -8.72353
<40> LL/token: -8.50196

0	2.63158	program training work officer train skill conduct participant management staff knowledge attend week cancer community role team base support manager 
1	2.63158	country government increase support level sector system include high develop population provide ensure cent base improve research sustainable resource service 
2	2.63158	country development policy economic economy partner region opportunity partnership business meeting world trade develop investment important international papua nation industry 
3	2.63158	child church life youth young good live year family time change thing mother community work job give hope book future 
4	2.63158	election candidate vote party polling open official national electorate count seat return security team period yesterday campaign process p

<160> LL/token: -8.1881
<170> LL/token: -8.18613
<180> LL/token: -8.18201
<190> LL/token: -8.18047

0	2.63158	program training work officer train skill provide staff week management conduct participant manager team knowledge director service job base attend 
1	2.63158	system plan increase improve level sector national country high ensure provide base develop management implement service cent challenge address population 
2	2.63158	country development policy government economic region economy partner opportunity partnership papua support important world business investment meeting international trade nation 
3	2.63158	child year church life family young live time good work mother give serve thing start age today day bring word 
4	2.63158	election candidate vote open polling official electorate national party team count seat return period process security campaign week general conduct 
5	2.63158	police court officer case order charge law report station arrest man suspect investigation co

<310> LL/token: -8.16777
<320> LL/token: -8.1688
<330> LL/token: -8.16925
<340> LL/token: -8.16446

0	2.63158	program training work officer train skill staff week provide management participant service manager conduct knowledge job base team director experience 
1	2.63158	system plan policy improve increase level national sector provide ensure agency base implement service management high department address include develop 
2	2.63158	country development government economic region economy opportunity partner support partnership world papua important meeting investment policy develop trade resource international 
3	2.63158	year child church life time family live work good young mother give serve today thing hard start age day hope 
4	2.63158	election candidate vote open polling official national electorate return count seat team period process province general yesterday campaign day conduct 
5	2.63158	police officer charge station man report arrest law order suspect investigation securit

<460> LL/token: -8.16096
<470> LL/token: -8.1625
<480> LL/token: -8.16137
<490> LL/token: -8.16229

0	2.63158	program training work officer train skill staff provide week management base participant conduct team manager director knowledge job experience attend 
1	2.63158	system plan policy improve national ensure level increase sector provide agency service implement base management public address department develop high 
2	2.63158	country development economic region economy support opportunity partner world government papua important investment partnership trade international future develop meeting business 
3	2.63158	year child church life time family live work mother young good give serve today day thing age hard word start 
4	2.63158	election candidate vote open polling national official electorate return team count seat process period week general yesterday conduct province campaign 
5	2.63158	police officer charge report station man arrest suspect vehicle investigation law order 

<610> LL/token: -8.15916
<620> LL/token: -8.15743
<630> LL/token: -8.15809
<640> LL/token: -8.15818

0	2.63158	program training work officer provide train staff skill week management participant team manager knowledge director conduct job experience base service 
1	2.63158	plan system policy national improve level ensure sector increase provide agency implement base service public management government include department address 
2	2.63158	country development economic region support economy opportunity partner world papua develop government partnership meeting include future investment international important trade 
3	2.63158	year child church time life family live work good serve mother young give day thing today bring start hard word 
4	2.63158	election candidate vote open polling official national electorate return count seat team process yesterday period conduct general week campaign province 
5	2.63158	police officer man station charge report arrest law order suspect vehicle secur

<760> LL/token: -8.16487
<770> LL/token: -8.16487
<780> LL/token: -8.16486
<790> LL/token: -8.16494

0	2.63158	program training work officer provide train skill staff week management participant conduct job manager knowledge attend experience director team base 
1	2.63158	system plan policy national ensure improve level provide service public sector agency increase implement base address information department management develop 
2	2.63158	country development economic region economy support world partner opportunity papua partnership government international meeting investment future trade develop important resource 
3	2.63158	year child church time family life live work mother good serve young give day today start hard thing word hope 
4	2.63158	election candidate vote open polling official national electorate team return count process seat yesterday period general conduct week campaign start 
5	2.63158	police officer man station report charge arrest vehicle suspect law security inves

<910> LL/token: -8.16565
<920> LL/token: -8.16725
<930> LL/token: -8.16731
<940> LL/token: -8.16562

0	2.63158	program training work officer provide train skill staff week management participant job manager conduct knowledge experience director team opportunity base 
1	2.63158	system plan policy national ensure service improve provide level public sector agency implement department information base management include increase address 
2	2.63158	country development economic region support economy partner world papua partnership opportunity include meeting future investment develop international trade business important 
3	2.63158	year child church family time life live work mother good young serve give day today thing start bring hard word 
4	2.63158	election candidate vote polling open national official electorate return team count seat process general period yesterday week conduct campaign today 
5	2.63158	police officer man station report arrest vehicle charge suspect order security 

In [21]:
# Keep the candidate model with the highest coherence score.
# NOTE(review): assumes coherence_values[i] was computed for model_list[i] -- confirm
# against the cell that builds both lists.
top_coherence_pos = np.argmax(coherence_values)
best_model = model_list[top_coherence_pos]
# Last expression of the cell: display the top weighted terms of each topic.
best_model.print_topics()

[(0,
  '0.034*"business" + 0.028*"market" + 0.027*"company" + 0.016*"local" + 0.014*"money" + 0.014*"sell" + 0.012*"food" + 0.011*"agriculture" + 0.011*"good" + 0.011*"small"'),
 (1,
  '0.053*"woman" + 0.053*"community" + 0.034*"people" + 0.024*"church" + 0.023*"work" + 0.023*"support" + 0.022*"change" + 0.019*"youth" + 0.019*"leader" + 0.017*"program"'),
 (2,
  '0.032*"court" + 0.021*"land" + 0.018*"state" + 0.016*"issue" + 0.016*"pay" + 0.016*"landowner" + 0.015*"process" + 0.014*"order" + 0.014*"decision" + 0.014*"law"'),
 (3,
  '0.046*"road" + 0.046*"work" + 0.025*"build" + 0.017*"complete" + 0.016*"building" + 0.015*"travel" + 0.014*"open" + 0.014*"cost" + 0.011*"transport" + 0.011*"start"'),
 (4,
  '0.076*"government" + 0.061*"project" + 0.059*"district" + 0.042*"service" + 0.038*"fund" + 0.035*"year" + 0.028*"support" + 0.027*"people" + 0.024*"development" + 0.023*"program"'),
 (5,
  '0.044*"year" + 0.035*"family" + 0.032*"time" + 0.028*"child" + 0.017*"life" + 0.014*"day" + 0.0

In [26]:
# Build one small frame per document and join them side by side in a single
# pass. Concatenating inside the loop re-copied the ever-growing frame for
# every document, which is quadratic in the number of documents.
# Each item yielded by load_document_topics() is presumably a list of
# (topic_id, weight) pairs, giving a two-column frame labelled 0 and 1 --
# TODO confirm against the model wrapper in use.
doc_topic_frames = [
    pd.DataFrame(doc_topics) for doc_topics in best_model.load_document_topics()
]
# pd.concat raises on an empty list, so fall back to an empty frame when the
# model yields no documents.
topics_df = pd.concat(doc_topic_frames, axis=1) if doc_topic_frames else pd.DataFrame()

In [28]:
# topics_df holds a pair of columns labelled 0 and 1 for every document.
# Drop every 0-labelled column, then transpose so each row is one document
# and each column is one position of the remaining values.
topics_df_new = topics_df.drop(columns=0).T.reset_index(drop=True)

# Position of the largest value in each row. The list is built before the new
# column is attached, so the column itself never takes part in the argmax.
dominant_positions = []
for _, doc_weights in topics_df_new.iterrows():
    dominant_positions.append(np.argmax(doc_weights))
topics_df_new["dominant_topic"] = dominant_positions

# NOTE(review): 0 and 13 are hard-coded topic ids tied to one particular
# trained model; they need re-checking whenever the topic model is refitted.
selected_topic_ids = [0, 13]
is_selected = topics_df_new.dominant_topic.isin(selected_topic_ids)
inclu_indexs = topics_df_new[is_selected].index.to_list()

In [29]:
# Pick the articles at the selected row positions and renumber them from 0.
# NOTE(review): assumes row i of the topic table corresponds to row i of
# `news` -- confirm no documents were dropped before modelling.
selected_articles = news.iloc[inclu_indexs]
busecon_news = selected_articles.reset_index(drop=True)
busecon_news

Unnamed: 0,url,title,date,news,tag
0,https://www.postcourier.com.pg/fresh-vegetable...,Fresh vegetables galore at Mt Hagen market,"July 13, 2023",\nBY PEARSON KOLO\nMT Hagen market in the West...,No Tag
1,https://www.postcourier.com.pg/cocoa-freight-p...,Cocoa freight pact helps rural growers,"July 13, 2023",\nBy PAUL BUNGTABU\nThe PNG Cocoa Board rural ...,No Tag
2,https://www.postcourier.com.pg/french-celebrat...,French celebrate Bastille Day in POM,"July 19, 2023",\nIn a remarkable celebration held at the APEC...,"Tagged: Bastille Day, French Embassy in Papua ..."
3,https://www.postcourier.com.pg/western-highlan...,Western Highlands is top SME province,"July 18, 2023",\nBY PEARSON KOLO\nWESTERN Highlands Province ...,No Tag
4,https://www.postcourier.com.pg/enb-takes-on-ic...,ENB takes on ICT revolution,"July 20, 2023",\nTHE digital transformation workshop that is ...,No Tag
...,...,...,...,...,...
1991,https://www.postcourier.com.pg/govt-gives-usai...,Govt gives USAID backing,"December 17, 2015",\nTHE United States Agency for International D...,No Tag
1992,https://www.postcourier.com.pg/santa-comes-ear...,Santa comes early for single Mt Hagen mum genset,"December 17, 2015",\nBy RAMCY WAMA\nA ROADSIDE bottle collecting ...,No Tag
1993,https://www.postcourier.com.pg/plan-set-to-add...,Plan set to address effects of El Nino,"December 17, 2015",\nA MAJOR review of strategies and operational...,No Tag
1994,https://www.postcourier.com.pg/housing-plan-se...,Housing plan set to go,"December 16, 2015",\nKey stakeholders sign MOU\nBy JOHN PANGKATAN...,No Tag


In [30]:
# Score every selected article with VADER and gather the scores in `ss_df`
# (one row per article, in the same order as `busecon_news`).

# Build the analyzer once; it does not depend on the article being scored.
sid = SentimentIntensityAnalyzer()

# The loop variable is deliberately NOT called `news`: that name is the
# articles DataFrame read by earlier cells, and rebinding it to a string
# makes those cells fail when they are re-run.
score_records = []
for article_text in busecon_news["news"]:
    # Flatten line breaks so the article is scored as one continuous text.
    sample = article_text.replace("\n", " ").strip()
    score_records.append(sid.polarity_scores(sample))

# Construct the frame once instead of concatenating per article (quadratic).
# The explicit column list keeps "compound" available even when there are no
# articles. Rows are numbered 0..n-1.
ss_df = pd.DataFrame(score_records, columns=["neg", "neu", "pos", "compound"])

In [31]:
# Attach the compound score of each article (matched by row order) and parse
# the date strings into datetimes.
busecon_news = busecon_news.assign(
    sentiment=ss_df["compound"].tolist(),
    date=pd.to_datetime(busecon_news["date"]),
)

In [53]:
# Mean article sentiment per calendar month, with each month labelled by its
# first day. Grouping on the month-start frequency ("MS") produces that label
# directly, which replaces both the deprecated lowercase "m" month-end alias
# and the offset arithmetic that moved month-end labels back to month start.
sent_by_month = (
    busecon_news
    .set_index("date")
    .groupby(pd.Grouper(freq="MS"))[["sentiment"]]
    .mean()
    .reset_index()
)

In [54]:
# Load the country-level price series and parse its dates (day-first format).
png_infl_path = f"{os.getcwd()}/data/prices/outputs/2017/RTFP_country_2017_2022-08-22.csv"
png_infl = pd.read_csv(png_infl_path).assign(
    date=lambda frame: pd.to_datetime(frame["date"], dayfirst=True)
)
# Left join: every price row is kept, and rows whose date has no monthly
# sentiment value get NaN in the `sentiment` column.
png_infl_sent = pd.merge(png_infl, sent_by_month, on="date", how="left")
png_infl_sent.head()

Unnamed: 0,Open,High,Low,Close,Inflation,country,ISO3,date,sentiment
0,0.997417,1.035504,0.960535,0.997725,,Papua New Guinea,PNG,2017-01-01,0.617313
1,0.998328,1.037044,0.960832,1.037044,,Papua New Guinea,PNG,2017-02-01,0.861838
2,1.029174,1.066052,0.992296,1.012038,,Papua New Guinea,PNG,2017-03-01,0.697121
3,1.019978,1.059257,0.9807,1.028941,,Papua New Guinea,PNG,2017-04-01,0.889195
4,1.018736,1.061383,0.976089,1.0093,,Papua New Guinea,PNG,2017-05-01,0.717284


In [90]:
# Relative change of sentiment versus the value 12 rows earlier:
#     (s[i] - s[i-12]) / s[i-12]
# The first 12 rows have no earlier value and therefore come out as NaN.
# Computed with shift() rather than an index loop: this avoids `np.NaN`
# (an alias removed in NumPy 2.0) and label-based scalar lookups.
# NOTE(review): 12 rows equal one year only if the frame has exactly one row
# per month in date order -- confirm.
sentiment_now = png_infl_sent["sentiment"]
sentiment_12_back = sentiment_now.shift(12)
# Kept as a plain list, which is what the following cell assigns as a column.
sent_change = ((sentiment_now - sentiment_12_back) / sentiment_12_back).tolist()

In [93]:
# Store the 12-row relative sentiment change as a column of the merged frame.
png_infl_sent = png_infl_sent.assign(sent_change=sent_change)

In [113]:
# Row-over-row first differences of the closing price index and of sentiment;
# the first row of each new column is NaN because it has no predecessor.
for diff_col, level_col in (("infl_diff", "Close"), ("sent_diff", "sentiment")):
    png_infl_sent[diff_col] = png_infl_sent[level_col].diff()

In [112]:
from scipy import signal

# Build the cross-correlation input explicitly from the merged frame. The
# previous version read a frame named `c` that no visible cell assigns, so the
# cell only ran with leftover kernel state.
# Rows with a missing value in either series are dropped because a single NaN
# turns the whole correlation output into NaN.
# NOTE(review): dropping interior rows breaks even monthly spacing, and the
# series are not mean-centred before correlating -- confirm this matches the
# intended analysis.
xcorr_input = png_infl_sent[["sent_change", "Inflation"]].dropna()
sent_series = xcorr_input["sent_change"]
infl_series = xcorr_input["Inflation"]

correlation = signal.correlate(sent_series, infl_series, mode="full")
lags = signal.correlation_lags(len(sent_series), len(infl_series), mode="full")
# Lag at which the raw cross-correlation is largest.
lag = lags[np.argmax(correlation)]
lag

27

In [118]:
import statsmodels.formula.api as smf

# Regress the change in the closing price index on the change in sentiment,
# reporting HAC (autocorrelation-robust) standard errors with up to 12 lags.
hac_options = {'maxlags': 12}
mod = smf.ols(formula="infl_diff~sent_diff", data=png_infl_sent)
res = mod.fit(cov_type='HAC', cov_kwds=hac_options)
res.summary()

0,1,2,3
Dep. Variable:,infl_diff,R-squared:,0.016
Model:,OLS,Adj. R-squared:,0.001
Method:,Least Squares,F-statistic:,1.028
Date:,"Mon, 21 Aug 2023",Prob (F-statistic):,0.314
Time:,17:56:57,Log-Likelihood:,108.42
No. Observations:,67,AIC:,-212.8
Df Residuals:,65,BIC:,-208.4
Df Model:,1,,
Covariance Type:,HAC,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.0048,0.003,1.422,0.155,-0.002,0.011
sent_diff,-0.0225,0.022,-1.014,0.311,-0.066,0.021

0,1,2,3
Omnibus:,0.098,Durbin-Watson:,2.491
Prob(Omnibus):,0.952,Jarque-Bera (JB):,0.062
Skew:,-0.062,Prob(JB):,0.969
Kurtosis:,2.917,Cond. No.,3.69
