# Imports 

In [100]:
import pandas as pd
import os
import numpy as np
import scipy as sp
import re
from sklearn.feature_extraction import stop_words as stop
pd.set_option('display.max_colwidth', -1)
pd.set_option('display.max_rows', 10)

# Functions

In [101]:
# used to mostly make the accuracy scores more readable
def make_percent(num):
    return '%.2f'%(num * 100) + '%'

# go through 'columns' of 'df' and replace parts of text based on 'regx' with "".
# used here to remove syntax from each headline in the dataframe
def df_regx_replace(df, columns, regx):
    for col in columns:
        df[col] = df[col].replace(to_replace = regx, value = "", regex = True)
        df[col] = df[col].str.lower()
    return df
        
# go through each column and tokenize/un-stop-word it. 
# used to begin the proccess of text normalization
def df_text_analyzer(df, columns, analyzer):
    for col in columns:
        column = df[col].copy()
        for index, val in column.iteritems():
            column.loc[index] = analyzer(val)
        df[col] = column
    return df

# remove rows that contain nan values
# used to remove 3 rows from this specific data set
def df_text_remove_nan(df):
    for row_index, row in df.iterrows():
        has_nan = False
        for index, val in row.iteritems():
            if(pd.isnull(val)):
                has_nan = True
        if (has_nan):
            print("removed row: " + str(row_index))
            df = df.drop(axis = 0, labels = row_index)
    return df

# create a single array of all the tokens that occurr in the data set.
def df_text_single_array(df, columns):
    element_index = 0;
    for col in columns:
        column = df[col].copy()
        col_num = 0
        for row_index, row_val in column.iteritems():
            for element in row_val:
                element_index = element_index + 1
    array = np.empty(shape = element_index, dtype = "S30")
    element_index = 0
    for col in columns:
        column = df[col].copy()
        for row_index, row_val in column.iteritems():
            for element in row_val:
                array[element_index] = element
                element_index = element_index + 1
    return array

# may not be needed, used to iterate through all cells in a df and transforming with
# tfidf transform function. Was having memory? issues.
def df_tfidf_transform(df, columns, tfidf):
    for col in columns:
        column = df[col].copy()
        for row_index, row_val in column.iteritems():
#             print(tfidf.transform(row_val))
            columns.loc[row_index] = tfidf.transform(row_val)
            print(columns.iloc[row_index])
#         df[col] = column
    return df

# append all the columns into one column, used to combine columns for aggregate vectorization.
def df_concat_str_columns(df, columns):
    concat_columns = df['Top1'].copy()
    for row_index, row_value in df.iterrows():
        single_string = ""
        for col in columns:
            single_string = single_string + " " + df[col].loc[row_index]
        concat_columns[row_index] = single_string
    return concat_columns

# Parse the data
Read the data into a dataframe, vectorize the first headline column, and extract the labels column.

## Import data

In [102]:
data_set = pd.read_csv("C:\\Users\\carmi\\OneDrive\\Documents\\Datasets\\stocknews\\Combined_News_DJIA.csv")
df_original = pd.DataFrame(data_set)
df_original

Unnamed: 0,Date,Label,Top1,Top2,Top3,Top4,Top5,Top6,Top7,Top8,...,Top16,Top17,Top18,Top19,Top20,Top21,Top22,Top23,Top24,Top25
0,2008-08-08,0,"b""Georgia 'downs two Russian warplanes' as countries move to brink of war""",b'BREAKING: Musharraf to be impeached.',b'Russia Today: Columns of troops roll into South Ossetia; footage from fighting (YouTube)',"b'Russian tanks are moving towards the capital of South Ossetia, which has reportedly been completely destroyed by Georgian artillery fire'","b""Afghan children raped with 'impunity,' U.N. official says - this is sick, a three year old was raped and they do nothing""",b'150 Russian tanks have entered South Ossetia whilst Georgia shoots down two Russian jets.',"b""Breaking: Georgia invades South Ossetia, Russia warned it would intervene on SO's side""","b""The 'enemy combatent' trials are nothing but a sham: Salim Haman has been sentenced to 5 1/2 years, but will be kept longer anyway just because they feel like it.""",...,"b'Georgia Invades South Ossetia - if Russia gets involved, will NATO absorb Georgia and unleash a full scale war?'",b'Al-Qaeda Faces Islamist Backlash',"b'Condoleezza Rice: ""The US would not act to prevent an Israeli strike on Iran."" Israeli Defense Minister Ehud Barak: ""Israel is prepared for uncompromising victory in the case of military hostilities.""'",b'This is a busy day: The European Union has approved new sanctions against Iran in protest at its nuclear programme.',"b""Georgia will withdraw 1,000 soldiers from Iraq to help fight off Russian forces in Georgia's breakaway region of South Ossetia""",b'Why the Pentagon Thinks Attacking Iran is a Bad Idea - US News &amp; World Report',b'Caucasus in crisis: Georgia invades South Ossetia',"b'Indian shoe manufactory - And again in a series of ""you do not like your work?""'",b'Visitors Suffering from Mental Illnesses Banned from Olympics',"b""No Help for Mexico's Kidnapping Surge"""
1,2008-08-11,1,"b'Why wont America and Nato help us? If they wont help us now, why did we help them in Iraq?'",b'Bush puts foot down on Georgian conflict',"b""Jewish Georgian minister: Thanks to Israeli training, we're fending off Russia """,b'Georgian army flees in disarray as Russians advance - Gori abandoned to Russia without a shot fired',"b""Olympic opening ceremony fireworks 'faked'""",b'What were the Mossad with fraudulent New Zealand Passports doing in Iraq?',b'Russia angered by Israeli military sale to Georgia',b'An American citizen living in S.Ossetia blames U.S. and Georgian leaders for the genocide of innocent people',...,b'Israel and the US behind the Georgian aggression?',"b'""Do not believe TV, neither Russian nor Georgian. There are much more victims""'",b'Riots are still going on in Montreal (Canada) because police murdered a boy on Saturday.',b'China to overtake US as largest manufacturer',b'War in South Ossetia [PICS]',b'Israeli Physicians Group Condemns State Torture',b' Russia has just beaten the United States over the head with Peak Oil',b'Perhaps *the* question about the Georgia - Russia conflict ',b'Russia is so much better at war',"b""So this is what it's come to: trading sex for food."""
2,2008-08-12,0,"b'Remember that adorable 9-year-old who sang at the opening ceremonies? That was fake, too.'","b""Russia 'ends Georgia operation'""","b'""If we had no sexual harassment we would have no children...""'","b""Al-Qa'eda is losing support in Iraq because of a brutal crackdown on activities it regards as un-Islamic - including women buying cucumbers""",b'Ceasefire in Georgia: Putin Outmaneuvers the West',b'Why Microsoft and Intel tried to kill the XO $100 laptop',b'Stratfor: The Russo-Georgian War and the Balance of Power ',"b""I'm Trying to Get a Sense of This Whole Georgia-Russia War: Vote Up If You Think Georgia Started It, Or Down If you Think Russia Did""",...,b'U.S. troops still in Georgia (did you know they were in Georgia in the first place?)',b'Why Russias response to Georgia was right',"b'Gorbachev accuses U.S. of making a ""serious blunder"" in pursuing its interest in the Caucasus region'","b'Russia, Georgia, and NATO: Cold War Two'","b'Remember that adorable 62-year-old who led your country into war based on evidence? That was fake, too.'",b'War in Georgia: The Israeli connection',b'All signs point to the US encouraging Georgia to invade South Ossetia. Goddamnit Bush.',b'Christopher King argues that the US and NATO are behind the Georgian invasion of South Ossetia but have misjudged Russian resolve. ',b'America: The New Mexico?',"b""BBC NEWS | Asia-Pacific | Extinction 'by man not climate'"""
3,2008-08-13,0,b' U.S. refuses Israel weapons to attack Iran: report',"b""When the president ordered to attack Tskhinvali [the capital of South Ossetia], we knew then we were doomed. How come he didn't realize that?""",b' Israel clears troops who killed Reuters cameraman',"b'Britain\'s policy of being tough on drugs is ""pointless"", says a former civil servant who once ran the Cabinet\'s anti-drugs unit.'","b'Body of 14 year old found in trunk; Latest (ransom paid) kidnapping victim in Mexico. Head cop quits, Prez dissolves suspect elite task force'",b'China has moved 10 *million* quake survivors into prefab homes',"b""Bush announces Operation Get All Up In Russia's Grill. Yeah, this will end well.""",b'Russian forces sink Georgian ships ',...,b'Elephants extinct by 2020?',b'US humanitarian missions soon in Georgia - if Russia hits the US - WWIII?',"b""Georgia's DDOS came from US sources""","b'Russian convoy heads into Georgia, violating truce'",b'Israeli defence minister: US against strike on Iran',b'Gorbachev: We Had No Choice',b'Witness: Russian forces head towards Tbilisi in breach of ceasefire agreement',b' Quarter of Russians blame U.S. for conflict: poll',b'Georgian president says US military will take control of seaports and airports - Pentagon denies',"b'2006: Nobel laureate Aleksander Solzhenitsyn accuses U.S., NATO of encircling Russia'"
4,2008-08-14,1,b'All the experts admit that we should legalise drugs ',b'War in South Osetia - 89 pictures made by a Russian soldier.',b'Swedish wrestler Ara Abrahamian throws away medal in Olympic hissy fit ',"b'Russia exaggerated the death toll in South Ossetia. Now only 44 were originally killed compared to 2,000.'",b'Missile That Killed 9 Inside Pakistan May Have Been Launched by the CIA',"b""Rushdie Condemns Random House's Refusal to Publish Novel for Fear of Muslim Retaliation""",b'Poland and US agree to missle defense deal. Interesting timing!',"b'Will the Russians conquer Tblisi? Bet on it, no seriously you can BET on it'",...,b'Bank analyst forecast Georgian crisis 2 days early',"b""Georgia confict could set back Russia's US relations 'for years' | World news | guardian.co.uk""",b'War in the Caucasus is as much the product of an American imperial drive as local conflicts.',"b'""Non-media"" photos of South Ossetia/Georgia conflict.'",b'Georgian TV reporter shot by Russian sniper during live broadcast [video]',b'Saudi Arabia: Mother moves to block child marriage',b'Taliban wages war on humanitarian aid workers',"b'Russia: World ""can forget about"" Georgia\'s territorial integrity'",b'Darfur rebels accuse Sudan of mounting major attack',b'Philippines : Peace Advocate say Muslims need assurance Christians not out to convert them'
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1984,2016-06-27,0,Barclays and RBS shares suspended from trading after tanking more than 8%,Pope says Church should ask forgiveness from gays for past treatment,Poland 'shocked' by xenophobic abuse of Poles in UK,"There will be no second referendum, cabinet agrees","Scotland welcome to join EU, Merkel ally says",Sterling dips below Friday's 31-year low amid Brexit uncertainty,No negative news about South African President allowed on state broadcaster.,Surge in Hate Crimes in the U.K. Following U.K.s Brexit Vote,...,German lawyers to probe Erdogan over alleged war crimes,"Boris Johnson says the UK will continue to ""intensify"" cooperation with the EU and tells his fellow Leave supporters they must accept the 52-48 referendum win was ""not entirely overwhelming"".",Richard Branson is calling on the UK government to hold a second EU referendum to prevent 'irreversible damage' to the country.,Turkey 'sorry for downing Russian jet',Edward Snowden lawyer vows new push for pardon from Obama,"Brexit opinion poll reveals majority don't want second EU referendum: ""half (48%) of British adults say that they are happy with the result, with two in five (43%) saying they are unhappy with the outcome.""","Conservative MP Leave Campaigner: ""The leave campaign don't have a post-Brexit plan...""","Economists predict UK recession, further weakening of Pound following Brexit.","New EU 'superstate plan by France, Germany: Creating a European superstate limiting the powers of individual members following Britains referendum decision to leave the EU",Pakistani clerics declare transgender marriages legal under Islamic law
1985,2016-06-28,1,"2,500 Scientists To Australia: If You Want To Save The Great Barrier Reef, Stop Supporting Coal","The personal details of 112,000 French police officers have been uploaded to Google Drive in a security breach just a fortnight after two officers were murdered at their home by a jihadist.",S&amp;P cuts United Kingdom sovereign credit rating to 'AA' from 'AAA',Huge helium deposit found in Africa,CEO of the South African state broadcaster quits shortly after negative news about president is banned.,"Brexit cost investors $2 trillion, the worst one day drop ever",Hong Kong democracy activists call for return to British rule as first step to independence,Brexit: Iceland president says UK can join 'triangle' of non-EU countries,...,"US, Canada and Mexico pledge 50% of power from clean energy by 2025","There is increasing evidence that Australia is torturing refugees, medical experts claim","Richard Branson, the founder of Virgin Group, said Tuesday that the company has lost about a third of its value since the U.K. voted to leave the European Union last week.","37,000-yr-old skull from Borneo reveals surprise for scientists - Study of the ""Deep Skull"" - oldest modern human discovered in SE Asia - reveals this ancient person was not related to Indigenous Australians, as originally thought. ""Our discovery is a game changer.""",Palestinians stone Western Wall worshipers; police shut Temple Mount to non-Muslims,Jean-Claude Juncker asks Farage: Why are you here?,"""Romanians for Remainians"" offering a new home to the 48% of Britons who voted to stay in the EU | Bucharest newspaper's app connects loving Romanian families with needy Brits, allowing people to offer to help would-be immigrants apply for a Romanian ID",Brexit: Gibraltar in talks with Scotland to stay in EU,8 Suicide Bombers Strike Lebanon,"Mexico's security forces routinely use 'sexual torture' against women: Rights group Amnesty International has compiled testimonies of sexual violence used as torture by Mexican security forces. Despite thousands of complaints, only 15 probes have led to criminal convictions since 1991."
1986,2016-06-29,1,Explosion At Airport In Istanbul,Yemeni former president: Terrorism is the offspring of Wahhabism of Al Saud regime,UK must accept freedom of movement to access EU Market,"Devastated: scientists too late to captive breed mammal lost to climate change - Australian conservationists spent 5 months obtaining permissions &amp; planning for a captive breeding program. But when they arrived on the rodents tiny island, they they were too late.",British Labor Party leader Jeremy Corbyn loses a no-confidence vote but refuses to resign,A Muslim Shop in the UK Was Just Firebombed While People Were Inside,Mexican Authorities Sexually Torture Women in Prison,UK shares and pound continue to recover,...,"Escape Tunnel, Dug by Hand, Is Found at Holocaust Massacre Site","The land under Beijing is sinking by as much as four inches per year because of the overconsumption of groundwater, according to new research.","Car bomb and Anti-Islamic attack on Mosque in Perth, Australia",Emaciated lions in Taiz Zoo are trapped in blood-soaked cages and left to starve for months due to the Yemeni civil war,Rupert Murdoch describes Brexit as 'wonderful'. The media mogul likened leaving the EU to a prison break and shared his view of Donald Trump as a very able man.,More than 40 killed in Yemen suicide attacks,Google Found Disastrous Symantec and Norton Vulnerabilities That Are 'As Bad As It Gets,"Extremist violence on the rise in Germany: Domestic intelligence agency says far-right, far-left and Islamist radical groups gaining membership in country",BBC News: Labour MPs pass Corbyn no-confidence motion,Tiny New Zealand town with 'too many jobs' launches drive to recruit outsiders
1987,2016-06-30,1,Jamaica proposes marijuana dispensers for tourists at airports following legalisation: The kiosks and desks would give people a license to purchase up to 2 ounces of the drug to use during their stay,Stephen Hawking says pollution and 'stupidity' still biggest threats to mankind: we have certainly not become less greedy or less stupid in our treatment of the environment over the past decade,Boris Johnson says he will not run for Tory party leadership,Six gay men in Ivory Coast were abused and forced to flee their homes after they were pictured signing a condolence book for victims of the recent attack on a gay nightclub in Florida,Switzerland denies citizenship to Muslim immigrant girls who refused to swim with boys: report,Palestinian terrorist stabs israeli teen girl to death in her bedroom,Puerto Rico will default on $1 billion of debt on Friday,Republic of Ireland fans to be awarded medal for sportsmanship by Paris mayor.,...,Googles free wifi at Indian railway stations is better than most of the countrys paid services,"Mounting evidence suggests 'hobbits' were wiped out by modern humans' ancestors 50,000 years ago.","The men who carried out Tuesday's terror attack at Istanbul's Ataturk Airport were from Russia, Uzbekistan and Kyrgyzstan, a Turkish offical said.",Calls to suspend Saudi Arabia from UN Human Rights Council because of military aggresion in Yemen,More Than 100 Nobel Laureates Call Out Greenpeace For Anti-GMO Obstruction In Developing World,"British pedophile sentenced to 85 years in US for trafficking child abuse images: Domminich Shaw, a kingpin of sexual violence against children, sent dozens of images online and discussed plans to assault and kill a child while on probation","US permitted 1,200 offshore fracks in Gulf of Mexico between 2010 and 2014 and allowed 72 billion gallons of chemical discharge in 2014.",We will be swimming in ridicule - French beach police to carry guns while in swimming trunks: Police lifeguards on Frances busiest beaches will carry guns and bullet-proof vests for the first time this summer amid fears that terrorists could target holidaymakers.,UEFA says no minutes of silence for Istanbul victims at Euro 2016 because 'Turkey have already been eliminated',Law Enforcement Sources: Gun Used in Paris Terrorist Attacks Came from Phoenix


Import the combined dataset, and place it in a dataframe. This dataset has labels with values 0 - indicating the DJIA went down, and 1 - indicating the DJIA went up or stayed the same. The next 27 columns are the top 27 headlines for a given date in ascending order.

## Clean data
There are a few things we need to do in order to make this data easier to work with. Thankfully, there are no missing values. However, we will want to do some basic text processing in order to get rid of things like stop words, syntax, and more easily vectorize the headlines.

In [103]:
columns = df.iloc[:, 2:]
regx = "[/\-_\+=\*,\.\"\'()!\?@#$%\^;:\[\]]|^b[\'\"]*"
df_no_syntax = df_regx_replace(df_original, columns, regx)
df_no_syntax

Unnamed: 0,Date,Label,Top1,Top2,Top3,Top4,Top5,Top6,Top7,Top8,...,Top16,Top17,Top18,Top19,Top20,Top21,Top22,Top23,Top24,Top25
0,2008-08-08,0,georgia downs two russian warplanes as countries move to brink of war,b'BREAKING: Musharraf to be impeached.',b'Russia Today: Columns of troops roll into South Ossetia; footage from fighting (YouTube)',"b'Russian tanks are moving towards the capital of South Ossetia, which has reportedly been completely destroyed by Georgian artillery fire'","b""Afghan children raped with 'impunity,' U.N. official says - this is sick, a three year old was raped and they do nothing""",b'150 Russian tanks have entered South Ossetia whilst Georgia shoots down two Russian jets.',"b""Breaking: Georgia invades South Ossetia, Russia warned it would intervene on SO's side""","b""The 'enemy combatent' trials are nothing but a sham: Salim Haman has been sentenced to 5 1/2 years, but will be kept longer anyway just because they feel like it.""",...,"b'Georgia Invades South Ossetia - if Russia gets involved, will NATO absorb Georgia and unleash a full scale war?'",b'Al-Qaeda Faces Islamist Backlash',"b'Condoleezza Rice: ""The US would not act to prevent an Israeli strike on Iran."" Israeli Defense Minister Ehud Barak: ""Israel is prepared for uncompromising victory in the case of military hostilities.""'",b'This is a busy day: The European Union has approved new sanctions against Iran in protest at its nuclear programme.',"b""Georgia will withdraw 1,000 soldiers from Iraq to help fight off Russian forces in Georgia's breakaway region of South Ossetia""",b'Why the Pentagon Thinks Attacking Iran is a Bad Idea - US News &amp; World Report',b'Caucasus in crisis: Georgia invades South Ossetia',"b'Indian shoe manufactory - And again in a series of ""you do not like your work?""'",b'Visitors Suffering from Mental Illnesses Banned from Olympics',"b""No Help for Mexico's Kidnapping Surge"""
1,2008-08-11,1,why wont america and nato help us if they wont help us now why did we help them in iraq,b'Bush puts foot down on Georgian conflict',"b""Jewish Georgian minister: Thanks to Israeli training, we're fending off Russia """,b'Georgian army flees in disarray as Russians advance - Gori abandoned to Russia without a shot fired',"b""Olympic opening ceremony fireworks 'faked'""",b'What were the Mossad with fraudulent New Zealand Passports doing in Iraq?',b'Russia angered by Israeli military sale to Georgia',b'An American citizen living in S.Ossetia blames U.S. and Georgian leaders for the genocide of innocent people',...,b'Israel and the US behind the Georgian aggression?',"b'""Do not believe TV, neither Russian nor Georgian. There are much more victims""'",b'Riots are still going on in Montreal (Canada) because police murdered a boy on Saturday.',b'China to overtake US as largest manufacturer',b'War in South Ossetia [PICS]',b'Israeli Physicians Group Condemns State Torture',b' Russia has just beaten the United States over the head with Peak Oil',b'Perhaps *the* question about the Georgia - Russia conflict ',b'Russia is so much better at war',"b""So this is what it's come to: trading sex for food."""
2,2008-08-12,0,remember that adorable 9yearold who sang at the opening ceremonies that was fake too,"b""Russia 'ends Georgia operation'""","b'""If we had no sexual harassment we would have no children...""'","b""Al-Qa'eda is losing support in Iraq because of a brutal crackdown on activities it regards as un-Islamic - including women buying cucumbers""",b'Ceasefire in Georgia: Putin Outmaneuvers the West',b'Why Microsoft and Intel tried to kill the XO $100 laptop',b'Stratfor: The Russo-Georgian War and the Balance of Power ',"b""I'm Trying to Get a Sense of This Whole Georgia-Russia War: Vote Up If You Think Georgia Started It, Or Down If you Think Russia Did""",...,b'U.S. troops still in Georgia (did you know they were in Georgia in the first place?)',b'Why Russias response to Georgia was right',"b'Gorbachev accuses U.S. of making a ""serious blunder"" in pursuing its interest in the Caucasus region'","b'Russia, Georgia, and NATO: Cold War Two'","b'Remember that adorable 62-year-old who led your country into war based on evidence? That was fake, too.'",b'War in Georgia: The Israeli connection',b'All signs point to the US encouraging Georgia to invade South Ossetia. Goddamnit Bush.',b'Christopher King argues that the US and NATO are behind the Georgian invasion of South Ossetia but have misjudged Russian resolve. ',b'America: The New Mexico?',"b""BBC NEWS | Asia-Pacific | Extinction 'by man not climate'"""
3,2008-08-13,0,us refuses israel weapons to attack iran report,"b""When the president ordered to attack Tskhinvali [the capital of South Ossetia], we knew then we were doomed. How come he didn't realize that?""",b' Israel clears troops who killed Reuters cameraman',"b'Britain\'s policy of being tough on drugs is ""pointless"", says a former civil servant who once ran the Cabinet\'s anti-drugs unit.'","b'Body of 14 year old found in trunk; Latest (ransom paid) kidnapping victim in Mexico. Head cop quits, Prez dissolves suspect elite task force'",b'China has moved 10 *million* quake survivors into prefab homes',"b""Bush announces Operation Get All Up In Russia's Grill. Yeah, this will end well.""",b'Russian forces sink Georgian ships ',...,b'Elephants extinct by 2020?',b'US humanitarian missions soon in Georgia - if Russia hits the US - WWIII?',"b""Georgia's DDOS came from US sources""","b'Russian convoy heads into Georgia, violating truce'",b'Israeli defence minister: US against strike on Iran',b'Gorbachev: We Had No Choice',b'Witness: Russian forces head towards Tbilisi in breach of ceasefire agreement',b' Quarter of Russians blame U.S. for conflict: poll',b'Georgian president says US military will take control of seaports and airports - Pentagon denies',"b'2006: Nobel laureate Aleksander Solzhenitsyn accuses U.S., NATO of encircling Russia'"
4,2008-08-14,1,all the experts admit that we should legalise drugs,b'War in South Osetia - 89 pictures made by a Russian soldier.',b'Swedish wrestler Ara Abrahamian throws away medal in Olympic hissy fit ',"b'Russia exaggerated the death toll in South Ossetia. Now only 44 were originally killed compared to 2,000.'",b'Missile That Killed 9 Inside Pakistan May Have Been Launched by the CIA',"b""Rushdie Condemns Random House's Refusal to Publish Novel for Fear of Muslim Retaliation""",b'Poland and US agree to missle defense deal. Interesting timing!',"b'Will the Russians conquer Tblisi? Bet on it, no seriously you can BET on it'",...,b'Bank analyst forecast Georgian crisis 2 days early',"b""Georgia confict could set back Russia's US relations 'for years' | World news | guardian.co.uk""",b'War in the Caucasus is as much the product of an American imperial drive as local conflicts.',"b'""Non-media"" photos of South Ossetia/Georgia conflict.'",b'Georgian TV reporter shot by Russian sniper during live broadcast [video]',b'Saudi Arabia: Mother moves to block child marriage',b'Taliban wages war on humanitarian aid workers',"b'Russia: World ""can forget about"" Georgia\'s territorial integrity'",b'Darfur rebels accuse Sudan of mounting major attack',b'Philippines : Peace Advocate say Muslims need assurance Christians not out to convert them'
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1984,2016-06-27,0,barclays and rbs shares suspended from trading after tanking more than 8,Pope says Church should ask forgiveness from gays for past treatment,Poland 'shocked' by xenophobic abuse of Poles in UK,"There will be no second referendum, cabinet agrees","Scotland welcome to join EU, Merkel ally says",Sterling dips below Friday's 31-year low amid Brexit uncertainty,No negative news about South African President allowed on state broadcaster.,Surge in Hate Crimes in the U.K. Following U.K.s Brexit Vote,...,German lawyers to probe Erdogan over alleged war crimes,"Boris Johnson says the UK will continue to ""intensify"" cooperation with the EU and tells his fellow Leave supporters they must accept the 52-48 referendum win was ""not entirely overwhelming"".",Richard Branson is calling on the UK government to hold a second EU referendum to prevent 'irreversible damage' to the country.,Turkey 'sorry for downing Russian jet',Edward Snowden lawyer vows new push for pardon from Obama,"Brexit opinion poll reveals majority don't want second EU referendum: ""half (48%) of British adults say that they are happy with the result, with two in five (43%) saying they are unhappy with the outcome.""","Conservative MP Leave Campaigner: ""The leave campaign don't have a post-Brexit plan...""","Economists predict UK recession, further weakening of Pound following Brexit.","New EU 'superstate plan by France, Germany: Creating a European superstate limiting the powers of individual members following Britains referendum decision to leave the EU",Pakistani clerics declare transgender marriages legal under Islamic law
1985,2016-06-28,1,2500 scientists to australia if you want to save the great barrier reef stop supporting coal,"The personal details of 112,000 French police officers have been uploaded to Google Drive in a security breach just a fortnight after two officers were murdered at their home by a jihadist.",S&amp;P cuts United Kingdom sovereign credit rating to 'AA' from 'AAA',Huge helium deposit found in Africa,CEO of the South African state broadcaster quits shortly after negative news about president is banned.,"Brexit cost investors $2 trillion, the worst one day drop ever",Hong Kong democracy activists call for return to British rule as first step to independence,Brexit: Iceland president says UK can join 'triangle' of non-EU countries,...,"US, Canada and Mexico pledge 50% of power from clean energy by 2025","There is increasing evidence that Australia is torturing refugees, medical experts claim","Richard Branson, the founder of Virgin Group, said Tuesday that the company has lost about a third of its value since the U.K. voted to leave the European Union last week.","37,000-yr-old skull from Borneo reveals surprise for scientists - Study of the ""Deep Skull"" - oldest modern human discovered in SE Asia - reveals this ancient person was not related to Indigenous Australians, as originally thought. ""Our discovery is a game changer.""",Palestinians stone Western Wall worshipers; police shut Temple Mount to non-Muslims,Jean-Claude Juncker asks Farage: Why are you here?,"""Romanians for Remainians"" offering a new home to the 48% of Britons who voted to stay in the EU | Bucharest newspaper's app connects loving Romanian families with needy Brits, allowing people to offer to help would-be immigrants apply for a Romanian ID",Brexit: Gibraltar in talks with Scotland to stay in EU,8 Suicide Bombers Strike Lebanon,"Mexico's security forces routinely use 'sexual torture' against women: Rights group Amnesty International has compiled testimonies of sexual violence used as torture by Mexican security forces. Despite thousands of complaints, only 15 probes have led to criminal convictions since 1991."
1986,2016-06-29,1,explosion at airport in istanbul,Yemeni former president: Terrorism is the offspring of Wahhabism of Al Saud regime,UK must accept freedom of movement to access EU Market,"Devastated: scientists too late to captive breed mammal lost to climate change - Australian conservationists spent 5 months obtaining permissions &amp; planning for a captive breeding program. But when they arrived on the rodents tiny island, they they were too late.",British Labor Party leader Jeremy Corbyn loses a no-confidence vote but refuses to resign,A Muslim Shop in the UK Was Just Firebombed While People Were Inside,Mexican Authorities Sexually Torture Women in Prison,UK shares and pound continue to recover,...,"Escape Tunnel, Dug by Hand, Is Found at Holocaust Massacre Site","The land under Beijing is sinking by as much as four inches per year because of the overconsumption of groundwater, according to new research.","Car bomb and Anti-Islamic attack on Mosque in Perth, Australia",Emaciated lions in Taiz Zoo are trapped in blood-soaked cages and left to starve for months due to the Yemeni civil war,Rupert Murdoch describes Brexit as 'wonderful'. The media mogul likened leaving the EU to a prison break and shared his view of Donald Trump as a very able man.,More than 40 killed in Yemen suicide attacks,Google Found Disastrous Symantec and Norton Vulnerabilities That Are 'As Bad As It Gets,"Extremist violence on the rise in Germany: Domestic intelligence agency says far-right, far-left and Islamist radical groups gaining membership in country",BBC News: Labour MPs pass Corbyn no-confidence motion,Tiny New Zealand town with 'too many jobs' launches drive to recruit outsiders
1987,2016-06-30,1,jamaica proposes marijuana dispensers for tourists at airports following legalisation the kiosks and desks would give people a license to purchase up to 2 ounces of the drug to use during their stay,Stephen Hawking says pollution and 'stupidity' still biggest threats to mankind: we have certainly not become less greedy or less stupid in our treatment of the environment over the past decade,Boris Johnson says he will not run for Tory party leadership,Six gay men in Ivory Coast were abused and forced to flee their homes after they were pictured signing a condolence book for victims of the recent attack on a gay nightclub in Florida,Switzerland denies citizenship to Muslim immigrant girls who refused to swim with boys: report,Palestinian terrorist stabs israeli teen girl to death in her bedroom,Puerto Rico will default on $1 billion of debt on Friday,Republic of Ireland fans to be awarded medal for sportsmanship by Paris mayor.,...,Googles free wifi at Indian railway stations is better than most of the countrys paid services,"Mounting evidence suggests 'hobbits' were wiped out by modern humans' ancestors 50,000 years ago.","The men who carried out Tuesday's terror attack at Istanbul's Ataturk Airport were from Russia, Uzbekistan and Kyrgyzstan, a Turkish offical said.",Calls to suspend Saudi Arabia from UN Human Rights Council because of military aggresion in Yemen,More Than 100 Nobel Laureates Call Out Greenpeace For Anti-GMO Obstruction In Developing World,"British pedophile sentenced to 85 years in US for trafficking child abuse images: Domminich Shaw, a kingpin of sexual violence against children, sent dozens of images online and discussed plans to assault and kill a child while on probation","US permitted 1,200 offshore fracks in Gulf of Mexico between 2010 and 2014 and allowed 72 billion gallons of chemical discharge in 2014.",We will be swimming in ridicule - French beach police to carry guns while in swimming trunks: Police lifeguards on Frances busiest beaches will carry guns and bullet-proof vests for the first time this summer amid fears that terrorists could target holidaymakers.,UEFA says no minutes of silence for Istanbul victims at Euro 2016 because 'Turkey have already been eliminated',Law Enforcement Sources: Gun Used in Paris Terrorist Attacks Came from Phoenix


Removed all puncation/syntax, and lowercased each headline for easier parasing.

In [104]:
df_no_nan = df_text_remove_nan(df_no_syntax)
df_no_nan

removed row: 277
removed row: 348
removed row: 681


Unnamed: 0,Date,Label,Top1,Top2,Top3,Top4,Top5,Top6,Top7,Top8,...,Top16,Top17,Top18,Top19,Top20,Top21,Top22,Top23,Top24,Top25
0,2008-08-08,0,georgia downs two russian warplanes as countries move to brink of war,b'BREAKING: Musharraf to be impeached.',b'Russia Today: Columns of troops roll into South Ossetia; footage from fighting (YouTube)',"b'Russian tanks are moving towards the capital of South Ossetia, which has reportedly been completely destroyed by Georgian artillery fire'","b""Afghan children raped with 'impunity,' U.N. official says - this is sick, a three year old was raped and they do nothing""",b'150 Russian tanks have entered South Ossetia whilst Georgia shoots down two Russian jets.',"b""Breaking: Georgia invades South Ossetia, Russia warned it would intervene on SO's side""","b""The 'enemy combatent' trials are nothing but a sham: Salim Haman has been sentenced to 5 1/2 years, but will be kept longer anyway just because they feel like it.""",...,"b'Georgia Invades South Ossetia - if Russia gets involved, will NATO absorb Georgia and unleash a full scale war?'",b'Al-Qaeda Faces Islamist Backlash',"b'Condoleezza Rice: ""The US would not act to prevent an Israeli strike on Iran."" Israeli Defense Minister Ehud Barak: ""Israel is prepared for uncompromising victory in the case of military hostilities.""'",b'This is a busy day: The European Union has approved new sanctions against Iran in protest at its nuclear programme.',"b""Georgia will withdraw 1,000 soldiers from Iraq to help fight off Russian forces in Georgia's breakaway region of South Ossetia""",b'Why the Pentagon Thinks Attacking Iran is a Bad Idea - US News &amp; World Report',b'Caucasus in crisis: Georgia invades South Ossetia',"b'Indian shoe manufactory - And again in a series of ""you do not like your work?""'",b'Visitors Suffering from Mental Illnesses Banned from Olympics',"b""No Help for Mexico's Kidnapping Surge"""
1,2008-08-11,1,why wont america and nato help us if they wont help us now why did we help them in iraq,b'Bush puts foot down on Georgian conflict',"b""Jewish Georgian minister: Thanks to Israeli training, we're fending off Russia """,b'Georgian army flees in disarray as Russians advance - Gori abandoned to Russia without a shot fired',"b""Olympic opening ceremony fireworks 'faked'""",b'What were the Mossad with fraudulent New Zealand Passports doing in Iraq?',b'Russia angered by Israeli military sale to Georgia',b'An American citizen living in S.Ossetia blames U.S. and Georgian leaders for the genocide of innocent people',...,b'Israel and the US behind the Georgian aggression?',"b'""Do not believe TV, neither Russian nor Georgian. There are much more victims""'",b'Riots are still going on in Montreal (Canada) because police murdered a boy on Saturday.',b'China to overtake US as largest manufacturer',b'War in South Ossetia [PICS]',b'Israeli Physicians Group Condemns State Torture',b' Russia has just beaten the United States over the head with Peak Oil',b'Perhaps *the* question about the Georgia - Russia conflict ',b'Russia is so much better at war',"b""So this is what it's come to: trading sex for food."""
2,2008-08-12,0,remember that adorable 9yearold who sang at the opening ceremonies that was fake too,"b""Russia 'ends Georgia operation'""","b'""If we had no sexual harassment we would have no children...""'","b""Al-Qa'eda is losing support in Iraq because of a brutal crackdown on activities it regards as un-Islamic - including women buying cucumbers""",b'Ceasefire in Georgia: Putin Outmaneuvers the West',b'Why Microsoft and Intel tried to kill the XO $100 laptop',b'Stratfor: The Russo-Georgian War and the Balance of Power ',"b""I'm Trying to Get a Sense of This Whole Georgia-Russia War: Vote Up If You Think Georgia Started It, Or Down If you Think Russia Did""",...,b'U.S. troops still in Georgia (did you know they were in Georgia in the first place?)',b'Why Russias response to Georgia was right',"b'Gorbachev accuses U.S. of making a ""serious blunder"" in pursuing its interest in the Caucasus region'","b'Russia, Georgia, and NATO: Cold War Two'","b'Remember that adorable 62-year-old who led your country into war based on evidence? That was fake, too.'",b'War in Georgia: The Israeli connection',b'All signs point to the US encouraging Georgia to invade South Ossetia. Goddamnit Bush.',b'Christopher King argues that the US and NATO are behind the Georgian invasion of South Ossetia but have misjudged Russian resolve. ',b'America: The New Mexico?',"b""BBC NEWS | Asia-Pacific | Extinction 'by man not climate'"""
3,2008-08-13,0,us refuses israel weapons to attack iran report,"b""When the president ordered to attack Tskhinvali [the capital of South Ossetia], we knew then we were doomed. How come he didn't realize that?""",b' Israel clears troops who killed Reuters cameraman',"b'Britain\'s policy of being tough on drugs is ""pointless"", says a former civil servant who once ran the Cabinet\'s anti-drugs unit.'","b'Body of 14 year old found in trunk; Latest (ransom paid) kidnapping victim in Mexico. Head cop quits, Prez dissolves suspect elite task force'",b'China has moved 10 *million* quake survivors into prefab homes',"b""Bush announces Operation Get All Up In Russia's Grill. Yeah, this will end well.""",b'Russian forces sink Georgian ships ',...,b'Elephants extinct by 2020?',b'US humanitarian missions soon in Georgia - if Russia hits the US - WWIII?',"b""Georgia's DDOS came from US sources""","b'Russian convoy heads into Georgia, violating truce'",b'Israeli defence minister: US against strike on Iran',b'Gorbachev: We Had No Choice',b'Witness: Russian forces head towards Tbilisi in breach of ceasefire agreement',b' Quarter of Russians blame U.S. for conflict: poll',b'Georgian president says US military will take control of seaports and airports - Pentagon denies',"b'2006: Nobel laureate Aleksander Solzhenitsyn accuses U.S., NATO of encircling Russia'"
4,2008-08-14,1,all the experts admit that we should legalise drugs,b'War in South Osetia - 89 pictures made by a Russian soldier.',b'Swedish wrestler Ara Abrahamian throws away medal in Olympic hissy fit ',"b'Russia exaggerated the death toll in South Ossetia. Now only 44 were originally killed compared to 2,000.'",b'Missile That Killed 9 Inside Pakistan May Have Been Launched by the CIA',"b""Rushdie Condemns Random House's Refusal to Publish Novel for Fear of Muslim Retaliation""",b'Poland and US agree to missle defense deal. Interesting timing!',"b'Will the Russians conquer Tblisi? Bet on it, no seriously you can BET on it'",...,b'Bank analyst forecast Georgian crisis 2 days early',"b""Georgia confict could set back Russia's US relations 'for years' | World news | guardian.co.uk""",b'War in the Caucasus is as much the product of an American imperial drive as local conflicts.',"b'""Non-media"" photos of South Ossetia/Georgia conflict.'",b'Georgian TV reporter shot by Russian sniper during live broadcast [video]',b'Saudi Arabia: Mother moves to block child marriage',b'Taliban wages war on humanitarian aid workers',"b'Russia: World ""can forget about"" Georgia\'s territorial integrity'",b'Darfur rebels accuse Sudan of mounting major attack',b'Philippines : Peace Advocate say Muslims need assurance Christians not out to convert them'
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1984,2016-06-27,0,barclays and rbs shares suspended from trading after tanking more than 8,Pope says Church should ask forgiveness from gays for past treatment,Poland 'shocked' by xenophobic abuse of Poles in UK,"There will be no second referendum, cabinet agrees","Scotland welcome to join EU, Merkel ally says",Sterling dips below Friday's 31-year low amid Brexit uncertainty,No negative news about South African President allowed on state broadcaster.,Surge in Hate Crimes in the U.K. Following U.K.s Brexit Vote,...,German lawyers to probe Erdogan over alleged war crimes,"Boris Johnson says the UK will continue to ""intensify"" cooperation with the EU and tells his fellow Leave supporters they must accept the 52-48 referendum win was ""not entirely overwhelming"".",Richard Branson is calling on the UK government to hold a second EU referendum to prevent 'irreversible damage' to the country.,Turkey 'sorry for downing Russian jet',Edward Snowden lawyer vows new push for pardon from Obama,"Brexit opinion poll reveals majority don't want second EU referendum: ""half (48%) of British adults say that they are happy with the result, with two in five (43%) saying they are unhappy with the outcome.""","Conservative MP Leave Campaigner: ""The leave campaign don't have a post-Brexit plan...""","Economists predict UK recession, further weakening of Pound following Brexit.","New EU 'superstate plan by France, Germany: Creating a European superstate limiting the powers of individual members following Britains referendum decision to leave the EU",Pakistani clerics declare transgender marriages legal under Islamic law
1985,2016-06-28,1,2500 scientists to australia if you want to save the great barrier reef stop supporting coal,"The personal details of 112,000 French police officers have been uploaded to Google Drive in a security breach just a fortnight after two officers were murdered at their home by a jihadist.",S&amp;P cuts United Kingdom sovereign credit rating to 'AA' from 'AAA',Huge helium deposit found in Africa,CEO of the South African state broadcaster quits shortly after negative news about president is banned.,"Brexit cost investors $2 trillion, the worst one day drop ever",Hong Kong democracy activists call for return to British rule as first step to independence,Brexit: Iceland president says UK can join 'triangle' of non-EU countries,...,"US, Canada and Mexico pledge 50% of power from clean energy by 2025","There is increasing evidence that Australia is torturing refugees, medical experts claim","Richard Branson, the founder of Virgin Group, said Tuesday that the company has lost about a third of its value since the U.K. voted to leave the European Union last week.","37,000-yr-old skull from Borneo reveals surprise for scientists - Study of the ""Deep Skull"" - oldest modern human discovered in SE Asia - reveals this ancient person was not related to Indigenous Australians, as originally thought. ""Our discovery is a game changer.""",Palestinians stone Western Wall worshipers; police shut Temple Mount to non-Muslims,Jean-Claude Juncker asks Farage: Why are you here?,"""Romanians for Remainians"" offering a new home to the 48% of Britons who voted to stay in the EU | Bucharest newspaper's app connects loving Romanian families with needy Brits, allowing people to offer to help would-be immigrants apply for a Romanian ID",Brexit: Gibraltar in talks with Scotland to stay in EU,8 Suicide Bombers Strike Lebanon,"Mexico's security forces routinely use 'sexual torture' against women: Rights group Amnesty International has compiled testimonies of sexual violence used as torture by Mexican security forces. Despite thousands of complaints, only 15 probes have led to criminal convictions since 1991."
1986,2016-06-29,1,explosion at airport in istanbul,Yemeni former president: Terrorism is the offspring of Wahhabism of Al Saud regime,UK must accept freedom of movement to access EU Market,"Devastated: scientists too late to captive breed mammal lost to climate change - Australian conservationists spent 5 months obtaining permissions &amp; planning for a captive breeding program. But when they arrived on the rodents tiny island, they they were too late.",British Labor Party leader Jeremy Corbyn loses a no-confidence vote but refuses to resign,A Muslim Shop in the UK Was Just Firebombed While People Were Inside,Mexican Authorities Sexually Torture Women in Prison,UK shares and pound continue to recover,...,"Escape Tunnel, Dug by Hand, Is Found at Holocaust Massacre Site","The land under Beijing is sinking by as much as four inches per year because of the overconsumption of groundwater, according to new research.","Car bomb and Anti-Islamic attack on Mosque in Perth, Australia",Emaciated lions in Taiz Zoo are trapped in blood-soaked cages and left to starve for months due to the Yemeni civil war,Rupert Murdoch describes Brexit as 'wonderful'. The media mogul likened leaving the EU to a prison break and shared his view of Donald Trump as a very able man.,More than 40 killed in Yemen suicide attacks,Google Found Disastrous Symantec and Norton Vulnerabilities That Are 'As Bad As It Gets,"Extremist violence on the rise in Germany: Domestic intelligence agency says far-right, far-left and Islamist radical groups gaining membership in country",BBC News: Labour MPs pass Corbyn no-confidence motion,Tiny New Zealand town with 'too many jobs' launches drive to recruit outsiders
1987,2016-06-30,1,jamaica proposes marijuana dispensers for tourists at airports following legalisation the kiosks and desks would give people a license to purchase up to 2 ounces of the drug to use during their stay,Stephen Hawking says pollution and 'stupidity' still biggest threats to mankind: we have certainly not become less greedy or less stupid in our treatment of the environment over the past decade,Boris Johnson says he will not run for Tory party leadership,Six gay men in Ivory Coast were abused and forced to flee their homes after they were pictured signing a condolence book for victims of the recent attack on a gay nightclub in Florida,Switzerland denies citizenship to Muslim immigrant girls who refused to swim with boys: report,Palestinian terrorist stabs israeli teen girl to death in her bedroom,Puerto Rico will default on $1 billion of debt on Friday,Republic of Ireland fans to be awarded medal for sportsmanship by Paris mayor.,...,Googles free wifi at Indian railway stations is better than most of the countrys paid services,"Mounting evidence suggests 'hobbits' were wiped out by modern humans' ancestors 50,000 years ago.","The men who carried out Tuesday's terror attack at Istanbul's Ataturk Airport were from Russia, Uzbekistan and Kyrgyzstan, a Turkish offical said.",Calls to suspend Saudi Arabia from UN Human Rights Council because of military aggresion in Yemen,More Than 100 Nobel Laureates Call Out Greenpeace For Anti-GMO Obstruction In Developing World,"British pedophile sentenced to 85 years in US for trafficking child abuse images: Domminich Shaw, a kingpin of sexual violence against children, sent dozens of images online and discussed plans to assault and kill a child while on probation","US permitted 1,200 offshore fracks in Gulf of Mexico between 2010 and 2014 and allowed 72 billion gallons of chemical discharge in 2014.",We will be swimming in ridicule - French beach police to carry guns while in swimming trunks: Police lifeguards on Frances busiest beaches will carry guns and bullet-proof vests for the first time this summer amid fears that terrorists could target holidaymakers.,UEFA says no minutes of silence for Istanbul victims at Euro 2016 because 'Turkey have already been eliminated',Law Enforcement Sources: Gun Used in Paris Terrorist Attacks Came from Phoenix


Remove any rows that contain nan values in the columns.

# Baseline accuracy 
To quickly get a baseline accuracy, combining the columns seems to be the simplest way to get a vectorized data set for classification input. We will do more nuanced vectorization and classification after getting a baseline for this dataset.

## Combine columns

In [105]:
df = df_no_nan
columns = df.iloc[:, 2:]
concat_column = df_concat_str_columns(df, columns)
concat_column = pd.DataFrame(concat_column)
concat_column

Unnamed: 0,Top1
0,"georgia downs two russian warplanes as countries move to brink of war b'BREAKING: Musharraf to be impeached.' b'Russia Today: Columns of troops roll into South Ossetia; footage from fighting (YouTube)' b'Russian tanks are moving towards the capital of South Ossetia, which has reportedly been completely destroyed by Georgian artillery fire' b""Afghan children raped with 'impunity,' U.N. official says - this is sick, a three year old was raped and they do nothing"" b'150 Russian tanks have entered South Ossetia whilst Georgia shoots down two Russian jets.' b""Breaking: Georgia invades South Ossetia, Russia warned it would intervene on SO's side"" b""The 'enemy combatent' trials are nothing but a sham: Salim Haman has been sentenced to 5 1/2 years, but will be kept longer anyway just because they feel like it."" b'Georgian troops retreat from S. Osettain capital, presumably leaving several hundred people killed. [VIDEO]' b'Did the U.S. Prep Georgia for War with Russia?' b'Rice Gives Green Light for Israel to Attack Iran: Says U.S. has no veto over Israeli military ops' b'Announcing:Class Action Lawsuit on Behalf of American Public Against the FBI' b""So---Russia and Georgia are at war and the NYT's top story is opening ceremonies of the Olympics? What a fucking disgrace and yet further proof of the decline of journalism."" b""China tells Bush to stay out of other countries' affairs"" b'Did World War III start today?' b'Georgia Invades South Ossetia - if Russia gets involved, will NATO absorb Georgia and unleash a full scale war?' b'Al-Qaeda Faces Islamist Backlash' b'Condoleezza Rice: ""The US would not act to prevent an Israeli strike on Iran."" Israeli Defense Minister Ehud Barak: ""Israel is prepared for uncompromising victory in the case of military hostilities.""' b'This is a busy day: The European Union has approved new sanctions against Iran in protest at its nuclear programme.' b""Georgia will withdraw 1,000 soldiers from Iraq to help fight off Russian forces in Georgia's breakaway region of South Ossetia"" b'Why the Pentagon Thinks Attacking Iran is a Bad Idea - US News &amp; World Report' b'Caucasus in crisis: Georgia invades South Ossetia' b'Indian shoe manufactory - And again in a series of ""you do not like your work?""' b'Visitors Suffering from Mental Illnesses Banned from Olympics' b""No Help for Mexico's Kidnapping Surge"""
1,"why wont america and nato help us if they wont help us now why did we help them in iraq b'Bush puts foot down on Georgian conflict' b""Jewish Georgian minister: Thanks to Israeli training, we're fending off Russia "" b'Georgian army flees in disarray as Russians advance - Gori abandoned to Russia without a shot fired' b""Olympic opening ceremony fireworks 'faked'"" b'What were the Mossad with fraudulent New Zealand Passports doing in Iraq?' b'Russia angered by Israeli military sale to Georgia' b'An American citizen living in S.Ossetia blames U.S. and Georgian leaders for the genocide of innocent people' b'Welcome To World War IV! Now In High Definition!' b""Georgia's move, a mistake of monumental proportions "" b'Russia presses deeper into Georgia; U.S. says regime change is goal' b'Abhinav Bindra wins first ever Individual Olympic Gold Medal for India' b' U.S. ship heads for Arctic to define territory' b'Drivers in a Jerusalem taxi station threaten to quit rather than work for their new boss - an Arab' b'The French Team is Stunned by Phelps and the 4x100m Relay Team' b'Israel and the US behind the Georgian aggression?' b'""Do not believe TV, neither Russian nor Georgian. There are much more victims""' b'Riots are still going on in Montreal (Canada) because police murdered a boy on Saturday.' b'China to overtake US as largest manufacturer' b'War in South Ossetia [PICS]' b'Israeli Physicians Group Condemns State Torture' b' Russia has just beaten the United States over the head with Peak Oil' b'Perhaps *the* question about the Georgia - Russia conflict ' b'Russia is so much better at war' b""So this is what it's come to: trading sex for food."""
2,"remember that adorable 9yearold who sang at the opening ceremonies that was fake too b""Russia 'ends Georgia operation'"" b'""If we had no sexual harassment we would have no children...""' b""Al-Qa'eda is losing support in Iraq because of a brutal crackdown on activities it regards as un-Islamic - including women buying cucumbers"" b'Ceasefire in Georgia: Putin Outmaneuvers the West' b'Why Microsoft and Intel tried to kill the XO $100 laptop' b'Stratfor: The Russo-Georgian War and the Balance of Power ' b""I'm Trying to Get a Sense of This Whole Georgia-Russia War: Vote Up If You Think Georgia Started It, Or Down If you Think Russia Did"" b""The US military was surprised by the timing and swiftness of the Russian military's move into South Ossetia and is still trying to sort out what happened, a US defense official said Monday"" b'U.S. Beats War Drum as Iran Dumps the Dollar' b'Gorbachev: ""Georgian military attacked the South Ossetian capital of Tskhinvali with multiple rocket launchers designed to devastate large areas""' b'CNN use footage of Tskhinvali ruins to cover Georgian report [VIDEO]' b'Beginning a war as the Olympics were opening violates the ancient tradition of a truce to conflict during the Games. The IOC could respond by taking the 2014 games away from Russia.' b'55 pyramids as large as the Luxor stacked into a mega-city pyramid in Tokyo Bay' b'The 11 Top Party Cities in the World' b'U.S. troops still in Georgia (did you know they were in Georgia in the first place?)' b'Why Russias response to Georgia was right' b'Gorbachev accuses U.S. of making a ""serious blunder"" in pursuing its interest in the Caucasus region' b'Russia, Georgia, and NATO: Cold War Two' b'Remember that adorable 62-year-old who led your country into war based on evidence? That was fake, too.' b'War in Georgia: The Israeli connection' b'All signs point to the US encouraging Georgia to invade South Ossetia. Goddamnit Bush.' b'Christopher King argues that the US and NATO are behind the Georgian invasion of South Ossetia but have misjudged Russian resolve. ' b'America: The New Mexico?' b""BBC NEWS | Asia-Pacific | Extinction 'by man not climate'"""
3,"us refuses israel weapons to attack iran report b""When the president ordered to attack Tskhinvali [the capital of South Ossetia], we knew then we were doomed. How come he didn't realize that?"" b' Israel clears troops who killed Reuters cameraman' b'Britain\'s policy of being tough on drugs is ""pointless"", says a former civil servant who once ran the Cabinet\'s anti-drugs unit.' b'Body of 14 year old found in trunk; Latest (ransom paid) kidnapping victim in Mexico. Head cop quits, Prez dissolves suspect elite task force' b'China has moved 10 *million* quake survivors into prefab homes' b""Bush announces Operation Get All Up In Russia's Grill. Yeah, this will end well."" b'Russian forces sink Georgian ships ' b""The commander of a Navy air reconnaissance squadron that provides the President and the defense secretary the airborne ability to command the nation's nuclear weapons has been relieved of duty"" b""92% of CNN readers: Russia's actions in Georgia - justified!"" b'USA to send fleet into Black Sea to help Georgia, send troops in ""humanitarian aid exercise""' b""US warns against Israeli plan to strike against Iran's nuclear facilities"" b""In an intriguing cyberalliance, two Estonian computer experts are heading to Georgia to keep the country's networks running amid an intense military confrontation with Russia"" b'The CNN Effect: Georgia Schools Russia in Information Warfare' b'Why Russias response to Georgia was right' b'Elephants extinct by 2020?' b'US humanitarian missions soon in Georgia - if Russia hits the US - WWIII?' b""Georgia's DDOS came from US sources"" b'Russian convoy heads into Georgia, violating truce' b'Israeli defence minister: US against strike on Iran' b'Gorbachev: We Had No Choice' b'Witness: Russian forces head towards Tbilisi in breach of ceasefire agreement' b' Quarter of Russians blame U.S. for conflict: poll' b'Georgian president says US military will take control of seaports and airports - Pentagon denies' b'2006: Nobel laureate Aleksander Solzhenitsyn accuses U.S., NATO of encircling Russia'"
4,"all the experts admit that we should legalise drugs b'War in South Osetia - 89 pictures made by a Russian soldier.' b'Swedish wrestler Ara Abrahamian throws away medal in Olympic hissy fit ' b'Russia exaggerated the death toll in South Ossetia. Now only 44 were originally killed compared to 2,000.' b'Missile That Killed 9 Inside Pakistan May Have Been Launched by the CIA' b""Rushdie Condemns Random House's Refusal to Publish Novel for Fear of Muslim Retaliation"" b'Poland and US agree to missle defense deal. Interesting timing!' b'Will the Russians conquer Tblisi? Bet on it, no seriously you can BET on it' b'Russia exaggerating South Ossetian death toll, says human rights group' b' Musharraf expected to resign rather than face impeachment' b'Moscow Made Plans Months Ago to Invade Georgia' b'Why Russias response to Georgia was right' b'Nigeria has handed over the potentially oil-rich Bakassi peninsula to Cameroon.' b'The US and Poland have agreed a preliminary deal on plans for the controversial US defence shield' b'Russia apparently is sabotaging infrastructure to cripple the already battered Georgian military.' b'Bank analyst forecast Georgian crisis 2 days early' b""Georgia confict could set back Russia's US relations 'for years' | World news | guardian.co.uk"" b'War in the Caucasus is as much the product of an American imperial drive as local conflicts.' b'""Non-media"" photos of South Ossetia/Georgia conflict.' b'Georgian TV reporter shot by Russian sniper during live broadcast [video]' b'Saudi Arabia: Mother moves to block child marriage' b'Taliban wages war on humanitarian aid workers' b'Russia: World ""can forget about"" Georgia\'s territorial integrity' b'Darfur rebels accuse Sudan of mounting major attack' b'Philippines : Peace Advocate say Muslims need assurance Christians not out to convert them'"
...,...
1984,"barclays and rbs shares suspended from trading after tanking more than 8 Pope says Church should ask forgiveness from gays for past treatment Poland 'shocked' by xenophobic abuse of Poles in UK There will be no second referendum, cabinet agrees Scotland welcome to join EU, Merkel ally says Sterling dips below Friday's 31-year low amid Brexit uncertainty No negative news about South African President allowed on state broadcaster. Surge in Hate Crimes in the U.K. Following U.K.s Brexit Vote Weapons shipped into Jordan by the CIA and Saudi Arabia intended for Syrian rebels have been systematically stolen by Jordanian intelligence operatives and sold to arms merchants on the black market, according to American and Jordanian officials Angela Merkel said the U.K. must file exit papers with the European Union before talks can begin In a birth offering hope to a threatened species, an aquarium in Osaka, Japan, has succeeded in artificially breeding a southern rockhopper penguin for the first time in the world. Sky News Journalist Left Speechless As Leave MP Tells Him 'There Is No Plan' Giant panda in Macau gives birth to twins Get out now: EU leader tells Britain it must invoke Article 50 on Tuesday Sea turtle 'beaten and left for dead' on beach by people taking selfies: Loggerhead sea turtle receiving treatment after it was beaten with sticks and stepped on in Lebanon German lawyers to probe Erdogan over alleged war crimes Boris Johnson says the UK will continue to ""intensify"" cooperation with the EU and tells his fellow Leave supporters they must accept the 52-48 referendum win was ""not entirely overwhelming"". Richard Branson is calling on the UK government to hold a second EU referendum to prevent 'irreversible damage' to the country. Turkey 'sorry for downing Russian jet' Edward Snowden lawyer vows new push for pardon from Obama Brexit opinion poll reveals majority don't want second EU referendum: ""half (48%) of British adults say that they are happy with the result, with two in five (43%) saying they are unhappy with the outcome."" Conservative MP Leave Campaigner: ""The leave campaign don't have a post-Brexit plan..."" Economists predict UK recession, further weakening of Pound following Brexit. New EU 'superstate plan by France, Germany: Creating a European superstate limiting the powers of individual members following Britains referendum decision to leave the EU Pakistani clerics declare transgender marriages legal under Islamic law"
1985,"2500 scientists to australia if you want to save the great barrier reef stop supporting coal The personal details of 112,000 French police officers have been uploaded to Google Drive in a security breach just a fortnight after two officers were murdered at their home by a jihadist. S&amp;P cuts United Kingdom sovereign credit rating to 'AA' from 'AAA' Huge helium deposit found in Africa CEO of the South African state broadcaster quits shortly after negative news about president is banned. Brexit cost investors $2 trillion, the worst one day drop ever Hong Kong democracy activists call for return to British rule as first step to independence Brexit: Iceland president says UK can join 'triangle' of non-EU countries UK's Osborne: 'Absolutely' going to have to cut spending, raise taxes 'Do not let Scotland down now' : Scottish MEP Alyn Smith has urged members of the European Parliament to stand by his country following the UK referendum on EU membership. British pound could hit history-making dollar parity by end of 2016 Merkel vows to strengthen EU, tells UK no 'cherry-picking' ""Ryanair will not deploy new aircraft on routes to and from the UK [United Kingdom] next year [2017], following the Brexit vote, and will instead focus on the European Union [EU]."" People, ever more greedy and stupid, destroy the world - Stephen Hawking to Larry King Siemens freezes new UK wind power investment following Brexit vote US, Canada and Mexico pledge 50% of power from clean energy by 2025 There is increasing evidence that Australia is torturing refugees, medical experts claim Richard Branson, the founder of Virgin Group, said Tuesday that the company has lost about a third of its value since the U.K. voted to leave the European Union last week. 37,000-yr-old skull from Borneo reveals surprise for scientists - Study of the ""Deep Skull"" - oldest modern human discovered in SE Asia - reveals this ancient person was not related to Indigenous Australians, as originally thought. ""Our discovery is a game changer."" Palestinians stone Western Wall worshipers; police shut Temple Mount to non-Muslims Jean-Claude Juncker asks Farage: Why are you here? ""Romanians for Remainians"" offering a new home to the 48% of Britons who voted to stay in the EU | Bucharest newspaper's app connects loving Romanian families with needy Brits, allowing people to offer to help would-be immigrants apply for a Romanian ID Brexit: Gibraltar in talks with Scotland to stay in EU 8 Suicide Bombers Strike Lebanon Mexico's security forces routinely use 'sexual torture' against women: Rights group Amnesty International has compiled testimonies of sexual violence used as torture by Mexican security forces. Despite thousands of complaints, only 15 probes have led to criminal convictions since 1991."
1986,"explosion at airport in istanbul Yemeni former president: Terrorism is the offspring of Wahhabism of Al Saud regime UK must accept freedom of movement to access EU Market Devastated: scientists too late to captive breed mammal lost to climate change - Australian conservationists spent 5 months obtaining permissions &amp; planning for a captive breeding program. But when they arrived on the rodents tiny island, they they were too late. British Labor Party leader Jeremy Corbyn loses a no-confidence vote but refuses to resign A Muslim Shop in the UK Was Just Firebombed While People Were Inside Mexican Authorities Sexually Torture Women in Prison UK shares and pound continue to recover Iceland historian Johannesson wins presidential election 99-Million-Yr-Old Bird Wings Found Encased in Amber - Finding things trapped in amber is far from rare. But when researchers in Burma found a pair of tiny bird-like wings frozen inside, they knew they had something special. A chatbot programmed by a British teenager has successfully challenged 160,000 parking tickets since its launch last year. The Philippine president-elect said Monday he would aggressively promote artificial birth control in the country even at the risk of getting in a fight with the dominant Catholic church, which staunchly opposes the use of contraceptives. Former Belgian Prime Minister ridicules Nigel Farage and accuses Ukip leader of lying in EU referendum campaign Brexiteer Nigel Farage To EU: 'You're Not Laughing Now, Are You?' Islamic State bombings in southern Yemen kill 38 people Escape Tunnel, Dug by Hand, Is Found at Holocaust Massacre Site The land under Beijing is sinking by as much as four inches per year because of the overconsumption of groundwater, according to new research. Car bomb and Anti-Islamic attack on Mosque in Perth, Australia Emaciated lions in Taiz Zoo are trapped in blood-soaked cages and left to starve for months due to the Yemeni civil war Rupert Murdoch describes Brexit as 'wonderful'. The media mogul likened leaving the EU to a prison break and shared his view of Donald Trump as a very able man. More than 40 killed in Yemen suicide attacks Google Found Disastrous Symantec and Norton Vulnerabilities That Are 'As Bad As It Gets Extremist violence on the rise in Germany: Domestic intelligence agency says far-right, far-left and Islamist radical groups gaining membership in country BBC News: Labour MPs pass Corbyn no-confidence motion Tiny New Zealand town with 'too many jobs' launches drive to recruit outsiders"
1987,"jamaica proposes marijuana dispensers for tourists at airports following legalisation the kiosks and desks would give people a license to purchase up to 2 ounces of the drug to use during their stay Stephen Hawking says pollution and 'stupidity' still biggest threats to mankind: we have certainly not become less greedy or less stupid in our treatment of the environment over the past decade Boris Johnson says he will not run for Tory party leadership Six gay men in Ivory Coast were abused and forced to flee their homes after they were pictured signing a condolence book for victims of the recent attack on a gay nightclub in Florida Switzerland denies citizenship to Muslim immigrant girls who refused to swim with boys: report Palestinian terrorist stabs israeli teen girl to death in her bedroom Puerto Rico will default on $1 billion of debt on Friday Republic of Ireland fans to be awarded medal for sportsmanship by Paris mayor. Afghan suicide bomber 'kills up to 40' - BBC News US airstrikes kill at least 250 ISIS fighters in convoy outside Fallujah, official says Turkish Cop Who Took Down Istanbul Gunman Hailed a Hero Cannabis compounds could treat Alzheimer's by removing plaque-forming proteins from brain cells, research suggests Japan's top court has approved blanket surveillance of the country's Muslims: 'They made us terrorist suspects, we never did anything wrong,' says Japanese Muslim, Mohammed Fujita CIA Gave Romania Millions to Host Secret Prisons Groups urge U.N. to suspend Saudi Arabia from rights council Googles free wifi at Indian railway stations is better than most of the countrys paid services Mounting evidence suggests 'hobbits' were wiped out by modern humans' ancestors 50,000 years ago. The men who carried out Tuesday's terror attack at Istanbul's Ataturk Airport were from Russia, Uzbekistan and Kyrgyzstan, a Turkish offical said. Calls to suspend Saudi Arabia from UN Human Rights Council because of military aggresion in Yemen More Than 100 Nobel Laureates Call Out Greenpeace For Anti-GMO Obstruction In Developing World British pedophile sentenced to 85 years in US for trafficking child abuse images: Domminich Shaw, a kingpin of sexual violence against children, sent dozens of images online and discussed plans to assault and kill a child while on probation US permitted 1,200 offshore fracks in Gulf of Mexico between 2010 and 2014 and allowed 72 billion gallons of chemical discharge in 2014. We will be swimming in ridicule - French beach police to carry guns while in swimming trunks: Police lifeguards on Frances busiest beaches will carry guns and bullet-proof vests for the first time this summer amid fears that terrorists could target holidaymakers. UEFA says no minutes of silence for Istanbul victims at Euro 2016 because 'Turkey have already been eliminated' Law Enforcement Sources: Gun Used in Paris Terrorist Attacks Came from Phoenix"


Concatenate all the headlines into one corpus to be vectorized in a tdidf/word count vectorizer

In [79]:
df = df.drop(labels = columns.columns, axis = 1)
# df = df.merge(concat_column)
df = pd.concat([df, concat_column], axis = 1)
df

Unnamed: 0,Date,Label,Top1
0,2008-08-08,0,georgia downs two russian warplanes as countries move to brink of war breaking musharraf to be impeached russia today columns of troops roll into south ossetia footage from fighting youtube russian tanks are moving towards the capital of south ossetia which has reportedly been completely destroyed by georgian artillery fire afghan children raped with impunity un official says this is sick a three year old was raped and they do nothing 150 russian tanks have entered south ossetia whilst georgia shoots down two russian jets breaking georgia invades south ossetia russia warned it would intervene on sos side the enemy combatent trials are nothing but a sham salim haman has been sentenced to 5 12 years but will be kept longer anyway just because they feel like it georgian troops retreat from s osettain capital presumably leaving several hundred people killed video did the us prep georgia for war with russia rice gives green light for israel to attack iran says us has no veto over israeli military ops announcingclass action lawsuit on behalf of american public against the fbi sorussia and georgia are at war and the nyts top story is opening ceremonies of the olympics what a fucking disgrace and yet further proof of the decline of journalism china tells bush to stay out of other countries affairs did world war iii start today georgia invades south ossetia if russia gets involved will nato absorb georgia and unleash a full scale war alqaeda faces islamist backlash condoleezza rice the us would not act to prevent an israeli strike on iran israeli defense minister ehud barak israel is prepared for uncompromising victory in the case of military hostilities this is a busy day the european union has approved new sanctions against iran in protest at its nuclear programme georgia will withdraw 1000 soldiers from iraq to help fight off russian forces in georgias breakaway region of south ossetia why the pentagon thinks attacking iran is a bad idea us news &amp world report caucasus in crisis georgia invades south ossetia indian shoe manufactory and again in a series of you do not like your work visitors suffering from mental illnesses banned from olympics no help for mexicos kidnapping surge
1,2008-08-11,1,why wont america and nato help us if they wont help us now why did we help them in iraq bush puts foot down on georgian conflict jewish georgian minister thanks to israeli training were fending off russia georgian army flees in disarray as russians advance gori abandoned to russia without a shot fired olympic opening ceremony fireworks faked what were the mossad with fraudulent new zealand passports doing in iraq russia angered by israeli military sale to georgia an american citizen living in sossetia blames us and georgian leaders for the genocide of innocent people welcome to world war iv now in high definition georgias move a mistake of monumental proportions russia presses deeper into georgia us says regime change is goal abhinav bindra wins first ever individual olympic gold medal for india us ship heads for arctic to define territory drivers in a jerusalem taxi station threaten to quit rather than work for their new boss an arab the french team is stunned by phelps and the 4x100m relay team israel and the us behind the georgian aggression do not believe tv neither russian nor georgian there are much more victims riots are still going on in montreal canada because police murdered a boy on saturday china to overtake us as largest manufacturer war in south ossetia pics israeli physicians group condemns state torture russia has just beaten the united states over the head with peak oil perhaps the question about the georgia russia conflict russia is so much better at war so this is what its come to trading sex for food
2,2008-08-12,0,remember that adorable 9yearold who sang at the opening ceremonies that was fake too russia ends georgia operation if we had no sexual harassment we would have no children alqaeda is losing support in iraq because of a brutal crackdown on activities it regards as unislamic including women buying cucumbers ceasefire in georgia putin outmaneuvers the west why microsoft and intel tried to kill the xo 100 laptop stratfor the russogeorgian war and the balance of power im trying to get a sense of this whole georgiarussia war vote up if you think georgia started it or down if you think russia did the us military was surprised by the timing and swiftness of the russian militarys move into south ossetia and is still trying to sort out what happened a us defense official said monday us beats war drum as iran dumps the dollar gorbachev georgian military attacked the south ossetian capital of tskhinvali with multiple rocket launchers designed to devastate large areas cnn use footage of tskhinvali ruins to cover georgian report video beginning a war as the olympics were opening violates the ancient tradition of a truce to conflict during the games the ioc could respond by taking the 2014 games away from russia 55 pyramids as large as the luxor stacked into a megacity pyramid in tokyo bay the 11 top party cities in the world us troops still in georgia did you know they were in georgia in the first place why russias response to georgia was right gorbachev accuses us of making a serious blunder in pursuing its interest in the caucasus region russia georgia and nato cold war two remember that adorable 62yearold who led your country into war based on evidence that was fake too war in georgia the israeli connection all signs point to the us encouraging georgia to invade south ossetia goddamnit bush christopher king argues that the us and nato are behind the georgian invasion of south ossetia but have misjudged russian resolve america the new mexico bbc news | asiapacific | extinction by man not climate
3,2008-08-13,0,us refuses israel weapons to attack iran report when the president ordered to attack tskhinvali the capital of south ossetia we knew then we were doomed how come he didnt realize that israel clears troops who killed reuters cameraman britain\s policy of being tough on drugs is pointless says a former civil servant who once ran the cabinet\s antidrugs unit body of 14 year old found in trunk latest ransom paid kidnapping victim in mexico head cop quits prez dissolves suspect elite task force china has moved 10 million quake survivors into prefab homes bush announces operation get all up in russias grill yeah this will end well russian forces sink georgian ships the commander of a navy air reconnaissance squadron that provides the president and the defense secretary the airborne ability to command the nations nuclear weapons has been relieved of duty 92 of cnn readers russias actions in georgia justified usa to send fleet into black sea to help georgia send troops in humanitarian aid exercise us warns against israeli plan to strike against irans nuclear facilities in an intriguing cyberalliance two estonian computer experts are heading to georgia to keep the countrys networks running amid an intense military confrontation with russia the cnn effect georgia schools russia in information warfare why russias response to georgia was right elephants extinct by 2020 us humanitarian missions soon in georgia if russia hits the us wwiii georgias ddos came from us sources russian convoy heads into georgia violating truce israeli defence minister us against strike on iran gorbachev we had no choice witness russian forces head towards tbilisi in breach of ceasefire agreement quarter of russians blame us for conflict poll georgian president says us military will take control of seaports and airports pentagon denies 2006 nobel laureate aleksander solzhenitsyn accuses us nato of encircling russia
4,2008-08-14,1,all the experts admit that we should legalise drugs war in south osetia 89 pictures made by a russian soldier swedish wrestler ara abrahamian throws away medal in olympic hissy fit russia exaggerated the death toll in south ossetia now only 44 were originally killed compared to 2000 missile that killed 9 inside pakistan may have been launched by the cia rushdie condemns random houses refusal to publish novel for fear of muslim retaliation poland and us agree to missle defense deal interesting timing will the russians conquer tblisi bet on it no seriously you can bet on it russia exaggerating south ossetian death toll says human rights group musharraf expected to resign rather than face impeachment moscow made plans months ago to invade georgia why russias response to georgia was right nigeria has handed over the potentially oilrich bakassi peninsula to cameroon the us and poland have agreed a preliminary deal on plans for the controversial us defence shield russia apparently is sabotaging infrastructure to cripple the already battered georgian military bank analyst forecast georgian crisis 2 days early georgia confict could set back russias us relations for years | world news | guardiancouk war in the caucasus is as much the product of an american imperial drive as local conflicts nonmedia photos of south ossetiageorgia conflict georgian tv reporter shot by russian sniper during live broadcast video saudi arabia mother moves to block child marriage taliban wages war on humanitarian aid workers russia world can forget about georgia\s territorial integrity darfur rebels accuse sudan of mounting major attack philippines peace advocate say muslims need assurance christians not out to convert them
...,...,...,...
1984,2016-06-27,0,barclays and rbs shares suspended from trading after tanking more than 8 pope says church should ask forgiveness from gays for past treatment poland shocked by xenophobic abuse of poles in uk there will be no second referendum cabinet agrees scotland welcome to join eu merkel ally says sterling dips below fridays 31year low amid brexit uncertainty no negative news about south african president allowed on state broadcaster surge in hate crimes in the uk following uks brexit vote weapons shipped into jordan by the cia and saudi arabia intended for syrian rebels have been systematically stolen by jordanian intelligence operatives and sold to arms merchants on the black market according to american and jordanian officials angela merkel said the uk must file exit papers with the european union before talks can begin in a birth offering hope to a threatened species an aquarium in osaka japan has succeeded in artificially breeding a southern rockhopper penguin for the first time in the world sky news journalist left speechless as leave mp tells him there is no plan giant panda in macau gives birth to twins get out now eu leader tells britain it must invoke article 50 on tuesday sea turtle beaten and left for dead on beach by people taking selfies loggerhead sea turtle receiving treatment after it was beaten with sticks and stepped on in lebanon german lawyers to probe erdogan over alleged war crimes boris johnson says the uk will continue to intensify cooperation with the eu and tells his fellow leave supporters they must accept the 5248 referendum win was not entirely overwhelming richard branson is calling on the uk government to hold a second eu referendum to prevent irreversible damage to the country turkey sorry for downing russian jet edward snowden lawyer vows new push for pardon from obama brexit opinion poll reveals majority dont want second eu referendum half 48 of british adults say that they are happy with the result with two in five 43 saying they are unhappy with the outcome conservative mp leave campaigner the leave campaign dont have a postbrexit plan economists predict uk recession further weakening of pound following brexit new eu superstate plan by france germany creating a european superstate limiting the powers of individual members following britains referendum decision to leave the eu pakistani clerics declare transgender marriages legal under islamic law
1985,2016-06-28,1,2500 scientists to australia if you want to save the great barrier reef stop supporting coal the personal details of 112000 french police officers have been uploaded to google drive in a security breach just a fortnight after two officers were murdered at their home by a jihadist s&ampp cuts united kingdom sovereign credit rating to aa from aaa huge helium deposit found in africa ceo of the south african state broadcaster quits shortly after negative news about president is banned brexit cost investors 2 trillion the worst one day drop ever hong kong democracy activists call for return to british rule as first step to independence brexit iceland president says uk can join triangle of noneu countries uks osborne absolutely going to have to cut spending raise taxes do not let scotland down now scottish mep alyn smith has urged members of the european parliament to stand by his country following the uk referendum on eu membership british pound could hit historymaking dollar parity by end of 2016 merkel vows to strengthen eu tells uk no cherrypicking ryanair will not deploy new aircraft on routes to and from the uk united kingdom next year 2017 following the brexit vote and will instead focus on the european union eu people ever more greedy and stupid destroy the world stephen hawking to larry king siemens freezes new uk wind power investment following brexit vote us canada and mexico pledge 50 of power from clean energy by 2025 there is increasing evidence that australia is torturing refugees medical experts claim richard branson the founder of virgin group said tuesday that the company has lost about a third of its value since the uk voted to leave the european union last week 37000yrold skull from borneo reveals surprise for scientists study of the deep skull oldest modern human discovered in se asia reveals this ancient person was not related to indigenous australians as originally thought our discovery is a game changer palestinians stone western wall worshipers police shut temple mount to nonmuslims jeanclaude juncker asks farage why are you here romanians for remainians offering a new home to the 48 of britons who voted to stay in the eu | bucharest newspapers app connects loving romanian families with needy brits allowing people to offer to help wouldbe immigrants apply for a romanian id brexit gibraltar in talks with scotland to stay in eu 8 suicide bombers strike lebanon mexicos security forces routinely use sexual torture against women rights group amnesty international has compiled testimonies of sexual violence used as torture by mexican security forces despite thousands of complaints only 15 probes have led to criminal convictions since 1991
1986,2016-06-29,1,explosion at airport in istanbul yemeni former president terrorism is the offspring of wahhabism of al saud regime uk must accept freedom of movement to access eu market devastated scientists too late to captive breed mammal lost to climate change australian conservationists spent 5 months obtaining permissions &amp planning for a captive breeding program but when they arrived on the rodents tiny island they they were too late british labor party leader jeremy corbyn loses a noconfidence vote but refuses to resign a muslim shop in the uk was just firebombed while people were inside mexican authorities sexually torture women in prison uk shares and pound continue to recover iceland historian johannesson wins presidential election 99millionyrold bird wings found encased in amber finding things trapped in amber is far from rare but when researchers in burma found a pair of tiny birdlike wings frozen inside they knew they had something special a chatbot programmed by a british teenager has successfully challenged 160000 parking tickets since its launch last year the philippine presidentelect said monday he would aggressively promote artificial birth control in the country even at the risk of getting in a fight with the dominant catholic church which staunchly opposes the use of contraceptives former belgian prime minister ridicules nigel farage and accuses ukip leader of lying in eu referendum campaign brexiteer nigel farage to eu youre not laughing now are you islamic state bombings in southern yemen kill 38 people escape tunnel dug by hand is found at holocaust massacre site the land under beijing is sinking by as much as four inches per year because of the overconsumption of groundwater according to new research car bomb and antiislamic attack on mosque in perth australia emaciated lions in taiz zoo are trapped in bloodsoaked cages and left to starve for months due to the yemeni civil war rupert murdoch describes brexit as wonderful the media mogul likened leaving the eu to a prison break and shared his view of donald trump as a very able man more than 40 killed in yemen suicide attacks google found disastrous symantec and norton vulnerabilities that are as bad as it gets extremist violence on the rise in germany domestic intelligence agency says farright farleft and islamist radical groups gaining membership in country bbc news labour mps pass corbyn noconfidence motion tiny new zealand town with too many jobs launches drive to recruit outsiders
1987,2016-06-30,1,jamaica proposes marijuana dispensers for tourists at airports following legalisation the kiosks and desks would give people a license to purchase up to 2 ounces of the drug to use during their stay stephen hawking says pollution and stupidity still biggest threats to mankind we have certainly not become less greedy or less stupid in our treatment of the environment over the past decade boris johnson says he will not run for tory party leadership six gay men in ivory coast were abused and forced to flee their homes after they were pictured signing a condolence book for victims of the recent attack on a gay nightclub in florida switzerland denies citizenship to muslim immigrant girls who refused to swim with boys report palestinian terrorist stabs israeli teen girl to death in her bedroom puerto rico will default on 1 billion of debt on friday republic of ireland fans to be awarded medal for sportsmanship by paris mayor afghan suicide bomber kills up to 40 bbc news us airstrikes kill at least 250 isis fighters in convoy outside fallujah official says turkish cop who took down istanbul gunman hailed a hero cannabis compounds could treat alzheimers by removing plaqueforming proteins from brain cells research suggests japans top court has approved blanket surveillance of the countrys muslims they made us terrorist suspects we never did anything wrong says japanese muslim mohammed fujita cia gave romania millions to host secret prisons groups urge un to suspend saudi arabia from rights council googles free wifi at indian railway stations is better than most of the countrys paid services mounting evidence suggests hobbits were wiped out by modern humans ancestors 50000 years ago the men who carried out tuesdays terror attack at istanbuls ataturk airport were from russia uzbekistan and kyrgyzstan a turkish offical said calls to suspend saudi arabia from un human rights council because of military aggresion in yemen more than 100 nobel laureates call out greenpeace for antigmo obstruction in developing world british pedophile sentenced to 85 years in us for trafficking child abuse images domminich shaw a kingpin of sexual violence against children sent dozens of images online and discussed plans to assault and kill a child while on probation us permitted 1200 offshore fracks in gulf of mexico between 2010 and 2014 and allowed 72 billion gallons of chemical discharge in 2014 we will be swimming in ridicule french beach police to carry guns while in swimming trunks police lifeguards on frances busiest beaches will carry guns and bulletproof vests for the first time this summer amid fears that terrorists could target holidaymakers uefa says no minutes of silence for istanbul victims at euro 2016 because turkey have already been eliminated law enforcement sources gun used in paris terrorist attacks came from phoenix


Remove redundant, non-combined columns and append the new concatenated column onto the existing dataframe with labels.

## Form a baseline 

In [86]:
from sklearn.feature_extraction.text import TfidfVectorizer
sw = stop.ENGLISH_STOP_WORDS
tfidf = TfidfVectorizer(stop_words = sw)
tfidf.fit(df['Top1'])
print(tfidf.get_feature_names()[0:1000])

['000', '0001', '001', '003', '004', '005', '006', '007', '0077', '00s', '01', '011', '014', '014ckwh', '017', '02', '0200', '021hour', '022', '0220', '0221', '025', '03', '030', '035017', '04', '05', '050', '05eurgb', '06', '060', '062', '068as', '07', '07232014', '075', '07baku1268', '07pc', '08', '089m', '08baku671', '09', '0900', '0930', '0935', '10', '100', '1000', '10000', '100000', '1000000', '1000000000', '100000man', '100000person', '100000th', '10000s', '10000strong', '10000yearold', '10001500', '1000km', '1000megawatt', '1000per', '1000s', '1000strong', '1000x', '1000year', '1000yearold', '100200', '1004', '1004am', '1006', '100abarrel', '100apack', '100billion', '100bn', '100day', '100ds', '100fold', '100foot', '100ft', '100k', '100km', '100m', '100mil', '100million', '100mstretch', '100mw', '100page', '100s', '100th', '100x', '100year', '100yearold', '101', '1011', '1012', '1017', '101st', '102', '1020', '102000', '1021', '10262010', '102day', '102yearold', '103', '1030', 

Create a dictionary in a tfidf vectorizer given our concatenated dataset.

In [99]:
features = tfidf.transform(df['Top1'])
data_labels = df['Label']
display(features)
print(features)

<1986x39900 sparse matrix of type '<class 'numpy.float64'>'
	with 489780 stored elements in Compressed Sparse Row format>

  (0, 39697)	0.04792113397841143
  (0, 39586)	0.018635470668102998
  (0, 39576)	0.02309480578615759
  (0, 39317)	0.03510663510162514
  (0, 39295)	0.03222955083293025
  (0, 39198)	0.052627215370409826
  (0, 38962)	0.07472527781990451
  (0, 38644)	0.05458685467845757
  (0, 38632)	0.03848278340226787
  (0, 38591)	0.09580465919607202
  (0, 38346)	0.05874965900421731
  (0, 38220)	0.028760672420553638
  (0, 38214)	0.04589661170480536
  (0, 38159)	0.0641100318305066
  (0, 37476)	0.07190063735605241
  (0, 37430)	0.03528136571540746
  (0, 37203)	0.08424845497195356
  (0, 36718)	0.057575186629517626
  (0, 36629)	0.055135230165267744
  (0, 36160)	0.0639523086984654
  (0, 35872)	0.05955281974015128
  (0, 35600)	0.035923795876247486
  (0, 35374)	0.09332647229079422
  (0, 34955)	0.05458685467845757
  (0, 34709)	0.04748600345927687
  :	:
  (1985, 3536)	0.09229817581704898
  (1985, 3108)	0.0404703132373173
  (1985, 3009)	0.03580320373466395
  (1985, 2796)	0.06189488586264328
  (1985, 2761)	0.035896283

We have converted the *whole* column of concatenated headlines into a single sparse matrix using the tfidf transformer built on the dictionary created earlier. At this point, I'm not sure if I should have created a sparse matrix for each row, or use the above method instead. So, for this baseline, I will perform both.

In [6]:
analyzer = tfidf.build_analyzer()
columns = df.iloc[:, 2:]
df = df_text_analyzer(df, columns, analyzer)
df

Unnamed: 0,Date,Label,Top1,Top2,Top3,Top4,Top5,Top6,Top7,Top8,...,Top16,Top17,Top18,Top19,Top20,Top21,Top22,Top23,Top24,Top25
0,2008-08-08,0,"[georgia, downs, russian, warplanes, countries, brink, war]","[breaking, musharraf, impeached]","[russia, today, columns, troops, roll, south, ossetia, footage, fighting, youtube]","[russian, tanks, moving, capital, south, ossetia, reportedly, completely, destroyed, georgian, artillery]","[afghan, children, raped, impunity, official, says, sick, year, old, raped]","[150, russian, tanks, entered, south, ossetia, whilst, georgia, shoots, russian, jets]","[breaking, georgia, invades, south, ossetia, russia, warned, intervene, sos]","[enemy, combatent, trials, sham, salim, haman, sentenced, 12, years, kept, longer, just, feel, like]",...,"[georgia, invades, south, ossetia, russia, gets, involved, nato, absorb, georgia, unleash, scale, war]","[alqaeda, faces, islamist, backlash]","[condoleezza, rice, act, prevent, israeli, strike, iran, israeli, defense, minister, ehud, barak, israel, prepared, uncompromising, victory, case, military, hostilities]","[busy, day, european, union, approved, new, sanctions, iran, protest, nuclear, programme]","[georgia, withdraw, 1000, soldiers, iraq, help, fight, russian, forces, georgias, breakaway, region, south, ossetia]","[pentagon, thinks, attacking, iran, bad, idea, news, amp, world, report]","[caucasus, crisis, georgia, invades, south, ossetia]","[indian, shoe, manufactory, series, like, work]","[visitors, suffering, mental, illnesses, banned, olympics]","[help, mexicos, kidnapping, surge]"
1,2008-08-11,1,"[wont, america, nato, help, wont, help, did, help, iraq]","[bush, puts, foot, georgian, conflict]","[jewish, georgian, minister, thanks, israeli, training, fending, russia]","[georgian, army, flees, disarray, russians, advance, gori, abandoned, russia, shot, fired]","[olympic, opening, ceremony, fireworks, faked]","[mossad, fraudulent, new, zealand, passports, doing, iraq]","[russia, angered, israeli, military, sale, georgia]","[american, citizen, living, sossetia, blames, georgian, leaders, genocide, innocent, people]",...,"[israel, georgian, aggression]","[believe, tv, russian, georgian, victims]","[riots, going, montreal, canada, police, murdered, boy, saturday]","[china, overtake, largest, manufacturer]","[war, south, ossetia, pics]","[israeli, physicians, group, condemns, state, torture]","[russia, just, beaten, united, states, head, peak, oil]","[question, georgia, russia, conflict]","[russia, better, war]","[come, trading, sex, food]"
2,2008-08-12,0,"[remember, adorable, 9yearold, sang, opening, ceremonies, fake]","[russia, ends, georgia, operation]","[sexual, harassment, children]","[alqaeda, losing, support, iraq, brutal, crackdown, activities, regards, unislamic, including, women, buying, cucumbers]","[ceasefire, georgia, putin, outmaneuvers, west]","[microsoft, intel, tried, kill, xo, 100, laptop]","[stratfor, russogeorgian, war, balance, power]","[im, trying, sense, georgiarussia, war, vote, think, georgia, started, think, russia, did]",...,"[troops, georgia, did, know, georgia, place]","[russias, response, georgia, right]","[gorbachev, accuses, making, blunder, pursuing, caucasus, region]","[russia, georgia, nato, cold, war]","[remember, adorable, 62yearold, led, country, war, based, evidence, fake]","[war, georgia, israeli, connection]","[signs, point, encouraging, georgia, invade, south, ossetia, goddamnit, bush]","[christopher, king, argues, nato, georgian, invasion, south, ossetia, misjudged, russian, resolve]","[america, new, mexico]","[bbc, news, asiapacific, extinction, man, climate]"
3,2008-08-13,0,"[refuses, israel, weapons, attack, iran, report]","[president, ordered, attack, tskhinvali, capital, south, ossetia, knew, doomed, come, didnt, realize]","[israel, clears, troops, killed, reuters, cameraman]","[britain, policy, tough, drugs, pointless, says, civil, servant, ran, cabinet, antidrugs, unit]","[body, 14, year, old, trunk, latest, ransom, paid, kidnapping, victim, mexico, head, cop, quits, prez, dissolves, suspect, elite, task, force]","[china, moved, 10, million, quake, survivors, prefab, homes]","[bush, announces, operation, russias, grill, yeah, end]","[russian, forces, sink, georgian, ships]",...,"[elephants, extinct, 2020]","[humanitarian, missions, soon, georgia, russia, hits, wwiii]","[georgias, ddos, came, sources]","[russian, convoy, heads, georgia, violating, truce]","[israeli, defence, minister, strike, iran]","[gorbachev, choice]","[witness, russian, forces, head, tbilisi, breach, ceasefire, agreement]","[quarter, russians, blame, conflict, poll]","[georgian, president, says, military, control, seaports, airports, pentagon, denies]","[2006, nobel, laureate, aleksander, solzhenitsyn, accuses, nato, encircling, russia]"
4,2008-08-14,1,"[experts, admit, legalise, drugs]","[war, south, osetia, 89, pictures, russian, soldier]","[swedish, wrestler, ara, abrahamian, throws, away, medal, olympic, hissy, fit]","[russia, exaggerated, death, toll, south, ossetia, 44, originally, killed, compared, 2000]","[missile, killed, inside, pakistan, launched, cia]","[rushdie, condemns, random, houses, refusal, publish, novel, fear, muslim, retaliation]","[poland, agree, missle, defense, deal, interesting, timing]","[russians, conquer, tblisi, bet, seriously, bet]",...,"[bank, analyst, forecast, georgian, crisis, days, early]","[georgia, confict, set, russias, relations, years, world, news, guardiancouk]","[war, caucasus, product, american, imperial, drive, local, conflicts]","[nonmedia, photos, south, ossetiageorgia, conflict]","[georgian, tv, reporter, shot, russian, sniper, live, broadcast, video]","[saudi, arabia, mother, moves, block, child, marriage]","[taliban, wages, war, humanitarian, aid, workers]","[russia, world, forget, georgia, territorial, integrity]","[darfur, rebels, accuse, sudan, mounting, major, attack]","[philippines, peace, advocate, say, muslims, need, assurance, christians, convert]"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1984,2016-06-27,0,"[barclays, rbs, shares, suspended, trading, tanking]","[pope, says, church, ask, forgiveness, gays, past, treatment]","[poland, shocked, xenophobic, abuse, poles, uk]","[second, referendum, cabinet, agrees]","[scotland, welcome, join, eu, merkel, ally, says]","[sterling, dips, fridays, 31year, low, amid, brexit, uncertainty]","[negative, news, south, african, president, allowed, state, broadcaster]","[surge, hate, crimes, uk, following, uks, brexit, vote]",...,"[german, lawyers, probe, erdogan, alleged, war, crimes]","[boris, johnson, says, uk, continue, intensify, cooperation, eu, tells, fellow, leave, supporters, accept, 5248, referendum, win, entirely, overwhelming]","[richard, branson, calling, uk, government, hold, second, eu, referendum, prevent, irreversible, damage, country]","[turkey, sorry, downing, russian, jet]","[edward, snowden, lawyer, vows, new, push, pardon, obama]","[brexit, opinion, poll, reveals, majority, dont, want, second, eu, referendum, half, 48, british, adults, say, happy, result, 43, saying, unhappy, outcome]","[conservative, mp, leave, campaigner, leave, campaign, dont, postbrexit, plan]","[economists, predict, uk, recession, weakening, pound, following, brexit]","[new, eu, superstate, plan, france, germany, creating, european, superstate, limiting, powers, individual, members, following, britains, referendum, decision, leave, eu]","[pakistani, clerics, declare, transgender, marriages, legal, islamic, law]"
1985,2016-06-28,1,"[2500, scientists, australia, want, save, great, barrier, reef, stop, supporting, coal]","[personal, details, 112000, french, police, officers, uploaded, google, drive, security, breach, just, fortnight, officers, murdered, home, jihadist]","[ampp, cuts, united, kingdom, sovereign, credit, rating, aa, aaa]","[huge, helium, deposit, africa]","[ceo, south, african, state, broadcaster, quits, shortly, negative, news, president, banned]","[brexit, cost, investors, trillion, worst, day, drop]","[hong, kong, democracy, activists, return, british, rule, step, independence]","[brexit, iceland, president, says, uk, join, triangle, noneu, countries]",...,"[canada, mexico, pledge, 50, power, clean, energy, 2025]","[increasing, evidence, australia, torturing, refugees, medical, experts, claim]","[richard, branson, founder, virgin, group, said, tuesday, company, lost, value, uk, voted, leave, european, union, week]","[37000yrold, skull, borneo, reveals, surprise, scientists, study, deep, skull, oldest, modern, human, discovered, se, asia, reveals, ancient, person, related, indigenous, australians, originally, thought, discovery, game, changer]","[palestinians, stone, western, wall, worshipers, police, shut, temple, mount, nonmuslims]","[jeanclaude, juncker, asks, farage]","[romanians, remainians, offering, new, home, 48, britons, voted, stay, eu, bucharest, newspapers, app, connects, loving, romanian, families, needy, brits, allowing, people, offer, help, wouldbe, immigrants, apply, romanian, id]","[brexit, gibraltar, talks, scotland, stay, eu]","[suicide, bombers, strike, lebanon]","[mexicos, security, forces, routinely, use, sexual, torture, women, rights, group, amnesty, international, compiled, testimonies, sexual, violence, used, torture, mexican, security, forces, despite, thousands, complaints, 15, probes, led, criminal, convictions, 1991]"
1986,2016-06-29,1,"[explosion, airport, istanbul]","[yemeni, president, terrorism, offspring, wahhabism, al, saud, regime]","[uk, accept, freedom, movement, access, eu, market]","[devastated, scientists, late, captive, breed, mammal, lost, climate, change, australian, conservationists, spent, months, obtaining, permissions, amp, planning, captive, breeding, program, arrived, rodents, tiny, island, late]","[british, labor, party, leader, jeremy, corbyn, loses, noconfidence, vote, refuses, resign]","[muslim, shop, uk, just, firebombed, people, inside]","[mexican, authorities, sexually, torture, women, prison]","[uk, shares, pound, continue, recover]",...,"[escape, tunnel, dug, hand, holocaust, massacre, site]","[land, beijing, sinking, inches, year, overconsumption, groundwater, according, new, research]","[car, bomb, antiislamic, attack, mosque, perth, australia]","[emaciated, lions, taiz, zoo, trapped, bloodsoaked, cages, left, starve, months, yemeni, civil, war]","[rupert, murdoch, describes, brexit, wonderful, media, mogul, likened, leaving, eu, prison, break, shared, view, donald, trump, able, man]","[40, killed, yemen, suicide, attacks]","[google, disastrous, symantec, norton, vulnerabilities, bad, gets]","[extremist, violence, rise, germany, domestic, intelligence, agency, says, farright, farleft, islamist, radical, groups, gaining, membership, country]","[bbc, news, labour, mps, pass, corbyn, noconfidence, motion]","[tiny, new, zealand, town, jobs, launches, drive, recruit, outsiders]"
1987,2016-06-30,1,"[jamaica, proposes, marijuana, dispensers, tourists, airports, following, legalisation, kiosks, desks, people, license, purchase, ounces, drug, use, stay]","[stephen, hawking, says, pollution, stupidity, biggest, threats, mankind, certainly, greedy, stupid, treatment, environment, past, decade]","[boris, johnson, says, run, tory, party, leadership]","[gay, men, ivory, coast, abused, forced, flee, homes, pictured, signing, condolence, book, victims, recent, attack, gay, nightclub, florida]","[switzerland, denies, citizenship, muslim, immigrant, girls, refused, swim, boys, report]","[palestinian, terrorist, stabs, israeli, teen, girl, death, bedroom]","[puerto, rico, default, billion, debt, friday]","[republic, ireland, fans, awarded, medal, sportsmanship, paris, mayor]",...,"[googles, free, wifi, indian, railway, stations, better, countrys, paid, services]","[mounting, evidence, suggests, hobbits, wiped, modern, humans, ancestors, 50000, years, ago]","[men, carried, tuesdays, terror, attack, istanbuls, ataturk, airport, russia, uzbekistan, kyrgyzstan, turkish, offical, said]","[calls, suspend, saudi, arabia, human, rights, council, military, aggresion, yemen]","[100, nobel, laureates, greenpeace, antigmo, obstruction, developing, world]","[british, pedophile, sentenced, 85, years, trafficking, child, abuse, images, domminich, shaw, kingpin, sexual, violence, children, sent, dozens, images, online, discussed, plans, assault, kill, child, probation]","[permitted, 1200, offshore, fracks, gulf, mexico, 2010, 2014, allowed, 72, billion, gallons, chemical, discharge, 2014]","[swimming, ridicule, french, beach, police, carry, guns, swimming, trunks, police, lifeguards, frances, busiest, beaches, carry, guns, bulletproof, vests, time, summer, amid, fears, terrorists, target, holidaymakers]","[uefa, says, minutes, silence, istanbul, victims, euro, 2016, turkey, eliminated]","[law, enforcement, sources, gun, used, paris, terrorist, attacks, came, phoenix]"


Tokenize and remove stop words for each headline for easier processing in classifiers.

In [7]:
single_array = df_text_single_array(df, columns)
print(single_array[0:1000])

[b'georgia' b'downs' b'russian' b'warplanes' b'countries' b'brink' b'war'
 b'wont' b'america' b'nato' b'help' b'wont' b'help' b'did' b'help' b'iraq'
 b'remember' b'adorable' b'9yearold' b'sang' b'opening' b'ceremonies'
 b'fake' b'refuses' b'israel' b'weapons' b'attack' b'iran' b'report'
 b'experts' b'admit' b'legalise' b'drugs' b'mom' b'missing' b'gay' b'man'
 b'bad' b'hes' b'21yearold' b'cheerleader' b'theyd' b'looking' b'afghan'
 b'prison' b'majority' b'female' b'prisoners' b'serving' b'20year'
 b'sentences' b'victims' b'rape' b'man' b'arrested' b'locked' b'hours'
 b'taking' b'photo' b'police' b'van' b'ignoring' b'entry' b'sign'
 b'elderly' b'chinese' b'women' b'sentenced' b'year' b'reeducation'
 b'labor' b'sought' b'permit' b'demonstrate' b'official' b'olympic'
 b'protest' b'area' b'british' b'resident' b'held' b'guantanamo' b'bay'
 b'wins' b'legal' b'battle' b'force' b'foreign' b'office' b'reveal'
 b'torture' b'evidence' b'syria' b'says' b'ready' b'russian' b'missile'
 b'soil' b'co

Create a single array of all tokens to build a dictionary from

In [8]:
tfidf.fit(single_array)
print(len(tfidf.get_feature_names()))
print(tfidf.get_feature_names()[0:1000])

39900
['000', '0001', '001', '003', '004', '005', '006', '007', '0077', '00s', '01', '011', '014', '014ckwh', '017', '02', '0200', '021hour', '022', '0220', '0221', '025', '03', '030', '035017', '04', '05', '050', '05eurgb', '06', '060', '062', '068as', '07', '07232014', '075', '07baku1268', '07pc', '08', '089m', '08baku671', '09', '0900', '0930', '0935', '10', '100', '1000', '10000', '100000', '1000000', '1000000000', '100000man', '100000person', '100000th', '10000s', '10000strong', '10000yearold', '10001500', '1000km', '1000megawatt', '1000per', '1000s', '1000strong', '1000x', '1000year', '1000yearold', '100200', '1004', '1004am', '1006', '100abarrel', '100apack', '100billion', '100bn', '100day', '100ds', '100fold', '100foot', '100ft', '100k', '100km', '100m', '100mil', '100million', '100mstretch', '100mw', '100page', '100s', '100th', '100x', '100year', '100yearold', '101', '1011', '1012', '1017', '101st', '102', '1020', '102000', '1021', '10262010', '102day', '102yearold', '103', '1

Create a dictionary for a tfidf vectorizer.

In [10]:
columns = df.iloc[:, 2:]
df = df_tfidf_transform(df, columns, tfidf)
# df


Top1       (0, 15944)\t1.0\n  (1, 12175)\t1.0\n  (2, 31184)\t1.0\n  (3, 38644)\t1.0\n  (4, 9834)\t1.0\n  (5, 6850)\t1.0\n  (6, 38591)\t1.0
Top2       (0, 15944)\t1.0\n  (1, 12175)\t1.0\n  (2, 31184)\t1.0\n  (3, 38644)\t1.0\n  (4, 9834)\t1.0\n  (5, 6850)\t1.0\n  (6, 38591)\t1.0
Top3       (0, 15944)\t1.0\n  (1, 12175)\t1.0\n  (2, 31184)\t1.0\n  (3, 38644)\t1.0\n  (4, 9834)\t1.0\n  (5, 6850)\t1.0\n  (6, 38591)\t1.0
Top4       (0, 15944)\t1.0\n  (1, 12175)\t1.0\n  (2, 31184)\t1.0\n  (3, 38644)\t1.0\n  (4, 9834)\t1.0\n  (5, 6850)\t1.0\n  (6, 38591)\t1.0
Top5       (0, 15944)\t1.0\n  (1, 12175)\t1.0\n  (2, 31184)\t1.0\n  (3, 38644)\t1.0\n  (4, 9834)\t1.0\n  (5, 6850)\t1.0\n  (6, 38591)\t1.0
                                                                       ...                                                                
Top21      (0, 15944)\t1.0\n  (1, 12175)\t1.0\n  (2, 31184)\t1.0\n  (3, 38644)\t1.0\n  (4, 9834)\t1.0\n  (5, 6850)\t1.0\n  (6, 38591)\t1.0
Top22      (0, 15944)\t1.0\

Top1       (0, 21793)\t1.0\n  (1, 39317)\t1.0\n  (2, 35765)\t1.0\n  (3, 28290)\t1.0\n  (4, 23263)\t1.0\n  (5, 31401)\t1.0\n  (6, 34799)\t1.0\n  (7, 15080)\t1.0\n  (8, 30349)\t1.0\n  (9, 9866)\t1.0\n  (10, 31318)\t1.0\n  (11, 38294)\t1.0\n  (12, 9426)\t1.0\n  (13, 17965)\t1.0\n  (14, 9610)\t1.0\n  (15, 32237)\t1.0
Top2       (0, 21793)\t1.0\n  (1, 39317)\t1.0\n  (2, 35765)\t1.0\n  (3, 28290)\t1.0\n  (4, 23263)\t1.0\n  (5, 31401)\t1.0\n  (6, 34799)\t1.0\n  (7, 15080)\t1.0\n  (8, 30349)\t1.0\n  (9, 9866)\t1.0\n  (10, 31318)\t1.0\n  (11, 38294)\t1.0\n  (12, 9426)\t1.0\n  (13, 17965)\t1.0\n  (14, 9610)\t1.0\n  (15, 32237)\t1.0
Top3       (0, 21793)\t1.0\n  (1, 39317)\t1.0\n  (2, 35765)\t1.0\n  (3, 28290)\t1.0\n  (4, 23263)\t1.0\n  (5, 31401)\t1.0\n  (6, 34799)\t1.0\n  (7, 15080)\t1.0\n  (8, 30349)\t1.0\n  (9, 9866)\t1.0\n  (10, 31318)\t1.0\n  (11, 38294)\t1.0\n  (12, 9426)\t1.0\n  (13, 17965)\t1.0\n  (14, 9610)\t1.0\n  (15, 32237)\t1.0
Top4       (0, 21793)\t1.0\n  (1, 39317)\t1.0\n  (2, 35

Top1       (0, 22221)\t1.0\n  (1, 27643)\t1.0
Top2       (0, 22221)\t1.0\n  (1, 27643)\t1.0
Top3       (0, 22221)\t1.0\n  (1, 27643)\t1.0
Top4       (0, 22221)\t1.0\n  (1, 27643)\t1.0
Top5       (0, 22221)\t1.0\n  (1, 27643)\t1.0
                         ...                 
Top21      (0, 22221)\t1.0\n  (1, 27643)\t1.0
Top22      (0, 22221)\t1.0\n  (1, 27643)\t1.0
Top23      (0, 22221)\t1.0\n  (1, 27643)\t1.0
Top24      (0, 22221)\t1.0\n  (1, 27643)\t1.0
Top25      (0, 22221)\t1.0\n  (1, 27643)\t1.0
Name: 49, Length: 25, dtype: object
Top1       (0, 8801)\t1.0\n  (1, 10633)\t1.0\n  (2, 5161)\t1.0\n  (3, 26703)\t1.0\n  (4, 17998)\t1.0\n  (5, 28337)\t1.0\n  (6, 39872)\t1.0
Top2       (0, 8801)\t1.0\n  (1, 10633)\t1.0\n  (2, 5161)\t1.0\n  (3, 26703)\t1.0\n  (4, 17998)\t1.0\n  (5, 28337)\t1.0\n  (6, 39872)\t1.0
Top3       (0, 8801)\t1.0\n  (1, 10633)\t1.0\n  (2, 5161)\t1.0\n  (3, 26703)\t1.0\n  (4, 17998)\t1.0\n  (5, 28337)\t1.0\n  (6, 39872)\t1.0
Top4       (0, 8801)\t1.0\n  (1, 10633)\t

Top1       (0, 7165)\t1.0\n  (1, 18347)\t1.0\n  (2, 39317)\t1.0\n  (3, 21040)\t1.0\n  (4, 15613)\t1.0\n  (5, 22769)\t1.0
Top2       (0, 7165)\t1.0\n  (1, 18347)\t1.0\n  (2, 39317)\t1.0\n  (3, 21040)\t1.0\n  (4, 15613)\t1.0\n  (5, 22769)\t1.0
Top3       (0, 7165)\t1.0\n  (1, 18347)\t1.0\n  (2, 39317)\t1.0\n  (3, 21040)\t1.0\n  (4, 15613)\t1.0\n  (5, 22769)\t1.0
Top4       (0, 7165)\t1.0\n  (1, 18347)\t1.0\n  (2, 39317)\t1.0\n  (3, 21040)\t1.0\n  (4, 15613)\t1.0\n  (5, 22769)\t1.0
Top5       (0, 7165)\t1.0\n  (1, 18347)\t1.0\n  (2, 39317)\t1.0\n  (3, 21040)\t1.0\n  (4, 15613)\t1.0\n  (5, 22769)\t1.0
                                                              ...                                                       
Top21      (0, 7165)\t1.0\n  (1, 18347)\t1.0\n  (2, 39317)\t1.0\n  (3, 21040)\t1.0\n  (4, 15613)\t1.0\n  (5, 22769)\t1.0
Top22      (0, 7165)\t1.0\n  (1, 18347)\t1.0\n  (2, 39317)\t1.0\n  (3, 21040)\t1.0\n  (4, 15613)\t1.0\n  (5, 22769)\t1.0
Top23      (0, 7165)\t1.0\n  (1,

Top1       (0, 36160)\t1.0\n  (1, 19560)\t1.0\n  (2, 35310)\t1.0\n  (3, 13229)\t1.0\n  (4, 4332)\t1.0\n  (5, 7034)\t1.0\n  (6, 8451)\t1.0\n  (7, 20421)\t1.0\n  (8, 14218)\t1.0\n  (9, 22221)\t1.0\n  (10, 7402)\t1.0\n  (11, 35707)\t1.0
Top2       (0, 36160)\t1.0\n  (1, 19560)\t1.0\n  (2, 35310)\t1.0\n  (3, 13229)\t1.0\n  (4, 4332)\t1.0\n  (5, 7034)\t1.0\n  (6, 8451)\t1.0\n  (7, 20421)\t1.0\n  (8, 14218)\t1.0\n  (9, 22221)\t1.0\n  (10, 7402)\t1.0\n  (11, 35707)\t1.0
Top3       (0, 36160)\t1.0\n  (1, 19560)\t1.0\n  (2, 35310)\t1.0\n  (3, 13229)\t1.0\n  (4, 4332)\t1.0\n  (5, 7034)\t1.0\n  (6, 8451)\t1.0\n  (7, 20421)\t1.0\n  (8, 14218)\t1.0\n  (9, 22221)\t1.0\n  (10, 7402)\t1.0\n  (11, 35707)\t1.0
Top4       (0, 36160)\t1.0\n  (1, 19560)\t1.0\n  (2, 35310)\t1.0\n  (3, 13229)\t1.0\n  (4, 4332)\t1.0\n  (5, 7034)\t1.0\n  (6, 8451)\t1.0\n  (7, 20421)\t1.0\n  (8, 14218)\t1.0\n  (9, 22221)\t1.0\n  (10, 7402)\t1.0\n  (11, 35707)\t1.0
Top5       (0, 36160)\t1.0\n  (1, 19560)\t1.0\n  (2, 35310)\t1.0

Top1       (0, 18714)\t1.0\n  (1, 32635)\t1.0\n  (2, 23690)\t1.0\n  (3, 27690)\t1.0\n  (4, 5599)\t1.0\n  (5, 16076)\t1.0\n  (6, 28771)\t1.0
Top2       (0, 18714)\t1.0\n  (1, 32635)\t1.0\n  (2, 23690)\t1.0\n  (3, 27690)\t1.0\n  (4, 5599)\t1.0\n  (5, 16076)\t1.0\n  (6, 28771)\t1.0
Top3       (0, 18714)\t1.0\n  (1, 32635)\t1.0\n  (2, 23690)\t1.0\n  (3, 27690)\t1.0\n  (4, 5599)\t1.0\n  (5, 16076)\t1.0\n  (6, 28771)\t1.0
Top4       (0, 18714)\t1.0\n  (1, 32635)\t1.0\n  (2, 23690)\t1.0\n  (3, 27690)\t1.0\n  (4, 5599)\t1.0\n  (5, 16076)\t1.0\n  (6, 28771)\t1.0
Top5       (0, 18714)\t1.0\n  (1, 32635)\t1.0\n  (2, 23690)\t1.0\n  (3, 27690)\t1.0\n  (4, 5599)\t1.0\n  (5, 16076)\t1.0\n  (6, 28771)\t1.0
                                                                        ...                                                                
Top21      (0, 18714)\t1.0\n  (1, 32635)\t1.0\n  (2, 23690)\t1.0\n  (3, 27690)\t1.0\n  (4, 5599)\t1.0\n  (5, 16076)\t1.0\n  (6, 28771)\t1.0
Top22      (0, 18714

Top1       (0, 3612)\t1.0\n  (1, 4344)\t1.0\n  (2, 39247)\t1.0\n  (3, 38512)\t1.0\n  (4, 34454)\t1.0\n  (5, 14583)\t1.0\n  (6, 3604)\t1.0\n  (7, 39247)\t1.0\n  (8, 16418)\t1.0\n  (9, 38220)\t1.0\n  (10, 32661)\t1.0\n  (11, 1444)\t1.0\n  (12, 7329)\t1.0\n  (13, 19564)\t1.0\n  (14, 33472)\t1.0\n  (15, 25134)\t1.0\n  (16, 28668)\t1.0\n  (17, 39619)\t1.0
Top2       (0, 3612)\t1.0\n  (1, 4344)\t1.0\n  (2, 39247)\t1.0\n  (3, 38512)\t1.0\n  (4, 34454)\t1.0\n  (5, 14583)\t1.0\n  (6, 3604)\t1.0\n  (7, 39247)\t1.0\n  (8, 16418)\t1.0\n  (9, 38220)\t1.0\n  (10, 32661)\t1.0\n  (11, 1444)\t1.0\n  (12, 7329)\t1.0\n  (13, 19564)\t1.0\n  (14, 33472)\t1.0\n  (15, 25134)\t1.0\n  (16, 28668)\t1.0\n  (17, 39619)\t1.0
Top3       (0, 3612)\t1.0\n  (1, 4344)\t1.0\n  (2, 39247)\t1.0\n  (3, 38512)\t1.0\n  (4, 34454)\t1.0\n  (5, 14583)\t1.0\n  (6, 3604)\t1.0\n  (7, 39247)\t1.0\n  (8, 16418)\t1.0\n  (9, 38220)\t1.0\n  (10, 32661)\t1.0\n  (11, 1444)\t1.0\n  (12, 7329)\t1.0\n  (13, 19564)\t1.0\n  (14, 33472)\t1.0\n

Top1       (0, 27736)\t1.0\n  (1, 1885)\t1.0\n  (2, 35104)\t1.0\n  (3, 32729)\t1.0\n  (4, 2285)\t1.0\n  (5, 7878)\t1.0\n  (6, 5731)\t1.0\n  (7, 27407)\t1.0\n  (8, 5537)\t1.0\n  (9, 15229)\t1.0\n  (10, 2731)\t1.0
Top2       (0, 27736)\t1.0\n  (1, 1885)\t1.0\n  (2, 35104)\t1.0\n  (3, 32729)\t1.0\n  (4, 2285)\t1.0\n  (5, 7878)\t1.0\n  (6, 5731)\t1.0\n  (7, 27407)\t1.0\n  (8, 5537)\t1.0\n  (9, 15229)\t1.0\n  (10, 2731)\t1.0
Top3       (0, 27736)\t1.0\n  (1, 1885)\t1.0\n  (2, 35104)\t1.0\n  (3, 32729)\t1.0\n  (4, 2285)\t1.0\n  (5, 7878)\t1.0\n  (6, 5731)\t1.0\n  (7, 27407)\t1.0\n  (8, 5537)\t1.0\n  (9, 15229)\t1.0\n  (10, 2731)\t1.0
Top4       (0, 27736)\t1.0\n  (1, 1885)\t1.0\n  (2, 35104)\t1.0\n  (3, 32729)\t1.0\n  (4, 2285)\t1.0\n  (5, 7878)\t1.0\n  (6, 5731)\t1.0\n  (7, 27407)\t1.0\n  (8, 5537)\t1.0\n  (9, 15229)\t1.0\n  (10, 2731)\t1.0
Top5       (0, 27736)\t1.0\n  (1, 1885)\t1.0\n  (2, 35104)\t1.0\n  (3, 32729)\t1.0\n  (4, 2285)\t1.0\n  (5, 7878)\t1.0\n  (6, 5731)\t1.0\n  (7, 27407)\t

Name: 184, Length: 25, dtype: object
Top1       (0, 6727)\t1.0\n  (1, 7762)\t1.0\n  (2, 29285)\t1.0\n  (3, 9056)\t1.0\n  (4, 38589)\t1.0\n  (5, 27057)\t1.0\n  (6, 26590)\t1.0\n  (7, 16329)\t1.0\n  (8, 11272)\t1.0\n  (9, 15122)\t1.0\n  (10, 5065)\t1.0\n  (11, 18721)\t1.0\n  (12, 21551)\t1.0\n  (13, 37211)\t1.0\n  (14, 36636)\t1.0\n  (15, 39317)\t1.0
Top2       (0, 6727)\t1.0\n  (1, 7762)\t1.0\n  (2, 29285)\t1.0\n  (3, 9056)\t1.0\n  (4, 38589)\t1.0\n  (5, 27057)\t1.0\n  (6, 26590)\t1.0\n  (7, 16329)\t1.0\n  (8, 11272)\t1.0\n  (9, 15122)\t1.0\n  (10, 5065)\t1.0\n  (11, 18721)\t1.0\n  (12, 21551)\t1.0\n  (13, 37211)\t1.0\n  (14, 36636)\t1.0\n  (15, 39317)\t1.0
Top3       (0, 6727)\t1.0\n  (1, 7762)\t1.0\n  (2, 29285)\t1.0\n  (3, 9056)\t1.0\n  (4, 38589)\t1.0\n  (5, 27057)\t1.0\n  (6, 26590)\t1.0\n  (7, 16329)\t1.0\n  (8, 11272)\t1.0\n  (9, 15122)\t1.0\n  (10, 5065)\t1.0\n  (11, 18721)\t1.0\n  (12, 21551)\t1.0\n  (13, 37211)\t1.0\n  (14, 36636)\t1.0\n  (15, 39317)\t1.0
Top4       (0, 6727)\

Top1       (0, 38636)\t1.0\n  (1, 32972)\t1.0\n  (2, 30667)\t1.0\n  (3, 10647)\t1.0\n  (4, 9183)\t1.0\n  (5, 21764)\t1.0\n  (6, 21314)\t1.0\n  (7, 5939)\t1.0\n  (8, 6924)\t1.0\n  (9, 6856)\t1.0\n  (10, 17234)\t1.0\n  (11, 23021)\t1.0\n  (12, 17682)\t1.0\n  (13, 34470)\t1.0\n  (14, 11893)\t1.0\n  (15, 19320)\t1.0\n  (16, 28335)\t1.0\n  (17, 5939)\t1.0\n  (18, 6924)\t1.0\n  (19, 34112)\t1.0
Top2       (0, 38636)\t1.0\n  (1, 32972)\t1.0\n  (2, 30667)\t1.0\n  (3, 10647)\t1.0\n  (4, 9183)\t1.0\n  (5, 21764)\t1.0\n  (6, 21314)\t1.0\n  (7, 5939)\t1.0\n  (8, 6924)\t1.0\n  (9, 6856)\t1.0\n  (10, 17234)\t1.0\n  (11, 23021)\t1.0\n  (12, 17682)\t1.0\n  (13, 34470)\t1.0\n  (14, 11893)\t1.0\n  (15, 19320)\t1.0\n  (16, 28335)\t1.0\n  (17, 5939)\t1.0\n  (18, 6924)\t1.0\n  (19, 34112)\t1.0
Top3       (0, 38636)\t1.0\n  (1, 32972)\t1.0\n  (2, 30667)\t1.0\n  (3, 10647)\t1.0\n  (4, 9183)\t1.0\n  (5, 21764)\t1.0\n  (6, 21314)\t1.0\n  (7, 5939)\t1.0\n  (8, 6924)\t1.0\n  (9, 6856)\t1.0\n  (10, 17234)\t1.0\n 

Top1       (0, 38713)\t1.0\n  (1, 38656)\t1.0\n  (2, 6744)\t1.0\n  (3, 18714)\t1.0\n  (4, 5903)\t1.0\n  (5, 27802)\t1.0\n  (6, 566)\t1.0\n  (7, 23199)\t1.0\n  (8, 29383)\t1.0\n  (9, 1221)\t1.0\n  (10, 23292)\t1.0\n  (11, 38713)\t1.0\n  (12, 34914)\t1.0\n  (13, 10592)\t1.0\n  (14, 18773)\t1.0\n  (15, 16882)\t1.0\n  (16, 17995)\t1.0\n  (17, 34914)\t1.0\n  (18, 32287)\t1.0\n  (19, 10592)\t1.0
Top2       (0, 38713)\t1.0\n  (1, 38656)\t1.0\n  (2, 6744)\t1.0\n  (3, 18714)\t1.0\n  (4, 5903)\t1.0\n  (5, 27802)\t1.0\n  (6, 566)\t1.0\n  (7, 23199)\t1.0\n  (8, 29383)\t1.0\n  (9, 1221)\t1.0\n  (10, 23292)\t1.0\n  (11, 38713)\t1.0\n  (12, 34914)\t1.0\n  (13, 10592)\t1.0\n  (14, 18773)\t1.0\n  (15, 16882)\t1.0\n  (16, 17995)\t1.0\n  (17, 34914)\t1.0\n  (18, 32287)\t1.0\n  (19, 10592)\t1.0
Top3       (0, 38713)\t1.0\n  (1, 38656)\t1.0\n  (2, 6744)\t1.0\n  (3, 18714)\t1.0\n  (4, 5903)\t1.0\n  (5, 27802)\t1.0\n  (6, 566)\t1.0\n  (7, 23199)\t1.0\n  (8, 29383)\t1.0\n  (9, 1221)\t1.0\n  (10, 23292)\t1.0\n

Top1       (0, 7380)\t1.0\n  (1, 22138)\t1.0\n  (2, 37070)\t1.0\n  (3, 16329)\t1.0\n  (4, 25689)\t1.0\n  (5, 4241)\t1.0\n  (6, 7706)\t1.0\n  (7, 12293)\t1.0\n  (8, 34738)\t1.0\n  (9, 3250)\t1.0\n  (10, 36877)\t1.0\n  (11, 22221)\t1.0\n  (12, 17401)\t1.0\n  (13, 6744)\t1.0\n  (14, 24403)\t1.0\n  (15, 8758)\t1.0\n  (16, 30607)\t1.0\n  (17, 9157)\t1.0\n  (18, 20074)\t1.0\n  (19, 17836)\t1.0
Top2       (0, 7380)\t1.0\n  (1, 22138)\t1.0\n  (2, 37070)\t1.0\n  (3, 16329)\t1.0\n  (4, 25689)\t1.0\n  (5, 4241)\t1.0\n  (6, 7706)\t1.0\n  (7, 12293)\t1.0\n  (8, 34738)\t1.0\n  (9, 3250)\t1.0\n  (10, 36877)\t1.0\n  (11, 22221)\t1.0\n  (12, 17401)\t1.0\n  (13, 6744)\t1.0\n  (14, 24403)\t1.0\n  (15, 8758)\t1.0\n  (16, 30607)\t1.0\n  (17, 9157)\t1.0\n  (18, 20074)\t1.0\n  (19, 17836)\t1.0
Top3       (0, 7380)\t1.0\n  (1, 22138)\t1.0\n  (2, 37070)\t1.0\n  (3, 16329)\t1.0\n  (4, 25689)\t1.0\n  (5, 4241)\t1.0\n  (6, 7706)\t1.0\n  (7, 12293)\t1.0\n  (8, 34738)\t1.0\n  (9, 3250)\t1.0\n  (10, 36877)\t1.0\n  (

Name: 285, Length: 25, dtype: object
Top1     [philippine, man, loses, life, saving, 30]                                                            
Top2     [pirate, party, won, 21, votes, german, federal, election]                                            
Top3     [hivpositive, travelers, enter, usa, 15, countries, ban, hivpositive, travelers, entry]               
Top4     [hey, reddit, pirates, got, votes, germanys, elections, night]                                        
Top5     [french, outraged, roman, polanskis, arrest]                                                          
                             ...                                                                               
Top21    [cuba, pins, hopes, new, forprofit, farms]                                                            
Top22    [outcry, polanskis, detention]                                                                        
Top23    [ghanas, kayayo, girls, striving, better, life]           

Top1     [professor, david, nutt, population]                                                                                                                                               
Top2     [israel, migrant, workers, receive, shelter, food, medical, care, perform, manual, labor, outside, camps, earn, salary, stay, camp, asylum, claims, decided, months, years, slaves]
Top3     [exxonmobil, pay, iraq, 50bn, fees, plus, barrel, oil, moved, exclusive, development, iraqs, largest, petroleum, reserve, west, qurna, oilfield]                                   
Top4     [british, soldiers, gunned, preparing, tea, afghan, policeman, training]                                                                                                           
Top5     [dutch, lowest, cannabis, users, europe, report]                                                                                                                                   
                               ...                     

Top1     [chinas, city, built, reason, maintain, growth, rate, madness, vid]                                                                                  
Top2     [russia, confirmed, missiles, observed, northern, norway, yesterday, launched, nuclear, submarine, built, carry, nuclear, warheads]                  
Top3     [swedish, pirate, party, proposes, internet, rights, european, parliament, wants, suggestions]                                                       
Top4     [let, world, know, supreme, court, maintains, censorship, press, brazil]                                                                             
Top5     [girls, india, refusing, child, brides, despite, 2006, law, banning, ageold, practice, parents, rural, india, want, marry, daughters, legal, age, 18]
                                                                                 ...                                                                          
Top21    [india, form, new, southern, state]  

Top1     [france, considers, banning, burqa]                                                                                                                                 
Top2     [turkeys, prime, minister, israel, commits, war, crimes, israel, response, antisemite]                                                                              
Top3     [airline, passengers, right, refuse, naked, nbody, scanners]                                                                                                        
Top4     [haitis, head, voodoo, priest, question, church, scientology, operation]                                                                                            
Top5     [cousin, says, hungry, sleeping, bushes, dead, bodies, nearby, stops, dont, says, thats, life, life, say, insists, thats, life, like, death, lasts, little]         
                                                                                    ...                                           

Top1     [investigation, reveals, 1985, live, aid, band, aid, raised, 250, million, 95, money, diverted, support, ethiopian, rebels, buy, weapons]            
Top2     [number, asked, postreddit, life, heres, entry, kiva, fellow, armenia]                                                                               
Top3     [irish, catholic, bishop, asks, parishioners, help, cover, costs, sex, abuse, lawsuits, parishioners, abuse, victims, disgusted]                     
Top4     [night, lights, chile]                                                                                                                               
Top5     [dubai, seeks, arrest, israeli, prime, minister, mossad, chief, murder, hamas, leader]                                                               
                                                  ...                                                                                                         
Top21    [ugandas, environment, dilemma, count

Name: 420, Length: 25, dtype: object
Top1     [immersion, propaganda, north, koreans]                                                                                                                                                                     
Top2     [israel, celebrates, holocaust, irony, week, day, israel, stops, remember, holocaust, implement, law, allows, israel, deport, 10s, thousands, palestinians, showing, papers, palestinians, born, west, bank]
Top3     [military, warns, impending, oil, shortages, aware, way, life, soon, going, change, dramatically, permanently]                                                                                              
Top4     [wikileaks, plans, post, video, showing, massacre, afghani, civilians]                                                                                                                                      
Top5     [greatest, reason, strike]                                                                        

Name: 444, Length: 25, dtype: object
Top1     [chomsky, said, interrogators, told, written, things, israeli, government, did, like, suggested, government, world, likes, say, said]
Top2     [hamas, destroys, dozens, homes, southern, gaza, wait, arab, news]                                                                   
Top3     [protests, turn, deadly, thailand, pics]                                                                                             
Top4     [greatest, bank, robbery, world, history, banks, doing, robbing]                                                                     
Top5     [heterosexual, couple, court, battle, right, civil, union, partnership, homosexual, couples, allowed]                                
                                                         ...                                                                                  
Top21    [chavez, asks, venezuelans, use, twitter, blow, whistle, currency, speculators]                 

Top1     [female, prime, minister, australia]                                                                                                                                                                                         
Top2     [cashstrapped, north, korea, demanded, united, states, pay, us65, trillion, 75, trillion, compensation, decades, hostility, nlmao]                                                                                           
Top3     [canadian, heritage, minister, lied, calling, dmca, critics, radical, extremists]                                                                                                                                            
Top4     [early, britons, 16000, years, ago, cannibals, using, sophisticated, butchering, techniques, strip, flesh, bones, men, women, children]                                                                                      
Top5     [irish, support, palestine, time, ireland, huge, supporter, jewish,

Top1     [28000, deaths, caused, war, drugs, mexico, finally, president, felipe, calderon, agrees, discuss, drug, legalization]                                                                                                              
Top2     [israeli, troops, south, border, lebanon, fired, says]                                                                                                                                                                              
Top3     [photographer, captures, tragic, death, firefighter, oil, spill, china, 47, photos]                                                                                                                                                 
Top4     [tv, cameras, capture, distressing, moment, 5yrold, palestinian, boy, sees, father, arrested, stealing, water]                                                                                                                      
Top5     [fine, june, day, author, launching, be

Top1     [imf, fears, social, explosion]                                                                                                                                                                     
Top2     [child, sex, abuse, church, failings, lead, belgians, formally, renounce, religion]                                                                                                                 
Top3     [israels, accountability, soldiers, seldom, punished, killing, palestinians, 148, cases, 288, palestinians, killed, 2006, 2009, 22, resulted, opening, military, police, investigation, unit, probe]
Top4     [rare, example, openness, china, set, website, citizens, express, views, leaders, users, wasted, little, time, firing, unusually, blunt, criticisms, government]                                    
Top5     [zulu, king, condemns, photos, virginity, tests, annual, dance]                                                                                                        

Name: 556, Length: 25, dtype: object
Top1     [afghanistans, president, karzai, admits, office, receives, million, year, cash, iran, uses, presidential, expenses, says, expects, continue, says, provides, bags, money]                                                       
Top2     [twothirds, west, african, nation, benin, poorest, world, water]                                                                                                                                                                 
Top3     [wikileaks, taunts, pentagon, server, mirrors, usa, hosting, amazon, aws]                                                                                                                                                        
Top4     [idf, palestinian, peace, activist, bars, months, based, solely, testimony, mental, disabled, man, man, using, interrogation, contradicted, multiple, times, confessed, crimes, obviously, didnt, commit, court, charges, thrown]
Top5     [torture, kill

Top1     [wikileaks, vanishes, web, company, removes, dns, support, media, guardiancouk]                                                                                                                                                                        
Top2     [chinese, drive, new, motorcycles, cars, like, bikes, disaster, results]                                                                                                                                                                               
Top3     [mexican, police, chief, took, job, men, didnt, want, shot, dead]                                                                                                                                                                                      
Top4     [assange, actually, accused, rape, dude, just, didnt, use, condom, busted, random, exclusivetosweeden, law]                                                                                                                 

Top1     [aussie, just, want, know, brazil, sri, lanka, suffering, flooding, probably, need, aid, 120000, homeless, hundreds, dead]                                                                       
Top2     [wikileaks, volunteer, detained, agents]                                                                                                                                                         
Top3     [italys, constitutional, court, thursday, struck, key, parts, law, protect, prime, minister, silvio, berlusconi, prosecution]                                                                    
Top4     [pregnant, al, jazeera, reporter, objects, humiliating, israeli, security, checks, told, bra]                                                                                                    
Top5     [rise, new, global, elite, worlds, superrich, nation, unto]                                                                                                                        

Top1     [photos, fucking, amazing, bbcs, human, planet, series]                                                                                                                                        
Top2     [london, soon, illegal, lie, ground, away, food, drink, free, homeless]                                                                                                                        
Top3     [man, plowed, group, bikers, brazilian, central, banker, hes, charged, attempted, murder]                                                                                                      
Top4     [ok, mexican, drug, war, interesting, mexican, army, soldiers, caught, transporting, cocaine, ton]                                                                                             
Top5     [double, irish, companies, bypass, high, incometax, rates, uk, graphic]                                                                                                                    

Name: 679, Length: 25, dtype: object
Top1     [internet, protests, started, new, zealand, government, passed, controversial, strikes, filesharing, urgency]         
Top2     [brave, saudi, guy, speaks, television]                                                                               
Top3     [oscar, nominated, documentary, filmmaker, photographer, tim, hetherington, chris, hondros, reportedly, killed, libya]
Top4     [british, tourist, beaten, death, dubai, police, left, cell, water, food, days]                                       
Top5     [vladimir, putin, reveals, plan, boost, russian, birth, rate, reverse, russias, declining, population]                
                                                          ...                                                                  
Top21    [ministry, defence, ordered, disclose, involvement, usled, rendition]                                                 
Top22    [coming, balkan, war, washington, times]                  

Top1     [mi6, attacks, alqaeda, operation, cupcake, british, intelligence, hacked, alqaeda, online, magazine, replaced, bombmaking, instructions, recipe, cupcakes]    
Top2     [biomass, map, popularly, eaten, fish, populations, 1900, versus, 2000, theyre, practically, extinct]                                                          
Top3     [report, strikes, internet, laws, violate, human, rights]                                                                                                      
Top4     [european, racism, xenophobia, immigrants, rise]                                                                                                               
Top5     [nepal, census, recognizes, gender, believed, world]                                                                                                           
                                 ...                                                                                                                       

Top1     [colombian, army, colonel, admits, unit, murdered, 57, civilians, dressed, uniforms, claimed, rebels, killed, combat]                                                                                       
Top2     [workers, making, converse, sneakers, indonesia, say, supervisors, throw, shoes, slap, face, dogs, pigs, nike, brands, owner, admits, abuse, occurred, contractors, make, hip, hightops, says, little, stop]
Top3     [murdochs, ordered, testify, contempt, bloomberg]                                                                                                                                                           
Top4     [murdochs, option, drop, bid, real, issue, avoid, humiliation, retreat, real, business, uk, anymore, just, set, disintegrating, assets]                                                                     
Top5     [israelis, palestinians, march, jerusalem, support, palestinian, independence, joint, jewisharab, march, 20, years]                    

Top1     [pakistan, let, china, stealth, chopper, bin, laden, raid]                                                                                
Top2     [excellent, war, map, fighting, libya]                                                                                                    
Top3     [essex, police, charge, man, water, fight, planned, blackberry, messenger]                                                                
Top4     [29yearold, tibetan, monk, dies, selfimmolation, protest, southwest, china, heard, calling, return, dalai, lama, freedom, tibetan, people]
Top5     [north, sea, oil, spill, worst, decade, government, described, leak, substantial, amp, estimates, tons]                                   
                                                          ...                                                                                      
Top21    [german, boy, told, clean, calls, cops, forced, labour]                                                

Top1     [vladimir, putin, action, man, pics]                                                                                                                                                                      
Top2     [sexabuse, victims, urge, international, criminal, court, prosecute, pope, benedict, xvi, crimes, humanity]                                                                                               
Top3     [philip, murphy, ambassador, germany, written, open, letter, admonishing, german, jerks, racially, abusing, black, member, staff, berlin, recently]                                                       
Top4     [jobless, young, left, harm, todays, youth, unemployment, doing, felt, decades, affected, society, large, economist]                                                                                      
Top5     [india, hindu, woman, set, giving, birth, girl]                                                                                                

Name: 801, Length: 25, dtype: object
Top1     [arrest, bush, visits, bc, amnesty, tells, ottawa, rights, body, says, canadian, international, law, oblige, canada, detain, bush, investigate, war, crimes, torture]                               
Top2     [new, zealand, oil, spill, pictures, countrys, worst, maritime, environmental, disaster]                                                                                                            
Top3     [usedcar, salesman, iran, proxy, iran, assassination, plot, doesnt, add, experts]                                                                                                                   
Top4     [chinese, artist, ai, weiwei, named, powerful, person, art, world, arts, magazine, china, complains, political, bias]                                                                               
Top5     [food, insecurity, comes, london, shoppers, londons, spitalfields, market, shock, food, prices, rocket, 800, cent, british, red, c

Top1     [scottish, school, removes, concrete, play, area, replaces, urban, jungle, containing, trees, hills, boulders, tunnels, year, period, results, 80, fewer, accidents]                  
Top2     [roman, catholic, churchs, paedophile, investigator, jailed, possessing, thousnds, child, porn, images]                                                                               
Top3     [indonesians, killed, 750, endangered, orangutans, year, practices, quantified, believed, threat, existence, red, apes]                                                               
Top4     [fury, young, activist, publishing, nude, selfphoto, unprecedented, young, egyptian, female, sunday, dared, publish, nude, photo, nude, photos, blog, act, free, personal, expression]
Top5     [norways, anders, behring, breivik, open, court, hearing]                                                                                                                             
                                   ...  

Name: 847, Length: 25, dtype: object
Top1     [north, korea, leader, kimjong, il, died]                                                                                                                                                                       
Top2     [land, smiles, photographer, entered, north, korea, posing, businessman, looking, open, chocolate, factory, images, rarely, captured, rarely, distributed, west, stark, glimmers, everyday, life, worlds, gulag]
Top3     [north, korea, says, leader, kim, jong, il, died]                                                                                                                                                               
Top4     [twitter, coordinating, tool, arab, uprisings, sold, 300million, stake, member, saudi, royal, family]                                                                                                           
Top5     [number, journalists, imprisoned, worldwide, shot, 20, percent, highest, level, mi

Top1     [fidel, castro, lambasted, republican, presidential, race, greatest, competition, idiocy, ignorance, world, seen]                                                                                                                                 
Top2     [swedes, norwegians, broke, power, percent]                                                                                                                                                                                                       
Top3     [doctors, borders, halting, work, detention, centers, libyan, city, misrata, detainees, tortured, denied, urgent, medical, care]                                                                                                                  
Top4     [sopa, coming, canada, warns, michael, geist]                                                                                                                                                                                              

Top1     [adds, vatican, moneylaundering, concern, list]                                                                                                                                  
Top2     [japanese, whalers, cut, season, short]                                                                                                                                          
Top3     [japan, shutting, nuclear, power, industry]                                                                                                                                      
Top4     [chinese, state, media, thanks, women, hot, hard, think, awkward, way, media, outlet, celebrate, international, womens, day]                                                     
Top5     [india, battle, continues, hindu, temples, riches, vaults, temple, kerala, yielded, gold, gems, worth, estimated, 21, billion, lawsuit, raised, thorny, question, owns, treasure]
                                                                 

Top1     [police, officers, arrested, owner, used, autoparts, business, filed, complaint, accusing, officers, kidnapping, torturing, refused, pay, bribe, burned, death, inside, home, juarez, right, scheduled, testify]                                                   
Top2     [150, afghan, schoolgirls, poisoned, antieducation, attack]                                                                                                                                                                                                        
Top3     [supreme, court, canada, ruled, current, emergency, wiretap, provision, allows, surveillance, court, order, unconstitutional]                                                                                                                                      
Top4     [quebec, shuts, scientologylinked, narconon, center, dangerous, practices, narconon, treated, drug, addicts, purification, treatments, concocted, scientology, founder, ron, hubbard, sc

Top1     [indian, state, let, forest, guards, shoot, poachers, sight, maharashtra, government, says, killing, poachers, longer, considered, crime, tiger, deaths, state, year]                  
Top2     [skydive, parachute, successful]                                                                                                                                                       
Top3     [south, korea, chemically, castrate, repeat, child, rapist]                                                                                                                            
Top4     [italy, police, cracking, ferrari, lamborghini, drivers, driving, fast, italy, like, southern, europe, drowning, debt, police, pursuing, drivers, make, sure, declaring, paying, taxes]
Top5     [president, poland, criminalization, drug, use, mistake]                                                                                                                               
                                   

Top1     [announcing, declaration, internet, freedom, bunch, organizations, individuals, getting, today, launch, beginning, process, creation, internet, declaration, freedom]                                                  
Top2     [tampon, king, sparked, period, change, indias, women]                                                                                                                                                                 
Top3     [prisoners, brazil, reducing, sentences, producing, electricity, 16, hrs, spent, pedaling, charge, battery, connected, bike, prisoners, good, standing, shave, day, sentences]                                         
Top4     [scientology, abandoned, ron, hubbards, granddaughter, david, miscavifes, father]                                                                                                                                      
Top5     [falkvinge, drm, outright, banned, legislation, isnt, coding]                              

Name: 1007, Length: 25, dtype: object
Top1     [thousands, uk, workers, blacklisted, political, views, corporations, uk, used, secret, blacklisting, database, screen, left, wing, troublemakers, union, sympathizers, potential, job, recruits]
Top2     [ireland, abandoning, religion, faster, country]                                                                                                                                                 
Top3     [pharma, giant, pfizer, fined, bribing, officials, eastern, europe, china, raw, story]                                                                                                           
Top4     [starts, landmark, agent, orange, cleanup, vietnam]                                                                                                                                              
Top5     [interpol, issues, arrest, warrant, sea, shepherd, chief]                                                                                    

Top1     [russia, just, declassified, news, shake, world, gem, markets, core, discovery, vast, new, diamond, field, containing, trillions, carats, supply, global, markets, 3000, years]                                           
Top2     [canada, rises, world, economic, freedom, ranking, plummets, 18th, canada]                                                                                                                                                
Top3     [russia, signs, deal, north, korea, write, 90, north, koreas, 11, billion, debt, moscow]                                                                                                                                  
Top4     [japan, prevent, chinese, landing, disputed, islands, means]                                                                                                                                                              
Top5     [africas, warm, heart, cold, welcome, chinese, chinese, miners, merchants, labo

Top1     [home, man, builds, 60ftlong, world, war, trench, garden, invites, history, buffs, round, reenactment]                                                                        
Top2     [pakistani, couple, kill, daughter, talking, boy]                                                                                                                             
Top3     [thousands, iranians, chanting, death, america, burnt, flags, friday, mark, 33rd, anniversary, seizure, embassy, tehran]                                                      
Top4     [pakistani, coupe, kill, daughter, pouring, acid, face, talking, boy]                                                                                                         
Top5     [china, opposition, party, lasts, day, founder, gets, years, prison]                                                                                                          
                                         ...                                    

Top1     [real, north, korea, problem, world, fears, missiles, 250000, innocents, onethird, children, forced, slave, labor, starvation, rations, daily, subjected, heinous, torture, executions]                                                                           
Top2     [north, korean, satellite, tumbling, control, officials, say]                                                                                                                                                                                                     
Top3     [tibetans, vote, petitions, government, ignored, protest, kind, criminalized, imprisoned, tibetans, denied, lawyer]                                                                                                                                               
Top4     [google, boss, eric, schmidt, im, proud, tax, avoidance, schemeits, called, capitalism]                                                                                                    

Top1     [google, unveils, detailed, map, north, korea]                                                                                                                                                        
Top2     [libor, lies, revealed, rigging, 300, trillion, benchmark, benchmark, rate, 300, trillion, contracts, based, honesty, new, evidence, bankings, biggest, scandal, shows, traders, took, license, cheat]
Top3     [queen, beatrix, netherlands, announce, abdication]                                                                                                                                                   
Top4     [israel, noshow, rights, review]                                                                                                                                                                      
Top5     [currently, canadawide, outage, entire, visa, credit, card, network]                                                                                           

Top1     [vw, shares, 11b, profits, workers, profit, sharing, hard, earned, deserved, equitable, participation, employees, success]                         
Top2     [north, korea, fires, short, range, missiles, sea, japan]                                                                                          
Top3     [bellicose, north, korea, forces, china, shift, stance, old, friend, chinese, newspapers, calling, north, korea, ungrateful, unreliable, liability]
Top4     [zimbabwean, police, interrogating, young, children, aged, 46, school, parents, radios, police, confiscating, wind, radios, night, time, raids]    
Top5     [dutch, lesbian, couple, nineyearold, turkish, foster, child, gone, hiding, row, turkey, muslim, children, cared, gay, christian, couples]         
                                                                            ...                                                                             
Top21    [google, readers, demise, awful, iranians, use, a

Top1     [rhinos, mozambique, killed, poachers]                                                                                                                                                                    
Top2     [2yearold, girl, gets, windpipe, stem, cells]                                                                                                                                                             
Top3     [japan, russia, want, finally, end, world, war, ii, agree, abnormal]                                                                                                                                      
Top4     [new, zealand, says, bizarre, baby, names, 4real, juztice, lucifer]                                                                                                                                       
Top5     [split, son, expelled, school, tells, lesbian, couple, legally, married, south, african, couple, said, called, meeting, principal, secunda, hig

Top1     [open, letter, turkish, tv, network, closed, tomorrow, broadcasted, protest, coverage]                                                                                                                                          
Top2     [subway, contractor, destroys, ancient, tombsworkers, destroyed, 12, historically, significant, tombs, 22003000, yrs, old, chinas, da, gong, mountain, yesterday, conducting, archaeological, excavations, tombs, gone, morning]
Top3     [bosses, collapsed, banks, sent, jail, chancellor, told]                                                                                                                                                                        
Top4     [china, executes, communist, party, official, raping, series, underage, girls, reportedly, young, 11]                                                                                                                           
Top5     [egyptian, president, appoints, member, terror, group, 

Top1     [german, minister, proposes, banning, companies, participate, spying, activities, doing, business, eu]                                                                                   
Top2     [wont, lovin, mcdonalds, admits, 90, employees, zerohours, contracts, guaranteed, work, stable, income, fastfood, chain, potentially, largest, zerohours, employer, uks, private, sector]
Top3     [tourist, accidentally, snaps, finger, 600yearold, statue, italys, florence]                                                                                                             
Top4     [drone, strikes, kill, militants, yemen, americans, urged, leave]                                                                                                                        
Top5     [gustl, mollath, man, locked, psychatric, ward, years, tried, whizzleblow, illegal, banking, activities, free, german, court, ruled]                                                     
                         

Top1     [john, mcafee, reveals, details, gadget, thwart, nsa, government, bans, sale, ill, sell, england, japan, world, coming, stopped]                                                
Top2     [pope, francis, rip, rewrite, vatican, constitution]                                                                                                                            
Top3     [secret, cold, war, documents, reveal, nsa, spied, senators]                                                                                                                    
Top4     [swiss, army, trains, invasion, bankrupt, france, looking, money]                                                                                                               
Top5     [glenn, greenwald, working, new, nsa, revelations, assassination, program]                                                                                                      
                                            ...                       

Top1     [eightyearold, girl, china, youngest, living, person, diagnosed, lung, cancer, attributed, pollution]                                                                                                           
Top2     [decent, person, eaten, days, shameful, things, survive, tormented, typhoon, victims, scour, food]                                                                                                              
Top3     [member, army, national, guards, tactical, human, intelligence, team, commits, suicide, suicide, note, says, forced, commit, war, crimes, crimes, humanity, iraq]                                               
Top4     [spain, toughens, new, sun, tax, law, homes, searched, illegal, solar, panels, warrant, offenders, fined, 60million, 80, million]                                                                               
Top5     [mathematicallyimprobable, voting, pattern, proves, fraud, philippine, polls]                                          

Top1     [tony, blair, george, bush, exchanged, voluminous, correspondence, prior, start, military, operations, iraq, uk, moving, declassify, details, talks, inquiry, britains, involvement, conflict, british, media, reported]
Top2     [trolley, bus, blast, russian, city, volgograd, killed, 10, people, emergencies, services, reported, explosion, comes, day, terrorist, attack, city, killed, 17, people]                                                
Top3     [glenn, greenwald, says, nsa, gchq, dismayed, dont, access, inflight, internet, communication, idea, human, beings, communicate, moments, ability, monitor, intolerable]                                                
Top4     [2000, tons, explosives, disappear, chemical, plant, yekaterinburg, russia]                                                                                                                                             
Top5     [saudi, prince, faces, execution, murder]                                              

Top1     [russia, says, ukrainian, affairs]                                                                                                                                                                                           
Top2     [ukraine, protester, falls, policeman, barricades, offered, live, tv, interview, romance, instead, rants, corruption, tv, station, shames, pictures, dead, friends, true, fell, love, policeman, met, im, going, tell, story]
Top3     [apple, security, flaw, actually, nsa, backdoor]                                                                                                                                                                             
Top4     [turkish, main, opposition, convenes, erdoans, voice, recording, regarding, billion, cash, stored, properties, erdoan, family]                                                                                               
Top5     [mt, gox, dead, rumors, swirl, insolvency, japanbased, bitcoin, exc

Name: 1437, Length: 25, dtype: object
Top1     [twothirds, afghanistan, reconstruction, money, gone, company, dyncorp, international]                                                                                                  
Top2     [683, muslim, brotherhood, supporters, sentenced, death, egypt]                                                                                                                         
Top3     [hitlers, maid, elisabeth, kalhammer, breaks, silence, 71, years]                                                                                                                       
Top4     [cold, war, spysatellite, images, unveil, lost, cities, photos, triple, number, known, archaeological, sites, middle, east, revealing, thousands, ancient, cities, roads, canals, ruins]
Top5     [china, bans, big, bang, theory, american, shows, internet, analysts, say, government, censors, concerned, ideological, issues]                                                  

Top1     [neurotoxic, pesticides, blamed, worlds, bee, collapse, harming, butterflies, worms, fish, birds, evidence, sufficient, trigger, regulatory, action]                                                   
Top2     [rejects, australias, feeble, bid, strip, tasmanian, forests, heritage, status]                                                                                                                        
Top3     [doctors, vote, ban, uk, cigarette, sales, born, 2000, british, medical, association, hails, vote, step, achieving, goal, tobaccofree, society, 2035, critics, illiberal]                              
Top4     [iran, arrests, vulgar, online, video, showing, people, singing, dancing, support, nations, world, cup, team]                                                                                          
Top5     [world, act, years, save, oceans, pollution, overfishing, watchdog]                                                                                        

Top1     [22, yr, old, australian, aboriginal, woman, died, jail, twice, seeking, medical, attention, authorities, deemed, healthy, imprisoned, unpaid, fines, totalling, just, 1000]
Top2     [russia, admits, soldiers, caught, ukraine]                                                                                                                                 
Top3     [israeli, prime, minister, benjamin, netanyahus, approval, rating, hits, 38, 82, month]                                                                                     
Top4     [burger, king, worldwide, agreed, acquire, tim, hortons, 125, billion, deal, headquarters, canada]                                                                          
Top5     [malaysia, airlines, flights, airline, burning, 2million, day]                                                                                                              
                                      ...                                                 

Top1     [wind, blows, away, fossil, power, nordics, baltics, arrival, wind, power, large, scale, pushed, electricity, prices, eroding, profitability, fossil, power, stations]                                  
Top2     [rhino, horn, demand, vietnam, drops, 33, year, information, campaign, successfully, changes, minds, people, think, rhino, horn, medicinal, value]                                                      
Top3     [dont, threaten, warn, says, india, china, unveiling, plans, construct, roads, way, chinese, border, match, troop, infrastructure, build, chinese]                                                      
Top4     [doctors, borders, weve, reached, ceiling, maxed, ebola, aid, resources]                                                                                                                                
Top5     [saudi, arabia, reward, outstanding, teachers, bmw, cars, cash, gifts]                                                                                 

Top1     [iran, week, unveiled, monument, jewish, soldiers, killed, iraniraq, war, lasted, 1980, 1988, saw, countries, suffer, millions, casualties, billions, dollar, damage, jewish, community, leaders, number, iranian, religious, officials, took, ceremony]
Top2     [activists, drop, interview, dvds, north, korea, balloon]                                                                                                                                                                                               
Top3     [curiosity, rover, drills, mars, rock, finds, water]                                                                                                                                                                                                    
Top5     [qatar, hires, fake, fans, stadiums, migrant, workers, qatar, dollar, hour, sitting, stadiums, pretending, fun, applaud, wave]                                                                                           

Top1     [australian, learner, driver, stitched, chainsaw, wound, drank, gin, pain, driving, hospital, lost, supreme, court, appeal, drinkdriving, charge]
Top2     [ukraine, truce, broken, 139, times, day]                                                                                                        
Top4     [71, chileans, approve, legislation, decriminalize, abortion]                                                                                    
Top5     [mexican, marijuana, production, slumps, face, legalization]                                                                                     
                                     ...                                                                                                                  
Top21    [18, dead, haiti, carnival, accident]                                                                                                            
Top22    [russian, prosecutors, seek, 10, year, sentence, putin, criti

Top1     [isis, taliban, announced, jihad, khaama, press, kp]                                                                                                                               
Top2     [mexican, police, capture, leader, jurez, cartel]                                                                                                                                  
Top3     [canadian, insurer, grey, power, denies, flooded, home, claim, senior, away, cancer, treatment, leaving, home, unattended, days]                                                   
Top4     [germany, plans, time, officially, recognize, killing, hundreds, thousands, armenians, turkish, regime, 100, years, ago, genocide]                                                 
Top5     [bell, faces, 750million, lawsuit, tracking, customers, cellphone, internet, usage]                                                                                                
                                                ...    

Top1     [medical, marijuana, legal, forms, supreme, court, rules, canada]                            
Top2     [elon, musks, spacex, plans, launch, 4000, satellites, broadcasting, internet, entire, world]
Top3     [global, diabetes, rates, rising, obesity, spreads]                                          
Top4     [extramarital, sex, isnt, adultery, long, youve, paid, japanese, court, rules]               
Top5     [rupert, murdoch, getting, ready, step, ceo, 21st, century, fox]                             
                                       ...                                                            
Top21    [azerbaijan, bans, guardian, news, outlets, reporting, baku, european, games]                
Top22    [ttip, vote, postponed, european, parliament, descends, panic, trade, deal]                  
Top23    [reuters, saudi, arabia, ready, raise, oil, output, meet, demand]                            
Top24    [ancient, church, uncovered, highway, project, israel]          

Top1     [cnn, amp, cbc, sued, pirating, 31, second, youtube, video, addition, claims, copyright, infringement, media, giants, face, allegations, breached, anticircumvention, measures, dmca]                                                                 
Top2     [ireland, refuses, extradite, man, prison, inhumane]                                                                                                                                                                                                  
Top3     [american, flag, raised, havana, time, 54, years]                                                                                                                                                                                                     
Top4     [humans, definitely, killed, mammoths, giant, armadillo, sabretooth, tiger, scientists, claim, new, research, settles, argument, humans, climate, change, responsible, end, megafauna, claimed, debunks, myth, early, humans, l

Name: 1800, Length: 25, dtype: object
Top1     [transpacific, partnership, trade, deal, reached]                                                                                                                                                 
Top2     [previously, unknown, lines, epic, gilgamesh, discovered, stolen, cuneiform, tablet, serendipitous, deal, history, museum, amp, smuggler, provided, new, insight, famous, stories, told]          
Top3     [montreal, dump, billion, liters, sewage, st, lawrence, river]                                                                                                                                    
Top4     [human, rights, longer, priority, uk, government, says, foreign, office, chief]                                                                                                                   
Top5     [isis, militants, blow, ancient, arch, triumph, palmyra]                                                                                 

Top1     [reports, gun, possible, explosions, near, saintdenis, area, paris, france]                                            
Top2     [france, rejects, fear, renews, commitment, 30000, syrian, refugees]                                                   
Top3     [paris, attacks, mother, died, using, body, shield, fiveyearold, son, bullets]                                         
Top4     [despite, paris, attacks, hollande, says, erecting, walls, fences, end, europe]                                        
Top5     [indian, government, starts, offering, 90, discount, cancer, drugs, reduces, costs, cardiac, implants, 50, 60, percent]
                                                                  ...                                                           
Top21    [japanese, whalers, fined, million, australian, court]                                                                 
Top22    [man, joker, mask, vows, kill, arab, week, quebec]                                      

Name: 1871, Length: 25, dtype: object
Top1     [china, participate, sanctions, nkorea]                                                 
Top2     [aiming, ban, child, marriages, shot, pakistan, antiislamic, blasphemous]               
Top3     [tsai, ingwen, elected, taiwans, female, president]                                     
Top4     [schoolgirls, report, abuse, young, asylum, seekers]                                    
Top5     [prisoners, freed, iran, prisoners, include, washington, post, reporter, jason, rezaian]
                                                   ...                                           
Top21    [facebook, outsources, fight, racist, posts, germany]                                   
Top22    [gunfire, explosions, heard, burkina, faso, hotel]                                      
Top23    [burkina, faso, security, forces, raid, besieged, hotel, free, hostages]                
Top24    [julian, assange, questioned, swedish, prosecutors, london]            

Top1     [china, bans, depictions, gay, people, adultery, night, stands, cleavage, reincarnation, television]                                                                                                                                                         
Top2     [peanut, allergy, risk, reduced, 80, consuming, peanuts, infant, research, suggests]                                                                                                                                                                         
Top3     [philippines, seizes, north, korean, ship]                                                                                                                                                                                                                   
Top4     [refugee, children, calais, jungle, camp, raped, aid, workers, claim]                                                                                                                                     

Top1     [wildfire, destroying, fort, mcmurray, city, evacuated]                                                                                                                                  
Top2     [selfproclaimed, sharia, police, germany, stand, trial, grounds, violating, laws, wearing, uniforms, political, messages]                                                                
Top3     [doubts, rise, ttip, france, threatens, block, euus, deal]                                                                                                                               
Top4     [seven, worlds, biggest, banks, agreed, pay, 324, million, settle, private, lawsuit, accusing, rigging, rate, benchmark, used, 553, trillion, derivatives, market]                       
Top5     [jewish, man, handed, life, prison, grisly, murder, east, jerusalem, teen, israeli, ringleader, killing, palestinian, teenager, abducted, burned, death, 2014, sentenced, life, prison]  
                         

Top1     [jamaica, proposes, marijuana, dispensers, tourists, airports, following, legalisation, kiosks, desks, people, license, purchase, ounces, drug, use, stay]                                                              
Top2     [stephen, hawking, says, pollution, stupidity, biggest, threats, mankind, certainly, greedy, stupid, treatment, environment, past, decade]                                                                              
Top3     [boris, johnson, says, run, tory, party, leadership]                                                                                                                                                                    
Top4     [gay, men, ivory, coast, abused, forced, flee, homes, pictured, signing, condolence, book, victims, recent, attack, gay, nightclub, florida]                                                                            
Top5     [switzerland, denies, citizenship, muslim, immigrant, girls, refused, swim, boys, repor

IndexError: single positional indexer is out-of-bounds

In [32]:
from sklearn.feature_extraction.text import CountVectorizer
vectorizer = CountVectorizer(
    analyzer = 'word',
    strip_accents = 'unicode',
    lowercase = True,
    stop_words = sw,
)

From sklearn, create a CountVectorizer, a class of methods that vectorizes text data into counts of their words, letters, etc. In this instance, the CountVectorizer calcuates word frequencies via the analyzer parameter.

# Begin training
Using cross cross validation methods, create a train_test_split, fit the training sets to a logistic classifier model, and interpret the accuracy.

In [61]:
from sklearn.cross_validation import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    features,
    data_labels,
    train_size = 0.80,
    random_state = 1234
)

ValueError: Found input variables with inconsistent numbers of samples: [553377, 1986]

Create a training set via the CrossValidation library in sklearn. 

In [58]:
from sklearn.linear_model import LogisticRegression
log_model = LogisticRegression()

Create a logistic regresssion classifier object.

In [59]:
log_model = log_model.fit(X=X_train, y=y_train)
y_pred = log_model.predict(X_test)

Produce a log model that can be trained on the X_train partition of the dataset and the y_train partition of the dataset. Then produce an array of predicted labels correponding to the X_test dataset partition.

# Baseline result (logistic regression)

In [60]:
from sklearn.metrics import accuracy_score
print(make_percent(accuracy_score(y_test, y_pred)))


45.73%


The resulting accuracy of the classifer.

Remember, this accuracy is from using the top headline for each day, not taking into consideration the other 24 headlines. Lets try to use some different classification algorithms and compare results.

# Different classifiers
We will try Naive Bayes, Decision tree (gini criteria), KNN, and some ensemble methods.

## Naive Bayes

In [None]:
from sklearn.naive_bayes import GaussianNB
gnbc = GaussianNB()
gnbc.fit(X_train.toarray(), y_train)
print(make_percent(gnbc.score(X_test.toarray(), y_test)))

Accuracy is even worse! That's ok, differnt classifiers - different results. Lets try a decision tree next.

## Decision Tree (Gini)

In [None]:
from sklearn.tree import DecisionTreeClassifier