In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
%matplotlib inline

from urllib.request import urlopen
from bs4 import BeautifulSoup
import re
import datetime
from time import sleep

In [2]:
def collapse_speaker(lines):
    #Merge text with same speaker as previous text

    merged_lines = []

    ml_index = 0

    for i in range(len(lines)):
        if len(lines[i]) == 2:
            merged_lines.append(lines[i])
            ml_index += 1
        elif any(c.islower() for c in (lines[i][0])):
            #print(i)
            merged_lines[ml_index-1][1] = merged_lines[ml_index-1][1] + ' ' + lines[i][0]
        else:
            merged_lines.append(lines[i])
            ml_index += 1
            
    return merged_lines

In [3]:
def gridify(merged_lines):
    #Create dataframe

    grid = []

    for i in range(len(merged_lines)):
        if len(merged_lines[i]) <= 2:
            grid.append(merged_lines[i][::-1])
        else:
            grid.append(merged_lines[i])
            
    grid = pd.DataFrame(grid)
    grid.columns = ['text', 'speaker']
    cols = grid.columns.tolist()
    cols = cols[-1:] + cols[:-1]
    grid = grid[cols] 
    
    return grid

In [4]:
def get_commercials(grid):
    commercial = np.zeros(len(grid))

    for i in grid[grid['text'].str.contains("COMMERCIAL")].index:
        commercial[i] = 1

    segment = [1]
    segment_index = 1

    for i in range(1, len(commercial)):
        if commercial[i] < commercial[i-1]:
            segment_index += 1
            segment.append(segment_index)
        else:
            segment.append(segment_index)

    grid['segment'] = segment

In [5]:
def get_features(grid, show):
    #Get number of letters, number of words, and speaker type
    number_of_letters = []
    number_of_words = []
    speaker_type = []
    
    for i in range(len(grid)):
        #Number of letters in the text field
        if any(c.islower() for c in (grid['text'][i])):
            number_of_letters.append((len(grid['text'][i])))
        else:
            number_of_letters.append(np.nan)
        
        
        #Number of words in the text field
        if any(c.islower() for c in (grid['text'][i])):
            number_of_words.append((len(grid['text'][i].split(' '))))
        else:
            number_of_words.append(np.nan)
    
        #Is speaker host or guest
        if any(c.islower() for c in (grid['text'][i])):
            #speaker_type.append((len(grid['text'][i].split(' '))))
            if ("WALLACE" in grid['speaker'][i]):
                speaker_type.append('host')
            else:
                speaker_type.append('guest')
        else:
            speaker_type.append(np.nan)

    grid['number_of_letters'] = number_of_letters
    grid['number_of_words'] = number_of_words
    grid['speaker_type'] = speaker_type    

In [6]:
#FIXING TO ADD SHOW VARIABLE

def get_features(grid, show):
    #Get number of letters, number of words, speaker type, and order of speech within show
    number_of_letters = []
    number_of_words = []
    speaker_type = []
    speech_order = []
    speech_index = 1
    
    for i in range(len(grid)):
        #Number of letters in the text field
        if any(c.islower() for c in (grid['text'][i])):
            number_of_letters.append((len(grid['text'][i])))
        else:
            number_of_letters.append(np.nan)
        
        
        #Number of words in the text field
        if any(c.islower() for c in (grid['text'][i])):
            number_of_words.append((len(grid['text'][i].split(' '))))
        else:
            number_of_words.append(np.nan)
    
        #Is speaker host or guest
        if any(c.islower() for c in (grid['text'][i])):
            if show == 'fox':
                if "WALLACE" in grid['speaker'][i]:
                    speaker_type.append('host')
                else:
                    speaker_type.append('guest')
            if show == 'maddow':
                if "MADDOW" in grid['speaker'][i]:
                    speaker_type.append('host')
                else:
                    speaker_type.append('guest')
            if show == 'hannity':
                if "HANNITY" in grid['speaker'][i]:
                    speaker_type.append('host')
                else:
                    speaker_type.append('guest')
        else:
            speaker_type.append(np.nan)
            
        if grid['speaker'][i] is not None:
            speech_order.append(speech_index)
            speech_index += 1
        else:
            speech_order.append(np.nan)

    grid['number_of_letters'] = number_of_letters
    grid['number_of_words'] = number_of_words
    grid['speaker_type'] = speaker_type    
    grid['speech_order'] = speech_order

In [7]:
def get_video(grid):
    video_shift = np.zeros(len(grid))

    for i in grid[grid['text'].str.contains("VIDEO")].index:
        video_shift[i] = 1

    video = [0]
    video_index = 0

    for i in range(1, len(video_shift)):
        if video_shift[i] < video_shift[i-1]:
            """
            if video_index == 0:
                video_index = 1
            if video_index == 1:
                video_index = 0
            """
            video_index = (video_index + 1) % 2
            video.append(video_index)
        else:
            video.append(video_index)

    grid['tape'] = video

In [8]:
def foxnews_parser(site):
    page = urlopen(site)
    soup = BeautifulSoup(page, 'lxml')
    full_text = soup.find('div', class_='article-text')
    date_text = date_text = soup.findAll('meta')
    
    #Split full text into lines

    text = []
    for line in full_text.get_text().split("\n"):
        text.append(line)
        
    #Separate speaker from text

    lines = []
    for i in range(len(text)):
        if '):' in text[i]:
            lines.append(text[i].split(':', 1))
        elif text[i] == '(CROWD CHANTING)':
            pass
        elif 'ALL RIGHTS RESERVED.' in text[i]:
            pass
        elif any(c.islower() for c in text[i].split(':', 1)[0]):
            lines.append([text[i]])
            #print(1)
        else:
            lines.append(text[i].split(':', 1)) #Only split on first colon

    #remove extraneous lines        
    lines = lines[2:-2]
    
    merged_lines = collapse_speaker(lines)
    
    grid = gridify(merged_lines)

    get_commercials(grid)
    get_features(grid, 'fox')
    get_video(grid)
    
    grid['network'] = 'Fox News'
    grid['show'] = 'Fox News Sunday'
    
    grid['show_date'] = re.search("([0-9]{4}\-[0-9]{2}\-[0-9]{2})", str(date_text)).group(1)
    
    return grid

In [9]:
def hannity_parser(site):
    page = urlopen(site)
    soup = BeautifulSoup(page, 'lxml')
    full_text = soup.find('div', class_='article-text')
    date_text = date_text = soup.findAll('meta')
    
    #Split full text into lines

    text = []
    for line in full_text.get_text().split("\n"):
        text.append(line)
        
    #Separate speaker from text

    lines = []
    for i in range(len(text)):
        if '):' in text[i]:
            lines.append(text[i].split(':', 1))
        elif text[i] == '(CROWD CHANTING)':
            pass
        elif 'ALL RIGHTS RESERVED.' in text[i]:
            pass
        elif any(c.islower() for c in text[i].split(':', 1)[0]):
            lines.append([text[i]])
            #print(1)
        else:
            lines.append(text[i].split(':', 1)) #Only split on first colon

    #remove extraneous lines        
    lines = lines[2:-2]
    
    merged_lines = collapse_speaker(lines)
    
    grid = gridify(merged_lines)

    get_commercials(grid)
    get_features(grid, 'hannity')
    get_video(grid)
    
    grid['network'] = 'Fox News'
    grid['show'] = 'Hannity'
    
    grid['show_date'] = re.search("([0-9]{4}\-[0-9]{2}\-[0-9]{2})", str(date_text)).group(1)
    
    return grid

In [10]:
def maddow_parser(site):
    page = urlopen(site)
    soup = BeautifulSoup(page, 'lxml')
    
    full_text = soup.find('div', class_='field field-name-body field-type-text-with-summary field-label-inline inline')
    date_text = date_text = soup.findAll('meta')
    
    #Split full text into lines

    text = []
    for line in full_text.get_text().split("\n"):
        if 'Copyright 2017 ASC Services' not in line:
            text.append(line)
        else:
            break
        
    #Separate speaker from text

    lines = []
    for i in range(len(text)):
        if '):' in text[i]:
            lines.append(text[i].split(':', 1))
        elif text[i] == '(CROWD CHANTING)':
            pass
        elif 'ALL RIGHTS RESERVED.' in text[i]:
            break
        elif text[i] == '\xa0':
            pass
        elif 'Copyright 2017 ASC Services' in text[i]:
            break
        elif re.search(r'\((.*?)\)',text[i]) is not None:
            temp = re.search(r'\((.*?)\)',text[i]).group(1)
            if ('COMMERCIAL' not in temp) and ('VIDEO' not in temp):
                pass
        elif any(c.islower() for c in text[i].split(':', 1)[0]):
            lines.append([text[i]])
        else:
            lines.append(text[i].split(':', 1)) #Only split on first colon

    #remove extraneous lines        
    #lines = lines[4:-2]

    
    fixed_lines = []

    for i in range(len(lines)):
        if ('RACHEL MADDOW, MSNBC HOST' in lines[i]) or ('RACHEL MADDOW SHOW' in lines[i]):
            fixed_lines = lines[i:]
            break
        elif ('RACHEL MADDOW, MSNBC HOST' in str(lines[i])) or ('RACHEL MADDOW SHOW' in str(lines[i])):
            fixed_lines = lines[i:]
            break
        elif ('RACHEL MADDOW, MSNBC ANCHOR' in str(lines[i])) or ('RACHEL MADDOW, MSNBC ANCHOR' in str(lines[i])):
            fixed_lines = lines[i:]
            break   
    
    lines = fixed_lines

    """
    if re.search(r'^[A-Z]*(?!:)',str(lines[0])) is None:
        lines[0] = 'RACHEL MADDOW: '+lines[0]
    """
    
    merged_lines = collapse_speaker(lines)
    
    grid = gridify(merged_lines)

    get_commercials(grid)
    get_features(grid, 'maddow')
    get_video(grid)
    
    grid['network'] = 'MSNBC'
    grid['show'] = 'The Rachel Maddow Show'
    
    #Get date
    temp_date = re.search("([0-9]\/[0-9]{2}\/[0-9]{4})", str(date_text)).group(1)
    grid['show_date'] = datetime.datetime.strptime(temp_date, '%m/%d/%Y').strftime('%Y-%m-%d')
    
    
    return grid

In [11]:
foxnews = "http://www.foxnews.com/transcript/2017/03/26/priebus-on-future-trumps-agenda-rep-jordan-details-conservative-opposition-to-gop-leadership-iraqi-pm-haider-al-abadi-talks-terror.html"
#foxnews = "http://www.foxnews.com/transcript/2017/03/19/paul-ryan-on-efforts-to-repeal-replace-obamacare-rep-nunes-previews-comey.html"

In [12]:
foxnews_parser(foxnews)

Unnamed: 0,speaker,text,segment,number_of_letters,number_of_words,speaker_type,speech_order,tape,network,show,show_date
0,"CHRIS WALLACE, HOST",I’m Chris Wallace. President Trump suffers a ...,1,148.0,26.0,host,1.0,0,Fox News,Fox News Sunday,2017-03-26
1,,(BEGIN VIDEOTAPE),1,,,,,0,Fox News,Fox News Sunday,2017-03-26
2,"REP. PAUL RYAN, R-WIS., SPEAKER OF THE HOUSE",I don't know what else to say other than Obam...,1,75.0,17.0,guest,2.0,1,Fox News,Fox News Sunday,2017-03-26
3,PRESIDENT DONALD TRUMP,Bad things are going to happen to ObamaCare. ...,1,155.0,31.0,guest,3.0,1,Fox News,Fox News Sunday,2017-03-26
4,WALLACE,"We’ll discuss prospects for tax reform, trade...",1,697.0,115.0,host,4.0,1,Fox News,Fox News Sunday,2017-03-26
5,"REP. DEVIN NUNES, R-CALIF., INTELLIGENCE COMMI...",I felt like I had a duty and obligation to te...,1,125.0,28.0,guest,5.0,1,Fox News,Fox News Sunday,2017-03-26
6,WALLACE,We’ll ask our Sunday panel whether Nunes’ mov...,1,188.0,29.0,host,6.0,1,Fox News,Fox News Sunday,2017-03-26
7,,(END VIDEOTAPE),1,,,,,1,Fox News,Fox News Sunday,2017-03-26
8,WALLACE,And hello again from Fox News in Washington. ...,1,606.0,106.0,host,7.0,0,Fox News,Fox News Sunday,2017-03-26
9,"KEVIN CORKE, FOX NEWS CORRESPONDENT","Chris, it certainly makes sense for the White...",1,913.0,166.0,guest,8.0,0,Fox News,Fox News Sunday,2017-03-26


In [13]:
msnbc = 'http://www.msnbc.com/transcripts/rachel-maddow-show/2017-03-30'
#msnbc = 'http://www.msnbc.com/transcripts/rachel-maddow-show/2017-03-29'

In [14]:
maddow_parser(msnbc)

Unnamed: 0,speaker,text,segment,number_of_letters,number_of_words,speaker_type,speech_order,tape,network,show,show_date
0,"RACHEL MADDOW, MSNBC HOST",And thanks to you at home for joining us for ...,1,3281.0,541.0,host,1.0,0,MSNBC,The Rachel Maddow Show,2017-03-30
1,"GEN. MIKE FLYNN, FORMER NATIONAL SECURITY ADVISER",I mean five people around her have had have b...,1,182.0,33.0,guest,2.0,0,MSNBC,The Rachel Maddow Show,2017-03-30
2,MADDOW,Now that Mike Flynn himself is asking for imm...,1,8773.0,1490.0,host,3.0,0,MSNBC,The Rachel Maddow Show,2017-03-30
3,"SHANE HARRIS, THE WALL STREET JOURNAL SENIOR W...","Thanks, Rachel. Thanks for having me.",1,38.0,7.0,guest,4.0,0,MSNBC,The Rachel Maddow Show,2017-03-30
4,MADDOW,"So, you`re reporting is that Flynn has made t...",1,528.0,93.0,host,5.0,0,MSNBC,The Rachel Maddow Show,2017-03-30
5,HARRIS,"Yes, our sources have confirmed that to us an...",1,591.0,113.0,guest,6.0,0,MSNBC,The Rachel Maddow Show,2017-03-30
6,MADDOW,Uh-huh.,1,8.0,2.0,host,7.0,0,MSNBC,The Rachel Maddow Show,2017-03-30
7,HARRIS,And he feels that he needs protections in ord...,1,179.0,34.0,guest,8.0,0,MSNBC,The Rachel Maddow Show,2017-03-30
8,MADDOW,"And, Shane, what can you tell us about how of...",1,386.0,69.0,host,9.0,0,MSNBC,The Rachel Maddow Show,2017-03-30
9,HARRIS,"In general, what investigators are weighing i...",1,1158.0,204.0,guest,10.0,0,MSNBC,The Rachel Maddow Show,2017-03-30


In [15]:
fox_sunday_url = 'http://www.foxnews.com/on-air/fox-news-sunday-chris-wallace/transcripts'
fox_sunday_links = []

conn = urlopen(fox_sunday_url)
html = conn.read()

soup = BeautifulSoup(html)
links = soup.find_all('a')

for tag in links:
    link = tag.get('href',None)
    if (link is not None) & ('http://www.foxnews' in link):
        fox_sunday_links.append(link)



 BeautifulSoup([your markup])

to this:

 BeautifulSoup([your markup], "lxml")

  markup_type=markup_type))


In [16]:
#Only keeping links from 2017
fox_sunday_links = fox_sunday_links[:15]

In [17]:
maddow_url = 'http://www.msnbc.com/transcripts/rachel-maddow-show'
maddow_links = []

conn = urlopen(maddow_url)
html = conn.read()

soup = BeautifulSoup(html)
links = soup.find_all('a')

for tag in links:
    link = tag.get('href',None)
    if (link is not None) and ('/2017-' in link):
        maddow_links.append('http://www.msnbc.com/' + link)



 BeautifulSoup([your markup])

to this:

 BeautifulSoup([your markup], "lxml")

  markup_type=markup_type))


In [201]:
fox_sunday_data = pd.DataFrame()

for i in range(len(fox_sunday_links)):
    print(i)
    fox_sunday_data = fox_sunday_data.append(foxnews_parser(fox_sunday_links[i]))
    sleep(10)

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14


In [160]:
fox_sunday_data.to_csv('fox_sunday_data.csv')

In [311]:
maddow_data = pd.DataFrame()
maddow_error_list = []

for i in range(len(maddow_links)):
    print(i)
    try:
        maddow_data = maddow_data.append(maddow_parser(maddow_links[i]))
    except Exception:
        print("ERROR: "+str(i))
        maddow_error_list.append(i)
        pass
    sleep(10)

0
1
ERROR: 1
2
3
ERROR: 3
4
5
ERROR: 5
6
7
ERROR: 7
8
ERROR: 8
9
ERROR: 9
10
ERROR: 10
11
ERROR: 11
12
13
14
15
ERROR: 15
16
ERROR: 16
17
ERROR: 17
18
19
20
ERROR: 20
21
22
ERROR: 22
23
ERROR: 23
24
ERROR: 24
25
ERROR: 25
26
ERROR: 26
27
ERROR: 27
28
ERROR: 28
29
ERROR: 29
30
ERROR: 30
31
ERROR: 31
32
33
ERROR: 33
34
35
36
ERROR: 36
37
ERROR: 37
38
39
ERROR: 39
40
41
ERROR: 41
42
ERROR: 42
43
44
ERROR: 44
45
46
47
ERROR: 47
48
ERROR: 48
49
ERROR: 49
50
51
ERROR: 51
52
ERROR: 52
53
ERROR: 53
54
ERROR: 54
55
ERROR: 55
56
57
ERROR: 57
58
ERROR: 58
59
ERROR: 59


In [318]:
maddow_data.to_csv('maddow_data.csv')

In [262]:
hannity_links = []

for i in range(7):

    hannity_url = 'http://www.foxnews.com/on-air/hannity/transcripts?page='+str(i)
    temp_links = []

    conn = urlopen(hannity_url)
    html = conn.read()

    soup = BeautifulSoup(html)
    links = soup.find_all('a')

    for tag in links:
        link = tag.get('href',None)
        if (link is not None) & ('http://www.foxnews.com/transcript' in link):
            temp_links.append(link)
            
    hannity_links.extend(temp_links)



 BeautifulSoup([your markup])

to this:

 BeautifulSoup([your markup], "lxml")

  markup_type=markup_type))


In [263]:
len(hannity_links)

154

In [264]:
hannity_links = np.unique(np.array(hannity_links))

In [265]:
len(hannity_links)

71

In [325]:
hannity_error_list = []
hannity_data = pd.DataFrame()

for i in range(len(hannity_links)):
    print(i)
    try:
        hannity_data = hannity_data.append(hannity_parser(hannity_links[i]))
    except Exception:
        print("ERROR: "+str(i))
        hannity_error_list.append(i)
        pass
    sleep(10)

0
ERROR: 0
1
ERROR: 1
2
ERROR: 2
3
ERROR: 3
4
ERROR: 4
5
ERROR: 5
6
ERROR: 6
7
ERROR: 7
8
ERROR: 8
9
ERROR: 9
10
11
12
ERROR: 12
13
ERROR: 13
14
ERROR: 14
15
ERROR: 15
16
ERROR: 16
17
ERROR: 17
18
19
ERROR: 19
20
21
ERROR: 21
22
ERROR: 22
23
ERROR: 23
24
ERROR: 24
25
ERROR: 25
26
ERROR: 26
27
ERROR: 27
28
ERROR: 28
29
ERROR: 29
30
31
ERROR: 31
32
33
34
35
36
37
ERROR: 37
38
39
40
41
42
43
44
45
46
ERROR: 46
47
48
49
50
51
ERROR: 51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70


In [326]:
hannity_data

Unnamed: 0,speaker,text,segment,number_of_letters,number_of_words,speaker_type,speech_order,tape,network,show,show_date
0,"SEAN HANNITY, HOST","And welcome to ""Hannity."" So President Obama...",1,418.0,70.0,host,1.0,0,Fox News,Hannity,2017-01-18
1,,(BEGIN VIDEO CLIP),1,,,,,0,Fox News,Hannity,2017-01-18
2,PRESIDENT BARACK OBAMA,I have enjoyed working with all of you. That ...,1,676.0,122.0,guest,2.0,1,Fox News,Hannity,2017-01-18
3,,(END VIDEO CLIP),1,,,,,1,Fox News,Hannity,2017-01-18
4,HANNITY,"All right, pretty disrespectful. Instead of f...",1,398.0,67.0,host,3.0,0,Fox News,Hannity,2017-01-18
5,,(BEGIN VIDEO CLIPS),1,,,,,0,Fox News,Hannity,2017-01-18
6,"HANNITY, JAN. 12, 2009",You've gotten to spend a little time with him.,1,47.0,10.0,host,4.0,1,Fox News,Hannity,2017-01-18
7,THEN-PRESIDENT GEORGE W. BUSH,"Yes. He's a -- he's a smart, capable person w...",1,163.0,33.0,guest,5.0,1,Fox News,Hannity,2017-01-18
8,"HANNITY, NOV. 9, 2010",You made a decision not to attack President O...,1,51.0,10.0,host,6.0,1,Fox News,Hannity,2017-01-18
9,G. BUSH,It's not just President Obama. I suspect I'll...,1,100.0,19.0,guest,7.0,1,Fox News,Hannity,2017-01-18


In [327]:
hannity_data.to_csv('hannity_data.csv')

In [277]:
#hannity_error_list = error_list[12:]

In [278]:
hannity_error_list

[0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 12,
 13,
 14,
 15,
 16,
 17,
 19,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 31,
 37,
 46,
 51]

In [283]:
hannity_data

Unnamed: 0,speaker,text,segment,number_of_letters,number_of_words,speaker_type,speech_order,tape,network,show,show_date
0,"SEAN HANNITY, HOST","And welcome to ""Hannity."" So President Obama...",1,418.0,70.0,guest,1.0,0,Fox News,Hannity,2017-01-18
1,,(BEGIN VIDEO CLIP),1,,,,,0,Fox News,Hannity,2017-01-18
2,PRESIDENT BARACK OBAMA,I have enjoyed working with all of you. That ...,1,676.0,122.0,guest,2.0,1,Fox News,Hannity,2017-01-18
3,,(END VIDEO CLIP),1,,,,,1,Fox News,Hannity,2017-01-18
4,HANNITY,"All right, pretty disrespectful. Instead of f...",1,398.0,67.0,guest,3.0,0,Fox News,Hannity,2017-01-18
5,,(BEGIN VIDEO CLIPS),1,,,,,0,Fox News,Hannity,2017-01-18
6,"HANNITY, JAN. 12, 2009",You've gotten to spend a little time with him.,1,47.0,10.0,guest,4.0,1,Fox News,Hannity,2017-01-18
7,THEN-PRESIDENT GEORGE W. BUSH,"Yes. He's a -- he's a smart, capable person w...",1,163.0,33.0,guest,5.0,1,Fox News,Hannity,2017-01-18
8,"HANNITY, NOV. 9, 2010",You made a decision not to attack President O...,1,51.0,10.0,guest,6.0,1,Fox News,Hannity,2017-01-18
9,G. BUSH,It's not just President Obama. I suspect I'll...,1,100.0,19.0,guest,7.0,1,Fox News,Hannity,2017-01-18


In [321]:
maddow_data.show_date.value_counts()

2017-03-14    91
2017-04-06    79
2017-02-09    71
2017-01-17    68
2017-03-20    66
2017-01-26    65
2017-02-21    60
2017-04-03    60
2017-02-17    57
2017-03-21    56
2017-03-09    56
2017-03-22    55
2017-03-13    54
2017-03-30    53
2017-02-06    46
2017-02-01    42
2017-02-02    38
2017-04-10    37
2017-02-16    35
2017-02-13    34
Name: show_date, dtype: int64

In [322]:
fox_sunday_data.show_date.value_counts()

2017-02-05    344
2017-03-19    313
2017-03-26    290
2017-01-08    261
2017-01-22    241
2017-02-12    233
2017-01-29    228
2017-02-19    220
2017-01-15    216
2017-03-12    207
2017-02-26    205
2017-03-05    203
2017-04-02    199
2017-04-09    186
2017-01-01    154
Name: show_date, dtype: int64

In [333]:
frames = [fox_sunday_data, maddow_data, hannity_data]
full_data = pd.concat(frames)

In [334]:
full_data

Unnamed: 0,speaker,text,segment,number_of_letters,number_of_words,speaker_type,speech_order,tape,network,show,show_date
0,"CHRIS WALLACE, FOX NEWS ANCHOR",I’m Chris Wallace. President Trump strikes b...,1,176.0,31.0,host,1.0,0,Fox News,Fox News Sunday,2017-04-09
1,,(BEGIN VIDEOTAPE),1,,,,,0,Fox News,Fox News Sunday,2017-04-09
2,"DONALD TRUMP, PRESIDENT OF THE UNITED STATES",It is in this vital national security interes...,1,137.0,26.0,guest,2.0,1,Fox News,Fox News Sunday,2017-04-09
3,WALLACE,We will discuss Mr. Trump's order to launch c...,1,953.0,161.0,host,3.0,1,Fox News,Fox News Sunday,2017-04-09
4,"ADMIRAL WILLIAM MCRAVEN, FMR. HEAD OF U.S. SPE...",We cannot justify and see this mass migration...,1,110.0,22.0,guest,4.0,1,Fox News,Fox News Sunday,2017-04-09
5,WALLACE,"All, right now, on ""Fox News Sunday.""",1,38.0,8.0,host,5.0,1,Fox News,Fox News Sunday,2017-04-09
6,,(END VIDEOTAPE),1,,,,,1,Fox News,Fox News Sunday,2017-04-09
7,WALLACE,And hello again from Fox News in Washington. ...,1,657.0,112.0,host,6.0,0,Fox News,Fox News Sunday,2017-04-09
8,"KRISTIN FISHER, FOX NEWS CORRRESPONDENT","Chris, residents of the same Syrian town hit ...",1,1525.0,256.0,guest,7.0,0,Fox News,Fox News Sunday,2017-04-09
9,WALLACE,"Kristin Fisher, reporting live from Palm Beac...",1,215.0,35.0,host,8.0,0,Fox News,Fox News Sunday,2017-04-09


In [336]:
np.sum(full_data.number_of_words)

593647.0

In [338]:
np.sum(maddow_data.number_of_words)

155087.0

In [341]:
np.sort(hannity_data.show_date.unique())

array(['2017-01-18', '2017-01-19', '2017-01-31', '2017-02-02',
       '2017-02-16', '2017-02-20', '2017-02-21', '2017-02-22',
       '2017-02-23', '2017-02-24', '2017-03-01', '2017-03-02',
       '2017-03-03', '2017-03-06', '2017-03-07', '2017-03-08',
       '2017-03-09', '2017-03-10', '2017-03-14', '2017-03-15',
       '2017-03-16', '2017-03-17', '2017-03-20', '2017-03-21',
       '2017-03-22', '2017-03-23', '2017-03-27', '2017-03-28',
       '2017-03-29', '2017-03-30', '2017-04-01', '2017-04-03',
       '2017-04-04', '2017-04-05', '2017-04-06', '2017-04-07',
       '2017-04-10', '2017-04-11', '2017-04-12', '2017-04-14'], dtype=object)