In [7]:
from lxml.html import parse
from urllib.request import urlopen
from bs4 import BeautifulSoup

In [122]:
import numpy as np
import pandas as pd

## get news from bloomberg

In [65]:
url = 'https://www.bloomberg.com/search?query=amazon&page=1'

request_object = urlopen(url)

soup = BeautifulSoup(request_object, 'lxml')

In [67]:
tag = BeautifulSoup('<h1 class="search-result-story__headline">', 'lxml').h1

In [68]:
tag.name

'h1'

In [69]:
tag['class']

['search-result-story__headline']

In [70]:
type(tag['class'])

list

In [71]:
tag.attrs

{'class': ['search-result-story__headline']}

In [72]:
headlines = soup.find_all('h1', class_="search-result-story__headline".split())

In [73]:
for item in headlines:
    print(item.get_text())


 Amazon's Small Paychecks: Charts 
 How Amazon Can Keep Growing Prime Membership 
 Piper Jaffray's Olson Sees Amazon Web Services Growing 
 Amazon’s Jeff Wilke Talks About AI Ambitions, Robots, and Trump 
 Amazon Hits 100 Million Prime Members 
 Amazon Hits 100 Million Prime Members 
 Amazon Nears Record Amid Wall Street Praise for Prime Milestone 
 Billionaire Bezos Made 59 Times More than Amazon's Median Worker 
 Still Unclear Why Amazon Disclosed Subscriber Numbers: Ovide 
 Bloomberg Markets: Amazon Hits 100 Million Prime Customers 


In [74]:
publish_dates = soup.find_all('time', class_="published-at")

In [75]:
testlist = []
for item in publish_dates:
    testlist.append(item.get_text())
testlist

[' Apr 20, 2018 ',
 ' Apr 19, 2018 ',
 ' Apr 19, 2018 ',
 ' Apr 19, 2018 ',
 ' Apr 19, 2018 ',
 ' Apr 19, 2018 ',
 ' Apr 19, 2018 ',
 ' Apr 19, 2018 ',
 ' Apr 19, 2018 ',
 ' Apr 19, 2018 ']

In [76]:
testlist[0]

' Apr 20, 2018 '

In [77]:
len(testlist[0])

14

In [78]:
len(publish_dates)

10

In [79]:
publish_dates[0]

<time class="published-at" datetime="2018-04-20T10:30:19+00:00"> Apr 20, 2018 </time>

In [80]:
stories = soup.find_all('div', class_="search-result-story__body")

In [81]:
for item in stories:
    print(item.get_text())

 Good morning! This is Fly Charts, the daily charts-only newsletter from Gadfly; sign up here. From #MeToo in Japan to Qualcomm's poor planning, here are four charts that tell you what you need... 
                            Bloomberg Intelligence's Jitendra Waral discusses a milestone for Amazon and possible challenges for the company. He speaks with Vonnie Quinn on "Bloomberg Markets."... 
                           Michael Olson, senior research analyst at Piper Jaffray, discusses Amazon on "Bloomberg Markets: The Open."... 
 public role in shaping Amazon’s agenda, which includes advancing its technological ambitions as well as playing defense against presidential tweets. When he started at Amazon, the company was struggling... 
                                                             Even Trump tweeted 6 times about Amazon this year, he's got no mention in Jeff Bezos' annual letter to shareholders https://bloom.bg/2HPGq5j... 
                                                   

In [205]:
# define a function to get publish date, story headline, story body
# put them together

def get_news(url):
    """
    Takes in a url and returns a dictionary containing publish date, story headline, story body from Bloomberg website.
    """
    
    request_object = urlopen(url)
    soup = BeautifulSoup(request_object, 'lxml')
    
    news_dict_list = []
    
    # publish dates, headlines, story bodies
    publish_dates = soup.find_all('time', class_="published-at")
    headlines = soup.find_all('h1', class_="search-result-story__headline".split())
    stories = soup.find_all('div', class_="search-result-story__body")
    
    length = min(len(publish_dates), len(headlines), len(stories))
    
    for i in range(length):
        news_dict = {}
        news_dict['date'] = publish_dates[i].get_text()
        news_dict['headline'] = headlines[i].get_text()
        news_dict['story'] = stories[i].get_text()
        news_dict_list.append(news_dict)
    return news_dict_list

In [206]:
url = 'https://www.bloomberg.com/search?query=amazon&page=1'
newslist = get_news(url)

In [207]:
newslist

[{'date': ' Apr 23, 2018 ',
  'headline': " Inside Amazon's Possible Plan to Build a Domestic Robot ",
  'story': '                                                          Bloomberg\'s Mark Gurman reports on Amazon\'s secret plan to build domestic a robot. He speaks on "Bloomberg Daybreak: Asia."... '},
 {'date': ' Apr 23, 2018 ',
  'headline': " Amazon's Top-Secret Robot Project Unveiled ",
  'story': "                                Bloomberg's Gerritt DeVynck discusses the difference between Amazon's warehouse robots that move merchandise around its fulfillment centers and the domestic robots... "},
 {'date': ' Apr 23, 2018 ',
  'headline': ' Amazon Pharmacy Competition Is Overblown, Larry Robbins Says at Sohn ',
  'story': ' Holding Co., saying that competition from Amazon is unlikely to materialize. He sees a 96 percent upside in CVS after its merger with Aetna Inc.Amazon doesn’t do firearms, doesn’t do tobacco, doesn’t... '},
 {'date': ' Apr 23, 2018 ',
  'headline': ' Amazon Be

In [201]:
# get all news up to page 500
newslist = []
for i in range(200):
    url = 'https://www.bloomberg.com/search?query=amazon&page='+str(i)
    newslist = newslist + get_news(url)

TypeError: can only concatenate list (not "NoneType") to list

In [202]:
len(newslist)

0

In [55]:
newslist[-1]

{'date': ' Feb 1, 2018 ',
 'headline': ' Now Even the Empire State Building Has Space to Fill—a Lot of It ',
 'story': ' last week by brokerage Cushman & Wakefield. Still, the site -- across 34th Street from one of Amazon.com Inc.’s first physical bookstores -- will be one of the most in-demand in Manhattan, said... '}

In [57]:
df = pd.DataFrame(newslist)

In [58]:
df.head()

Unnamed: 0,date,headline,story
0,"Apr 18, 2018",Amazon's Bezos Reveals Company Has Topped 100...,"Mark Mahaney, analyst at RBC Capital..."
1,"Apr 18, 2018",RBC's Mahaney Sees 200 Million Amazon Prime M...,Mark Maha...
2,"Apr 18, 2018",Postal Board Nominees Willing to Challenge Tr...,confidential agreements that are negotiated w...
3,"Apr 18, 2018",NBA Video-Game League to Stream Exclusively o...,video-game league signed a multiyear broadcas...
4,"Apr 18, 2018",Jeff Bezos Says Amazon Has Exceeded 100 Milli...,Amazon.com Chief Executive Officer Jeff Bezos...


In [59]:
df.tail()

Unnamed: 0,date,headline,story
995,"Jan 30, 2018",Surveillance: NAFTA Decision will be Market M...,"the tax cuts. Max Nisen, Bloomberg Gadfly Col..."
996,"Feb 1, 2018",Conglomerates Are Broken,mentioned as candidates for breakups. The Dig...
997,"Feb 1, 2018",iPhone X Puts Apple in Crosshairs of Nervous ...,stands above all others as the two-day blitz ...
998,"Jan 31, 2018","Bloomberg Baystate Business: Markets, Health ...",Bloomberg News reporter Zachary Tracer talked...
999,"Feb 1, 2018",Now Even the Empire State Building Has Space ...,last week by brokerage Cushman & Wakefield. S...


In [63]:
# write dataframe to csv file

df.to_csv('amazon_news_100page.csv')

## get sentiment score

In [91]:
df.head()

Unnamed: 0,date,headline,story
0,"Apr 18, 2018",Amazon's Bezos Reveals Company Has Topped 100...,"Mark Mahaney, analyst at RBC Capital..."
1,"Apr 18, 2018",RBC's Mahaney Sees 200 Million Amazon Prime M...,Mark Maha...
2,"Apr 18, 2018",Postal Board Nominees Willing to Challenge Tr...,confidential agreements that are negotiated w...
3,"Apr 18, 2018",NBA Video-Game League to Stream Exclusively o...,video-game league signed a multiyear broadcas...
4,"Apr 18, 2018",Jeff Bezos Says Amazon Has Exceeded 100 Milli...,Amazon.com Chief Executive Officer Jeff Bezos...


In [92]:
df['headline'][0]

" Amazon's Bezos Reveals Company Has Topped 100 Million Prime Members "

In [93]:
df['story'][0]

"          Mark Mahaney, analyst at RBC Capital Markets, and Bloomberg's Spencer Soper discuss Amazon CEO Jeff Bezos' annual shareholder letter and paid subscriptions for Prime. They speak with Emily... "

In [94]:
import nltk
from nltk.corpus import sentiwordnet as swn

In [95]:
# function to get sentiment score

def sentiment_score(doc):
    """This function takes in a string and returns a sentiment score for that string."""
    global doc_sentiment
    sentences = nltk.sent_tokenize(doc)
    stokens = [nltk.word_tokenize(sent) for sent in sentences]
    taggedlist=[]
    for stoken in stokens:        
         taggedlist.append(nltk.pos_tag(stoken))
    wnl = nltk.WordNetLemmatizer()

    score_list=[]
    for idx,taggedsent in enumerate(taggedlist):
        score_list.append([])
        for idx2,t in enumerate(taggedsent):
            newtag=''
            lemmatized=wnl.lemmatize(t[0])
            if t[1].startswith('NN'):
                newtag='n'
            elif t[1].startswith('JJ'):
                newtag='a'
            elif t[1].startswith('V'):
                newtag='v'
            elif t[1].startswith('R'):
                newtag='r'
            else:
                newtag=''       
            if(newtag!=''):    
                synsets = list(swn.senti_synsets(lemmatized, newtag))
                #Getting average of all possible sentiments, as you requested        
                score=0
                if(len(synsets)>0):
                    for syn in synsets:
                        score+=syn.pos_score()-syn.neg_score()
                    score_list[idx].append(score/len(synsets))
    sentence_sentiment=[]
    for score_sent in score_list:
        if len(score_sent)!=0:
            sentence_sentiment.append(sum([word_score for word_score in score_sent])/len(score_sent))
    if sentence_sentiment !=[]:
        doc_sentiment = sum(sentence_sentiment)/len(sentence_sentiment)
    return doc_sentiment

In [96]:
sentiment_score(df['headline'][0])

0.017857142857142856

In [97]:
sentiment_score(df['story'][0])

0.00720959595959596

In [98]:
# dealing with date of news

df['date'][0]

' Apr 18, 2018 '

In [99]:
from dateutil.parser import parse

In [100]:
parse(df['date'][0])

datetime.datetime(2018, 4, 18, 0, 0)

In [103]:
for i in range(len(df['date'])):
    df['date'][i] = parse(df['date'][i])

In [104]:
df.head()

Unnamed: 0,date,headline,story
0,2018-04-18 00:00:00,Amazon's Bezos Reveals Company Has Topped 100...,"Mark Mahaney, analyst at RBC Capital..."
1,2018-04-18 00:00:00,RBC's Mahaney Sees 200 Million Amazon Prime M...,Mark Maha...
2,2018-04-18 00:00:00,Postal Board Nominees Willing to Challenge Tr...,confidential agreements that are negotiated w...
3,2018-04-18 00:00:00,NBA Video-Game League to Stream Exclusively o...,video-game league signed a multiyear broadcas...
4,2018-04-18 00:00:00,Jeff Bezos Says Amazon Has Exceeded 100 Milli...,Amazon.com Chief Executive Officer Jeff Bezos...


In [105]:
df['date'][0].year

2018

In [106]:
df['date'][0].month

4

In [107]:
df['date'][0].day

18

In [108]:
# get sentiment score

headline_score = []
story_score = []

for i in range(len(df)):
    headline_score.append(sentiment_score(df['headline'][i]))
    story_score.append(sentiment_score(df['story'][i]))

In [109]:
len(headline_score)

1000

In [110]:
len(story_score)

1000

In [113]:
df['headline_score'] = headline_score
df['story_score'] = story_score
df['avg_score'] = (df['headline_score'] + df['story_score']) / 2

df.head()

Unnamed: 0,date,headline,story,headline_score,story_score,avg_score
0,2018-04-18 00:00:00,Amazon's Bezos Reveals Company Has Topped 100...,"Mark Mahaney, analyst at RBC Capital...",0.017857,0.00721,0.012533
1,2018-04-18 00:00:00,RBC's Mahaney Sees 200 Million Amazon Prime M...,Mark Maha...,0.027778,0.02345,0.025614
2,2018-04-18 00:00:00,Postal Board Nominees Willing to Challenge Tr...,confidential agreements that are negotiated w...,0.033036,0.012763,0.022899
3,2018-04-18 00:00:00,NBA Video-Game League to Stream Exclusively o...,video-game league signed a multiyear broadcas...,-0.04375,-0.004167,-0.023958
4,2018-04-18 00:00:00,Jeff Bezos Says Amazon Has Exceeded 100 Milli...,Amazon.com Chief Executive Officer Jeff Bezos...,0.029762,0.030203,0.029983


In [114]:
df.tail()

Unnamed: 0,date,headline,story,headline_score,story_score,avg_score
995,2018-01-30 00:00:00,Surveillance: NAFTA Decision will be Market M...,"the tax cuts. Max Nisen, Bloomberg Gadfly Col...",0.030769,0.010611,0.02069
996,2018-02-01 00:00:00,Conglomerates Are Broken,mentioned as candidates for breakups. The Dig...,0.004808,0.001377,0.003092
997,2018-02-01 00:00:00,iPhone X Puts Apple in Crosshairs of Nervous ...,stands above all others as the two-day blitz ...,0.0,0.001894,0.000947
998,2018-01-31 00:00:00,"Bloomberg Baystate Business: Markets, Health ...",Bloomberg News reporter Zachary Tracer talked...,0.088542,-0.002813,0.042864
999,2018-02-01 00:00:00,Now Even the Empire State Building Has Space ...,last week by brokerage Cushman & Wakefield. S...,-0.012897,-0.018964,-0.015931


In [115]:
# get average score for news of the same day

day_list = df['date'].unique().tolist()

In [116]:
len(day_list)

73

In [117]:
day_list[:10]

[datetime.datetime(2018, 4, 18, 0, 0),
 datetime.datetime(2018, 4, 16, 0, 0),
 datetime.datetime(2018, 4, 17, 0, 0),
 datetime.datetime(2018, 4, 14, 0, 0),
 datetime.datetime(2018, 4, 12, 0, 0),
 datetime.datetime(2018, 4, 13, 0, 0),
 datetime.datetime(2018, 4, 11, 0, 0),
 datetime.datetime(2018, 4, 10, 0, 0),
 datetime.datetime(2018, 4, 9, 0, 0),
 datetime.datetime(2018, 4, 8, 0, 0)]

In [120]:
# weekday needs to add 1 to be correct
# due to all the times becomes 00:00

df['date'][995].weekday()

1

In [123]:
headline_score_daily = []
story_score_daily = []

for i in range(0,len(day_list)):
    
    headline_score_avg = np.mean(df[df['date']==day_list[i]]['headline_score'].tolist())
    headline_score_daily.append(headline_score_avg)
    
    story_score_avg = np.mean(df[df['date']==day_list[i]]['story_score'].tolist())
    story_score_daily.append(story_score_avg)

In [124]:
len(headline_score_daily)

73

In [125]:
len(story_score_daily)

73

In [126]:
# combine the daily scores to a dataframe

daydf = pd.DataFrame()

daydf['date'] = day_list
daydf['headline_score'] = headline_score_daily
daydf['story_score'] = story_score_daily
daydf['avg_score'] = (daydf['headline_score'] + daydf['story_score']) / 2

In [127]:
daydf.head()

Unnamed: 0,date,headline_score,story_score,avg_score
0,2018-04-18,0.003877,-0.002758,0.00056
1,2018-04-16,0.013159,-0.000184,0.006487
2,2018-04-17,-0.002557,0.006168,0.001806
3,2018-04-14,0.010503,0.024096,0.0173
4,2018-04-12,0.00661,0.007125,0.006867


In [128]:
daydf.tail()

Unnamed: 0,date,headline_score,story_score,avg_score
68,2018-02-05,0.009451,-0.01928,-0.004914
69,2018-01-30,0.00927,0.004983,0.007127
70,2018-01-29,-0.033335,-0.002715,-0.018025
71,2018-02-03,-0.013889,-0.032556,-0.023223
72,2018-01-28,0.022254,-0.009485,0.006384


In [130]:
# order by date

daydf = daydf.sort_values(by=['date'])

In [131]:
daydf.head()

Unnamed: 0,date,headline_score,story_score,avg_score
72,2018-01-28,0.022254,-0.009485,0.006384
70,2018-01-29,-0.033335,-0.002715,-0.018025
69,2018-01-30,0.00927,0.004983,0.007127
67,2018-01-31,0.052435,0.026948,0.039692
66,2018-02-01,0.002022,0.016211,0.009117


In [132]:
daydf.tail()

Unnamed: 0,date,headline_score,story_score,avg_score
5,2018-04-13,-0.004955,0.007302,0.001174
3,2018-04-14,0.010503,0.024096,0.0173
1,2018-04-16,0.013159,-0.000184,0.006487
2,2018-04-17,-0.002557,0.006168,0.001806
0,2018-04-18,0.003877,-0.002758,0.00056


In [133]:
# write dataframe to csv file

daydf.to_csv('score_day.csv')

## nltk vader module

In [134]:
from nltk.sentiment.vader import SentimentIntensityAnalyzer



In [135]:
sid = SentimentIntensityAnalyzer()

In [147]:
sentence = "Jeff Bezos Says Amazon Has Exceeded 100 Million Paid Prime Members"

ss = sid.polarity_scores(sentence)

In [148]:
for k in sorted(ss):
    print('{0}: {1}, '.format(k, ss[k]), end='')
    print()

compound: 0.1779, 
neg: 0.0, 
neu: 0.855, 
pos: 0.145, 


In [150]:
ss['compound']

0.1779

In [149]:
df.head()

Unnamed: 0,date,headline,story,headline_score,story_score,avg_score
0,2018-04-18 00:00:00,Amazon's Bezos Reveals Company Has Topped 100...,"Mark Mahaney, analyst at RBC Capital...",0.017857,0.00721,0.012533
1,2018-04-18 00:00:00,RBC's Mahaney Sees 200 Million Amazon Prime M...,Mark Maha...,0.027778,0.02345,0.025614
2,2018-04-18 00:00:00,Postal Board Nominees Willing to Challenge Tr...,confidential agreements that are negotiated w...,0.033036,0.012763,0.022899
3,2018-04-18 00:00:00,NBA Video-Game League to Stream Exclusively o...,video-game league signed a multiyear broadcas...,-0.04375,-0.004167,-0.023958
4,2018-04-18 00:00:00,Jeff Bezos Says Amazon Has Exceeded 100 Milli...,Amazon.com Chief Executive Officer Jeff Bezos...,0.029762,0.030203,0.029983


In [151]:
# define a function to get sentiment score

def sid_score(sentence):
    """input a string, output a compound sentiment score"""
    sid = SentimentIntensityAnalyzer()
    ss = sid.polarity_scores(sentence)
    return ss['compound']

In [152]:
sid_score(sentence)

0.1779

In [153]:
# get sentiment score

headline_score = []
story_score = []

for i in range(len(df)):
    headline_score.append(sid_score(df['headline'][i]))
    story_score.append(sid_score(df['story'][i]))

In [154]:
# add score to dataframe

df['headline_score'] = headline_score
df['story_score'] = story_score
df['avg_score'] = (df['headline_score'] + df['story_score']) / 2

df.head()

Unnamed: 0,date,headline,story,headline_score,story_score,avg_score
0,2018-04-18 00:00:00,Amazon's Bezos Reveals Company Has Topped 100...,"Mark Mahaney, analyst at RBC Capital...",0.0,0.1779,0.08895
1,2018-04-18 00:00:00,RBC's Mahaney Sees 200 Million Amazon Prime M...,Mark Maha...,0.1779,0.0,0.08895
2,2018-04-18 00:00:00,Postal Board Nominees Willing to Challenge Tr...,confidential agreements that are negotiated w...,-0.1027,0.6808,0.28905
3,2018-04-18 00:00:00,NBA Video-Game League to Stream Exclusively o...,video-game league signed a multiyear broadcas...,0.0,0.0,0.0
4,2018-04-18 00:00:00,Jeff Bezos Says Amazon Has Exceeded 100 Milli...,Amazon.com Chief Executive Officer Jeff Bezos...,0.1779,0.0,0.08895


In [155]:
# get average score for news of the same day

day_list = df['date'].unique().tolist()

In [156]:
headline_score_daily = []
story_score_daily = []

for i in range(0,len(day_list)):
    
    headline_score_avg = np.mean(df[df['date']==day_list[i]]['headline_score'].tolist())
    headline_score_daily.append(headline_score_avg)
    
    story_score_avg = np.mean(df[df['date']==day_list[i]]['story_score'].tolist())
    story_score_daily.append(story_score_avg)

In [157]:
# combine the daily scores to a dataframe

daydf = pd.DataFrame()

daydf['date'] = day_list
daydf['headline_score'] = headline_score_daily
daydf['story_score'] = story_score_daily
daydf['avg_score'] = (daydf['headline_score'] + daydf['story_score']) / 2

In [158]:
daydf.head()

Unnamed: 0,date,headline_score,story_score,avg_score
0,2018-04-18,0.032985,0.09108,0.062033
1,2018-04-16,0.005354,0.300738,0.153046
2,2018-04-17,0.0327,-0.035092,-0.001196
3,2018-04-14,0.1779,0.2263,0.2021
4,2018-04-12,-0.022257,0.120136,0.048939


In [159]:
daydf.tail()

Unnamed: 0,date,headline_score,story_score,avg_score
68,2018-02-05,0.219633,0.310442,0.265038
69,2018-01-30,0.176722,0.145078,0.1609
70,2018-01-29,0.183133,0.198583,0.190858
71,2018-02-03,0.0,0.34,0.17
72,2018-01-28,0.1779,0.1027,0.1403


In [160]:
# order by date

daydf = daydf.sort_values(by=['date'])

In [161]:
daydf.head()

Unnamed: 0,date,headline_score,story_score,avg_score
72,2018-01-28,0.1779,0.1027,0.1403
70,2018-01-29,0.183133,0.198583,0.190858
69,2018-01-30,0.176722,0.145078,0.1609
67,2018-01-31,0.204446,0.401046,0.302746
66,2018-02-01,0.161862,0.276862,0.219362


In [162]:
daydf.tail()

Unnamed: 0,date,headline_score,story_score,avg_score
5,2018-04-13,0.020375,0.303817,0.162096
3,2018-04-14,0.1779,0.2263,0.2021
1,2018-04-16,0.005354,0.300738,0.153046
2,2018-04-17,0.0327,-0.035092,-0.001196
0,2018-04-18,0.032985,0.09108,0.062033


In [163]:
# write dataframe to csv file

daydf.to_csv('sid_score.csv')

## use news sentiment score for portfolio construction

In [170]:
# define a function to get average sentiment score
# for a given company by reading most recent news

def news_score(company_list):
    """input a list of company names, output a dictionary of company_name:news_score"""
    
    company_score = {}
    
    for company in company_list:
        url = 'https://www.bloomberg.com/search?query=' + company
        newslist = get_news(url)

        df = pd.DataFrame(newslist)

        headline_score = []
        story_score = []

        for i in range(len(df)):
            headline_score.append(sid_score(df['headline'][i]))
            story_score.append(sid_score(df['story'][i]))

        avg_score = (np.mean(headline_score) + np.mean(story_score))/2
        
        company_score[company] = avg_score
        
    return company_score

In [178]:
company_list = ['Apple', 'Microsoft', 'Facebook', 'Google', 'Intel', 'Visa', 'Cisco', 'Mastercard', 'Nvidia',
               'Berkshire', 'JP+Morgan+Chase', 'Bank+of+America', 'Wells+Fargo', 'Citigroup', 'Goldman+Sachs',
               'US+Bancorp', 'Morgan+Stanley', 'American+Express', 'Johnson+&+Johnson', 'Pfizer', 'Unitedhealth',
               'AbbVie', 'Merck', 'Amgen', 'Medtronic', 'Abbott', 'Bristol+Myers+Squibb', 'Gilead', 'Amazon',
               'Home+Depot', 'Comcast', 'Walt+Disney', 'NetFlix', 'McDonald', 'Booking', 'Nike', 'Starbucks',
               'Time+Warner', 'Boeing', '3M', 'General+Electric', 'Honeywell', 'Union+Pacific', 'United+Technologies',
               'Caterpillar', 'Lockheed+Martin', 'UPS', 'Raytheon', 'Coca+Cola', 'Costco', 'CVS', 'Walmart', 'Walgreens',
               'Chevron', 'Exxon+Mobil', 'Marathon+Oil', 'Phillips+66', 'Valero', 'Schlumberger']

In [179]:
# caution: depends on how large your company list is
# this can take several minutes to run

company_score = news_score(company_list)

In [180]:
company_score

{'3M': 0.03302,
 'AbbVie': -0.16885499999999998,
 'Abbott': 0.13954,
 'Amazon': 0.14342500000000002,
 'American+Express': -0.06330499999999999,
 'Amgen': 0.10195,
 'Apple': 0.10029000000000002,
 'Bank+of+America': 0.14468000000000003,
 'Berkshire': 0.031724999999999996,
 'Boeing': -0.021529999999999997,
 'Booking': -0.06226,
 'Bristol+Myers+Squibb': -0.07617499999999998,
 'CVS': 0.2042,
 'Caterpillar': -0.037774999999999975,
 'Chevron': 0.111005,
 'Cisco': 0.12919000000000003,
 'Citigroup': 0.062439999999999996,
 'Coca+Cola': 0.05159444444444443,
 'Comcast': 0.01356000000000001,
 'Costco': 0.09909500000000002,
 'Exxon+Mobil': -0.03702,
 'Facebook': -0.019584999999999998,
 'General+Electric': 0.035655,
 'Gilead': 0.057984999999999995,
 'Goldman+Sachs': 0.069665,
 'Google': -0.09785,
 'Home+Depot': 0.09847500000000001,
 'Honeywell': 0.10244,
 'Intel': -0.043295,
 'JP+Morgan+Chase': 0.10921500000000002,
 'Johnson+&+Johnson': -0.04363499999999999,
 'Lockheed+Martin': -0.029194999999999995,

In [181]:
# sort company score dictionary by sentiment score

import operator

sorted_company_score = sorted(company_score.items(), key=operator.itemgetter(1))

In [182]:
sorted_company_score

[('AbbVie', -0.16885499999999998),
 ('Starbucks', -0.164075),
 ('Google', -0.09785),
 ('Bristol+Myers+Squibb', -0.07617499999999998),
 ('American+Express', -0.06330499999999999),
 ('Booking', -0.06226),
 ('Mastercard', -0.062244999999999995),
 ('US+Bancorp', -0.06045),
 ('Visa', -0.05849999999999999),
 ('Johnson+&+Johnson', -0.04363499999999999),
 ('Intel', -0.043295),
 ('Caterpillar', -0.037774999999999975),
 ('Exxon+Mobil', -0.03702),
 ('Walt+Disney', -0.03584),
 ('Union+Pacific', -0.030159999999999992),
 ('Lockheed+Martin', -0.029194999999999995),
 ('Merck', -0.02747),
 ('Boeing', -0.021529999999999997),
 ('Microsoft', -0.020450000000000017),
 ('Facebook', -0.019584999999999998),
 ('United+Technologies', -0.01944499999999999),
 ('McDonald', 0.009859999999999996),
 ('Comcast', 0.01356000000000001),
 ('Walmart', 0.01515),
 ('Berkshire', 0.031724999999999996),
 ('3M', 0.03302),
 ('General+Electric', 0.035655),
 ('Pfizer', 0.04252),
 ('Coca+Cola', 0.05159444444444443),
 ('Gilead', 0.057

In [208]:
# S&P 500 score

index_score = news_score(['S&P+500+index'])
index_score

{'S&P+500+index': -0.139625}

In [184]:
total = 0

for i in sorted_company_score[-20:]:
    total = total + i[1]
    
total

3.1556900000000008