In [10]:
!pip install transformers

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.3.2[0m[39;49m -> [0m[32;49m24.0[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [11]:
import pandas as pd

# Load the articles file and keep only rows whose headline mentions "apple" or
# "AAPL" (case-insensitive), renumbering the surviving rows from 0.
df_news = pd.read_csv("investopedia_articles.csv")
# na=False makes a missing headline count as "no match"; without it the mask
# contains NaN for such rows and the boolean indexing below raises.
mentions_apple = df_news["Headline"].str.contains("apple|AAPL", case=False, regex=True, na=False)
df_news = df_news[mentions_apple].reset_index(drop=True)

# Do not truncate long cell values when the frame is displayed.
pd.set_option('display.max_colwidth', None)
df_news.head()

Unnamed: 0,Date,Time,Source,Headline,Symbol,Company
0,02/15/2024,6:34AM,IH Market News,"Cisco Systems Stock Drops 5% Following Restructuring Plan, Berkshire Trims Apple Stake, and Latest News",NASDAQ:AAPL,Apple Inc
1,02/05/2024,9:00AM,Business Wire,"USHER’s Road to Halftime on Apple Music lets fans listen, watch, dance, and sing their way to Super Bowl LVIII",NASDAQ:AAPL,Apple Inc
2,02/02/2024,8:47AM,IH Market News,"Apple Beats Q1 Revenue and Earnings Estimates Amid China Sales Drop, Shares Dip 3% in Early Trading",NASDAQ:AAPL,Apple Inc
3,02/01/2024,4:30PM,Business Wire,Apple reports first quarter results,NASDAQ:AAPL,Apple Inc
4,01/25/2024,1:55PM,Dow Jones News,"Apple Issues Changes to Operating System, App Store to Comply With EU Law",NASDAQ:AAPL,Apple Inc


In [12]:
# Compare the number of rows with the number of distinct headlines to see how
# many duplicates the frame contains.
total_count = df_news.shape[0]
# dropna=False counts a missing headline as one distinct value, like unique() does.
unique_count = df_news["Headline"].nunique(dropna=False)
print(f"{total_count} total headlines")
print(f"{unique_count} unique headlines")

2365 total headlines
1572 unique headlines


In [14]:
# Keep the first occurrence of every headline and renumber the rows from 0
# (ignore_index=True replaces the separate reset_index(drop=True) step).
df_news = df_news.drop_duplicates(subset="Headline", keep="first", ignore_index=True)
df_news.shape[0]

1572

In [15]:
from transformers import AutoTokenizer, AutoModelForSequenceClassification

# The tokenizer and the classification model are loaded from the same
# checkpoint name, so it is spelled out once.
FINBERT_CHECKPOINT = "ProsusAI/finbert"
tokenizer = AutoTokenizer.from_pretrained(FINBERT_CHECKPOINT)
model = AutoModelForSequenceClassification.from_pretrained(FINBERT_CHECKPOINT)

In [16]:
import torch 
import tqdm

# Score every headline with the sentiment model and store the three softmax
# probabilities as new columns on df_news.

# Output column names, listed in the order of the model's output logits.
# NOTE(review): assumes index 0/1/2 of the logits mean positive/negative/neutral,
# as the original cell did -- confirm against model.config.id2label.
SENTIMENT_COLUMNS = ("Positive", "Negative", "Neutral")

# One [positive, negative, neutral] probability row per headline, in dataframe row order.
sentiment_rows = []

# Inference only: switching autograd off avoids building a gradient graph on
# every forward pass. It does not change the computed values.
with torch.no_grad():
    # Loops through each headline in the dataframe. The tqdm module provides a progress bar for loops.
    for headline in tqdm.tqdm(list(df_news["Headline"])):

        # Converts the text into input tokens that the model can read
        # (named `encoded` so the built-in `input` is not shadowed).
        encoded = tokenizer(headline, return_tensors="pt", padding=True, truncation=True)

        # Computes the model outputs
        outputs = model(**encoded)

        # Normalises model output values using the softmax function
        probabilities = torch.nn.functional.softmax(outputs.logits, dim=-1)

        # A single headline was passed in, so row 0 holds its three probabilities.
        sentiment_rows.append(probabilities[0].tolist())

# Write each sentiment column once, by row position, instead of re-scanning the
# whole dataframe for the matching headline on every iteration.
for position, column in enumerate(SENTIMENT_COLUMNS):
    df_news[column] = [row[position] for row in sentiment_rows]

df_news.head()

  0%|                                                  | 0/1572 [00:00<?, ?it/s]

100%|███████████████████████████████████████| 1572/1572 [01:10<00:00, 22.16it/s]


Unnamed: 0,Date,Time,Source,Headline,Symbol,Company,Positive,Negative,Neutral
0,02/15/2024,6:34AM,IH Market News,"Cisco Systems Stock Drops 5% Following Restructuring Plan, Berkshire Trims Apple Stake, and Latest News",NASDAQ:AAPL,Apple Inc,0.012263,0.910677,0.07706
1,02/05/2024,9:00AM,Business Wire,"USHER’s Road to Halftime on Apple Music lets fans listen, watch, dance, and sing their way to Super Bowl LVIII",NASDAQ:AAPL,Apple Inc,0.064723,0.016325,0.918952
2,02/02/2024,8:47AM,IH Market News,"Apple Beats Q1 Revenue and Earnings Estimates Amid China Sales Drop, Shares Dip 3% in Early Trading",NASDAQ:AAPL,Apple Inc,0.037662,0.950361,0.011977
3,02/01/2024,4:30PM,Business Wire,Apple reports first quarter results,NASDAQ:AAPL,Apple Inc,0.03692,0.067719,0.895361
4,01/25/2024,1:55PM,Dow Jones News,"Apple Issues Changes to Operating System, App Store to Comply With EU Law",NASDAQ:AAPL,Apple Inc,0.042882,0.044438,0.912681


In [17]:
# First ten headlines (in dataframe order) whose "Negative" score is above 0.8.
is_strongly_negative = df_news["Negative"] > 0.8
df_news.loc[is_strongly_negative, ["Headline", "Negative"]].head(10)

Unnamed: 0,Headline,Negative
0,"Cisco Systems Stock Drops 5% Following Restructuring Plan, Berkshire Trims Apple Stake, and Latest News",0.910677
2,"Apple Beats Q1 Revenue and Earnings Estimates Amid China Sales Drop, Shares Dip 3% in Early Trading",0.950361
6,Apple's Smartphone Shipments in China Fell in Fourth Quarter,0.974899
8,"Apple Seeks Dismissal of UK Suit Over App Store Fees, Reuters Reports",0.93939
16,"Wall Street Highlights: Apple Cuts iPhone Prices in China, Microsoft Launches Copilot Pro Subscription, and More",0.800631
17,"Apple CEO Tim Cook Takes Steep Pay Cut, as Expected",0.931088
23,Apple on Track for Lowest Close Since November 2023 -- Data Talk,0.897906
25,"Trending: Apple to Appeal U.S. Ban on Series 9, Ultra 2 Smartwatches",0.912491
26,"Apple Stocks Reach Record, Adobe Faces Stock Drops, Berkshire Hathaway Increases Stake in OXY, and More",0.84182
34,Apple’s Stock Takes a Hit Following Bleak Holiday Quarter Warning,0.806516


In [8]:
# First ten headlines (in dataframe order) whose "Positive" score is above 0.8.
is_strongly_positive = df_news["Positive"] > 0.8
df_news.loc[is_strongly_positive, ["Headline", "Positive"]].head(10)

Unnamed: 0,Headline,Positive
11,Apple Commits to Address EU Antitrust Concerns Over Apple Pay,0.823839
36,Apple's Chinese Supplier Stocks Rise on Strong iPhone Demand,0.898205
40,"Apple unveils the new MacBook Pro featuring the M3 family of chips, making the world’s best pro laptop even better",0.8287
52,Apple Is Best Performer in the DJIA So Far Today -- Data Talk,0.848845
83,"Apple Working on AI Tools to Challenge Competitors' Offerings, Bloomberg Says",0.842362
85,"Nokia, Apple Sign New Patent Cross-License Agreement Ahead of Expiration",0.811564
86,Apple Shares Hit All-Time High in Push Toward $3 Trillion Market Cap,0.807361
89,"Apple’s Racial Equity and Justice Initiative surpasses $200 million in investments, doubling initial 2020 commitment",0.930279
99,"Trending: Broadcom, Apple Extend Chip-Supply Deal",0.862029
100,Apple Strikes Multibillion-Dollar Supply Deal With Broadcom,0.896374


In [18]:
# Save the headlines together with their sentiment columns; index=False leaves
# the dataframe's row index out of the file.
output_path = "headlines_w_sentiment.csv"
df_news.to_csv(output_path, index=False)