In [2]:
import json
import os
from datetime import datetime, timedelta

import openai
import pandas as pd
import requests
from newsapi import NewsApiClient
from newsplease import NewsPlease
from transformers import BertTokenizer, BertForSequenceClassification
from transformers import pipeline

In [3]:
# API configuration
# Secrets are looked up in the environment first so that real keys never have
# to be saved inside the notebook. The original placeholder strings are kept
# as fallbacks, so editing them in place still works exactly as before.
#   AZURE_OPENAI_ENDPOINT   -> openai.api_base
#   AZURE_OPENAI_API_KEY    -> openai.api_key
#   AZURE_OPENAI_DEPLOYMENT -> MODEL (passed as `engine=` to ChatCompletion)
#   NEWSAPI_KEY             -> news_api_key
openai.api_type = "azure"
openai.api_base = os.environ.get("AZURE_OPENAI_ENDPOINT", "your API Base")
openai.api_key = os.environ.get("AZURE_OPENAI_API_KEY", "your API Key")
openai.api_version = "2023-09-01-preview"
MODEL = os.environ.get("AZURE_OPENAI_DEPLOYMENT", "Your Model")
news_api_key = os.environ.get("NEWSAPI_KEY", "Your newsapi.org API Key")
newsapi = NewsApiClient(api_key=news_api_key)

In [4]:
# Get all useful sources
# Ask NewsAPI for its English-language sources and tabulate the 'sources'
# list of the response, one row per source.
sources = pd.DataFrame(newsapi.get_sources(language='en')['sources'])
sources

Unnamed: 0,id,name,description,url,category,language,country
0,abc-news,ABC News,"Your trusted source for breaking news, analysi...",https://abcnews.go.com,general,en,us
1,abc-news-au,ABC News (AU),"Australia's most trusted source of local, nati...",http://www.abc.net.au/news,general,en,au
2,al-jazeera-english,Al Jazeera English,"News, analysis from the Middle East and worldw...",http://www.aljazeera.com,general,en,us
3,ars-technica,Ars Technica,The PC enthusiast's resource. Power users and ...,http://arstechnica.com,technology,en,us
4,associated-press,Associated Press,The AP delivers in-depth coverage on the inter...,https://apnews.com/,general,en,us
...,...,...,...,...,...,...,...
76,the-washington-times,The Washington Times,The Washington Times delivers breaking news an...,https://www.washingtontimes.com/,general,en,us
77,time,Time,Breaking news and analysis from TIME.com. Poli...,http://time.com,general,en,us
78,usa-today,USA Today,"Get the latest national, international, and po...",http://www.usatoday.com/news,general,en,us
79,vice-news,Vice News,"Vice News is Vice Media, Inc.'s current affair...",https://news.vice.com,general,en,us


In [5]:
# Concatenate the sources into a string for api call
# str.join places the separator only *between* ids, so the string no longer
# ends with a dangling ", " (which amounted to an empty final source id), and
# it avoids rebuilding the string on every loop iteration.
listOfSources = ", ".join(sources["id"].astype(str))
# Kept for reference: the equivalent call through the newsapi client library.
# results = newsapi.get_everything(q='+ETF AND (launch OR list OR delist)',language='en',sources=listOfSources,sort_by='publishedAt',from_param='2023-10-05', to='2023-11-04')
# results = results['articles']
# results

In [6]:
# Function to extract news articles from the results into a txt file for further processing
# def extract_content(url):
#     driver = webdriver.Chrome()
#     driver.get(url)
#     title = driver.title
#     content = driver.find_elements('xpath', '//script[]')
#     content_list = []
#     for i in content:
#         content_list.append(i.text)
#     content_list = " ".join(content_list)
#     driver.close()
#     return [title, content_list]

In [7]:
# Get today's date and the date 31 days ago
# The clock is read once so both bounds derive from the same instant; two
# separate datetime.today() calls could straddle midnight and yield a window
# that is not exactly 31 days wide.
today = datetime.today()
endtime = today.strftime('%Y-%m-%d')
starttime = (today - timedelta(days=31)).strftime('%Y-%m-%d')
print(starttime, endtime)

2023-10-09 2023-11-09


In [19]:
# finbert query function
# Lazily-built sentiment pipeline shared by every finbert_query() call.
_FINBERT_PIPELINE = None


def finbert_query(text):
    """Run FinBERT tone classification on ``text``.

    The 'yiyanghkust/finbert-tone' model, its tokenizer and the
    "sentiment-analysis" pipeline are created on the first call only and then
    reused. Previously all three were re-loaded on every call, i.e. once per
    article in the analysis loops.

    Args:
        text: Input handed straight to the transformers pipeline.

    Returns:
        Whatever the pipeline returns for ``text``; callers in this notebook
        read ``result[0]['label']`` and ``result[0]['score']``.
    """
    global _FINBERT_PIPELINE
    if _FINBERT_PIPELINE is None:
        finbert = BertForSequenceClassification.from_pretrained('yiyanghkust/finbert-tone', num_labels=3)
        tokenizer = BertTokenizer.from_pretrained('yiyanghkust/finbert-tone')
        _FINBERT_PIPELINE = pipeline("sentiment-analysis", model=finbert, tokenizer=tokenizer)

    return _FINBERT_PIPELINE(text)

# GPT summary function
def gpt_summary(title, description, content):
    """Ask the configured chat model for a short report on one news article.

    Missing fields (``None`` or NaN) are sent as the literal 'N/A', which the
    final user message tells the model to ignore. Previously a missing
    ``content`` raised ``TypeError`` on ``len(content)``, and a missing title
    or description was sent as the text "None".

    Args:
        title: Article headline, or None/NaN when unavailable.
        description: Article description, or None/NaN when unavailable.
        content: Article body; sent only when longer than 150 characters,
            otherwise replaced by 'N/A'.

    Returns:
        The message content of the first choice in the response. It is also
        printed, as before.
    """
    def _missing(value):
        # None, or a float NaN (NaN is the only value not equal to itself).
        return value is None or (isinstance(value, float) and value != value)

    title_text = 'N/A' if _missing(title) else title
    description_text = 'N/A' if _missing(description) else description
    content_text = 'N/A' if _missing(content) or len(content) <= 150 else content

    response = openai.ChatCompletion.create(
        engine=MODEL,
        messages=[
            {"role": "system", "content": "You are a financial analyst. You are reading a news article about a recent ETF launch. You need to summarize the article and write a short report about it for investment specialists and executives in JPMC's ETF sales team."},
            {"role": "user", "content": f"Title: {title_text}"},
            {"role": "user", "content": f"Description: {description_text}"},
            {"role": "user", "content": f"Content: {content_text}"},
            {"role": "user", "content": "If any of the field is N/A, please ignore it and use the rest of the information to write the summary."},
        ],
        temperature=0.2,
    )
    result = response['choices'][0]['message']['content']
    print(result)
    return result

In [20]:
# API call for ETF launch news, newsapi library does not support searching only title
# The query is passed through `params` so requests URL-encodes every value
# (spaces, parentheses, commas) instead of relying on a hand-built URL.
launching_response = requests.get(
    'https://newsapi.org/v2/everything/',
    params={
        'q': 'ETF AND (launch OR list OR delist)',
        'sources': listOfSources,
        'searchIn': 'title,description',
        'language': 'en',
        'sortBy': 'publishedAt',
        'from': starttime,
        'to': endtime,
        'apiKey': news_api_key,
    },
    timeout=30,  # do not let a stalled connection hang the notebook forever
)
launching_result = launching_response.json()
export_result = json.dumps(launching_result, indent=4)

# Export the results to a json file
with open('launch_related_news.json', 'w') as f:
    f.write(export_result)

# Fail with the API's own message rather than a KeyError on a missing key.
if launching_result.get('status') != 'ok':
    raise RuntimeError(f"NewsAPI request failed: {launching_result.get('message', launching_result)}")

# Naming the columns up front keeps the frame well-formed for an empty result.
launching_result = (
    pd.DataFrame(
        launching_result['articles'],
        columns=['source', 'author', 'title', 'description', 'url', 'urlToImage', 'publishedAt', 'content'],
    )
    .drop_duplicates(subset=['title'])
    .reset_index(drop=True)
    .drop(columns=['source', 'author', 'url', 'urlToImage', 'publishedAt'])
)

# Iterate over the rows that actually exist. The reported 'totalResults' can
# exceed them (de-duplication above removes rows), which used to raise
# IndexError on iloc[i].
launching_analysis_list = []
launching_records = []
for _, article in launching_result.iterrows():
    # Missing (non-string) fields become '' so concatenation cannot fail.
    title, description, content = (
        article[col] if isinstance(article[col], str) else ''
        for col in ('title', 'description', 'content')
    )
    article_text = '\n'.join((title, description, content))
    launching_analysis_list.append(article_text)
    finbert_result = finbert_query(article_text)
    launching_records.append({
        'title': article['title'],
        'sentiment_label': finbert_result[0].get('label'),
        'sentiment_score': finbert_result[0].get('score'),
        'summary': gpt_summary(title, description, content),
    })

# Build the frame once from the collected records instead of row by row.
launching_analysis = pd.DataFrame(
    launching_records,
    columns=['title', 'sentiment_label', 'sentiment_score', 'summary'],
)
num_results = len(launching_analysis)  # number of articles actually analysed
launching_analysis


Summary:

The recent 10% surge in Bitcoin, triggered by a false report that BlackRock Inc. had received approval to launch a spot ETF, has highlighted the potential impact of the upcoming decision by the US Securities & Exchange Commission on the approval of exchange-traded funds (ETFs) that invest directly in Bitcoin. The erroneous report quickly drove Bitcoin's price to $30,002, its highest since March, before cooling off after BlackRock clarified that its application is still under review. This incident provides insights into the market's anticipation and potential reaction to the launch of a Bitcoin ETF.
Summary:

The article discusses the recent fluctuations in Bitcoin's price following rumors about BlackRock's potential launch of a spot-Bitcoin ETF in the US. The cryptocurrency's value saw a 10% increase due to speculation that BlackRock had received approval for the ETF. However, these gains were short-lived as BlackRock clarified that their application is still under review by 

Unnamed: 0,title,sentiment_label,sentiment_score,summary
0,"Bitcoin’s 10% jump to $30,000 hints at trader ...",Negative,0.978541,"Summary:\n\nThe recent 10% surge in Bitcoin, t..."
1,"Bitcoin jumps 10%, then slides after BlackRock...",Positive,0.977037,Summary:\n\nThe article discusses the recent f...


In [21]:
# Export the results to a json file
# One record per analysed article, keyed by its position in the frame
# (json.dumps turns the integer keys into strings).
launching_summary = launching_analysis.to_dict(orient='records')
result = dict(enumerate(launching_summary))
with open('launch_related_analyzation.json', 'w') as f:
    f.write(json.dumps(result, indent=4))


: 

In [11]:
# API Call for ETF regulatory news
# The query is passed through `params` so requests URL-encodes every value
# (spaces, parentheses, commas) instead of relying on a hand-built URL.
regulatory_response = requests.get(
    'https://newsapi.org/v2/everything/',
    params={
        'q': 'ETF AND (regulatory OR regulation)',
        'sources': listOfSources,
        'searchIn': 'title,description',
        'language': 'en',
        'sortBy': 'publishedAt',
        'from': starttime,
        'to': endtime,
        'apiKey': news_api_key,
    },
    timeout=30,  # do not let a stalled connection hang the notebook forever
)
regulatory_result = regulatory_response.json()
export_result = json.dumps(regulatory_result, indent=4)

# Export the results to a json file
with open('regulatory_related_news.json', 'w') as f:
    f.write(export_result)

# Fail with the API's own message rather than a KeyError on a missing key.
if regulatory_result.get('status') != 'ok':
    raise RuntimeError(f"NewsAPI request failed: {regulatory_result.get('message', regulatory_result)}")

# Naming the columns up front keeps the frame well-formed for an empty result.
regulatory_result = (
    pd.DataFrame(
        regulatory_result['articles'],
        columns=['source', 'author', 'title', 'description', 'url', 'urlToImage', 'publishedAt', 'content'],
    )
    .drop_duplicates(subset=['title'])
    .reset_index(drop=True)
    .drop(columns=['source', 'author', 'url', 'urlToImage', 'publishedAt'])
)

# Iterate over the rows that actually exist. The reported 'totalResults' can
# exceed them (de-duplication above removes rows), which used to raise
# IndexError on iloc[i].
regulatory_analysis_list = []
regulatory_records = []
for _, article in regulatory_result.iterrows():
    # Missing (non-string) fields become '' so concatenation cannot fail.
    title, description, content = (
        article[col] if isinstance(article[col], str) else ''
        for col in ('title', 'description', 'content')
    )
    article_text = '\n'.join((title, description, content))
    regulatory_analysis_list.append(article_text)
    finbert_result = finbert_query(article_text)
    regulatory_records.append({
        'title': article['title'],
        'sentiment_label': finbert_result[0].get('label'),
        'sentiment_score': finbert_result[0].get('score'),
        'summary': gpt_summary(title, description, content),
    })

# Build the frame once from the collected records instead of row by row.
regulatory_analysis = pd.DataFrame(
    regulatory_records,
    columns=['title', 'sentiment_label', 'sentiment_score', 'summary'],
)
num_regulatory_results = len(regulatory_analysis)  # number of articles actually analysed

Summary:

BlackRock, the world's largest asset manager, has proposed a Bitcoin Exchange-Traded Fund (ETF) which was added to a clearing-house eligibility file in August. However, this move does not necessarily indicate any regulatory approval. The ETF is still under review by the regulatory authorities. The addition to the Depository Trust & Clearing Corporation (DTCC) file simply means that the ETF has met certain operational requirements and is eligible for clearing and settlement through DTCC's systems. The launch of a Bitcoin ETF by BlackRock could potentially signal a significant shift in the acceptance of cryptocurrency by mainstream financial institutions. However, the regulatory landscape for such products remains uncertain.


In [12]:
# Export the results to a json file
# One record per analysed article, keyed by its position in the frame
# (json.dumps turns the integer keys into strings).
regulatory_summary = regulatory_analysis.to_dict(orient='records')
result = dict(enumerate(regulatory_summary))
with open('regulatory_related_analyzation.json', 'w') as f:
    f.write(json.dumps(result, indent=4))