In [None]:
!pip install transformers
!pip install yfinance
!pip install plotly

In [180]:
import requests
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
import datetime
import yfinance as yf
import pandas as pd
import pytz
import plotly.graph_objects as go

In [194]:
API_TOKEN = <API_TOKEN>

In [195]:
tokenizer = AutoTokenizer.from_pretrained("mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis")
model = AutoModelForSequenceClassification.from_pretrained("mrm8488/distilroberta-finetuned-financial-news-sentiment-analysis")
classifier = pipeline("sentiment-analysis", model=model, tokenizer=tokenizer)

def fetch_prive_n_news(stock_id, start_date, end_date):
  raw_df =  (
        yf.Ticker(stock_id)
        .history(start=start_date, end=end_date, interval='1h')
        .tz_convert(datetime.timezone.utc)
    )
  raw_df['timestamp'] = raw_df.index.astype('int64') // 10**9
  output = pd.DataFrame(requests.get(f'https://finnhub.io/api/v1/company-news?symbol=TSLA&from={start_date}&to={end_date}&token={API_TOKEN}').json())
  output['datetime'] = output['datetime'].apply(lambda x: datetime.datetime.fromtimestamp(x))
  output['sentiment'] = output['summary'].apply(lambda x: classifier(x))
  output['sentiment_group'] = output['sentiment'].apply(lambda x: x[0]['label'])
  output['sentiment_score'] = output['sentiment'].apply(lambda x: x[0]['score'])
  # Bir sonraki 30 gecedeki degeri bizim icin onemli. (ornegin 8.45teki haberin etkisini saatlik bir sekilde her 30 gece akan datada 9.30da gormeyi bekleriz)
  output['datetime_group'] = output['datetime'].apply(lambda x:x.replace(minute=0, second=0, microsecond=0) + datetime.timedelta(hours=1, minutes=30))
  output['datetime_group'] = output['datetime_group'].apply(lambda x:datetime.datetime.timestamp(x))
  sentiment_results = output.groupby(['datetime_group','sentiment_group'])['category'].agg('count').reset_index()
  df_dict = dict(zip(sentiment_results['datetime_group'], sentiment_results['sentiment_group'].apply(lambda x: -1 if x == 'negative' else 1 if x == 'positive' else 0) * sentiment_results['category']))
  raw_df = raw_df.merge(pd.DataFrame.from_dict(df_dict, orient='index', columns=['sentiment_score']), left_on='timestamp', right_index=True, how='left').fillna(0) 
  # Draw a price Chart and draw a second y axis for sentiment score
  fig = go.Figure()
  fig.add_trace(go.Scatter(x=raw_df.index, y=raw_df['Close'], name='Price'))
  fig.add_trace(go.Bar(x=raw_df.index, y=raw_df['sentiment_score'], name='Sentiment Score', yaxis='y2', opacity=0.5))
  fig.update_layout(yaxis2=dict(overlaying='y', side='right'))
  fig.show()
  return raw_df

In [196]:
raw_df = fetch_prive_n_news('AAPL', '2022-11-27', '2022-12-04')


casting datetime64[ns, UTC] values to int64 with .astype(...) is deprecated and will raise in a future version. Use .view(...) instead.

