# Ensemble Learning

## Initial Imports

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [11]:
!pip install emoji

Collecting emoji
  Downloading emoji-1.2.0-py3-none-any.whl (131 kB)
Installing collected packages: emoji
Successfully installed emoji-1.2.0


In [12]:
import emoji
import os
import re
from dotenv import load_dotenv
import numpy as np
import pandas as pd
from pathlib import Path
from collections import Counter
import nltk as nltk
nltk.download('vader_lexicon')
from nltk.sentiment.vader import SentimentIntensityAnalyzer
analyzer = SentimentIntensityAnalyzer()
from collections import Counter
import csv
# from textblob import TextBlob

%matplotlib inline

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\LEON\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


In [3]:
from sklearn.metrics import balanced_accuracy_score
from sklearn.metrics import confusion_matrix
from imblearn.metrics import classification_report_imbalanced

## Read the CSV and Perform Basic Data Cleaning

In [4]:
# Load environment variables
load_dotenv()

# Import environment variables
api_key = os.getenv("API_KEY")


# Verify that environment variables were loaded
print(f"FMP API key data type: {type(api_key)}")


FMP API key data type: <class 'NoneType'>


In [5]:
#!/usr/bin/env python

try:
    # For Python 3.0 and later
    from urllib.request import urlopen
except ImportError:
    # Fall back to Python 2's urllib2
    from urllib2 import urlopen

import json

def get_jsonparsed_data(url):
    """
    Receive the content of ``url``, parse it as JSON and return the object.

    Parameters
    ----------
    url : str

    Returns
    -------
    dict
    """
    response = urlopen(url)
    data = response.read().decode("utf-8")
    return json.loads(data)

url = ("https://financialmodelingprep.com/api/v3/stock_news?tickers=TSLA&limit=100000&apikey=e8ac3c3d2405f465935cd797c342b129")


news_df = pd.DataFrame(get_jsonparsed_data(url))



In [6]:
cols_to_keep = ["publishedDate", "text"]  
news_df = news_df[cols_to_keep]
news_df['publishedDate'] = pd.to_datetime(news_df['publishedDate']).dt.date

news_df = news_df.sort_values(by=['publishedDate'], ascending=False)
news_df = news_df.rename(columns={"publishedDate": "date"})
news_df.to_csv('TSLA.csv', sep='\t', encoding='utf-8')

In [7]:
news_df

Unnamed: 0,date,text
0,2021-02-27,Matt McCall believes bitcoin is superior to go...
2,2021-02-27,These are some hidden gems in clean energy.
1,2021-02-27,Interest rates have risen meaningfully in rece...
11,2021-02-26,With the Nasdaq breaking below its 50-day line...
18,2021-02-26,Some hedge fund managers are getting concerned...
17,2021-02-26,Hyundai Motor Company (OTC: HYMFT) has managed...
16,2021-02-26,Tesla CEO Elon Musk has confirmed that product...
14,2021-02-26,"With a limited supply and wider acceptance, Bi..."
13,2021-02-26,The temptation to spice up this low-maintenanc...
12,2021-02-26,"When Cathie Wood talks, it's worth more than j..."


In [8]:

news_df = news_df.groupby(['date'],as_index=True).agg(lambda x : x.sum() if x.dtype=='int64' else ' '.join(x))
pd.set_option("display.max_colwidth", 300)


In [13]:
def clean_titles(text):
    text = re.sub('b[(\')]','',text)
    text = re.sub('b[(\")]','',text)
    text = re.sub("\'",'',text)
    text = re.sub("@[A-Za-z0-9]+","",text) #Remove @ sign
    text = re.sub(r"(?:\@|http?\://|https?\://|www)\S+", "", text) #Remove http links
    text = " ".join(text.split())
    text = ''.join(c for c in text if c not in emoji.UNICODE_EMOJI) #Remove Emojis
    text = text.replace("#", "").replace("_", " ") #Remove hashtag sign but keep the text
    return text

for i in range(0, len('tweet')):
    news_df['text'] = news_df.apply(lambda x: clean_titles(x['text']), axis = 1)
    
news_df

Unnamed: 0_level_0,text
date,Unnamed: 1_level_1
2018-12-24,Market Update
2018-12-25,Jamie Albertine of Consumer Edge Research joins Closing Bell to review the stocks in the auto sector and share his predictions for 2019.
2018-12-26,Wedbush Securities Managing Director Daniel Ives on the outlook for Tesla and Apple in 2019.
2018-12-27,"Ed Kim, AutoPacific VP of industry analysis, on Teslas new board members and how President Trumps trade spats have affected the U.S. auto industry."
2019-01-01,"CNBCs Phil LeBeau reports on Teslas deliveries for the fourth quarter, which missed forecasts and sent shares lower."
2019-01-03,"“Bulls & Bears” panel discusses how Tesla shares dropped after missing delivery estimates and cutting the prices on all of its models by $2,000."
2019-01-06,"CNBCs Jim Cramer discusses his take on the latest Tesla news: The electric automaker is preparing to open its first non-U.S. factory in Shanghai, China."
2019-01-17,"The Wall Street Journal is reporting that Tesla is cutting their workforce, hoping to be able to lower the price of the model 3 sedan."
2019-01-18,"Elon Musk is cutting Tesla Inc.s workforce by 7 percent -- or more than 3,000 jobs -- warning that the “road ahead is very difficult” in making electric cars more affordable for the mass market. Bloomberg Businessweeks Max Chafkin has more on ""Bloomberg Technology."""
2019-01-22,"IBM said adjusted earnings for the three months ending in December came in at $4.87 per share, down 5% from the same period last year but ahead of the Street consensus of $4.82 per share. Teslas most recent downgrade & the weakness in China and how it could benefit the U.S.-China trade talks."


In [14]:

def get_sentiment(text):
    sia = SentimentIntensityAnalyzer()
    sentiment = sia.polarity_scores(text)
    return sentiment


compound = []
pos = []
neu = []
neg = []
SIA = 0

for i in range(0, len(news_df['text'])):
    SIA = get_sentiment(news_df['text'][i])
    compound.append(SIA['compound'])
    pos.append(SIA['pos'])
    neu.append(SIA['neu'])
    neg.append(SIA['neg'])
    
# compound - sum of all the lexicon ratings 

news_df['compound'] = compound
news_df['positive'] = pos
news_df['neutral'] = neu
news_df['negative'] = neg

news_df.head()


Unnamed: 0_level_0,text,compound,positive,neutral,negative
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
2018-12-24,Market Update,0.0,0.0,1.0,0.0
2018-12-25,Jamie Albertine of Consumer Edge Research joins Closing Bell to review the stocks in the auto sector and share his predictions for 2019.,0.296,0.091,0.909,0.0
2018-12-26,Wedbush Securities Managing Director Daniel Ives on the outlook for Tesla and Apple in 2019.,0.296,0.136,0.864,0.0
2018-12-27,"Ed Kim, AutoPacific VP of industry analysis, on Teslas new board members and how President Trumps trade spats have affected the U.S. auto industry.",-0.1531,0.0,0.935,0.065
2019-01-01,"CNBCs Phil LeBeau reports on Teslas deliveries for the fourth quarter, which missed forecasts and sent shares lower.",-0.296,0.102,0.694,0.204


In [18]:
# create a list of our conditions
conditions = [
    (news_df['positive'] > 0.15),
    (news_df['negative'] > 0.15)
    ]

# create a list of the values we want to assign for each condition
values = [1, -1,]

# create a new column and use np.select to assign values to it using our lists as arguments
news_df['Sentiment Signal'] = np.select(conditions, values)

# display updated DataFrame
# news_df[news_df['Sentiment Signal']!=0]
news_df

Unnamed: 0_level_0,text,compound,positive,neutral,negative,Sentiment Signal
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
2018-12-24,Market Update,0.0000,0.000,1.000,0.000,0
2018-12-25,Jamie Albertine of Consumer Edge Research joins Closing Bell to review the stocks in the auto sector and share his predictions for 2019.,0.2960,0.091,0.909,0.000,0
2018-12-26,Wedbush Securities Managing Director Daniel Ives on the outlook for Tesla and Apple in 2019.,0.2960,0.136,0.864,0.000,0
2018-12-27,"Ed Kim, AutoPacific VP of industry analysis, on Teslas new board members and how President Trumps trade spats have affected the U.S. auto industry.",-0.1531,0.000,0.935,0.065,0
2019-01-01,"CNBCs Phil LeBeau reports on Teslas deliveries for the fourth quarter, which missed forecasts and sent shares lower.",-0.2960,0.102,0.694,0.204,-1
2019-01-03,"“Bulls & Bears” panel discusses how Tesla shares dropped after missing delivery estimates and cutting the prices on all of its models by $2,000.",-0.1280,0.085,0.772,0.143,0
2019-01-06,"CNBCs Jim Cramer discusses his take on the latest Tesla news: The electric automaker is preparing to open its first non-U.S. factory in Shanghai, China.",0.0000,0.000,1.000,0.000,0
2019-01-17,"The Wall Street Journal is reporting that Tesla is cutting their workforce, hoping to be able to lower the price of the model 3 sedan.",0.0258,0.102,0.764,0.135,0
2019-01-18,"Elon Musk is cutting Tesla Inc.s workforce by 7 percent -- or more than 3,000 jobs -- warning that the “road ahead is very difficult” in making electric cars more affordable for the mass market. Bloomberg Businessweeks Max Chafkin has more on ""Bloomberg Technology.""",-0.4404,0.000,0.913,0.087,0
2019-01-22,"IBM said adjusted earnings for the three months ending in December came in at $4.87 per share, down 5% from the same period last year but ahead of the Street consensus of $4.82 per share. Teslas most recent downgrade & the weakness in China and how it could benefit the U.S.-China trade talks.",0.5719,0.140,0.799,0.062,0


In [16]:

news_df = news_df.drop(columns=['text', 'subjectivity', 'polarity', 'compound', 'positive', 'neutral', 'negative'])
news_df[news_df['Sentiment Signal']!=0]
news_df

KeyError: "['subjectivity' 'polarity'] not found in axis"

In [None]:
news_df.to_csv('Sentiment Signal.csv',  encoding='utf-8')
 