In [55]:
# Both Semiconductor & Traditional Tech companies will be in this file
import os
import pandas as pd
from datetime import datetime, timedelta
from dotenv import load_dotenv
import alpaca_trade_api as tradeapi
from newsapi.newsapi_client import NewsApiClient
from nltk.sentiment.vader import SentimentIntensityAnalyzer
from pathlib import Path
import nltk

In [7]:
# Load .env enviroment variables
load_dotenv()

# Set News API Key
newsapi = NewsApiClient(api_key=os.environ["News_API"])

# Set Alpaca API key and secret

alpaca_api_key = os.getenv("API_Key_ID")
alpaca_secret_key = os.getenv("Alpaca_Secret_Key")

api = tradeapi.REST(alpaca_api_key, alpaca_secret_key, api_version='v2')

In [3]:
load_dotenv()

True

In [9]:
# Set the tickers for the Semiconductor stocks
amd_ticker = ["AMD"]
nvda_ticker = ["NVDA"]
# Set timeframe to '1D'
timeframe = "1D"

# Set date and the date for 2008
start_date = pd.Timestamp("2008-01-01", tz="America/New_York").isoformat()
end_date = pd.Timestamp("2008-12-31", tz="America/New_York").isoformat()

# Get the entire year of 2008 worth of historical data for the stocks
amd_df = api.get_barset(
    amd_ticker,
    timeframe,
    limit=None,
    start=start_date,
    end=end_date,
    after=None,
    until=None,
).df

nvda_df = api.get_barset(
    nvda_ticker,
    timeframe,
    limit=None,
    start=start_date,
    end=end_date,
    after=None,
    until=None,
).df
# Display data
amd_df.head()

Unnamed: 0_level_0,AMD,AMD,AMD,AMD,AMD
Unnamed: 0_level_1,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2008-08-11 00:00:00-04:00,5.1,5.2,5.03,5.1,15357358
2008-08-12 00:00:00-04:00,5.2,5.35,5.12,5.21,29786686
2008-08-13 00:00:00-04:00,5.2,5.23,5.0,5.15,18525188
2008-08-14 00:00:00-04:00,5.15,5.37,5.14,5.31,18530013
2008-08-15 00:00:00-04:00,5.37,5.87,5.25,5.62,32811844


In [24]:
# Drop Outer Table Level
# amd_df = amd_df.droplevel(axis=1, level=0)
amd_df['AMD'] = amd_df['close']
amd_df = amd_df.drop(columns = 'close')
amd_df.head()
# Use the drop function to drop extra columns
# amd_df = amd_df.drop(columns=["open", "high", "low", "volume"])

# Since this is daily data, we can keep only the date (remove the time) component of the data
#amd_df.index = amd_df.index.date

# Display sample data


Unnamed: 0,AMD
2008-08-11,5.1
2008-08-12,5.21
2008-08-13,5.15
2008-08-14,5.31
2008-08-15,5.62


In [17]:
nvda_df.head()

Unnamed: 0,close
2008-08-11,11.24
2008-08-12,11.0501
2008-08-13,12.28
2008-08-14,12.99
2008-08-15,12.95


In [25]:
nvda_df['NVDA'] = nvda_df['close']
nvda_df = nvda_df.drop(columns = 'close')
nvda_df.head()

Unnamed: 0,NVDA
2008-08-11,11.24
2008-08-12,11.0501
2008-08-13,12.28
2008-08-14,12.99
2008-08-15,12.95


In [130]:
# Combine DataFrames
semiconductor_df = pd.concat([amd_df, nvda_df], axis="columns", join="inner")
semiconductor_df.head()

Unnamed: 0,date,text,positive,neutral,negative,compound,date.1,text.1,positive.1,neutral.1,negative.1,compound.1
0,2021-07-16,Semiconductor heavyweight \r\n Intel\r\n is in...,0.0,1.0,0.0,0.0,2021-07-16,Intel Corp. INTC -1.26% is exploring a deal to...,0.037,0.963,0.0,0.0258
1,2021-07-09,"When it comes to technical analysis, Im an ama...",0.072,0.782,0.146,-0.3672,2021-07-09,"When it comes to technical analysis, Im an ama...",0.072,0.782,0.146,-0.3672
2,2021-07-16,Intel Corp. INTC -1.26% is exploring a deal to...,0.037,0.963,0.0,0.0258,2021-07-13,You might be alarmed when you see headlines ab...,0.136,0.821,0.044,0.5574
3,2021-07-16,Semiconductor heavyweight \r\n Intel\r\n is in...,0.0,1.0,0.0,0.0,2021-07-08,Getty Images\r\nGetty Images\r\nKey Takeaways:...,0.0,0.906,0.094,-0.4019


In [37]:
semiconductor_returns = semiconductor_df.pct_change().dropna()
semiconductor_df = semiconductor_df.append(semiconductor_returns)
semiconductor_returns.head()

Unnamed: 0,AMD,NVDA
2008-08-12,0.021569,-0.016895
2008-08-13,-0.011516,0.111302
2008-08-14,0.031068,0.057818
2008-08-15,0.05838,-0.003079
2008-08-18,0.049822,0.023166


In [42]:
semiconductor_df['AMD % Returns'] = semiconductor_returns['AMD']
semiconductor_df['NVDA % Returns'] = semiconductor_returns['NVDA']

In [43]:
semiconductor_df.head()

Unnamed: 0,AMD,NVDA,AMD % Returns,NVDA % Returns
2008-08-11,5.1,11.24,,
2008-08-12,5.21,11.0501,0.021569,-0.016895
2008-08-13,5.15,12.28,-0.011516,0.111302
2008-08-14,5.31,12.99,0.031068,0.057818
2008-08-15,5.62,12.95,0.05838,-0.003079


In [46]:
# Fetch news about AMD in 2008
amd_News = newsapi.get_everything(
    q=" AMD AND Advanced Micro Devices AND 2008",
    language="en"
    #to= 2008-12-31
)

# Show the total number of news
amd_News["totalResults"]

21

In [48]:
# Fetch news about AMD in 2008
nvda_News = newsapi.get_everything(
    q=" NVDA AND NVIDIA AND 2008",
    language="en"
    #to= 2008-12-31
)

# Show the total number of news
nvda_News["totalResults"]

4

In [63]:
amd_sentiment = []
analyzer = SentimentIntensityAnalyzer()

for article in amd_News["articles"]:
    try:
        date = article["publishedAt"][:10] # Measure for the date
        text = article["content"][0:198]
        sentiment = analyzer.polarity_scores(text) # Getting polarity scores so we can build our sentiment from the analyzer
        pos = sentiment["pos"]
        neu = sentiment["neu"]
        neg = sentiment["neg"]
        compound = sentiment["compound"] # Compound sentiment
        
        amd_sentiment.append({
            "text": text,
            "date": date,
            "positive": pos,
            "neutral": neu,
            "negative": neg,
            "compound": compound # Compound sentiment
        })
        
    except AttributeError:
        pass

amd_df = pd.DataFrame(amd_sentiment) 
semiconductor_df = pd.DataFrame(amd_sentiment) 
cols = ['date', 'text', 'positive', 'neutral', 'negative', 'compound']
amd_df = amd_df[cols] # Set the reorganized columns for the dataframe
amd_df.describe()

Unnamed: 0,positive,neutral,negative,compound
count,20.0,20.0,20.0,20.0
mean,0.026,0.9261,0.04795,-0.04409
std,0.038471,0.085267,0.080186,0.305732
min,0.0,0.75,0.0,-0.6124
25%,0.0,0.90075,0.0,-0.23175
50%,0.0,0.9545,0.0,0.0
75%,0.04175,1.0,0.06575,0.0258
max,0.136,1.0,0.25,0.5574


In [64]:
nvda_sentiment = []

for article in nvda_News["articles"]:
    try:
        date = article["publishedAt"][:10] # Measure for the date
        text = article["content"][0:198]
        sentiment = analyzer.polarity_scores(text) # Getting polarity scores so we can build our sentiment from the analyzer
        pos = sentiment["pos"]
        neu = sentiment["neu"]
        neg = sentiment["neg"]
        compound = sentiment["compound"] # Compound sentiment
        
        nvda_sentiment.append({
            "text": text,
            "date": date,
            "positive": pos,
            "neutral": neu,
            "negative": neg,
            "compound": compound # Compound sentiment
        })
        
    except AttributeError:
        pass

nvda_df = pd.DataFrame(nvda_sentiment) 
sentiment_df = pd.DataFrame(nvda_sentiment)
cols = ['date', 'text', 'positive', 'neutral', 'negative', 'compound']
nvda_df = nvda_df[cols] # Set the reorganized columns for the dataframe
nvda_df.describe()

Unnamed: 0,positive,neutral,negative,compound
count,4.0,4.0,4.0,4.0
mean,0.06125,0.868,0.071,-0.046475
std,0.057858,0.081801,0.063045,0.446871
min,0.0,0.782,0.0,-0.4019
25%,0.02775,0.81125,0.033,-0.375875
50%,0.0545,0.8635,0.069,-0.1707
75%,0.088,0.92025,0.107,0.1587
max,0.136,0.963,0.146,0.5574


In [58]:
nvda_df.head()

Unnamed: 0,date,text,positive,neutral,negative,compound
0,2021-07-16,Intel Corp. INTC -1.26% is exploring a deal to...,0.037,0.963,0.0,0.0258
1,2021-07-09,"When it comes to technical analysis, Im an ama...",0.072,0.782,0.146,-0.3672
2,2021-07-13,You might be alarmed when you see headlines ab...,0.136,0.821,0.044,0.5574
3,2021-07-08,Getty Images\r\nGetty Images\r\nKey Takeaways:...,0.0,0.906,0.094,-0.4019


In [59]:
amd_df.head()

Unnamed: 0,date,text,positive,neutral,negative,compound
0,2021-07-16,Semiconductor heavyweight \r\n Intel\r\n is in...,0.0,1.0,0.0,0.0
1,2021-07-09,"When it comes to technical analysis, Im an ama...",0.072,0.782,0.146,-0.3672
2,2021-07-16,Intel Corp. INTC -1.26% is exploring a deal to...,0.037,0.963,0.0,0.0258
3,2021-07-16,Semiconductor heavyweight \r\n Intel\r\n is in...,0.0,1.0,0.0,0.0
4,2021-07-08,Yves here. Get a cup of coffee. This is an ext...,0.072,0.928,0.0,0.3384


In [114]:
# Semiconductor_df file
chip_path = Path(r'C:\Users\Yonathan\Desktop\semiconductor.csv', index=False)
semiconductor_df.to_csv(chip_path)

# amd_df file
amd_path = Path(r'C:\Users\Yonathan\Desktop\amd.csv', index=False)
amd_df.to_csv(amd_path)

# nvda_df file
nvda_path = Path(r'C:\Users\Yonathan\Desktop\nvda.csv', index=False)
nvda_df.to_csv(nvda_path)

In [85]:
#semiconductor_df.index = semiconductor_df.index.date ?
semiconductor_df.head()

Unnamed: 0,text,date,positive,neutral,negative,compound
0,Semiconductor heavyweight \r\n Intel\r\n is in...,2021-07-16,0.0,1.0,0.0,0.0
1,"When it comes to technical analysis, Im an ama...",2021-07-09,0.072,0.782,0.146,-0.3672
2,Intel Corp. INTC -1.26% is exploring a deal to...,2021-07-16,0.037,0.963,0.0,0.0258
3,Semiconductor heavyweight \r\n Intel\r\n is in...,2021-07-16,0.0,1.0,0.0,0.0
4,Yves here. Get a cup of coffee. This is an ext...,2021-07-08,0.072,0.928,0.0,0.3384


In [66]:
# Tech stocks

In [95]:
# Set the tickers for the Semiconductor stocks
aapl_ticker = ["AAPL"]
msft_ticker = ["MSFT"]
# Set timeframe to '1D'
timeframe = "1D"

# Set date and the date for 2008
start_date = pd.Timestamp("2008-01-01", tz="America/New_York").isoformat()
end_date = pd.Timestamp("2008-12-31", tz="America/New_York").isoformat()

# Get the entire year of 2008 worth of historical data for the stocks
aapl_df = api.get_barset(
    aapl_ticker,
    timeframe,
    limit=None,
    start=start_date,
    end=end_date,
    after=None,
    until=None,
).df

msft_df = api.get_barset(
    msft_ticker,
    timeframe,
    limit=None,
    start=start_date,
    end=end_date,
    after=None,
    until=None,
).df
# Display data
aapl_df.head()

Unnamed: 0_level_0,AAPL,AAPL,AAPL,AAPL,AAPL
Unnamed: 0_level_1,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2008-08-11 00:00:00-04:00,24.303,25.214,24.239,24.793,217147576
2008-08-12 00:00:00-04:00,24.794,25.613,24.787,25.269,202305395
2008-08-13 00:00:00-04:00,25.416,25.714,25.129,25.617,204471048
2008-08-14 00:00:00-04:00,25.473,25.779,25.406,25.609,166907819
2008-08-15 00:00:00-04:00,25.567,25.679,25.007,25.1,155869244


In [96]:
msft_df.head()

Unnamed: 0_level_0,MSFT,MSFT,MSFT,MSFT,MSFT
Unnamed: 0_level_1,open,high,low,close,volume
time,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
2008-08-11 00:00:00-04:00,27.87,28.4,27.66,27.9,63751320
2008-08-12 00:00:00-04:00,27.76,28.36,27.58,28.14,58188143
2008-08-13 00:00:00-04:00,28.07,28.5,27.76,27.91,46635919
2008-08-14 00:00:00-04:00,27.79,28.28,27.56,27.92,43124440
2008-08-15 00:00:00-04:00,27.92,28.15,27.58,27.8,43548108


In [101]:
# Drop Outer Table Level
#aapl_df = aapl_df.droplevel(axis=1, level=0)
#msft_df = msft_df.droplevel(axis=1, level=0)

# Use the drop function to drop extra columns
#aapl_df = aapl_df.drop(columns=["open", "high", "low", "volume"])
#msft_df = msft_df.drop(columns=["open", "high", "low", "volume"])

# Since this is daily data, we can keep only the date (remove the time) component of the data
#aapl_df.index = aapl_df.index.date
#msft_df.index = msft_df.index.date
# Display sample data
aapl_df.head()

Unnamed: 0,close
2008-08-11,24.793
2008-08-12,25.269
2008-08-13,25.617
2008-08-14,25.609
2008-08-15,25.1


In [102]:
aapl_df['AAPL'] = aapl_df['close']
aapl_df = aapl_df.drop(columns = 'close')
aapl_df.head()

Unnamed: 0,AAPL
2008-08-11,24.793
2008-08-12,25.269
2008-08-13,25.617
2008-08-14,25.609
2008-08-15,25.1


In [103]:
msft_df['MSFT'] = msft_df['close']
msft_df = msft_df.drop(columns = 'close')
msft_df.head()

Unnamed: 0,MSFT
2008-08-11,27.9
2008-08-12,28.14
2008-08-13,27.91
2008-08-14,27.92
2008-08-15,27.8


In [104]:
# Combine DataFrames
tech_df = pd.concat([aapl_df, msft_df], axis="columns", join="inner")
tech_df.tail()

Unnamed: 0,AAPL,MSFT
2008-12-24,12.157,19.16
2008-12-26,12.26,19.12
2008-12-29,12.371,18.97
2008-12-30,12.327,19.32
2008-12-31,12.191,19.44


In [105]:
tech_returns = tech_df.pct_change().dropna()
tech_df = tech_df.append(tech_returns)
tech_returns.head()

Unnamed: 0,AAPL,MSFT
2008-08-12,0.019199,0.008602
2008-08-13,0.013772,-0.008173
2008-08-14,-0.000312,0.000358
2008-08-15,-0.019876,-0.004298
2008-08-18,-0.001594,-0.003237


In [106]:
tech_df['MSFT % Returns'] = tech_returns['MSFT']
tech_df['AAPL % Returns'] = tech_returns['AAPL']

In [107]:
tech_df.head()

Unnamed: 0,AAPL,MSFT,MSFT % Returns,AAPL % Returns
2008-08-11,24.793,27.9,,
2008-08-12,25.269,28.14,0.008602,0.019199
2008-08-13,25.617,27.91,-0.008173,0.013772
2008-08-14,25.609,27.92,0.000358,-0.000312
2008-08-15,25.1,27.8,-0.004298,-0.019876


In [108]:
# Fetch news about AAPL in 2008
aapl_News = newsapi.get_everything(
    q=" AAPL AND AAPL AND 2008",
    language="en"
    #to= 2008-12-31
)

# Show the total number of news
aapl_News["totalResults"]

28

In [109]:
# Fetch news about AMD in 2008
msft_News = newsapi.get_everything(
    q=" MSFT AND Microsoft AND 2008",
    language="en"
    #to= 2008-12-31
)

# Show the total number of news
msft_News["totalResults"]

23

In [124]:
aapl_sentiment = []
analyzer = SentimentIntensityAnalyzer()

for article in aapl_News["articles"]:
    try:
        date = article["publishedAt"][:10] # Measure for the date
        text = article["content"][0:198]
        sentiment = analyzer.polarity_scores(text) # Getting polarity scores so we can build our sentiment from the analyzer
        pos = sentiment["pos"]
        neu = sentiment["neu"]
        neg = sentiment["neg"]
        compound = sentiment["compound"] # Compound sentiment
        
        aapl_sentiment.append({
            "text": text,
            "date": date,
            "positive": pos,
            "neutral": neu,
            "negative": neg,
            "compound": compound # Compound sentiment
        })
        
    except AttributeError:
        pass

aapl_df = pd.DataFrame(aapl_sentiment) 
tech_df = pd.DataFrame(aapl_sentiment) 
cols = ['date', 'positive', 'neutral', 'negative', 'compound', 'text']
aapl_df = aapl_df[cols] # Set the reorganized columns for the dataframe
aapl_df.describe()

Unnamed: 0,positive,neutral,negative,compound
count,20.0,20.0,20.0,20.0
mean,0.06525,0.89715,0.03765,0.11821
std,0.085083,0.118522,0.080042,0.437554
min,0.0,0.574,0.0,-0.9422
25%,0.0,0.827,0.0,-0.00645
50%,0.0205,0.92,0.0,0.0
75%,0.11675,1.0,0.04675,0.39435
max,0.306,1.0,0.344,0.9044


In [121]:
msft_sentiment = []
analyzer = SentimentIntensityAnalyzer()

for article in amd_News["articles"]:
    try:
        date = article["publishedAt"][:10] # Measure for the date
        sentiment = analyzer.polarity_scores(text) # Getting polarity scores so we can build our sentiment from the analyzer
        pos = sentiment["pos"]
        neu = sentiment["neu"]
        neg = sentiment["neg"]
        compound = sentiment["compound"] # Compound sentiment
        text = article["content"][0:198]
        
        msft_sentiment.append({
            "date": date,
            "positive": pos,
            "neutral": neu,
            "negative": neg,
            "compound": compound, # Compound sentiment
            "text": text
        })
        
    except AttributeError:
        pass

msft_df = pd.DataFrame(msft_sentiment) 
msft_df = pd.DataFrame(msft_sentiment) 
cols = ['date', 'positive', 'neutral', 'negative', 'compound', 'text']
msft_df = msft_df[cols] # Set the reorganized columns for the dataframe
msft_df.describe()

Unnamed: 0,positive,neutral,negative,compound
count,20.0,20.0,20.0,20.0
mean,0.026,0.9261,0.04795,-0.04409
std,0.038471,0.085267,0.080186,0.305732
min,0.0,0.75,0.0,-0.6124
25%,0.0,0.90075,0.0,-0.23175
50%,0.0,0.9545,0.0,0.0
75%,0.04175,1.0,0.06575,0.0258
max,0.136,1.0,0.25,0.5574


In [125]:
aapl_df.head()

Unnamed: 0,date,positive,neutral,negative,compound,text
0,2021-07-14,0.119,0.881,0.0,0.6369,Retirement can be the best time of our lifebut...
1,2021-07-14,0.0,1.0,0.0,0.0,How long can the Federal Reserve hold off befo...
2,2021-06-24,0.0,1.0,0.0,0.0,BOSTON--(BUSINESS WIRE)--According to new rese...
3,2021-07-14,0.0,0.967,0.033,-0.0258,Markets opened higher as investors awaited tes...
4,2021-06-28,0.2,0.8,0.0,0.7569,"In this article, we discuss the 10 most popula..."


In [123]:
msft_df.head()

Unnamed: 0,date,positive,neutral,negative,compound,text
0,2021-07-16,0.0,0.75,0.25,-0.6124,Semiconductor heavyweight \r\n Intel\r\n is in...
1,2021-07-09,0.0,1.0,0.0,0.0,"When it comes to technical analysis, Im an ama..."
2,2021-07-16,0.072,0.782,0.146,-0.3672,Intel Corp. INTC -1.26% is exploring a deal to...
3,2021-07-16,0.037,0.963,0.0,0.0258,Semiconductor heavyweight \r\n Intel\r\n is in...
4,2021-07-08,0.0,1.0,0.0,0.0,Yves here. Get a cup of coffee. This is an ext...


In [135]:
tech_df.head()

Unnamed: 0,text,date,positive,neutral,negative,compound
0,Retirement can be the best time of our lifebut...,2021-07-14,0.119,0.881,0.0,0.6369
1,How long can the Federal Reserve hold off befo...,2021-07-14,0.0,1.0,0.0,0.0
2,BOSTON--(BUSINESS WIRE)--According to new rese...,2021-06-24,0.0,1.0,0.0,0.0
3,Markets opened higher as investors awaited tes...,2021-07-14,0.0,0.967,0.033,-0.0258
4,"In this article, we discuss the 10 most popula...",2021-06-28,0.2,0.8,0.0,0.7569


In [136]:
# Semiconductor_df file
tech_path = Path(r'C:\Users\Yonathan\Desktop\tech.csv', index=False)
tech_df.to_csv(tech_path)

# aapl_df file
aapl_path = Path(r'C:\Users\Yonathan\Desktop\aapl.csv', index=False)
aapl_df.to_csv(aapl_path)

# msft_df file
msft_path = Path(r'C:\Users\Yonathan\Desktop\msft.csv', index=False)
msft_df.to_csv(msft_path)