In [4]:
# apple stock history. https://finance.yahoo.com/quote/AAPL/history?p=AAPL


# Macroeconomic Factors

# 1yr Treasury rate. https://www.macrotrends.net/2492/1-year-treasury-rate-yield-chart
# 10yr Treasury rate. https://www.macrotrends.net/2016/10-year-treasury-bond-rate-yield-chart
# Fed Funds Rate (daily data). https://www.macrotrends.net/2015/fed-funds-rate-historical-chart
# GDP (absolute value). https://fred.stlouisfed.org/series/GDP
# GDP growth (yearly). https://data.worldbank.org/indicator/NY.GDP.MKTP.KD.ZG?locations=US
## GDP growth (quarterly). https://data.oecd.org/gdp/real-gdp-forecast.htm
# CPI (monthly). https://fred.stlouisfed.org/series/CPIAUCSL
## Treasury bill. https://home.treasury.gov/interest-rates-data-csv-archive
## Fed Funds Effective Rate (monthly). https://fred.stlouisfed.org/series/FEDFUNDS


# Financial Factors

# Gross Profit. https://www.macrotrends.net/stocks/charts/AAPL/apple/gross-profit
# P/E. https://www.macrotrends.net/stocks/charts/AAPL/apple/pe-ratio
# P/S. https://www.macrotrends.net/stocks/charts/AAPL/apple/ps-ratio


# Market Factors

# Dow Jones Industrial Avg. https://www.macrotrends.net/1319/dow-jones-100-year-historical-chart
# S&P500. https://www.kaggle.com/datasets/andrewmvd/sp-500-stocks

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from datetime import datetime, timedelta
from seaborn import set_style

In [2]:
set_style("whitegrid")

## Load apple data

In [3]:
pwd

'/Users/ziyuan/Library/CloudStorage/OneDrive-WashingtonUniversityinSt.Louis/Machine_Learning/Erdos_Bootcamp/Project/modeling/modeling'

In [4]:
apple_raw=pd.read_csv('../../Dataset/AAPL.csv')

In [5]:
apple_raw["Date"] = pd.to_datetime(apple_raw["Date"])

In [6]:
apple_raw=apple_raw.drop(columns=['Open','High','Low','Adj Close'])

In [7]:
apple_raw

Unnamed: 0,Date,Close,Volume
0,2010-11-26,11.250000,237585600
1,2010-11-29,11.316786,445785200
2,2010-11-30,11.112500,501858000
3,2010-12-01,11.300000,461750800
4,2010-12-02,11.362500,462837200
...,...,...,...
3016,2022-11-18,151.289993,74794600
3017,2022-11-21,148.009995,58724100
3018,2022-11-22,150.179993,51804100
3019,2022-11-23,151.070007,58301400


## Market Factors

In [16]:
DJ_raw=pd.read_csv('../../Dataset/Market_Factor/Dow_Jones.csv')
DJ_raw.head(3)

Unnamed: 0,Date,Dow_Jones
0,11/30/09,10344.84
1,12/1/09,10471.58
2,12/2/09,10452.68


In [17]:
DJ_raw["Date"] = pd.to_datetime(DJ_raw["Date"])

In [18]:
DJ_raw=DJ_raw.sort_values(by=['Date'])

In [19]:
DJ_raw.head(3)

Unnamed: 0,Date,Dow_Jones
0,2009-11-30,10344.84
1,2009-12-01,10471.58
2,2009-12-02,10452.68


In [20]:
SP500_raw=pd.read_csv('../../Dataset/Market_Factor/sp500_index.csv')
SP500_raw.head(3)

Unnamed: 0,Date,SP500
0,11/12/12,1380.03
1,11/13/12,1374.53
2,11/14/12,1355.49


In [21]:
SP500_raw["Date"] = pd.to_datetime(SP500_raw["Date"])

In [22]:
SP500_raw=SP500_raw.sort_values(by=['Date'])

In [23]:
SP500_raw.head(3)

Unnamed: 0,Date,SP500
0,2012-11-12,1380.03
1,2012-11-13,1374.53
2,2012-11-14,1355.49


## Market Factor Merge

In [24]:
fin_fac_list = [DJ_raw, SP500_raw]

for item in fin_fac_list:
    print([item.Date[0],item.Date[len(item)-1]])

[Timestamp('2009-11-30 00:00:00'), Timestamp('2022-12-16 00:00:00')]
[Timestamp('2012-11-12 00:00:00'), Timestamp('2022-11-11 00:00:00')]


In [25]:
merge_test1 = pd.merge_asof(apple_raw, DJ_raw, on='Date')
merge_test1

Unnamed: 0,Date,Close,Volume,Dow_Jones
0,2010-11-26,11.250000,237585600,11091.87
1,2010-11-29,11.316786,445785200,11052.49
2,2010-11-30,11.112500,501858000,11006.02
3,2010-12-01,11.300000,461750800,11255.78
4,2010-12-02,11.362500,462837200,11362.41
...,...,...,...,...
3016,2022-11-18,151.289993,74794600,33745.69
3017,2022-11-21,148.009995,58724100,33700.28
3018,2022-11-22,150.179993,51804100,34098.10
3019,2022-11-23,151.070007,58301400,34194.06


In [26]:
merge_test2 = pd.merge_asof(merge_test1, SP500_raw, on='Date')
merge_test2

Unnamed: 0,Date,Close,Volume,Dow_Jones,SP500
0,2010-11-26,11.250000,237585600,11091.87,
1,2010-11-29,11.316786,445785200,11052.49,
2,2010-11-30,11.112500,501858000,11006.02,
3,2010-12-01,11.300000,461750800,11255.78,
4,2010-12-02,11.362500,462837200,11362.41,
...,...,...,...,...,...
3016,2022-11-18,151.289993,74794600,33745.69,3992.93
3017,2022-11-21,148.009995,58724100,33700.28,3992.93
3018,2022-11-22,150.179993,51804100,34098.10,3992.93
3019,2022-11-23,151.070007,58301400,34194.06,3992.93


In [28]:
merge_test2 = merge_test2.dropna(subset=['SP500'])

In [29]:
merge_test2.to_csv('../../Dataset/merge_data_market_factor.csv', index=False)