In [4]:
# Apple (AAPL) stock price history. https://finance.yahoo.com/quote/AAPL/history?p=AAPL


# Macroeconomic Factors

# 1yr Treasury rate. https://www.macrotrends.net/2492/1-year-treasury-rate-yield-chart
# 10yr Treasury rate. https://www.macrotrends.net/2016/10-year-treasury-bond-rate-yield-chart
# Fed Funds Rate (daily data). https://www.macrotrends.net/2015/fed-funds-rate-historical-chart
# GDP (absolute value). https://fred.stlouisfed.org/series/GDP
# GDP growth (yearly). https://data.worldbank.org/indicator/NY.GDP.MKTP.KD.ZG?locations=US
## GDP growth (quarterly). https://data.oecd.org/gdp/real-gdp-forecast.htm
# CPI (monthly). https://fred.stlouisfed.org/series/CPIAUCSL
## Treasury bill. https://home.treasury.gov/interest-rates-data-csv-archive
## Fed Funds Effective Rate (monthly). https://fred.stlouisfed.org/series/FEDFUNDS


# Financial Factors

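# Gross Profit. https://www.macrotrends.net/stocks/charts/AAPL/apple/gross-profit
# ROE. Loaded below from ROE.csv; source URL not recorded here (TODO: add it, presumably macrotrends as well).
# P/E. https://www.macrotrends.net/stocks/charts/AAPL/apple/pe-ratio
# P/S. https://www.macrotrends.net/stocks/charts/AAPL/apple/ps-ratio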


# Market Factors

# Dow Jones Industrial Avg. https://www.macrotrends.net/1319/dow-jones-100-year-historical-chart
# S&P500. https://www.kaggle.com/datasets/andrewmvd/sp-500-stocks

In [1]:
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from seaborn import set_style  # previously imported twice in this cell

# Apply seaborn's "whitegrid" style to plots drawn after this point.
set_style("whitegrid")

In [2]:
# Select seaborn's "whitegrid" plotting style.
set_style("whitegrid")

## Load Apple data

In [3]:
# Show the kernel's working directory: the relative '../../Dataset/...' paths
# used in this notebook are resolved against it. os.getcwd() is plain Python,
# so it does not depend on IPython automagic the way a bare `pwd` does.
import os

os.getcwd()

'/Users/ziyuan/Library/CloudStorage/OneDrive-WashingtonUniversityinSt.Louis/Machine_Learning/Erdos_Bootcamp/Project/modeling/modeling'

In [4]:
# Read the AAPL price-history CSV (path is relative to the working directory).
apple_raw = pd.read_csv(filepath_or_buffer="../../Dataset/AAPL.csv")

In [5]:
# Parse the Date strings into datetimes; Date is the key used by the merges below.
apple_raw["Date"] = pd.to_datetime(arg=apple_raw["Date"])

In [6]:
# Keep only Date and Close. Selecting the wanted columns (instead of dropping
# Open/High/Low/Adj Close/Volume) makes this cell idempotent: running it a
# second time no longer raises KeyError for columns that are already gone.
apple_raw = apple_raw[["Date", "Close"]]

In [7]:
# Display the frame to confirm that only Date and Close remain.
apple_raw

Unnamed: 0,Date,Close
0,2010-11-26,11.250000
1,2010-11-29,11.316786
2,2010-11-30,11.112500
3,2010-12-01,11.300000
4,2010-12-02,11.362500
...,...,...
3016,2022-11-18,151.289993
3017,2022-11-21,148.009995
3018,2022-11-22,150.179993
3019,2022-11-23,151.070007


## Financial factors

In [8]:
# Load the gross-profit table and preview its first three rows.
gross_profit_raw = pd.read_csv(
    filepath_or_buffer="../../Dataset/Financial_Factor/Gross_Profit.csv"
)
gross_profit_raw.head(n=3)

Unnamed: 0,Date,Gross_Profit
0,9/30/22,38095
1,6/30/22,35885
2,3/31/22,42559


In [9]:
# Parse the Date strings (shown as e.g. 9/30/22 in the preview) into datetimes.
gross_profit_raw["Date"] = pd.to_datetime(arg=gross_profit_raw["Date"])

In [10]:
# Order rows oldest-first; merge_asof requires its key column to be sorted.
gross_profit_raw = gross_profit_raw.sort_values(by="Date")

In [11]:
# Preview the three earliest rows after sorting by Date.
gross_profit_raw.head(n=3)

Unnamed: 0,Date,Gross_Profit
54,2009-03-31,3627
53,2009-06-30,3983
52,2009-09-30,5105


In [12]:
# Load the return-on-equity table and preview its first three rows.
roe_raw = pd.read_csv(
    filepath_or_buffer="../../Dataset/Financial_Factor/ROE.csv"
)
roe_raw.head(n=3)

Unnamed: 0,Date,TTM Net Income,Shareholder's Equity,ROE,Unnamed: 4,Net_Inc,SH_equity
0,9/30/22,$99.80B,$50.67B,1.61,,99.8,50.67
1,6/30/22,$99.63B,$58.11B,1.53,,99.63,58.11
2,3/31/22,$101.94B,$67.40B,1.53,,101.94,67.4


In [13]:
# Parse the Date strings into datetimes.
roe_raw["Date"] = pd.to_datetime(arg=roe_raw["Date"])

In [14]:
# Order rows oldest-first; merge_asof requires its key column to be sorted.
roe_raw = roe_raw.sort_values(by="Date")

In [15]:
# Preview the three earliest rows after sorting by Date.
roe_raw.head(n=3)

Unnamed: 0,Date,TTM Net Income,Shareholder's Equity,ROE,Unnamed: 4,Net_Inc,SH_equity
51,2009-12-31,$9.36B,$35.77B,0.32,,9.36,35.77
50,2010-03-31,$10.81B,$39.35B,0.33,,10.81,39.35
49,2010-06-30,$12.24B,$43.11B,0.33,,12.24,43.11


In [16]:
# Keep only Date and ROE. Selecting the wanted columns (instead of dropping the
# net-income / equity / unnamed helper columns) makes this cell idempotent:
# a second run no longer raises KeyError for columns that are already gone.
roe_raw = roe_raw[["Date", "ROE"]]

In [17]:
# Confirm that only Date and ROE remain.
roe_raw.head(n=3)

Unnamed: 0,Date,ROE
51,2009-12-31,0.32
50,2010-03-31,0.33
49,2010-06-30,0.33


In [18]:
# Load the P/E and P/S ratio table and preview its first three rows.
pe_ps_raw = pd.read_csv(
    filepath_or_buffer="../../Dataset/Financial_Factor/PE_PS.csv"
)
pe_ps_raw.head(n=3)

Unnamed: 0,Date,PE_ratio,PS_ratio,Unnamed: 3,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Unnamed: 8
0,9/30/22,22.58,5.74,,,,,,
1,6/30/22,22.49,5.81,,,,,,
2,3/31/22,28.22,7.48,,,,,,


In [19]:
# Parse the Date strings into datetimes.
pe_ps_raw["Date"] = pd.to_datetime(arg=pe_ps_raw["Date"])

In [20]:
# Order rows oldest-first; merge_asof requires its key column to be sorted.
pe_ps_raw = pe_ps_raw.sort_values(by="Date")

In [21]:
# Keep only Date, PE_ratio and PS_ratio. Selecting the wanted columns (instead
# of dropping the empty 'Unnamed: 3' .. 'Unnamed: 8' columns) makes this cell
# idempotent: a second run no longer raises KeyError for columns already gone.
pe_ps_raw = pe_ps_raw[["Date", "PE_ratio", "PS_ratio"]]

In [22]:
# Confirm the trimmed, date-sorted ratio table.
pe_ps_raw.head(n=3)

Unnamed: 0,Date,PE_ratio,PS_ratio
51,2009-12-31,20.74,3.5
50,2010-03-31,19.7,3.59
49,2010-06-30,17.39,3.45


## Financial Factor Merge

In [23]:
# Print the date coverage of each financial-factor table as [latest, earliest].
# The frames were re-sorted by Date but still carry their original row labels,
# so label lookups such as Date[0] / Date[len-1] depend on the CSV's row order
# (and fail on an empty frame). max()/min() give the same endpoints regardless
# of index labels or sort order.
fin_fac_list = [gross_profit_raw, roe_raw, pe_ps_raw]

for item in fin_fac_list:
    print([item["Date"].max(), item["Date"].min()])

[Timestamp('2022-09-30 00:00:00'), Timestamp('2009-03-31 00:00:00')]
[Timestamp('2022-09-30 00:00:00'), Timestamp('2009-12-31 00:00:00')]
[Timestamp('2022-09-30 00:00:00'), Timestamp('2009-12-31 00:00:00')]


In [24]:
# For every price row, attach the most recent gross-profit row dated on or
# before it (merge_asof matches backward on the sorted Date key by default).
# NOTE(review): matching uses the Date values stored in the CSV; if those are
# period-end dates rather than publication dates this leaks future
# information into earlier trading days — TODO confirm.
merge_test1 = pd.merge_asof(left=apple_raw, right=gross_profit_raw, on="Date")
merge_test1

Unnamed: 0,Date,Close,Gross_Profit
0,2010-11-26,11.250000,7512
1,2010-11-29,11.316786,7512
2,2010-11-30,11.112500,7512
3,2010-12-01,11.300000,7512
4,2010-12-02,11.362500,7512
...,...,...,...
3016,2022-11-18,151.289993,38095
3017,2022-11-21,148.009995,38095
3018,2022-11-22,150.179993,38095
3019,2022-11-23,151.070007,38095


In [25]:
# Add ROE the same way: each row receives the latest ROE value dated on or
# before its Date (merge_asof's default backward match).
merge_test2 = pd.merge_asof(left=merge_test1, right=roe_raw, on="Date")
merge_test2

Unnamed: 0,Date,Close,Gross_Profit,ROE
0,2010-11-26,11.250000,7512,0.34
1,2010-11-29,11.316786,7512,0.34
2,2010-11-30,11.112500,7512,0.34
3,2010-12-01,11.300000,7512,0.34
4,2010-12-02,11.362500,7512,0.34
...,...,...,...,...
3016,2022-11-18,151.289993,38095,1.61
3017,2022-11-21,148.009995,38095,1.61
3018,2022-11-22,150.179993,38095,1.61
3019,2022-11-23,151.070007,38095,1.61


In [26]:
# Add the P/E and P/S ratios: each row receives the latest ratio row dated on
# or before its Date (merge_asof's default backward match).
merge_test3 = pd.merge_asof(left=merge_test2, right=pe_ps_raw, on="Date")
merge_test3

Unnamed: 0,Date,Close,Gross_Profit,ROE,PE_ratio,PS_ratio
0,2010-11-26,11.250000,7512,0.34,15.96,3.43
1,2010-11-29,11.316786,7512,0.34,15.96,3.43
2,2010-11-30,11.112500,7512,0.34,15.96,3.43
3,2010-12-01,11.300000,7512,0.34,15.96,3.43
4,2010-12-02,11.362500,7512,0.34,15.96,3.43
...,...,...,...,...,...,...
3016,2022-11-18,151.289993,38095,1.61,22.58,5.74
3017,2022-11-21,148.009995,38095,1.61,22.58,5.74
3018,2022-11-22,150.179993,38095,1.61,22.58,5.74
3019,2022-11-23,151.070007,38095,1.61,22.58,5.74


In [27]:
# Write the merged price + financial-factor table to disk without the row index.
merge_test3.to_csv(
    path_or_buf="../../Dataset/merge_data_fin_factor.csv",
    index=False,
)