In [4]:
import pandas as pd
import yfinance as yf
from sqlalchemy import create_engine
from ta import add_all_ta_features


In [38]:
portfolio = ["AAPL", "DIS","MSFT", "GOOGL", "NVDA", "DHR", "AMD", "SPY"]

In [39]:
def download_data(stock):
    return yf.download(stock, start="2004-1-01", period="max")


In [41]:
def calculateMA(stock_data):    
    # Calculate basic indicators
    stock_data["MA9"] = stock_data["Close"].rolling(window=9).mean()
    stock_data["MA21"] = stock_data["Close"].rolling(window=21).mean()
    stock_data["MA35"] = stock_data["Close"].rolling(window=35).mean()
    stock_data["MA50"] = stock_data["Close"].rolling(window=50).mean()
    stock_data["MA100"] = stock_data["Close"].rolling(window=100).mean()
    stock_data["MA200"] = stock_data["Close"].rolling(window=200).mean()
    return stock_data

In [64]:
def calculateVWAP(stock_data):
    # Calculate typical price
    stock_data["typical_price"] = (stock_data["High"] + stock_data["Low"] + stock_data["Close"]) / 3

    # # Calculate cumulative volume
    stock_data["cumulative_volume"] = stock_data["Volume"].cumsum()

    # # Calculate VWAP
    stock_data["VWAP"] = (stock_data["typical_price"] * stock_data["Volume"]).cumsum() / stock_data["cumulative_volume"]
    return stock_data

In [63]:
    

def getRawData(stock_data):
    # Convert Date to datetime
    stock_data["Date"] = pd.to_datetime(stock_data.index)

    stock_data = calculateMA(stock_data)

    stock_data = calculateVWAP(stock_data)

    # Select relevant columns
    stock_DB_data = stock_data[["Date", "Open", "High", "Low", "Close", "Volume", "MA9", "MA21", "MA35", "MA50", "MA100","MA200","VWAP"]]
    return stock_DB_data


In [58]:
def initialize_DB():
    # Replace with your database connection details
    #only run once to create DB
    return create_engine("sqlite:///my_stock_data.db")




def fill_DB(stock_table_data,stock):
    engine =initialize_DB()
    # Create table if not exists
    stock_table_data.to_sql(stock, engine, if_exists="replace", index=False)

    #sorted descending by date

    stock_table_data = pd.read_sql_query("SELECT * FROM "+stock+" ORDER BY Date DESC", engine)


    # Create table if not exists
    stock_table_data.to_sql(stock, engine, if_exists="replace", index=False)
    



In [65]:
for stock in portfolio:
    stock_data = download_data(stock)
    stock_table_data = getRawData(stock_data)
    fill_DB(stock_table_data,stock)



[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed
[*********************100%%**********************]  1 of 1 completed


In [5]:

# Example 1: Find all days with RSI above 70 (overbought)
overbought_days = pd.read_sql_query(
    "SELECT * FROM apple_stock WHERE MA21 > 70", engine
)

# Example 2: Calculate average volume for days with MA20 above Close
avg_vol_above_ma20 = pd.read_sql_query(
    "SELECT * FROM apple_stock WHERE MA21 > Close", engine
)


In [6]:
avg_vol_above_ma20

Unnamed: 0,Date,Open,High,Low,Close,Volume,MA9,MA21,MA35,MA50,MA100,MA200
0,2023-11-01 00:00:00.000000,171.000000,174.229996,170.119995,173.970001,56934900,171.173335,174.787145,174.544287,176.653401,181.806600,171.08515
1,2023-10-31 00:00:00.000000,169.350006,170.899994,167.899994,170.770004,44846000,171.338891,174.712383,174.551144,176.718601,181.876501,170.89500
2,2023-10-30 00:00:00.000000,169.020004,171.169998,168.869995,170.289993,51131000,171.902223,174.854287,174.709144,176.820001,181.974501,170.71495
3,2023-10-27 00:00:00.000000,166.910004,168.960007,166.830002,168.220001,58499100,172.664446,174.898097,174.968287,176.904001,182.049801,170.53055
4,2023-10-26 00:00:00.000000,170.369995,171.380005,165.669998,166.889999,70625300,173.831112,175.015717,175.252859,177.019601,182.159701,170.35690
...,...,...,...,...,...,...,...,...,...,...,...,...
4701,1981-01-20 00:00:00.000000,0.142857,0.142857,0.142299,0.142299,30083200,0.139819,0.143548,,,,
4702,1981-01-16 00:00:00.000000,0.138951,0.138951,0.138393,0.138393,13395200,0.139013,0.140944,,,,
4703,1981-01-15 00:00:00.000000,0.139509,0.140625,0.139509,0.139509,14067200,0.140377,0.139722,,,,
4704,1981-01-14 00:00:00.000000,0.136719,0.137277,0.136719,0.136719,14291200,0.141989,0.138871,,,,


In [7]:
#to kill or dispose the database
engine.dispose()

In [37]:
#delete table from DB
pd.read_sql_query("DROP TABLE AAPL", engine)

NameError: name 'engine' is not defined