In [1]:
import pandas as pd
import numpy as np
import yfinance as yf
from sqlalchemy import create_engine
from datetime import datetime


In [6]:
DB_USER = "root"
DB_PASSWORD = "1433"
DB_HOST = "localhost"
DB_NAME = "financial_analytics"

engine = create_engine(
    f"mysql+mysqlconnector://{DB_USER}:{DB_PASSWORD}@{DB_HOST}/{DB_NAME}"
)


In [7]:
assets_df = pd.read_sql("SELECT asset_id, ticker FROM assets", engine)
assets_df


Unnamed: 0,asset_id,ticker
0,11,^GSPC
1,1,AAPL
2,4,BAC
3,7,JNJ
4,3,JPM
5,2,MSFT
6,6,PG
7,8,TSLA
8,10,VZ
9,9,WMT


In [8]:
start_date = "2019-01-01"
end_date = datetime.today().strftime("%Y-%m-%d")


In [10]:
all_price_data = []

for _, row in assets_df.iterrows():
    ticker = row["ticker"]
    asset_id = row["asset_id"]

    data = yf.download(
        ticker,
        start=start_date,
        end=end_date,
        progress=False,
        auto_adjust=False
    )

    if data.empty:
        print(f"No data for {ticker}")
        continue

    # ðŸ”´ FIX: Flatten columns if MultiIndex
    if isinstance(data.columns, pd.MultiIndex):
        data.columns = data.columns.get_level_values(0)

    data.reset_index(inplace=True)

    # ðŸ”´ SAFE column selection
    required_cols = ["Date", "Adj Close", "Volume"]
    available_cols = data.columns.tolist()

    print(f"{ticker} columns:", available_cols)

    data = data[required_cols]
    data.columns = ["trade_date", "adj_close", "volume"]
    data["asset_id"] = asset_id

    all_price_data.append(data)

price_df = pd.concat(all_price_data, ignore_index=True)
price_df.head()


^GSPC columns: ['Date', 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']
AAPL columns: ['Date', 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']
BAC columns: ['Date', 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']
JNJ columns: ['Date', 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']
JPM columns: ['Date', 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']
MSFT columns: ['Date', 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']
PG columns: ['Date', 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']
TSLA columns: ['Date', 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']
VZ columns: ['Date', 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']
WMT columns: ['Date', 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']
XOM columns: ['Date', 'Adj Close', 'Close', 'High', 'Low', 'Open', 'Volume']


Unnamed: 0,trade_date,adj_close,volume,asset_id
0,2019-01-02,2510.030029,3733160000,11
1,2019-01-03,2447.889893,3858830000,11
2,2019-01-04,2531.939941,4234140000,11
3,2019-01-07,2549.689941,4133120000,11
4,2019-01-08,2574.409912,4120060000,11


In [11]:
price_df.columns


Index(['trade_date', 'adj_close', 'volume', 'asset_id'], dtype='object')

In [12]:
price_df.head()


Unnamed: 0,trade_date,adj_close,volume,asset_id
0,2019-01-02,2510.030029,3733160000,11
1,2019-01-03,2447.889893,3858830000,11
2,2019-01-04,2531.939941,4234140000,11
3,2019-01-07,2549.689941,4133120000,11
4,2019-01-08,2574.409912,4120060000,11


In [13]:
price_df.isnull().sum()


trade_date    0
adj_close     0
volume        0
asset_id      0
dtype: int64

In [14]:
price_df.to_sql(
    "market_prices",
    engine,
    if_exists="append",
    index=False,
    chunksize=1000
)


19503

In [15]:
price_df.isnull().sum()


trade_date    0
adj_close     0
volume        0
asset_id      0
dtype: int64

In [16]:
price_df.duplicated(
    subset=["asset_id", "trade_date"]
).sum()


np.int64(0)

In [17]:
price_df.dtypes


trade_date    datetime64[ns]
adj_close            float64
volume                 int64
asset_id               int64
dtype: object

In [18]:
price_df["trade_date"] = pd.to_datetime(price_df["trade_date"])
