### **Data preparation**

In [13]:
!pip install yfinance




[notice] A new release of pip is available: 25.2 -> 25.3
[notice] To update, run: python.exe -m pip install --upgrade pip


**The next cell performs the following steps:**

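1. Downloads historical stock data for every ticker in `TICKERS` (`AAPL`, `GOOG`, `MSFT`, `AMZN`, `TSLA`, `NVDA`, `META`, `NFLX`, `JPM`, `BAC`, `WMT`, `DIS`) over the past year (`DAYS_BACK = 365` days) using `yfinance`.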
2. Computes technical indicators and features:
   - Simple moving averages (SMA) for short and long windows.
   - Logarithmic and percentage returns.
   - Rolling volatility.
   - Average True Range (ATR).
   - Rolling highs and lows for breakout detection.
3. Saves the prepared data for each ticker as CSV files in the `prepared_csv` directory.

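All parameters (SMA, ATR, volatility, and breakout windows, the ticker list, the look-back period, and the output directory) are configurable through the constants defined at the top of the cell.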

In [14]:
import pandas as pd
import numpy as np
import yfinance as yf
from datetime import datetime, timedelta
import os

# Symbols to process; each one is downloaded and written to its own CSV file.
TICKERS = [
    "AAPL", "GOOG", "MSFT", "AMZN",
    "TSLA", "NVDA", "META", "NFLX",
    "JPM", "BAC", "WMT", "DIS"
]
DAYS_BACK = 365  # length of the download window in calendar days, counted back from "now"
OUT_DIR = "prepared_csv"  # directory that receives the "<ticker>_prepared.csv" files
os.makedirs(OUT_DIR, exist_ok=True)  # create it up front; no error if it already exists

# Rolling-window lengths, expressed in rows of the price frame.
SMA_FAST = 10  # fast simple moving average of "Adj Close"
SMA_SLOW = 50  # slow simple moving average of "Adj Close"
VOL_WINDOW = 20  # window for the rolling standard deviation of log returns
ATR_WINDOW = 14  # window for the rolling mean of the true range
BREAKOUT_WINDOW = 20  # window for the rolling max of "High" / min of "Low"

def normalize_columns(df):
    """Flatten a multi-level column header and guarantee an ``"Adj Close"`` column.

    The frame is modified in place and the same object is returned.

    * If ``df.columns`` is a ``pd.MultiIndex``, every column is renamed to the
      first element of its tuple.
    * If no ``"Adj Close"`` column exists afterwards, one is created as a copy
      of ``"Close"``.
    """
    has_multilevel_header = isinstance(df.columns, pd.MultiIndex)
    if has_multilevel_header:
        # Keep only the first level of each column label.
        df.columns = list(df.columns.get_level_values(0))

    if "Adj Close" in df.columns:
        return df

    # Fall back to the plain close so downstream code can always read "Adj Close".
    df["Adj Close"] = df["Close"]
    return df


def download_data(ticker, start, end):
    """Download the price history of one ticker and return it as a flat frame.

    Parameters
    ----------
    ticker : str
        Symbol passed to ``yf.download``.
    start, end
        Bounds of the download window, forwarded unchanged to ``yf.download``.

    Returns
    -------
    pandas.DataFrame
        The downloaded rows sorted by index, with single-level column names,
        a guaranteed ``"Adj Close"`` column (see ``normalize_columns``) and an
        extra ``"ticker"`` column holding ``ticker``.

    Raises
    ------
    ValueError
        If the download returns no rows.
    """
    # auto_adjust is passed explicitly so the result does not depend on the
    # installed yfinance version's default, and so the call does not trigger
    # the warning yfinance emits when the argument is omitted. With adjusted
    # prices, Close/High/Low are on the same scale; add_features mixes them
    # when computing the true range.
    df = yf.download(
        ticker,
        start=start,
        end=end,
        progress=False,
        auto_adjust=True,
    )

    if df is None or df.empty:
        raise ValueError(f"No data for {ticker}")

    df = df.sort_index()
    df = normalize_columns(df)

    df["ticker"] = ticker
    return df


def add_features(
    df,
    sma_fast=None,
    sma_slow=None,
    vol_window=None,
    atr_window=None,
    breakout_window=None,
):
    """Return a copy of ``df`` extended with return, trend, volatility and range features.

    Parameters
    ----------
    df : pandas.DataFrame
        Price history containing at least the ``"Adj Close"``, ``"High"`` and
        ``"Low"`` columns. The input frame is not modified.
    sma_fast, sma_slow : int, optional
        Window lengths (in rows) of the fast and slow simple moving averages.
        Default to the module-level ``SMA_FAST`` / ``SMA_SLOW``.
    vol_window : int, optional
        Window for the rolling standard deviation of log returns.
        Defaults to ``VOL_WINDOW``.
    atr_window : int, optional
        Window for the rolling mean of the true range. Defaults to ``ATR_WINDOW``.
    breakout_window : int, optional
        Window for the rolling high/low. Defaults to ``BREAKOUT_WINDOW``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the added columns ``return``, ``log_ret``,
        ``SMA_<sma_fast>``, ``SMA_<sma_slow>``, ``trend_strength``,
        ``volatility``, ``atr``, ``high_N`` and ``low_N``. Every row that
        contains a NaN in any column (e.g. the rolling warm-up rows) is dropped.

    Raises
    ------
    KeyError
        If one of the required price columns is missing.
    """
    # Resolve unset windows against the notebook constants at call time, so
    # editing a constant and re-running the pipeline takes effect without
    # having to redefine this function.
    sma_fast = SMA_FAST if sma_fast is None else sma_fast
    sma_slow = SMA_SLOW if sma_slow is None else sma_slow
    vol_window = VOL_WINDOW if vol_window is None else vol_window
    atr_window = ATR_WINDOW if atr_window is None else atr_window
    breakout_window = BREAKOUT_WINDOW if breakout_window is None else breakout_window

    # Fail early with one message that names every missing column.
    missing = [col for col in ("Adj Close", "High", "Low") if col not in df.columns]
    if missing:
        raise KeyError(f"add_features: missing required column(s): {missing}")

    df = df.copy()

    # Simple and log returns of the adjusted close.
    df["return"] = df["Adj Close"].pct_change()
    df["log_ret"] = np.log1p(df["return"])

    # Moving averages and their spread, normalised by the current price.
    fast_col = f"SMA_{sma_fast}"
    slow_col = f"SMA_{sma_slow}"
    df[fast_col] = df["Adj Close"].rolling(sma_fast).mean()
    df[slow_col] = df["Adj Close"].rolling(sma_slow).mean()
    df["trend_strength"] = (df[fast_col] - df[slow_col]) / df["Adj Close"]

    df["volatility"] = df["log_ret"].rolling(vol_window).std()

    high = df["High"]
    low = df["Low"]
    prev_close = df["Adj Close"].shift(1)

    # True range: the largest of the bar's own range and its two gaps to the
    # previous close. max(axis=1) skips NaN, so the first row (which has no
    # previous close) falls back to high - low.
    tr = pd.concat(
        [
            high - low,
            (high - prev_close).abs(),
            (low - prev_close).abs(),
        ],
        axis=1,
    ).max(axis=1)

    # ATR here is a plain rolling mean of the true range.
    df["atr"] = tr.rolling(atr_window).mean()

    # NOTE: both windows include the current row, so high_N >= High and
    # low_N <= Low on every bar.
    df["high_N"] = df["High"].rolling(breakout_window).max()
    df["low_N"] = df["Low"].rolling(breakout_window).min()

    return df.dropna()


def prepare_all(tickers):
    """Download, enrich and save the price history of every ticker in ``tickers``.

    The download window ends at the current time and starts ``DAYS_BACK`` days
    earlier. For each ticker the frame returned by ``download_data`` is passed
    through ``add_features`` and written to ``OUT_DIR/<ticker>_prepared.csv``.
    Progress is printed before and after each ticker. Returns ``None``.
    """
    window_end = datetime.now()
    window_start = window_end - timedelta(days=DAYS_BACK)

    for symbol in tickers:
        print(f"Download {symbol} ")

        prepared = add_features(download_data(symbol, window_start, window_end))

        csv_path = os.path.join(OUT_DIR, f"{symbol}_prepared.csv")
        prepared.to_csv(csv_path)

        # The trailing newline leaves a blank line between tickers in the log.
        print(f"Ready: {csv_path}, str: {len(prepared)}\n")

# Entry point: run the whole pipeline for the configured tickers when this
# code is executed directly rather than imported.
if __name__ == "__main__":
    prepare_all(TICKERS)


  df = yf.download(ticker, start=start, end=end, progress=False)


Download AAPL 
Ready: prepared_csv\AAPL_prepared.csv, str: 200

Download GOOG 


  df = yf.download(ticker, start=start, end=end, progress=False)


Ready: prepared_csv\GOOG_prepared.csv, str: 200

Download MSFT 


  df = yf.download(ticker, start=start, end=end, progress=False)


Ready: prepared_csv\MSFT_prepared.csv, str: 200

Download AMZN 


  df = yf.download(ticker, start=start, end=end, progress=False)


Ready: prepared_csv\AMZN_prepared.csv, str: 200

Download TSLA 


  df = yf.download(ticker, start=start, end=end, progress=False)


Ready: prepared_csv\TSLA_prepared.csv, str: 200

Download NVDA 


  df = yf.download(ticker, start=start, end=end, progress=False)


Ready: prepared_csv\NVDA_prepared.csv, str: 200

Download META 


  df = yf.download(ticker, start=start, end=end, progress=False)


Ready: prepared_csv\META_prepared.csv, str: 200

Download NFLX 


  df = yf.download(ticker, start=start, end=end, progress=False)


Ready: prepared_csv\NFLX_prepared.csv, str: 200

Download JPM 


  df = yf.download(ticker, start=start, end=end, progress=False)


Ready: prepared_csv\JPM_prepared.csv, str: 200

Download BAC 


  df = yf.download(ticker, start=start, end=end, progress=False)


Ready: prepared_csv\BAC_prepared.csv, str: 200

Download WMT 


  df = yf.download(ticker, start=start, end=end, progress=False)


Ready: prepared_csv\WMT_prepared.csv, str: 200

Download DIS 


  df = yf.download(ticker, start=start, end=end, progress=False)


Ready: prepared_csv\DIS_prepared.csv, str: 200

