In [None]:
# Unmount if necessary
# from google.colab import drive
# drive.flush_and_unmount()

Drive not mounted, so nothing to flush and unmount.


In [1]:
# Colab cell
from google.colab import drive

drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [2]:
# Adjust these two for YOUR repo
REPO_OWNER = "ywanglab"
REPO_NAME  = "STAT4160"   # e.g., unified-stocks-team1

BASE_DIR   = "/content/drive/MyDrive/dspt25"
CLONE_DIR  = f"{BASE_DIR}/{REPO_NAME}"
REPO_URL   = f"https://github.com/{REPO_OWNER}/{REPO_NAME}.git"

import os, pathlib
pathlib.Path(BASE_DIR).mkdir(parents=True, exist_ok=True)


In [3]:
import os, subprocess, shutil, pathlib

if not pathlib.Path(CLONE_DIR).exists():
    !git clone {REPO_URL} {CLONE_DIR}
else:
    # If the folder exists, just ensure it's a git repo and pull latest
    os.chdir(CLONE_DIR)
    # !git status
    # !git pull --rebase # !git pull --ff-only
os.chdir(CLONE_DIR)
print("Working dir:", os.getcwd())

Working dir: /content/drive/MyDrive/dspt25/STAT4160


In [4]:
!git status

Refresh index: 100% (105/105), done.
On branch setup/git-lfs
Changes not staged for commit:
  (use "git add <file>..." to update what will be committed)
  (use "git restore <file>..." to discard changes in working directory)
	[31mmodified:   notebooks/lec2_inclass.ipynb[m
	[31mmodified:   notebooks/lec3-inclass.ipynb[m
	[31mmodified:   reports/system_check.md[m

Untracked files:
  (use "git add <file>..." to include in what will be committed)
	[31mreports/eda-AAPL.qmd[m
	[31mreports/eda-MSFT.qmd[m
	[31mreports/eda-NVDA.qmd[m
	[31mreports/eda.qmd[m
	[31mreports/figs/[m

no changes added to commit (use "git add" and/or "git commit -a")


In [None]:
!git pull --rebase # !git pull --ff-only

In [None]:
# Install Quarto CLI (one-time per Colab runtime)
# !wget -q https://quarto.org/download/latest/quarto-linux-amd64.deb -O /tmp/quarto.deb
# !dpkg -i /tmp/quarto.deb || apt-get -y -f install >/dev/null && dpkg -i /tmp/quarto.deb
# !quarto --version

#Alternatively, save it to G-drive, and only need to download the first time. The size of  quarto-linux-amd64.deb is ~125Mb.
# Path to store the deb package
deb_path = "/content/drive/MyDrive/quarto-linux-amd64.deb"

# Download only if not already saved
!test -f $deb_path || wget -q https://quarto.org/download/latest/quarto-linux-amd64.deb -O $deb_path

# Install from Drive (fast, no re-download)
!dpkg -i $deb_path || apt-get -y -f install >/dev/null && dpkg -i $deb_path #-f: fix package dependency issues
!quarto --version

Selecting previously unselected package quarto.
(Reading database ... 126371 files and directories currently installed.)
Preparing to unpack .../MyDrive/quarto-linux-amd64.deb ...
Unpacking quarto (1.7.33) ...
Setting up quarto (1.7.33) ...
(Reading database ... 130482 files and directories currently installed.)
Preparing to unpack .../MyDrive/quarto-linux-amd64.deb ...
Unpacking quarto (1.7.33) over (1.7.33) ...
Setting up quarto (1.7.33) ...
1.7.33


Related `dpkg` options

* `dpkg -l` → list installed packages.
* `dpkg -r packagename` → remove a package.
* `dpkg -i packagename.deb` → install a `.deb`.
* `dpkg -s packagename` → show status/details.

In [None]:
from textwrap import dedent
qproj = dedent("""\
project:
  type: website
  output-dir: docs1

website:
  title: "Unified Stocks — EDA"
  navbar:
    left:
      - href: index.qmd
        text: Home
      - href: reports/eda.qmd
        text: EDA (parametrized)

format:
  html:
    theme: cosmo
    toc: true
    code-fold: false

execute:
  echo: true
  warning: false
  cache: true
""")
open("_quarto.yml","w").write(qproj)
print(open("_quarto.yml").read())

project:
  type: website
  output-dir: docs1

website:
  title: "Unified Stocks — EDA"
  navbar:
    left:
      - href: index.qmd
        text: Home
      - href: reports/eda.qmd
        text: EDA (parametrized)

format:
  html:
    theme: cosmo
    toc: true
    code-fold: false

execute:
  echo: true
  cache: true



In [None]:
index = """\
---
title: "Unified Stocks Project"
---

Welcome! Use the navigation to view the EDA report.

- **Stock set**: see `tickers_25.csv`
- **Note**: Educational use only — no trading advice.
"""
open("index.qmd","w").write(index)
print(open("index.qmd").read())

---
title: "Unified Stocks Project"
---

Welcome! Use the navigation to view the EDA report.

- **Stock set**: see `tickers_25.csv`
- **Note**: Educational use only — no trading advice.



In [None]:
import os, pathlib
pathlib.Path("reports/figs").mkdir(parents=True, exist_ok=True)
# For Python
eda_qmd = """\
---
title: "Stock EDA"
format:
  html:
    toc: true
    number-sections: false
execute:
  echo: true
  warning: false
  cache: false     # keep off while testing params
jupyter: python3
---

::: callout-note
This report is parameterized. Example:
`-P symbol:MSFT -P start_date:2019-01-01 -P end_date:2025-08-01 -P rolling:30`.
:::

## Setup

```{python}
#| tags: [parameters]
# Default values (overridden by -P at render time)
symbol = "AAPL"
start_date = "2018-01-01"
end_date = ""          # empty means "open ended"
rolling = 20
```

```{python}
import pandas as pd
start = pd.to_datetime(start_date) if start_date else None
end   = pd.to_datetime(end_date) if end_date else None
roll  = int(rolling)
print("Using params:", dict(symbol=symbol, start=start, end=end, rolling=roll))
```

## Price over time for `{python} symbol` (`{python} start_date` → `{python} end_date`)

```{python}
# TODO: fetch prices using `symbol`, `start`, `end`
# df = ...
# ax = df["Close"].plot(title=f"{symbol} closing price")
# ax.set_xlabel(""); ax.set_ylabel("Price")
```

## Daily log returns — histogram

```{python}
# TODO: compute returns from df and plot
```

## Rolling mean & volatility (window = `{python} roll`)

```{python}
# TODO: use `roll` for rolling stats
```

## Summary table

```{python}
# TODO: build a summary DataFrame and display
```
"""


In [None]:
import os, pathlib
pathlib.Path("reports/figs").mkdir(parents=True, exist_ok=True)
#
eda_qmd = """\
---
title: "Stock EDA"
format:
  html:
    toc: true
    number-sections: false
execute-dir: "/content/drive/MyDrive/dspt25/STAT4160/reports"
execute:
  echo: false
  warning: false
  cache: false     # keep off while testing params

jupyter: python3
params:
  symbol: "AAPL"
  start_date: "2018-01-01"
  end_date: ""
  rolling: 20
---


::: callout-note
This report is parameterized. To change inputs without editing code, pass
`-P symbol:MSFT -P start_date:2019-01-01 -P end_date:2025-08-01 -P rolling:30` to `quarto render`.
:::

## Setup if using Python
```{python}
#| tags: [parameters]
# Default values (overridden by -P at render time)
SYMBOL = "AAPL"
START  = "2018-01-01"
END    = ""
ROLL   =  20
```

``` {python}
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import yfinance as yf
from pathlib import Path
from datetime import datetime

# Read parameters if using R
# SYMBOL = params.get("symbol", "AAPL")
# START  = params.get("start_date", "2018-01-01")
# END    = params.get("end_date", "")
# ROLL   = int(params.get("rolling", 20))

if not END:
    END = pd.Timestamp.today().strftime("%Y-%m-%d")

SYMBOL, START, END, ROLL
```

## Download and prepare data

``` {python}
# Fetch adjusted OHLCV
try:
    data = yf.download(SYMBOL, start=START, end=END, auto_adjust=True, progress=False)
except Exception as e:
    print("yfinance failed, falling back to synthetic series:", e)
    idx = pd.bdate_range(START, END)
    rng = np.random.default_rng(42)
    ret = rng.normal(0, 0.01, len(idx))
    price = 100 * np.exp(np.cumsum(ret))
    vol = rng.integers(1e5, 5e6, len(idx))
    data = pd.DataFrame({"Close": price, "Volume": vol}, index=idx)

# Tidy & features
df = data.rename(columns=str.lower).copy()
df = df[["close","volume"]].dropna()
df["log_return"] = np.log(df["close"]).diff()
df["roll_mean"]  = df["log_return"].rolling(ROLL, min_periods=ROLL//2).mean()
df["roll_vol"]   = df["log_return"].rolling(ROLL, min_periods=ROLL//2).std()
df = df.dropna()
df.head()
```

## Price over time

``` {python}
fig, ax = plt.subplots(figsize=(8,3))
ax.plot(df.index, df["close"])
ax.set_title(f"{SYMBOL} — Adjusted Close")
ax.set_xlabel("Date"); ax.set_ylabel("Price")
fig.tight_layout()
# figpath = Path("reports/figs")/f"{SYMBOL}_price.png"
figpath = Path("figs")/f"{SYMBOL}_price.png" #same changes for the rest of the figures
fig.savefig(figpath, dpi=144)
figpath
```

## Daily log returns — histogram

``` {python}
fig, ax = plt.subplots(figsize=(6,3))
ax.hist(df["log_return"], bins=50, alpha=0.8)
ax.set_title(f"{SYMBOL} — Daily Log Return Distribution")
ax.set_xlabel("log return"); ax.set_ylabel("count")
fig.tight_layout()
figpath = Path("figs")/f"{SYMBOL}_hist.png"
fig.savefig(figpath, dpi=144)
figpath
```

## Rolling mean & volatility (window = {params.rolling})

``` {python}
fig, ax = plt.subplots(figsize=(8,3))
ax.plot(df.index, df["roll_mean"], label="rolling mean")
ax.plot(df.index, df["roll_vol"],  label="rolling std")
ax.set_title(f"{SYMBOL} — Rolling Return Stats (window={ROLL})")
ax.set_xlabel("Date"); ax.set_ylabel("value")
ax.legend()
fig.tight_layout()
figpath = Path("figs")/f"{SYMBOL}_rolling.png"
fig.savefig(figpath, dpi=144)
figpath
```

## Summary table

``` {python}
summary = pd.DataFrame({
    "n_days": [len(df)],
    "start": [df.index.min().date()],
    "end":   [df.index.max().date()],
    "mean_daily_ret": [df["log_return"].mean()],
    "std_daily_ret":  [df["log_return"].std()],
    "ann_vol_approx": [df["log_return"].std()*np.sqrt(252)]
})
summary
```

 **Note**: Educational use only. This is not trading advice.
"""


In [None]:
open("reports/eda.qmd","w").write(eda_qmd)
print("Wrote reports/eda.qmd")

Wrote reports/eda.qmd


In [None]:
# !pip install jupyter-cache # Run this cell if the package is missing
# !pip install papermill   #Run this cell if papermill is passing.

In [None]:
# Single render with defaults (AAPL)
!quarto render reports/eda.qmd --output-dir docs1/


Executing 'eda.quarto_ipynb'
  Cell 1/7: ''...Done
  Cell 2/7: ''...Done
  Cell 3/7: ''...Done
  Cell 4/7: ''...Done
  Cell 5/7: ''...Done
  Cell 6/7: ''...Done
  Cell 7/7: ''...Done

[1mpandoc [22m
  to: html
  output-file: eda.html
  standalone: true
  title-prefix: Unified Stocks — EDA
  section-divs: true
  html-math-method: mathjax
  wrap: none
  default-image-extension: png
  toc: true
  number-sections: false
  variables: {}
  
[1mmetadata[22m
  document-css: false
  link-citations: true
  date-format: long
  lang: en
  theme: cosmo
  title: Stock EDA
  execute-dir: /content/drive/MyDrive/dspt25/STAT4160/reports
  jupyter: python3
  
Output created: ../docs1/reports/eda.html



In [None]:
# Render for MSFT with custom dates and rolling window
# !quarto render reports/eda.qmd -P symbol:MSFT -P start_date:2019-01-01 -P end_date:2025-08-01 -P rolling:30 --output-dir docs1/
!quarto render reports/eda.qmd -P SYMBOL:MSFT -P START:2019-01-01 -P END:2025-08-01 -P ROLL:30 --output-dir docs1/




Executing 'eda.quarto_ipynb'
  Cell 1/8: ''...Done
  Cell 2/8: ''...Done
  Cell 3/8: ''...Done
  Cell 4/8: ''...Done
  Cell 5/8: ''...Done
  Cell 6/8: ''...Done
  Cell 7/8: ''...Done
  Cell 8/8: ''...Done

[1mpandoc [22m
  to: html
  output-file: eda.html
  standalone: true
  title-prefix: Unified Stocks — EDA
  section-divs: true
  html-math-method: mathjax
  wrap: none
  default-image-extension: png
  toc: true
  number-sections: false
  variables: {}
  
[1mmetadata[22m
  document-css: false
  link-citations: true
  date-format: long
  lang: en
  theme: cosmo
  title: Stock EDA
  execute-dir: /content/drive/MyDrive/dspt25/STAT4160/reports
  jupyter: python3
  
Output created: ../docs1/reports/eda.html



In [None]:
# Render for NVDA with a different window
# !quarto render reports/eda.qmd -P symbol:NVDA -P start_date:2018-01-01 -P end_date:2025-08-01 -P rolling:60 --output-dir docs1/
!quarto render reports/eda.qmd -P SYMBOL:NVDA -P START:2018-01-01 -P END:2025-08-01 -P ROLL:60 --output-dir docs1/


Executing 'eda.quarto_ipynb'
  Cell 1/8: ''...Done
  Cell 2/8: ''...Done
  Cell 3/8: ''...Done
  Cell 4/8: ''...Done
  Cell 5/8: ''...Done
  Cell 6/8: ''...Done
  Cell 7/8: ''...Done
  Cell 8/8: ''...Done

[1mpandoc [22m
  to: html
  output-file: eda.html
  standalone: true
  title-prefix: Unified Stocks — EDA
  section-divs: true
  html-math-method: mathjax
  wrap: none
  default-image-extension: png
  toc: true
  number-sections: false
  variables: {}
  
[1mmetadata[22m
  document-css: false
  link-citations: true
  date-format: long
  lang: en
  theme: cosmo
  title: Stock EDA
  execute-dir: /content/drive/MyDrive/dspt25/STAT4160/reports
  jupyter: python3
  
Output created: ../docs1/reports/eda.html



In [None]:
# Example: write MSFT to docs/reports/eda-MSFT.html via project copy
import shutil, os
shutil.copy("reports/eda.qmd", "reports/eda-MSFT.qmd")
!quarto render reports/eda-MSFT.qmd -P SYMBOL:MSFT -P START:2019-01-01 -P END:2025-08-01 -P ROLL:30 --output-dir docs1/


Starting python3 kernel...Done

Executing 'eda-MSFT.quarto_ipynb'
  Cell 1/8: ''...Done
  Cell 2/8: ''...Done
  Cell 3/8: ''...Done
  Cell 4/8: ''...Done
  Cell 5/8: ''...Done
  Cell 6/8: ''...Done
  Cell 7/8: ''...Done
  Cell 8/8: ''...Done

[1mpandoc [22m
  to: html
  output-file: eda-MSFT.html
  standalone: true
  title-prefix: Unified Stocks — EDA
  section-divs: true
  html-math-method: mathjax
  wrap: none
  default-image-extension: png
  toc: true
  number-sections: false
  variables: {}
  
[1mmetadata[22m
  document-css: false
  link-citations: true
  date-format: long
  lang: en
  theme: cosmo
  title: Stock EDA
  execute-dir: /content/drive/MyDrive/dspt25/STAT4160/reports
  jupyter: python3
  
Output created: ../docs1/reports/eda-MSFT.html



In [None]:
with open('_quarto.yml') as f:
    print(f.read())


project:
  type: website
  output-dir: docs1

website:
  title: "Unified Stocks — EDA"
  navbar:
    left:
      - href: index.qmd
        text: Home
      - href: reports/eda.qmd
        text: EDA (parametrized)

format:
  html:
    theme: cosmo
    toc: true
    code-fold: false

execute:
  echo: true
  cache: true



In [None]:
!pip install ruamel.yaml

Collecting ruamel.yaml
  Downloading ruamel.yaml-0.18.15-py3-none-any.whl.metadata (25 kB)
Collecting ruamel.yaml.clib>=0.2.7 (from ruamel.yaml)
  Downloading ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.7 kB)
Downloading ruamel.yaml-0.18.15-py3-none-any.whl (119 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m119.7/119.7 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ruamel.yaml.clib-0.2.12-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (754 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m754.1/754.1 kB[0m [31m22.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: ruamel.yaml.clib, ruamel.yaml
Successfully installed ruamel.yaml-0.18.15 ruamel.yaml.clib-0.2.12


In [None]:
# Append MSFT page to navbar
from ruamel.yaml import YAML
yaml = YAML()
cfg = yaml.load(open("_quarto.yml"))
cfg["website"]["navbar"]["left"].append({"href": "reports/eda-MSFT.qmd", "text": "MSFT EDA"})
with open("_quarto.yml","w") as f:
    yaml.dump(cfg, f)


In [None]:
!quarto render --output-dir docs1/  # this will render all rendable files in the root directory and all subdirectories

[1m[34m[ 1/10] reports/eda.qmd[39m[22m

Starting python3 kernel...Done
[ColabKernelApp] ERROR | No such comm target registered: quarto_kernel_setup

Executing 'eda.quarto_ipynb'
  Cell 1/7: ''...Done
  Cell 2/7: ''...Done
  Cell 3/7: ''...Done
  Cell 4/7: ''...Done
  Cell 5/7: ''...Done
  Cell 6/7: ''...Done
  Cell 7/7: ''...Done

[ 2/10] reports/system_check.md[39m[22m
[ 3/10] reports/eda-MSFT.qmd[39m[22m

Starting python3 kernel...Done
[ColabKernelApp] ERROR | No such comm target registered: quarto_kernel_setup

Executing 'eda-MSFT.quarto_ipynb'
  Cell 1/7: ''...Done
  Cell 2/7: ''...Done
  Cell 3/7: ''...Done
  Cell 4/7: ''...Done
  Cell 5/7: ''...Done
  Cell 6/7: ''...Done
  Cell 7/7: ''...Done

[ 4/10] notebooks/testing.ipynb[39m[22m
[ 5/10] notebooks/lec2_inclass.ipynb[39m[22m
[ 6/10] notebooks/system_check.ipynb[39m[22m
[ 7/10] notebooks/lec2-hw.ipynb[39m[22m
[ 8/10] notebooks/lec2_hw.ipynb[39m[22m
[ 9/10] notebooks/reproducibility_demo.ipynb[39m[22m
[10/10]

## Homework

In [None]:
import shutil
shutil.copy("reports/eda.qmd", "reports/eda-AAPL.qmd")
shutil.copy("reports/eda.qmd", "reports/eda-MSFT.qmd")
shutil.copy("reports/eda.qmd", "reports/eda-NVDA.qmd")

'reports/eda-NVDA.qmd'

In [None]:
from ruamel.yaml import YAML
yaml = YAML()
cfg = yaml.load(open("_quarto.yml"))
cfg["website"]["navbar"]["left"].extend([
  {"href": "reports/eda-AAPL.qmd", "text": "AAPL"},
  {"href": "reports/eda-MSFT.qmd", "text": "MSFT"},
  {"href": "reports/eda-NVDA.qmd", "text": "NVDA"},
])
with open("_quarto.yml","w") as f:
    yaml.dump(cfg, f)


In [None]:
!quarto render --output-dir docs1/

In [None]:
# Create/append Makefile target
from pathlib import Path
text = "\n\nguard:\n\tpython tools/guard_large_files.py\n" # guard: Makefile target. \t: tab required.
text = """\
report:
\tquarto render reports/eda.qmd --output-dir docs/

reports-trio:
\tquarto render reports/eda-AAPL.qmd -P symbol:AAPL -P start_date:2018-01-01 -P end_date:2025-08-01 --output-dir docs/
\tquarto render reports/eda-MSFT.qmd -P symbol:MSFT -P start_date:2018-01-01 -P end_date:2025-08-01 --output-dir docs/
\tquarto render reports/eda-NVDA.qmd -P symbol:NVDA -P start_date:2018-01-01 -P end_date:2025-08-01 --output-dir docs/"
"""
p = Path("Makefile") # point to the Makefile
# p.write_text(p.read_text() + text if p.exists() else text) # if p exists, read exising content and append text and overwrites.
# the above code will append text everytime, casue error if repeatedly excute.
if p.exists():
    content = p.read_text()
    if ("report:" and "reports-trio:") not in content:
        p.write_text(content + text)
else:
    p.write_text(text)

print("Added 'reports' and 'report-trio' targets to Makefile")

Added 'reports' and 'report-trio' targets to Makefile


In [None]:
with open('Makefile') as f:
    print(f.read())



guard:
	python tools/guard_large_files.py
report:
	quarto render reports/eda.qmd --output-dir docs/

reports-trio:
	quarto render reports/eda-AAPL.qmd -P symbol:AAPL -P start_date:2018-01-01 -P end_date:2025-08-01 --output-dir docs/
	quarto render reports/eda-MSFT.qmd -P symbol:MSFT -P start_date:2018-01-01 -P end_date:2025-08-01 --output-dir docs/
	quarto render reports/eda-NVDA.qmd -P symbol:NVDA -P start_date:2018-01-01 -P end_date:2025-08-01 --output-dir docs/"

