In [3]:
import pandas as pd
import numpy as np
import requests
from io import StringIO

In [21]:
bc_url = "https://api.blockchain.info/charts/market-price?format=csv"

# Bound the request so a stalled connection cannot hang the kernel, and fail
# loudly on an HTTP error instead of handing an error page to the CSV parser.
r = requests.get(bc_url, timeout=30)
r.raise_for_status()

# Wrap the decoded body in a file-like object so pandas can read it.
s = StringIO(r.content.decode())

In [22]:
# The CSV has no header row: label the two columns and index the rows by the
# first one (the date).
df = pd.read_csv(
    s,
    names=["date", "value"],
    header=None,
    index_col=0,
)
df.head()

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2023-11-12 00:00:00,37140.27
2023-11-13 00:00:00,37058.98
2023-11-14 00:00:00,36497.35
2023-11-15 00:00:00,35545.97
2023-11-16 00:00:00,37891.48


In [23]:
# most recent observation: the final row holds the latest date
df["value"].tail(1)  # selecting a single label first yields a Series
df[["value"]].tail(1)  # selecting a list of labels keeps it a DataFrame

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2024-11-11 00:00:00,80477.81


In [25]:
# highest value together with the date(s) on which it occurred
df["value"].loc[lambda v: v == v.max()]

date
2024-11-11 00:00:00    80477.81
Name: value, dtype: float64

In [26]:
# lowest value together with the date(s) on which it occurred
df["value"].loc[lambda v: v == v.min()]

date
2023-11-15 00:00:00    35545.97
Name: value, dtype: float64

In [30]:
# the dates are the index of df, so `.idxmin` and `.idxmax` return the dates of the extremes directly
df.loc[[df["value"].idxmin()]]

# minimum row first, then maximum row
df.loc[[df["value"].idxmin(), df["value"].idxmax()]]


Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2023-11-15 00:00:00,35545.97
2024-11-11 00:00:00,80477.81


# Extension questions
1. Can you return the current bitcoin value without assigning the dataframe to an intermediate variable?
2. The `read_html` function looks for HTML-formatted tables and returns a list of dataframes, one for each table found. Use it to retrieve one year of S&P 500 data from Yahoo Finance. Look at the `Date`, `Close`, and `Volume` columns and show the date and volume of the days with the highest and lowest `Close` values. To avoid Yahoo's user-agent detection, fetch the page with `requests` with the User-Agent header set to `"Mozilla 5.0"`, then wrap the decoded content in `StringIO` and pass it to `read_html`.
3. Create a two-row data frame with the highest and lowest closing prices for the S&P 500. Use the `to_csv` function to write this data to a new CSV file.

In [None]:
# 1. Latest bitcoin value as one chained expression (no intermediate dataframe variable)
bc_url = "https://api.blockchain.info/charts/market-price?format=csv"

pd.read_csv(
    StringIO(requests.get(bc_url).content.decode()),
    names=["date", "value"],
    header=None,
    index_col=0,
)[["value"]].tail(1)

Unnamed: 0_level_0,value
date,Unnamed: 1_level_1
2024-11-11 00:00:00,80477.81


In [None]:
# 2. S&P 500 from HTML tables
# Use selenium instead of requests to try and avoid the anti scraping stuff
import shutil

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.firefox.options import Options
from selenium.webdriver.firefox.service import Service
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

firefox_options = Options()
firefox_options.add_argument("--headless")

# Prefer whichever geckodriver is on PATH so the notebook is not tied to one
# machine's Homebrew layout; fall back to the original location otherwise.
geckodriver_path = (
    shutil.which("geckodriver")
    or "/opt/homebrew/Cellar/geckodriver/0.35.0/bin/geckodriver"
)
service = Service(geckodriver_path)
driver = webdriver.Firefox(service=service, options=firefox_options)

yahoo_url = "https://finance.yahoo.com/quote/^GSPC/history/?p=^GSPC"

try:
    driver.get(yahoo_url)
    # An implicit wait only affects element look-ups, so it would not delay
    # reading `page_source`. Wait explicitly (up to 10 s) for a table instead.
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.TAG_NAME, "table"))
    )
    page_source = driver.page_source
finally:
    # Always shut the headless browser down, even if loading or waiting fails.
    driver.quit()

In [None]:
# Take the first table found in the page.
df = pd.read_html(StringIO(page_source))[0]

# The close column's header carries extra descriptive text after the name
# (e.g. "Close Close price adjusted for splits."). Match on the "Close" prefix
# rather than the exact wording so a change to that text does not break the
# column selection. "Adj Close ..." does not start with "Close", so it is left alone.
df = df.rename(
    columns=lambda col: "Close" if str(col).startswith("Close") else col
)[["Date", "Close", "Volume"]]
df

Unnamed: 0,Date,Close,Volume
0,"Nov 11, 2024",5996.93,2121479000
1,"Nov 8, 2024",5995.54,4666740000
2,"Nov 7, 2024",5973.10,4925740000
3,"Nov 6, 2024",5929.04,6329530000
4,"Nov 5, 2024",5782.76,3768310000
...,...,...,...
246,"Nov 17, 2023",4514.02,3777240000
247,"Nov 16, 2023",4508.24,3964520000
248,"Nov 15, 2023",4502.88,4347170000
249,"Nov 14, 2023",4495.70,4700350000


In [59]:
# rows whose Close equals the lowest or the highest Close in the table
df.loc[
    lambda d: (d["Close"] == d["Close"].min()) | (d["Close"] == d["Close"].max())
]

Unnamed: 0,Date,Close,Volume
0,"Nov 11, 2024",5996.93,2121479000
250,"Nov 13, 2023",4411.55,3326240000


In [60]:
# select the lowest- and highest-Close rows again and save them as a CSV file
df.loc[
    lambda d: (d["Close"] == d["Close"].min()) | (d["Close"] == d["Close"].max())
].to_csv("s_and_p_close.csv")