In [1]:
"""
Import Soybean Futures Data from Yahoo Finance
Creates a clean CSV with proper headers and log returns
"""

import yfinance as yf
import pandas as pd
import numpy as np

print("="*80)
print("IMPORTING SOYBEAN FUTURES DATA")
print("="*80)

# Define the ticker symbol for Soybean futures
ticker = "ZS=F"

print(f"\nDownloading {ticker} data from 2005-01-01...")

# Download data from 2005 onward (daily frequency)
data = yf.download(ticker, start="2005-01-01", progress=False)

# Select only Close and Volume
data = data[["Close", "Volume"]].copy()

# Reset index to make Date a column (not index)
data = data.reset_index()

# Rename columns to be clear
data = data.rename(columns={
    'Date': 'Date',
    'Close': 'Close',
    'Volume': 'Volume'
})

# Calculate log returns
data['log_return'] = np.log(data['Close'] / data['Close'].shift(1))
data.loc[0, 'log_return'] = 0  # First value = 0

# Reorder columns: Date, Close, Volume, log_return
data = data[['Date', 'Close', 'Volume', 'log_return']]

# Remove any rows with NaN
data = data.dropna()

print(f"\n✓ Downloaded {len(data)} observations")
print(f"  Date range: {data['Date'].min()} to {data['Date'].max()}")

# Display sample rows
print("\nFirst 5 rows:")
print(data.head())

print("\nLast 5 rows:")
print(data.tail())

# Save to CSV with clean header
output_file = "daily_soybean_prices.csv"
data.to_csv(output_file, index=False)

print(f"\n✓ Saved to: {output_file}")

print("\n" + "="*80)
print("IMPORT COMPLETE!")
print("="*80)
print("\nColumn structure:")
print(f"  Date        : {data['Date'].dtype}")
print(f"  Close       : Closing/settlement price")
print(f"  Volume      : Trading volume")
print(f"  log_return  : Log returns (ln(Close_t / Close_t-1))")

IMPORTING SOYBEAN FUTURES DATA

Downloading ZS=F data from 2005-01-01...


  data = yf.download(ticker, start="2005-01-01", progress=False)



✓ Downloaded 5305 observations
  Date range: 2005-01-03 00:00:00 to 2026-02-02 00:00:00

First 5 rows:
Price        Date   Close Volume log_return
Ticker               ZS=F   ZS=F           
0      2005-01-03  537.25     47   0.000000
1      2005-01-04  529.75     54  -0.014058
2      2005-01-05  534.00     37   0.007991
3      2005-01-06  541.75     35   0.014409
4      2005-01-07  551.75     17   0.018290

Last 5 rows:
Price        Date    Close  Volume log_return
Ticker                ZS=F    ZS=F           
5300   2026-01-27  1067.25   81662   0.005167
5301   2026-01-28  1075.00  144498   0.007235
5302   2026-01-29  1072.25  144858  -0.002561
5303   2026-01-30  1064.25  144858  -0.007489
5304   2026-02-02  1059.00   85901  -0.004945

✓ Saved to: daily_soybean_prices.csv

IMPORT COMPLETE!

Column structure:
  Date        : datetime64[ns]
  Close       : Closing/settlement price
  Volume      : Trading volume
  log_return  : Log returns (ln(Close_t / Close_t-1))
