Script that combines weather data and closing prices for both corn and soybean seperately

Corn merge

In [None]:
"""
Merge Corn Price Data with Weather Data
Creates corn_combined.csv with next_day_return for prediction
"""

import pandas as pd
import numpy as np
import os

# Auto-detect file location
try:
    script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    script_dir = os.getcwd()

print("="*80)
print("MERGING CORN DATA")
print("="*80)

# Load price data
price_file = os.path.join(script_dir, 'daily_corn_prices.csv')
print(f"\nLoading price data: {price_file}")

try:
    price_df = pd.read_csv(price_file, parse_dates=['Date'])
    price_df = price_df.rename(columns={'Date': 'date'})
    print(f"✓ Loaded {len(price_df)} price observations")
except FileNotFoundError:
    print(f"❌ File not found: {price_file}")
    print("\nPlease run 'import_corn_clean.py' first to create this file")
    exit()

# Load weather data
weather_file = os.path.join(script_dir, 'corn_data.csv')
print(f"\nLoading weather data: {weather_file}")

try:
    weather_df = pd.read_csv(weather_file, parse_dates=['date'])
    print(f"✓ Loaded {len(weather_df)} weather observations")
except FileNotFoundError:
    print(f"❌ File not found: {weather_file}")
    exit()

# Merge on date
print("\nMerging datasets on 'date'...")
combined_df = pd.merge(price_df, weather_df, on='date', how='inner')
print(f"✓ Combined dataset: {len(combined_df)} observations")

# Ensure log_return exists
if 'log_return' not in combined_df.columns:
    combined_df['log_return'] = np.log(combined_df['Close'] / combined_df['Close'].shift(1))
    combined_df.loc[0, 'log_return'] = 0
    print("✓ Calculated log_return")

# Create next_day_return (our prediction target)
combined_df['next_day_return'] = combined_df['log_return'].shift(-1)
print("✓ Created next_day_return (tomorrow's return)")

# Show info
print(f"\nDate range: {combined_df['date'].min().date()} to {combined_df['date'].max().date()}")
print(f"Total columns: {len(combined_df.columns)}")

# Show sample
print("\nSample data:")
print(combined_df[['date', 'Close', 'log_return', 'next_day_return', 'Volume']].head(5))

# Save
output_file = os.path.join(script_dir, 'corn_combined.csv')
combined_df.to_csv(output_file, index=False)
print(f"\n✓ Saved to: corn_combined.csv")

# Verify next_day_return
valid_returns = combined_df['next_day_return'].dropna()
print(f"\nNext_day_return statistics:")
print(f"  Valid observations: {len(valid_returns)}")
print(f"  Mean: {valid_returns.mean():.6f}")
print(f"  Std: {valid_returns.std():.6f}")
print(f"  Min: {valid_returns.min():.6f}")
print(f"  Max: {valid_returns.max():.6f}")

print("\n" + "="*80)
print("✓ MERGE COMPLETE")
print("="*80)
print("\nYou can now run: corn_returns_analysis.py")

MERGING CORN DATA

Loading price data: c:\Users\roosd\Downloads\econometrie jaar 3\TIC\Spring2026-TIC\1_data\daily_corn_prices.csv
✓ Loaded 5304 price observations

Loading weather data: c:\Users\roosd\Downloads\econometrie jaar 3\TIC\Spring2026-TIC\1_data\corn_data.csv
❌ File not found: c:\Users\roosd\Downloads\econometrie jaar 3\TIC\Spring2026-TIC\1_data\corn_data.csv

Merging datasets on 'date'...


NameError: name 'weather_df' is not defined

: 

soybean merge

In [12]:
"""
Merge Soybean Price Data with Weather Data
Creates soybean_combined.csv with next_day_return for prediction
"""

import pandas as pd
import numpy as np
import os

# Auto-detect file location
try:
    script_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    script_dir = os.getcwd()

print("="*80)
print("MERGING SOYBEAN DATA")
print("="*80)

# Load price data
price_file = os.path.join(script_dir, 'daily_soybean_prices.csv')
print(f"\nLoading price data: {price_file}")

try:
    price_df = pd.read_csv(price_file, parse_dates=['Date'])
    price_df = price_df.rename(columns={'Date': 'date'})
    print(f"✓ Loaded {len(price_df)} price observations")
except FileNotFoundError:
    print(f"❌ File not found: {price_file}")
    print("\nPlease run 'import_soybean_clean.py' first to create this file")
    exit()

# Load weather data
weather_file = os.path.join(script_dir, 'soybean_data.csv')
print(f"\nLoading weather data: {weather_file}")

try:
    weather_df = pd.read_csv(weather_file, parse_dates=['date'])
    print(f"✓ Loaded {len(weather_df)} weather observations")
except FileNotFoundError:
    print(f"❌ File not found: {weather_file}")
    exit()

# Merge on date
print("\nMerging datasets on 'date'...")
combined_df = pd.merge(price_df, weather_df, on='date', how='inner')
print(f"✓ Combined dataset: {len(combined_df)} observations")

# Ensure log_return exists
if 'log_return' not in combined_df.columns:
    combined_df['log_return'] = np.log(combined_df['Close'] / combined_df['Close'].shift(1))
    combined_df.loc[0, 'log_return'] = 0
    print("✓ Calculated log_return")

# Create next_day_return (our prediction target)
combined_df['next_day_return'] = combined_df['log_return'].shift(-1)
print("✓ Created next_day_return (tomorrow's return)")

# Show info
print(f"\nDate range: {combined_df['date'].min().date()} to {combined_df['date'].max().date()}")
print(f"Total columns: {len(combined_df.columns)}")

# Show sample
print("\nSample data:")
print(combined_df[['date', 'Close', 'log_return', 'next_day_return', 'Volume']].head(5))

# Save
output_file = os.path.join(script_dir, 'soybean_combined.csv')
combined_df.to_csv(output_file, index=False)
print(f"\n✓ Saved to: soybean_combined.csv")

# Verify next_day_return
valid_returns = combined_df['next_day_return'].dropna()
print(f"\nNext_day_return statistics:")
print(f"  Valid observations: {len(valid_returns)}")
print(f"  Mean: {valid_returns.mean():.6f}")
print(f"  Std: {valid_returns.std():.6f}")
print(f"  Min: {valid_returns.min():.6f}")
print(f"  Max: {valid_returns.max():.6f}")

print("\n" + "="*80)
print("✓ MERGE COMPLETE")
print("="*80)
print("\nYou can now run: soybean_returns_analysis.py")

MERGING SOYBEAN DATA

Loading price data: c:\Users\roosd\Downloads\econometrie jaar 3\TIC\TIC-Quantitative-Research-Group\data_comodities\daily_soybean_prices.csv
✓ Loaded 5261 price observations

Loading weather data: c:\Users\roosd\Downloads\econometrie jaar 3\TIC\TIC-Quantitative-Research-Group\data_comodities\soybean_data.csv
✓ Loaded 20394 weather observations

Merging datasets on 'date'...
✓ Combined dataset: 5243 observations
✓ Created next_day_return (tomorrow's return)

Date range: 2005-01-03 to 2025-10-31
Total columns: 45

Sample data:
        date   Close  log_return  next_day_return Volume
0 2005-01-03  537.25    0.000000        -0.014058     47
1 2005-01-04  529.75   -0.014058         0.007991     54
2 2005-01-05   534.0    0.007991         0.014409     37
3 2005-01-06  541.75    0.014409         0.018290     35
4 2005-01-07  551.75    0.018290         0.014842     17

✓ Saved to: soybean_combined.csv

Next_day_return statistics:
  Valid observations: 5242
  Mean: 0.00013