# Helios Corn Futures Climate Challenge - Submission Sample

This notebook demonstrates how to:
1. Load and explore the competition dataset
2. Engineer climate risk features
3. Evaluate your approach using the CFCS metric
4. Prepare a submission

**Goal**: Create novel climate risk features that show stronger correlations with corn futures prices.

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime
import warnings
warnings.filterwarnings('ignore')

# Set display options
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

print("Libraries loaded successfully!")

Libraries loaded successfully!


## 1. Load Competition Data

In [2]:
# Load main dataset
print("Loading main dataset...")
df = pd.read_csv('/kaggle/input/forecasting-the-future-the-helios-corn-climate-challenge/corn_climate_risk_futures_daily_master.csv')
df['date_on'] = pd.to_datetime(df['date_on'])

# Load regional market share data
print("Loading regional market share data...")
market_share_df = pd.read_csv('/kaggle/input/forecasting-the-future-the-helios-corn-climate-challenge/corn_regional_market_share.csv')

print(f"Main dataset shape: {df.shape}")
print(f"Date range: {df['date_on'].min()} to {df['date_on'].max()}")
print(f"Countries: {df['country_name'].nunique()}")
print(f"Regions: {df['region_name'].nunique()}")

df.head()

Loading main dataset...
Loading regional market share data...
Main dataset shape: (320661, 41)
Date range: 2016-01-01 00:00:00 to 2025-12-15 00:00:00
Countries: 11
Regions: 89


Unnamed: 0,ID,crop_name,country_name,country_code,region_name,region_id,harvest_period,growing_season_year,date_on,climate_risk_cnt_locations_heat_stress_risk_low,climate_risk_cnt_locations_heat_stress_risk_medium,climate_risk_cnt_locations_heat_stress_risk_high,climate_risk_cnt_locations_unseasonably_cold_risk_low,climate_risk_cnt_locations_unseasonably_cold_risk_medium,climate_risk_cnt_locations_unseasonably_cold_risk_high,climate_risk_cnt_locations_excess_precip_risk_low,climate_risk_cnt_locations_excess_precip_risk_medium,climate_risk_cnt_locations_excess_precip_risk_high,climate_risk_cnt_locations_drought_risk_low,climate_risk_cnt_locations_drought_risk_medium,climate_risk_cnt_locations_drought_risk_high,futures_close_ZC_1,futures_close_ZC_2,futures_close_ZW_1,futures_close_ZS_1,futures_zc1_ret_pct,futures_zc1_ret_log,futures_zc_term_spread,futures_zc_term_ratio,futures_zc1_ma_20,futures_zc1_ma_60,futures_zc1_ma_120,futures_zc1_vol_20,futures_zc1_vol_60,futures_zw_zc_spread,futures_zc_zw_ratio,futures_zs_zc_spread,futures_zc_zs_ratio,date_on_year,date_on_month,date_on_year_month
0,8af42722-3f05-4ede-80fc-605e0e2b3b67,Corn: Commodity Tracked,Argentina,AR,Buenos Aires,bffad37a-7c60-432f-984a-8ea83a944311,Harvest,2017,2016-06-15,23,0,0,23,0,0,23,0,0,16,7,0,429.0,434.0,477.5,1156.0,-0.017182,-0.017331,5.0,1.011655,414.4125,387.695833,375.014583,0.01352,0.015724,48.5,0.898429,727.0,0.371107,2016,6,2016_06
1,54f4ddc5-e7ab-4bfb-ad6a-5649841af563,Corn: Commodity Tracked,Argentina,AR,Buenos Aires,bffad37a-7c60-432f-984a-8ea83a944311,Harvest,2017,2016-06-16,23,0,0,23,0,0,23,0,0,14,9,0,425.25,430.5,472.5,1134.5,-0.008741,-0.00878,5.25,1.012346,415.7,388.616667,375.5125,0.013799,0.015792,47.25,0.9,709.25,0.374835,2016,6,2016_06
2,63a41fce-d371-4295-a58a-dc6491664020,Corn: Commodity Tracked,Argentina,AR,Buenos Aires,bffad37a-7c60-432f-984a-8ea83a944311,Harvest,2017,2016-06-17,23,0,0,14,1,8,23,0,0,14,8,1,437.75,442.75,481.25,1159.5,0.029394,0.028971,5.0,1.011422,418.0875,389.770833,376.122917,0.013442,0.016145,43.5,0.90961,721.75,0.377533,2016,6,2016_06
3,cddfa440-e0eb-4735-beb1-1aca2afefe53,Corn: Commodity Tracked,Argentina,AR,Buenos Aires,bffad37a-7c60-432f-984a-8ea83a944311,Harvest,2017,2016-06-18,23,0,0,20,0,3,23,0,0,15,6,2,,,,,,,,,,,,,,,,,,2016,6,2016_06
4,3eaacfe1-29be-4da9-b5c9-a9457d2d2b83,Corn: Commodity Tracked,Argentina,AR,Buenos Aires,bffad37a-7c60-432f-984a-8ea83a944311,Harvest,2017,2016-06-19,23,0,0,21,2,0,23,0,0,16,7,0,,,,,,,,,,,,,,,,,,2016,6,2016_06


## 2. Data Exploration

In [3]:
# Identify climate risk and futures columns
climate_cols = [c for c in df.columns if c.startswith('climate_risk_')]
futures_cols = [c for c in df.columns if c.startswith('futures_')]

print(f"Climate risk columns ({len(climate_cols)}):")
for col in climate_cols:
    print(f"  - {col}")

print(f"\nFutures columns ({len(futures_cols)}):")
for col in futures_cols[:10]:  # Show first 10
    print(f"  - {col}")
if len(futures_cols) > 30:
    print(f"  ... and {len(futures_cols) - 10} more")

Climate risk columns (12):
  - climate_risk_cnt_locations_heat_stress_risk_low
  - climate_risk_cnt_locations_heat_stress_risk_medium
  - climate_risk_cnt_locations_heat_stress_risk_high
  - climate_risk_cnt_locations_unseasonably_cold_risk_low
  - climate_risk_cnt_locations_unseasonably_cold_risk_medium
  - climate_risk_cnt_locations_unseasonably_cold_risk_high
  - climate_risk_cnt_locations_excess_precip_risk_low
  - climate_risk_cnt_locations_excess_precip_risk_medium
  - climate_risk_cnt_locations_excess_precip_risk_high
  - climate_risk_cnt_locations_drought_risk_low
  - climate_risk_cnt_locations_drought_risk_medium
  - climate_risk_cnt_locations_drought_risk_high

Futures columns (17):
  - futures_close_ZC_1
  - futures_close_ZC_2
  - futures_close_ZW_1
  - futures_close_ZS_1
  - futures_zc1_ret_pct
  - futures_zc1_ret_log
  - futures_zc_term_spread
  - futures_zc_term_ratio
  - futures_zc1_ma_20
  - futures_zc1_ma_60


In [4]:
# Check data quality
print("Data Quality Summary:")
print(f"Missing values in climate columns: {df[climate_cols].isnull().sum().sum()}")
print(f"Missing values in futures columns: {df[futures_cols].isnull().sum().sum()}")

# Show sample statistics
print("\nClimate Risk Statistics:")
print(df[climate_cols].describe())

Data Quality Summary:
Missing values in climate columns: 0
Missing values in futures columns: 1719210

Climate Risk Statistics:
       climate_risk_cnt_locations_heat_stress_risk_low  \
count                                    320661.000000   
mean                                         11.499699   
std                                           9.902119   
min                                           0.000000   
25%                                           2.000000   
50%                                           9.000000   
75%                                          20.000000   
max                                          32.000000   

       climate_risk_cnt_locations_heat_stress_risk_medium  \
count                                       320661.000000   
mean                                             0.080733   
std                                              0.666120   
min                                              0.000000   
25%                                         

## 3. Baseline Feature Engineering

Let's start with some basic feature engineering approaches to establish a baseline.

### ⚠️ **CRITICAL NAMING REQUIREMENT**

**ALL engineered climate features must start with `climate_risk_` for the evaluation metric to detect them!**

- ✅ **Correct**: `climate_risk_heat_stress_weighted`, `climate_risk_drought_ma_30d`
- ❌ **Wrong**: `heat_stress_risk`, `my_climate_feature`, `weather_index`

The evaluation system automatically finds features by this prefix. Incorrect naming = zero score for those features!

In [5]:
# Create a working copy
merged_daily_df = df.copy()

# Add basic time features
merged_daily_df['day_of_year'] = merged_daily_df['date_on'].dt.dayofyear
merged_daily_df['quarter'] = merged_daily_df['date_on'].dt.quarter

print("Added basic time features")
print(f"Dataset shape: {merged_daily_df.shape}")

Added basic time features
Dataset shape: (320661, 43)


In [6]:
# Merge with market share data for production weighting
merged_daily_df = merged_daily_df.merge(
    market_share_df[['region_id', 'percent_country_production']], 
    on='region_id', 
    how='left'
)

# Fill missing production percentages with small default value
merged_daily_df['percent_country_production'] = merged_daily_df['percent_country_production'].fillna(1.0)

print("Merged with market share data")
print(f"Production share range: {merged_daily_df['percent_country_production'].min():.1f}% to {merged_daily_df['percent_country_production'].max():.1f}%")

Merged with market share data
Production share range: 0.0% to 73.0%


### 3.1 Production-Weighted Risk Scores

##### The Logic:
Input Data Structure
For each region/day, we have counts of locations at different risk levels:

- low_col: Number of locations with LOW risk
- med_col: Number of locations with MEDIUM risk
- high_col: Number of locations with HIGH risk

##### Weighted Scoring System
The formula creates a weighted average risk score where:

- Low risk locations = 0 points (not included in numerator)
- Medium risk locations = 1 point each (med_col * 1)
- High risk locations = 2 points each (high_col * 2)

##### Normalization
Divides by total locations to get a 0-2 scale:

- 0.0 = All locations are low risk
- 1.0 = All locations are medium risk
- 2.0 = All locations are high risk
- 0.5 = Half locations medium, half low
- 1.5 = Half locations high, half medium

##### Market share weighting
Finally we weight by production importance so the regions that produce more are accounted for more

In [7]:
# Create production-weighted risk indices
risk_categories = ['heat_stress', 'unseasonably_cold', 'excess_precip', 'drought']

for risk_type in risk_categories:
    # Get the three risk level columns for this category
    low_col = f'climate_risk_cnt_locations_{risk_type}_risk_low'
    med_col = f'climate_risk_cnt_locations_{risk_type}_risk_medium' 
    high_col = f'climate_risk_cnt_locations_{risk_type}_risk_high'
    
    # Calculate total locations
    total_locations = merged_daily_df[low_col] + merged_daily_df[med_col] + merged_daily_df[high_col]
    
    # Calculate risk score (0=all low, 1=all medium, 2=all high)
    risk_score = (merged_daily_df[med_col] + 2 * merged_daily_df[high_col]) / (total_locations + 1e-6) # 1e-6 prevents division by zero
    
    # Weight by production importance
    weighted_risk = risk_score * (merged_daily_df['percent_country_production'] / 100)
    
    # Store new features
    merged_daily_df[f'climate_risk_{risk_type}_score'] = risk_score
    merged_daily_df[f'climate_risk_{risk_type}_weighted'] = weighted_risk
    
    print(f"Created {risk_type} risk features")

print("\nProduction-weighted risk scores created!")

Created heat_stress risk features
Created unseasonably_cold risk features
Created excess_precip risk features
Created drought risk features

Production-weighted risk scores created!


### 3.2 Composite Risk Indices

In [8]:
# Create composite risk indices
temperature_risks = ['heat_stress', 'unseasonably_cold']
precipitation_risks = ['excess_precip', 'drought']

# Temperature stress index
temp_scores = [f'climate_risk_{risk}_score' for risk in temperature_risks]
merged_daily_df['climate_risk_temperature_stress'] = merged_daily_df[temp_scores].max(axis=1)

# Precipitation stress index  
precip_scores = [f'climate_risk_{risk}_score' for risk in precipitation_risks]
merged_daily_df['climate_risk_precipitation_stress'] = merged_daily_df[precip_scores].max(axis=1)

# Overall climate stress (maximum of all risks)
all_risk_scores = [f'climate_risk_{risk}_score' for risk in risk_categories]
merged_daily_df['climate_risk_overall_stress'] = merged_daily_df[all_risk_scores].max(axis=1)

# Combined stress (average of all risks)
merged_daily_df['climate_risk_combined_stress'] = merged_daily_df[all_risk_scores].mean(axis=1)

print("Created composite risk indices:")
print("- Temperature stress (max of heat/cold)")
print("- Precipitation stress (max of wet/dry)")
print("- Overall stress (max of all risks)")
print("- Combined stress (average of all risks)")

Created composite risk indices:
- Temperature stress (max of heat/cold)
- Precipitation stress (max of wet/dry)
- Overall stress (max of all risks)
- Combined stress (average of all risks)


### 3.3 Temporal Features

In [9]:
# Sort by region and date for time series features
merged_daily_df = merged_daily_df.sort_values(['region_id', 'date_on'])

# Create rolling averages for key risk metrics
windows = [7, 14, 30]  # 1 week, 2 weeks, 1 month

for window in windows:
    for risk_type in risk_categories:
        score_col = f'climate_risk_{risk_type}_score'
        
        # Rolling average
        merged_daily_df[f'climate_risk_{risk_type}_ma_{window}d'] = (
            merged_daily_df.groupby('region_id')[score_col]
            .rolling(window=window, min_periods=1)
            .mean()
            .reset_index(level=0, drop=True)
        )
        
        # Rolling maximum (peak risk)
        merged_daily_df[f'climate_risk_{risk_type}_max_{window}d'] = (
            merged_daily_df.groupby('region_id')[score_col]
            .rolling(window=window, min_periods=1)
            .max()
            .reset_index(level=0, drop=True)
        )

print(f"Created rolling features for {len(windows)} time windows")
print(f"New dataset shape: {merged_daily_df.shape}")

Created rolling features for 3 time windows
New dataset shape: (320661, 80)


### 3.4 Risk Momentum and Changes

In [10]:
# Calculate risk changes and momentum
for risk_type in risk_categories:
    score_col = f'climate_risk_{risk_type}_score'
    
    # Daily change
    merged_daily_df[f'climate_risk_{risk_type}_change_1d'] = (
        merged_daily_df.groupby('region_id')[score_col].diff(1)
    )
    
    # Weekly change
    merged_daily_df[f'climate_risk_{risk_type}_change_7d'] = (
        merged_daily_df.groupby('region_id')[score_col].diff(7)
    )
    
    # Risk acceleration (change in change)
    merged_daily_df[f'climate_risk_{risk_type}_acceleration'] = (
        merged_daily_df.groupby('region_id')[f'climate_risk_{risk_type}_change_1d'].diff(1)
    )

print("Created risk momentum features:")
print("- Daily and weekly changes")
print("- Risk acceleration (second derivative)")

Created risk momentum features:
- Daily and weekly changes
- Risk acceleration (second derivative)


### 3.5 Cross-Regional Features

In [11]:
# Create country-level aggregated risk features
country_features = []

for risk_type in risk_categories:
    score_col = f'climate_risk_{risk_type}_score'
    weighted_col = f'climate_risk_{risk_type}_weighted'
    
    # Country-level daily aggregations
    country_agg = merged_daily_df.groupby(['country_name', 'date_on']).agg({
        score_col: ['mean', 'max', 'std'],
        weighted_col: 'sum',
        'percent_country_production': 'sum'
    }).round(4)
    
    # Flatten column names
    country_agg.columns = [f'country_{risk_type}_{"_".join(col).strip()}' for col in country_agg.columns]
    country_agg = country_agg.reset_index()
    
    # Merge back to main dataset
    merged_daily_df = merged_daily_df.merge(
        country_agg, 
        on=['country_name', 'date_on'], 
        how='left'
    )
    
    country_features.extend(country_agg.columns[2:])  # Exclude country_name and date_on

print(f"Created {len(country_features)} country-level aggregated features")
print(f"Final dataset shape: {merged_daily_df.shape}")


Created 20 country-level aggregated features
Final dataset shape: (320661, 112)


## 4. Evaluation Test

In [12]:
def compute_monthly_climate_futures_correlations(df):

    # Dynamic detection
    climate_cols = [c for c in df.columns if c.startswith("climate_risk_")]
    futures_cols = [c for c in df.columns if c.startswith("futures_")]

    # Remove future data
    max_valid_date = df["date_on"].max()
    df = df[df["date_on"] <= max_valid_date]

    results = []

    # Loop by commodity + month
    for comm in df["crop_name"].unique():
        df_comm = df[df["crop_name"] == comm]

        for country in sorted(df_comm["country_name"].unique()):
            df_country = df_comm[df_comm["country_name"] == country]

            for month in sorted(df_country["date_on_month"].unique()):
                df_month = df_country[df_country["date_on_month"] == month]

                for clim in climate_cols:
                    for fut in futures_cols:

                        if df_month[clim].std() > 0 and df_month[fut].std() > 0:
                            corr = df_month[[clim, fut]].corr().iloc[0, 1]
                        else:
                            corr = None

                        results.append({
                            "crop_name": comm,
                            "country_name": country,
                            "month": month,
                            "climate_variable": clim,
                            "futures_variable": fut,
                            "correlation": corr
                        })

    results_df = pd.DataFrame(results)
    #  round correlation to 5 decimal places
    results_df['correlation'] = results_df['correlation'].round(5)
    return results_df


Take a peek at the most significant correlations

In [13]:
monthly_corr_df = compute_monthly_climate_futures_correlations(merged_daily_df)
#  Get the significant correlations greater than 0.5 or less than -0.5
significant_monthly_correlations = monthly_corr_df[
    (monthly_corr_df["correlation"] >= 0.5) | (monthly_corr_df["correlation"] <= -0.5)
]
# Sort by correlation
significant_monthly_correlations = significant_monthly_correlations.sort_values(by='correlation')
significant_monthly_correlations.head(10)

Unnamed: 0,crop_name,country_name,month,climate_variable,futures_variable,correlation
26124,Corn: Commodity Tracked,Canada,2,climate_risk_excess_precip_ma_14d,futures_zc1_vol_60,-0.5968
26141,Corn: Commodity Tracked,Canada,2,climate_risk_excess_precip_max_14d,futures_zc1_vol_60,-0.5948
26005,Corn: Commodity Tracked,Canada,2,climate_risk_excess_precip_max_7d,futures_zc1_vol_60,-0.58986
749,Corn: Commodity Tracked,Argentina,1,climate_risk_excess_precip_ma_30d,futures_close_ZC_2,-0.56005
25988,Corn: Commodity Tracked,Canada,2,climate_risk_excess_precip_ma_7d,futures_zc1_vol_60,-0.55448
748,Corn: Commodity Tracked,Argentina,1,climate_risk_excess_precip_ma_30d,futures_close_ZC_1,-0.55296
110907,Corn: Commodity Tracked,Ukraine,1,climate_risk_unseasonably_cold_max_30d,futures_zc_zs_ratio,-0.54027
26249,Corn: Commodity Tracked,Canada,2,climate_risk_excess_precip_ma_30d,futures_close_ZC_2,-0.53892
26256,Corn: Commodity Tracked,Canada,2,climate_risk_excess_precip_ma_30d,futures_zc1_ma_20,-0.5389
26260,Corn: Commodity Tracked,Canada,2,climate_risk_excess_precip_ma_30d,futures_zc1_vol_60,-0.53836


In [14]:
def calculate_cfcs_score(correlations_df):
    """
    Calculate the Climate-Futures Correlation Score (CFCS) for leaderboard ranking.
    
    CFCS = (0.5 × Avg_Sig_Corr_Score) + (0.3 × Max_Corr_Score) + (0.2 × Sig_Count_Score)
    
    Focus on significant correlations (≥ |0.5|) only for average calculation.
    """
    # Remove null correlations
    valid_corrs = correlations_df["correlation"].dropna()
    
    if len(valid_corrs) == 0:
        return {'cfcs_score': 0.0, 'error': 'No valid correlations'}
    
    # Calculate base metrics
    abs_corrs = valid_corrs.abs()
    max_abs_corr = abs_corrs.max()
    significant_mask = abs_corrs >= 0.5
    significant_corrs = abs_corrs[significant_mask]
    significant_count = len(significant_corrs)
    total_count = len(valid_corrs)
    
    # Calculate component scores - ONLY average significant correlations
    if significant_count > 0:
        avg_sig_corr = significant_corrs.mean()
        avg_sig_score = min(100, avg_sig_corr * 100)  # Cap at 100 when avg sig reaches 1.0
    else:
        avg_sig_corr = 0.0
        avg_sig_score = 0.0
    
    max_corr_score = min(100, max_abs_corr * 100)  # Cap at 100 when max reaches 1.0
    sig_count_score = (significant_count / total_count) * 100  # Percentage
    
    # Composite score: Focus more on quality of significant correlations
    cfcs = (0.5 * avg_sig_score) + (0.3 * max_corr_score) + (0.2 * sig_count_score)
    
    return {
        'cfcs_score': round(cfcs, 2),
        'avg_significant_correlation': round(avg_sig_corr, 4),
        'max_abs_correlation': round(max_abs_corr, 4),
        'significant_correlations_pct': round(sig_count_score, 2),
        'avg_sig_score': round(avg_sig_score, 2),
        'max_corr_score': round(max_corr_score, 2),
        'sig_count_score': round(sig_count_score, 2),
        'total_correlations': total_count,
        'significant_correlations': significant_count
    }

# Calculate the CFCS score
score_results = calculate_cfcs_score(monthly_corr_df)

print("=== CLIMATE-FUTURES CORRELATION SCORE (CFCS) ===")
print(f"Final CFCS Score: {score_results['cfcs_score']}")
print()
print("Component Breakdown:")
print(f"  Average Significant |Correlation|: {score_results['avg_significant_correlation']:.4f} → Score: {score_results['avg_sig_score']}")
print(f"  Maximum |Correlation|: {score_results['max_abs_correlation']:.4f} → Score: {score_results['max_corr_score']}")
print(f"  Significant Correlations: {score_results['significant_correlations']}/{score_results['total_correlations']} ({score_results['significant_correlations_pct']:.1f}%) → Score: {score_results['sig_count_score']}")
print()
print("Score Calculation:")
print(f"  CFCS = (0.5 × {score_results['avg_sig_score']}) + (0.3 × {score_results['max_corr_score']}) + (0.2 × {score_results['sig_count_score']})")
print(f"  CFCS = {0.5 * score_results['avg_sig_score']:.1f} + {0.3 * score_results['max_corr_score']:.1f} + {0.2 * score_results['sig_count_score']:.1f} = {score_results['cfcs_score']}")
print()
print("Key Insight: This metric focuses on the QUALITY of significant correlations rather than being diluted by weak signals.")

=== CLIMATE-FUTURES CORRELATION SCORE (CFCS) ===
Final CFCS Score: 49.5

Component Breakdown:
  Average Significant |Correlation|: 0.5549 → Score: 55.49
  Maximum |Correlation|: 0.7243 → Score: 72.43
  Significant Correlations: 164/115804 (0.1%) → Score: 0.14

Score Calculation:
  CFCS = (0.5 × 55.49) + (0.3 × 72.43) + (0.2 × 0.14)
  CFCS = 27.7 + 21.7 + 0.0 = 49.5

Key Insight: This metric focuses on the QUALITY of significant correlations rather than being diluted by weak signals.


## 5. Submit to Competition

In [15]:
# Must drop NAs in order for submission to go through. 
merged_daily_df_copy = merged_daily_df.dropna()
merged_daily_df_copy.head()

Unnamed: 0,ID,crop_name,country_name,country_code,region_name,region_id,harvest_period,growing_season_year,date_on,climate_risk_cnt_locations_heat_stress_risk_low,climate_risk_cnt_locations_heat_stress_risk_medium,climate_risk_cnt_locations_heat_stress_risk_high,climate_risk_cnt_locations_unseasonably_cold_risk_low,climate_risk_cnt_locations_unseasonably_cold_risk_medium,climate_risk_cnt_locations_unseasonably_cold_risk_high,climate_risk_cnt_locations_excess_precip_risk_low,climate_risk_cnt_locations_excess_precip_risk_medium,climate_risk_cnt_locations_excess_precip_risk_high,climate_risk_cnt_locations_drought_risk_low,climate_risk_cnt_locations_drought_risk_medium,climate_risk_cnt_locations_drought_risk_high,futures_close_ZC_1,futures_close_ZC_2,futures_close_ZW_1,futures_close_ZS_1,futures_zc1_ret_pct,futures_zc1_ret_log,futures_zc_term_spread,futures_zc_term_ratio,futures_zc1_ma_20,futures_zc1_ma_60,futures_zc1_ma_120,futures_zc1_vol_20,futures_zc1_vol_60,futures_zw_zc_spread,futures_zc_zw_ratio,futures_zs_zc_spread,futures_zc_zs_ratio,date_on_year,date_on_month,date_on_year_month,day_of_year,quarter,percent_country_production,climate_risk_heat_stress_score,climate_risk_heat_stress_weighted,climate_risk_unseasonably_cold_score,climate_risk_unseasonably_cold_weighted,climate_risk_excess_precip_score,climate_risk_excess_precip_weighted,climate_risk_drought_score,climate_risk_drought_weighted,climate_risk_temperature_stress,climate_risk_precipitation_stress,climate_risk_overall_stress,climate_risk_combined_stress,climate_risk_heat_stress_ma_7d,climate_risk_heat_stress_max_7d,climate_risk_unseasonably_cold_ma_7d,climate_risk_unseasonably_cold_max_7d,climate_risk_excess_precip_ma_7d,climate_risk_excess_precip_max_7d,climate_risk_drought_ma_7d,climate_risk_drought_max_7d,climate_risk_heat_stress_ma_14d,climate_risk_heat_stress_max_14d,climate_risk_unseasonably_cold_ma_14d,climate_risk_unseasonably_cold_max_14d,climate_risk_excess_precip_ma_14d,climate_risk_excess_precip_max_14d,climate_risk_drought_ma_14d,climate_risk_drought_max_14d,climate_risk_heat_stress_ma_30d,climate_risk_heat_stress_max_30d,climate_risk_unseasonably_cold_ma_30d,climate_risk_unseasonably_cold_max_30d,climate_risk_excess_precip_ma_30d,climate_risk_excess_precip_max_30d,climate_risk_drought_ma_30d,climate_risk_drought_max_30d,climate_risk_heat_stress_change_1d,climate_risk_heat_stress_change_7d,climate_risk_heat_stress_acceleration,climate_risk_unseasonably_cold_change_1d,climate_risk_unseasonably_cold_change_7d,climate_risk_unseasonably_cold_acceleration,climate_risk_excess_precip_change_1d,climate_risk_excess_precip_change_7d,climate_risk_excess_precip_acceleration,climate_risk_drought_change_1d,climate_risk_drought_change_7d,climate_risk_drought_acceleration,country_heat_stress_climate_risk_heat_stress_score_mean,country_heat_stress_climate_risk_heat_stress_score_max,country_heat_stress_climate_risk_heat_stress_score_std,country_heat_stress_climate_risk_heat_stress_weighted_sum,country_heat_stress_percent_country_production_sum,country_unseasonably_cold_climate_risk_unseasonably_cold_score_mean,country_unseasonably_cold_climate_risk_unseasonably_cold_score_max,country_unseasonably_cold_climate_risk_unseasonably_cold_score_std,country_unseasonably_cold_climate_risk_unseasonably_cold_weighted_sum,country_unseasonably_cold_percent_country_production_sum,country_excess_precip_climate_risk_excess_precip_score_mean,country_excess_precip_climate_risk_excess_precip_score_max,country_excess_precip_climate_risk_excess_precip_score_std,country_excess_precip_climate_risk_excess_precip_weighted_sum,country_excess_precip_percent_country_production_sum,country_drought_climate_risk_drought_score_mean,country_drought_climate_risk_drought_score_max,country_drought_climate_risk_drought_score_std,country_drought_climate_risk_drought_weighted_sum,country_drought_percent_country_production_sum
7,36bcf707-3c9a-4516-a20b-eaaaff8ee81c,Corn: Commodity Tracked,Russia,RU,Republic of Mordovia,01ab8962-db3d-49ef-af56-b877ce4f59d7,Off-season,2016,2016-01-08,1,0,0,0,0,1,1,0,0,1,0,0,357.0,362.75,478.5,879.5,0.011331,0.011268,5.75,1.016106,365.1125,368.979167,372.45625,0.010629,0.010832,121.5,0.746082,522.5,0.405912,2016,1,2016_01,8,1,1.0,0.0,0.0,1.999998,0.02,0.0,0.0,0.0,0.0,1.999998,0.0,1.999998,0.5,0.0,0.0,1.999998,1.999998,0.0,0.0,0.0,0.0,0.0,0.0,1.999998,1.999998,0.0,0.0,0.0,0.0,0.0,0.0,1.999998,1.999998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,89.0,1.8831,2.0,0.3667,1.539,89.0,0.1982,1.6667,0.4192,0.2824,89.0,0.18,2.0,0.5568,0.0875,89.0
10,a7c6b1e6-6f03-4d3b-be88-0b607303c97a,Corn: Commodity Tracked,Russia,RU,Republic of Mordovia,01ab8962-db3d-49ef-af56-b877ce4f59d7,Off-season,2016,2016-01-11,1,0,0,0,0,1,0,0,1,1,0,0,351.75,357.5,469.0,881.0,-0.014706,-0.014815,5.75,1.016347,363.8375,368.525,372.0,0.010219,0.01084,117.25,0.75,529.25,0.399262,2016,1,2016_01,11,1,1.0,0.0,0.0,1.999998,0.02,1.999998,0.02,0.0,0.0,1.999998,1.999998,1.999998,0.999999,0.0,0.0,1.999998,1.999998,0.571428,1.999998,0.0,0.0,0.0,0.0,1.999998,1.999998,0.363636,1.999998,0.0,0.0,0.0,0.0,1.999998,1.999998,0.363636,1.999998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.999999,1.999998,0.999999,0.0,0.0,0.0,0.0,0.0,0.0,0.0,89.0,1.0824,2.0,0.8124,0.7535,89.0,0.4927,2.0,0.6497,0.5643,89.0,0.2,2.0,0.5,0.105,89.0
11,8f030962-0267-4f93-97c9-5ae11990b65e,Corn: Commodity Tracked,Russia,RU,Republic of Mordovia,01ab8962-db3d-49ef-af56-b877ce4f59d7,Off-season,2016,2016-01-12,1,0,0,0,0,1,0,1,0,1,0,0,356.75,362.25,481.25,890.75,0.014215,0.014115,5.5,1.015417,363.025,368.2125,371.616667,0.010766,0.010967,124.5,0.741299,534.0,0.400505,2016,1,2016_01,12,1,1.0,0.0,0.0,1.999998,0.02,0.999999,0.01,0.0,0.0,1.999998,0.999999,1.999998,0.749999,0.0,0.0,1.999998,1.999998,0.714285,1.999998,0.0,0.0,0.0,0.0,1.999998,1.999998,0.416666,1.999998,0.0,0.0,0.0,0.0,1.999998,1.999998,0.416666,1.999998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,-0.999999,0.999999,-1.999998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,89.0,1.45,2.0,0.86,0.976,89.0,0.5424,2.0,0.5952,0.6523,89.0,0.16,2.0,0.4726,0.085,89.0
12,c18c8f9b-63f2-4017-9923-f904db0f7da9,Corn: Commodity Tracked,Russia,RU,Republic of Mordovia,01ab8962-db3d-49ef-af56-b877ce4f59d7,Off-season,2016,2016-01-13,1,0,0,1,0,0,0,0,1,1,0,0,358.0,363.0,478.0,899.0,0.003504,0.003498,5.0,1.013966,361.975,367.9,371.239583,0.009983,0.010968,120.0,0.748954,541.0,0.39822,2016,1,2016_01,13,1,1.0,0.0,0.0,0.0,0.0,1.999998,0.02,0.0,0.0,0.0,1.999998,1.999998,0.5,0.0,0.0,1.714284,1.999998,0.999999,1.999998,0.0,0.0,0.0,0.0,1.846152,1.999998,0.538461,1.999998,0.0,0.0,0.0,0.0,1.846152,1.999998,0.538461,1.999998,0.0,0.0,0.0,0.0,0.0,-1.999998,-1.999998,-1.999998,0.999999,1.999998,1.999998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,89.0,0.558,2.0,0.8801,0.1877,89.0,0.848,2.0,0.7344,0.9941,89.0,0.22,2.0,0.5017,0.0955,89.0
13,070e5716-45dc-44cd-8113-a20d6248cefc,Corn: Commodity Tracked,Russia,RU,Republic of Mordovia,01ab8962-db3d-49ef-af56-b877ce4f59d7,Off-season,2016,2016-01-14,1,0,0,1,0,0,0,1,0,1,0,0,358.0,362.75,468.75,882.25,0.0,0.0,4.75,1.013268,361.0125,367.65,370.952083,0.009992,0.010902,110.75,0.763733,524.25,0.405781,2016,1,2016_01,14,1,1.0,0.0,0.0,0.0,0.0,0.999999,0.01,0.0,0.0,0.0,0.999999,0.999999,0.25,0.0,0.0,1.42857,1.999998,1.142856,1.999998,0.0,0.0,0.0,0.0,1.714284,1.999998,0.571428,1.999998,0.0,0.0,0.0,0.0,1.714284,1.999998,0.571428,1.999998,0.0,0.0,0.0,0.0,0.0,0.0,-1.999998,1.999998,-0.999999,0.999999,-1.999998,0.0,0.0,0.0,0.0,0.0,0.0,0.0,89.0,0.1141,2.0,0.4107,0.0761,89.0,1.039,2.0,0.6199,1.1249,89.0,0.14,2.0,0.4453,0.0745,89.0


In [16]:
merged_daily_df_copy.to_csv('/kaggle/working/submission.csv', index=False)