**Mounting Drives**


In [1]:
from google.colab import files, drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


**Data loading and filtering**


In [38]:
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
import math

# Import CVs to Dataframes
weatherDF = pd.read_csv('/content/drive/My Drive/input/wx-frontier-agg.csv')
cropsDF = pd.read_csv('/content/drive/My Drive/input/Processed_Iowa+Cerro+Gordo_1960+2009_Annual+Crop.csv')

# Filtering and dataframe organization
cropsDF = cropsDF.sort_values(by='Year')
cropsDF['YEAR'] = cropsDF.Year
cropsDF.drop('Year', axis=1, inplace=True)
cropsDF.Value = cropsDF.Value.astype('float')

# Generating Corn Dataframe and Bean Data Frame
cornDF = cropsDF.loc[(cropsDF['Data Item'].str.contains('BU / ACRE')) & (cropsDF['Commodity'] == 'CORN')]
beansDF = cropsDF.loc[(cropsDF['Data Item'].str.contains('BU / ACRE')) & (cropsDF['Commodity'] == 'SOYBEANS')]

cornDF = cornDF[['YEAR', 'Value']]
beansDF = beansDF[['YEAR', 'Value']]

# Combining crop data with weather data
cropWeatherDF = weatherDF.merge(cornDF, on='YEAR')
cropWeatherDF['corn'] = cropWeatherDF.Value
cropWeatherDF.drop('Value', axis=1, inplace=True)
cropWeatherDF = cropWeatherDF.merge(beansDF, on='YEAR')
cropWeatherDF['beans'] = cropWeatherDF.Value
cropWeatherDF.drop('Value', axis=1, inplace=True)

# Normalizing Data
features = ['YEAR', 'GSP', 'GDD', 'GSTmax', 'GSTmin', 'frost', 'summer' ,'HWI', 'CWI', 'dry' ,'wet', 'PRCP95P']
ys = ['corn', 'beans']

def norm(x):
  return (x - x.mean()) / x.std()

for feat in features + ys:
  cropWeatherDF[feat] = norm(cropWeatherDF[feat])

cropWeatherDF = cropWeatherDF.sample(frac=1)

**Ridge Regression**

In [44]:
# Assigning X and y for corn and weather data
X = cropWeatherDF[features]
y = cropWeatherDF['corn']

# Define Ridge Model
ridge_Model = Ridge(alpha=0.001)
ridge_Model.fit(X, y)

# Cross-validations
cornRMSE = -cross_val_score(ridge_Model, X, y, scoring="neg_mean_squared_error", cv=5).mean()
cornRMSE = math.sqrt(cornRMSE)
cornR2 = cross_val_score(ridge_Model, X, y, scoring="r2", cv=5).mean()

# RMSE and R2 Scores (Corn w/ Weather Data)
print("Ridge Regression RMSE Score (Corn w/ Weather Data):  {}".format(round(cornRMSE, 3)))
print("Ridge Regression R2 Score (Corn w/ Weather Data): {} \n".format(round(cornR2, 3)))


# Assigning X and y for bean and weather data
X = cropWeatherDF[features]
y = cropWeatherDF['beans']

# Define Ridge Model
ridge_Model = Ridge(alpha=0.001)
ridge_Model.fit(X, y)

# Cross-validations
beanRMSE = -cross_val_score(ridge_Model, X, y, scoring="neg_mean_squared_error", cv=5).mean()
beanRMSE = math.sqrt(beanRMSE)
beanR2 = cross_val_score(ridge_Model, X, y, scoring="r2", cv=5).mean()

# RMSE and R2 Scores (Bean w/ Weather Data)
print("Ridge Regression RMSE Score (Bean w/ Weather Data):  {}".format(round(beanRMSE, 3)))
print("Ridge Regression R2 Score (Bean w/ Weather Data): {}".format(round(beanR2, 3)))

Ridge Regression RMSE Score (Corn w/ Weather Data):  0.419
Ridge Regression R2 Score (Corn w/ Weather Data): 0.807 

Ridge Regression RMSE Score (Bean w/ Weather Data):  0.42
Ridge Regression R2 Score (Bean w/ Weather Data): 0.815


**Random Forest**


In [46]:
from sklearn import tree
from sklearn import ensemble

# Assigning X and y for corn and weather data
X = cropWeatherDF[features]
y = cropWeatherDF['corn']

# Define Random Forest Model
RF_model = ensemble.RandomForestRegressor()

# Cross-validations
cornRMSE = -cross_val_score(RF_model, X, y, scoring="neg_mean_squared_error", cv=5).mean()
cornRMSE = math.sqrt(cornRMSE)
cornR2 = cross_val_score(RF_model, X, y, scoring="r2", cv=5).mean()

# RMSE and R2 Scores
print("Random Forest RMSE Score (Corn w/ Weather Data) {}".format(round(cornRMSE, 3)))
print("Random Forest R2 Score (Corn w/ Weather Data) {} \n".format(round(cornR2, 3)))

# Assigning X and y for bean and weather data
X = cropWeatherDF[features]
y = cropWeatherDF['beans']

# Cross-validations
beanRMSE = -cross_val_score(RF_model, X, y, scoring="neg_mean_squared_error", cv=5).mean()
beanRMSE = math.sqrt(beanRMSE)
beanR2 = cross_val_score(RF_model, X, y, scoring="r2", cv=5).mean()

# RMSE and R2 Scores
print("Random Forest RMSE Score (Corn w/ Weather Data) {}".format(round(beanRMSE, 3)))
print("Random Forest R2 Score (Corn w/ Weather Data) {} \n".format(round(beanR2, 3)))


Random Forest RMSE Score (Corn w/ Weather Data) 0.359
Random Forest R2 Score (Corn w/ Weather Data) 0.86 

Random Forest RMSE Score (Corn w/ Weather Data) 0.432
Random Forest R2 Score (Corn w/ Weather Data) 0.803 

