**Mounting Drives**


In [3]:
from google.colab import files, drive
drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


**Data loading and Normalization**


In [4]:
import numpy as np
import pandas as pd
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn import preprocessing
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
import math

# Import CVs to Dataframes
# weatherDF = pd.read_csv('/content/drive/My Drive/input/wx-frontier-agg.csv')
weatherDF = pd.read_csv('/content/drive/My Drive/input/wx-frontier-agg-2.csv')
cropsDF = pd.read_csv('/content/drive/My Drive/input/Processed_Iowa+Cerro+Gordo_1960+2009_Annual+Crop.csv')

# Filtering and dataframe organization
cropsDF = cropsDF.sort_values(by='Year')
cropsDF['YEAR'] = cropsDF.Year
cropsDF.drop('Year', axis=1, inplace=True)
cropsDF.Value = cropsDF.Value.astype('float')

# Generating Corn Dataframe and Bean Data Frame
cornDF = cropsDF.loc[(cropsDF['Data Item'].str.contains('BU / ACRE')) & (cropsDF['Commodity'] == 'CORN')]
beansDF = cropsDF.loc[(cropsDF['Data Item'].str.contains('BU / ACRE')) & (cropsDF['Commodity'] == 'SOYBEANS')]

cornDF = cornDF[['YEAR', 'Value']]
beansDF = beansDF[['YEAR', 'Value']]

# Combining crop data with weather data
cropWeatherDF = weatherDF.merge(cornDF, on='YEAR')
cropWeatherDF['corn'] = cropWeatherDF.Value
cropWeatherDF.drop('Value', axis=1, inplace=True)
cropWeatherDF = cropWeatherDF.merge(beansDF, on='YEAR')
cropWeatherDF['beans'] = cropWeatherDF.Value
cropWeatherDF.drop('Value', axis=1, inplace=True)

# Normalizing Data
features = ['YEAR', 'GSP', 'GDD', 'GSTmax', 'GSTmin', 'frost', 'summer' ,'HWI', 'CWI', 'dry' ,'wet', 'PRCP95P']
ys = ['corn', 'beans']

def norm(x):
  return (x - x.mean()) / x.std()

for feat in features + ys:
  cropWeatherDF[feat] = norm(cropWeatherDF[feat])

cropWeatherDF = cropWeatherDF.sample(frac=1)

**Ridge Regression**

In [11]:
# Assigning X and y for corn and weather data
X = cropWeatherDF[features]
y = cropWeatherDF['corn']

# Generating list of Alphas
alph = [round((x+1) * 0.1, 1) for x in range(0, 100)]

# Ridge Regression on Corn and Weather
finalRMSE1 = 100
RMSEAlph1 = 0

finalR21 = -100
R2Alph1 = 0

for Alph in alph:
  ridge_Model = Ridge(alpha=Alph)
  ridge_Model.fit(X, y)

  cornRMSE = -cross_val_score(ridge_Model, X, y, scoring="neg_mean_squared_error", cv=5).mean()
  cornRMSE = math.sqrt(cornRMSE)
  cornR2 = ridge_Model.score(X, y)

  if finalRMSE1 > cornRMSE:
    finalRMSE1 = cornRMSE
    RMSEAlph1 = Alph
  
  if finalR21 < cornR2:
    finalR21 = cornR2
    R2Alph1 = Alph

print("Lowest RMSE for Corn: {}".format(round(finalRMSE1, 2)))
print("Lowest RMSE found at Alpha: {}".format(round(RMSEAlph1, 2)))

print("Highest R2 for Corn: {}".format(round(finalR21, 2)))
print("Highest R2 found at Alpha: {}".format(round(R2Alph1, 2)))
print()

# Assigning X and y for bean and weather data
X = cropWeatherDF[features]
y = cropWeatherDF['beans']

# Ridge Regression on Beans and Weather
finalRMSE = 100
RMSEAlph = 0

finalR2 = -100
R2Alph = 0

for Alph in alph:
  ridge_Model = Ridge(alpha=Alph)
  ridge_Model.fit(X, y)

  beanRMSE = -cross_val_score(ridge_Model, X, y, scoring="neg_mean_squared_error", cv=5).mean()
  beanRMSE = math.sqrt(beanRMSE)
  beanR2 = ridge_Model.score(X, y)

  if finalRMSE > beanRMSE:
    finalRMSE = beanRMSE
    RMSEAlph = Alph
  
  if finalR2 < beanR2:
    finalR2 = beanR2
    R2Alph = Alph

print("Lowest RMSE for Beans: {}".format(round(finalRMSE, 2)))
print("Lowest RMSE found at Alpha: {}".format(round(RMSEAlph, 2)))

print("Highest R2 for Beans: {}".format(round(finalR2, 2)))
print("Highest R2 found at Alpha: {}\n\n".format(round(R2Alph, 2)))


print("****** FINAL STATS ******")
X = cropWeatherDF[features]
y = cropWeatherDF['corn']
print("Alphas being used for Corn: {} {}".format(round(RMSEAlph1, 2), round(R2Alph1, 2)))
# Using lowest RMSE Alpha for Beans
ridge_Model = Ridge(alpha=RMSEAlph1)
ridge_Model.fit(X, y)
beanRMSEAlpha = -cross_val_score(ridge_Model, X, y, scoring="neg_mean_squared_error", cv=5).mean()
beanRMSEAlpha = math.sqrt(beanRMSEAlpha)
beanR2Alpha = ridge_Model.score(X, y)

print("Using the lowest Corn RMSE alpha: RMSE: {} R2:{}\n".format(round(beanRMSEAlpha, 2), round(beanR2Alpha, 2)))

# Using Highest R2 Alpha for Beans
ridge_Model = Ridge(alpha=R2Alph1)
ridge_Model.fit(X, y)
beanRMSEAlpha = -cross_val_score(ridge_Model, X, y, scoring="neg_mean_squared_error", cv=5).mean()
beanRMSEAlpha = math.sqrt(beanRMSEAlpha)
beanR2Alpha = ridge_Model.score(X, y)

print("Using the Highest Corn R2 alpha: RMSE: {} R2:{}\n".format(round(beanRMSEAlpha, 2), round(beanR2Alpha, 2)))

X = cropWeatherDF[features]
y = cropWeatherDF['beans']
print("Alphas being used: {} {}".format(round(RMSEAlph, 2), round(R2Alph, 2)))

# Using lowest RMSE Alpha for Beans
ridge_Model = Ridge(alpha=RMSEAlph)
ridge_Model.fit(X, y)
beanRMSEAlpha1 = -cross_val_score(ridge_Model, X, y, scoring="neg_mean_squared_error", cv=5).mean()
beanRMSEAlpha1 = math.sqrt(beanRMSEAlpha1)
beanR2Alpha1 = ridge_Model.score(X, y)

print("Using the lowest Bean RMSE alpha: RMSE: {} R2:{}\n".format(round(beanRMSEAlpha1, 2), round(beanR2Alpha1, 2)))

# Using Highest R2 Alpha for Beans
ridge_Model = Ridge(alpha=R2Alph)
ridge_Model.fit(X, y)
beanRMSEAlpha1 = -cross_val_score(ridge_Model, X, y, scoring="neg_mean_squared_error", cv=5).mean()
beanRMSEAlpha1 = math.sqrt(beanRMSEAlpha1)
beanR2Alpha1 = ridge_Model.score(X, y)

print("Using the Highest Bean R2 alpha: RMSE: {} R2:{}\n".format(round(beanRMSEAlpha1, 2), round(beanR2Alpha1, 2)))




Lowest RMSE for Corn: 0.38
Lowest RMSE found at Alpha: 1.7
Highest R2 for Corn: 0.9
Highest R2 found at Alpha: 0.1

Lowest RMSE for Beans: 0.4
Lowest RMSE found at Alpha: 0.9
Highest R2 for Beans: 0.89
Highest R2 found at Alpha: 0.1


****** FINAL STATS ******
Alphas being used for Corn: 1.7 0.1
Using the lowest Corn RMSE alpha: RMSE: 0.38 R2:0.9

Using the Highest Corn R2 alpha: RMSE: 0.38 R2:0.9

Alphas being used: 0.9 0.1
Using the lowest Bean RMSE alpha: RMSE: 0.4 R2:0.89

Using the Highest Bean R2 alpha: RMSE: 0.4 R2:0.89



**Random Forest**


In [None]:
from sklearn import tree
from sklearn import ensemble
param_grid = {
    'bootstrap': [True],
    'max_depth': [80, 90, 100, 110],
    'max_features': [2, 3],
    'min_samples_leaf': [3, 4, 5],
    'min_samples_split': [8, 10, 12],
    'n_estimators': [100, 200, 300, 1000]
}

# Assigning X and y for corn and weather data
X = cropWeatherDF[features]
y = cropWeatherDF['corn']

# Define Random Forest Modelss

# Cross-validations
cornRMSE = GridSearchCV(RF_model, param_grid = param_grid, scoring="neg_mean_squared_error", cv=5)
cornRMSE.fit(X, y)
print("RMSE for Corn v. Weahter Data {}".format(round(cornRMSE.score(X, y), 2)))

cornR2 = GridSearchCV(RF_model, param_grid = param_grid, scoring="r2", cv=5)
cornR2.fit(X, y)
print("R2 for Corn v. Weahter Data {}".format(round(cornR2.score(X, y), 2)))

# Assigning X and y for bean and weather data
X = cropWeatherDF[features]
y = cropWeatherDF['beans']
RF_model.fit(X, y)

# Cross-validations
beanRMSE = GridSearchCV(RF_model, param_grid = param_grid, scoring="neg_mean_squared_error", cv=5)
beanRMSE.fit(X, y)
print("RMSE for Bean v. Weahter Data {}".format(round(beanRMSE.score(X, y), 2)))

beanR2 = GridSearchCV(RF_model, param_grid = param_grid, scoring="r2", cv=5)
beanR2.fit(X, y)
print("R2 for Bean v. Weather Data {}".format(round(beanR2.score(X, y), 2)))



RMSE for Corn v. Weahter Data -0.13
R2 for Corn v. Weahter Data 0.86
RMSE for Bean v. Weahter Data -0.17
R2 for Bean v. Weahter Data 0.84
