# Corn and soybean production datasets

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns

In [None]:
# Data source: https://www.kaggle.com/ainslie/usda-wasde-monthly-corn-soybean-projections
# Both paths are relative, so they resolve against the current working directory.
corn_file = "corn-soybean/USDAProj_Corn.csv"
soybean_file = "corn-soybean/USDAProj_Soybean.csv"

In [None]:
# Read both CSV files into DataFrames using pandas' default parsing options.
corn, soybean = (pd.read_csv(path) for path in (corn_file, soybean_file))

In [None]:
# Show the corn DataFrame as this cell's output for a quick visual check.
corn

In [None]:
# Feature matrix: all rows, columns at positions 1 through 6, as a NumPy array.
# NOTE(review): if "Total Supply" is one of those six columns, the target leaks
# into the features — confirm against the CSV header.
X = corn.iloc[:, 1:7].values
# Regression target: the "Total Supply" column.
y = corn["Total Supply"].values

In [None]:
from sklearn.svm import LinearSVR

In [None]:
# Linear support vector regressor with scikit-learn's default hyperparameters.
# NOTE(review): the features are passed in unscaled and no random_state is set;
# confirm that convergence and run-to-run reproducibility are acceptable.
model = LinearSVR()

In [None]:
# Fit the regressor on the full feature matrix and target (no hold-out split).
model.fit(X=X, y=y)

In [None]:
# Score the model on the same X and y it was fitted on, so this is a
# training-set score (R^2 for scikit-learn regressors), not an estimate of
# out-of-sample performance.
model.score(X=X, y=y)

In [None]:
# Scatter "Total Supply" against "Production" to eyeball how the two relate.
sns.scatterplot(data=corn, x="Production", y="Total Supply")

## Compress features with PCA

In [None]:
from sklearn.decomposition import PCA
import seaborn as sns
import plotly.express as px

# Switch matplotlib/seaborn plots to seaborn's default theme.
sns.set()

In [None]:
# PCA with default settings, i.e. without capping the number of components.
pca = PCA()

In [None]:
# Fit PCA on every column except the first and project the rows onto the
# principal components.
# NOTE(review): the columns are not standardised beforehand, so columns with
# large variance will dominate the components — confirm that is intended.
Z = pca.fit_transform(X=corn.iloc[:, 1:])

In [None]:
# Wrap the PCA scores in a DataFrame with one "PC<k>" column per component.
# The number of components is read from the scores themselves instead of being
# hard-coded, so the cell keeps working if the number of input columns changes.
Z = pd.DataFrame(Z, columns=[f"PC{k}" for k in range(1, Z.shape[1] + 1)])
Z

In [None]:
# Tabulate the variance explained by each principal component.
# Component numbers are derived from the fitted PCA rather than hard-coded, so
# the two columns always have matching lengths, whatever number of components
# `pca` was fitted with.
variances = pd.DataFrame(
    {
        "principal component": range(1, len(pca.explained_variance_) + 1),
        "explained variance": pca.explained_variance_,
    }
)
variances.head()

In [None]:
# Scree-style plot: explained variance per principal component.
sns.lineplot(x="principal component", y="explained variance", data=variances)

In [None]:
# Tabulate the running total of the explained-variance ratio, component by
# component. Component numbers are derived from the fitted PCA rather than
# hard-coded, so the two columns always have matching lengths.
cumulative_ratio = np.cumsum(pca.explained_variance_ratio_)
ratio = pd.DataFrame(
    {
        "principal component": range(1, len(cumulative_ratio) + 1),
        "cumulated explained variance ratio": cumulative_ratio,
    }
)
ratio.head()

In [None]:
# Plot the cumulative explained-variance ratio per component.
ax = sns.lineplot(
    data=ratio,
    x="principal component",
    y="cumulated explained variance ratio",
)
# Fix the y-axis to 0..1.1 so the curve's approach to 1.0 stays visible.
ax.set(ylim=(0, 1.1))

In [None]:
# New PCA that keeps only the first three principal components.
pca = PCA(n_components=3)

In [None]:
# Fit the 3-component PCA on every column except the first and project the rows.
# NOTE(review): a later cell appends PC1..PC3 to `corn`; re-running this cell
# after that one would feed those columns back into the PCA.
Z = pca.fit_transform(X=corn.iloc[:, 1:])

In [None]:
# Label the three score columns and show the resulting DataFrame.
Z = pd.DataFrame(Z, columns=["PC1", "PC2", "PC3"])
Z

In [None]:
# Attach the principal-component scores to the corn table, column-wise.
# Any columns named like those in Z (left over from a previous run of this
# cell) are dropped first, so re-running the cell replaces them instead of
# appending duplicate "PC1".."PC3" columns.
corn = pd.concat([corn.drop(columns=Z.columns, errors="ignore"), Z], axis=1)
corn.head()

In [None]:
# Interactive 3-D scatter of the three principal components, coloured by
# "Total Supply" and drawn semi-transparent.
fig = px.scatter_3d(
    corn,
    x="PC1",
    y="PC2",
    z="PC3",
    color="Total Supply",
    opacity=0.7,
)
# Set the marker size on every trace.
fig.update_traces(marker={"size": 5})
fig.show()