In [23]:
import numpy as np
import pandas as pd
import plotly.express as px
from scipy.ndimage.filters import gaussian_filter1d
from collections import OrderedDict
import math

In [24]:
# Load one state's annual crop CSV, keep only the columns used by the
# cells below, and order the rows by year.

# Commodities kept for the analysis
wantedCrops = ['BARLEY', 'BEANS', 'CORN', 'HAY', 'WHEAT']

# State names, used verbatim to build the CSV file name
states = [
    'California', 'Illinois', 'Indiana', 'Iowa', 'Kansas',
    'Michigan', 'Minnesota', 'Missouri', 'Nebraska', 'NorthDakota',
    'Ohio', 'SouthDakota', 'Wisconsin',
]

# Index into `states` selecting the state processed by this run
currState = 1

csvPath = f'../data/agri/{states[currState]}_1960+2009_Annual+Crop+Data.csv'

df = (
    pd.read_csv(csvPath)[['Year', 'Commodity', 'Value']]
    .sort_values(by='Year')  # chronological order
)
df.head()

Unnamed: 0,Year,Commodity,Value
15046,1960,WHEAT,29.0
14979,1960,HAY,2171000.0
14980,1960,HAY,91299000.0
14981,1960,HAY,4682000.0
14982,1960,HAY,2.16


In [25]:
# Delete unwanted crops, keeping only the commodities listed in
# wantedCrops, then drop every row that still has a missing value.
#
# A single boolean mask replaces the previous row-by-row df.drop() loop:
# that loop copied the whole frame once per removed row (quadratic time)
# and only worked while the index labels were exactly 0..len(df)-1.

# Row count before filtering; kept in case a later cell reads it.
dfLen = df.shape[0]

df = df[df['Commodity'].isin(wantedCrops)].dropna()

print('Done!')


Done!


In [26]:
# Create a new object to store the
# years and values to hold average per year

crops = {
    'Year': [],
    'Value': []
}

# Store every distinct year, in order of first appearance in df
crops['Year'] = list(OrderedDict.fromkeys(df.Year.to_numpy()))

# Convert 'Value' to floats in one pass:
#   1. strip thousands separators (commas) from string entries,
#   2. coerce anything that is still not a number to NaN,
#   3. drop those rows.
# Working on the 'Value' column only (instead of the whole frame) leaves the
# other columns untouched, and pd.to_numeric also works when read_csv has
# already parsed the column as numbers -- the previous `.str.replace` call
# raised AttributeError in that case. Building the result with assign()
# avoids writing into a filtered slice (SettingWithCopyWarning).
valueNumeric = pd.to_numeric(
    df['Value'].replace(',', '', regex=True),
    errors='coerce'
)

df = (
    df.assign(Value=valueNumeric)
      .dropna(subset=['Value'])
      .astype({'Value': float})
)

df


Unnamed: 0,Year,Commodity,Value
15046,1960,WHEAT,29.00
14979,1960,HAY,2171000.00
14980,1960,HAY,91299000.00
14981,1960,HAY,4682000.00
14982,1960,HAY,2.16
...,...,...,...
11116,2009,WHEAT,820000.00
11124,2009,WHEAT,45920000.00
11125,2009,WHEAT,56.00
11126,2009,WHEAT,5.00


In [27]:
# Build one row per (year, commodity) holding the mean of 'Value' for that
# pair. Pairs with no rows are skipped (their mean is NaN).

secondCrops = {
    'Year': [],
    'Commodity':[],
    'Value':[]
}

for year in crops['Year']:
    # Rows of the current year only
    yearRows = df.loc[df['Year'] == year]
    for crop in wantedCrops:
        # Build the mask from yearRows itself (not from the full df) and
        # average only the 'Value' column: calling .mean() on the whole
        # frame tries to average the string 'Commodity' column too, which
        # raises TypeError on pandas >= 2.0.
        meanValue = yearRows.loc[yearRows['Commodity'] == crop, 'Value'].mean()

        if not math.isnan(meanValue):
            secondCrops['Year'].append(year)
            secondCrops['Value'].append(meanValue)
            secondCrops['Commodity'].append(crop)

# Long-format frame consumed by the plotting cell
tempDF = pd.DataFrame(secondCrops,  columns=['Year', 'Commodity', 'Value'])

In [29]:
# Process graph and save image under images/agri/

# Smooth each commodity's time series on its own. tempDF interleaves the
# commodities within every year, so filtering the flat 'Value' column (as
# was done before) blended neighbouring rows of *different* crops into each
# other. transform() keeps the result aligned with tempDF's row order.
smoothedValues = (
    tempDF.groupby('Commodity', sort=False)['Value']
    .transform(lambda values: gaussian_filter1d(values.to_numpy(), sigma=1.4))
    .to_numpy()  # plain array so plotly names the axis 'y', matching `labels`
)

fig2 = px.line(
    tempDF,
    x="Year",
    y=smoothedValues,

    color='Commodity',
    labels = {
        'Year': 'Years',
        'y': 'Crop Yield'
    },

    title='Individual yearly crops in '+ states[currState] +' 1960 - 2009'

)

fig2.write_image('../images/agri/'+ states[currState] +'.png')