In [20]:
import pandas as pd
import seaborn as sns
import plotly.express as px

pd.set_option('display.max_columns', None)
pd.set_option('precision', 2)

In [21]:
df = pd.read_csv('avocado.csv')
df = df.drop('Unnamed: 0', axis = 1)
df.head()

Unnamed: 0,Date,AveragePrice,Total Volume,4046,4225,4770,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
0,12/27/2015,1.33,64236.62,1036.74,54454.85,48.16,8696.87,8603.62,93.25,0.0,conventional,2015,Albany
1,12/20/2015,1.35,54876.98,674.28,44638.81,58.33,9505.56,9408.07,97.49,0.0,conventional,2015,Albany
2,12/13/2015,0.93,118220.22,794.7,109149.67,130.5,8145.35,8042.21,103.14,0.0,conventional,2015,Albany
3,12/6/2015,1.08,78992.15,1132.0,71976.41,72.58,5811.16,5677.4,133.76,0.0,conventional,2015,Albany
4,11/29/2015,1.28,51039.6,941.48,43838.39,75.78,6183.95,5986.26,197.69,0.0,conventional,2015,Albany


In [22]:
df['Date'] = pd.to_datetime(df.Date)
df['Date']

0       2015-12-27
1       2015-12-20
2       2015-12-13
3       2015-12-06
4       2015-11-29
           ...    
18244   2018-02-04
18245   2018-01-28
18246   2018-01-21
18247   2018-01-14
18248   2018-01-07
Name: Date, Length: 18249, dtype: datetime64[ns]

In [23]:
list(df['region'].unique()).index('TotalUS')

51

In [24]:
df['region'].unique()

array(['Albany', 'Atlanta', 'BaltimoreWashington', 'Boise', 'Boston',
       'BuffaloRochester', 'California', 'Charlotte', 'Chicago',
       'CincinnatiDayton', 'Columbus', 'DallasFtWorth', 'Denver',
       'Detroit', 'GrandRapids', 'GreatLakes', 'HarrisburgScranton',
       'HartfordSpringfield', 'Houston', 'Indianapolis', 'Jacksonville',
       'LasVegas', 'LosAngeles', 'Louisville', 'MiamiFtLauderdale',
       'Midsouth', 'Nashville', 'NewOrleansMobile', 'NewYork',
       'Northeast', 'NorthernNewEngland', 'Orlando', 'Philadelphia',
       'PhoenixTucson', 'Pittsburgh', 'Plains', 'Portland',
       'RaleighGreensboro', 'RichmondNorfolk', 'Roanoke', 'Sacramento',
       'SanDiego', 'SanFrancisco', 'Seattle', 'SouthCarolina',
       'SouthCentral', 'Southeast', 'Spokane', 'StLouis', 'Syracuse',
       'Tampa', 'TotalUS', 'West', 'WestTexNewMexico'], dtype=object)

In [25]:
df.columns

Index(['Date', 'AveragePrice', 'Total Volume', '4046', '4225', '4770',
       'Total Bags', 'Small Bags', 'Large Bags', 'XLarge Bags', 'type', 'year',
       'region'],
      dtype='object')

In [26]:
regions = df['region'].unique()
frames = []


for region in regions:
    region_data = df[df['region'] == region]
    frames.append(region_data)

In [31]:
cities = frames[4].append(frames[42]).sort_values('Date')
cities = cities[cities['type'] == 'organic']
cities.head()

Unnamed: 0,Date,AveragePrice,Total Volume,4046,4225,4770,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
9385,2015-01-04,1.83,2192.13,8.66,939.43,0.0,1244.04,1244.04,0.0,0.0,organic,2015,Boston
11361,2015-01-04,1.18,22630.58,13175.57,9028.34,0.0,426.67,426.67,0.0,0.0,organic,2015,SanFrancisco
11360,2015-01-11,1.12,29676.78,17907.49,11586.46,0.0,182.83,182.83,0.0,0.0,organic,2015,SanFrancisco
9384,2015-01-11,1.94,2217.82,12.82,956.07,0.0,1248.93,1248.93,0.0,0.0,organic,2015,Boston
11359,2015-01-18,1.27,41482.48,26336.41,14789.4,0.0,356.67,356.67,0.0,0.0,organic,2015,SanFrancisco


In [32]:
fig = px.line(cities, x = 'Date', y = 'AveragePrice', line_group = 'region',
              color = 'region', title = 'Average Prices of Organic Avocados in Boston & San Francisco',
              range_y = [0, 3.5], template = 'ggplot2', labels = {'AveragePrice' : 'Average Price'})

fig.update_xaxes(title_font = {'size' : 18, 'family' : 'Courier', 'color' : 'gray'},
                 tickfont = {'size' : 16, 'family' : 'Courier', 'color' : 'gray'})

fig.update_yaxes(title_font = {'size' : 18, 'family' : 'Courier', 'color' : 'gray'},
                 tickfont = {'size' : 16, 'family' : 'Courier', 'color' : 'gray'})

fig.show()

In [33]:
usa = frames[51].sort_values('Date')
usa.head()

Unnamed: 0,Date,AveragePrice,Total Volume,4046,4225,4770,Total Bags,Small Bags,Large Bags,XLarge Bags,type,year,region
2703,2015-01-04,0.95,31300000.0,12400000.0,13600000.0,844093.32,4500000.0,3590000.0,894945.63,18672.81,conventional,2015,TotalUS
11829,2015-01-04,1.46,613000.0,233000.0,217000.0,4370.99,159000.0,115000.0,43573.12,0.0,organic,2015,TotalUS
2702,2015-01-11,1.01,29100000.0,11500000.0,12100000.0,866574.66,4520000.0,3780000.0,718333.87,15789.15,conventional,2015,TotalUS
11828,2015-01-11,1.42,670000.0,271000.0,261000.0,3830.42,134000.0,107000.0,26915.63,0.0,organic,2015,TotalUS
2701,2015-01-18,1.03,29000000.0,11900000.0,11700000.0,831301.9,4650000.0,3870000.0,771093.2,7935.35,conventional,2015,TotalUS


In [34]:

fig = px.line(usa, x = 'Date', y = 'AveragePrice', line_group = 'region',
              color = 'type', title = 'Average Prices of Organic & Conventional Avocados in USA',
              template = 'ggplot2', labels = {'AveragePrice' : 'Average Price'})

fig.update_xaxes(title_font = {'size' : 18, 'family' : 'Courier', 'color' : 'gray'},
                 tickfont = {'size' : 16, 'family' : 'Courier', 'color' : 'gray'})

fig.update_yaxes(title_font = {'size' : 18, 'family' : 'Courier', 'color' : 'gray'},
                 tickfont = {'size' : 16, 'family' : 'Courier', 'color' : 'gray'})

fig.show()

In [18]:
total = pd.DataFrame({'Date' : [], 'AveragePrice' : [], 'Total Volume' : [], '4046' : [], '4225' : [], '4770' : [],
                      'Total Bags' : [], 'Small Bags' : [], 'Large Bags' : [], 'XLarge Bags' : [], 'type' : [],
                      'year' : [], 'region' : []})



In [19]:
fig = px.scatter(usa, x = 'Total Volume', y = 'AveragePrice', color = 'type', 
                 title = 'Total Bags vs Price of Avocado',
                 template = 'ggplot2', labels = {'AveragePrice' : 'Average Price'})

fig.update_xaxes(title_font = {'size' : 18, 'family' : 'Courier', 'color' : 'gray'},
                 tickfont = {'size' : 16, 'family' : 'Courier', 'color' : 'gray'})

fig.update_yaxes(title_font = {'size' : 18, 'family' : 'Courier', 'color' : 'gray'},
                 tickfont = {'size' : 16, 'family' : 'Courier', 'color' : 'gray'})

fig.show()