In [47]:
import pandas as pd
from pprint import pprint
import json
import numpy as np

## Top Quality Coffee dashborad
---
- converting top quality coffee dataset into different graph (bar, pie and map) format
- all the graph formatted data will be store in a big json format

---
- statistical tests for the top quality coffee dataset

In [48]:
# create a function to convert dataframe to json format that wanted
def arange_json(df, dic):
    column_list = df.columns
    for column in column_list:
        dic[column] = list(df[column])

In [49]:
top_coffee_df = pd.read_json("../dataset_conversion/output_json/top_quality.json")
top_coffee_df.columns = ["coffee_name", "coffee_rating", "roaster_name", "roaster_city",
                         "coffee_origin_country", "coffee_origin_city", "roast_level",
                         "aroma", "acidity", "body", "flavor", "aftertaste", "species",
                         "price_USD_kg", "roaster_country"]
top_coffee_df.head()


Unnamed: 0,coffee_name,coffee_rating,roaster_name,roaster_city,coffee_origin_country,coffee_origin_city,roast_level,aroma,acidity,body,flavor,aftertaste,species,price_USD_kg,roaster_country
0,Colombia Pink Bourbon,95,modcup,Jersey City,[Colombia],Piendamó;Cauca Department,Light,9,9,9,9,9,Bourbon,120.0,United States of America
1,Kenya Kiambu Mandela Estate AA Washed Process,94,Buon Caffe,Taipei,[Kenya],Kiambu County,Medium-Light,9,9,9,9,8,SL28;SL34,50.793651,Taiwan
2,Costa Rica Volcán Azul Geisha Yeast-Washed,94,Kafe Coffee Roastery,Zhubei,[Costa Rica],West Valley,Light,9,9,9,9,8,Geisha,115.2,Taiwan
3,Ethiopia Yirgacheffe Adame G1 Natural,94,Caoban Coffee,Taipei,[Ethiopia],Yirgacheffe growing region,Light,9,9,9,9,8,,80.026455,Taiwan
4,Kenya Gichathaini,94,Temple Coffee,Sacramento,[Kenya],Mathira West District;Nyeri growing region,Light,9,9,9,9,8,SL28;SL34;Ruiru 11,73.488345,United States of America


In [50]:
# caluate the total counts
total_counts = top_coffee_df["coffee_name"].count()
# caluate the number of coffee that is rated higher than 96 score
total_counts_high_rating = top_coffee_df.loc[(top_coffee_df["coffee_rating"] >= 97), "coffee_name"].count()
# caluate the number of coffee that is less than 35 USD per kg
total_counts_low_price = top_coffee_df.loc[(top_coffee_df["price_USD_kg"] <= 35), "coffee_name"].count()


print(f"The total number of coffee blend in this dataset: {total_counts}")
print(f"The number of coffee that rated more than 97: {total_counts_high_rating}")
print(f"the number of coffee that less than 35 USD per kg: {total_counts_low_price}")

The total number of coffee blend in this dataset: 1567
The number of coffee that rated more than 97: 34
the number of coffee that less than 35 USD per kg: 34


### Coffee rating

In [51]:
grp_rating_all = top_coffee_df.groupby("coffee_rating")
rating_df_all = grp_rating_all["coffee_name"].count().reset_index().rename(columns= {"coffee_name" : "all"})

# create a dataframe that coffee_rating is 97 or more
rating_high_rating_df = top_coffee_df.loc[(top_coffee_df["coffee_rating"] >= 97), :]
grp_rating_high_rating = rating_high_rating_df.groupby("coffee_rating")
# get the count of rating group -> reset index -> rename the column with a proper name
rating_grp_high_rating = grp_rating_high_rating["coffee_name"].count().reset_index().rename(columns= {"coffee_name" : "high_rated"})

# crate a datafram that coffee price less than 35
rating_low_price_df = top_coffee_df.loc[(top_coffee_df["price_USD_kg"] <= 35), :]
grp_rating_low_price = rating_low_price_df.groupby("coffee_rating")
# get the count of rating group -> reset index -> rename the column with a proper name
rating_grp_low_price = grp_rating_low_price["coffee_name"].count().reset_index().rename(columns= {"coffee_name" : "low_price"})

# merging the another column for high rating 
rating_grp_merge = pd.merge(rating_df_all, rating_grp_high_rating, on= "coffee_rating", how= "outer")
rating_grp_merge = pd.merge(rating_grp_merge, rating_grp_low_price, on= "coffee_rating", how= "outer")

rating_grp_merge.head()

Unnamed: 0,coffee_rating,all,high_rated,low_price
0,94,952,,23.0
1,95,453,,10.0
2,96,128,,1.0
3,97,31,31.0,
4,98,3,3.0,


In [52]:
# convert into json format
json_rating_all = {}
arange_json(rating_df_all, json_rating_all)

json_rating_high = {}
arange_json(rating_grp_high_rating, json_rating_high)

json_rating_low_price ={}
arange_json(rating_grp_low_price, json_rating_low_price)

### Coffee price (USD/kg)

In [53]:
price_df = top_coffee_df.loc[:,["coffee_name", "coffee_rating", "price_USD_kg"]]
price_df.head()

Unnamed: 0,coffee_name,coffee_rating,price_USD_kg
0,Colombia Pink Bourbon,95,120.0
1,Kenya Kiambu Mandela Estate AA Washed Process,94,50.793651
2,Costa Rica Volcán Azul Geisha Yeast-Washed,94,115.2
3,Ethiopia Yirgacheffe Adame G1 Natural,94,80.026455
4,Kenya Gichathaini,94,73.488345


In [54]:
max_price = price_df["price_USD_kg"].max()
min_price = price_df["price_USD_kg"].min()
print(f"The highest price is {max_price}")
print(f"The lowest price is {min_price}")

The highest price is 8250.0
The lowest price is 10.2589729269


In [55]:
# create a copy of price_df to create a table for different price range
price_all_df = price_df.copy()
# set up bins and bins group name 
bins = [0, 50, 100, 150, 200, 300, 400, 500, 1000, 9000]
grp_name = ["< $50", "$50 - $100", "$100 - $150", "$150 - $200", "$200 - $300", "$300 - $400", "$400 - $500", "$500 - $1000", "> $1000"]
# cutting the price_USD_kg into different 9 groups
price_all_df["PriceRangeUSD_kg"] = pd.cut(price_all_df["price_USD_kg"], bins= bins,labels= grp_name, include_lowest= False)
# grouping price together and provide the counts
grp_price_all = price_all_df.groupby("PriceRangeUSD_kg")
# get the count of each  price range -> reset index -> rename the column with a proper name
price_grp_all = grp_price_all["coffee_name"].count().reset_index().rename(columns= {"coffee_name" : "all"})

# convert into json format
json_price_grp_all = price_grp_all.to_json(orient= "columns")

price_grp_all

Unnamed: 0,PriceRangeUSD_kg,all
0,< $50,338
1,$50 - $100,690
2,$100 - $150,135
3,$150 - $200,79
4,$200 - $300,130
5,$300 - $400,42
6,$400 - $500,15
7,$500 - $1000,22
8,> $1000,15


In [56]:
# convert into json format
json_price_all = {}
arange_json(price_grp_all, json_price_all)
pprint(json_price_all)

{'PriceRangeUSD_kg': ['< $50',
                      '$50 - $100',
                      '$100 - $150',
                      '$150 - $200',
                      '$200 - $300',
                      '$300 - $400',
                      '$400 - $500',
                      '$500 - $1000',
                      '> $1000'],
 'all': [338, 690, 135, 79, 130, 42, 15, 22, 15]}


In [57]:
# only select the coffee bean that rated more than 97 score
price_high_rated_df = price_df.loc[(price_df["coffee_rating"] >= 97), :]
# set up bins and bins group name 
bins = [0, 50, 100, 150, 200, 300, 400, 500, 1000, 9000]
grp_name = ["< $50", "$50 - $100", "$100 - $150", "$150 - $200", "$200 - $300", "$300 - $400", "$400 - $500", "$500 - $1000", "> $1000"]
# cutting the price_USD_kg into different 9 groups
price_high_rated_df["PriceRangeUSD_kg"] = pd.cut(price_high_rated_df["price_USD_kg"], bins= bins,labels= grp_name, include_lowest= False)
# grouping price together and provide the counts
grp_price_high_rated = price_high_rated_df.groupby("PriceRangeUSD_kg")
# get the count of each price range -> reset index -> rename the column with a proper name
price_grp_high_rated = grp_price_high_rated["coffee_name"].count().reset_index().rename(columns= {"coffee_name" : "high_rated"})

# convert into json format
json_price_grp_high_rated = price_grp_high_rated.to_json(orient= "columns")

price_grp_high_rated


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


Unnamed: 0,PriceRangeUSD_kg,high_rated
0,< $50,3
1,$50 - $100,7
2,$100 - $150,0
3,$150 - $200,3
4,$200 - $300,4
5,$300 - $400,3
6,$400 - $500,1
7,$500 - $1000,2
8,> $1000,5


In [58]:
# convert into json format
json_price_high_rated = {}
arange_json(price_grp_high_rated, json_price_high_rated)
pprint(json_price_high_rated)

{'PriceRangeUSD_kg': ['< $50',
                      '$50 - $100',
                      '$100 - $150',
                      '$150 - $200',
                      '$200 - $300',
                      '$300 - $400',
                      '$400 - $500',
                      '$500 - $1000',
                      '> $1000'],
 'high_rated': [3, 7, 0, 3, 4, 3, 1, 2, 5]}


In [59]:
# create a coffee that less than 35 USD dataframe
low_price_df = price_df.loc[(price_df["price_USD_kg"] <= 35), :]
# set up bins and bins group name 
bins = [10, 15, 20, 25, 30, 36]
grp_name = ["$10 - $15", "$15 - $20", "$20 - $25", "$25 - $30", "$30 - $35"]
# cutting the price_USD_kg into different 9 groups
low_price_df["PriceRangeUSD_kg"] = pd.cut(low_price_df["price_USD_kg"], bins= bins,labels= grp_name, include_lowest= False)
# grouping price together and provide the counts
grp_low_price = low_price_df.groupby("PriceRangeUSD_kg")
# get the count of each species -> reset index -> rename the column with a proper name
low_price_group = grp_low_price["coffee_name"].count().reset_index().rename(columns= {"coffee_name" : "low_price"})

# convert into json format
json_low_price_group = low_price_group.to_json(orient= "columns")

low_price_group


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  import sys


Unnamed: 0,PriceRangeUSD_kg,low_price
0,$10 - $15,3
1,$15 - $20,4
2,$20 - $25,2
3,$25 - $30,7
4,$30 - $35,18


In [60]:
# convert into json format
json_low_price = {}
arange_json(low_price_group, json_low_price)
pprint(json_low_price)

{'PriceRangeUSD_kg': ['$10 - $15',
                      '$15 - $20',
                      '$20 - $25',
                      '$25 - $30',
                      '$30 - $35'],
 'low_price': [3, 4, 2, 7, 18]}


### Species analysis

In [61]:
species_df = top_coffee_df.loc[:,["coffee_name", "coffee_rating", "species", "price_USD_kg"]]
species_df.head()

Unnamed: 0,coffee_name,coffee_rating,species,price_USD_kg
0,Colombia Pink Bourbon,95,Bourbon,120.0
1,Kenya Kiambu Mandela Estate AA Washed Process,94,SL28;SL34,50.793651
2,Costa Rica Volcán Azul Geisha Yeast-Washed,94,Geisha,115.2
3,Ethiopia Yirgacheffe Adame G1 Natural,94,,80.026455
4,Kenya Gichathaini,94,SL28;SL34;Ruiru 11,73.488345


In [62]:
# converting species column into list
species_df["species"] = species_df["species"].str.split(";")

# and then explode allowing double or triple the row to store all the list items in species
species_df = species_df.explode("species")
species_df.head()

Unnamed: 0,coffee_name,coffee_rating,species,price_USD_kg
0,Colombia Pink Bourbon,95,Bourbon,120.0
1,Kenya Kiambu Mandela Estate AA Washed Process,94,SL28,50.793651
1,Kenya Kiambu Mandela Estate AA Washed Process,94,SL34,50.793651
2,Costa Rica Volcán Azul Geisha Yeast-Washed,94,Geisha,115.2
3,Ethiopia Yirgacheffe Adame G1 Natural,94,,80.026455


In [63]:
# groupby species for all
grp_species = species_df.groupby("species")
# get the count of each species -> reset index -> rename the column with a proper name
species_grp_all = grp_species["coffee_name"].count().reset_index().rename(columns= {"coffee_name" : "all"})
species_sort_all = species_grp_all.sort_values(by= "all", ascending= True).iloc[:10,:]

# create a dataframe that coffee_rating is 97 or more
species_high_rating_df = species_df.loc[(species_df["coffee_rating"] >= 97), :]
grp_species_high_rating = species_high_rating_df.groupby("species")
# get the count of each species -> reset index -> rename the column with a proper name
species_grp_high_rating = grp_species_high_rating["coffee_name"].count().reset_index().rename(columns= {"coffee_name" : "high_rated"})
species_sort_high_rating = species_grp_high_rating.sort_values(by= "high_rated", ascending= True).iloc[:10,:]

# crate a datafram that coffee price less than 35
species_low_price_df = species_df.loc[(species_df["price_USD_kg"] <= 35), :]
grp_species_low_price = species_low_price_df.groupby("species")
# get the count of each species -> reset index -> rename the column with a proper name
species_grp_low_price = grp_species_low_price["coffee_name"].count().reset_index().rename(columns= {"coffee_name" : "low_price"})
species_sort_low_price = species_grp_low_price.sort_values(by= "low_price", ascending= True).iloc[:10,:]


# merging the another column for high rating 
species_grp_merge = pd.merge(species_grp_all, species_grp_high_rating, on= "species", how= "outer")
species_grp_merge = pd.merge(species_grp_merge, species_grp_low_price, on= "species", how= "outer")

# convert into json format
json_species_grp = species_grp_merge.to_json(orient= "columns")

species_grp_merge.head()

Unnamed: 0,species,all,high_rated,low_price
0,74110,14,,
1,74158,6,,
2,Batian,25,2.0,
3,Bourbon,188,4.0,5.0
4,Castillo,27,,


In [64]:
# convert into json format
json_species_all = {}
arange_json(species_sort_all, json_species_all)

json_species_high_rated = {}
arange_json(species_sort_high_rating, json_species_high_rated)

json_species_low_price ={}
arange_json(species_sort_low_price, json_species_low_price)


### Roast level

In [65]:
roast_lv_df = top_coffee_df.loc[:,["coffee_name", "coffee_rating", "roast_level", "price_USD_kg"]]
roast_lv_df.head()

Unnamed: 0,coffee_name,coffee_rating,roast_level,price_USD_kg
0,Colombia Pink Bourbon,95,Light,120.0
1,Kenya Kiambu Mandela Estate AA Washed Process,94,Medium-Light,50.793651
2,Costa Rica Volcán Azul Geisha Yeast-Washed,94,Light,115.2
3,Ethiopia Yirgacheffe Adame G1 Natural,94,Light,80.026455
4,Kenya Gichathaini,94,Light,73.488345


In [66]:
roast_level_list = ["Light", "Medium-Light", "Medium", "Medium-Dark", "Dark"]

In [67]:
def adding_row_not_in_df(df,column):

    for item in roast_level_list:
        if item not in df["roast_level"].values:
            dum = {'roast_level':item, column :0 }
            df = df.append(dum, ignore_index=True)
    return df

In [68]:
# groupby roast_level for all
grp_roast_lv = roast_lv_df.groupby("roast_level")
# get the count of each roast_level -> reset index -> rename the column with a proper name
roast_lv_grp_all = grp_roast_lv["coffee_name"].count().reset_index().rename(columns= {"coffee_name": "all"})
roast_lv_grp_all = roast_lv_grp_all.sort_values(by= "roast_level")
roast_lv_grp_all






Unnamed: 0,roast_level,all
0,Dark,3
1,Light,292
2,Medium,218
3,Medium-Dark,40
4,Medium-Light,1000


In [69]:
# create a dataframe that coffee_rating is 97 or more
roast_lv_high_rating_df = roast_lv_df.loc[(roast_lv_df["coffee_rating"] >= 97), :]
grp_roast_lv_high_rating= roast_lv_high_rating_df.groupby("roast_level")
# get the count of each roast_level-> reset index -> rename the column with a proper name
grp_roast_lv_high_rating = grp_roast_lv_high_rating["coffee_name"].count().reset_index().rename(columns= {"coffee_name" : "high_rated"})

# adding zero to those that doesn't have any counts in the roast level list
for item in roast_level_list:
    if item not in grp_roast_lv_high_rating["roast_level"].values:
        dum = {'roast_level':item, 'high_rated':0 }
        grp_roast_lv_high_rating = grp_roast_lv_high_rating.append(dum, ignore_index=True)

roast_lv_grp_high_rating = grp_roast_lv_high_rating.sort_values(by= "roast_level")
roast_lv_grp_high_rating


Unnamed: 0,roast_level,high_rated
4,Dark,0
0,Light,4
1,Medium,8
3,Medium-Dark,0
2,Medium-Light,22


In [70]:
# crate a datafram that coffee price less than 35
roast_lv_low_price_df = roast_lv_df.loc[(roast_lv_df["price_USD_kg"] <= 35), :]
grp_roast_lv_low_price = roast_lv_low_price_df.groupby("roast_level")
# get the count of each roast_level -> reset index -> rename the column with a proper name
grp_roast_lv_low_price = grp_roast_lv_low_price["coffee_name"].count().reset_index().rename(columns= {"coffee_name" : "low_price"})

# adding zero to those that doesn't have any counts in the roast level list
for item in roast_level_list:
    if item not in grp_roast_lv_low_price["roast_level"].values:
        dum = {"roast_level" : item, "low_price":0 }
        grp_roast_lv_low_price = grp_roast_lv_low_price .append(dum, ignore_index=True)

roast_lv_grp_low_price = grp_roast_lv_low_price.sort_values(by= "roast_level")
roast_lv_grp_low_price

Unnamed: 0,roast_level,low_price
4,Dark,0
0,Light,2
1,Medium,12
2,Medium-Dark,1
3,Medium-Light,13


In [71]:
# merging the another column for high rating 
roast_lv_grp_merge = pd.merge(roast_lv_grp_all, roast_lv_grp_high_rating, on= "roast_level", how= "outer")
roast_lv_grp_merge = pd.merge(roast_lv_grp_merge, roast_lv_grp_low_price, on= "roast_level", how= "outer")

# convert into json format
json_roast_lv_grp = roast_lv_grp_merge.to_json(orient= "columns")

roast_lv_grp_merge.head()

Unnamed: 0,roast_level,all,high_rated,low_price
0,Dark,3,0,0
1,Light,292,4,2
2,Medium,218,8,12
3,Medium-Dark,40,0,1
4,Medium-Light,1000,22,13


In [72]:
# convert into json format
json_roast_lv_all = {}
arange_json(roast_lv_grp_all, json_roast_lv_all)

json_roast_lv_high_rated = {}
arange_json(roast_lv_grp_high_rating, json_roast_lv_high_rated)

json_roast_lv_low_price ={}
arange_json(roast_lv_grp_low_price, json_roast_lv_low_price)

### Coffee Bean Origin country 

In [73]:
origin_df = top_coffee_df.loc[:,["coffee_name", "coffee_rating", "coffee_origin_country", "price_USD_kg"]]
origin_df.head()


Unnamed: 0,coffee_name,coffee_rating,coffee_origin_country,price_USD_kg
0,Colombia Pink Bourbon,95,[Colombia],120.0
1,Kenya Kiambu Mandela Estate AA Washed Process,94,[Kenya],50.793651
2,Costa Rica Volcán Azul Geisha Yeast-Washed,94,[Costa Rica],115.2
3,Ethiopia Yirgacheffe Adame G1 Natural,94,[Ethiopia],80.026455
4,Kenya Gichathaini,94,[Kenya],73.488345


In [74]:
# and then explode allowing double or triple the row to store all the list items in species
origin_df = origin_df.explode("coffee_origin_country")
origin_df.head()

Unnamed: 0,coffee_name,coffee_rating,coffee_origin_country,price_USD_kg
0,Colombia Pink Bourbon,95,Colombia,120.0
1,Kenya Kiambu Mandela Estate AA Washed Process,94,Kenya,50.793651
2,Costa Rica Volcán Azul Geisha Yeast-Washed,94,Costa Rica,115.2
3,Ethiopia Yirgacheffe Adame G1 Natural,94,Ethiopia,80.026455
4,Kenya Gichathaini,94,Kenya,73.488345


In [75]:
# groupby origin country for all
grp_origin = origin_df.groupby("coffee_origin_country")
# get the count of each origin country -> reset index -> rename the column with a proper name
origin_grp_all = grp_origin["coffee_name"].count().reset_index().rename(columns= {"coffee_name" : "all"})
origin_sort_all = origin_grp_all.sort_values(by= "all", ascending= True).iloc[:11,:]

# create a dataframe that coffee_rating is 97 or more
origin_high_rating_df = origin_df.loc[(origin_df["coffee_rating"] >= 97), :]
grp_origin_high_rating = origin_high_rating_df.groupby("coffee_origin_country")
# get the count of each origin country -> reset index -> rename the column with a proper name
origin_grp_high_rating = grp_origin_high_rating["coffee_name"].count().reset_index().rename(columns= {"coffee_name" : "high_rated"})
origin_sort_high_rating = origin_grp_high_rating.sort_values(by= "high_rated", ascending= True).iloc[:10,:]

# crate a datafram that coffee price less than 35
origin_low_price_df = origin_df.loc[(origin_df["price_USD_kg"] <= 35), :]
grp_origin_low_price = origin_low_price_df.groupby("coffee_origin_country")
# get the count of each origin country -> reset index -> rename the column with a proper name
origin_grp_low_price = grp_origin_low_price["coffee_name"].count().reset_index().rename(columns= {"coffee_name" : "low_price"})
# sorting data (descending) -> get top10
origin_sort_low_price = origin_grp_low_price.sort_values(by= "low_price", ascending= True).iloc[:11,:]

# merging the another column for high rating 
origin_grp_merge = pd.merge(origin_grp_all, origin_grp_high_rating, on= "coffee_origin_country", how= "outer")
origin_grp_merge = pd.merge(origin_grp_merge, origin_grp_low_price, on= "coffee_origin_country", how= "outer")

# convert into json format
json_origin_grp = origin_grp_merge.to_json(orient= "columns")

origin_grp_merge.head()

Unnamed: 0,coffee_origin_country,all,high_rated,low_price
0,Bolivia,3,,
1,Brazil,18,,3.0
2,Burundi,10,,
3,China,1,,
4,Colombia,175,1.0,3.0


In [76]:
# convert into json format
json_origin_all = {}
arange_json(origin_sort_all, json_origin_all)

json_origin_high_rated = {}
arange_json(origin_sort_high_rating, json_origin_high_rated)

json_origin_low_price = {}
arange_json(origin_sort_low_price, json_origin_low_price)


### Roaster Located (Country)

In [77]:
roaster_df = top_coffee_df.loc[:,["coffee_name", "coffee_rating", "roaster_country", "price_USD_kg"]]
roaster_df.head()

Unnamed: 0,coffee_name,coffee_rating,roaster_country,price_USD_kg
0,Colombia Pink Bourbon,95,United States of America,120.0
1,Kenya Kiambu Mandela Estate AA Washed Process,94,Taiwan,50.793651
2,Costa Rica Volcán Azul Geisha Yeast-Washed,94,Taiwan,115.2
3,Ethiopia Yirgacheffe Adame G1 Natural,94,Taiwan,80.026455
4,Kenya Gichathaini,94,United States of America,73.488345


In [78]:
# groupby roaster_country for all
grp_roaster = roaster_df.groupby("roaster_country")
# get the count of each roaster_country -> reset index -> rename the column with a proper name
roaster_grp_all = grp_roaster["coffee_name"].count().reset_index().rename(columns= {"coffee_name" : "all"})
roaster_sort_all = roaster_grp_all.sort_values(by= "all", ascending= True).iloc[:10,:]

# create a dataframe that coffee_rating is 97 or more
roaster_high_rating_df = roaster_df.loc[(roaster_df["coffee_rating"] >= 97), :]
grp_roaster_high_rating = roaster_high_rating_df.groupby("roaster_country")
# get the count of each roaster_country -> reset index -> rename the column with a proper name
roaster_grp_high_rating = grp_roaster_high_rating["coffee_name"].count().reset_index().rename(columns= {"coffee_name" : "high_rated"})
roaster_sort_high_rating = roaster_grp_high_rating.sort_values(by= "high_rated", ascending= True).iloc[:10,:]

# crate a datafram that coffee price less than 35
roaster_low_price_df = roaster_df.loc[(roaster_df["price_USD_kg"] <= 35), :]
grp_roaster_low_price = roaster_low_price_df.groupby("roaster_country")
# get the count of each roaster_country-> reset index -> rename the column with a proper name
roaster_grp_low_price = grp_roaster_low_price["coffee_name"].count().reset_index().rename(columns= {"coffee_name" : "low_price"})
roaster_sort_low_price = roaster_grp_low_price.sort_values(by= "low_price", ascending= True).iloc[:10,:]

# merging the another column for high rating 
roaster_grp_merge = pd.merge(roaster_grp_all, roaster_grp_high_rating, on= "roaster_country", how= "outer")
roaster_grp_merge = pd.merge(roaster_grp_merge, roaster_grp_low_price, on= "roaster_country", how= "outer")

roaster_grp_merge.head()

Unnamed: 0,roaster_country,all,high_rated,low_price
0,Antigua and Barbuda,4,,
1,Australia,4,,
2,Canada,22,,4.0
3,China,65,2.0,1.0
4,Colombia,1,,


In [79]:
# convert into json format
json_roaster_all = {}
arange_json(roaster_sort_all, json_roaster_all)

json_roaster_high_rated = {}
arange_json(roaster_sort_high_rating, json_roaster_high_rated)

json_roaster_low_price ={}
arange_json(roaster_sort_low_price, json_roaster_low_price)

## Creating Json file to store all the table
----
### output files checklist:
||rating|price|species|raost lv|origin|roaster|
|---|---|---|---|---|---|---|
|all|json_rating_all|json_price_all|json_species_all|json_roast_lv_all|json_origin_all|json_roaster_all|
|high rated|json_rating_high|json_price_high_rated|json_species_high_rated|json_roast_lv_high_rated|json_origin_high_rated|json_roaster_high_rated|
low price|json_rating_low_price|json_low_price|json_species_high_rated|json_roast_lv_low_price|json_origin_low_price|json_roaster_high_rated

In [80]:
# creating all, high_rated and low price dictionary to store each dashboard data
all = [{
    "total_number" : str(total_counts),
    "price" : json_price_all,
    "rating": json_rating_all,
    "species" : json_species_all,
    "roast_level" :json_roast_lv_all,
    "origin_country" : json_origin_all,
    "roaster" : json_roaster_all 
}]
high_rated = [{
    "total_number" : str(total_counts_high_rating),
    "price" : json_price_high_rated,
    "rating": json_rating_high,
    "species" : json_species_high_rated,
    "roast_level" :json_roast_lv_high_rated,
    "origin_country" : json_origin_high_rated,
    "roaster" : json_roaster_high_rated
}]
low_price = [{
    "total_number" : str(total_counts_low_price),
    "price" : json_low_price,
    "rating": json_rating_low_price,
    "species" : json_species_low_price,
    "roast_level" :json_roast_lv_low_price,
    "origin_country" : json_origin_low_price,
    "roaster" : json_roaster_low_price 
}]

In [81]:
# output json file
with open("./output_json/dashboard_graph_all.json", "w") as output_file:
    json.dump(all, output_file)

with open("./output_json/dashboard_graph_high_rated.json", "w") as output_file:
    json.dump(high_rated, output_file)

with open("./output_json/dashboard_graph_low_price.json", "w") as output_file:
    json.dump(low_price, output_file)
