In [197]:
# import dependencies

import pandas as pd
import numpy as np
import os

In [198]:
# set file path and read file
filepath = "./Resources/purchase_data.json"
heroes_df = pd.read_json(filepath)
heroes_df.head(10)

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59
5,20,Male,10,Sleepwalker,1.73,Tanimnya91
6,20,Male,153,Mercenary Sabre,4.57,Undjaskla97
7,29,Female,169,"Interrogator, Blood Blade of the Queen",3.32,Iathenudil29
8,25,Male,118,"Ghost Reaver, Longsword of Magic",2.77,Sondenasta63
9,31,Male,99,"Expiration, Warscythe Of Lost Worlds",4.53,Hilaerin92


In [199]:
#Calculate Total Players

total_players = heroes_df["SN"].nunique()
total_players

# How to display the results in dataframe form

# Count of all players including duplicates:
# player_count = heroes_df["SN"].count()
# player_count

573

In [200]:
# Calculate number of unique items
unique_items = int(heroes_df["Item ID"].nunique())
unique_items

183

In [201]:
# Calculate average purchase price
average_purchase = round(heroes_df["Price"].mean(),2)
average_purchase

2.93

In [202]:
# Calculate Total number of purchases
purchase_count = heroes_df.shape[0]
purchase_count

780

In [203]:
# Calculate Total Revenue
total_revenue = round(heroes_df["Price"].sum(),2)
total_revenue

2286.33

In [204]:
# Create Purchase Summary Table
purchase_summary_df = pd.DataFrame({"Number of Unique Items":[unique_items],"Average Price":[average_purchase],
                                   "Number of Purchases":[purchase_count],"Total Revenue":[total_revenue]})
purchase_summary_df.head()

Unnamed: 0,Average Price,Number of Purchases,Number of Unique Items,Total Revenue
0,2.93,780,183,2286.33


In [205]:
# Count male and female players
gender_df = pd.DataFrame(heroes_df.groupby('Gender')['SN'].nunique())
gender_df.head()

Unnamed: 0_level_0,SN
Gender,Unnamed: 1_level_1
Female,100
Male,465
Other / Non-Disclosed,8


In [206]:
# Retrieve gender counts and calculate percentages
male_count = heroes_df[heroes_df["Gender"]=='Male']["SN"].nunique()
female_count = heroes_df[heroes_df["Gender"]=='Female']["SN"].nunique()
other_count = total_players - male_count - female_count
male_percent = male_count * 100 / total_players
female_percent = female_count * 100 / total_players
other_percent = other_count * 100 / total_players

print(male_percent, female_percent, other_percent)


81.15183246073299 17.452006980802793 1.3961605584642234


In [207]:
# Create gender summary table

gender_summary_df = pd.DataFrame({"Gender":["Male","Female","Other/Non-Disclosed"],"Percentage of Players":[male_percent,female_percent,other_percent],
                                 "Total Count":[male_count,female_count,other_count]})
gender_summary_df = gender_summary_df.set_index("Gender")
gender_summary_df.head()

Unnamed: 0_level_0,Percentage of Players,Total Count
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,81.151832,465
Female,17.452007,100
Other/Non-Disclosed,1.396161,8


In [208]:
# Purchasing Analysis(Gender)

gender_purchase_count = heroes_df["Gender"].value_counts()
gender_avg_price = heroes_df.groupby("Gender")["Price"].mean()
gender_total_value = heroes_df.groupby("Gender")["Price"].sum()
gender_columns = ["Male","Female","Other/Non-Disclosed"]

# Had trouble calculating normalization
# gender_normals = heroes_df.groupby("Gender")["Price"].value_counts(normalize=True)



gender_purch_summary_df = pd.DataFrame({"Purchase Count":gender_purchase_count,"Average Purchase Price":gender_avg_price,"Total Purchase Value":gender_total_value})
gender_purch_summary_df.head()


Unnamed: 0,Average Purchase Price,Purchase Count,Total Purchase Value
Female,2.815515,136,382.91
Male,2.950521,633,1867.68
Other / Non-Disclosed,3.249091,11,35.74


In [245]:
# Age Demographics
bins = [0,10,14,19,24,29,35,39,100]
group_labels = ["<10","10-14","15-19","20-24","25-29","30-34","35-39","40+"]
heroes_df["Age Group"] = pd.cut(heroes_df["Age"],bins,labels=group_labels)
heroes_df.head()



Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN,Age Group
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34,35-39
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46,20-24
2,34,Male,174,Primitive Blade,2.46,Assastnya25,30-34
3,21,Male,92,Final Critic,1.36,Pheusrical25,20-24
4,23,Male,63,Stormfury Mace,1.27,Aela59,20-24


In [252]:
player_group_df = heroes_df.groupby("SN")
player_group_df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN,Age Group
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34,35-39
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46,20-24
2,34,Male,174,Primitive Blade,2.46,Assastnya25,30-34
3,21,Male,92,Final Critic,1.36,Pheusrical25,20-24
4,23,Male,63,Stormfury Mace,1.27,Aela59,20-24
5,20,Male,10,Sleepwalker,1.73,Tanimnya91,20-24
6,20,Male,153,Mercenary Sabre,4.57,Undjaskla97,20-24
7,29,Female,169,"Interrogator, Blood Blade of the Queen",3.32,Iathenudil29,25-29
8,25,Male,118,"Ghost Reaver, Longsword of Magic",2.77,Sondenasta63,25-29
9,31,Male,99,"Expiration, Warscythe Of Lost Worlds",4.53,Hilaerin92,30-34


In [258]:
age_group_total = player_group_df.groupby("Age Group")
age_group_total.head(10)

AttributeError: Cannot access callable attribute 'groupby' of 'DataFrameGroupBy' objects, try using the 'apply' method

In [None]:
# Got stuck on the Age demographics step and ran out of time...