In [81]:
# Import dependencies
import pandas as pd
import os

In [82]:
# Save file path to variable
json_path = os.path.join('purchase_data.json')


In [429]:
# Read with Pandas
heroesofpymoli_df = pd.read_json(json_path)

In [84]:
heroesofpymoli_df.columns

Index(['Age', 'Gender', 'Item ID', 'Item Name', 'Price', 'SN'], dtype='object')

In [85]:
# Create the GroupBy object based on the "SN" column
heroesofpymoli_df["SN"].nunique()


573

In [430]:
unique_items = heroesofpymoli_df["Item Name"].nunique()
average_price = heroesofpymoli_df["Price"].mean()
total_number_purchase = heroesofpymoli_df["Price"].count()
total_revenue = heroesofpymoli_df["Price"].sum()
#print(unique_items, average_price,total_number_purchase, total_revenue)


In [87]:
total_purchasing_total_df = pd.DataFrame({"Unique Items" : [unique_items],
                                          "Average price" : [average_price],
                                          "Total Number of Purchases" : [total_number_purchase],
                                          "Total Revenue" : [total_revenue]
                                         })
total_purchasing_total2_df = total_purchasing_total_df[["Unique Items","Average price", 
                                            "Total Number of Purchases","Total Revenue"]]

total_purchasing_total2_df

Unnamed: 0,Unique Items,Average price,Total Number of Purchases,Total Revenue
0,179,2.931192,780,2286.33


In [431]:
total_gender = heroesofpymoli_df["Gender"].count()
male = heroesofpymoli_df["Gender"].value_counts()['Male']
female = heroesofpymoli_df["Gender"].value_counts()['Female']
non_gender_specific = total_gender - male - female
#print(total_gender, male, female, non_gender_specific)

In [432]:
# Calculate percentage of respondents belonging to each gender
male_percent = (male/total_gender) * 100
female_percent = (female/total_gender) * 100
non_gender_specific_percent = (non_gender_specific/total_gender) * 100
#print(f" % Male: {male_percent}\n % Female: {female_percent}\n % non_specifc: {non_gender_specific}")

In [90]:
gender_demo_df = pd.DataFrame({"Total Count": [male, female, non_gender_specific], 
                               "Percentage of Players": [male_percent, female_percent, non_gender_specific_percent] 
                               }, index=["Male", "Female", "Other/Non-Disclosed"])
gender_demo_df

Unnamed: 0,Percentage of Players,Total Count
Male,81.153846,633
Female,17.435897,136
Other/Non-Disclosed,1.410256,11


In [433]:
# Grouping the DataFrame by "Gender"
gender_group = heroesofpymoli_df.groupby("Gender")
#gender_group.head(5)

In [532]:
# Purchase count by gender
purchase_count_male = gender_group["Gender"].count()['Male']
purchase_count_female = gender_group["Gender"].count()['Female']
purchase_count_other = gender_group["Gender"].count()['Other / Non-Disclosed']

# Total purchase value by gender
total_purchase_value_male = gender_group["Price"].sum()['Male']
total_purchase_value_female = gender_group["Price"].sum()['Female']
total_purchase_value_other = gender_group["Price"].sum()['Other / Non-Disclosed']

# Avergae purchase price by gender
average_purchase_price_male = gender_group["Price"].mean()['Male']
average_purchase_price_female = gender_group["Price"].mean()['Female']
average_purchase_price_other = gender_group["Price"].mean()['Other / Non-Disclosed']

# Normalized totals by gender
normalized_total_gender = (gender_group["Price"].sum()/gender_group["Gender"].count())
normalized_total_gender.head()

Gender
Female                   2.815515
Male                     2.950521
Other / Non-Disclosed    3.249091
dtype: float64

In [535]:
gender_purchase_analysis_df = pd.DataFrame({"Purchase Count": [purchase_count_male, purchase_count_female,
                                                                 purchase_count_other], 
                               "Average Purchase Price": [average_purchase_price_male, 
                                                          average_purchase_price_female, average_purchase_price_other],
                                "Total Purchase Value": [total_purchase_value_male, total_purchase_value_female,
                                                        total_purchase_value_other],
                                
                               }, index=["Male", "Female", "Other/Non-Disclosed"])   
gender_purchase_analysis_df

Unnamed: 0,Average Purchase Price,Purchase Count,Total Purchase Value
Male,2.950521,633,1867.68
Female,2.815515,136,382.91
Other/Non-Disclosed,3.249091,11,35.74


In [505]:
# create the bins to store data
bins = [0, 10, 15, 20, 25, 30, 35, 40, 120]
bin_names = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40+']
bin_df = heroesofpymoli_df.copy()
age_demo_count = pd.cut(heroesofpymoli_df["Age"], bins, labels=bin_names).value_counts()
age_demo_percent = (age_demo/total_gender)*100
percent_players_df = pd.DataFrame({"Percentage of Players": age_demo_percent, "Total Count":age_demo_count})
percent_players_df

Unnamed: 0,Percentage of Players,Total Count
20-24,39.102564,305
15-19,23.589744,184
10-14,10.0,78
25-29,9.74359,76
30-34,7.435897,58
35-39,5.641026,44
<10,4.102564,32
40+,0.384615,3


In [504]:
bin_df["Age Groups"] = pd.cut(heroesofpymoli_df["Age"], bins, labels =bin_names)
bin_cut = pd.cut(bin_df["Age"], bins, labels = bin_names)
age_grouped_bin_df = bin_df.groupby(["Age Groups"])
purchase_count_age = age_grouped_bin_df["Age"].count()
avg_price_age = age_grouped_bin_df["Price"].mean()
total_purchase_age = age_grouped_bin_df["Price"].sum()
dupdrop = heroesofpymoli_df.drop_duplicates(subset = 'SN', keep = "first")
dupdrop["Age Groups"] = pd.cut(dupdrop["Age"], bins, labels = bin_names)
dupdrop = dupdrop.groupby(["Age Groups"])
normalized_total_bin_df = (age_grouped_bin_df["Price"].sum()/dupdrop["SN"].count())


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy


In [503]:
age_demo_bin_df = pd.DataFrame({ "Purchase Count": purchase_count_age,
                           "Average Purchase Price": avg_price_age,
                           "Total Purchase Value": total_purchase_age,
                           "Normalized Total": normalized_total_bin_df})

age_demo_bin_df

Unnamed: 0_level_0,Average Purchase Price,Normalized Total,Purchase Count,Total Purchase Value
Age Groups,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,3.019375,4.391818,32,96.62
10-14,2.873718,4.150926,78,224.15
15-19,2.873587,3.803885,184,528.74
20-24,2.959377,3.857308,305,902.61
25-29,2.892368,4.227308,76,219.82
30-34,3.073448,4.051364,58,178.26
35-39,2.8975,5.0996,44,127.49
40+,2.88,2.88,3,8.64


In [436]:

top_5_spenders_df= heroesofpymoli_df.groupby('SN').sum().reset_index().sort_values(['Price'], 
                    ascending=False).groupby('SN').head(5).reset_index(drop=True).drop(['Age', 'Item ID'], axis=1)

top_5_spenders_df1= heroesofpymoli_df.groupby('SN').count().reset_index().sort_values(['Price'], 
                    ascending=False).groupby('SN').head(5).reset_index(drop=True).drop(['Age','Item ID','Gender','Price'], 
                                                                                       axis=1)
merge_table = pd.merge(top_5_spenders_df, top_5_spenders_df1,on="SN")
merge_table1=merge_table.rename(columns = {'Item Name':'Purchase Count'})
top_5_mean_df = pd.DataFrame({"Average Price" :top_5_spenders_df['Price']/top_5_spenders_df1['Item Name']})


In [437]:
merge_table = pd.merge(top_5_spenders_df, top_5_spenders_df1,on="SN")
merge_table1_df=merge_table.rename(columns = {'Item Name':'Purchase Count'})


In [439]:
top_spenders = pd.merge(merge_table1_df, top_5_mean_df, left_index=True, right_index=True)
merge_table1_df=merge_table.rename(columns = {'Item Name':'Purchase Count', 'Price':'Total Purchase Value'})
top_spenders.head(5)


Unnamed: 0,SN,Total Purchase Value,Purchase Count,Average Price
0,Undirrala66,17.06,5,3.412
1,Saedue76,13.56,4,3.39
2,Mindimnya67,12.74,4,3.185
3,Haellysu29,12.73,3,3.1825
4,Eoda93,11.58,3,2.895


In [424]:
#Most Popular Items
top_5_items_df= heroesofpymoli_df.groupby(['Item Name']).count().reset_index().sort_values(['Price'], 
                    ascending=False).groupby('Item Name').head(5).reset_index(drop=True).drop(['Age','Item ID','Gender','Price'], axis=1)
top_5_items_rename_df = top_5_items_df.rename(columns={'SN':'Purchase Count'})



In [425]:
 
x_df = heroesofpymoli_df.loc[heroesofpymoli_df['Item Name'].isin(['Final Critic','Arcane Gem','Betrayal, Whisper of Grieving Widows','Stormcaller', 'Woeful Adamantite Claymore'])]
xy_df = x_df.drop_duplicates(subset = ['Item Name', 'Item ID'], keep='first')


In [426]:
merge_table_popular_item = pd.merge(top_5_items_rename_df, xy_df,on="Item Name")
Final_popular_item_df = merge_table_popular_item.drop(columns=['Age', 'Gender', 'SN'])


In [427]:
top_5_itemsx_df= heroesofpymoli_df.groupby(['Item Name']).sum().reset_index().sort_values(['Price'], 
                    ascending=False).groupby('Item Name').head(5).reset_index(drop=True).drop(['Age','Item ID'], axis=1)
y_df = top_5_itemsx_df.loc[top_5_itemsx_df['Item Name'].isin(['Final Critic','Arcane Gem','Betrayal, Whisper of Grieving Widows','Stormcaller', 'Woeful Adamantite Claymore'])]
y1_df = y_df.rename(columns={'Price': 'Total Purchase Value'}).reset_index(drop=True)


In [423]:
merge_table_popular_item_final = pd.merge(Final_popular_item_df, y1_df,on="Item Name")
merge_table_popular_item_final

Unnamed: 0,Item Name,Purchase Count,Item ID,Price,Total Purchase Value
0,Final Critic,14,92,1.36,38.6
1,Final Critic,14,101,4.62,38.6
2,Arcane Gem,11,84,2.23,24.53
3,"Betrayal, Whisper of Grieving Widows",11,39,2.35,25.85
4,Stormcaller,10,30,4.15,34.65
5,Stormcaller,10,180,2.78,34.65
6,Woeful Adamantite Claymore,9,175,1.24,11.16


In [459]:
profitable_items_df= heroesofpymoli_df.groupby(['Item Name']).sum().reset_index().sort_values(['Price'], 
                    ascending=False).groupby('Item Name').head(5).reset_index(drop=True).drop(['Age','Item ID'], axis=1)
profitable_items_rename_df = profitable_items_df.rename(columns={'Price': 'Total Purchase Value'}).reset_index(drop=True)
#profitable_items_count_df.head()

Unnamed: 0,Item Name,Gender,Price,SN
0,Final Critic,14,14,14
1,Arcane Gem,11,11,11
2,"Betrayal, Whisper of Grieving Widows",11,11,11
3,Stormcaller,10,10,10
4,Woeful Adamantite Claymore,9,9,9


In [460]:
profitable_items1_df = heroesofpymoli_df.loc[heroesofpymoli_df['Item Name'].isin
                                             (['Final Critic','Retribution Axe','Stormcaller','Spectral Diamond Doomblade','Orenmir'])]
dup_drop_df = profitable_items1_df.drop_duplicates(subset = ['Item Name', 'Item ID'], keep='first')
#dup_drop_df

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
3,21,Male,92,Final Critic,1.36,Pheusrical25
50,32,Female,32,Orenmir,4.95,Saistyphos30
54,25,Female,101,Final Critic,4.62,Minduli80
57,24,Male,34,Retribution Axe,4.14,Alallo58
101,25,Male,30,Stormcaller,4.15,Assistasda90
107,29,Male,115,Spectral Diamond Doomblade,4.25,Undirrala66
119,19,Male,180,Stormcaller,2.78,Yasur35


In [468]:
v_df = top_5_items_df.loc[top_5_items_df['Item Name'].isin
                                             (['Final Critic','Retribution Axe','Stormcaller','Spectral Diamond Doomblade','Orenmir'])]
v1_df = v_df.rename(columns={'SN': 'Purchase Count'})


In [467]:
merge_table_profitable_item_final = pd.merge(dup_drop_df, profitable_items_rename_df, on="Item Name")
Final_profitable_item = merge_table_profitable_item_final.drop(columns={'Age', 'Gender', 'SN'})
g_df = pd.merge(Final_profitable_item ,v1_df, on="Item Name")
g_df

Unnamed: 0,Item ID,Item Name,Price,Total Purchase Value,Purchase Count
0,92,Final Critic,1.36,38.6,14
1,101,Final Critic,4.62,38.6,14
2,32,Orenmir,4.95,29.7,6
3,34,Retribution Axe,4.14,37.26,9
4,30,Stormcaller,4.15,34.65,10
5,180,Stormcaller,2.78,34.65,10
6,115,Spectral Diamond Doomblade,4.25,29.75,7
