In [30]:
import pandas as pd
import numpy as np

# Location of the raw purchase records, relative to the notebook's folder.
heroes_file = "Resources/purchase_data.csv"

# Load the purchase log once; every later cell reads from this frame.
purchase_data = pd.read_csv(filepath_or_buffer=heroes_file)

In [32]:
# One screen name (SN) can appear on many purchase rows, so count the
# distinct screen names rather than the rows.
total_players = purchase_data["SN"].nunique()

# Single-row summary table; the bare name on the last line renders it.
players = pd.DataFrame(data=[[total_players]], columns=["Total Players"])
players

Unnamed: 0,Total Players
0,576


In [34]:
# Headline purchase metrics across the whole data set.
price_column = purchase_data["Price"]

unique_items = len(purchase_data["Item ID"].drop_duplicates())
avg_price = price_column.mean()
purchases = purchase_data["Purchase ID"].count()
revenue = price_column.sum()

# Scalars plus an explicit one-element index give a single-row frame.
summary_row = {
    "Number of Unique Items": unique_items,
    "Average Price": avg_price,
    "Number of Purchases": purchases,
    "Total Revenue": revenue,
}
purchasing_analysis_df = pd.DataFrame(summary_row, index=[0])

# Money columns are shown as dollars with thousands separators and 2 decimals.
currency = "${:,.2f}"
purchasing_analysis_df.style.format({"Average Price": currency,
                                     "Total Revenue": currency})

Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,183,$3.05,780,"$2,379.77"


In [40]:
# Gender demographics: how many distinct players fall under each gender label.
gender_stats = purchase_data.groupby("Gender")

# Distinct screen names per gender, so a repeat buyer is counted once.
total_count = gender_stats["SN"].nunique()

# Share of the overall player base, expressed as a percentage.
percent_players = total_count.div(total_players).mul(100)

gender_demo = total_count.to_frame("Total Count").assign(
    **{"Percentage of Players": percent_players}
)

# Largest group first; percentages rendered with two decimals.
gender_demo.sort_values(by="Total Count", ascending=False).style.format(
    {"Percentage of Players": "{:.2f}"}
)


Unnamed: 0_level_0,Total Count,Percentage of Players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,484,84.03
Female,81,14.06
Other / Non-Disclosed,11,1.91


In [29]:
# Purchasing analysis by gender. Relies on `gender_stats` (purchase_data grouped
# by "Gender") from the gender-demographics cell.

# Unique players per gender. Computed here because no cell defines
# `total_count_gender`, and the similarly named `total_count` global is
# rebound to age-group counts further down the notebook.
total_count_gender = gender_stats["SN"].nunique()

# Number of purchase rows per gender.
purchase_count = gender_stats["Purchase ID"].count()

# Mean price of a single purchase per gender.
avg_price = gender_stats["Price"].mean()

# Despite the name, this is the summed purchase value per gender.
avg_total = gender_stats["Price"].sum()

# Total spend divided by distinct players, i.e. spend per person.
avg_per_person = avg_total / total_count_gender

gender_demo = pd.DataFrame({"Purchase Count": purchase_count,
                            "Average Purchase Price": avg_price,
                            "Total Purchase Value": avg_total,
                            "Avg Total Purchase per Person": avg_per_person})

gender_demo.index.name = "Gender"

# Money columns are shown as dollars with thousands separators and 2 decimals.
gender_demo.style.format({"Total Purchase Value": "${:,.2f}",
                          "Average Purchase Price": "${:,.2f}",
                          "Avg Total Purchase per Person": "${:,.2f}"})


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


In [48]:
# Age demographics: distinct players per age bracket and their share of all players.

# pd.cut uses right-closed intervals, and each upper edge sits just below the
# next whole number, so age 9 lands in "<10" and age 10 in "10-14".
# NOTE(review): assumes ages are whole numbers greater than 0; a fractional age
# such as 9.95 would fall into the next bracket -- confirm against the data.
bins = [0, 9.90, 14.90, 19.90, 24.90, 29.90, 34.90, 39.90, 99999]
groups = ["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

# Tag every purchase row with its bracket (adds or overwrites the "Age Group" column).
purchase_data["Age Group"] = pd.cut(purchase_data["Age"], bins, labels=groups)

# "Age Group" is categorical; observed=False spells out the current default
# (keep every labelled bracket, even an empty one) and avoids the pandas
# deprecation warning about that default changing.
age_group = purchase_data.groupby("Age Group", observed=False)

# Distinct screen names per bracket, so repeat buyers are counted once.
total_count = age_group["SN"].nunique()

percentage_age = (total_count / total_players) * 100

age_demo = pd.DataFrame({"Total Count": total_count, "Percentage of Players": percentage_age})

age_demo.style.format({"Percentage of Players": "{:,.2f}"})

Unnamed: 0_level_0,Total Count,Percentage of Players
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,2.95
10-14,22,3.82
15-19,107,18.58
20-24,258,44.79
25-29,77,13.37
30-34,52,9.03
35-39,31,5.38
40+,12,2.08


In [49]:
# Purchasing analysis by age bracket. Relies on the "Age Group" column that the
# age-demographics cell adds to `purchase_data`.

# No cell defines `age_grouped` or `total_count_age`, so both are derived here.
# observed=False keeps every labelled bracket in the result, matching the
# current pandas default for categorical group keys.
age_grouped = purchase_data.groupby("Age Group", observed=False)

# Distinct players per bracket, used as the per-person denominator below.
total_count_age = age_grouped["SN"].nunique()

# Number of purchase rows per bracket.
purchase_count_age = age_grouped["Purchase ID"].count()

# Mean price of a single purchase per bracket.
avg_price_age = age_grouped["Price"].mean()

# Summed purchase value per bracket.
total_purchase_value = age_grouped["Price"].sum()

# Total spend divided by distinct players, i.e. spend per person.
avg_purchase_per_person_age = total_purchase_value / total_count_age

age_demo = pd.DataFrame({"Purchase Count": purchase_count_age,
                         "Average Purchase Price": avg_price_age,
                         "Total Purchase Value": total_purchase_value,
                         "Average Purchase Total per Person": avg_purchase_per_person_age})

# Money columns are shown as dollars with thousands separators and 2 decimals.
age_demo.style.format({"Average Purchase Price": "${:,.2f}",
                       "Total Purchase Value": "${:,.2f}",
                       "Average Purchase Total per Person": "${:,.2f}"})


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Purchase Total per Person
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,"$1,114.06",$4.32
25-29,101,$2.90,$293.00,$3.81
30-34,73,$2.93,$214.00,$4.12
35-39,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


In [51]:
# Top spenders: per-player purchase totals, highest total first.
spenders = purchase_data.groupby("SN")

# Number of purchase rows per player.
purchase_count = spenders["Purchase ID"].count()

# Mean price of a single purchase per player.
avg_purchase_price = spenders["Price"].mean()

# Summed purchase value per player.
purchase_total = spenders["Price"].sum()

top_spenders = pd.DataFrame({"Purchase Count": purchase_count,
                             "Average Purchase Price": avg_purchase_price,
                             "Total Purchase Value": purchase_total})

# head() with no argument keeps the first five rows after sorting.
formatted_spenders = top_spenders.sort_values(["Total Purchase Value"], ascending=False).head()

# Only columns that exist in the frame get a formatter; the previous
# "Average Purchase Total" entry matched no column and had no effect.
formatted_spenders.style.format({"Average Purchase Price": "${:,.2f}",
                                 "Total Purchase Value": "${:,.2f}"})

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


In [53]:
# Most popular items: purchase count, price and total value per item.
items = purchase_data.loc[:, ["Item ID", "Item Name", "Price"]]

# Group on both ID and name so each result row carries both labels.
item_types = items.groupby(["Item ID", "Item Name"])
price_by_item = item_types["Price"]

purchase_count = price_by_item.count()

purchase_value = price_by_item.sum()

# Total value divided by number of purchases gives the per-purchase price.
item_price = purchase_value.div(purchase_count)

most_popular = pd.DataFrame({"Purchase Count": purchase_count,
                             "Item Price": item_price,
                             "Total Purchase Value": purchase_value})

# Most frequently bought first; head() keeps the first five rows.
popular = most_popular.sort_values(by="Purchase Count", ascending=False).head()

dollars = "${:,.2f}"
popular.style.format({"Item Price": dollars,
                      "Total Purchase Value": dollars})

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77
82,Nirvana,9,$4.90,$44.10
19,"Pursuit, Cudgel of Necromancy",8,$1.02,$8.16


In [54]:
# Most profitable items: the per-item summary from the previous cell
# (`most_popular`), re-ranked by total purchase value instead of purchase count.
# The earlier reference to `most_popular_items` named a frame that is never defined.
profitable = most_popular.sort_values(["Total Purchase Value"], ascending=False).head()
profitable.style.format({"Item Price": "${:,.2f}",
                         "Total Purchase Value": "${:,.2f}"})

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
92,Final Critic,8,$4.88,$39.04
103,Singed Scalpel,8,$4.35,$34.80


In [None]:
The most obvious trend in the Heroes of Pymoli data is that the largest share of players
falls within the age range of 20-24 years old. Almost half of all players (44.79%) are in that age group.
Male players are not only the largest percentage of players by gender, but also the largest buyers of the
unique add-ons available to purchase. Even though there is a large number of add-on purchases, there is no
specific outlier for a frequently bought item. The most popular item was purchased only 12 times.