# Heroes of Pymoli: Observable trends
* According to the analysis of gender demographics, the majority of Heroes of Pymoli players are men (84.03% of all players), followed by women (14.06%).
* Although men make the most purchases and therefore account for the highest total purchase value, players classified as Other / Non-Disclosed spend the most per person, with an average of \\$4.56, followed by women with an average of \\$4.47 per person.
* The age demographic analysis shows that most players are in the 20-24 age range (44.79%), followed by players in the 15-19 age range (18.58%).
* As with gender, players between 20 and 24 years old make the most purchases and therefore account for the highest total purchase value; however, players between 35 and 39 years old and children under 10 years old spend the most per person, with \\$4.76 and \\$4.54 respectively.


In [1]:
import pandas as pd
import numpy as np

In [2]:
# Location of the raw purchase log, relative to the notebook's working directory.
path = "../Resources/02-Homework_04-Pandas_Instructions_HeroesOfPymoli_Resources_purchase_data.csv"

# Load every purchase row into the frame used by all later cells.
df = pd.read_csv(filepath_or_buffer=path)

# Preview the first five rows to confirm the columns loaded as expected.
df.head(n=5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


# Player Count

In [3]:
# Players are counted as the number of distinct values in the SN column
# (missing values, if any, are not counted).
Total_players = df["SN"].nunique()

In [4]:
# Wrap the scalar player count in a one-row, one-column table for display.
Total_Players_df = pd.DataFrame({"Total Players": Total_players}, index=[0])

In [5]:
# Left-align every cell of the rendered table.
# NOTE(review): the name is rebound from a DataFrame to a Styler, so running
# this cell a second time without re-running the previous one will fail.
Total_Players_df = (
    Total_Players_df.style
    .set_properties(**{"text-align": "left"})
)
Total_Players_df

Unnamed: 0,Total Players
0,576


# Purchasing analysis (Total)

In [6]:
# Number of purchases: count of non-missing "Item Name" entries (one row per purchase).
Purchases = df["Item Name"].count()

In [7]:
# Number of distinct item names; a missing name, if present, counts as one value.
# NOTE(review): items are counted by name here, while the item sections further
# down group by ("Item ID", "Item Name") - confirm names map one-to-one to IDs.
Unique_items = df["Item Name"].nunique(dropna=False)

In [8]:
# Mean price across all purchase rows (not across distinct items).
average_price = df["Price"].mean()

In [9]:
# Total revenue: sum of the price of every purchase row.
Total_revenue = df["Price"].sum()

In [10]:
# Collect the four headline totals, then lay them out as a single-row table.
purchasing_totals = {
    "Unique items": Unique_items,
    "Average Price": average_price,
    "Number of Purchases": Purchases,
    "Total revenue": Total_revenue,
}
Purchasing_analysis = pd.DataFrame(purchasing_totals, index=[0])

In [11]:
# Format the money columns for display and left-align the table.
# "Total revenue" is a float sum, so it is rounded to two decimals explicitly;
# a bare "{:,}" would print the full float repr, exposing any binary rounding
# noise in the sum.
# NOTE(review): the name is rebound from a DataFrame to a Styler, so this cell
# cannot be re-run on its own.
Purchasing_analysis = (
    Purchasing_analysis.style
    .format({"Average Price": "${:.2f}", "Total revenue": "${:,.2f}"})
    .set_properties(**{"text-align": "left"})
)

In [12]:
# Show the styled purchasing summary as this cell's output.
Purchasing_analysis

Unnamed: 0,Unique items,Average Price,Number of Purchases,Total revenue
0,179,$3.05,780,"$2,379.77"


# Gender Demographics

In [13]:
# Distinct screen names among rows whose Gender is "Male".
is_male = df["Gender"] == "Male"
male = df.loc[is_male, "SN"].nunique()

In [14]:
# Distinct screen names among rows whose Gender is "Female".
is_female = df["Gender"] == "Female"
female = df.loc[is_female, "SN"].nunique()

In [15]:
# Distinct screen names among rows whose Gender is "Other / Non-Disclosed".
is_other = df["Gender"] == "Other / Non-Disclosed"
Other = df.loc[is_other, "SN"].nunique()

In [16]:
# Per-gender player counts, in the same order as the labels below.
gender_counts = [male, female, Other]

# One row per gender: the raw count and its share of all players (as a fraction;
# it is rendered as a percentage when the table is styled).
gender_demographics_df = pd.DataFrame({
    "Gender": ["Male", "Female", "Other/Non-Disclosed"],
    "Total Count": gender_counts,
    "Percentage of players": [count / Total_players for count in gender_counts],
})

In [17]:
# Use the gender label as the row index so it is shown as the row header.
# NOTE(review): after this runs, "Gender" is no longer a column, so re-running
# the cell without rebuilding the frame will fail.
gender_demographics_df = gender_demographics_df.set_index(keys="Gender")

In [18]:
# Render the player share as a percentage with two decimals and left-align cells.
gender_demographics_final = (
    gender_demographics_df.style
    .format({"Percentage of players": "{:.2%}"})
    .set_properties(**{"text-align": "left"})
)
gender_demographics_final

Unnamed: 0_level_0,Total Count,Percentage of players
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,484,84.03%
Female,81,14.06%
Other/Non-Disclosed,11,1.91%


# Purchasing Analysis (Gender)

In [19]:
# Per gender: number of purchase rows (count of SN), plus mean and sum of Price.
# The result has two-level column labels: (source column, aggregation name).
gender_aggregations = {"SN": "count", "Price": ["mean", "sum"]}
data_gender = df.groupby("Gender").agg(gender_aggregations)

In [20]:
# Drop the outer column level (source column names), leaving the aggregation
# names "count", "mean" and "sum" as flat column labels.
data_gender.columns = data_gender.columns.droplevel(0)

In [21]:
# Number of distinct players per gender, computed from the data instead of being
# typed in by hand. The result is a Series indexed by gender label, so dividing
# a per-gender total by it aligns on the label rather than on row position.
gender_nums = df.groupby("Gender")["SN"].nunique()

In [22]:
# Average amount spent per distinct player: total purchase value of each gender
# divided by that gender's player count.
data_gender = data_gender.assign(
    **{"Avg Total Purchase per Person": data_gender["sum"] / gender_nums}
)

In [23]:
# Replace the raw aggregation names with presentation-ready column headers.
gender_column_titles = {
    "count": "Purchase Count",
    "mean": "Average Purchase Price",
    "sum": "Total Purchase Value",
}
data_gender = data_gender.rename(columns=gender_column_titles)

In [24]:
# Format money columns, highlight the maximum of each column in green and
# left-align the cells.
# NOTE(review): the name is rebound from a DataFrame to a Styler, so this cell
# cannot be re-run on its own.
data_gender = (
    data_gender.style
    .format({
        "Average Purchase Price": "${:.2f}",
        "Total Purchase Value": "${:,.2f}",
        "Avg Total Purchase per Person": "${:.2f}",
    })
    .highlight_max(color="green")
    .set_properties(**{"text-align": "left"})
)

In [25]:
# Show the styled per-gender purchasing table as this cell's output.
data_gender

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,113,$3.20,$361.94,$4.47
Male,652,$3.02,"$1,967.64",$4.07
Other / Non-Disclosed,15,$3.35,$50.19,$4.56


# Age Demographics

In [26]:
# Age bin edges; there is one more edge than there are labels.
bins = [
    0,
    9,
    14,
    19,
    24,
    29,
    34,
    39,
    150,
]

# Display label for each consecutive pair of edges above.
labels = [
    "<10",
    "10-14",
    "15-19",
    "20-24",
    "25-29",
    "30-34",
    "35-39",
    "40+",
]

In [27]:
# Label every purchase row with its age bracket. pd.cut uses right-closed
# intervals by default, so e.g. ages 1-9 fall in "<10" and 10-14 in "10-14";
# an age of exactly 0 would fall outside the first interval and become NaN.
df["Age Range"] = pd.cut(x=df["Age"], bins=bins, labels=labels)

In [28]:
# Group purchase rows by age bracket. "Age Range" is categorical, so the
# `observed` behaviour is stated explicitly: observed=False keeps every bracket
# in the results even if it has no rows, which is what the implicit default did.
# Newer pandas releases warn when this is left unspecified for categorical keys.
df_ages = df.groupby("Age Range", observed=False)

In [29]:
# Number of distinct players (screen names) in each age bracket.
total_count = df_ages["SN"].nunique()

In [30]:
# Share of all players in each age bracket, as a fraction (formatted as a
# percentage when the table is styled).
percentage_players = total_count/Total_players


In [31]:
# Combine the per-bracket player count and player share into one table; both
# inputs are indexed by age bracket, which becomes the row index.
age_demographic_columns = {
    "Total Count": total_count,
    "Percentage of Players": percentage_players,
}
Summary_demographics = pd.DataFrame(age_demographic_columns)

In [32]:
# Show the share as a percentage, mark each column's maximum in green and its
# minimum in red, and left-align the cells.
# NOTE(review): the name is rebound from a DataFrame to a Styler, so this cell
# cannot be re-run on its own.
Summary_demographics = (
    Summary_demographics.style
    .format({"Percentage of Players": "{:.2%}"})
    .highlight_max(color="green")
    .highlight_min(color="indianred")
    .set_properties(**{"text-align": "left"})
)

In [33]:
# Show the styled age demographics table as this cell's output.
Summary_demographics

Unnamed: 0_level_0,Total Count,Percentage of Players
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,17,2.95%
10-14,22,3.82%
15-19,107,18.58%
20-24,258,44.79%
25-29,77,13.37%
30-34,52,9.03%
35-39,31,5.38%
40+,12,2.08%


# Purchasing Analysis (Age)

In [34]:
# Number of purchase rows in each age bracket (non-missing SN entries).
purchase_count = df_ages["SN"].count()

In [35]:
# Mean price per purchase in each age bracket.
avg_pur_price = df_ages["Price"].mean()

In [36]:
# Total amount spent in each age bracket.
total_pur_value = df_ages["Price"].sum()

In [37]:
# Average amount spent per distinct player in each bracket: bracket total
# divided by the bracket's unique-player count (both indexed by age bracket).
avg_total_pp = total_pur_value/total_count

In [38]:
# Assemble the per-bracket purchasing metrics into one table; every input is
# indexed by age bracket, which becomes the row index.
age_purchase_columns = {
    "Purchase Count": purchase_count,
    "Average Purchase Price": avg_pur_price,
    "Total Purchase Value": total_pur_value,
    "Avg Total Purchase per Person": avg_total_pp,
}
analysis_age_df = pd.DataFrame(age_purchase_columns)

In [39]:
# Format money columns, mark each column's maximum in green and its minimum in
# red, and left-align the cells.
# NOTE(review): the name is rebound from a DataFrame to a Styler, so this cell
# cannot be re-run on its own.
analysis_age_df = (
    analysis_age_df.style
    .format({
        "Average Purchase Price": "${:.2f}",
        "Total Purchase Value": "${:,.2f}",
        "Avg Total Purchase per Person": "${:.2f}",
    })
    .highlight_max(color="green")
    .highlight_min(color="indianred")
    .set_properties(**{"text-align": "left"})
)

In [40]:
# Show the styled per-age-bracket purchasing table as this cell's output.
analysis_age_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Avg Total Purchase per Person
Age Range,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,23,$3.35,$77.13,$4.54
10-14,28,$2.96,$82.78,$3.76
15-19,136,$3.04,$412.89,$3.86
20-24,365,$3.05,"$1,114.06",$4.32
25-29,101,$2.90,$293.00,$3.81
30-34,73,$2.93,$214.00,$4.12
35-39,41,$3.60,$147.67,$4.76
40+,13,$2.94,$38.24,$3.19


# Top Spenders

In [41]:
# Per player (SN): number of purchase rows, plus mean and sum of Price.
# The result has two-level column labels: (source column, aggregation name).
spender_aggregations = {"SN": "count", "Price": ["mean", "sum"]}
spenders_df = df.groupby("SN").agg(spender_aggregations)

In [42]:
# Drop the outer column level (source column names), leaving the aggregation
# names "count", "mean" and "sum" as flat column labels.
spenders_df.columns = spenders_df.columns.droplevel(0)

In [43]:
# Replace the raw aggregation names with presentation-ready column headers.
spender_column_titles = {
    "count": "Purchase Count",
    "mean": "Average Purchase Price",
    "sum": "Total Purchase Value",
}
spenders_df = spenders_df.rename(columns=spender_column_titles)

In [44]:
# Rank players by total amount spent, highest first, and keep the first five.
spenders_by_value = spenders_df.sort_values(by="Total Purchase Value", ascending=False)
top_5 = spenders_by_value.head(n=5)

In [45]:
# Format money columns with two decimals and left-align the cells.
# NOTE(review): the name is rebound from a DataFrame to a Styler, so this cell
# cannot be re-run on its own.
top_5 = (
    top_5.style
    .format({
        "Average Purchase Price": "${:.2f}",
        "Total Purchase Value": "${:.2f}",
    })
    .set_properties(**{"text-align": "left"})
)

In [46]:
# Show the styled top-spenders table as this cell's output.
top_5

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


# Most Popular Items

In [47]:
# Group purchase rows by item, keyed on both ID and name. Despite the "_df"
# suffix this is a GroupBy object, not a DataFrame; the cells below aggregate it.
item_keys = ["Item ID", "Item Name"]
popular_items_df = df.groupby(by=item_keys)

In [48]:
# Number of purchase rows for each (Item ID, Item Name) pair.
purch_count_item = popular_items_df["Item Name"].count()

In [49]:
# Total amount spent on each (Item ID, Item Name) pair.
total_value = popular_items_df["Price"].sum()

In [50]:
# Price per item, computed as total value divided by purchase count, i.e. the
# average price paid; this equals the list price only if every purchase of the
# item was made at the same price.
item_price = total_value/purch_count_item

In [51]:
# Assemble the per-item metrics into one table; every input is indexed by
# (Item ID, Item Name), which becomes the row index.
item_columns = {
    "Purchase Count": purch_count_item,
    "Item Price": item_price,
    "Total Purchase Value": total_value,
}
analysis_items = pd.DataFrame(item_columns)

In [52]:
# Rank items by number of purchases, highest first, and keep the first five.
items_by_count = analysis_items.sort_values(by="Purchase Count", ascending=False)
top_5_items = items_by_count.head(n=5)

In [53]:
# Format money columns with two decimals, left-align the cells and display.
# NOTE(review): the name is rebound from a DataFrame to a Styler, so this cell
# cannot be re-run on its own.
top_5_items = (
    top_5_items.style
    .format({
        "Item Price": "${:.2f}",
        "Total Purchase Value": "${:.2f}",
    })
    .set_properties(**{"text-align": "left"})
)
top_5_items

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
132,Persuasion,9,$3.22,$28.99
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77


# Most Profitable Items

In [54]:
# Rank items by total purchase value, highest first, and keep the first five.
items_by_value = analysis_items.sort_values(by="Total Purchase Value", ascending=False)
profitable_items = items_by_value.head(n=5)

In [55]:
# Format money columns with two decimals, left-align the cells and display.
# NOTE(review): the name is rebound from a DataFrame to a Styler, so this cell
# cannot be re-run on its own.
profitable_items = (
    profitable_items.style
    .format({
        "Item Price": "${:.2f}",
        "Total Purchase Value": "${:.2f}",
    })
    .set_properties(**{"text-align": "left"})
)
profitable_items

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
103,Singed Scalpel,8,$4.35,$34.80
