<big><big><b>Heroes of Pymoli Data Analysis</b></big></big>

Observed Trend 1:  The bulk of the profits generated from this game come from the 20-24 age range.<br>
Observed Trend 2:  The more expensive items generate more revenue, despite their lower purchase counts.<br>
Observed Trend 3:  Other / Non-Disclosed gendered players spend more per item than male or female players, but in such low numbers that it is hardly significant.<br>

In [1]:
import pandas as pd
import numpy as np
df_path = 'purchase_data.JSON'
df = pd.read_json(df_path)


<big><b>PLAYER COUNT</b></big>

In [2]:
player_count = len(df['SN'].unique())

player_count_df = pd.DataFrame({
    'Total Players' : player_count
                                },
    index = [0]
)

player_count_df


Unnamed: 0,Total Players
0,573


<big><b>Purchasing Analysis (Total)</b></big>

In [3]:
purchase_count = df['Price'].count()

total_revenue = df['Price'].sum()

avg_price = total_revenue / purchase_count

unique_items = len(df['Item Name'].unique())

purchasing_analysis = pd.DataFrame({
    'Number of Unique Items' : unique_items,
    'Average Price' : avg_price,
    'Number of Purchases' : purchase_count,
    'Total Revenue' : total_revenue
                                    },
    index = [0]
)

purchasing_analysis = purchasing_analysis[['Number of Unique Items', 'Average Price', 'Number of Purchases', 'Total Revenue']]

purchasing_analysis['Average Price'] = purchasing_analysis['Average Price'].map('$ {:,.2f}'.format)
purchasing_analysis['Total Revenue'] = purchasing_analysis['Total Revenue'].map('$ {:,.2f}'.format)


purchasing_analysis


Unnamed: 0,Number of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$ 2.93,780,"$ 2,286.33"


<big><b>Gender Demographics</b></big>

In [4]:
unique_player_df = df.drop_duplicates(subset='SN', keep="last")

total_players_by_gender = unique_player_df['Gender'].value_counts().values

gender_list = df['Gender'].value_counts().keys()

percent_players_by_gender = (total_players_by_gender / player_count) * 100

gender_demographics = pd.DataFrame({
    'Percentage of Players' : percent_players_by_gender,
    'Total Count' : total_players_by_gender
                                    },
    index = [gender_list]
)

gender_demographics


Unnamed: 0,Percentage of Players,Total Count
Male,81.151832,465
Female,17.452007,100
Other / Non-Disclosed,1.396161,8


<big><b>Purchasing Analysis (Gender)</b></big>

In [5]:
male_purchases = df.loc[df['Gender'] == 'Male']
female_purchases = df.loc[df['Gender'] == 'Female']
other_purchases = df.loc[df['Gender'] == 'Other / Non-Disclosed']

purchase_count_gender = pd.Series([male_purchases['Price'].count(), 
                                female_purchases['Price'].count(),  
                                other_purchases['Price'].count()
                                ])

total_value_gender = pd.Series([male_purchases['Price'].sum(), 
                                female_purchases['Price'].sum(),  
                                other_purchases['Price'].sum()
                                ])

avg_price_gender = total_value_gender / purchase_count_gender

norm_price_gender = total_value_gender / total_players_by_gender

purchasing_analysis = pd.DataFrame({
    'Purchase Count' : purchase_count_gender,
    'Average Purchase Price' : avg_price_gender,
    'Total Purchase Value' : total_value_gender,
    'Normalized Totals' : norm_price_gender,
    'Gender' : ['Male', 'Female', 'Other / Non-Disclosed']
                                    })

purchasing_analysis.set_index('Gender', inplace=True)

purchasing_analysis['Average Purchase Price'] = purchasing_analysis['Average Purchase Price'].map('$ {:,.2f}'.format)
purchasing_analysis['Total Purchase Value'] = purchasing_analysis['Total Purchase Value'].map('$ {:,.2f}'.format)
purchasing_analysis['Normalized Totals'] = purchasing_analysis['Normalized Totals'].map('$ {:,.2f}'.format)

purchasing_analysis = purchasing_analysis[['Purchase Count', 'Average Purchase Price', 'Total Purchase Value', 'Normalized Totals']]

purchasing_analysis


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Male,633,$ 2.95,"$ 1,867.68",$ 4.02
Female,136,$ 2.82,$ 382.91,$ 3.83
Other / Non-Disclosed,11,$ 3.25,$ 35.74,$ 4.47


<big><b>Age Demographics</b></big>

In [6]:


bins = [0, 10, 15, 20, 25, 30, 35, 40, 150]

group_labels = ["<10","10-14","15-19","20-24","25-29","30-34","35-39","40+"]

binned_unique_df = df.drop_duplicates(subset='SN', keep='last')
binned_unique_df['Total Count'] = pd.cut(binned_unique_df["Age"],bins,labels=group_labels)

age_demographics = pd.DataFrame(binned_unique_df['Total Count'].value_counts())

age_demographics['Percentage of Players'] = round((age_demographics['Total Count'] / player_count) * 100, 2)

age_demographics = age_demographics.reindex(["<10","10-14","15-19","20-24","25-29","30-34","35-39","40+"])

age_demographics = age_demographics[['Percentage of Players', 'Total Count']]

age_demographics


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  


Unnamed: 0,Percentage of Players,Total Count
<10,3.84,22
10-14,9.42,54
15-19,24.26,139
20-24,40.84,234
25-29,9.08,52
30-34,7.68,44
35-39,4.36,25
40+,0.52,3


<big><b>Purchasing Analysis (Age)</b></big>

In [7]:
binned_purchase_df = df
binned_purchase_df['Total Count'] = pd.cut(binned_purchase_df["Age"],bins,labels=group_labels)

binned_purchase_df = binned_purchase_df.groupby('Total Count')

binned_purchase_count = binned_purchase_df['Price'].count()

binned_total_value = binned_purchase_df['Price'].sum()

binned_avg_price = binned_total_value / binned_purchase_count

binned_norm_price = binned_total_value / age_demographics['Total Count']

binned_purchase_analysis_df = pd.DataFrame(
            {'Purchase Count' : binned_purchase_count,
             'Average Purchase Price' : binned_avg_price,
             'Total Purchase Value' : binned_total_value,
             'Normalized Totals' : binned_norm_price
        }
)

binned_purchase_analysis_df['Average Purchase Price'] = binned_purchase_analysis_df['Average Purchase Price'].map('$ {:,.2f}'.format)
binned_purchase_analysis_df['Total Purchase Value'] = binned_purchase_analysis_df['Total Purchase Value'].map('$ {:,.2f}'.format)
binned_purchase_analysis_df['Normalized Totals'] = binned_purchase_analysis_df['Normalized Totals'].map('$ {:,.2f}'.format)

binned_purchase_analysis_df = binned_purchase_analysis_df[['Purchase Count', 'Average Purchase Price', 'Total Purchase Value', 'Normalized Totals']]

binned_purchase_analysis_df


Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value,Normalized Totals
Total Count,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,32,$ 3.02,$ 96.62,$ 4.39
10-14,78,$ 2.87,$ 224.15,$ 4.15
15-19,184,$ 2.87,$ 528.74,$ 3.80
20-24,305,$ 2.96,$ 902.61,$ 3.86
25-29,76,$ 2.89,$ 219.82,$ 4.23
30-34,58,$ 3.07,$ 178.26,$ 4.05
35-39,44,$ 2.90,$ 127.49,$ 5.10
40+,3,$ 2.88,$ 8.64,$ 2.88


<big><b>Top Spenders</b></big>

In [8]:
spenders_df = df[['Item ID', 'Price', 'SN']]

spenders_count = spenders_df.groupby('SN')['Price'].count()
spenders_df = spenders_df.join(spenders_count, on='SN', lsuffix='_l', rsuffix='_r')

spenders_total = spenders_df.groupby('SN')['Price_l'].sum()
spenders_df = spenders_df.join(spenders_total, on='SN', lsuffix='_l', rsuffix='_r')

spenders_avg = spenders_df['Price_l_r'].values / spenders_df['Price_r']
spenders_df['Average Purchase Price'] = spenders_avg

spenders_df = spenders_df.rename(columns=
    {
        'Price_l_l' : 'Price',
        'Price_l_r' : 'Total Purchase Value',
        'Price_r' : 'Purchase Count'
    }
)

spenders_df.drop_duplicates('SN', inplace=True)
spenders_df.sort_values('Total Purchase Value', inplace=True, ascending=False)
spenders_df = spenders_df.iloc[0:5,:]

spenders_df['Price'] = spenders_df['Price'].map('$ {:,.2f}'.format)
spenders_df['Total Purchase Value'] = spenders_df['Total Purchase Value'].map('$ {:,.2f}'.format)
spenders_df['Average Purchase Price'] = spenders_df['Average Purchase Price'].map('$ {:,.2f}'.format)

spenders_df.set_index('SN', inplace=True)


spenders_df = spenders_df[['Purchase Count', 'Average Purchase Price', 'Total Purchase Value']]

spenders_df

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,5,$ 3.41,$ 17.06
Saedue76,4,$ 3.39,$ 13.56
Mindimnya67,4,$ 3.18,$ 12.74
Haellysu29,3,$ 4.24,$ 12.73
Eoda93,3,$ 3.86,$ 11.58


<big><b>Most Popular Items</b></big>

In [9]:
item_group_df = df[['Item ID', 'Item Name', 'Price']]

item_count_df = item_group_df

most_item_count = item_group_df.groupby('Item ID').count()
most_item_count = most_item_count['Price']
item_count_df = item_count_df.join(most_item_count, on='Item ID',lsuffix='_left', rsuffix='_right')

item_count_df = item_count_df.rename(columns=
    {
        'Price_left' : 'Item Price',
        'Price_right' : 'Purchase Count'
    }
)

item_count_df = item_count_df.drop_duplicates('Item ID')
item_count_df['Total Purchase Value'] = item_count_df['Item Price'].values * item_count_df['Purchase Count']

item_count_df = item_count_df[['Item ID', 'Item Name', 'Purchase Count', 'Item Price', 'Total Purchase Value']]

item_count_df.set_index(['Item ID', 'Item Name'], inplace=True)

most_prof_item_count_df = item_count_df
most_pop_item_count_df = item_count_df

most_pop_item_count_df = most_pop_item_count_df.sort_values('Purchase Count', ascending=False)
most_pop_item_count_df = most_pop_item_count_df.iloc[0:5,:]

most_pop_item_count_df['Item Price'] = most_pop_item_count_df['Item Price'].map('$ {:,.2f}'.format)
most_pop_item_count_df['Total Purchase Value'] = most_pop_item_count_df['Total Purchase Value'].map('$ {:,.2f}'.format)


most_pop_item_count_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",11,$ 2.35,$ 25.85
84,Arcane Gem,11,$ 2.23,$ 24.53
175,Woeful Adamantite Claymore,9,$ 1.24,$ 11.16
13,Serenity,9,$ 1.49,$ 13.41
31,Trickster,9,$ 2.07,$ 18.63


<big><b>Most Profitable Items</b></big>

In [10]:
most_prof_item_count_df = item_count_df.sort_values('Total Purchase Value', ascending=False)
most_prof_item_count_df = most_prof_item_count_df.iloc[0:5,:]

most_prof_item_count_df['Item Price'] = most_prof_item_count_df['Item Price'].map('$ {:,.2f}'.format)
most_prof_item_count_df['Total Purchase Value'] = most_prof_item_count_df['Total Purchase Value'].map('$ {:,.2f}'.format)


most_prof_item_count_df

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,9,$ 4.14,$ 37.26
115,Spectral Diamond Doomblade,7,$ 4.25,$ 29.75
32,Orenmir,6,$ 4.95,$ 29.70
103,Singed Scalpel,6,$ 4.87,$ 29.22
107,"Splitter, Foe Of Subtlety",8,$ 3.61,$ 28.88
