In [1]:
import pandas as pd
import json

In [2]:
# Read with Pandas
purchase_df = pd.read_json('purchase_data.json')

In [3]:
#Show database to look at data
purchase_df.head(2)

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46


In [4]:
#Calculate total number of players
total_players = {"Total Players": purchase_df['SN'].nunique()}
total_players_df = pd.DataFrame([total_players])
total_players_df

Unnamed: 0,Total Players
0,573


In [5]:
#caluclate unique items for purchasing analysis table
unique_items = {"Unique Items": purchase_df['Item Name'].nunique()}
items_df = pd.DataFrame([unique_items])
items_df

Unnamed: 0,Unique Items
0,179


In [6]:
#calculate average price of unique items for purchasing analysis table
avg_price = {"Average Price":purchase_df["Price"].mean()}
avg_price_df = pd.DataFrame([avg_price])
avg_price_df = pd.DataFrame(avg_price_df["Average Price"].map("${:,.2f}".format))
avg_price_df

Unnamed: 0,Average Price
0,$2.93


In [7]:
#Add average price to purchasing analysis table
items_df = items_df.join(avg_price_df)
items_df

Unnamed: 0,Unique Items,Average Price
0,179,$2.93


In [8]:
#Calculate number of purchases for purchasing analysis table
number_purchases = {"Number of Purchases":len(purchase_df)}
number_purchases_df = pd.DataFrame([number_purchases])
number_purchases_df

Unnamed: 0,Number of Purchases
0,780


In [9]:
#Add number of purchases to purchasing analysis table
items_df = items_df.join(number_purchases_df)
items_df

Unnamed: 0,Unique Items,Average Price,Number of Purchases
0,179,$2.93,780


In [10]:
#Calculate total revenue for purchasing analysis table
total_revenue = {"Total Revenue": purchase_df["Price"].sum()}
total_revenue_df = pd.DataFrame([total_revenue])
total_revenue_df = pd.DataFrame(total_revenue_df["Total Revenue"].map("${:,.2f}".format))
total_revenue_df

Unnamed: 0,Total Revenue
0,"$2,286.33"


In [11]:
#Add total revenue to purchasing analysis table
items_df = items_df.join(total_revenue_df)
items_df

Unnamed: 0,Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$2.93,780,"$2,286.33"


In [12]:
#Calculate total number of each gender by adding screen names and subtracting duplicates
purchase_dd_df = purchase_df.drop_duplicates('SN')
total_genders_df = purchase_dd_df['Gender']
total_genders_df = total_genders_df.value_counts()
total_genders_df

Male                     465
Female                   100
Other / Non-Disclosed      8
Name: Gender, dtype: int64

In [13]:
#Calculate total number of players from total of each gender
total_gender_count = total_genders_df.sum()
total_gender_count

573

In [14]:
#Calculate percent of each gender
percent_gender_df = total_genders_df/total_gender_count*100
percent_gender_df

Male                     81.151832
Female                   17.452007
Other / Non-Disclosed     1.396161
Name: Gender, dtype: float64

In [15]:
#Build gender demographics table
genders_df = pd.concat([percent_gender_df.rename("Percentage of Players").map("{:,.1f}%".format), total_genders_df.rename("Total Count")], axis=1)
genders_df

Unnamed: 0,Percentage of Players,Total Count
Male,81.2%,465
Female,17.5%,100
Other / Non-Disclosed,1.4%,8


In [16]:
#Begin purchase analysis by gender calculations; first is total genders by purchase
purchase_analysis_gender = purchase_df['Gender'].value_counts()
purchase_analysis_gender

Male                     633
Female                   136
Other / Non-Disclosed     11
Name: Gender, dtype: int64

In [17]:
#Create a dataframe from our genders by purchase
purchase_count_df = pd.DataFrame(purchase_analysis_gender)
purchase_count_df.index.name = 'Gender'
purchase_count_df = purchase_count_df.rename(columns={'Gender':'Total Purchases'})
purchase_count_df

Unnamed: 0_level_0,Total Purchases
Gender,Unnamed: 1_level_1
Male,633
Female,136
Other / Non-Disclosed,11


In [18]:
#Calculate average purchase price by gender
avg_purchase_price = purchase_df.groupby('Gender')
avg_purchase_price = avg_purchase_price['Price'].mean()
avg_purchase_price

Gender
Female                   2.815515
Male                     2.950521
Other / Non-Disclosed    3.249091
Name: Price, dtype: float64

In [19]:
#Turn average purchase price into a dataframe and format correctly
avg_purchase_price_df = pd.DataFrame(avg_purchase_price)
avg_purchase_price_df.index.name = 'Gender'
avg_purchase_price_df = avg_purchase_price_df.rename(columns={'Price':'Avg Price'})
avg_purchase_price_df = avg_purchase_price_df['Avg Price'].map("${:,.2f}".format)
avg_purchase_price_df

Gender
Female                   $2.82
Male                     $2.95
Other / Non-Disclosed    $3.25
Name: Avg Price, dtype: object

In [20]:
#Continue building purchase analysis table (by gender)
purchases_df = purchase_count_df.join(avg_purchase_price_df)
purchases_df

Unnamed: 0_level_0,Total Purchases,Avg Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,633,$2.95
Female,136,$2.82
Other / Non-Disclosed,11,$3.25


In [21]:
#Calculate total purchase value per gender
purchase_value = purchase_df.groupby('Gender')
purchase_value_df = purchase_value['Price'].sum().map("${:,.2f}".format)
purchase_value_df

Gender
Female                     $382.91
Male                     $1,867.68
Other / Non-Disclosed       $35.74
Name: Price, dtype: object

In [22]:
#Create a dataframe from purchase value per gender
purchase_value_df = pd.DataFrame(purchase_value_df)
purchase_value_df.index.name = 'Gender'
purchase_value_df = purchase_value_df.rename(columns={'Price':'Value'})
purchase_value_df

Unnamed: 0_level_0,Value
Gender,Unnamed: 1_level_1
Female,$382.91
Male,"$1,867.68"
Other / Non-Disclosed,$35.74


In [23]:
#Add total value to dataframe of purchase analysis per gender
purchases_df["Total Value"] = purchase_value_df
purchases_df

Unnamed: 0_level_0,Total Purchases,Avg Price,Total Value
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Male,633,$2.95,"$1,867.68"
Female,136,$2.82,$382.91
Other / Non-Disclosed,11,$3.25,$35.74


In [24]:
#Make some changes so I can do math
total_val = purchases_df['Total Value'].replace({'\$': '', ',': ''}, regex=True)
total_val

Gender
Male                     1867.68
Female                    382.91
Other / Non-Disclosed      35.74
Name: Total Value, dtype: object

In [25]:
#Make one more change for math in next step
total_val = total_val.astype(float)
total_val

Gender
Male                     1867.68
Female                    382.91
Other / Non-Disclosed      35.74
Name: Total Value, dtype: float64

In [26]:
#Calculate normalized totals of purchases per gender
norm_totals = total_val / total_genders_df
norm_totals = norm_totals.map("${:,.2f}".format)
norm_totals

Gender
Male                     $4.02
Female                   $3.83
Other / Non-Disclosed    $4.47
dtype: object

In [27]:
#Add normalized totals to get complete table for purchase analysis by genders
purchases_df["Normalized Totals"] = norm_totals
purchases_df

Unnamed: 0_level_0,Total Purchases,Avg Price,Total Value,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Male,633,$2.95,"$1,867.68",$4.02
Female,136,$2.82,$382.91,$3.83
Other / Non-Disclosed,11,$3.25,$35.74,$4.47


In [28]:
#Create the bins of ages and labels for Age Demographics table
bins = [0, 10, 15, 20, 25,30,35,40,45,100]
label_names = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40-44', '45+']

In [29]:
#Start by creating a new column with bins titled "Age Group"
age_bins_df = purchase_df
age_bins_df["Age Group"] = pd.cut(
    purchase_df["Age"], bins, labels=label_names)
age_bins_df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN,Age Group
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34,35-39
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46,20-24
2,34,Male,174,Primitive Blade,2.46,Assastnya25,30-34
3,21,Male,92,Final Critic,1.36,Pheusrical25,20-24
4,23,Male,63,Stormfury Mace,1.27,Aela59,20-24


In [30]:
#Caluclate number of purchases by age group
count_purchases_age = age_bins_df.groupby('Age Group')
count_purchases_age = count_purchases_age.count()['Price']
count_purchases_age

Age Group
<10       32
10-14     78
15-19    184
20-24    305
25-29     76
30-34     58
35-39     44
40-44      3
45+        0
Name: Price, dtype: int64

In [31]:
#Calculate average of purchases by age group
avg_purchases_age = age_bins_df.groupby('Age Group')
avg_purchases_age = avg_purchases_age.mean()['Price'].map("${:,.2f}".format)
avg_purchases_age = avg_purchases_age.replace('$nan', '$0.00')
avg_purchases_age

Age Group
<10      $3.02
10-14    $2.87
15-19    $2.87
20-24    $2.96
25-29    $2.89
30-34    $3.07
35-39    $2.90
40-44    $2.88
45+      $0.00
Name: Price, dtype: object

In [32]:
#Calculate total value of purchases by age group
total_purchases_age = age_bins_df.groupby('Age Group')
total_purchases_age = total_purchases_age.sum()['Price'].map("${:,.2f}".format)
total_purchases_age

Age Group
<10       $96.62
10-14    $224.15
15-19    $528.74
20-24    $902.61
25-29    $219.82
30-34    $178.26
35-39    $127.49
40-44      $8.64
45+        $0.00
Name: Price, dtype: object

In [33]:
#Calculate number of players per age group (dropping duplicates in SN) to use for normalized totals
sn_totals_age = age_bins_df.drop_duplicates(subset=['SN'], keep=False)
sn_totals_age = sn_totals_age.groupby('Age Group')
sn_totals_age = sn_totals_age.count()['Price']
sn_totals_age

Age Group
<10       13
10-14     35
15-19    100
20-24    175
25-29     35
30-34     32
35-39     12
40-44      3
45+        0
Name: Price, dtype: int64

In [34]:
#Calculate normalized total purchases by age group
norm_age_totals = age_bins_df.groupby('Age Group')
norm_age_totals = norm_age_totals.sum()['Price']
norm_age_totals = norm_age_totals / sn_totals_age
norm_age_totals = norm_age_totals.map("${:,.2f}".format)
norm_age_totals = norm_age_totals.replace('$nan', '$0.00')
norm_age_totals

Age Group
<10       $7.43
10-14     $6.40
15-19     $5.29
20-24     $5.16
25-29     $6.28
30-34     $5.57
35-39    $10.62
40-44     $2.88
45+       $0.00
Name: Price, dtype: object

In [35]:
#Start to build purchases by age group table
age_df = pd.DataFrame(count_purchases_age)
age_df = age_df.rename(columns={'Price':'Total Purchases'})
age_df

Unnamed: 0_level_0,Total Purchases
Age Group,Unnamed: 1_level_1
<10,32
10-14,78
15-19,184
20-24,305
25-29,76
30-34,58
35-39,44
40-44,3
45+,0


In [36]:
#Next step in building purchase by age group table
age_df['Avg Purchase Price'] = avg_purchases_age
age_df

Unnamed: 0_level_0,Total Purchases,Avg Purchase Price
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,32,$3.02
10-14,78,$2.87
15-19,184,$2.87
20-24,305,$2.96
25-29,76,$2.89
30-34,58,$3.07
35-39,44,$2.90
40-44,3,$2.88
45+,0,$0.00


In [37]:
#Next step in building purchase by age group table
age_df['Total Purchase Value'] = total_purchases_age
age_df

Unnamed: 0_level_0,Total Purchases,Avg Purchase Price,Total Purchase Value
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
<10,32,$3.02,$96.62
10-14,78,$2.87,$224.15
15-19,184,$2.87,$528.74
20-24,305,$2.96,$902.61
25-29,76,$2.89,$219.82
30-34,58,$3.07,$178.26
35-39,44,$2.90,$127.49
40-44,3,$2.88,$8.64
45+,0,$0.00,$0.00


In [38]:
#Last step in building purchase by age group table
age_df['Normalized Purchase Price'] = norm_age_totals
age_df

Unnamed: 0_level_0,Total Purchases,Avg Purchase Price,Total Purchase Value,Normalized Purchase Price
Age Group,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
<10,32,$3.02,$96.62,$7.43
10-14,78,$2.87,$224.15,$6.40
15-19,184,$2.87,$528.74,$5.29
20-24,305,$2.96,$902.61,$5.16
25-29,76,$2.89,$219.82,$6.28
30-34,58,$3.07,$178.26,$5.57
35-39,44,$2.90,$127.49,$10.62
40-44,3,$2.88,$8.64,$2.88
45+,0,$0.00,$0.00,$0.00


In [39]:
#Calculate total amount spent per screen name, sort from most to least
total_price = purchase_df.groupby(['SN']).sum()["Price"]
total_price_df = pd.DataFrame(total_price).sort_values('Price', ascending=False)
total_price_df.head()

Unnamed: 0_level_0,Price
SN,Unnamed: 1_level_1
Undirrala66,17.06
Saedue76,13.56
Mindimnya67,12.74
Haellysu29,12.73
Eoda93,11.58


In [40]:
#Calculate total number of purchases by screen name
total_count = purchase_df.groupby(['SN']).count()["Price"]
total_count.sort_values(ascending=False).head()

SN
Undirrala66    5
Hailaphos89    4
Mindimnya67    4
Qarwen67       4
Sondastan54    4
Name: Price, dtype: int64

In [41]:
#Combine purchase count with price to begin top spenders table
total_price_df["Purchase Count"] = total_count
total_price_df.head()

Unnamed: 0_level_0,Price,Purchase Count
SN,Unnamed: 1_level_1,Unnamed: 2_level_1
Undirrala66,17.06,5
Saedue76,13.56,4
Mindimnya67,12.74,4
Haellysu29,12.73,3
Eoda93,11.58,3


In [42]:
#Calculate average price paid per screen name
avg_price = purchase_df.groupby(['SN']).mean()["Price"].map("${:,.2f}".format)
avg_price.head()

SN
Adairialis76    $2.46
Aduephos78      $2.23
Aeduera68       $1.93
Aela49          $2.46
Aela59          $1.27
Name: Price, dtype: object

In [43]:
#Add average price per screen name to top spenders table
total_price_df["Average Price"] = avg_price
total_price_df.head()

Unnamed: 0_level_0,Price,Purchase Count,Average Price
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,17.06,5,$3.41
Saedue76,13.56,4,$3.39
Mindimnya67,12.74,4,$3.18
Haellysu29,12.73,3,$4.24
Eoda93,11.58,3,$3.86


In [44]:
#Calculate total purchase value per screen name
total_purchase_value = purchase_df.groupby(['SN']).sum()["Price"].map("${:,.2f}".format)
total_purchase_value.head()

SN
Adairialis76    $2.46
Aduephos78      $6.70
Aeduera68       $5.80
Aela49          $2.46
Aela59          $1.27
Name: Price, dtype: object

In [45]:
#Add total purchase value per screen name to top spenders table
total_price_df["Total Purchase Value"] = total_purchase_value
total_price_df.head()

Unnamed: 0_level_0,Price,Purchase Count,Average Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Undirrala66,17.06,5,$3.41,$17.06
Saedue76,13.56,4,$3.39,$13.56
Mindimnya67,12.74,4,$3.18,$12.74
Haellysu29,12.73,3,$4.24,$12.73
Eoda93,11.58,3,$3.86,$11.58


In [46]:
#Delete an unnecessary column, display final top spenders table
total_price_df = total_price_df.drop('Price', 1)
total_price_df.head()

Unnamed: 0_level_0,Purchase Count,Average Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Undirrala66,5,$3.41,$17.06
Saedue76,4,$3.39,$13.56
Mindimnya67,4,$3.18,$12.74
Haellysu29,3,$4.24,$12.73
Eoda93,3,$3.86,$11.58


In [47]:
#Begin most popular items table by sorting by item ID and name and sorting from most to least
purchases_count = purchase_df.groupby(['Item ID','Item Name']).count()['Price']
purchases_count_df = pd.DataFrame(purchases_count)
purchases_count_df = purchases_count_df.sort_values('Price',ascending=False)
purchases_count_df = purchases_count_df.rename(columns={"Price":"Purchase Count"})
purchases_count_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count
Item ID,Item Name,Unnamed: 2_level_1
39,"Betrayal, Whisper of Grieving Widows",11
84,Arcane Gem,11
31,Trickster,9
175,Woeful Adamantite Claymore,9
13,Serenity,9


In [48]:
#Calculate average purchase price per item for most popular items table
purchase_price = purchase_df.groupby(['Item ID','Item Name'])['Price']
purchase_price = purchase_price.mean().map("${:,.2f}".format)

In [49]:
#Calculate total purchase price per item for most popular items table
total_price = purchase_df.groupby(['Item ID','Item Name'])['Price']
total_price = total_price.sum().map("${:,.2f}".format)

In [50]:
#Finalize most popular items table
purchases_count_df['Price'] = purchase_price
purchases_count_df['Total Price']=total_price
purchases_count_df.head(6)

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Price,Total Price
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
39,"Betrayal, Whisper of Grieving Widows",11,$2.35,$25.85
84,Arcane Gem,11,$2.23,$24.53
31,Trickster,9,$2.07,$18.63
175,Woeful Adamantite Claymore,9,$1.24,$11.16
13,Serenity,9,$1.49,$13.41
34,Retribution Axe,9,$4.14,$37.26


In [51]:
#Sum total purchase values for each item
item_total_purchase = purchase_df.groupby(['Item ID','Item Name'])['Price']
item_total_purchase = item_total_purchase.sum()
item_total_purchase_df = pd.DataFrame(item_total_purchase)
item_total_purchase_df = item_total_purchase_df.rename(columns={"Price":"Total Purchase Value"})
item_total_purchase_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1
0,Splinter,1.82
1,Crucifer,9.12
2,Verdict,3.4
3,Phantomlight,1.79
4,Bloodlord's Fetish,2.28


In [52]:
#Calculate item purchase count and add to most profitable item table
item_purchase_count = purchase_df.groupby(['Item ID','Item Name'])['Price']
item_purchase_count = item_purchase_count.count()
item_total_purchase_df['Total Purchase Count'] = item_purchase_count
item_total_purchase_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Purchase Value,Total Purchase Count
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1
0,Splinter,1.82,1
1,Crucifer,9.12,4
2,Verdict,3.4,1
3,Phantomlight,1.79,1
4,Bloodlord's Fetish,2.28,1


In [53]:
#Use purchase price calculated before; add to most profitable item table
item_total_purchase_df['Purchase Price'] = purchase_price
item_total_purchase_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Purchase Value,Total Purchase Count,Purchase Price
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Splinter,1.82,1,$1.82
1,Crucifer,9.12,4,$2.28
2,Verdict,3.4,1,$3.40
3,Phantomlight,1.79,1,$1.79
4,Bloodlord's Fetish,2.28,1,$2.28


In [54]:
#Sort most profitable item table by total purchase value
item_total_purchase_df = item_total_purchase_df.sort_values('Total Purchase Value', ascending=False)
item_total_purchase_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Purchase Value,Total Purchase Count,Purchase Price
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,37.26,9,$4.14
115,Spectral Diamond Doomblade,29.75,7,$4.25
32,Orenmir,29.7,6,$4.95
103,Singed Scalpel,29.22,6,$4.87
107,"Splitter, Foe Of Subtlety",28.88,8,$3.61


In [55]:
#Reorder columns for final most profitable item table
cols = ['Total Purchase Count','Purchase Price', 'Total Purchase Value']
item_total_purchase_df = item_total_purchase_df[cols]
item_total_purchase_df.head()

Unnamed: 0_level_0,Unnamed: 1_level_0,Total Purchase Count,Purchase Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
34,Retribution Axe,9,$4.14,37.26
115,Spectral Diamond Doomblade,7,$4.25,29.75
32,Orenmir,6,$4.95,29.7
103,Singed Scalpel,6,$4.87,29.22
107,"Splitter, Foe Of Subtlety",8,$3.61,28.88
