In [28]:
# Load libraries
import pandas as pd
import json
import numpy as py

# Configuration: path of the JSON input file
fileInput = 'purchase_data.json'

# Read the input file into a DataFrame
pdframePurchaseData = pd.read_json(path_or_buf=fileInput)

# Unique players
# Count the distinct 'SN' values directly. Grouping the whole frame by 'SN' and
# running nunique() on every column just to take len() of the result did far
# more work to arrive at the same number.
UniquePlayerCount = pdframePurchaseData['SN'].nunique()
pdframeOutput = pd.DataFrame({'Total Players': [UniquePlayerCount]})
# NOTE: a bare expression that is not the last statement of a cell is only
# rendered when IPython's ast_node_interactivity option is set to 'all'.
pdframeOutput

# Purchase Analysis (Total)
# Distinct item count taken straight from the column instead of
# len(groupby('Item ID').nunique()), which computed per-column distinct counts
# for every group only to count the groups.
UniqueItems = pdframePurchaseData['Item ID'].nunique()
AvgPurchasePrice = pdframePurchaseData['Price'].mean()
TotalNumberPurchases = len(pdframePurchaseData)  # one row per purchase
TotalRevenue = pdframePurchaseData['Price'].sum()

# One-row summary. `columns=` pins the display order so it does not depend on
# how the running Python/pandas version orders dict keys.
pdfOutput = pd.DataFrame(
    {
        'Number of Unique Items': [UniqueItems],
        'Average Price': [AvgPurchasePrice],
        'Number of Purchases': [TotalNumberPurchases],
        'Total Revenue': [TotalRevenue],
    },
    columns=[
        'Number of Unique Items',
        'Average Price',
        'Number of Purchases',
        'Total Revenue',
    ],
)
# NOTE: a bare expression that is not the last statement of a cell is only
# rendered when IPython's ast_node_interactivity option is set to 'all'.
pdfOutput

# Gender demographics
# Distinct 'SN' values per gender, plus each gender's share (in percent) of
# the overall unique-player count.
pdfOutput2 = (
    pdframePurchaseData
    .groupby('Gender')['SN']
    .nunique()
    .to_frame('Unique Players')
)
pdfOutput2['Pct'] = pdfOutput2['Unique Players'].div(UniquePlayerCount).mul(100)
print(pdfOutput2)
print()

# Purchasing analysis (gender)
# Per-gender purchase count, mean price and total spend.
# The total is summed directly rather than reconstructed as mean * count
# (which accumulates floating-point error), and every column is created under
# its final name, so labels can never be attached to the wrong aggregate.
purchasesByGender = pdframePurchaseData.groupby('Gender')
pdfOutput3 = pd.DataFrame(
    {
        'Purchases': purchasesByGender['Item Name'].count(),
        'Avg Price': purchasesByGender['Price'].mean(),
        'Total Purchase Value': purchasesByGender['Price'].sum(),
    },
    columns=['Purchases', 'Avg Price', 'Total Purchase Value'],
)
print(pdfOutput3)
print("")

# Age demographics
# pd.cut builds right-closed intervals by default, so these edges produce
# (0, 10], (10, 14], ..., (30, 34], (34, inf].
# The last edge is open-ended: with a fixed upper edge of 38 every age above
# 38 was binned as NaN and silently dropped from the table below.
bins = [0, 10, 14, 18, 22, 26, 30, 34, float('inf')]
# Labels match the actual intervals: the first bin includes age 10 and the
# last one includes age 35 (range labels assume whole-number ages).
group_names = ['<=10', '11-14', '15-18', '19-22', '23-26', '27-30', '31-34', '35+']

# Work on a copy so the raw purchase frame is left untouched.
pdfBinned = pdframePurchaseData.copy()
categories = pd.cut(pdfBinned['Age'], bins, labels=group_names)
pdfBinned['Ages'] = categories                              # labelled age bracket
pdfBinned['agesBinned'] = pd.cut(pdfBinned['Age'], bins)    # raw interval, kept for inspection

# Per-bracket purchase count, mean price and total spend. The total is summed
# directly (not mean * count) and columns are created under their final names.
purchasesByAge = pdfBinned.groupby('Ages')
pdfOutput4 = pd.DataFrame(
    {
        'Purchases': purchasesByAge['Item Name'].count(),
        'Avg Price': purchasesByAge['Price'].mean(),
        'Total Purchase Value': purchasesByAge['Price'].sum(),
    },
    columns=['Purchases', 'Avg Price', 'Total Purchase Value'],
)
print(pdfOutput4)
print("")

# Top spenders
# Per-'SN' purchase count, mean price and total spend; the five largest
# totals are printed.
# The total is summed directly rather than reconstructed as mean * count
# (which accumulates floating-point error), and every column is created under
# its final name instead of being relabelled by position afterwards.
purchasesByPlayer = pdframePurchaseData.groupby('SN')
pdfOutput5 = pd.DataFrame(
    {
        'Purchases': purchasesByPlayer['Item Name'].count(),
        'Avg Price': purchasesByPlayer['Price'].mean(),
        'Total Purchase Value': purchasesByPlayer['Price'].sum(),
    },
    columns=['Purchases', 'Avg Price', 'Total Purchase Value'],
)
print(pdfOutput5.sort_values('Total Purchase Value', ascending=False).head(5))
print("")

# Most popular items
# One row per (Item ID, Item Name, Price) combination with the number of
# purchases and the summed price. Columns are created under their final names
# rather than relabelled by position after a dict-based agg, so the labels
# cannot end up on the wrong aggregate.
purchasesByItem = pdframePurchaseData.groupby(['Item ID', 'Item Name', 'Price'])
pdfOutput6 = pd.DataFrame(
    {
        # Rows per group; equals the non-null count of the grouping key.
        'Purchases': purchasesByItem.size(),
        'Total PV': purchasesByItem['Price'].sum(),
    },
    columns=['Purchases', 'Total PV'],
)
cols = pdfOutput6.columns.tolist()  # not used in this cell; kept in case a later cell reads it
print(pdfOutput6.sort_values('Purchases', ascending=False).head(5))
print("")

# Most profitable items
# Top five rows of the per-item summary, ranked by total purchase value.
print(
    pdfOutput6
    .sort_values(by='Total PV', ascending=False)
    .iloc[:5]
)

   Total Players
0            573

   Average Price  Number of Purchases  Number of Unique Items  Total Revenue
0       2.931192                  780                     183        2286.33

                       Unique Players        Pct
Gender                                          
Female                            100  17.452007
Male                              465  81.151832
Other / Non-Disclosed               8   1.396161

                       Purchases  Avg Price  Total Purchase Value
Gender                                                           
Female                       136   2.815515                382.91
Male                         633   2.950521               1867.68
Other / Non-Disclosed         11   3.249091                 35.74

       Purchases  Avg Price  Total Purchase Value
Ages                                             
<10           32   3.019375                 96.62
11-14         31   2.702903                 83.79
15-18        111   2.876757      

In [23]:
# Display the full per-item summary frame (pdfOutput6 is assigned in the
# "Most popular items" section of the analysis cell).
pdfOutput6

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Purchases,Total PV
Item ID,Item Name,Price,Unnamed: 3_level_1,Unnamed: 4_level_1
0,Splinter,1.82,1,1.82
1,Crucifer,2.28,4,9.12
2,Verdict,3.40,1,3.40
3,Phantomlight,1.79,1,1.79
4,Bloodlord's Fetish,2.28,1,2.28
5,Putrid Fan,1.32,3,3.96
6,Rusty Skull,1.20,3,3.60
7,"Thorn, Satchel of Dark Souls",4.51,6,27.06
8,"Purgatory, Gem of Regret",3.91,6,23.46
9,"Thorn, Conqueror of the Corrupted",2.04,2,4.08
