In [1]:
import pandas as pd
import numpy as np

In [2]:
Heroes_original_df = pd.read_json('raw_data/purchase_data.json')

In [3]:
Heroes_original_df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 780 entries, 0 to 779
Data columns (total 6 columns):
Age          780 non-null int64
Gender       780 non-null object
Item ID      780 non-null int64
Item Name    780 non-null object
Price        780 non-null float64
SN           780 non-null object
dtypes: float64(1), int64(2), object(3)
memory usage: 42.7+ KB


In [4]:
Heroes_original_df.columns

Index(['Age', 'Gender', 'Item ID', 'Item Name', 'Price', 'SN'], dtype='object')

In [5]:
Heroes_original_df.head()

Unnamed: 0,Age,Gender,Item ID,Item Name,Price,SN
0,38,Male,165,Bone Crushing Silver Skewer,3.37,Aelalis34
1,21,Male,119,"Stormbringer, Dark Blade of Ending Misery",2.32,Eolo46
2,34,Male,174,Primitive Blade,2.46,Assastnya25
3,21,Male,92,Final Critic,1.36,Pheusrical25
4,23,Male,63,Stormfury Mace,1.27,Aela59


In [6]:
Heroes_original_df['Gender'].value_counts()

Male                     633
Female                   136
Other / Non-Disclosed     11
Name: Gender, dtype: int64

## Total Number of Players

In [7]:
# Total Number of Players
Total_no_of_players = pd.DataFrame([Heroes_original_df['SN'].nunique()],
                                   columns = ['Total Players'])
Total_no_of_players

Unnamed: 0,Total Players
0,573


## Purchasing Analysis (Total)

In [8]:
Purchasing_analysis_total = pd.DataFrame([[Heroes_original_df['Item Name'].nunique(), Heroes_original_df['Price'].mean(),
                                     Heroes_original_df['Item Name'].count(),Heroes_original_df['Price'].sum()]],
                                   columns = ['No of Unique Items','Average Price','Number of Purchases','Total Revenue']
                                  )
Purchasing_analysis_total['Average Price'] = Purchasing_analysis_total['Average Price'].map("${:.2f}".format)
Purchasing_analysis_total['Total Revenue'] = Purchasing_analysis_total['Total Revenue'].map("${:.2f}".format)

Purchasing_analysis_total


Unnamed: 0,No of Unique Items,Average Price,Number of Purchases,Total Revenue
0,179,$2.93,780,$2286.33


### Gender Demographics

In [9]:
Gender_Demographics = pd.DataFrame(Heroes_original_df.groupby('Gender').SN.nunique())
Gender_Demographics.columns = ['Total Count']
Gender_Demographics['Percentage of Players'] = round(Gender_Demographics['Total Count']/Heroes_original_df['SN'].nunique()*100)
Gender_Demographics = Gender_Demographics[['Percentage of Players','Total Count']]
Gender_Demographics.sort_values('Total Count', ascending = False)

Unnamed: 0_level_0,Percentage of Players,Total Count
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1
Male,81.0,465
Female,17.0,100
Other / Non-Disclosed,1.0,8


### Purchasing Analysis (Gender)


In [11]:
Purchasing_analysis_gender = pd.DataFrame(Heroes_original_df.groupby(['Gender']).SN.nunique())
Purchasing_analysis_gender.columns = ['Purchase Count']
Purchasing_analysis_gender['Average Purchase Price'] = Heroes_original_df.groupby(['Gender']).Price.mean()
Purchasing_analysis_gender['Total Purchase Price'] = Heroes_original_df.groupby(['Gender']).Price.sum()
Purchasing_analysis_gender['Normalized Totals'] = Purchasing_analysis_gender['Total Purchase Price'] / Purchasing_analysis_gender['Purchase Count']
Purchasing_analysis_gender['Average Purchase Price'] = Purchasing_analysis_gender['Average Purchase Price'].map("${:.2f}".format)
Purchasing_analysis_gender['Total Purchase Price'] = Purchasing_analysis_gender['Total Purchase Price'].map("${:.2f}".format)
Purchasing_analysis_gender['Normalized Totals'] = Purchasing_analysis_gender['Normalized Totals'].map("${:.2f}".format)
Purchasing_analysis_gender

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Price,Normalized Totals
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Female,100,$2.82,$382.91,$3.83
Male,465,$2.95,$1867.68,$4.02
Other / Non-Disclosed,8,$3.25,$35.74,$4.47


### Age Demographics

In [12]:
ranges = [ (0,10),(10,15), (15, 20), (20, 25),(25,30),(30,35),(35,40) ]
Age_ranges = ['<10','10-14','15-19','20-24','25-29','30-34','35-39','40+']
Age_ranges_count = []
Age_ranges_count.append(Heroes_original_df.loc[Heroes_original_df['Age'] < 10].SN.nunique())
Age_ranges_count.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(10,15))].SN.nunique())
Age_ranges_count.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(15,20))].SN.nunique())
Age_ranges_count.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(20,25))].SN.nunique())
Age_ranges_count.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(25,30))].SN.nunique())
Age_ranges_count.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(30,35))].SN.nunique())
Age_ranges_count.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(35,40))].SN.nunique())
Age_ranges_count.append(Heroes_original_df.loc[Heroes_original_df['Age'] >= 40].SN.nunique())
Age_demographics = pd.DataFrame(list(zip(Age_ranges_count)),index=Age_ranges)
Age_demographics.index.name = 'Age Limits'
Age_demographics.columns = ['Total Count']
Age_demographics['Percentage Of Players'] = round(((Age_demographics['Total Count'] / Heroes_original_df['SN'].nunique()) * 100),2)
Age_demographics = Age_demographics[['Percentage Of Players','Total Count']]
Age_demographics

Unnamed: 0_level_0,Percentage Of Players,Total Count
Age Limits,Unnamed: 1_level_1,Unnamed: 2_level_1
<10,3.32,19
10-14,4.01,23
15-19,17.45,100
20-24,45.2,259
25-29,15.18,87
30-34,8.2,47
35-39,4.71,27
40+,1.92,11


### Purchasing Analysis (Age)

In [13]:
Age_ranges_mean = []
Age_ranges_mean.append(Heroes_original_df.loc[Heroes_original_df['Age'] < 10].Price.mean())
Age_ranges_mean.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(10,15))].Price.mean())
Age_ranges_mean.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(15,20))].Price.mean())
Age_ranges_mean.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(20,25))].Price.mean())
Age_ranges_mean.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(25,30))].Price.mean())
Age_ranges_mean.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(30,35))].Price.mean())
Age_ranges_mean.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(35,40))].Price.mean())
Age_ranges_mean.append(Heroes_original_df.loc[Heroes_original_df['Age'] >= 40].Price.mean())

Age_ranges_sum = []
Age_ranges_sum.append(Heroes_original_df.loc[Heroes_original_df['Age'] < 10].Price.sum())
Age_ranges_sum.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(10,15))].Price.sum())
Age_ranges_sum.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(15,20))].Price.sum())
Age_ranges_sum.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(20,25))].Price.sum())
Age_ranges_sum.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(25,30))].Price.sum())
Age_ranges_sum.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(30,35))].Price.sum())
Age_ranges_sum.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(35,40))].Price.sum())
Age_ranges_sum.append(Heroes_original_df.loc[Heroes_original_df['Age'] >= 40].Price.sum())

Age_ranges_count = []
Age_ranges_count.append(Heroes_original_df.loc[Heroes_original_df['Age'] < 10].SN.count())
Age_ranges_count.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(10,15))].SN.count())
Age_ranges_count.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(15,20))].SN.count())
Age_ranges_count.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(20,25))].SN.count())
Age_ranges_count.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(25,30))].SN.count())
Age_ranges_count.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(30,35))].SN.count())
Age_ranges_count.append(Heroes_original_df.loc[Heroes_original_df['Age'].isin(range(35,40))].SN.count())
Age_ranges_count.append(Heroes_original_df.loc[Heroes_original_df['Age'] >= 40].SN.count())

Age_ranges_count
Age_ranges_mean
Age_ranges_sum


[83.46000000000001,
 96.94999999999999,
 386.41999999999996,
 978.7700000000001,
 370.33,
 197.24999999999994,
 119.4,
 53.75]