In [1]:
# Dependencies and Setup
import pandas as pd

# File to Load (Remember to Change These)
file_to_load = "HeroesOfPymoli/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

In [2]:
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [3]:
# TOTAL PLAYERS

# Print total number of players
tot_players = purchase_data['SN'].nunique()

print('Total Players: ', tot_players)

Total Players:  576


In [4]:
# PURCHASING ANALYSIS (TOTAL)

# Print total number of unique items
unique_items = purchase_data['Item Name'].nunique()

print('Unique Items: ', unique_items)

Unique Items:  179


In [49]:
# Print average item price, format to include dollar sign and round to two decimals
avg_price = '${:.2f}'.format(purchase_data['Price'].mean())

print('Average Price per Item: ', avg_price)

Average Price per Item:  $3.05


In [6]:
# Print total purchases
tot_purchases = len(purchase_data)

print('Total Purchases: ', tot_purchases)

Total Purchases:  780


In [47]:
# Print total revenue, format to include dollar sign and round to two decimals
tot_rev = '${:.2f}'.format(purchase_data['Price'].sum())

print('Total Revenue: ', tot_rev)

Total Revenue:  $2379.77


In [50]:
# Create/display summary data frame
pur_analysis_df = pd.DataFrame(
                        {'Total Players' : [tot_players],
                        'Unique Items' : [unique_items],
                        'Average Price' : [avg_price],
                        'Total Purchases' : [tot_purchases],
                        'Total Revenue' : [tot_rev]
                        }
                                )

pur_analysis_df

Unnamed: 0,Total Players,Unique Items,Average Price,Total Purchases,Total Revenue
0,576,179,$3.05,780,$2379.77


In [9]:
# GENDER DEMOGRAPHICS

# Drop duplicates in SN column to get total unique users
pur_data_no_dups = purchase_data.drop_duplicates('SN')

In [10]:
# Male player count
male_count = pur_data_no_dups['Gender'].value_counts().Male

male_count

484

In [11]:
# Male player proportion
male_prop = pur_data_no_dups['Gender'].value_counts(normalize = True).Male

# Male player percentage
male_pct = '{:.2f}%'.format(male_prop * 100)

male_pct

'84.03%'

In [12]:
# Female player count
female_count = pur_data_no_dups['Gender'].value_counts().Female

female_count

81

In [13]:
# Female player proportion
female_prop = pur_data_no_dups['Gender'].value_counts(normalize = True).Female

# Female player percentage
female_pct = '{:.2f}%'.format(female_prop * 100)

female_pct

'14.06%'

In [14]:
# Other/non-disclosed player count
other_count = tot_players - male_count - female_count

other_count

11

In [15]:
# Other/non-disclosed player proportion
other_prop = other_count / tot_players

# Other/non-disclosed player percentage
other_pct = '{:.2f}%'.format(other_prop * 100)

other_pct

'1.91%'

In [16]:
# Create/display gender demographics data frame
gender_demo_df = pd.DataFrame(
                        {'Total Count' : [male_count, female_count, other_count],
                        'Percentage' : [male_pct, female_pct, other_pct]
                        },
                        index = ['Male', 'Female', 'Other / Non-Disclosed']
                                )

gender_demo_df

Unnamed: 0,Total Count,Percentage
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


In [29]:
# PURCHASING ANALYSIS (GENDER)

# Filter dataset by male purchases
male_pur_df = purchase_data.loc[purchase_data['Gender'] == 'Male', :]

male_pur_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [30]:
# Filter dataset by female purchases
female_pur_df = purchase_data.loc[purchase_data['Gender'] == 'Female', :]

female_pur_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
15,15,Lisassa64,21,Female,98,"Deadline, Voice Of Subtlety",2.89
18,18,Reunasu60,22,Female,82,Nirvana,4.9
38,38,Reulae52,10,Female,116,Renewed Skeletal Katana,4.18
41,41,Assosia88,20,Female,7,"Thorn, Satchel of Dark Souls",1.33
55,55,Phaelap26,25,Female,84,Arcane Gem,3.79


In [31]:
# Filter dataset by other/non-disclosed purchases
other_pur_df = purchase_data.loc[purchase_data['Gender'] == 'Other / Non-Disclosed', :]

other_pur_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
9,9,Chanosian48,35,Other / Non-Disclosed,136,Ghastly Adamantite Protector,3.58
22,22,Siarithria38,38,Other / Non-Disclosed,24,Warped Fetish,3.81
82,82,Haerithp41,16,Other / Non-Disclosed,160,Azurewrath,4.4
111,111,Sundim98,21,Other / Non-Disclosed,41,Orbit,4.75
228,228,Jiskirran77,20,Other / Non-Disclosed,80,Dreamsong,3.39


In [32]:
# Male purchase count
male_purchase_count = len(male_pur_df)

male_purchase_count

652

In [33]:
# Female purchase count
female_purchase_count = len(female_pur_df)

female_purchase_count

113

In [34]:
# Other purchase count
other_purchase_count = len(other_pur_df)

other_purchase_count

15

In [44]:
# Average male purchase price, formatted
male_mean_pur = '${:.2f}'.format(male_pur_df['Price'].mean())

male_mean_pur

'$3.02'

In [45]:
# Average female purchase price, formatted
female_mean_pur = '${:.2f}'.format(female_pur_df['Price'].mean())

female_mean_pur

'$3.20'

In [42]:
# Average other/non-disclosed purchase price, formatted
other_mean_pur = '${:.2f}'.format(other_pur_df['Price'].mean())

other_mean_pur

'$3.35'

In [40]:
# Total male purchase value, formatted
male_tot_pur = '${:.2f}'.format(male_pur_df['Price'].sum())

male_tot_pur

'$1967.64'

In [51]:
# Total female purchase value, formatted
female_tot_pur = '${:.2f}'.format(female_pur_df['Price'].sum())

female_tot_pur

'$361.94'

In [52]:
# Total other/non-disclosed purchase value, formatted
other_tot_pur = '${:.2f}'.format(other_pur_df['Price'].sum())

other_tot_pur

'$50.19'

In [None]:
# Average total purchase per individual male player


In [56]:
# Using GroupBy in order to separate the data into fields according to "state" values
grouped_pur_df = purchase_data.groupby(['Gender'])

grouped_pur_df.count().head()

Unnamed: 0_level_0,Purchase ID,SN,Age,Item ID,Item Name,Price
Gender,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Female,113,113,113,113,113,113
Male,652,652,652,652,652,652
Other / Non-Disclosed,15,15,15,15,15,15


In [18]:
# AGE DEMOGRAPHICS

In [19]:
# PURCHASING ANALYSIS (AGE)

In [20]:
# TOP SPENDERS

In [21]:
# MOST POPULAR ITEMS

In [22]:
# MOST PROFITABLE ITEMS