In [1]:
# Dependencies and Setup
import pandas as pd

# Location of the purchase records CSV (adjust to match your environment)
file_to_load = "purchase_data.csv"

# Load the CSV into the DataFrame that the rest of the notebook reads from
purchase_data = pd.read_csv(filepath_or_buffer=file_to_load)

In [2]:
# Preview the first five rows to check that the columns loaded as expected
purchase_data.head(n=5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


# TOTAL PLAYERS

In [3]:
# Count the distinct values in the 'SN' column: each distinct SN is treated
# as one player, no matter how many purchase rows it appears in
tot_players = purchase_data.loc[:, 'SN'].nunique()

print('Total Players: ', tot_players)

Total Players:  576


# PURCHASING ANALYSIS (TOTAL)

In [4]:
# Count the distinct item names that appear across all purchases
# NOTE(review): this keys on 'Item Name'; if two 'Item ID' values can share a
# name, the two columns would give different counts — confirm which is intended
unique_items = purchase_data.loc[:, 'Item Name'].nunique()

print('Unique Items: ', unique_items)

Unique Items:  179


In [5]:
# Mean of the 'Price' column over every purchase row
mean_item_price = purchase_data['Price'].mean()

# Render it as a dollar string with two decimals for display
avg_price = '${:.2f}'.format(mean_item_price)

print('Average Price per Item: ', avg_price)

Average Price per Item:  $3.05


In [6]:
# The number of rows in the dataset is used as the number of purchases
tot_purchases = purchase_data.shape[0]

print('Total Purchases: ', tot_purchases)

Total Purchases:  780


In [7]:
# Sum of the 'Price' column over every purchase row
revenue_sum = purchase_data['Price'].sum()

# Render it as a dollar string with two decimals for display
tot_rev = '${:.2f}'.format(revenue_sum)

print('Total Revenue: ', tot_rev)

Total Revenue:  $2379.77


In [8]:
# Collect the headline metrics computed above; each value is wrapped in a
# one-element list so the resulting table has exactly one row
pur_analysis_columns = {
    'Total Players': [tot_players],
    'Unique Items': [unique_items],
    'Average Price': [avg_price],
    'Total Purchases': [tot_purchases],
    'Total Revenue': [tot_rev],
}

# Build the one-row summary table and display it
pur_analysis_df = pd.DataFrame(pur_analysis_columns)

pur_analysis_df

Unnamed: 0,Total Players,Unique Items,Average Price,Total Purchases,Total Revenue
0,576,179,$3.05,780,$2379.77


# GENDER DEMOGRAPHICS

In [9]:
# Keep a single row per 'SN' value (the first one encountered) so that each
# player is counted once in the demographic breakdown below
pur_data_no_dups = purchase_data.drop_duplicates(subset='SN', keep='first')

## Male

In [10]:
# Number of de-duplicated players whose 'Gender' is 'Male'
male_count = pur_data_no_dups['Gender'].value_counts()['Male']

male_count

484

In [11]:
# Fraction of de-duplicated players whose 'Gender' is 'Male'
male_prop = pur_data_no_dups['Gender'].value_counts(normalize=True)['Male']

# The same fraction expressed as a percentage string with two decimals
male_pct = '{:.2f}%'.format(100 * male_prop)

male_pct

'84.03%'

## Female

In [12]:
# Number of de-duplicated players whose 'Gender' is 'Female'
female_count = pur_data_no_dups['Gender'].value_counts()['Female']

female_count

81

In [13]:
# Fraction of de-duplicated players whose 'Gender' is 'Female'
female_prop = pur_data_no_dups['Gender'].value_counts(normalize=True)['Female']

# The same fraction expressed as a percentage string with two decimals
female_pct = '{:.2f}%'.format(100 * female_prop)

female_pct

'14.06%'

## Other/Non-Disclosed

In [14]:
# Other/non-disclosed player count
# Read the count straight from the de-duplicated gender tally, the same way the
# male and female counts are obtained, instead of taking whatever is left after
# subtracting them from the player total: a residual would silently absorb any
# missing or unexpected gender label.
# .get(..., 0) yields 0 when the label does not occur at all.
other_count = pur_data_no_dups['Gender'].value_counts().get('Other / Non-Disclosed', 0)

other_count

11

In [15]:
# Fraction of all players that fall in the other/non-disclosed group
other_prop = other_count / tot_players

# The same fraction expressed as a percentage string with two decimals
other_pct = '{:.2f}%'.format(100 * other_prop)

other_pct

'1.91%'

## Summary Data Frame

In [16]:
# Row labels and column values are listed in the same order:
# male, female, other/non-disclosed
gender_demo_rows = ['Male', 'Female', 'Other / Non-Disclosed']
gender_demo_columns = {
    'Total Count': [male_count, female_count, other_count],
    'Percentage': [male_pct, female_pct, other_pct],
}

# Build the gender demographics table and display it
gender_demo_df = pd.DataFrame(gender_demo_columns, index=gender_demo_rows)

gender_demo_df

Unnamed: 0,Total Count,Percentage
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%


# PURCHASING ANALYSIS (GENDER)

## Filter Dataset by Gender

In [17]:
# Boolean mask marking the purchase rows whose 'Gender' is 'Male'
is_male_purchase = purchase_data['Gender'] == 'Male'

# Keep only those rows (all columns) and preview the result
male_pur_df = purchase_data.loc[is_male_purchase, :]

male_pur_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


In [18]:
# Boolean mask marking the purchase rows whose 'Gender' is 'Female'
is_female_purchase = purchase_data['Gender'] == 'Female'

# Keep only those rows (all columns) and preview the result
female_pur_df = purchase_data.loc[is_female_purchase, :]

female_pur_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
15,15,Lisassa64,21,Female,98,"Deadline, Voice Of Subtlety",2.89
18,18,Reunasu60,22,Female,82,Nirvana,4.9
38,38,Reulae52,10,Female,116,Renewed Skeletal Katana,4.18
41,41,Assosia88,20,Female,7,"Thorn, Satchel of Dark Souls",1.33
55,55,Phaelap26,25,Female,84,Arcane Gem,3.79


In [19]:
# Boolean mask marking the purchase rows whose 'Gender' is 'Other / Non-Disclosed'
is_other_purchase = purchase_data['Gender'] == 'Other / Non-Disclosed'

# Keep only those rows (all columns) and preview the result
other_pur_df = purchase_data.loc[is_other_purchase, :]

other_pur_df.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
9,9,Chanosian48,35,Other / Non-Disclosed,136,Ghastly Adamantite Protector,3.58
22,22,Siarithria38,38,Other / Non-Disclosed,24,Warped Fetish,3.81
82,82,Haerithp41,16,Other / Non-Disclosed,160,Azurewrath,4.4
111,111,Sundim98,21,Other / Non-Disclosed,41,Orbit,4.75
228,228,Jiskirran77,20,Other / Non-Disclosed,80,Dreamsong,3.39


## GroupBy Gender

In [20]:
# Split the purchase rows into one group per distinct 'Gender' value; the
# per-gender count, mean and sum cells below all aggregate this object
grouped_pur_gender = purchase_data.groupby(by=['Gender'])

## Purchase Count by Gender

In [21]:
# Number of 'Purchase ID' entries in each gender group
purchase_ids_by_gender = grouped_pur_gender['Purchase ID']
gender_count = purchase_ids_by_gender.count()
gender_count

Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Purchase ID, dtype: int64

## Average Purchase Price by Gender

In [22]:
# Average purchase price by gender
# The intermediate Series gets its own name: an earlier cell stores the overall
# average price as a formatted string in `avg_price`, and the purchasing
# summary table reads it. Reassigning `avg_price` here would make that summary
# cell pick up a per-gender Series if it were re-run after this cell.
avg_price_by_gender = grouped_pur_gender['Price'].mean()

# Rounded to two decimals for display in the gender summary table
avg_price_neat = avg_price_by_gender.round(decimals=2)
avg_price_neat

Gender
Female                   3.20
Male                     3.02
Other / Non-Disclosed    3.35
Name: Price, dtype: float64

## Total Purchase Value by Gender

In [23]:
# Sum of 'Price' within each gender group
prices_by_gender = grouped_pur_gender['Price']
tot_value = prices_by_gender.sum()

# Rounded to two decimals for display in the gender summary table
tot_value_neat = tot_value.round(2)
tot_value_neat

Gender
Female                    361.94
Male                     1967.64
Other / Non-Disclosed      50.19
Name: Price, dtype: float64

## Average Total Purchase per Person by Gender

### Male

In [24]:
# Average total spend per male player:
#   1. group the male purchase rows by 'SN',
#   2. add up the 'Price' values within each group,
#   3. average those per-SN totals.
grouped_SN_male = male_pur_df.groupby(by=['SN'])
male_sum = grouped_SN_male['Price'].sum()
male_mean = male_sum.mean()

# Two-decimal string used for display in the summary table
male_mean_neat = '{:.2f}'.format(male_mean)

male_mean_neat

'4.07'

### Female

In [25]:
# Average total spend per female player:
#   1. group the female purchase rows by 'SN',
#   2. add up the 'Price' values within each group,
#   3. average those per-SN totals.
grouped_SN_female = female_pur_df.groupby(by=['SN'])
female_sum = grouped_SN_female['Price'].sum()
female_mean = female_sum.mean()

# Two-decimal string used for display in the summary table
female_mean_neat = '{:.2f}'.format(female_mean)

female_mean_neat

'4.47'

### Other/Non-Disclosed

In [26]:
# Average total spend per other/non-disclosed player:
#   1. group the other/non-disclosed purchase rows by 'SN',
#   2. add up the 'Price' values within each group,
#   3. average those per-SN totals.
grouped_SN_other = other_pur_df.groupby(by=['SN'])
other_sum = grouped_SN_other['Price'].sum()
other_mean = other_sum.mean()

# Two-decimal string used for display in the summary table
other_mean_neat = '{:.2f}'.format(other_mean)

other_mean_neat

'4.56'

## Summary Data Frame

In [27]:
# Row order of the summary table; the per-gender Series are looked up by these
# labels so every column lines up with the index
gender_labels = ['Female', 'Male', 'Other / Non-Disclosed']

gender_analysis_columns = {
    'Purchase Count': [gender_count[label] for label in gender_labels],
    'Average Purchase Price ($)': [avg_price_neat[label] for label in gender_labels],
    'Total Purchase Value ($)': [tot_value_neat[label] for label in gender_labels],
    # Per-person averages were computed one gender at a time, so they are
    # listed explicitly in the same Female / Male / Other order
    'Avg Total Purchase per Person ($)': [female_mean_neat, male_mean_neat, other_mean_neat],
}

# Build the gender purchasing analysis table and display it
gender_analysis_df = pd.DataFrame(gender_analysis_columns, index=gender_labels)

gender_analysis_df

Unnamed: 0,Purchase Count,Average Purchase Price ($),Total Purchase Value ($),Avg Total Purchase per Person ($)
Female,113,3.2,361.94,4.47
Male,652,3.02,1967.64,4.07
Other / Non-Disclosed,15,3.35,50.19,4.56


# AGE DEMOGRAPHICS

# PURCHASING ANALYSIS (AGE)

# TOP SPENDERS

# MOST POPULAR ITEMS

# MOST PROFITABLE ITEMS