### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [151]:
# Dependencies and Setup
import pandas as pd

In [152]:
# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

In [153]:
# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)
purchase_data.head(3)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88


In [154]:
# global vars
f_money='${:,.2f}'.format
f_percent='{:,.2f}%'.format

## Player Count

* Display the total number of players


In [156]:
def player_count(purchase_data):
    unique_players = len(purchase_data['SN'].unique())
    total_players = pd.DataFrame({"Total Players": [unique_players] })
    return total_players

player_count(purchase_data)

Unnamed: 0,Total Players
0,576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [18]:
def purchasing_analysis_total(purchase_data):
    # create temp variables to hold the columns
    numb_uni_items='Number of Unique Items'
    avrg_purch_pri='Average Purchase Price'
    total_purch='Total Number of Purchases'
    total_rev='Total Revenue'
    unique_items = len(purchase_data['Item Name'].unique())
    purchases = purchase_data['Price']
    analysis = pd.DataFrame({numb_uni_items: [unique_items],
                             avrg_purch_pri: [purchases.mean()],
                             total_purch: [len(purchases)],
                             total_rev: [purchases.sum()] })
    # format data
    analysis[avrg_purch_pri] = analysis[avrg_purch_pri].map(f_money)
    analysis[total_rev] = analysis[total_rev].map(f_money)
    return analysis

purchasing_analysis_total(purchase_data)

Unnamed: 0,Number of Unique Items,Average Purchase Price,Total Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


## Gender Demographics

* Percentage and Count of Male Players
* Percentage and Count of Female Players
* Percentage and Count of Other / Non-Disclosed

In [158]:
def demographics_column(purchase_data, column):
    per_of_play = 'Percentage of Players'
    demographics = pd.DataFrame({'Total Count': purchase_data[column].value_counts()})
    demographics[per_of_play] = 100 * purchase_data[column].value_counts(normalize=True)
    demographics[per_of_play] = demographics[per_of_play].map(f_percent)
    return demographics

demographics_column(purchase_data,'Gender')

Unnamed: 0,Total Count,Percentage of Players
Male,652,83.59%
Female,113,14.49%
Other / Non-Disclosed,15,1.92%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain Purchase Count,	Average Purchase Price,	Total Purchase Value, Avg Total Purchase per Person



* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [47]:
# https://www.youtube.com/watch?v=qy0fDqoMJx8
# https://www.youtube.com/watch?v=Wb2Tp35dZ-I
def purchasing_analysis_column(purchase_data, column_filter):
    pur_count='Purchase Count'
    avrg_pur_price='Average Purchase Price'
    total_pur='Total Purchase Value'
    avrg_pur_total_per_pers='Average Purchase Total per Person'
    column_analysis = pd.DataFrame()
    for column in purchase_data[column_filter].unique():
        filtered = purchase_data.loc[purchase_data[column_filter] == column]
        analysis = pd.DataFrame({pur_count: [len(filtered)],
                                 avrg_pur_price: filtered['Price'].mean(),
                                 total_pur: filtered['Price'].sum(),
                                 avrg_pur_total_per_pers: filtered.groupby('SN')['Price'].sum().mean() },
                                index=[column])
        column_analysis = pd.concat([analysis, column_analysis])
    column_analysis[avrg_pur_price] = column_analysis[avrg_pur_price].map(f_money)
    column_analysis[total_pur] = column_analysis[total_pur].map(f_money)
    column_analysis[avrg_pur_total_per_pers] = column_analysis[avrg_pur_total_per_pers].map(f_money)
    return column_analysis
    
purchasing_analysis_column(purchase_data, 'Gender')

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Purchase Total per Person
Female,113,$3.20,$361.94,$4.47
Other / Non-Disclosed,15,$3.35,$50.19,$4.56
Male,652,$3.02,"$1,967.64",$4.07


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [93]:
age_bins = [0,10,15,20,25,30,35,40]
age_labels = ['<10','10-14','15-19','20-24','25-29','30-34', '40+']
purchase_data_ages = pd.DataFrame(purchase_data)
purchase_data_ages['Age Group'] = pd.cut(purchase_data['Age'], bins=age_bins, labels=age_labels)

demographics_column(purchase_data_ages,'Age Group').reindex(age_labels)

Unnamed: 0,Total Count,Percentage of Players
<10,32,4.14%
10-14,54,6.99%
15-19,200,25.87%
20-24,325,42.04%
25-29,77,9.96%
30-34,52,6.73%
40+,33,4.27%


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [94]:
purchasing_analysis_column(purchase_data_ages,'Age Group').reindex(age_labels)

Unnamed: 0,Purchase Count,Average Purchase Price,Total Purchase Value,Average Purchase Total per Person
<10,32,$3.40,$108.96,$4.54
10-14,54,$2.90,$156.60,$3.82
15-19,200,$3.11,$621.56,$4.14
20-24,325,$3.02,$981.64,$4.23
25-29,77,$2.88,$221.42,$3.75
30-34,52,$2.99,$155.71,$4.21
40+,33,$3.40,$112.35,$4.32


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [148]:
def grouped_purchasing(purchase_data, group_filter, sort=-1):
    filter_agg = ['count','mean','sum']
    summary = purchase_data.groupby(group_filter)['Price'].agg(filter_agg)
    if sort > -1:
        summary.sort_values(by=[filter_agg[sort]], ascending=False, inplace=True)
    # format
    summary[filter_agg[1]] = summary[filter_agg[1]].map(f_money)
    summary[filter_agg[2]] = summary[filter_agg[2]].map(f_money)
    # rename
    summary = summary.rename(columns={filter_agg[0]: 'Purchase Count',
                                      filter_agg[1]: 'Average Purchase Price',
                                      filter_agg[2]: 'Total Purchase Value' })
    return summary

top_spenders = grouped_purchasing(purchase_data, ['SN'], 2)
top_spenders.head(5)

Unnamed: 0_level_0,Purchase Count,Average Purchase Price,Total Purchase Value
SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
Lisosia93,5,$3.79,$18.96
Idastidru52,4,$3.86,$15.45
Chamjask73,3,$4.61,$13.83
Iral74,4,$3.40,$13.62
Iskadarya95,3,$4.37,$13.10


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [149]:
most_popular_items = grouped_purchasing(purchase_data, ['Item ID', 'Item Name'], 2)
most_popular_items = most_popular_items.rename(columns={'Average Purchase Price': 'Item Price'})
most_popular_items.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
82,Nirvana,9,$4.90,$44.10
145,Fiery Glass Crusader,9,$4.58,$41.22
103,Singed Scalpel,8,$4.35,$34.80


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [150]:
most_popular_items = grouped_purchasing(purchase_data, ['Item ID', 'Item Name'], 0)
most_popular_items = most_popular_items.rename(columns={'Average Purchase Price': 'Item Price'})
most_popular_items.head(5)

Unnamed: 0_level_0,Unnamed: 1_level_0,Purchase Count,Item Price,Total Purchase Value
Item ID,Item Name,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
92,Final Critic,13,$4.61,$59.99
178,"Oathbreaker, Last Hope of the Breaking Storm",12,$4.23,$50.76
145,Fiery Glass Crusader,9,$4.58,$41.22
132,Persuasion,9,$3.22,$28.99
108,"Extraction, Quickblade Of Trembling Hands",9,$3.53,$31.77
