### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [2]:
# Dependencies and Setup
import pandas as pd

# Location of the purchase log, relative to the notebook's working directory
file_to_load = "Resources/purchase_data.csv"

# Parse the CSV into a DataFrame and preview its first five rows
purchase_data = pd.read_csv(filepath_or_buffer=file_to_load)
purchase_data.head(5)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

* Display the total number of players


In [4]:
# Total number of players: a player (screen name, SN) may appear on several
# purchase rows, so count the distinct SN values rather than the rows.
player_count_df = pd.DataFrame(
    {"Total Players": [purchase_data['SN'].unique().size]}
)
player_count_df

Unnamed: 0,Total Players
0,576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [5]:
# Store-wide purchasing summary: a single-row frame with one column per metric.
purchases_df = pd.DataFrame(
    {
        # distinct items that were bought at least once
        "Number of Unique Items": [purchase_data['Item ID'].unique().size],
        # mean price over all purchase rows, rounded to cents
        "Average Purchase Price": [round(purchase_data['Price'].mean(), 2)],
        # number of purchase rows with a non-null Purchase ID
        "Total Number of Purchases": [purchase_data['Purchase ID'].count()],
        # sum of every purchase price
        "Total Revenue": [purchase_data['Price'].sum()],
    }
)

purchases_df

Unnamed: 0,Number of Unique Items,Average Purchase Price,Total Number of Purchases,Total Revenue
0,183,3.05,780,2379.77


## Gender Demographics

In [9]:
# Number of purchases (rows with a non-null Purchase ID) made by each gender
purchase_data['Purchase ID'].groupby(purchase_data['Gender']).count()

Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Purchase ID, dtype: int64

In [9]:
# Number of distinct purchasers (unique screen names)
purchase_data['SN'].unique().size

576

In [10]:
# Percentage and count of male, female, and other players.
# Keep one row per screen name so each player is counted once, no matter how
# many purchases they made.
user_list = purchase_data.drop_duplicates(subset='SN')
gender_df = user_list['Gender'].value_counts()

# Iterate over (label, count) pairs instead of indexing with gender_df[i]:
# gender_df is labelled by gender strings, and integer keys on such a Series
# rely on a positional fallback that pandas has deprecated.
for gender, count in gender_df.items():
    print("{}: {}, %: {} \n".format(gender, count, round(count / gender_df.sum() * 100, 2)))

Male: 484, %: 84.03 

Female: 81, %: 14.06 

Other / Non-Disclosed: 11, %: 1.91 



* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed





## Purchasing Analysis (Gender)

In [10]:
# Purchase count by gender: non-null Purchase IDs tallied per gender label
purchase_data['Purchase ID'].groupby(purchase_data['Gender']).count()

Gender
Female                   113
Male                     652
Other / Non-Disclosed     15
Name: Purchase ID, dtype: int64

In [12]:
# Average purchase price by gender, rounded to two decimals (cents)
purchase_data.groupby('Gender')['Price'].mean().round(2)

Gender
Female                   3.20
Male                     3.02
Other / Non-Disclosed    3.35
Name: Price, dtype: float64

In [13]:
# Total purchase value by gender: sum of all purchase prices per gender label
purchase_data['Price'].groupby(purchase_data['Gender']).sum()

Gender
Female                    361.94
Male                     1967.64
Other / Non-Disclosed      50.19
Name: Price, dtype: float64

In [29]:
# Average purchase total per person by gender, step 1 of 2.
# Interpretation used here: total up what each individual player spent, then
# (in the following cell) average those per-player totals within each gender.
gender_data = purchase_data.groupby(by=['Gender', 'SN'])['Price'].sum()

In [49]:
# Step 2 of 2: mean of the per-player spending totals within each gender
averages_gender = gender_data.groupby(by='Gender').mean()
averages_gender

Gender
Female                   4.468395
Male                     4.065372
Other / Non-Disclosed    4.562727
Name: Price, dtype: float64

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [31]:
# Oldest recorded age in the data (presumably checked to choose the top age-bin edge)
purchase_data.loc[:, 'Age'].max()

45

## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [35]:
# Age-bin edges and their display labels. pd.cut uses right-closed intervals
# by default, so these edges mean (0, 9], (9, 14], ..., (39, 44], (44, 99].
bins = [0, 9, 14, 19, 24, 29, 34, 39, 44, 99]
bin_names = ['<10', '10-14', '15-19', '20-24', '25-29', '30-34', '35-39', '40-44', '>44']

# Tag every purchase row with the age group of the purchaser. The column is
# added to purchase_data itself because the age-group cells below read it.
purchase_data['Age Group'] = pd.cut(purchase_data['Age'], bins=bins, labels=bin_names)

# Preview only the first rows rather than dumping the whole purchase log
purchase_data.head(10)

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price,Age Group
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53,20-24
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56,40-44
2,2,Ithergue48,24,Male,92,Final Critic,4.88,20-24
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27,20-24
4,4,Iskosia90,23,Male,131,Fury,1.44,20-24
5,5,Yalae81,22,Male,81,Dreamkiss,3.61,20-24
6,6,Itheria73,36,Male,169,"Interrogator, Blood Blade of the Queen",2.18,35-39
7,7,Iskjaskst81,20,Male,162,Abyssal Shard,2.67,20-24
8,8,Undjask33,22,Male,21,Souleater,1.10,20-24
9,9,Chanosian48,35,Other / Non-Disclosed,136,Ghastly Adamantite Protector,3.58,35-39


In [41]:
# Purchase count per age group (rows with a non-null Purchase ID)
purchase_data.groupby(by='Age Group')['Purchase ID'].count()

Age Group
<10       23
10-14     28
15-19    136
20-24    365
25-29    101
30-34     73
35-39     41
40-44     12
>44        1
Name: Purchase ID, dtype: int64

In [44]:
# Average purchase price per age group, rounded to two decimals (cents)
purchase_data.groupby('Age Group')['Price'].mean().round(2)

Age Group
<10      3.35
10-14    2.96
15-19    3.04
20-24    3.05
25-29    2.90
30-34    2.93
35-39    3.60
40-44    3.04
>44      1.70
Name: Price, dtype: float64

In [45]:
# Total purchase value per age group: sum of all purchase prices in the group
purchase_data.groupby(by='Age Group')['Price'].sum()

Age Group
<10        77.13
10-14      82.78
15-19     412.89
20-24    1114.06
25-29     293.00
30-34     214.00
35-39     147.67
40-44      36.54
>44         1.70
Name: Price, dtype: float64

In [50]:
# Average purchase total per person by age group, step 1 of 2.
# Interpretation used here: total up what each individual player spent, then
# average those per-player totals within each age group.
#
# observed=True keeps only the (Age Group, SN) pairs that actually occur.
# 'Age Group' is categorical (it was built with pd.cut); without this flag,
# current pandas versions expand the result to every age-group x player
# combination and pad the missing pairs with 0, which would drag down any
# per-group mean computed from this Series.
age_data = purchase_data.groupby(['Age Group', 'SN'], observed=True)['Price'].sum()
age_data.head()

Age Group  SN          
<10        Anallorgue57    5.70
           Chadjask77      4.93
           Chanossast57    4.32
           Eurithphos97    4.55
           Eusri44         3.09
Name: Price, dtype: float64

In [52]:
# Mean of the per-player spending totals within each age group
averages_age = age_data.groupby(by='Age Group').mean()
averages_age

Age Group
<10      4.537059
10-14    3.762727
15-19    3.858785
20-24    4.318062
25-29    3.805195
30-34    4.115385
35-39    4.763548
40-44    3.321818
>44      1.700000
Name: Price, dtype: float64

## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [121]:
# Identify the top 5 spenders by total purchase value and, for each one,
# list SN, purchase count, average purchase price, and total purchase value.
spenders = (
    purchase_data.groupby('SN')['Price']
    .sum()
    .sort_values(ascending=False)
    .head()
)

# One report line per top spender; the player's rows are selected once and
# reused for all three statistics.
for screen_name in spenders.index:
    player_rows = purchase_data.loc[purchase_data['SN'] == screen_name]
    print("SN: {} // Purchase Count: {} // Avg Purchase Price: ${} // Revenue: ${}".format(
        screen_name,
        player_rows['Purchase ID'].count(),
        round(player_rows['Price'].mean(), 2),
        round(player_rows['Price'].sum(), 2)))

SN: Lisosia93 // Purchase Count: 5 // Avg Purchase Price: $3.79 // Revenue: $18.96
SN: Idastidru52 // Purchase Count: 4 // Avg Purchase Price: $3.86 // Revenue: $15.45
SN: Chamjask73 // Purchase Count: 3 // Avg Purchase Price: $4.61 // Revenue: $13.83
SN: Iral74 // Purchase Count: 4 // Avg Purchase Price: $3.4 // Revenue: $13.62
SN: Iskadarya95 // Purchase Count: 3 // Avg Purchase Price: $4.37 // Revenue: $13.1


## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [113]:
# Most popular items: number of purchase rows per Item ID, five largest first
items_sold = (
    purchase_data.groupby('Item ID')['Item ID']
    .count()
    .sort_values(ascending=False)
    .head()
)
items_sold

Item ID
178    12
82      9
108     9
145     9
92      8
Name: Item ID, dtype: int64

In [114]:
# Disabled scratch check: looks up the item name recorded for Item ID 178.
#purchase_data.loc[purchase_data['Item ID'] == 178]['Item Name'].max()

In [122]:
# For each of the most popular items, print its name, purchase count,
# average price, and total value. The item's rows are selected once and
# reused for every statistic on the line.
for item_id in items_sold.index:
    item_rows = purchase_data.loc[purchase_data['Item ID'] == item_id]
    print("Item ID: {} // Item Name: {} // Sold: {} // Price: ${} // Revenue ${}".format(
        item_id,
        item_rows['Item Name'].max(),
        item_rows['Purchase ID'].count(),
        round(item_rows['Price'].mean(), 2),
        round(item_rows['Price'].sum(), 2)))

Item ID: 178 // Item Name: Oathbreaker, Last Hope of the Breaking Storm // Sold: 12 // Price: $4.23 // Revenue $50.76
Item ID: 82 // Item Name: Nirvana // Sold: 9 // Price: $4.9 // Revenue $44.1
Item ID: 108 // Item Name: Extraction, Quickblade Of Trembling Hands // Sold: 9 // Price: $3.53 // Revenue $31.77
Item ID: 145 // Item Name: Fiery Glass Crusader // Sold: 9 // Price: $4.58 // Revenue $41.22
Item ID: 92 // Item Name: Final Critic // Sold: 8 // Price: $4.88 // Revenue $39.04


## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [118]:
# Most profitable items: total purchase value per Item ID, five largest first
profit_items = (
    purchase_data.groupby('Item ID')['Price']
    .sum()
    .sort_values(ascending=False)
    .head()
)
profit_items

Item ID
178    50.76
82     44.10
145    41.22
92     39.04
103    34.80
Name: Price, dtype: float64

In [123]:
# For each of the most profitable items, print its name, purchase count,
# average price, and total value. The item's rows are selected once and
# reused for every statistic on the line.
for item_id in profit_items.index:
    item_rows = purchase_data.loc[purchase_data['Item ID'] == item_id]
    print("Item ID: {} // Item Name: {} // Sold: {} // Price: ${} // Revenue ${}".format(
        item_id,
        item_rows['Item Name'].max(),
        item_rows['Purchase ID'].count(),
        round(item_rows['Price'].mean(), 2),
        round(item_rows['Price'].sum(), 2)))

Item ID: 178 // Item Name: Oathbreaker, Last Hope of the Breaking Storm // Sold: 12 // Price: $4.23 // Revenue $50.76
Item ID: 82 // Item Name: Nirvana // Sold: 9 // Price: $4.9 // Revenue $44.1
Item ID: 145 // Item Name: Fiery Glass Crusader // Sold: 9 // Price: $4.58 // Revenue $41.22
Item ID: 92 // Item Name: Final Critic // Sold: 8 // Price: $4.88 // Revenue $39.04
Item ID: 103 // Item Name: Singed Scalpel // Sold: 8 // Price: $4.35 // Revenue $34.8
