### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

In [2]:
purchase_data.isnull().values.any()
purchase_data.columns

Index(['Purchase ID', 'SN', 'Age', 'Gender', 'Item ID', 'Item Name', 'Price'], dtype='object')

## Player Count

* Display the total number of players


In [3]:
unique_data = purchase_data.drop_duplicates(subset = ["SN"])
df=pd.DataFrame([len(unique_data)], columns=["Total Players"]) 
df


Unnamed: 0,Total Players
0,576


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [4]:
unique_items = purchase_data["Item ID"].unique()
number_unique_items = len(unique_items)
average_price = purchase_data["Price"].mean()
total_purchases = purchase_data["Purchase ID"].count()
total_revenue = purchase_data["Price"].sum()

summary_list = [{"Number of Unique Items": number_unique_items,
                "Average Price": average_price,
               "Total Number of Purchases": total_purchases,
               "Total Revenue": total_revenue}]
summary_df = pd.DataFrame(summary_list)
summary_df = summary_df.round(2)
summary_df["Average Price"]=summary_df["Average Price"].map("${0:,.2f}".format)
summary_df["Total Revenue"]=summary_df["Total Revenue"].map("${0:,.2f}".format)
summary_df

Unnamed: 0,Number of Unique Items,Average Price,Total Number of Purchases,Total Revenue
0,179,$3.05,780,"$2,379.77"


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [5]:
unique_data = purchase_data.drop_duplicates(subset = ["SN"])

unique_player = unique_data[["Gender"]]
unique_gender_total = unique_player["Gender"].count()
unique_gender_total
male_total = unique_player["Gender"].value_counts()['Male']
male_percent = male_total/unique_gender_total*100
female_total = unique_player["Gender"].value_counts()['Female']
female_percent = female_total/unique_gender_total*100
other_total = unique_player["Gender"].value_counts()['Other / Non-Disclosed']
other_percent = other_total/unique_gender_total*100

gender_data = {"Total": [male_total, female_total, other_total], "Percent": [male_percent, female_percent, other_percent]} 
gender_df = pd.DataFrame(gender_data, index=["Male", "Female", "Other / Non-Disclosed"]) 
gender_df["Percent"]=gender_df["Percent"].map("{:.2f}%".format)
gender_df


Unnamed: 0,Total,Percent
Male,484,84.03%
Female,81,14.06%
Other / Non-Disclosed,11,1.91%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [6]:
avg_price_group = purchase_data.groupby(["Gender"], as_index=False) ["Price"].mean()
avg_price=avg_price_group.rename(columns={"Price": "Average Price"})
avg_price=avg_price.sort_values(by=["Average Price"], ascending = False)

In [7]:
purchases_sum = purchase_data.groupby(["Gender"], as_index=False) ["Price"].sum()
purchases_sum_new = purchases_sum.rename(columns={"Price": "Purchases (Sum)"})
purchases_sum_new=purchases_sum_new


In [8]:
total_purchases_sum = purchases_sum_new["Purchases (Sum)"].sum(axis=0)


In [9]:
purchase_count = purchase_data.groupby(["Gender"], as_index=False) ["Purchase ID"].count()
purchase_count_new = purchase_count.rename(columns={"Purchase ID": "Purchases (Count)"})
purchase_count_new

Unnamed: 0,Gender,Purchases (Count)
0,Female,113
1,Male,652
2,Other / Non-Disclosed,15


In [10]:
average_purchase = purchase_data.groupby(["Gender"], as_index=False) ["Price"].sum()

In [11]:
female_avg_purchase = average_purchase.iloc[0,1]/gender_df.iloc[1,0]
male_avg_purchase = average_purchase.iloc[1,1]/gender_df.iloc[0,0]
other_avg_purchase = average_purchase.iloc[2,1]/gender_df.iloc[2,0]
genders = {"Gender": ['Female', 'Male', 'Other / Non-Disclosed'], "Average Purchase Per Person": [female_avg_purchase, male_avg_purchase, other_avg_purchase]}
gender1_df = pd.DataFrame(genders, columns = ["Gender", "Average Purchase Per Person"])
gender1_df

Unnamed: 0,Gender,Average Purchase Per Person
0,Female,4.468395
1,Male,4.065372
2,Other / Non-Disclosed,4.562727


In [12]:
merged_df = pd.merge(purchase_count_new, avg_price, how="left")
merged_df = pd.merge(merged_df, purchases_sum_new)
merged_df = pd.merge(merged_df, gender1_df)
merged_df
merged_df ["Average Purchase Per Person"] = merged_df["Average Purchase Per Person"].map("${:.2f}".format)
merged_df ["Average Price"]= merged_df["Average Price"].map("${:,.2f}".format)
merged_df ["Purchases (Sum)"]= merged_df["Purchases (Sum)"].map("${:,.2f}".format)


In [13]:
sorted_merged_df = merged_df.sort_values(by=["Purchases (Count)"], ascending = False)
sorted_merged_df


Unnamed: 0,Gender,Purchases (Count),Average Price,Purchases (Sum),Average Purchase Per Person
1,Male,652,$3.02,"$1,967.64",$4.07
0,Female,113,$3.20,$361.94,$4.47
2,Other / Non-Disclosed,15,$3.35,$50.19,$4.56


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [14]:
unique_data["Age"].min()
unique_data["Age"].max()
bins=[0, 9, 14, 19, 24, 29, 34, 39, 45]
group_names=["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

In [15]:
pd.set_option('mode.chained_assignment', None)

In [16]:
unique_data["Age Bin"] = pd.cut(unique_data.Age, bins, labels=group_names, include_lowest=True)

In [17]:
age_count = unique_data.groupby(["Age Bin"], as_index=False) ["Purchase ID"].count() 
age_count=age_count.rename(columns={"Purchase ID": "Total Count"})
age_count["Percent"] = (age_count["Total Count"]/len(unique_data))*100
age_count["Percent"]= age_count["Percent"].map("{:.2f}%".format)
age_count

Unnamed: 0,Age Bin,Total Count,Percent
0,<10,17,2.95%
1,10-14,22,3.82%
2,15-19,107,18.58%
3,20-24,258,44.79%
4,25-29,77,13.37%
5,30-34,52,9.03%
6,35-39,31,5.38%
7,40+,12,2.08%


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [18]:
bins=[0, 9, 14, 19, 24, 29, 34, 39, 45]
group_names=["<10", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40+"]

In [19]:
purchase_data["Age Bin"] = pd.cut(purchase_data.Age, bins, labels=group_names, include_lowest=True)


In [20]:
purchase_count = purchase_data.groupby(["Age Bin"], as_index=False) ["Purchase ID"].count()
purchase_count = purchase_count.rename(columns={"Purchase ID": "Purchase Count"})

In [21]:
purchase_count2 = purchase_data.groupby(["Age Bin"], as_index=False) ['Price'].sum()
purchase_count2 = purchase_count2.rename(columns={"Price": "Total Purchase Value"})

In [22]:
purchase_count3 = purchase_data.groupby(["Age Bin"], as_index=False) ['Price'].mean()
purchase_count3 = purchase_count3.rename(columns={"Price": "Avg Total Purchase Per Person"})

In [23]:
merged_purchases = pd.merge(purchase_count, purchase_count2, how="left")
merged_purchases = pd.merge(merged_purchases, purchase_count3, how="left")
merged_purchases["Average Purchase Price"]=merged_purchases["Total Purchase Value"]/merged_purchases["Purchase Count"]

In [24]:
merged_purchases["Average Purchase Price"]= merged_purchases["Average Purchase Price"].map("{:.2f}%".format)
merged_purchases["Total Purchase Value"]=merged_purchases["Total Purchase Value"].map("${:,.2f}".format)
merged_purchases["Avg Total Purchase Per Person"]= merged_purchases["Avg Total Purchase Per Person"].map("{:.2f}%".format)
merged_purchases

Unnamed: 0,Age Bin,Purchase Count,Total Purchase Value,Avg Total Purchase Per Person,Average Purchase Price
0,<10,23,$77.13,3.35%,3.35%
1,10-14,28,$82.78,2.96%,2.96%
2,15-19,136,$412.89,3.04%,3.04%
3,20-24,365,"$1,114.06",3.05%,3.05%
4,25-29,101,$293.00,2.90%,2.90%
5,30-34,73,$214.00,2.93%,2.93%
6,35-39,41,$147.67,3.60%,3.60%
7,40+,13,$38.24,2.94%,2.94%


## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



In [25]:
purchase_data.columns

Index(['Purchase ID', 'SN', 'Age', 'Gender', 'Item ID', 'Item Name', 'Price',
       'Age Bin'],
      dtype='object')

In [26]:
top_spenders = purchase_data.groupby(["SN"], as_index=False) ["Item ID"].count()
top_spenders = top_spenders.rename(columns={"Item ID": "Purchase Count"})
top_spenders

Unnamed: 0,SN,Purchase Count
0,Adairialis76,1
1,Adastirin33,1
2,Aeda94,1
3,Aela59,1
4,Aelaria33,1
...,...,...
571,Yathecal82,3
572,Yathedeu43,2
573,Yoishirrala98,1
574,Zhisrisu83,2


In [27]:
top_spend_sum = purchase_data.groupby(["SN"], as_index=False) ["Price"].sum()
top_spend_sum = top_spend_sum.rename(columns={"Price": "Total Purchase Value"})

In [28]:
top_spend_avg = purchase_data.groupby(["SN"], as_index=False) ["Price"].mean()
top_spend_avg = top_spend_avg.rename(columns={"Price": "Average Purchase Price"})
top_spend_avg

Unnamed: 0,SN,Average Purchase Price
0,Adairialis76,2.280000
1,Adastirin33,4.480000
2,Aeda94,4.910000
3,Aela59,4.320000
4,Aelaria33,1.790000
...,...,...
571,Yathecal82,2.073333
572,Yathedeu43,3.010000
573,Yoishirrala98,4.580000
574,Zhisrisu83,3.945000


In [29]:
merged_top_spend = pd.merge(top_spenders, top_spend_avg, how="left")
merged_top_spend = pd.merge(merged_top_spend, top_spend_sum, how="left")
merged_top_spend

Unnamed: 0,SN,Purchase Count,Average Purchase Price,Total Purchase Value
0,Adairialis76,1,2.280000,2.28
1,Adastirin33,1,4.480000,4.48
2,Aeda94,1,4.910000,4.91
3,Aela59,1,4.320000,4.32
4,Aelaria33,1,1.790000,1.79
...,...,...,...,...
571,Yathecal82,3,2.073333,6.22
572,Yathedeu43,2,3.010000,6.02
573,Yoishirrala98,1,4.580000,4.58
574,Zhisrisu83,2,3.945000,7.89


In [30]:
merged_top_spend = merged_top_spend.sort_values(by=["Total Purchase Value"], ascending=False)
merged_top_spend.columns

Index(['SN', 'Purchase Count', 'Average Purchase Price',
       'Total Purchase Value'],
      dtype='object')

In [31]:
merged_top_spend["Average Purchase Price"]= merged_top_spend["Average Purchase Price"].map("${:,.2f}".format)
merged_top_spend["Total Purchase Value"]= merged_top_spend["Total Purchase Value"].map("${:,.2f}".format)
merged_top_spend.set_index(["SN"])
merged_top_spend

Unnamed: 0,SN,Purchase Count,Average Purchase Price,Total Purchase Value
360,Lisosia93,5,$3.79,$18.96
246,Idastidru52,4,$3.86,$15.45
106,Chamjask73,3,$4.61,$13.83
275,Iral74,4,$3.40,$13.62
281,Iskadarya95,3,$4.37,$13.10
...,...,...,...,...
257,Ililsasya43,1,$1.02,$1.02
277,Irilis75,1,$1.02,$1.02
32,Aidai61,1,$1.01,$1.01
117,Chanirra79,1,$1.01,$1.01


## Most Popular Items

In [47]:
most_popular = purchase_data.groupby(["Item ID", "Item Name"], as_index=False) ["Purchase ID"].count()
most_popular = most_popular.rename(columns={"Purchase ID": "Purchase Count"})
most_popular2 = purchase_data.groupby(["Item ID", "Price"], as_index=False) ["Price"].sum()

merged_popular = pd.merge(most_popular, most_popular2, how="left")
popular_total_value = merged_popular["Purchase Count"] * merged_popular["Price"]

merged_popular["Total Purchase Value"]=popular_total_value
merged_popular["Total Purchase Value"]= merged_popular["Total Purchase Value"].map("${:,.2f}".format)
merged_popular["Price"]= merged_popular["Price"].map("${:,.2f}".format)
merged_popular.set_index(["Item ID"])
merged_popular.sort_values(by=["Purchase Count"])
merged_popular

Unnamed: 0,Item ID,Item Name,Purchase Count,Price,Total Purchase Value
0,0,Splinter,4,$5.12,$20.48
1,1,Crucifer,4,$1.99,$7.96
2,1,Crucifer,4,$9.78,$39.12
3,2,Verdict,6,$14.88,$89.28
4,3,Phantomlight,6,$14.94,$89.64
...,...,...,...,...,...
178,178,"Oathbreaker, Last Hope of the Breaking Storm",12,$50.76,$609.12
179,179,"Wolf, Promise of the Moonwalker",6,$26.88,$161.28
180,181,Reaper's Toll,5,$8.30,$41.50
181,182,Toothpick,3,$12.09,$36.27


* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame



In [43]:
most_profit = merged_popular.sort_values(by=["Total Purchase Value"], ascending=False)

In [44]:
most_profit

Unnamed: 0,Item ID,Item Name,Purchase Count,Price,Total Purchase Value
39,39,"Betrayal, Whisper of Grieving Widows",5,$19.70,$98.50
114,114,Yearning Mageblade,5,$19.10,$95.50
24,24,Warped Fetish,5,$19.05,$95.25
88,88,"Emberling, Defender of Delusions",5,$18.75,$93.75
12,11,Brimstone,5,$18.25,$91.25
...,...,...,...,...,...
53,53,Vengeance Cleaver,7,$14.35,$100.45
15,14,Possessed Core,2,$5.22,$10.44
31,30,Stormcaller,3,$3.36,$10.08
104,104,Gladiator's Glaive,1,$1.93,$1.93
