### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# File to Load (Remember to Change These)
file_to_load = "Resources/purchase_data.csv"

# Read Purchasing File and store into Pandas data frame
purchase_data = pd.read_csv(file_to_load)

In [2]:
purchase_data.head()

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Player Count

* Display the total number of players


In [3]:
total_players = len(purchase_data)
print(total_players)



780


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [4]:
#purchase_data.describe()
unique_items = purchase_data["Item Name"].unique()
number_unique_items = len(unique_items)
average_price = purchase_data["Price"].mean()
total_purchases = purchase_data["Purchase ID"].count()
total_revenue = purchase_data["Price"].sum()

summary_list = [{"Number of Unique Items": number_unique_items,
                "Average Price": average_price,
               "Total Number of Purchases": total_purchases,
               "Total Revenue": total_revenue}]
summary_df = pd.DataFrame(summary_list)
#summary_df
summary_df = summary_df.round(2)
summary_df["Average Price"]=summary_df["Average Price"].map("${0:,.0f}".format)
summary_df["Total Revenue"]=summary_df["Total Revenue"].map("${0:,.0f}".format)
summary_df

Unnamed: 0,Number of Unique Items,Average Price,Total Number of Purchases,Total Revenue
0,179,$3,780,"$2,380"


In [5]:
purchase_data.head()
#unique_players=purchase_data["SN"].unique()
#print(len(unique_players))

Unnamed: 0,Purchase ID,SN,Age,Gender,Item ID,Item Name,Price
0,0,Lisim78,20,Male,108,"Extraction, Quickblade Of Trembling Hands",3.53
1,1,Lisovynya38,40,Male,143,Frenzied Scimitar,1.56
2,2,Ithergue48,24,Male,92,Final Critic,4.88
3,3,Chamassasya86,24,Male,100,Blindscythe,3.27
4,4,Iskosia90,23,Male,131,Fury,1.44


## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [6]:
gender_total = purchase_data["Gender"].count()
gender_total
male_total = purchase_data["Gender"].value_counts()['Male']
male_percent = male_total/gender_total*100
female_total = purchase_data["Gender"].value_counts()['Female']
female_percent = female_total/gender_total*100
other_total = purchase_data["Gender"].value_counts()['Other / Non-Disclosed']
other_percent = other_total/gender_total*100

gender_data = {"Total": [male_total, female_total, other_total], "Percent": [male_percent, female_percent, other_percent]} 
gender_df = pd.DataFrame(gender_data, index=["Male", "Female", "Other / Non-Disclosed"]) 
gender_df["Percent"]=gender_df["Percent"].map("{:.2f}%".format)
gender_df


Unnamed: 0,Total,Percent
Male,652,83.59%
Female,113,14.49%
Other / Non-Disclosed,15,1.92%



## Purchasing Analysis (Gender)

* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [41]:
avg_price_group = purchase_data.groupby(["Gender"], as_index=False) ["Price"].mean()
avg_price=avg_price_group.rename(columns={"Price": "Average Price"})
avg_price

Unnamed: 0,Gender,Average Price
0,Female,3.203009
1,Male,3.017853
2,Other / Non-Disclosed,3.346


In [68]:
#fix
avg_purchase_total = purchase_data.groupby(["Gender"], as_index=False) ["Price"].sum()
avg_purchase_total_new = avg_purchase_total.rename(columns={"Price": "Average Purchase Per Person"})
avg_purchase_total_new



Unnamed: 0,Gender,Average Purchase Per Person
0,Female,361.94
1,Male,1967.64
2,Other / Non-Disclosed,50.19


In [47]:
purchase_count = purchase_data.groupby(["Gender"], as_index=False) ["Purchase ID"].count()
purchase_count_new = purchase_count.rename(columns={"Purchase ID": "Total Purchases"})

In [82]:
merged_df= pd.merge(purchase_count_new, avg_price, how="left")
merged_df1 = pd.merge(merged_df, avg_purchase_total_new)
merged_df1
#merged_df1 ["Average Purchases Per Person"] =(merged_df1["Average Purchase Per Person"]/merged_df["Total Purchases"])
merged_df1 ["Average Purchases Per Person"] = (merged_df1["Average Purchase Per Person"]/len(purchase_data))
merged_df1
merged_df1.drop(columns=["Average Purchase Per Person"])

Unnamed: 0,Gender,Total Purchases,Average Price,Average Purchases Per Person
0,Female,113,3.203009,0.464026
1,Male,652,3.017853,2.522615
2,Other / Non-Disclosed,15,3.346,0.064346


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain purchase count, avg. purchase price, avg. purchase total per person etc. in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

