### Note
* Instructions have been included for each segment. You do not have to follow them exactly, but they are included to help you think through the steps.

In [1]:
# Dependencies and Setup
import pandas as pd
import numpy as np

# Relative path of the purchase records CSV (edit if the data lives elsewhere)
file_to_load = "Resources/purchase_data.csv"

# Load the purchase records into a pandas DataFrame used by every later cell
purchase_data = pd.read_csv(filepath_or_buffer=file_to_load)

In [2]:
# Sanity check: True if any cell anywhere in the loaded frame is missing
purchase_data.isna().to_numpy().any()


## Player Count

* Display the total number of players


In [3]:
# A player can appear on several purchase rows, so count distinct screen
# names ("SN") instead of the number of rows in the purchase log.
total_players = purchase_data["SN"].nunique()
df = pd.DataFrame({"Total Players": [total_players]})
df


## Purchasing Analysis (Total)

* Run basic calculations to obtain number of unique items, average price, etc.


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame


In [4]:
# Headline purchasing metrics computed over every purchase row
price_column = purchase_data["Price"]

unique_items = purchase_data["Item ID"].unique()
number_unique_items = len(unique_items)
average_price = price_column.mean()
total_purchases = purchase_data["Purchase ID"].count()
total_revenue = price_column.sum()

# Single-record summary; the key order fixes the column order of the table
summary_list = [
    {
        "Number of Unique Items": number_unique_items,
        "Average Price": average_price,
        "Total Number of Purchases": total_purchases,
        "Total Revenue": total_revenue,
    }
]
summary_df = pd.DataFrame(summary_list).round(2)

# Render the monetary columns as currency strings for display
for money_column in ("Average Price", "Total Revenue"):
    summary_df[money_column] = summary_df[money_column].map("${0:,.2f}".format)

summary_df

## Gender Demographics

* Percentage and Count of Male Players


* Percentage and Count of Female Players


* Percentage and Count of Other / Non-Disclosed




In [5]:
# Gender breakdown of unique players (one row per screen name).
# Row order is fixed here because later cells address gender_df by position.
gender_order = ["Male", "Female", "Other / Non-Disclosed"]

# Keep only the first purchase row of each player so nobody is counted twice
unique_player = purchase_data.drop_duplicates(subset=["SN"])
unique_player_df = unique_player[["Gender"]]
unique_gender_total = unique_player_df["Gender"].count()

# Count once, then reindex: a category with no players becomes 0 instead of
# raising KeyError on a missing label.
gender_counts = (
    unique_player_df["Gender"]
    .value_counts()
    .reindex(gender_order, fill_value=0)
)
gender_percent = gender_counts / unique_gender_total * 100

gender_df = pd.DataFrame(
    {"Total": gender_counts.to_numpy(), "Percent": gender_percent.to_numpy()},
    index=gender_order,
)
# Percentages are shown as text with two decimals; "Total" stays numeric
gender_df["Percent"] = gender_df["Percent"].map("{:.2f}%".format)
gender_df



## Purchasing Analysis (Gender)

* Run basic calculations to obtain the purchase count, average purchase price, average purchase total per person, etc., by gender




* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

In [6]:
# Mean purchase price for each gender, kept as a flat table with a "Gender" column
avg_price_group = (
    purchase_data[["Gender", "Price"]]
    .groupby("Gender", as_index=False)
    .mean()
)
avg_price = avg_price_group.rename(columns={"Price": "Average Price"})
avg_price

In [7]:
# Total amount spent by each gender, as a flat table with a "Gender" column
purchases_sum = (
    purchase_data[["Gender", "Price"]]
    .groupby("Gender", as_index=False)
    .sum()
)
purchases_sum_new = purchases_sum.rename(columns={"Price": "Purchases (Sum)"})
purchases_sum_new

In [8]:
# Grand total of the per-gender sums; display the value computed here rather
# than `total_purchases`, which is the purchase count from an earlier cell.
total_purchases_sum = purchases_sum_new["Purchases (Sum)"].sum(axis=0)
total_purchases_sum

In [9]:
# Number of purchase rows recorded for each gender
purchase_count = (
    purchase_data[["Gender", "Purchase ID"]]
    .groupby("Gender", as_index=False)
    .count()
)
purchase_count_new = purchase_count.rename(columns={"Purchase ID": "Purchases (Count)"})
purchase_count_new

Unnamed: 0,Gender,Purchases (Count)
0,Female,113
1,Male,652
2,Other / Non-Disclosed,15


In [10]:
# Despite the name, this holds the TOTAL spend per gender; the next cell
# divides it by the unique-player counts to obtain the per-person average.
average_purchase = (
    purchase_data[["Gender", "Price"]]
    .groupby("Gender", as_index=False)
    .sum()
)

In [11]:
# Average total spend per unique player, by gender.
# Revenue and player counts are paired by gender LABEL, not by row position:
# `average_purchase` rows come out in groupby order while `gender_df` rows are
# in a hand-written order, so positional pairing is easy to get wrong.
revenue_by_gender = average_purchase.set_index("Gender")["Price"]

# Align the unique-player counts to the revenue rows; a gender with revenue
# but no entry in gender_df yields NaN instead of a silently wrong figure.
players_by_gender = gender_df["Total"].reindex(revenue_by_gender.index)

gender1_df = (
    (revenue_by_gender / players_by_gender)
    .rename("Average Purchase Per Person")
    .rename_axis("Gender")
    .reset_index()
)
gender1_df

Unnamed: 0,Gender,Average Purchase Per Person
0,Female,4.468395
1,Male,4.065372
2,Other / Non-Disclosed,4.562727


In [23]:
# Combine the per-gender tables into one summary. The merges join on the
# columns the tables have in common (pandas' default when `on` is omitted).
merged_df = (
    purchase_count_new
    .merge(avg_price, how="left")
    .merge(purchases_sum_new)
    .merge(gender1_df)
)

# Show every monetary column as a dollar amount with two decimals
as_dollars = "${:.2f}".format
for money_column in ("Average Purchase Per Person", "Average Price", "Purchases (Sum)"):
    merged_df[money_column] = merged_df[money_column].map(as_dollars)

merged_df

Unnamed: 0,Gender,Purchases (Count),Average Price,Purchases (Sum),Average Purchase Per Person
0,Female,113,$3.20,$361.94,$4.47
1,Male,652,$3.02,$1967.64,$4.07
2,Other / Non-Disclosed,15,$3.35,$50.19,$4.56


## Age Demographics

* Establish bins for ages


* Categorize the existing players using the age bins. Hint: use pd.cut()


* Calculate the numbers and percentages by age group


* Create a summary data frame to hold the results


* Optional: round the percentage column to two decimal points


* Display Age Demographics Table


In [12]:
# Bin edges and display labels for the age groups, consumed by pd.cut below.
# The last edge is open-ended (np.inf) so an age above 100 still falls into
# "Fifty+" instead of becoming NaN and vanishing from the grouped tables.
bins = [0, 12, 19, 29, 39, 49, np.inf]
group_names = ["Child (0-12)", "Teen (13-19)", "Twenty (20-29)", "Thirty(30-39)", "Forty(40-49)", "Fifty+"]

In [13]:
# Tag every purchase row with its age-group label; include_lowest=True makes
# the first bin closed on the left so an age equal to the lowest edge is kept.
purchase_data["Age Summary"] = pd.cut(
    x=purchase_data["Age"],
    bins=bins,
    labels=group_names,
    include_lowest=True,
)


In [14]:
# Number of purchases in each age group.
# "Age Summary" is categorical; observed=False is spelled out so empty groups
# (e.g. "Fifty+") still appear with a count of 0 regardless of the pandas
# version's default for `observed`.
ageGroups_df = purchase_data.groupby("Age Summary", observed=False)
purchases_count_Age = (
    purchase_data
    .groupby(["Age Summary"], as_index=False, observed=False)["Purchase ID"]
    .count()
)
purchases_count_Age

Unnamed: 0,Age Summary,Purchase ID
0,Child (0-12),45
1,Teen (13-19),142
2,Twenty (20-29),466
3,Thirty(30-39),114
4,Forty(40-49),13
5,Fifty+,0


In [15]:
# Total revenue in each age group.
# observed=False is explicit so age groups without purchases stay in the
# table (as 0.0) regardless of the pandas version's default for `observed`.
purchasesRevenue_Age = (
    purchase_data
    .groupby(["Age Summary"], as_index=False, observed=False)["Price"]
    .sum()
)
purchasesRevenue_Age

Unnamed: 0,Age Summary,Price
0,Child (0-12),143.55
1,Teen (13-19),429.25
2,Twenty (20-29),1407.06
3,Thirty(30-39),361.67
4,Forty(40-49),38.24
5,Fifty+,0.0


In [16]:
# Mean purchase price in each age group (the variable name says "Percent",
# but the value is an average price, not a percentage).
# observed=False is explicit so age groups without purchases stay in the
# table (as NaN) regardless of the pandas version's default for `observed`.
purchasePercent_Age = (
    purchase_data
    .groupby(["Age Summary"], as_index=False, observed=False)["Price"]
    .mean()
)
purchasePercent_Age

Unnamed: 0,Age Summary,Price
0,Child (0-12),3.19
1,Teen (13-19),3.022887
2,Twenty (20-29),3.019442
3,Thirty(30-39),3.172544
4,Forty(40-49),2.941538
5,Fifty+,


## Purchasing Analysis (Age)

* Bin the purchase_data data frame by age


* Run basic calculations to obtain the purchase count, average purchase price, average purchase total per person, etc., shown in the table below


* Create a summary data frame to hold the results


* Optional: give the displayed data cleaner formatting


* Display the summary data frame

## Top Spenders

* Run basic calculations to obtain the results in the table below


* Create a summary data frame to hold the results


* Sort the total purchase value column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Popular Items

* Retrieve the Item ID, Item Name, and Item Price columns


* Group by Item ID and Item Name. Perform calculations to obtain purchase count, average item price, and total purchase value


* Create a summary data frame to hold the results


* Sort the purchase count column in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the summary data frame



## Most Profitable Items

* Sort the above table by total purchase value in descending order


* Optional: give the displayed data cleaner formatting


* Display a preview of the data frame

