# 4.9 Part 2

## Contents of this Script:
1. Imports
2. Sales & Descriptive Findings
3. Customer Demographics
4. Exports

### 1. Imports

In [None]:
# Importing libraries

import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt
import seaborn as sns
import scipy

In [None]:
# Load the merged customer data frame from the project's prepared-data folder.
# NOTE: `path` is an absolute, machine-specific project root; adjust it when
# running this notebook elsewhere.

path = r'C:\Users\walls\Documents\Coding\Data Analysis\CareerFoundry\Data Immersion A4\Instacart Basket Analysis 01-25'
df_final = pd.read_pickle(
    os.path.join(path, 'Data', 'Prepared Data', 'customer_merged.pkl')
)

In [None]:
# Preview the first five rows to confirm the import worked
df_final.head(n=5)

In [None]:
# Check the (rows, columns) dimensions of the imported data frame
df_final.shape

In [None]:
# Print the column names, dtypes and memory usage of the data frame
df_final.info()

### 2. Sales & Descriptive Findings

In [None]:
# Descriptive statistics for the hour of day at which orders were placed
df_final.loc[:, 'order_hour_of_day'].describe()

In [None]:
# Create a histogram of the "order_hour_of_day" column.
# 24 bins over the explicit range 0-24 give exactly one bar per hour, with
# bin edges on whole hours. Without `range`, the 24 bins are stretched between
# the column's min and max, so the bars drift off the hour ticks on the x-axis.
# NOTE(review): assumes the column holds integer hours 0-23 - confirm with
# df_final['order_hour_of_day'].describe().

hour_bar_chart = df_final['order_hour_of_day'].plot.hist(
    bins=24,
    range=(0, 24),
    edgecolor='black',
    linewidth=1,
)

##### Observations:
1. Peak hours: 10 & 11am, and again around 2 & 3pm.
2. Late-night hours (10pm - 1am) are relatively low.
3. Evening hours are also low, even though they coincide with dinner time.
4. The lowest hours are in the early morning, 1-5am.

In [None]:
# Bar chart of how many rows fall under each "loyalty_flag" category

loyalty_bar_chart = (
    df_final['loyalty_flag']
    .value_counts()
    .plot(kind='bar')
)
# Keep the category labels horizontal so they are easy to read
plt.xticks(rotation=0)

##### Observations:
1. There are more regular customers than loyal or new customers.
2. Marketing towards more casual shoppers, such as regular customers, may bring in more loyal customers.

In [None]:
# Build a reproducible boolean mask for a 70/30 split, used for the expenditure analysis.
# Each row gets a uniform random draw; rows with a draw <= 0.7 are marked True.

np.random.seed(4)
dev = np.random.rand(df_final.shape[0]) <= 0.7

In [None]:
# Rows where the mask is True (about 70% of the data) go into `big`

big = df_final.loc[dev]

In [None]:
# Rows where the mask is False (about 30% of the data) go into `small`

small = df_final.loc[~dev]

In [None]:
# Row count of the full data frame, to compare against the two samples

df_final.shape[0]

In [None]:
# Combined row count of both samples; should equal the full data frame's row count
big.shape[0] + small.shape[0]

In [None]:
# Keep only the hour-of-day and price columns from the 30% sample
df_sample_set = small.loc[:, ['order_hour_of_day', 'prices']]

In [None]:
# Line chart of prices against the hour of day, drawn from the sampled columns
line = sns.lineplot(
    x='order_hour_of_day',
    y='prices',
    data=df_sample_set,
)

##### Observations:
1. Price fluctuates constantly from hour to hour.
2. There are peaks in price at 3 or 4am and around 7 or 8am, likely reflecting late-night shopping and early shopping for the day.
3. Perhaps people are pressed for time and ordering without thinking of price.

### 3. Customer Demographics

In [None]:
# Keep only the age and number-of-dependants columns from the 30% sample

df_sample_set2 = small.loc[:, ['age', 'num_dependants']]

In [None]:
# Line chart of number of dependants against age, drawn from the sampled columns

line_age_deps = sns.lineplot(
    x='age',
    y='num_dependants',
    data=df_sample_set2,
)

##### Observations:
1. Most customers have 1 dependant.
2. No one using the app has 0 dependants.
3. Older ages, such as 60-70, have a higher number of dependants.
4. 18-19 year olds have the lowest number of dependants.

In [None]:
# Scatterplot of income against age.
# NOTE(review): this plots every row of df_final rather than the 30% sample
# used for the line charts, so it may be slow to render.
age_income_scatterplot = sns.scatterplot(
    data=df_final,
    x='age',
    y='income',
)

##### Observations: 
1. Income under 200,000 is consistent across all age groups.
2. Those aged 40 - 80 more often have incomes between 200K and 300K.
3. No one aged 18 - 39 has an income over 400K.
4. A smaller population of those aged 40 to 80 makes 400K to 600K.

##### Summary
1. df_customer_op_merged is now df_final
2. df_final shape (32404859, 33)

### 4. Exports

In [None]:
# Save the "order_hour_of_day" histogram to the project's Visualizations folder
hour_bar_chart.figure.savefig(
    os.path.join(path, 'Analysis', 'Visualizations', 'hour_day_bar.png')
)

In [None]:
# Save the loyalty-flag bar chart to the project's Visualizations folder
loyalty_bar_chart.figure.savefig(
    os.path.join(path, 'Analysis', 'Visualizations', 'loyalty_bar.png')
)

In [None]:
# Save the hour-of-day vs. price line chart to the project's Visualizations folder
line.figure.savefig(
    os.path.join(path, 'Analysis', 'Visualizations', 'hour_day_price.png')
)

In [None]:
# Save the age vs. dependants line chart to the project's Visualizations folder
line_age_deps.figure.savefig(
    os.path.join(path, 'Analysis', 'Visualizations', 'age_deps.png')
)

In [None]:
# Save the age vs. income scatterplot to the project's Visualizations folder
age_income_scatterplot.figure.savefig(
    os.path.join(path, 'Analysis', 'Visualizations', 'age_income.png')
)