## Case Study Questions

1. What is the total amount each customer spent at the restaurant?
2. How many days has each customer visited the restaurant?
3. What was the first item from the menu purchased by each customer?
4. What is the most purchased item on the menu and how many times was it purchased by all customers?
5. Which item was the most popular for each customer?
6. Which item was purchased first by the customer after they became a member?
7. Which item was purchased just before the customer became a member?
8. What is the total items and amount spent for each member before they became a member?
9.  If each $1 spent equates to 10 points and sushi has a 2x points multiplier - how many points would each customer have?
10. In the first week after a customer joins the program (including their join date) they earn 2x points on all items, not just sushi - how many points do customer A and B have at the end of January?

## import data

In [2]:
import polars as pl
import duckdb

db = '~/repos/db-butters/8-week-sql-challenge/case-1/case-1.duckdb'
con = duckdb.connect(db, read_only=True)

sales = con.sql('from dannys_diner.sales;').pl()
menu = con.sql('from dannys_diner.menu;').pl()
members = con.sql('from dannys_diner.members;').pl()

con.close()

### sales table

In [65]:
display(sales.head(10))

customer_id,order_date,product_id
str,date,i32
"""A""",2021-01-01,1
"""A""",2021-01-01,2
"""A""",2021-01-07,2
"""A""",2021-01-10,3
"""A""",2021-01-11,3
"""A""",2021-01-11,3
"""B""",2021-01-01,2
"""B""",2021-01-02,2
"""B""",2021-01-04,1
"""B""",2021-01-11,1


### menu table

In [30]:
display(menu)

product_id,product_name,price
i32,str,i32
1,"""sushi""",10
2,"""curry""",15
3,"""ramen""",12


### members table

In [31]:
display(members)

customer_id,join_date
str,date
"""A""",2021-01-07
"""B""",2021-01-09


## 1. total spent per customer

In [35]:
(sales.join(menu, on='product_id')
      .group_by('customer_id')
      .agg(
          pl.col('price').sum().alias('total_spent')
      ).sort('customer_id')
)

customer_id,total_spent
str,i32
"""A""",76
"""B""",74
"""C""",36


## 2. customer days at restaurant

In [58]:
(sales.group_by('customer_id')
      .agg(pl.col('order_date').n_unique().alias('days_visited'))
      .sort('customer_id')
)

customer_id,days_visited
str,u32
"""A""",4
"""B""",6
"""C""",2


## 3. first item purchased by each customer

In [54]:
(sales.join(menu, on='product_id')
      .group_by(['customer_id', 'order_date'])
      .agg(pl.col('product_id'), pl.col('product_name'))
      .with_columns(pl.col('order_date').rank('dense').over('customer_id').alias('rank'))
      .filter(pl.col('rank') == 1)
      .drop('order_date', 'rank')
      .sort('customer_id')
)

customer_id,product_id,product_name
str,list[i32],list[str]
"""A""","[1, 2]","[""sushi"", ""curry""]"
"""B""",[2],"[""curry""]"
"""C""","[3, 3]","[""ramen"", ""ramen""]"


## 4. most purchased item and number of purchases by each customer

In [30]:
n_max_purchases = (sales.group_by('product_id')
      .agg(pl.col('customer_id').len().alias('purchases'))
      .select(pl.col('purchases').max())
).item()

most_purchased_item = (sales.group_by('product_id')
      .agg(pl.col('customer_id').len().alias('purchases'))
      .filter(pl.col('purchases') == n_max_purchases)
      .join(menu, on='product_id')
      .select('product_id', 'product_name', 'price', 'purchases')
)

display(most_purchased_item)

product_id,product_name,price,purchases
i32,str,i32,u32
3,"""ramen""",12,8


In [66]:
customer_mpi_purchases = (sales.filter(pl.col('product_id') == most_purchased_item.select(pl.col('product_id')).item())
      .group_by('customer_id')
      .agg(pl.col('product_id').len().alias('purchases'))
      .sort('customer_id')
)

customer_mpi_purchases

customer_id,purchases
str,u32
"""A""",3
"""B""",2
"""C""",3


## 5. most popular item for each customer

In [67]:
most_popular_item_per_customer = (sales.group_by('customer_id', 'product_id')
      .agg(pl.col('order_date').len().alias('purchases'))
      .sort(['customer_id', 'product_id'])
      .with_columns(pl.col('purchases').rank('dense', descending=True).over(['customer_id']).alias('rank'))
      .filter(pl.col('rank') == 1)
      .join(menu, on='product_id')
      .group_by('customer_id')
      .agg(
          pl.col('product_id').alias('most_popular_item_ids'),
          pl.col('product_name').alias('most_popular_item_names'),
      )
)

most_popular_item_per_customer

customer_id,most_popular_item_ids,most_popular_item_names
str,list[i32],list[str]
"""A""",[3],"[""ramen""]"
"""B""","[1, 2, 3]","[""sushi"", ""curry"", ""ramen""]"
"""C""",[3],"[""ramen""]"


## 6. first item purchase after membership

In [14]:
(sales.join(members, how='left', on='customer_id')
      .filter(pl.col('order_date') >= pl.col('join_date'))
      .sort(['order_date'])
      .with_columns(pl.col('order_date').rank('dense').over('customer_id').alias('rank'))
      .filter(pl.col('rank') == 1)
      .drop(['rank', 'join_date'])
      .join(menu, how='left', on='product_id')
      .select(['customer_id', 'product_id', 'product_name', 'price', 'order_date'])
)

customer_id,product_id,product_name,price,order_date
str,i32,str,i32,date
"""A""",2,"""curry""",15,2021-01-07
"""B""",1,"""sushi""",10,2021-01-11


## 7. last item purchased before becoming member

In [14]:
(sales.join(members, how='left', on='customer_id')
      .filter(pl.col('order_date') < pl.col('join_date'))
      .with_columns(pl.col('order_date').rank('dense', descending=True).over('customer_id').alias('rank'))
      .filter(pl.col('rank') == 1)
      .join(menu, how='left', on='product_id')
      .group_by('customer_id')
      .agg(
          pl.col('product_id').alias('product_ids'),
          pl.col('product_name').alias('product_names'),
      ).sort('customer_id')
)

customer_id,product_ids,product_names
str,list[i32],list[str]
"""A""","[1, 2]","[""sushi"", ""curry""]"
"""B""",[1],"[""sushi""]"


## 8. number of items and total spent before becoming a member

In [17]:
(sales.join(members, how='left', on='customer_id')
      .join(menu, how='left', on='product_id')
      .filter((pl.col('order_date') < pl.col('join_date')) | pl.col('join_date').is_null())
      .group_by('customer_id')
      .agg(
          pl.col('product_id').len().alias('total_items_purchased'),
          pl.col('price').sum().alias('total_spent'),
      )
      .sort('customer_id')
)

customer_id,total_items_purchased,total_spent
str,u32,i32
"""A""",2,25
"""B""",3,40
"""C""",3,36


## 9. points accumulation per customer
(10 points for every dollar spent, 2x multiplier for sushi)

In [8]:
(sales.join(menu, how='left', on='product_id')
      .with_columns(pl.when(pl.col('product_id') == 1)
                      .then(pl.col('price') * 10 * 2)
                      .otherwise(pl.col('price') * 10)
                      .alias('points'))
      .group_by('customer_id')
      .agg(pl.col('points').sum())
      .sort('customer_id')
)

customer_id,points
str,i32
"""A""",860
"""B""",940
"""C""",360


## 10. enhanced points program

In [37]:
(sales.join(menu, how='left', on='product_id')
      .join(members, how='left', on='customer_id')
      .with_columns(
          pl.when(
                (((pl.col('order_date') - pl.col('join_date')).dt.total_days() >= 0)
                    & ((pl.col('order_date') - pl.col('join_date')).dt.total_days() <= 7))
                | (pl.col('product_id') == 1)
            ).then(pl.col('price') * 10 * 2)
            .otherwise(pl.col('price') * 10)
            .alias('points')
      )
      .group_by('customer_id')
      .agg(pl.col('points').sum())
      .sort('customer_id')
)

customer_id,points
str,i32
"""A""",1370
"""B""",1060
"""C""",360


## bonus - join all the things

In [7]:
(sales.join(menu, how='left', on='product_id')
      .join(members, how='left', on='customer_id')
      .with_columns(
          pl.when(pl.col('order_date') >= pl.col('join_date'))
            .then(pl.lit('Y'))
            .otherwise(pl.lit('N'))
            .alias('member')
      )
      .select(['customer_id', 'order_date', 'product_name', 'price', 'member'])
      .sort(['customer_id', 'order_date', 'product_name'])
)

customer_id,order_date,product_name,price,member
str,date,str,i32,str
"""A""",2021-01-01,"""curry""",15,"""N"""
"""A""",2021-01-01,"""sushi""",10,"""N"""
"""A""",2021-01-07,"""curry""",15,"""Y"""
"""A""",2021-01-10,"""ramen""",12,"""Y"""
"""A""",2021-01-11,"""ramen""",12,"""Y"""
…,…,…,…,…
"""B""",2021-01-16,"""ramen""",12,"""Y"""
"""B""",2021-02-01,"""ramen""",12,"""Y"""
"""C""",2021-01-01,"""ramen""",12,"""N"""
"""C""",2021-01-01,"""ramen""",12,"""N"""


## bonus - rank all the things

In [22]:
(sales.join(menu, how='left', on='product_id')
      .join(members, how='left', on='customer_id')
      .with_columns(
          pl.when(pl.col('order_date') >= pl.col('join_date'))
            .then(pl.lit('Y'))
            .otherwise(pl.lit('N'))
            .alias('member')
      )
      .select(['customer_id', 'order_date', 'product_name', 'price', 'member'])
      .sort(['customer_id', 'order_date', 'product_name'])
      .with_columns(
          pl.when(pl.col('member') == 'Y')
            .then(pl.col('order_date').rank('dense').over(['customer_id', 'member'], order_by=['order_date', 'product_name']).alias('ranking'))
            .otherwise(pl.lit(None))
      )
)

customer_id,order_date,product_name,price,member,ranking
str,date,str,i32,str,u32
"""A""",2021-01-01,"""curry""",15,"""N""",
"""A""",2021-01-01,"""sushi""",10,"""N""",
"""A""",2021-01-07,"""curry""",15,"""Y""",1
"""A""",2021-01-10,"""ramen""",12,"""Y""",2
"""A""",2021-01-11,"""ramen""",12,"""Y""",3
…,…,…,…,…,…
"""B""",2021-01-16,"""ramen""",12,"""Y""",2
"""B""",2021-02-01,"""ramen""",12,"""Y""",3
"""C""",2021-01-01,"""ramen""",12,"""N""",
"""C""",2021-01-01,"""ramen""",12,"""N""",
