# SQL-GRECO 

In [6]:
# Importing Libraries 
import pandas as pd
import sqlite3 as sql

# Load each CSV extract from the working directory into its own DataFrame.
campaign_desc = pd.read_csv("./campaign_desc.csv")
campaign_table = pd.read_csv("./campaign_table.csv")
coupon = pd.read_csv("./coupon.csv")
coupon_redempt = pd.read_csv("./coupon_redempt.csv")
hh_demographic = pd.read_csv("./hh_demographic.csv")
product = pd.read_csv("./product.csv")
transactions = pd.read_csv("./transactions.csv")

# Connect to the SQLite database file and grab the cursor that the query
# cells below execute against.
database = "./Database.db"
connection_obj = sql.connect(database)
cursor_obj = connection_obj.cursor()

# Write every DataFrame to a table named after it. `if_exists="replace"`
# overwrites a table left over from a previous run, and `index=False` keeps
# the DataFrame index out of the table.
campaign_desc.to_sql(name="campaign_desc", con=connection_obj, if_exists="replace", index=False)
campaign_table.to_sql(name="campaign_table", con=connection_obj, if_exists="replace", index=False)
coupon.to_sql(name="coupon", con=connection_obj, if_exists="replace", index=False)
coupon_redempt.to_sql(name="coupon_redempt", con=connection_obj, if_exists="replace", index=False)
hh_demographic.to_sql(name="hh_demographic", con=connection_obj, if_exists="replace", index=False)
product.to_sql(name="product", con=connection_obj, if_exists="replace", index=False)
transactions.to_sql(name="transactions", con=connection_obj, if_exists="replace", index=False)

312357

## Data dictionary ##

### Transaction ###
- `household_key`: The key indicating the household (customer) who bought the product
- `BASKET_ID`: The basket (bill) this item was sold as a part of
- `DAY`: Day (in absolute numbers)
- `PRODUCT_ID`: Product code
- `QUANTITY`: No. of units sold
- `SALES_VALUE`: Value of the sale in $
- `STORE_ID`: Store code
- `RETAIL_DISC`: Discount applied on the item (not as a part of a coupon/campaign)
- `TRANS_TIME`: Time of the sale (in HHMM format)
- `WEEK_NO`: Week number
- `COUPON_DISC`: Discount applied on the item as part of a coupon

### Product ###
- `PRODUCT_ID`: Product code
- `MANUFACTURER`: Manufacturer code
- `DEPARTMENT`: Department of the product
- `BRAND`: Brand type of the product – National or Private label
- `COMMODITY_DESC`: Product commodity type description
- `SUB_COMMODITY_DESC`: Product sub-commodity type description
- `CURR_SIZE_OF_PRODUCT`: Size of the product – In various units

### Coupon ###
- `COUPON_UPC`: Coupon Code
- `PRODUCT_ID`: Product that this code was applicable to
- `CAMPAIGN`: Campaign in which this coupon was offered

### Coupon_redempt ###
- `household_key`: The household (customer) who redeemed this coupon
- `DAY`: Day on which the coupon was redeemed
- `COUPON_UPC`: Coupon Code
- `CAMPAIGN`: Campaign in which this coupon was offered

### hh_demographic ###
- `AGE_DESC`: Age group of the household
- `INCOME_DESC`: Income range of the household
- `HOMEOWNER_DESC`: Housing status of the household – Renter/Homeowner etc.
- `HH_COMP_DESC`: Composition of the people in the household
- `HOUSEHOLD_SIZE_DESC`: Number of people in the household
- `KID_CATEGORY_DESC`: Number of kids in the household if any
- `household_key`: Household (customer) code

### Campaign_table ###
- `DESCRIPTION`: Type of the campaign
- `household_key`: Household(s) this campaign was applicable to
- `CAMPAIGN`: Campaign code

### Campaign_desc ###
- `DESCRIPTION`: Type of the campaign
- `CAMPAIGN`: Campaign code
- `START_DAY`: Start date of the campaign
- `END_DAY`: End date of the campaign


### Questions 

1.	Which are the top 3 manufacturers for the retailer in terms of the number of distinct products produced?
2.	Total number of days that each type of campaign (Type A, Type B & Type C) ran. Which campaign 
    (not the description, but the campaign code, e.g., 8, 26, etc.) ran for the most days?
3.	How many coupons are mapped to each Campaign type? (e.g., Type A - 20 etc.)
4.	How much has the company spent on retail discounts and campaign-based discounts? Which is higher?
5.	What is the distribution of low, medium & high-income houses targeted by each type of campaign? 
    (Default - Low < 40K, Medium 40K to 80K & High 80K<) {you could change the default threshold, 
    just provide the reasoning for new threshold in the comments. Extra credits will be provided for improved thresholds}
6.	What is the distribution of purchase value for Single, Adults with no kids, and Adults with kids households?
7.	What are the total sales generated by products that have no defined size?
8.	Which is the busiest hour of sales across all stores? Justify your approach in comments.
9.	Assuming Day = 0 to be Jan 1, 2020, how much in sales was made in December 2020?
10.	Find the number of households who have made purchases in any number of consecutive weeks
11.	Does the increase in number of kids in a household correspond to a higher average purchase value 
    of each purchase by a household? (Calculate for each purchase and not each item)
12.	Were there any coupons that were not redeemed by customers? If so, how many?
13.	Which two items were most frequently purchased together? If there’s more than one such pair, 
    display the top 5 in order of product ID.
14.	Assuming cost to run all three types of campaign is same, which campaign turned out to be 
    most profitable for the retailer? Justify your approach in the comments
15. What is the average % increase/decrease in sales value (of each purchase by a household) in all purchases 
    following a purchase with a discount applied from a coupon? ** ( Omitted Question )

In [7]:
# 1. Which are the top 3 manufacturers for the retailer in terms of number of products produced?

sql_code = """
-- WRITE YOUR CODE HERE 
"""

cursor_obj.execute(sql_code)     # Run the statement
connection_obj.commit()          # Commit changes (if any)
results = cursor_obj.fetchall()  # Fetch all the result rows

# Name the DataFrame columns after the result-set columns reported by the
# cursor. `description` is None when the statement produced no result
# columns, so fall back to pandas' default 0..n-1 labels in that case.
column_names = (
    [column[0] for column in cursor_obj.description]
    if cursor_obj.description
    else None
)
df = pd.DataFrame(results, columns=column_names)
df

In [8]:
# 2. Which campaign (not the description, but the campaign code, e.g., 8, 26, etc.) ran for the most days?

sql_code = """
-- WRITE YOUR CODE HERE 
"""

cursor_obj.execute(sql_code)     # Run the statement
connection_obj.commit()          # Commit changes (if any)
results = cursor_obj.fetchall()  # Fetch all the result rows

# Name the DataFrame columns after the result-set columns reported by the
# cursor. `description` is None when the statement produced no result
# columns, so fall back to pandas' default 0..n-1 labels in that case.
column_names = (
    [column[0] for column in cursor_obj.description]
    if cursor_obj.description
    else None
)
df = pd.DataFrame(results, columns=column_names)
df

In [9]:
# 2.1 Total number of days that each type of campaign (Type A, Type B & Type C) ran.

sql_code = """
-- WRITE YOUR CODE HERE 
"""

cursor_obj.execute(sql_code)     # Run the statement
connection_obj.commit()          # Commit changes (if any)
results = cursor_obj.fetchall()  # Fetch all the result rows

# Name the DataFrame columns after the result-set columns reported by the
# cursor. `description` is None when the statement produced no result
# columns, so fall back to pandas' default 0..n-1 labels in that case.
column_names = (
    [column[0] for column in cursor_obj.description]
    if cursor_obj.description
    else None
)
df = pd.DataFrame(results, columns=column_names)
df

In [10]:
# 3. How many coupons are mapped to each Campaign type? (e.g., Type A - 20 etc.)

sql_code = """
-- WRITE YOUR CODE HERE 
"""

cursor_obj.execute(sql_code)     # Run the statement
connection_obj.commit()          # Commit changes (if any)
results = cursor_obj.fetchall()  # Fetch all the result rows

# Name the DataFrame columns after the result-set columns reported by the
# cursor. `description` is None when the statement produced no result
# columns, so fall back to pandas' default 0..n-1 labels in that case.
column_names = (
    [column[0] for column in cursor_obj.description]
    if cursor_obj.description
    else None
)
df = pd.DataFrame(results, columns=column_names)
df

In [11]:
# 4. How much has the company spent on retail discounts and campaign-based discounts? Which is higher?

sql_code = """
-- WRITE YOUR CODE HERE 
"""

cursor_obj.execute(sql_code)     # Run the statement
connection_obj.commit()          # Commit changes (if any)
results = cursor_obj.fetchall()  # Fetch all the result rows

# Name the DataFrame columns after the result-set columns reported by the
# cursor. `description` is None when the statement produced no result
# columns, so fall back to pandas' default 0..n-1 labels in that case.
column_names = (
    [column[0] for column in cursor_obj.description]
    if cursor_obj.description
    else None
)
df = pd.DataFrame(results, columns=column_names)
df

In [12]:
# 5. What is the distribution of low, medium & high-income houses targeted by each type of campaign?
# (Default - Low < 40K, Medium 40K to 80K & High 80K<)
# {you could change the default threshold, just provide the reasoning for new threshold in the comments.
# Extra credits will be provided for improved thresholds}

sql_code = """
-- WRITE YOUR CODE HERE 
"""

cursor_obj.execute(sql_code)     # Run the statement
connection_obj.commit()          # Commit changes (if any)
results = cursor_obj.fetchall()  # Fetch all the result rows

# Name the DataFrame columns after the result-set columns reported by the
# cursor. `description` is None when the statement produced no result
# columns, so fall back to pandas' default 0..n-1 labels in that case.
column_names = (
    [column[0] for column in cursor_obj.description]
    if cursor_obj.description
    else None
)
df = pd.DataFrame(results, columns=column_names)
df

In [13]:
# 6. What is the distribution of purchase value for Single, Adults with no kids, and Adults with kids households?

sql_code = """
-- WRITE YOUR CODE HERE 
"""

cursor_obj.execute(sql_code)     # Run the statement
connection_obj.commit()          # Commit changes (if any)
results = cursor_obj.fetchall()  # Fetch all the result rows

# Name the DataFrame columns after the result-set columns reported by the
# cursor. `description` is None when the statement produced no result
# columns, so fall back to pandas' default 0..n-1 labels in that case.
column_names = (
    [column[0] for column in cursor_obj.description]
    if cursor_obj.description
    else None
)
df = pd.DataFrame(results, columns=column_names)
df

In [14]:
# 7. What are the total sales generated by products that have no defined size?

sql_code = """
-- WRITE YOUR CODE HERE 
"""

cursor_obj.execute(sql_code)     # Run the statement
connection_obj.commit()          # Commit changes (if any)
results = cursor_obj.fetchall()  # Fetch all the result rows

# Name the DataFrame columns after the result-set columns reported by the
# cursor. `description` is None when the statement produced no result
# columns, so fall back to pandas' default 0..n-1 labels in that case.
column_names = (
    [column[0] for column in cursor_obj.description]
    if cursor_obj.description
    else None
)
df = pd.DataFrame(results, columns=column_names)
df

In [15]:
# 8. Which is the busiest hour of sales across all stores? Justify your approach in comments.

sql_code = """
-- WRITE YOUR CODE HERE 
"""

cursor_obj.execute(sql_code)     # Run the statement
connection_obj.commit()          # Commit changes (if any)
results = cursor_obj.fetchall()  # Fetch all the result rows

# Name the DataFrame columns after the result-set columns reported by the
# cursor. `description` is None when the statement produced no result
# columns, so fall back to pandas' default 0..n-1 labels in that case.
column_names = (
    [column[0] for column in cursor_obj.description]
    if cursor_obj.description
    else None
)
df = pd.DataFrame(results, columns=column_names)
df

In [16]:
# 9. Assuming Day = 0 to be Jan 1, 2020, how much in sales was made in December 2020?

sql_code = """
-- WRITE YOUR CODE HERE 
"""

cursor_obj.execute(sql_code)     # Run the statement
connection_obj.commit()          # Commit changes (if any)
results = cursor_obj.fetchall()  # Fetch all the result rows

# Name the DataFrame columns after the result-set columns reported by the
# cursor. `description` is None when the statement produced no result
# columns, so fall back to pandas' default 0..n-1 labels in that case.
column_names = (
    [column[0] for column in cursor_obj.description]
    if cursor_obj.description
    else None
)
df = pd.DataFrame(results, columns=column_names)
df

In [17]:
# 10. Find the number of households who have made purchases in any number of consecutive weeks

sql_code = """
-- WRITE YOUR CODE HERE 
"""

cursor_obj.execute(sql_code)     # Run the statement
connection_obj.commit()          # Commit changes (if any)
results = cursor_obj.fetchall()  # Fetch all the result rows

# Name the DataFrame columns after the result-set columns reported by the
# cursor. `description` is None when the statement produced no result
# columns, so fall back to pandas' default 0..n-1 labels in that case.
column_names = (
    [column[0] for column in cursor_obj.description]
    if cursor_obj.description
    else None
)
df = pd.DataFrame(results, columns=column_names)
df

In [18]:
# 11. Does the increase in number of kids in a household correspond to a higher
# average purchase value of each purchase by a household? (Calculate for each purchase and not each item)

sql_code = """
-- WRITE YOUR CODE HERE 
"""

cursor_obj.execute(sql_code)     # Run the statement
connection_obj.commit()          # Commit changes (if any)
results = cursor_obj.fetchall()  # Fetch all the result rows

# Name the DataFrame columns after the result-set columns reported by the
# cursor. `description` is None when the statement produced no result
# columns, so fall back to pandas' default 0..n-1 labels in that case.
column_names = (
    [column[0] for column in cursor_obj.description]
    if cursor_obj.description
    else None
)
df = pd.DataFrame(results, columns=column_names)
df

In [19]:
# 12. Were there any coupons that were not redeemed by customers? If so, how many?

sql_code = """
-- WRITE YOUR CODE HERE 
"""

cursor_obj.execute(sql_code)     # Run the statement
connection_obj.commit()          # Commit changes (if any)
results = cursor_obj.fetchall()  # Fetch all the result rows

# Name the DataFrame columns after the result-set columns reported by the
# cursor. `description` is None when the statement produced no result
# columns, so fall back to pandas' default 0..n-1 labels in that case.
column_names = (
    [column[0] for column in cursor_obj.description]
    if cursor_obj.description
    else None
)
df = pd.DataFrame(results, columns=column_names)
df

In [20]:
# 13. Which two items were most frequently purchased together?
# If there’s more than one such pair, display the top 5 in order of product ID.

sql_code = """
-- WRITE YOUR CODE HERE 
"""

cursor_obj.execute(sql_code)     # Run the statement
connection_obj.commit()          # Commit changes (if any)
results = cursor_obj.fetchall()  # Fetch all the result rows

# Name the DataFrame columns after the result-set columns reported by the
# cursor. `description` is None when the statement produced no result
# columns, so fall back to pandas' default 0..n-1 labels in that case.
column_names = (
    [column[0] for column in cursor_obj.description]
    if cursor_obj.description
    else None
)
df = pd.DataFrame(results, columns=column_names)
df

In [21]:
# 14. Assuming cost to run all three types of campaign is same,
# which campaign turned out to be most profitable for the retailer? Justify your approach in the comments

sql_code = """
-- WRITE YOUR CODE HERE 
"""

cursor_obj.execute(sql_code)     # Run the statement
connection_obj.commit()          # Commit changes (if any)
results = cursor_obj.fetchall()  # Fetch all the result rows

# Name the DataFrame columns after the result-set columns reported by the
# cursor. `description` is None when the statement produced no result
# columns, so fall back to pandas' default 0..n-1 labels in that case.
column_names = (
    [column[0] for column in cursor_obj.description]
    if cursor_obj.description
    else None
)
df = pd.DataFrame(results, columns=column_names)
df

In [22]:
# 15. What is the average % increase/decrease in sales value (of each purchase by a household) in all purchases
#     following a purchase with a discount applied from a coupon?

sql_code = """
-- WRITE YOUR CODE HERE 
"""

cursor_obj.execute(sql_code)     # Run the statement
connection_obj.commit()          # Commit changes (if any)
results = cursor_obj.fetchall()  # Fetch all the result rows

# Name the DataFrame columns after the result-set columns reported by the
# cursor. `description` is None when the statement produced no result
# columns, so fall back to pandas' default 0..n-1 labels in that case.
column_names = (
    [column[0] for column in cursor_obj.description]
    if cursor_obj.description
    else None
)
df = pd.DataFrame(results, columns=column_names)
df