<h1 align="center">Python Assessment - week1</h1>

# Question 1: Local Retail Transaction Analyzer

In [4]:
import pandas as pd
import numpy as np

### 1. Load & Inspect Data:

In [5]:
import os
present_working_directory = os.getcwd()
os.listdir()

['Retail_Data.csv',
 'week1-Q1.ipynb',
 '.ipynb_checkpoints',
 'week2-Q2.ipynb',
 'Online Retail.xlsx',
 'Cleaned_Retail_Data.csv']

In [12]:
present_working_directory

'/home/varunm15t38hedu/assets/week1'

In [19]:
try:
    df = pd.read_csv("Retail_Data.csv")
    if os.getcwd() != present_working_directory:
        raise FileNotFoundError

except FileNotFoundError as e:
    print(f"File was not found the location: {present_working_directory}")
    print(e)

### 2. Clean Invalid Records:

In [20]:
df.describe()

Unnamed: 0,Transaction ID,Age,Quantity,Price per Unit,Total Amount
count,1000.0,1000.0,1000.0,1000.0,1000.0
mean,500.5,41.392,2.514,179.89,456.0
std,288.819436,13.68143,1.132734,189.681356,559.997632
min,1.0,18.0,1.0,25.0,25.0
25%,250.75,29.0,1.0,30.0,60.0
50%,500.5,42.0,3.0,50.0,135.0
75%,750.25,53.0,4.0,300.0,900.0
max,1000.0,64.0,4.0,500.0,2000.0


In [21]:
df.isnull().sum()

Transaction ID      0
Date                0
Customer ID         0
Gender              0
Age                 0
Product Category    0
Quantity            0
Price per Unit      0
Total Amount        0
dtype: int64

#### Not needed to use dropna() method

In [120]:
df = df[(df.Quantity<=0) | (df["Price per Unit"]<=0)]

### 3. Identify Unique Customers and Products:

In [34]:
customer_id = set(df["Customer ID"])
product_category = set(df["Product Category"])

In [121]:
print(customer_id)
print(product_category)

{'CUST520', 'CUST102', 'CUST279', 'CUST668', 'CUST413', 'CUST129', 'CUST084', 'CUST351', 'CUST228', 'CUST367', 'CUST542', 'CUST750', 'CUST175', 'CUST502', 'CUST970', 'CUST995', 'CUST011', 'CUST814', 'CUST565', 'CUST494', 'CUST046', 'CUST325', 'CUST856', 'CUST913', 'CUST957', 'CUST576', 'CUST156', 'CUST163', 'CUST699', 'CUST935', 'CUST053', 'CUST918', 'CUST854', 'CUST017', 'CUST800', 'CUST866', 'CUST851', 'CUST306', 'CUST924', 'CUST080', 'CUST241', 'CUST538', 'CUST702', 'CUST730', 'CUST626', 'CUST206', 'CUST161', 'CUST149', 'CUST458', 'CUST211', 'CUST075', 'CUST536', 'CUST647', 'CUST107', 'CUST569', 'CUST588', 'CUST777', 'CUST887', 'CUST960', 'CUST240', 'CUST805', 'CUST877', 'CUST127', 'CUST657', 'CUST806', 'CUST182', 'CUST474', 'CUST572', 'CUST671', 'CUST761', 'CUST460', 'CUST679', 'CUST138', 'CUST962', 'CUST534', 'CUST888', 'CUST781', 'CUST899', 'CUST469', 'CUST442', 'CUST562', 'CUST152', 'CUST809', 'CUST931', 'CUST968', 'CUST748', 'CUST574', 'CUST807', 'CUST200', 'CUST043', 'CUST083'

### 4. Text Cleaning:

In [43]:
df["Product Category"]= df["Product Category"].str.upper()

(or)

In [44]:
df["Product Category"] = pd.Series(list(map(lambda x: x.upper(),list(df["Product Category"]))))

### 5. Reusable Validation Function:

In [50]:
def is_valid(row):
    if row>0:
        return True
    return False

In [53]:
quantity_bool = df["Quantity"].apply(is_valid)
price_bool = df["Price per Unit"].apply(is_valid)

In [106]:
df = df[ quantity_bool & price_bool ]

### 6. Model with Classes:

In [107]:
class Customer:
    def __init__(self, customer_id):
        self.customer_id = customer_id
        self.total_spent = 0
        self.purchases = []
        
    def add_purchase(self, amount):
        self.purchases.append((self.customer_id, amount))
        self.total_spent += amount
        
    def print_purchases(self):
        print("List of Customer Purchases")
        print("<"+"="*30+">")
        for x in self.purchases:
            print(f"customer ID: {x[0]}")
            print(f"amount: {x[1]}")
            print("-"*20)
        print(f"Total Amount Spent by customer {self.customer_id} is: {self.total_spent}")

In [108]:
customer = Customer(1234)

In [109]:
customer.add_purchase(43)
customer.add_purchase(34)
customer.add_purchase(34)
customer.add_purchase(100)

In [110]:
customer.print_purchases()

List of Customer Purchases
customer ID: 1234
amount: 43
--------------------
customer ID: 1234
amount: 34
--------------------
customer ID: 1234
amount: 34
--------------------
customer ID: 1234
amount: 100
--------------------
Total Amount Spent by customer 1234 is: 211


### 7. Exception Handling:
o Already covered in Step 1.


### 8. Calculate Total Revenue:

In [123]:
df["total_revenue"] = df.Quantity * df["Price per Unit"]

In [124]:
df["total_revenue"]

Series([], Name: total_revenue, dtype: int64)

### 9. Summarize Revenue by Category:

In [112]:
df["Product Category"].unique()

array(['BEAUTY', 'CLOTHING', 'ELECTRONICS'], dtype=object)

In [113]:
category_revenue = df.groupby("Product Category")["total_revenue"].sum()

In [114]:
category_revenue = category_revenue.to_dict()

In [115]:
sorted_category_revenue = sorted(category_revenue.items(), key=lambda x: x[1], reverse=True)

In [116]:
sorted_category_revenue = dict(sorted_category_revenue)

In [117]:
for x in sorted_category_revenue:
    print(f"Category: {x}\nTotal_Price: {sorted_category_revenue[x]}")
    print("*"*20)

Category: ELECTRONICS
Total_Price: 156905
********************
Category: CLOTHING
Total_Price: 155580
********************
Category: BEAUTY
Total_Price: 143515
********************


### 10. Export Cleaned Data:

In [119]:
df.to_csv("assets/week1/Cleaned_Retail_Data.csv")

<h2 align="center">🌻🌻🌻🌺🌻🌻🌻</h2>