<h1 align="center">Python Assessment - week1</h1>

## Question 2: Online Retail System Analytics 

### 1. Environment Setup and Data Load:

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the raw transaction workbook from the notebook's working directory.
df = pd.read_excel(io="Online Retail.xlsx")

In [3]:
# Preview the first rows to confirm the columns loaded as expected.
df.head()

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom


In [4]:
# (rows, columns) of the raw load; baseline for the validation step below.
df.shape

(541909, 8)

### 2. Basic Validation:

In [5]:
# Keep only rows with a strictly positive quantity and unit price.
# The explicit .copy() gives `df` its own data: later cells assign columns on
# this frame, and assigning into a boolean-indexed slice triggers
# SettingWithCopyWarning / ambiguous view-vs-copy behaviour.
df = df[(df.Quantity > 0) & (df.UnitPrice > 0)].copy()

In [6]:
# Display the filtered frame (the notebook repr elides the middle rows).
df

Unnamed: 0,InvoiceNo,StockCode,Description,Quantity,InvoiceDate,UnitPrice,CustomerID,Country
0,536365,85123A,WHITE HANGING HEART T-LIGHT HOLDER,6,2010-12-01 08:26:00,2.55,17850.0,United Kingdom
1,536365,71053,WHITE METAL LANTERN,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
2,536365,84406B,CREAM CUPID HEARTS COAT HANGER,8,2010-12-01 08:26:00,2.75,17850.0,United Kingdom
3,536365,84029G,KNITTED UNION FLAG HOT WATER BOTTLE,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
4,536365,84029E,RED WOOLLY HOTTIE WHITE HEART.,6,2010-12-01 08:26:00,3.39,17850.0,United Kingdom
...,...,...,...,...,...,...,...,...
541904,581587,22613,PACK OF 20 SPACEBOY NAPKINS,12,2011-12-09 12:50:00,0.85,12680.0,France
541905,581587,22899,CHILDREN'S APRON DOLLY GIRL,6,2011-12-09 12:50:00,2.10,12680.0,France
541906,581587,23254,CHILDRENS CUTLERY DOLLY GIRL,4,2011-12-09 12:50:00,4.15,12680.0,France
541907,581587,23255,CHILDRENS CUTLERY CIRCUS PARADE,4,2011-12-09 12:50:00,4.15,12680.0,France


In [7]:
# Shape after the positive quantity/price filter, to compare with the raw shape.
df.shape

(530104, 8)

Row count dropped from 541909 to 530104 after removing rows with non-positive Quantity or UnitPrice.

In [80]:
# Store customer ids as text rather than numbers.
# NOTE(review): the preview shows CustomerID as a float (e.g. 17850.0), so the
# resulting strings presumably keep the ".0" suffix - confirm that is intended.
df["CustomerID"] = df["CustomerID"].astype(str)

### 3. String and Data Operations:

In [26]:
# Independent working copy for the string-cleaning steps; `df` stays untouched.
df1 = df.copy(deep=True)

In [27]:
# Convert the working copy's own CustomerID column to text. Reading from `df1`
# (not `df`) keeps this cell independent of whatever state `df` is in and of
# index alignment between the two frames.
df1["CustomerID"] = df1["CustomerID"].astype(str)

In [31]:
# Trim leading/trailing whitespace from product descriptions. The source is
# `df1` itself so the cleaned column always comes from the frame being cleaned,
# rather than being index-aligned in from `df`.
df1["Description"] = df1["Description"].str.strip()

### 4. Reusable Function and File Handling:

In [28]:
def clean_row(row):
    """Return ``True`` when ``row`` is strictly greater than zero, else ``False``.

    Despite the parameter name, the notebook passes this to ``Series.apply``,
    so ``row`` is a single cell value (a quantity or a unit price), not a
    whole DataFrame row.
    """
    return bool(row > 0)

In [30]:
# Apply the reusable check column by column: first drop rows whose unit price
# fails clean_row, then drop rows whose quantity fails it.
df1 = df1.loc[df1["UnitPrice"].apply(clean_row)]
df1 = df1.loc[df1["Quantity"].apply(clean_row)]

### 5. Modeling with Classes:

In [33]:
class Product:
    """A catalogue item identified by stock code, with a running tally of units sold."""

    def __init__(self, stock_code, description):
        """Store the identifying fields and start with no recorded sales."""
        self.stock_code, self.description = stock_code, description
        # Running total of units sold plus the individual sale records.
        self.total_units_sold = 0
        self.products_sold = []

    def update_units(self, quantity):
        """Record one sale of ``quantity`` units.

        Appends a ``(stock_code, quantity)`` tuple to ``products_sold`` and
        adds ``quantity`` to ``total_units_sold``.
        """
        sale_record = (self.stock_code, quantity)
        self.products_sold.append(sale_record)
        self.total_units_sold += quantity
        

In [34]:
class Customer:
    """A customer with a home country and a running record of purchases."""

    def __init__(self, customer_id, country):
        """Store the identifying fields and start with no purchases."""
        self.customer_id, self.country = customer_id, country
        # Running spend total plus the individual purchase records.
        self.total_spent = 0
        self.purchases = []

    def add_purchase(self, amount):
        """Record one purchase worth ``amount``.

        Appends a ``(customer_id, amount)`` tuple to ``purchases`` and adds
        ``amount`` to ``total_spent``.
        """
        purchase_record = (self.customer_id, amount)
        self.purchases.append(purchase_record)
        self.total_spent += amount

### 6. Inheritance and Encapsulation:

In [57]:
class VIPCustomer(Customer):
    """Customer whose purchases are each reduced by a fixed percentage discount."""

    def __init__(self, customer_id, country):
        """Create the customer and set the VIP discount rate."""
        super().__init__(customer_id, country)
        # Discount rate in percent. It must stay a rate: overwriting it with a
        # currency amount would corrupt every subsequent purchase.
        self.discount = 10

    def add_purchase(self, amount):
        """Record a purchase of ``amount`` after applying the VIP discount.

        The discount is applied to this purchase only (not re-applied to the
        running total), and the discounted amount is recorded through
        ``Customer.add_purchase`` so ``purchases`` and ``total_spent`` stay
        consistent. Prints the updated running total.
        """
        discounted_amount = amount - (self.discount / 100) * amount
        super().add_purchase(discounted_amount)
        print(f"Total amount: {self.total_spent}")

In [58]:
# Example VIP customer used to demonstrate the discounted purchase below.
vip = VIPCustomer(customer_id=1234, country="India")

In [59]:
# Record a purchase; the method prints the customer's running total.
vip.add_purchase(amount=1200)

Total amount: 1080.0


### 7. Error and Logging:

In [79]:
# Run the probe assignments on a small scratch copy. Writing these constants
# straight into `df` would overwrite CustomerID/Quantity/UnitPrice for every
# row, and the revenue and aggregation cells below still read those columns.
df_probe = df.head().copy()
try:
    df_probe.CustomerID = ""
    df_probe.Quantity = 23
    df_probe.UnitPrice = 293
except ValueError as ve:
    # The specific handler must come before `except Exception`; placed after
    # it, this clause can never run.
    print(ve)
except Exception as e:
    print(e)
    # NOTE(review): "erros.log" looks like a typo for "errors.log" - confirm
    # before renaming, in case something else reads this path.
    with open("erros.log","w") as f:
        # file.write() only accepts str, so serialise the exception first.
        f.write(str(e))
    

### 8. Revenue Calculations with NumPy:

In [60]:
# Pull the two numeric columns out of the frame as standalone NumPy arrays.
quantity, unit_price = (
    np.array(df[column]) for column in ("Quantity", "UnitPrice")
)

In [67]:
# Per-row revenue: element-wise product of units sold and price per unit.
revenue = np.multiply(quantity, unit_price)

In [68]:
# Inspect the computed per-row revenue array.
revenue

array([15.3 , 20.34, 22.  , ..., 16.6 , 16.6 , 14.85])

In [69]:
# Attach the per-row revenue as a new column. `revenue` was derived from this
# frame's Quantity and UnitPrice columns, so it lines up with `df` row by row
# only while `df` has not been filtered or reordered since those arrays were taken.
df["Revenue"] = revenue

In [61]:
# Average number of units per transaction line.
quantity.mean()

10.542037034242338

In [62]:
# Largest quantity on a single transaction line.
quantity.max()

80995

In [63]:
# Smallest quantity on a single transaction line.
quantity.min()

1

In [64]:
# Average unit price across transaction lines.
unit_price.mean()

3.90762524712132

In [65]:
# Highest unit price on any transaction line.
unit_price.max()

13541.33

In [66]:
# Lowest unit price on any transaction line.
unit_price.min()

0.001

### 9. Data Aggregation with Pandas:

In [71]:
# Sum revenue for every (Country, StockCode) pair; the result is a Series
# indexed by those two keys.
total_revenue = (
    df.groupby(by=["Country", "StockCode"])["Revenue"]
    .sum()
)

In [75]:
# Promote the aggregated Series to a one-column DataFrame for export.
df_res = total_revenue.to_frame()

### 10. Export and Reporting:

In [76]:
# Write the per-country, per-product revenue summary next to the notebook.
df_res.to_csv(path_or_buf="Summarized_revenue.csv")

In [88]:
# Display the aggregated revenue table (indexed by Country and StockCode).
df_res

Unnamed: 0_level_0,Unnamed: 1_level_0,Revenue
Country,StockCode,Unnamed: 2_level_1
Australia,15036,432.00
Australia,20665,17.70
Australia,20675,228.96
Australia,20676,228.96
Australia,20677,228.96
...,...,...
Unspecified,85049A,1.25
Unspecified,85179A,4.65
Unspecified,85179C,4.65
Unspecified,85180A,9.30


<h2 align="center">🌻🌻🌻🌺🌻🌻🌻</h2>