# 📊 Real-Time E-commerce Analytics (User Journey Funnel)

In [None]:
!pip install pandas pyarrow seaborn


In [None]:
from google.colab import files
# Upload the raw event log through the Colab `files` helper. The loading
# cell reads 'events_log_with_userid.json', so that file must be uploaded.
uploaded = files.upload()


In [None]:
import pandas as pd
import json

# Load the event log: newline-delimited JSON, one event object per line.
df = pd.read_json('events_log_with_userid.json', lines=True)
# Timestamps are interpreted as epoch seconds; values that cannot be parsed
# become NaT instead of raising.
df['timestamp'] = pd.to_datetime(df['timestamp'], unit='s', errors='coerce')

if 'products' in df.columns:
    # One output row per entry of each event's `products` list.
    df = df.explode('products')
    # Expand each product object into its own columns.
    product_df = pd.json_normalize(df['products'])
    # explode() repeats the original index labels, so both frames are
    # re-indexed 0..n-1 before being joined side by side (positional join).
    df = pd.concat(
        [
            df.drop(columns=['products']).reset_index(drop=True),
            product_df.reset_index(drop=True),
        ],
        axis=1,
    )

if 'price' in df.columns:
    # Strip '$' and ',' before converting to float. The pattern is a raw
    # string: '\$' is not a valid str escape and triggers an invalid-escape
    # warning in a normal string literal.
    df['price'] = df['price'].replace(r'[\$,]', '', regex=True).astype(float)

df.head()


In [None]:
# Persist the flattened events as Parquet (row index not written). The
# micro-batch loop at the end of this notebook polls this same path.
df.to_parquet('user_events_final.parquet', engine='pyarrow', index=False)


In [None]:
from google.colab import files
# Hand the Parquet file written by the previous cell to the Colab download helper.
files.download('user_events_final.parquet')


In [None]:
# One row per user_id holding the list of that user's event_type values.
# Events are not re-sorted by timestamp here; they follow df's row order.
user_paths = df.groupby('user_id')['event_type'].apply(list).reset_index()

def classify_path(events):
    """Map one user's list of event types to a funnel bucket.

    Returns one of:
      'Login Only'               - every event is 'login'
      'Login + Add to Cart Only' - has 'login' and 'add_to_cart', no 'purchase'
      'Login + Add + Purchase'   - has 'login', 'add_to_cart' and 'purchase'
      'Other'                    - anything else (including no 'login' at all)
    """
    # Every named bucket requires a login somewhere in the path.
    if 'login' not in events:
        return 'Other'

    if all(step == 'login' for step in events):
        return 'Login Only'

    # Past this point the user did something besides logging in; only paths
    # that reached the cart belong to a named bucket.
    if 'add_to_cart' not in events:
        return 'Other'

    if 'purchase' in events:
        return 'Login + Add + Purchase'
    return 'Login + Add to Cart Only'

# Label every user with the funnel bucket derived from their event list,
# then preview the first rows.
user_paths['user_journey'] = user_paths['event_type'].apply(classify_path)
user_paths.head()


In [None]:
# Number of users in each journey bucket.
user_paths['user_journey'].value_counts()


In [None]:
# Per-event-type summary: each section filters the event log to one
# event_type and prints either a row count or a value_counts() breakdown.
event_type = df['event_type']

login_rows = df.loc[event_type == 'login']
print("Total Logins:", len(login_rows))

add_df = df.loc[event_type == 'add_to_cart']
cart_categories = add_df['product_category'].value_counts()
print("\nTop Product Categories in Cart:\n", cart_categories)

purchase_df = df.loc[event_type == 'purchase']
purchased_brands = purchase_df['brand'].value_counts()
print("\nTop Brands Purchased:\n", purchased_brands)

view_df = df.loc[event_type == 'view_product']
viewed_types = view_df['product_type'].value_counts()
print("\nMost Viewed Product Types:\n", viewed_types)

search_df = df.loc[event_type == 'search']
print("\nTotal Search Events:", search_df.shape[0])


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

# Horizontal bar chart of users per journey bucket; bars follow the order
# returned by value_counts() on the bucket column.
journey_order = user_paths['user_journey'].value_counts().index
ax = sns.countplot(data=user_paths, y='user_journey', order=journey_order)
ax.set_title("User Journey Funnel")
ax.set_xlabel("Number of Users")
ax.set_ylabel("Journey Stage")
plt.show()


## 🔄 Micro-Batch Analytics Loop (Every 5 Seconds)

In [None]:
import pandas as pd
import time
import os

# File polled by the loop, and the number of its rows already reported.
parquet_path = 'user_events_final.parquet'
processed_rows = 0


def classify_path(events):
    """Map one user's list of event types to a funnel bucket.

    Defined once, ahead of the polling loop, rather than being re-created on
    every iteration.

    Returns 'Login Only', 'Login + Add to Cart Only',
    'Login + Add + Purchase', or 'Other'.
    """
    if 'login' in events and all(e == 'login' for e in events):
        return 'Login Only'
    elif 'login' in events and 'add_to_cart' in events and 'purchase' not in events:
        return 'Login + Add to Cart Only'
    elif 'login' in events and 'add_to_cart' in events and 'purchase' in events:
        return 'Login + Add + Purchase'
    else:
        return 'Other'


# Poll the Parquet file every 5 seconds and summarise only the rows appended
# since the previous poll. Runs until the cell is interrupted.
while True:
    if os.path.exists(parquet_path):
        try:
            df = pd.read_parquet(parquet_path)
            if len(df) < processed_rows:
                # The file now has fewer rows than were already reported, so
                # it was replaced rather than appended to. Start over instead
                # of waiting for it to outgrow the stale offset (and then
                # skipping the head of the new file).
                processed_rows = 0
            if len(df) > processed_rows:
                print(f"\n🔄 New data detected: {len(df) - processed_rows} new rows")
                new_data = df.iloc[processed_rows:]
                # The offset advances before processing, so a batch that
                # raises below is reported once and then skipped.
                processed_rows = len(df)
                # NOTE: journeys are built from the new rows only, so a user
                # whose events span several polls is classified per batch.
                journey_df = new_data.groupby('user_id')['event_type'].apply(list).reset_index()
                journey_df['user_journey'] = journey_df['event_type'].apply(classify_path)
                print(journey_df['user_journey'].value_counts())
            else:
                print("⏳ No new data...")
        except Exception as e:
            # Boundary of the polling loop: report and keep polling.
            print("⚠️ Error reading file:", e)
    time.sleep(5)
