# E-commerce Product Recommendation System

Project track - AI for Market Trend Analysis

Problem statement - Implement a product recommendation system.The system Suggests relevant product recomendations to user by combining multiple recommendation strategies and then by employing a hybrid re ranker.

Relevance- E-commerce Product recomendations systems serve key use to both users and business. They improve CX for users and get good conversion and retention for business, choosing the besst performative model helps in suggesting most relevant products to user.



Run this notebook top-to-bottom. It includes:
- Synthetic data generation
- Temporal split + feature engineering
- Popularity, Item-Item CF, Content TF-IDF, and Hybrid reranker models
- Evaluation (Precision/Recall/NDCG/MAP/Coverage)
- Demo recommendations + similar-items demo


## 1) Imports and settings

In [None]:
# 1) Imports & settings
import numpy as np
import pandas as pd
from dataclasses import dataclass
from typing import Dict, List, Optional, Tuple, Set
from scipy import sparse
from sklearn.preprocessing import normalize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.linear_model import LogisticRegression
import math
import time

pd.set_option("display.max_colwidth", 80)


## 2) Configuration

improvements-Denser interactions, stronger implicit weights, and more concentrated user preferences.

Data source is a synthetic ecommerce data for 2000  users ,1000 items and 300 average interactions per user. these interactions include viewing, adding to cart, purchasing(in increasing order of priority/weights).

In [None]:
# 2) Configuration - key improvements

@dataclass(frozen=True)
class CFG:
    seed: int = 42

    # Denser data: more interactions per user
    n_users: int = 2000
    n_items: int = 1000
    n_categories: int = 20
    n_interactions: int = 600000  # 300 per user avg

    # Stronger signal weights
    w_view: float = 1.0
    w_cart: float = 5.0
    w_purchase: float = 15.0

    # Temporal split config
    val_days: int = 7
    test_days: int = 7
    k_eval: int = 5

    # User behavior concentration
    user_preference_strength: float = 0.88
    purchase_concentration: float = 0.95

cfg = CFG()
rng = np.random.default_rng(cfg.seed)

print("Configuration:")
print(f"  Users: {cfg.n_users}, Items: {cfg.n_items}")
print(f"  Interactions: {cfg.n_interactions} ({cfg.n_interactions/cfg.n_users:.0f} per user)")
print(f"  Preference strength: {cfg.user_preference_strength:.0%}")
print(f"  Purchase concentration: {cfg.purchase_concentration:.0%}")


Configuration:
  Users: 2000, Items: 1000
  Interactions: 600000 (300 per user)
  Preference strength: 88%
  Purchase concentration: 95%


## 3) Synthetic data generation

Generates stronger user-item affinity, with purchases concentrated in primary favorites.

In [None]:
# 3) Data generation - improved

def generate_synthetic_ecommerce(
    n_users: int,
    n_items: int,
    n_interactions: int,
    n_categories: int,
    seed: int,
    user_pref_strength: float = 0.88,
    purchase_concentration: float = 0.95,
) -> Tuple[pd.DataFrame, pd.DataFrame]:
    """Generate data with stronger user-item affinity patterns."""
    rng_local = np.random.default_rng(seed)

    categories = [f"cat_{i:02d}" for i in range(n_categories)]
    item_cat = rng_local.integers(0, n_categories, size=n_items)

    adjective = ["premium", "classic", "smart", "eco", "luxury", "pro", "elite", "modern", "vintage", "designer"]
    noun = ["watch", "laptop", "phone", "camera", "headphones", "speaker", "tablet", "monitor", "keyboard", "mouse"]
    tagline = ["bestseller", "trending", "new arrival", "limited edition", "top rated"]

    titles, texts = [], []
    for i in range(n_items):
        a = adjective[int(rng_local.integers(0, len(adjective)))]
        n = noun[int(rng_local.integers(0, len(noun)))]
        t = tagline[int(rng_local.integers(0, len(tagline)))]
        c = categories[int(item_cat[i])]
        title = f"{a} {n} {i}"
        text = f"{title} {t}. category {c}. high quality {a} {n}."
        titles.append(title)
        texts.append(text)

    items = pd.DataFrame({
        "item_id": np.arange(n_items, dtype=int),
        "category": [categories[int(i)] for i in item_cat],
        "title": titles,
        "text": texts,
    })

    # Each user gets 2-3 favorite categories
    user_fav = np.zeros((n_users, 3), dtype=int)
    for u in range(n_users):
        primary = rng_local.integers(0, n_categories)
        user_fav[u, 0] = primary
        user_fav[u, 1] = (primary + rng_local.integers(1, 5)) % n_categories
        user_fav[u, 2] = (primary + rng_local.integers(5, 10)) % n_categories

    start_ts = 1700000000
    end_ts = start_ts + 60 * 24 * 3600

    user_ids = rng_local.integers(0, n_users, size=n_interactions)
    timestamps = rng_local.integers(start_ts, end_ts, size=n_interactions)

    # More purchases
    event_probs = np.array([0.70, 0.20, 0.10])
    event_types = rng_local.choice(["view", "cart", "purchase"], size=n_interactions, p=event_probs)

    # Item popularity (power law)
    item_pop = rng_local.pareto(a=1.5, size=n_items) + 1.0
    item_pop = item_pop / item_pop.sum()

    chosen_items = np.empty(n_interactions, dtype=int)
    for idx in range(n_interactions):
        u = int(user_ids[idx])
        event = event_types[idx]
        fav_cats = user_fav[u]

        pref_strength = purchase_concentration if event == "purchase" else user_pref_strength

        if rng_local.random() < pref_strength:
            if event == "purchase":
                cat = int(fav_cats[0])  # primary favorite for purchases
            else:
                cat = int(fav_cats[int(rng_local.integers(0, len(fav_cats)))])

            candidates = np.where(item_cat == cat)[0]
            chosen_items[idx] = int(rng_local.choice(candidates))
        else:
            chosen_items[idx] = int(rng_local.choice(np.arange(n_items), p=item_pop))

    interactions = pd.DataFrame({
        "user_id": user_ids.astype(int),
        "item_id": chosen_items.astype(int),
        "event_type": event_types.astype(str),
        "timestamp": timestamps.astype(int),
    }).sort_values("timestamp").reset_index(drop=True)

    return items, interactions


# Generate data
t0 = time.time()
items, interactions = generate_synthetic_ecommerce(
    n_users=cfg.n_users,
    n_items=cfg.n_items,
    n_interactions=cfg.n_interactions,
    n_categories=cfg.n_categories,
    seed=cfg.seed,
    user_pref_strength=cfg.user_preference_strength,
    purchase_concentration=cfg.purchase_concentration,
)
print(f"\n✓ Generated data in {time.time()-t0:.1f}s")
print(f"\n Items: {items.shape}, Interactions: {interactions.shape}")
print(f"\n Items: \n{items.head()}")
print(f"\n Interactions: \n{interactions.head()}")
print(f"\n  Event distribution:\n{interactions['event_type'].value_counts(normalize=True)}")

# Add weights + dedupe
weight_map = {"view": cfg.w_view, "cart": cfg.w_cart, "purchase": cfg.w_purchase}
interactions["weight"] = interactions["event_type"].map(weight_map).astype(float)
interactions = interactions.drop_duplicates(subset=["user_id", "item_id", "event_type", "timestamp"]).reset_index(drop=True)



✓ Generated data in 17.7s

 Items: (1000, 4), Interactions: (600000, 4)

 Items: 
   item_id category              title  \
0        0   cat_01  vintage monitor 0   
1        1   cat_15   vintage laptop 1   
2        2   cat_13  vintage speaker 2   
3        3   cat_08        pro phone 3   
4        4   cat_08     smart camera 4   

                                                                              text  
0    vintage monitor 0 new arrival. category cat_01. high quality vintage monitor.  
1        vintage laptop 1 top rated. category cat_15. high quality vintage laptop.  
2  vintage speaker 2 limited edition. category cat_13. high quality vintage spe...  
3            pro phone 3 limited edition. category cat_08. high quality pro phone.  
4          smart camera 4 new arrival. category cat_08. high quality smart camera.  

 Interactions: 
   user_id  item_id event_type   timestamp
0      872      518       cart  1700000000
1      991      689       view  1700000012
2     15

## 4) Temporal split

Train, validation, and test are split by time to avoid leakage.

In [None]:
# 4) Temporal split

def temporal_split(interactions: pd.DataFrame, val_days: int, test_days: int) -> Dict[str, pd.DataFrame]:
    df = interactions.sort_values("timestamp").reset_index(drop=True)
    max_ts = int(df["timestamp"].max())
    day = 24 * 3600
    test_start = max_ts - test_days * day
    val_start = test_start - val_days * day

    train = df[df["timestamp"] < val_start].copy()
    val = df[(df["timestamp"] >= val_start) & (df["timestamp"] < test_start)].copy()
    test = df[df["timestamp"] >= test_start].copy()

    return {"train": train.reset_index(drop=True), "val": val.reset_index(drop=True), "test": test.reset_index(drop=True)}


split = temporal_split(interactions, val_days=cfg.val_days, test_days=cfg.test_days)
print(f"\n✓ Split: train={split['train'].shape[0]}, val={split['val'].shape[0]}, test={split['test'].shape[0]}")



✓ Split: train=460289, val=70007, test=69704


## 5) Feature engineering

Creates user/item aggregates and recency features used by the hybrid reranker.

In [None]:
# 5) Feature engineering

def build_feature_tables(interactions_train: pd.DataFrame, items: pd.DataFrame, now_ts: int):
    df = interactions_train.copy()

    user_agg = df.groupby("user_id").agg(
        user_events=("weight", "size"),
        user_weight_sum=("weight", "sum"),
        user_last_ts=("timestamp", "max"),
    ).reset_index()
    user_agg["user_recency_days"] = (now_ts - user_agg["user_last_ts"]) / (24 * 3600)

    item_agg = df.groupby("item_id").agg(
        item_events=("weight", "size"),
        item_weight_sum=("weight", "sum"),
        item_last_ts=("timestamp", "max"),
    ).reset_index()
    item_agg["item_recency_days"] = (now_ts - item_agg["item_last_ts"]) / (24 * 3600)
    item_agg = item_agg.merge(items[["item_id", "category"]], on="item_id", how="left")

    return user_agg, item_agg


now_ts = int(split["train"]["timestamp"].max())
user_features, item_features = build_feature_tables(split["train"], items, now_ts=now_ts)
print(f"✓ Features: {user_features.shape[0]} users, {item_features.shape[0]} items")


✓ Features: 2000 users, 1000 items


## 6) Models

Popularity, Item-Item CF, Content TF-IDF, and Hybrid reranker.

In [None]:
# 6) Models

class PopularityRecommender:
    def __init__(self):
        self.global_top: List[Tuple[int, float]] = []
        self.by_category_top: Dict[str, List[Tuple[int, float]]] = {}

    def fit(self, interactions_train: pd.DataFrame, items: pd.DataFrame):
        score = interactions_train.groupby("item_id")["weight"].sum().sort_values(ascending=False)
        self.global_top = [(int(i), float(s)) for i, s in score.head(500).items()]

        merged = interactions_train.merge(items[["item_id", "category"]], on="item_id", how="left")
        by_cat = {}
        for cat, g in merged.groupby("category"):
            s = g.groupby("item_id")["weight"].sum().sort_values(ascending=False)
            by_cat[str(cat)] = [(int(i), float(v)) for i, v in s.head(500).items()]
        self.by_category_top = by_cat

    def recommend(self, user_id: int, k: int, category: Optional[str] = None):
        if category and category in self.by_category_top:
            return [(i, s, "popularity_category") for i, s in self.by_category_top[category][:k]]
        return [(i, s, "popularity_global") for i, s in self.global_top[:k]]


class ItemItemCFRecommender:
    def __init__(self, topk_sim: int = 150):
        self.topk_sim = topk_sim
        self.item_index: Dict[int, int] = {}
        self.index_item: Dict[int, int] = {}
        self.user_items: Dict[int, np.ndarray] = {}
        self.user_weights: Dict[int, np.ndarray] = {}
        self.sim_topk: Dict[int, Tuple[np.ndarray, np.ndarray]] = {}

    def fit(self, interactions_train: pd.DataFrame):
        df = interactions_train[["user_id", "item_id", "weight"]].copy()
        users = df["user_id"].unique()
        items_u = df["item_id"].unique()

        self.item_index = {int(item_id): idx for idx, item_id in enumerate(items_u)}
        self.index_item = {idx: item_id for item_id, idx in self.item_index.items()}

        user_index = {int(u): idx for idx, u in enumerate(users)}
        ui = df["user_id"].map(user_index).to_numpy()
        ii = df["item_id"].map(self.item_index).to_numpy()
        ww = df["weight"].to_numpy().astype(np.float32)

        X = sparse.csr_matrix((ww, (ui, ii)), shape=(len(users), len(items_u)))
        X = normalize(X, norm="l2", axis=1)
        S = (X.T @ X).tocsr()

        self.sim_topk = {}
        for j in range(S.shape[0]):
            row = S.getrow(j)
            if row.nnz == 0:
                self.sim_topk[j] = (np.array([], dtype=int), np.array([], dtype=float))
                continue
            idxs = row.indices
            vals = row.data
            mask = idxs != j
            idxs = idxs[mask]
            vals = vals[mask]
            if len(vals) == 0:
                self.sim_topk[j] = (np.array([], dtype=int), np.array([], dtype=float))
                continue
            order = np.argsort(-vals)[:self.topk_sim]
            self.sim_topk[j] = (idxs[order].astype(int), vals[order].astype(float))

        grouped = df.groupby("user_id").apply(lambda g: (g["item_id"].values, g["weight"].values))
        self.user_items = {}
        self.user_weights = {}
        for u, (its, wts) in grouped.items():
            arr = np.array([self.item_index[int(it)] for it in its if int(it) in self.item_index], dtype=int)
            wts_arr = np.array([w for it, w in zip(its, wts) if int(it) in self.item_index], dtype=float)
            if arr.size > 0:
                self.user_items[int(u)] = arr
                self.user_weights[int(u)] = wts_arr

    def recommend(self, user_id: int, k: int):
        if user_id not in self.user_items:
            return []
        hist = self.user_items[user_id]
        weights = self.user_weights[user_id]

        recency_boost = np.linspace(0.5, 1.5, len(hist))
        weights = weights * recency_boost

        scores = {}
        recent_window = min(100, len(hist))
        for it, w in zip(hist[-recent_window:], weights[-recent_window:]):
            neigh_idx, neigh_sim = self.sim_topk.get(int(it), (None, None))
            if neigh_idx is None:
                continue
            for n, s in zip(neigh_idx, neigh_sim):
                scores[int(n)] = scores.get(int(n), 0.0) + float(s) * float(w)

        hist_set = set(int(x) for x in hist.tolist())
        items_scored = [(i, s) for i, s in scores.items() if i not in hist_set]
        items_scored.sort(key=lambda x: x[1], reverse=True)

        return [(int(self.index_item[idx]), float(s), "cf_item_item") for idx, s in items_scored[:k]]


class ContentTFIDFRecommender:
    def __init__(self, max_features: int = 8000):
        self.max_features = max_features
        self.vectorizer = None
        self.item_ids = None
        self.item_matrix = None
        self.item_index = {}

    def fit(self, items: pd.DataFrame):
        self.item_ids = items["item_id"].to_numpy().astype(int)
        texts = items["text"].astype(str).tolist()
        self.vectorizer = TfidfVectorizer(max_features=self.max_features, ngram_range=(1, 2))
        X = self.vectorizer.fit_transform(texts)
        X = normalize(X, norm="l2", axis=1)
        self.item_matrix = X.tocsr()
        self.item_index = {int(item_id): idx for idx, item_id in enumerate(self.item_ids.tolist())}

    def similar_items(self, item_id: int, k: int):
        if self.item_matrix is None or item_id not in self.item_index:
            return []
        i = self.item_index[item_id]
        q = self.item_matrix.getrow(i)
        sims = (self.item_matrix @ q.T).toarray().ravel()
        sims[i] = -1.0
        top = np.argsort(-sims)[:k]
        return [(int(self.item_ids[j]), float(sims[j])) for j in top]


class HybridReranker:
    def __init__(
        self,
        pop,
        cf,
        content,
        user_features,
        item_features,
        items,
        seed,
        candidates_per_model=350,
        rerank_topn=120,
        neg_per_pos=3,
    ):
        self.pop = pop
        self.cf = cf
        self.content = content
        self.user_features = user_features
        self.item_features = item_features
        self.items = items[["item_id", "category"]].copy()
        self.seed = seed
        self.candidates_per_model = candidates_per_model
        self.rerank_topn = rerank_topn
        self.neg_per_pos = neg_per_pos
        self.model = LogisticRegression(max_iter=500, random_state=seed, C=0.5, class_weight="balanced")
        self.user_hist = {}
        self.user_categories = {}

    def set_user_history(self, interactions_train: pd.DataFrame):
        hist = interactions_train.sort_values("timestamp").groupby("user_id")["item_id"].apply(list)
        self.user_hist = {int(u): [int(x) for x in lst] for u, lst in hist.items()}
        merged = interactions_train.merge(self.items, on="item_id", how="left")
        user_cats = merged.groupby("user_id")["category"].apply(lambda x: x.mode()[0] if len(x) > 0 else None)
        self.user_categories = {int(u): str(c) for u, c in user_cats.items() if c is not None}

    def _user_row(self, user_id: int):
        row = self.user_features[self.user_features["user_id"] == user_id]
        if len(row) == 0:
            return {"user_events": 0.0, "user_weight_sum": 0.0, "user_recency_days": 999.0}
        r = row.iloc[0]
        return {
            "user_events": float(r["user_events"]),
            "user_weight_sum": float(r["user_weight_sum"]),
            "user_recency_days": float(r["user_recency_days"]),
        }

    def _item_row(self, item_id: int):
        row = self.item_features[self.item_features["item_id"] == item_id]
        if len(row) == 0:
            return {"item_events": 0.0, "item_weight_sum": 0.0, "item_recency_days": 999.0}
        r = row.iloc[0]
        return {
            "item_events": float(r["item_events"]),
            "item_weight_sum": float(r["item_weight_sum"]),
            "item_recency_days": float(r["item_recency_days"]),
        }

    def _featurize(self, user_id: int, item_id: int, cf_score: float, content_score: float):
        u = self._user_row(user_id)
        it = self._item_row(item_id)

        item_row = self.items[self.items["item_id"] == item_id]
        item_cat = item_row["category"].iloc[0] if len(item_row) > 0 else ""
        user_fav_cat = self.user_categories.get(user_id, "")
        category_match = 1.0 if item_cat == user_fav_cat else 0.0

        return np.array([
            u["user_events"],
            u["user_weight_sum"],
            np.log1p(u["user_weight_sum"]),
            u["user_recency_days"],
            it["item_events"],
            it["item_weight_sum"],
            np.log1p(it["item_weight_sum"]),
            it["item_recency_days"],
            cf_score,
            np.log1p(cf_score),
            content_score,
            category_match,
        ], dtype=np.float32)

    def _build_candidates(self, user_id: int):
        candidates = {}

        cf_recs = self.cf.recommend(user_id, self.candidates_per_model)
        for item_id, score, _ in cf_recs:
            candidates[int(item_id)] = (float(score), 0.0)

        content_candidates = []
        if user_id in self.user_hist and len(self.user_hist[user_id]) > 0:
            for last_item_id in self.user_hist[user_id][-3:]:
                content_candidates.extend(self.content.similar_items(last_item_id, self.candidates_per_model // 3))

        for item_id, score in content_candidates:
            if int(item_id) in candidates:
                candidates[int(item_id)] = (candidates[int(item_id)][0], float(score))
            else:
                candidates[int(item_id)] = (0.0, float(score))

        return [(it, cfs, cos) for it, (cfs, cos) in candidates.items()]

    def fit(self, interactions_train: pd.DataFrame, interactions_val: pd.DataFrame):
        rng_local = np.random.default_rng(self.seed)
        val = interactions_val.copy()
        pos = val[val["event_type"] == "purchase"][["user_id", "item_id"]].drop_duplicates()
        if len(pos) < 1000:
            cart_pos = val[val["event_type"] == "cart"][["user_id", "item_id"]].drop_duplicates()
            pos = pd.concat([pos, cart_pos]).drop_duplicates()

        X_rows, y_rows = [], []
        users = pos["user_id"].astype(int).unique().tolist()[:2000]
        for u in users:
            u_pos_items = pos[pos["user_id"] == u]["item_id"].astype(int).tolist()
            if not u_pos_items:
                continue

            cand = self._build_candidates(u)
            if not cand:
                continue

            cand_map = {it: (cfs, cos) for it, cfs, cos in cand}

            for it in u_pos_items[:10]:
                cfs, cos = cand_map.get(int(it), (0.0, 0.0))
                X_rows.append(self._featurize(u, int(it), cfs, cos))
                y_rows.append(1)

                cand_items = [c for c in cand_map.keys() if c not in set(u_pos_items)]
                n_negs = min(self.neg_per_pos, len(cand_items))
                if n_negs > 0:
                    neg_items = rng_local.choice(cand_items, size=n_negs, replace=False)
                    for neg_it in neg_items:
                        cfs2, cos2 = cand_map.get(int(neg_it), (0.0, 0.0))
                        X_rows.append(self._featurize(u, int(neg_it), cfs2, cos2))
                        y_rows.append(0)

        X = np.vstack(X_rows) if X_rows else np.zeros((0, 12), dtype=np.float32)
        y = np.array(y_rows, dtype=int)

        if len(np.unique(y)) < 2 or len(X) < 100:
            X = np.random.default_rng(self.seed).normal(size=(500, 12)).astype(np.float32)
            y = np.array([0] * 250 + [1] * 250, dtype=int)

        print(f"  Training reranker on {len(X)} examples ({sum(y)} positive)")
        self.model.fit(X, y)

    def recommend(self, user_id: int, k: int):
        cand = self._build_candidates(user_id)
        if not cand:
            return []

        X = np.vstack([self._featurize(user_id, it, cfs, cos) for it, cfs, cos in cand]).astype(np.float32)
        proba = self.model.predict_proba(X)[:, 1]
        order = np.argsort(-proba)[:self.rerank_topn]
        return [(int(cand[int(idx)][0]), float(proba[int(idx)]), "hybrid_rerank") for idx in order[:k]]


## 7) Training

Fits popularity, CF, content, and hybrid reranker.

In [None]:
# 7) Training

print("\n" + "="*60)
print("TRAINING MODELS")
print("="*60)

t0 = time.time()

pop = PopularityRecommender()
pop.fit(split["train"], items)
print("✓ Popularity trained")

cf = ItemItemCFRecommender(topk_sim=150)
cf.fit(split["train"])
print("✓ CF Item-Item trained")

content = ContentTFIDFRecommender(max_features=8000)
content.fit(items)
print("✓ Content TF-IDF trained")

hybrid = HybridReranker(
    pop, cf, content, user_features, item_features, items,
    seed=cfg.seed, candidates_per_model=350, rerank_topn=120, neg_per_pos=3
)
hybrid.set_user_history(split["train"])
hybrid.fit(split["train"], split["val"])
print("✓ Hybrid trained")

print(f"\nTotal training time: {time.time()-t0:.1f}s")



TRAINING MODELS
✓ Popularity trained


  grouped = df.groupby("user_id").apply(lambda g: (g["item_id"].values, g["weight"].values))


✓ CF Item-Item trained
✓ Content TF-IDF trained
  Training reranker on 27216 examples (6804 positive)
✓ Hybrid trained

Total training time: 77.7s


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


## 8) Evaluation metrics

Precision, recall, NDCG, MAP, and coverage utilities.

In [None]:
# 8) Evaluation metrics

def precision_at_k(ranked: List[int], relevant: Set[int], k: int):
    if k <= 0:
        return 0.0
    top = ranked[:k]
    return sum(1 for x in top if x in relevant) / float(k) if top else 0.0

def recall_at_k(ranked: List[int], relevant: Set[int], k: int):
    if not relevant:
        return 0.0
    top = ranked[:k]
    return sum(1 for x in top if x in relevant) / float(len(relevant))

def ndcg_at_k(ranked: List[int], relevant: Set[int], k: int):
    if not relevant:
        return 0.0
    dcg = sum(1.0 / math.log(i + 2, 2) for i, item in enumerate(ranked[:k]) if item in relevant)
    idcg = sum(1.0 / math.log(i + 2, 2) for i in range(min(len(relevant), k)))
    return dcg / idcg if idcg > 0 else 0.0

def average_precision_at_k(ranked: List[int], relevant: Set[int], k: int):
    if not relevant:
        return 0.0
    ap = 0.0
    hits = 0
    for i, item in enumerate(ranked[:k], start=1):
        if item in relevant:
            hits += 1
            ap += hits / float(i)
    return ap / float(min(len(relevant), k))

def ground_truth(interactions_test: pd.DataFrame):
    df = interactions_test[interactions_test["event_type"].isin(["purchase", "cart"])][["user_id", "item_id"]].drop_duplicates()
    gt = {}
    for u, g in df.groupby("user_id"):
        gt[int(u)] = set(int(x) for x in g["item_id"].tolist())
    return gt

def coverage(recs_by_user: Dict[int, List[int]], catalog_size: int):
    if catalog_size <= 0:
        return 0.0
    s = set()
    for lst in recs_by_user.values():
        s.update(lst)
    return len(s) / float(catalog_size)

def evaluate_model(model_name: str, model_obj, k: int, max_users: int = 1000):
    gt = ground_truth(split["test"])
    users = sorted(gt.keys())
    if len(users) > max_users:
        users = rng.choice(users, size=max_users, replace=False).tolist()

    recs_by_user = {}
    p_list, r_list, n_list, m_list = [], [], [], []

    for u in users:
        recs = model_obj.recommend(int(u), k=k)
        ranked = [int(it) for it, _, _ in recs]
        recs_by_user[int(u)] = ranked
        relevant = gt.get(int(u), set())
        p_list.append(precision_at_k(ranked, relevant, k))
        r_list.append(recall_at_k(ranked, relevant, k))
        n_list.append(ndcg_at_k(ranked, relevant, k))
        m_list.append(average_precision_at_k(ranked, relevant, k))

    cov = coverage(recs_by_user, catalog_size=int(items["item_id"].nunique()))

    return {
        "model": model_name,
        f"precision@{k}": float(np.mean(p_list)),
        f"recall@{k}": float(np.mean(r_list)),
        f"ndcg@{k}": float(np.mean(n_list)),
        f"map@{k}": float(np.mean(m_list)),
        "coverage": cov,
    }


## 9) Evaluation on test set

Runs all baselines and hybrid, producing a results table.

In [None]:
# 9) Evaluation on test set

print("\n" + "="*60)
print("EVALUATION ON TEST SET")
print("="*60)

k = cfg.k_eval

class ContentOnlyWrapper:
    def __init__(self, train_interactions, content_model, pop_model):
        self.content = content_model
        self.pop = pop_model
        hist = train_interactions.sort_values("timestamp").groupby("user_id")["item_id"].apply(list)
        self.user_hist = {int(u): [int(x) for x in lst] for u, lst in hist.items()}

    def recommend(self, user_id: int, k: int):
        if user_id not in self.user_hist or len(self.user_hist[user_id]) == 0:
            return self.pop.recommend(user_id, k)
        last_item_id = int(self.user_hist[user_id][-1])
        sims = self.content.similar_items(last_item_id, k)
        return [(it, s, "content_sim") for it, s in sims] if sims else self.pop.recommend(user_id, k)

content_only = ContentOnlyWrapper(split["train"], content, pop)

rows = [
    evaluate_model("popularity", pop, k),
    evaluate_model("cf_item_item", cf, k),
    evaluate_model("content_tfidf", content_only, k),
    evaluate_model("hybrid", hybrid, k),
]

df_results = pd.DataFrame(rows).sort_values(f"ndcg@{k}", ascending=False).reset_index(drop=True)

print("\nRESULTS:")
print(df_results.to_string(index=False))
print("\n" + "="*60)



EVALUATION ON TEST SET

RESULTS:
        model  precision@5  recall@5   ndcg@5    map@5  coverage
       hybrid       0.0992  0.048329 0.099080 0.050753     0.957
 cf_item_item       0.0898  0.044359 0.092676 0.048217     0.812
   popularity       0.0390  0.019798 0.048415 0.025834     0.005
content_tfidf       0.0130  0.006510 0.015900 0.008290     0.913



## 10) Demo recommendations

Shows example recommendations for an active user and CF similar-items.

In [None]:
# 10) Demo recommendations

item_lookup = {
    int(r.item_id): {"title": str(r.title), "category": str(r.category)}
    for r in items[["item_id", "title", "category"]].itertuples(index=False)
}

def pretty_recommendations(recs: List[Tuple[int, float, str]]):
    out = []
    for item_id, score, reason in recs:
        meta = item_lookup.get(int(item_id), {"title": f"item_{item_id}", "category": "unknown"})
        title = meta["title"]
        title_parts = title.rsplit(' ', 1)
        clean_title = title_parts[0].title() if len(title_parts) == 2 and title_parts[1].isdigit() else title.title()
        out.append({
            "item_id": int(item_id),
            "product_name": clean_title,
            "category": meta["category"],
            "score": float(score),
            "reason": reason
        })
    return pd.DataFrame(out)

def recommend(user_id: int, k: int = 10, model: str = "hybrid"):
    if model == "hybrid":
        recs = hybrid.recommend(user_id, k)
        if len(recs) < k:
            extra = pop.recommend(user_id, k - len(recs))
            existing = set(it for it, _, _ in recs)
            extra = [(it, s, r) for it, s, r in extra if it not in existing]
            recs = recs + extra[:(k - len(recs))]
    elif model == "popularity":
        recs = pop.recommend(user_id, k)
    elif model == "cf":
        recs = cf.recommend(user_id, k)
        if len(recs) < k:
            extra = pop.recommend(user_id, k - len(recs))
            existing = set(it for it, _, _ in recs)
            extra = [(it, s, r) for it, s, r in extra if it not in existing]
            recs = recs + extra[:(k - len(recs))]
    elif model == "content":
        recs = content_only.recommend(user_id, k)
    else:
        raise ValueError("model must be one of: hybrid, popularity, cf, content")
    return pretty_recommendations(recs)

print("\n" + "="*60)
print("RECOMMENDATIONS FOR USER")
print("="*60)

demo_user = int(split["train"]["user_id"].value_counts().index[5])
print(f"\nUser ID: {demo_user}")
print(f"Training interactions: {split['train'][split['train']['user_id']==demo_user].shape[0]}")

print("\n--- HYBRID RECOMMENDATIONS ---")
print(recommend(demo_user, k=5, model="hybrid").to_string(index=False))

# print("\n--- CF ITEM-ITEM RECOMMENDATIONS ---")
# print(recommend(demo_user, k=5, model="cf").to_string(index=False))

# print("\n" + "="*60)
# print("DEMO: SIMILAR ITEMS (CF)")
# print("="*60)

# def cf_similar_items(cf_model, item_id: int, k: int = 10):
#     item_id = int(item_id)
#     if item_id not in cf_model.item_index:
#         return []
#     j = int(cf_model.item_index[item_id])
#     neigh_idx, neigh_sim = cf_model.sim_topk.get(j, (np.array([], dtype=int), np.array([], dtype=float)))
#     return [
#         (int(cf_model.index_item[int(idx)]), float(s), "cf_similarity")
#         for idx, s in zip(neigh_idx[:k], neigh_sim[:k])
#     ]

# last_item = int(split["train"].query("user_id == @demo_user").sort_values("timestamp")["item_id"].iloc[-1])
# print(f"\nMost recent item in user history: {last_item}")
# print(f"Product: {item_lookup[last_item]['title']}")

# print("\nSimilar items (CF):")
# print(
#     pretty_recommendations(cf_similar_items(cf, last_item, k=5))[
#         ["item_id", "product_name", "category", "score"]
#     ].to_string(index=False)
# )

# print("\n" + "="*60)




RECOMMENDATIONS FOR USER

User ID: 51
Training interactions: 273

--- HYBRID RECOMMENDATIONS ---
 item_id    product_name category    score        reason
     608  Elite Keyboard   cat_19 0.922007 hybrid_rerank
     946  Designer Mouse   cat_19 0.921287 hybrid_rerank
     140  Designer Mouse   cat_19 0.919251 hybrid_rerank
     809 Vintage Monitor   cat_19 0.918714 hybrid_rerank
     796    Smart Tablet   cat_19 0.917428 hybrid_rerank
