In [None]:
# Quickstart: Item–Item kNN on this data
import pandas as pd, numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

# ---------- Load ----------
# Only the three columns used below are read; the timestamp column is dropped
# straight away in this cell, so parsing it would be wasted work.
INTERACTION_COLS = ["user_id", "work_id", "weight"]
train = pd.read_csv("res/interactions_train.csv", usecols=INTERACTION_COLS)
books = pd.read_csv("res/books.csv")

# Interactions are keyed by work_id (so editions don't fragment signal).
# Built as one chain so every step yields a fresh frame: no chained-assignment
# warnings, and re-running the cell is idempotent.
train = (
    train[INTERACTION_COLS]
    .dropna()
    .astype({"weight": np.float32})
)
if train.empty:
    raise ValueError("res/interactions_train.csv has no complete (user_id, work_id, weight) rows")

# ---------- Encode ids ----------
# Dense 0..n-1 codes in order of first appearance; i2w is the inverse of w2i.
u2i = {u: i for i, u in enumerate(train["user_id"].unique())}
w2i = {w: i for i, w in enumerate(train["work_id"].unique())}
i2w = {i: w for w, i in w2i.items()}

train = train.assign(
    _uid=train["user_id"].map(u2i),
    _iid=train["work_id"].map(w2i),
)

# ---------- Build user–item matrix with weights ----------
# Dimensions come from the encoders rather than from max()+1 of the code columns.
n_users = len(u2i)
n_items = len(w2i)
# Rows are users, columns are works. Repeated (user, work) pairs are summed by
# the sparse constructor, so multiple rows for one work collapse into one cell.
R = csr_matrix((train["weight"].values,
                (train["_uid"].values, train["_iid"].values)),
               shape=(n_users, n_items))

# ---------- Fit item–item kNN (cosine) ----------
N_NEIGHBORS = 100  # neighbours wanted per item, excluding the item itself
item_user = R.T.tocsr()
knn = NearestNeighbors(
    # +1 because a fitted item is returned among its own neighbours; clamped
    # because kneighbors() rejects n_neighbors larger than the number of fitted items.
    n_neighbors=min(N_NEIGHBORS + 1, n_items),
    metric="cosine",
    algorithm="brute",
    n_jobs=-1,
)
knn.fit(item_user)

# ---------- Recommend for a user ----------
def recommend_for_user(user_id, topn=10):
    """Recommend unseen works for a user from item–item cosine neighbours.

    Every work the user has an entry for in ``R`` is looked up in the fitted
    ``knn`` index; each neighbouring work the user has not seen accumulates the
    cosine similarity (``1 - distance``) contributed by each seen work.

    Args:
        user_id: Raw user id as it appears in the training interactions.
        topn: Maximum number of recommendations to return.

    Returns:
        A list of ``(work_id, score)`` tuples sorted by descending score.
        Empty when the user is not in ``u2i``, when ``topn`` is not positive,
        or when no unseen neighbour exists.
    """
    u = u2i.get(user_id)
    if u is None or topn <= 0:
        return []

    seen_items = set(R.getrow(u).indices.tolist())
    if not seen_items:
        return []

    # One batched query for all seen items instead of one kneighbors() call each.
    query_rows = sorted(seen_items)
    dists, nbrs = knn.kneighbors(item_user[query_rows], return_distance=True)

    scores = {}
    for dist, nb in zip(dists.ravel().tolist(), nbrs.ravel().tolist()):
        # The query item is itself a seen item, so this filter also removes the
        # self-match without assuming it sits at position 0 (ties can reorder it).
        if nb in seen_items:
            continue
        scores[nb] = scores.get(nb, 0.0) + (1.0 - dist)

    top = sorted(scores.items(), key=lambda kv: kv[1], reverse=True)[:topn]
    return [(i2w[i], s) for i, s in top]

# Example user → titles
# One metadata row per work, restricted to the columns joined onto the recommendations.
books_by_work = (
    books
    .drop_duplicates(subset=["work_id"])
    .loc[:, ["work_id","title","author_name","genre"]]
)
test = pd.read_csv("res/interactions_test.csv", parse_dates=["ts"])
# Fixed seed so the same test interaction (and hence the same user) is picked on every run.
example_user = test["user_id"].sample(n=1, random_state=0).iloc[0]



   work_id     score                            title    author_name  \
0  w_02204  7.428220         Kynthos: Legacy (Book 2)      Sam Smith   
1  w_01058  7.078107  The Prophecy of Solara (Book 1)    Riley Brown   
2  w_00422  6.510206                     Mythos: Code    Logan White   
3  w_01152  5.913240           Golden Garden (Book 1)    Riley Allen   
4  w_00753  5.722825         Eldoria: Throne (Book 3)   Skyler Jones   
5  w_02997  5.281706   The Secret of Kynthos (Book 1)  Sage Anderson   
6  w_02527  4.821906   The Legacy of Nemoris (Book 2)      Sam Allen   
7  w_01916  4.379576            Autumn Crown (Book 2)   Morgan Allen   
8  w_01814  4.326840            The Phoenix of Avalon   Alex Johnson   
9  w_00125  3.848648   The Garden of Eldoria (Book 3)    Reese Perez   

             genre  
0        Biography  
1          Fantasy  
2           Horror  
3        Biography  
4         Children  
5          Fantasy  
6        Self-Help  
7           Horror  
8  Science Fiction

In [None]:
# Score the example user, then attach title/author/genre for readability.
recs = recommend_for_user(example_user, topn=10)
out = pd.merge(
    pd.DataFrame(recs, columns=["work_id","score"]),
    books_by_work,
    on="work_id",
    how="left",  # keep every recommendation even if its metadata row is missing
)
print(out)