In [None]:
import pandas as pd
import numpy as np
from tqdm import tqdm

# -------------------------------------------------
# HELPER FUNCTIONS: mAP@20 computation
# -------------------------------------------------
def apk(actual, predicted, k=20):
    """Return the average precision at ``k`` (AP@k) for a single user.

    Args:
        actual: Collection of relevant items (membership is tested with
            ``in``; a ``set`` is the intended type).
        predicted: Ranked sequence of recommended items, best first.
            Items are expected to be hashable.
        k: Cut-off rank; only the first ``k`` predictions are scored.

    Returns:
        The AP@k score as a float in ``[0, 1]``. Returns ``0.0`` when
        ``actual`` is empty or ``k`` is not positive.
    """
    # A non-positive k would make the normaliser below zero (or negative).
    if not actual or k <= 0:
        return 0.0

    score = 0.0
    hits = 0
    # Each relevant item may be credited only once: without this guard a
    # repeated prediction is counted as a fresh hit and AP can exceed 1.
    credited = set()
    for rank, item in enumerate(predicted[:k], start=1):
        if item in actual and item not in credited:
            credited.add(item)
            hits += 1
            score += hits / rank
    return score / min(len(actual), k)

def map_at_k(val_interactions, user_history, top_popular, k=20, max_recent=500):
    """Return the mean AP@k over every user in ``val_interactions``.

    Args:
        val_interactions: Mapping of user id to the collection of items that
            count as relevant for that user.
        user_history: Mapping of user id to that user's item history, passed
            through to ``recommend``.
        top_popular: Popularity-ranked fallback items, passed through to
            ``recommend``.
        k: Number of recommendations requested and scored per user.
        max_recent: Forwarded to ``recommend`` as its ``max_recent`` argument.

    Returns:
        ``np.mean`` of the per-user AP@k values.
    """
    progress = tqdm(val_interactions.items(), desc="–û—Ü–µ–Ω–∫–∞ mAP@20")
    per_user_ap = [
        apk(
            relevant,
            recommend(uid, user_history, top_popular, top_k=k, max_recent=max_recent),
            k,
        )
        for uid, relevant in progress
    ]
    return np.mean(per_user_ap)

# -------------------------------------------------
# RECOMMENDATION FUNCTION (unchanged)
# -------------------------------------------------
def recommend(user_id, user_history, top_popular, top_k=20, max_recent=500):
    """Recommend up to ``top_k`` items: recent history first, then popular.

    Args:
        user_id: Key looked up in ``user_history``.
        user_history: Mapping of user id to a sequence of items ordered from
            oldest to newest (the sequence is walked in reverse).
        top_popular: Fallback items in descending priority.
        top_k: Maximum number of items to return.
        max_recent: Maximum number of distinct history items to use. A value
            of zero or less means no history items are used.

    Returns:
        A list of at most ``top_k`` items. Users absent from ``user_history``
        get ``top_popular[:top_k]``. Known users get their most recent
        distinct items (newest first), padded with items from ``top_popular``
        that were not already taken from the history.
    """
    if user_id not in user_history:
        return top_popular[:top_k]

    # History items beyond top_k would be dropped by the final truncation, so
    # there is no point collecting more than min(max_recent, top_k) of them.
    history_limit = min(max_recent, top_k)

    seen = set()
    recs = []
    # Check the limit *before* collecting so that max_recent <= 0 really
    # yields no history items.
    if history_limit > 0:
        for item in reversed(user_history[user_id]):
            if item in seen:
                continue
            seen.add(item)
            recs.append(item)
            if len(recs) >= history_limit:
                break

    # Pad with popular items the user has not already been given.
    for item in top_popular:
        if len(recs) >= top_k:
            break
        if item not in seen:
            recs.append(item)
    return recs[:top_k]

# -------------------------------------------------
# 1. DATA LOADING
# -------------------------------------------------
# Read the interaction log and cast "date" to int so it can be compared and
# subtracted as a plain number in the time split below.
print("üìÅ –ó–∞–≥—Ä—É–∑–∫–∞ –¥–∞–Ω–Ω—ã—Ö...")
df = pd.read_parquet("train_data.pq")
df["date"] = df["date"].astype(int)
# Largest date value present; used as the anchor for the train/val split.
max_day_all = df["date"].max()
# NOTE(review): the message labels this as the total number of days, but the
# value printed is the maximum date, which equals the day count only if dates
# start at 1 with no gaps — confirm.
print(f"–í—Å–µ–≥–æ –¥–Ω–µ–π –≤ –¥–∞–Ω–Ω—ã—Ö: {max_day_all}")

# -------------------------------------------------
# 2. TIME-BASED SPLIT: train / val
# -------------------------------------------------
val_days = 5  # the last 5 date values are held out for validation
train_end = max_day_all - val_days  # e.g. 46 - 5 = 41 -> train: date <= 41, val: date 42..46

# NOTE(review): the "1" in the Train line is hard-coded; it assumes the first
# date in the data is 1 — confirm.
print(f"\n‚è±Ô∏è –†–∞–∑–±–∏–µ–Ω–∏–µ –ø–æ –≤—Ä–µ–º–µ–Ω–∏:")
print(f"   Train: –¥–Ω–∏ 1‚Äì{train_end}")
print(f"   Val:   –¥–Ω–∏ {train_end + 1}‚Äì{max_day_all}")

# Rows with date <= train_end go to train, strictly later rows go to val.
# .copy() gives independent frames so later reassignment/sorting of train_df
# does not touch df.
train_df = df[df["date"] <= train_end].copy()
val_df = df[df["date"] > train_end].copy()

# -------------------------------------------------
# 3. VAL PREPARATION: ground truth
# -------------------------------------------------
print("\nüîç –§–æ—Ä–º–∏—Ä–æ–≤–∞–Ω–∏–µ ground truth –¥–ª—è –≤–∞–ª–∏–¥–∞—Ü–∏–∏...")
# Mapping user_id -> set of item_ids the user interacted with in the
# validation window; this is what apk() treats as the relevant items.
val_interactions = val_df.groupby("user_id")["item_id"].apply(set).to_dict()
print(f"–ü–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π –≤ val: {len(val_interactions)}")

# -------------------------------------------------
# 4. TRAIN PREPARATION: history + popularity
# -------------------------------------------------
print("\n‚è≥ –°–±–æ—Ä –∏—Å—Ç–æ—Ä–∏–∏ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π (train)...")
# Sort by user and date so that each per-user list below runs from the
# oldest interaction to the newest; recommend() walks it in reverse.
train_df = train_df.sort_values(["user_id", "date"])
user_history_train = train_df.groupby("user_id")["item_id"].apply(list).to_dict()

print("üî• –¢–æ–ø-200 –ø–æ–ø—É–ª—è—Ä–Ω—ã—Ö —Ç–æ–≤–∞—Ä–æ–≤ –∑–∞ –ø–æ—Å–ª–µ–¥–Ω–∏–µ 14 –¥–Ω–µ–π train...")
max_day_train = train_df["date"].max()
# Window is max_day_train-13 .. max_day_train inclusive, i.e. 14 date values.
recent_train = train_df[train_df["date"] >= (max_day_train - 13)]["item_id"]
# First 200 entries of the value_counts() index (item ids by interaction
# count within the window), used as the popularity fallback list.
top_popular_train = recent_train.value_counts().index[:200].tolist()

# -------------------------------------------------
# 5. VALIDATION
# -------------------------------------------------
print("\nüß™ –ó–∞–ø—É—Å–∫ –≤–∞–ª–∏–¥–∞—Ü–∏–∏...")
# Score recommendations built from train-only data against the held-out
# validation interactions. max_recent=50 matches the value used for the
# submission below.
map20 = map_at_k(
    val_interactions=val_interactions,
    user_history=user_history_train,
    top_popular=top_popular_train,
    k=20,
    max_recent=50
)
print(f"\n‚úÖ mAP@20 –Ω–∞ –≤–∞–ª–∏–¥–∞—Ü–∏–∏ (–¥–Ω–∏ {train_end+1}‚Äì{max_day_all}): {map20:.6f}")

# -------------------------------------------------
# 6. FULL MODEL FOR SUBMISSION (built on all data)
# -------------------------------------------------
print("\nüöÄ –ü–æ–¥–≥–æ—Ç–æ–≤–∫–∞ —Ñ–∏–Ω–∞–ª—å–Ω–æ–π –º–æ–¥–µ–ª–∏ –Ω–∞ –≤—Å–µ—Ö –¥–∞–Ω–Ω—ã—Ö...")
# Same preparation as for train, but over the whole frame (train + val rows).
df_full = df.sort_values(["user_id", "date"])
user_history_full = df_full.groupby("user_id")["item_id"].apply(list).to_dict()

print("üî• –¢–æ–ø-200 –ø–æ–ø—É–ª—è—Ä–Ω—ã—Ö –∑–∞ –ø–æ—Å–ª–µ–¥–Ω–∏–µ 14 –¥–Ω–µ–π (–≤—Å–µ –¥–∞–Ω–Ω—ã–µ)...")
# Window is max_day_all-13 .. max_day_all inclusive, i.e. 14 date values.
recent_full = df_full[df_full["date"] >= (max_day_all - 13)]["item_id"]
top_popular_full = recent_full.value_counts().index[:200].tolist()

# -------------------------------------------------
# 7. SUBMISSION GENERATION
# -------------------------------------------------
submission_template = pd.read_csv("sample_submission.csv")
# The template may list a user on several rows; keep each user once, in
# order of first appearance.
unique_users = submission_template["user_id"].drop_duplicates().values
# NOTE(review): computed but never used in this cell; the number of rows
# actually written is not checked against it.
expected_total_rows = len(unique_users) * 20

print(f"\nüì§ –ì–µ–Ω–µ—Ä–∞—Ü–∏—è —Å–∞–±–º–∏—Ç–∞ –¥–ª—è {len(unique_users)} –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π...")
output_file = "submission.csv"

# Rows are streamed straight to the file, one "user_id,item_id" line per
# recommendation, instead of being accumulated in memory first.
with open(output_file, "w", encoding="utf-8") as f:
    f.write("user_id,item_id\n")
    for user_id in tqdm(unique_users, desc="–†–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏–∏", unit="–ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—å"):
        # recommend() may return fewer than 20 items if the popular list
        # cannot fill the gap; such users then get fewer than 20 rows.
        recs = recommend(user_id, user_history_full, top_popular_full, top_k=20, max_recent=50)
        for item_id in recs:
            f.write(f"{user_id},{item_id}\n")

print(f"\n‚úÖ –°–∞–±–º–∏—Ç —Å–æ—Ö—Ä–∞–Ω—ë–Ω: {output_file}")
print("üí° –ì–æ—Ç–æ–≤ –∫ –æ—Ç–ø—Ä–∞–≤–∫–µ –Ω–∞ –ø–ª–∞—Ç—Ñ–æ—Ä–º—É!")

üìÅ –ó–∞–≥—Ä—É–∑–∫–∞ –¥–∞–Ω–Ω—ã—Ö...
–í—Å–µ–≥–æ –¥–Ω–µ–π –≤ –¥–∞–Ω–Ω—ã—Ö: 46

‚è±Ô∏è –†–∞–∑–±–∏–µ–Ω–∏–µ –ø–æ –≤—Ä–µ–º–µ–Ω–∏:
   Train: –¥–Ω–∏ 1‚Äì41
   Val:   –¥–Ω–∏ 42‚Äì46

üîç –§–æ—Ä–º–∏—Ä–æ–≤–∞–Ω–∏–µ ground truth –¥–ª—è –≤–∞–ª–∏–¥–∞—Ü–∏–∏...
–ü–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π –≤ val: 441129

‚è≥ –°–±–æ—Ä –∏—Å—Ç–æ—Ä–∏–∏ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π (train)...
üî• –¢–æ–ø-200 –ø–æ–ø—É–ª—è—Ä–Ω—ã—Ö —Ç–æ–≤–∞—Ä–æ–≤ –∑–∞ –ø–æ—Å–ª–µ–¥–Ω–∏–µ 14 –¥–Ω–µ–π train...

üß™ –ó–∞–ø—É—Å–∫ –≤–∞–ª–∏–¥–∞—Ü–∏–∏...


–û—Ü–µ–Ω–∫–∞ mAP@20: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 441129/441129 [00:01<00:00, 306296.26it/s]



‚úÖ mAP@20 –Ω–∞ –≤–∞–ª–∏–¥–∞—Ü–∏–∏ (–¥–Ω–∏ 42‚Äì46): 0.058704

üöÄ –ü–æ–¥–≥–æ—Ç–æ–≤–∫–∞ —Ñ–∏–Ω–∞–ª—å–Ω–æ–π –º–æ–¥–µ–ª–∏ –Ω–∞ –≤—Å–µ—Ö –¥–∞–Ω–Ω—ã—Ö...
üî• –¢–æ–ø-200 –ø–æ–ø—É–ª—è—Ä–Ω—ã—Ö –∑–∞ –ø–æ—Å–ª–µ–¥–Ω–∏–µ 14 –¥–Ω–µ–π (–≤—Å–µ –¥–∞–Ω–Ω—ã–µ)...

üì§ –ì–µ–Ω–µ—Ä–∞—Ü–∏—è —Å–∞–±–º–∏—Ç–∞ –¥–ª—è 293230 –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π...


–†–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏–∏: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 293230/293230 [00:04<00:00, 59620.38–ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—å/s]


‚úÖ –°–∞–±–º–∏—Ç —Å–æ—Ö—Ä–∞–Ω—ë–Ω: submission.csv
üí° –ì–æ—Ç–æ–≤ –∫ –æ—Ç–ø—Ä–∞–≤–∫–µ –Ω–∞ –ø–ª–∞—Ç—Ñ–æ—Ä–º—É!





In [None]:
# 0.04143233502383816
import pandas as pd
from tqdm import tqdm

# ----------------------------
# 1. Data loading
# ----------------------------
print("üìÅ –ó–∞–≥—Ä—É–∑–∫–∞ –¥–∞–Ω–Ω—ã—Ö...")
train_df = pd.read_parquet("train_data.pq")
submission_template = pd.read_csv("sample_submission.csv")

# Make sure the date is an integer
train_df["date"] = train_df["date"].astype(int)

# ----------------------------
# 2. Build user histories (sorted by date)
# ----------------------------
print("‚è≥ –°–±–æ—Ä –∏—Å—Ç–æ—Ä–∏–∏ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π (—Å–æ—Ä—Ç–∏—Ä–æ–≤–∫–∞ –ø–æ –¥–∞—Ç–µ)...")
# Sort by user and date so that each per-user list runs from the oldest
# interaction to the newest; recommend() walks it in reverse.
train_df = train_df.sort_values(["user_id", "date"])
user_history = train_df.groupby("user_id")["item_id"].apply(list).to_dict()

# ----------------------------
# 3. Top-200 popular items over the last 14 days
# ----------------------------
print("üî• –†–∞—Å—á—ë—Ç –ø–æ–ø—É–ª—è—Ä–Ω—ã—Ö —Ç–æ–≤–∞—Ä–æ–≤ –∑–∞ –ø–æ—Å–ª–µ–¥–Ω–∏–µ 14 –¥–Ω–µ–π...")
max_day = train_df["date"].max()
# Window is max_day-13 .. max_day inclusive, i.e. 14 date values.
recent_items = train_df[train_df["date"] >= (max_day - 13)]["item_id"]
# First 200 entries of the value_counts() index, used as the popularity
# fallback list.
top_popular = recent_items.value_counts().index[:200].tolist()

# ----------------------------
# 4. Recommendation function
# ----------------------------
def recommend(user_id, user_history, top_popular, top_k=20, max_recent=50):
    """Recommend up to ``top_k`` items: recent history first, then popular.

    Args:
        user_id: Key looked up in ``user_history``.
        user_history: Mapping of user id to a sequence of items ordered from
            oldest to newest (the sequence is walked in reverse).
        top_popular: Fallback items in descending priority.
        top_k: Maximum number of items to return.
        max_recent: Maximum number of distinct history items to use. A value
            of zero or less means no history items are used.

    Returns:
        A list of at most ``top_k`` items. Users absent from ``user_history``
        get ``top_popular[:top_k]``. Known users get their most recent
        distinct items (newest first), padded with items from ``top_popular``
        that were not already taken from the history.
    """
    if user_id not in user_history:
        return top_popular[:top_k]

    # History items beyond top_k would be dropped by the final truncation, so
    # there is no point collecting more than min(max_recent, top_k) of them.
    history_limit = min(max_recent, top_k)

    seen = set()
    recs = []
    # Walk the history in reverse so the most recent items come first. The
    # limit is checked *before* collecting so that max_recent <= 0 really
    # yields no history items.
    if history_limit > 0:
        for item in reversed(user_history[user_id]):
            if item in seen:
                continue
            seen.add(item)
            recs.append(item)
            if len(recs) >= history_limit:
                break

    # Pad with popular items the user has not already been given.
    for item in top_popular:
        if len(recs) >= top_k:
            break
        if item not in seen:
            recs.append(item)
    return recs[:top_k]

# ----------------------------
# 5. Get the unique users from the template
# ----------------------------
print("üîç –û–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ —É–Ω–∏–∫–∞–ª—å–Ω—ã—Ö –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π –∏–∑ —à–∞–±–ª–æ–Ω–∞...")
# The template may list a user on several rows; keep each user once, in
# order of first appearance.
unique_users = submission_template["user_id"].drop_duplicates().values
# 20 rows per user, matching top_k=20 used when writing the submission.
expected_total_rows = len(unique_users) * 20

print(f"–í—Å–µ–≥–æ —É–Ω–∏–∫–∞–ª—å–Ω—ã—Ö –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π: {len(unique_users)}")
print(f"–û–∂–∏–¥–∞–µ–º–æ–µ —á–∏—Å–ª–æ —Å—Ç—Ä–æ–∫ –≤ —Å–∞–±–º–∏—Ç–µ: {expected_total_rows}")

# ----------------------------
# 6. Submission generation — write directly to the file (saves memory)
# ----------------------------
output_file = "submission.csv"
print(f"üì§ –ì–µ–Ω–µ—Ä–∞—Ü–∏—è —Å–∞–±–º–∏—Ç–∞ ‚Üí {output_file} ...")

with open(output_file, "w", encoding="utf-8") as f:
    f.write("user_id,item_id\n")  # header row
    for user_id in tqdm(unique_users, desc="–†–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏–∏", unit="–ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—å"):
        # recommend() may return fewer than 20 items if the popular list
        # cannot fill the gap; the written row count is not checked against
        # expected_total_rows.
        recs = recommend(user_id, user_history, top_popular, top_k=20, max_recent=50)
        for item_id in recs:
            f.write(f"{user_id},{item_id}\n")

print(f"\n‚úÖ –°–∞–±–º–∏—Ç —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω—ë–Ω: {output_file}")
print(f"üí° –¢–µ–ø–µ—Ä—å –º–æ–∂–Ω–æ –æ—Ç–ø—Ä–∞–≤–ª—è—Ç—å –µ–≥–æ –Ω–∞ –ø–ª–∞—Ç—Ñ–æ—Ä–º—É!")

üìÅ –ó–∞–≥—Ä—É–∑–∫–∞ –¥–∞–Ω–Ω—ã—Ö...
‚è≥ –°–±–æ—Ä –∏—Å—Ç–æ—Ä–∏–∏ –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π (—Å–æ—Ä—Ç–∏—Ä–æ–≤–∫–∞ –ø–æ –¥–∞—Ç–µ)...
üî• –†–∞—Å—á—ë—Ç –ø–æ–ø—É–ª—è—Ä–Ω—ã—Ö —Ç–æ–≤–∞—Ä–æ–≤ –∑–∞ –ø–æ—Å–ª–µ–¥–Ω–∏–µ 14 –¥–Ω–µ–π...
üîç –û–ø—Ä–µ–¥–µ–ª–µ–Ω–∏–µ —É–Ω–∏–∫–∞–ª—å–Ω—ã—Ö –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π –∏–∑ —à–∞–±–ª–æ–Ω–∞...
–í—Å–µ–≥–æ —É–Ω–∏–∫–∞–ª—å–Ω—ã—Ö –ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª–µ–π: 293230
–û–∂–∏–¥–∞–µ–º–æ–µ —á–∏—Å–ª–æ —Å—Ç—Ä–æ–∫ –≤ —Å–∞–±–º–∏—Ç–µ: 5864600
üì§ –ì–µ–Ω–µ—Ä–∞—Ü–∏—è —Å–∞–±–º–∏—Ç–∞ ‚Üí submission.csv ...


–†–µ–∫–æ–º–µ–Ω–¥–∞—Ü–∏–∏: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 293230/293230 [00:07<00:00, 41491.34–ø–æ–ª—å–∑–æ–≤–∞—Ç–µ–ª—å/s]


‚úÖ –°–∞–±–º–∏—Ç —É—Å–ø–µ—à–Ω–æ —Å–æ—Ö—Ä–∞–Ω—ë–Ω: submission.csv
üí° –¢–µ–ø–µ—Ä—å –º–æ–∂–Ω–æ –æ—Ç–ø—Ä–∞–≤–ª—è—Ç—å –µ–≥–æ –Ω–∞ –ø–ª–∞—Ç—Ñ–æ—Ä–º—É!



