In [9]:
!pip install implicit



In [10]:
import pandas as pd
import numpy as np
import scipy.sparse as sparse
import implicit
from implicit.als import AlternatingLeastSquares
import os

In [11]:
# --- 1. Data Loading & Preprocessing ---
def load_data(filename):
    """Read an interaction file and build two sparse interaction matrices.

    Every non-blank line is expected to contain whitespace-separated
    integers: the user id first, followed by the ids of the items that user
    interacted with. Each (user, item) pair is stored with the value 1.
    A pair that occurs more than once in the file is summed when the
    matrix is built, so it ends up with a value greater than 1.

    Args:
        filename: Path of the text file to read.

    Returns:
        A tuple ``(user_item_matrix, item_user_matrix)`` of
        ``scipy.sparse.csr_matrix`` objects. The first has users as rows and
        items as columns; the second is its transpose. Dimensions are
        inferred from the largest user id and item id that take part in at
        least one interaction.

    Raises:
        ValueError: If a token is not an integer, or if the file does not
            contain a single user-item interaction.
    """
    print(f"Loading {filename}...")
    users = []
    items = []

    # Read the file line by line
    with open(filename, 'r') as f:
        for line in f:
            parts = [int(token) for token in line.split()]
            if not parts:
                # Blank / whitespace-only line (e.g. trailing newline at EOF):
                # nothing to record, and parts[0] would not exist.
                continue

            user_id, interacted_items = parts[0], parts[1:]
            users.extend([user_id] * len(interacted_items))
            items.extend(interacted_items)

    if not users:
        # Without any index the matrix dimensions cannot be inferred; fail
        # with a message that names the offending file.
        raise ValueError(f"No user-item interactions found in {filename}")

    # Implicit feedback 'confidence' of 1 for every recorded interaction.
    data = [1] * len(users)

    # Users x Items orientation; this is the matrix the rest of the notebook
    # passes to training and recommendation.
    user_item_matrix = sparse.csr_matrix((data, (users, items)))
    # Items x Users orientation, derived from the matrix above so the two can
    # never disagree.
    item_user_matrix = user_item_matrix.T.tocsr()

    print(f"Data Loaded: {user_item_matrix.shape[0]} Users, {user_item_matrix.shape[1]} Items")
    return user_item_matrix, item_user_matrix

In [12]:
# --- 2. Model Training & Tweaks (CORRECTED) ---
def train_model(training_matrix, factors=20, regularization=0.1, iterations=20, description="Base Model"):
    """Create an ALS model with the given hyperparameters and fit it.

    Args:
        training_matrix: Interaction matrix handed unchanged to ``fit``.
            The original note on this call says implicit >= 0.6.0 expects
            the User x Item orientation.
        factors: Value forwarded as the model's ``factors`` argument.
        regularization: Value forwarded as the ``regularization`` argument.
        iterations: Value forwarded as the ``iterations`` argument.
        description: Label used only in the progress banner that is printed.

    Returns:
        The ``AlternatingLeastSquares`` instance after ``fit`` was called.
    """
    banner = f"\n--- Training {description} ---"
    print(banner)
    print(f"Params: Factors={factors}, Regularization={regularization}")

    hyperparams = dict(
        factors=factors,
        regularization=regularization,
        iterations=iterations,
        random_state=42,  # fixed seed passed on every run
    )
    als = AlternatingLeastSquares(**hyperparams)
    als.fit(training_matrix)
    return als

In [13]:
# --- 3. Recommendation Generation ---
def generate_recommendations(model, user_item_matrix, user_ids, top_n=20):
    """Ask the model for the top-N item ids of every requested user.

    Args:
        model: Object exposing ``recommend(user_ids, user_items, N=...,
            filter_already_liked_items=...)`` that returns a pair whose
            first element can be indexed by batch position.
        user_item_matrix: Interaction matrix that is row-indexed with
            ``user_ids`` to obtain the rows handed to ``recommend``.
        user_ids: Sized, indexable collection of user ids (the notebook
            passes a NumPy array).
        top_n: Number of items requested per user.

    Returns:
        Dict mapping each entry of ``user_ids`` to the corresponding row of
        item ids returned by the model. An empty ``user_ids`` yields ``{}``
        without calling the model.
    """
    print("Generating recommendations...")

    if len(user_ids) == 0:
        # Nothing to score: skip the library call instead of handing it an
        # empty batch.
        return {}

    # filter_already_liked_items=True asks the model to leave out items the
    # user already has in their matrix row. The second return value (the
    # scores) is not needed here.
    ids, _scores = model.recommend(
        user_ids,
        user_item_matrix[user_ids],
        N=top_n,
        filter_already_liked_items=True,
    )

    return {user_id: ids[i] for i, user_id in enumerate(user_ids)}

In [14]:
# --- Execution Block ---
if __name__ == "__main__":
    # Step 1 - parse the interaction file into both matrix orientations.
    user_item, item_user = load_data('train-1.txt')

    # Row indices that hold at least one stored interaction, de-duplicated.
    unique_users = np.unique(user_item.nonzero()[0])

    # Steps 2-4 - fit three variants on the User x Item matrix: the baseline,
    # one with more latent factors, and one with stronger regularization.
    base_model = train_model(
        user_item,
        description="Base MF (ALS)",
        factors=20,
        regularization=0.1,
    )
    tweak1_model = train_model(
        user_item,
        description="Tweak 1 (High Factors)",
        factors=50,
        regularization=0.1,
    )
    tweak2_model = train_model(
        user_item,
        description="Tweak 2 (High Reg)",
        factors=20,
        regularization=0.5,
    )

    # Step 5 - only the baseline model is used to produce the output file.
    final_recs = generate_recommendations(base_model, user_item, unique_users, top_n=20)

    # Step 6 - one line per user, in ascending user-id order:
    # "<user_id> <item> <item> ...".
    output_filename = "output_recommendations.txt"
    with open(output_filename, 'w') as out_file:
        for uid in sorted(final_recs):
            rec_items = ' '.join(str(item) for item in final_recs[uid])
            out_file.write(f"{uid} {rec_items}\n")

    print(f"\nSuccess! Recommendations saved to {output_filename}")

Loading train-1.txt...
Data Loaded: 31668 Users, 38056 Items

--- Training Base MF (ALS) ---
Params: Factors=20, Regularization=0.1


  0%|          | 0/20 [00:00<?, ?it/s]


--- Training Tweak 1 (High Factors) ---
Params: Factors=50, Regularization=0.1


  0%|          | 0/20 [00:00<?, ?it/s]


--- Training Tweak 2 (High Reg) ---
Params: Factors=20, Regularization=0.5


  0%|          | 0/20 [00:00<?, ?it/s]

Generating recommendations...

Success! Recommendations saved to output_recommendations.txt
