In [1]:
import numpy as np
import requests
from io import BytesIO
from PIL import Image
import matplotlib.pyplot as plt
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd
from pathlib import Path
import os
from scipy.sparse import load_npz

BASE_URL = "https://res.cloudinary.com/ds84b9f8s/image/upload/v1763570455/movies_small/"


def show_image(image_filename):
    full_url = BASE_URL + image_filename
    try:
        response = requests.get(full_url)
        response.raise_for_status()

        img = Image.open(BytesIO(response.content))

        plt.figure(figsize=(4, 4))
        plt.imshow(img)
        plt.axis("off")
        plt.title(image_filename)
        plt.show()

    except Exception as e:
        print("cant load:", full_url)
        print("Error:", e)


def display_recommendations(idx_in_test, recs, data):

    print("\n==============================")
    print("Film de requÃªte")
    print("==============================\n")

  
    for i in idx_in_test:
        print(f" ID dans data: {i}")
        print(data.loc[i])
        if "image_filename" in data.columns:
            print("\nðŸ“¸ Image film de requÃªte:")
            show_image(data.loc[i, "image_filename"])

    print("\n==============================")
    print("Recommandations")
    print("==============================\n")


    cols_reco = [
        "Index", "Movie_Title", "Year", "main_genre", "side_genre",
        "Rating", "plot", "image_filename","similarity","score"
    ]
    # print(recs[cols_reco].to_string(index=False))

   
    if "image_filename" in recs.columns:
        print("\nImages des recommandations:")
        for _, row in recs.iterrows():
            print("\n---------------------------")
            print(row[cols_reco])
            show_image(row["image_filename"])



def recommend_for_test_index(idx_in_test, data, tfidf_matrix, top_k=5, alpha=0.2):

   
    query_vec = tfidf_matrix[idx_in_test].mean(axis=0)
    query_vec = np.asarray(query_vec)


    sims = cosine_similarity(query_vec, tfidf_matrix).ravel()


    ratings = data["Rating"].astype(float).to_numpy()
    r_min, r_max = ratings.min(), ratings.max()
    rating_norm = (ratings - r_min) / (r_max - r_min)


    score = (1 - alpha) * sims + alpha * rating_norm


    sorted_idx = np.argsort(score)[::-1]

   
    filtered_idx = []
    for i in sorted_idx:
        if data.index[i] in idx_in_test:
            continue
        filtered_idx.append(i)
        if len(filtered_idx) == top_k:
            break

    top_idx = np.array(filtered_idx)

    cols = [
        "Index", "Movie_Title", "Year", "main_genre", "side_genre",
        "Rating", "plot", "image_filename"
    ]

    recs = data.iloc[top_idx][cols].copy()
    recs["similarity"] = sims[top_idx]
    recs["score"] = score[top_idx]

    display_recommendations(idx_in_test, recs, data)

    return recs


In [2]:

BASE_DIR = Path(os.getcwd())

tfidf_matrix = load_npz(BASE_DIR / "train_model" / "tfidf_matrix.npz")
data = pd.read_csv(BASE_DIR / "DATASETULTIME.csv")
print(data.columns)

recs = recommend_for_test_index(
    idx_in_test=[700,1,2,3,4], 
    data=data,
    tfidf_matrix=tfidf_matrix,
    top_k=5,
    alpha=0.2
)


FileNotFoundError: [Errno 2] No such file or directory: 'c:\\Users\\remis\\Documents\\M1\\cine5\\python\\DATASETULTIME.csv'