In [20]:
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from scipy.stats import pearsonr

In [50]:
def generate_random_data_with_none(n_users, m_items, seed=None):
    """Build a random user-by-item rating table with one missing entry.

    Ratings are drawn from {-1, 0, 1} and stored as floats. One randomly
    chosen item in the first user's row is replaced by NaN; that is the item
    to be recommended later.

    Args:
        n_users: Number of rows, labelled ``User_1`` .. ``User_<n_users>``.
        m_items: Number of columns, labelled ``Item_1`` .. ``Item_<m_items>``.
        seed: Forwarded to ``np.random.seed``; note that this reseeds NumPy's
            global random state.

    Returns:
        A ``(df, random_item_index)`` tuple: the rating DataFrame and the
        0-based column position of the NaN entry in the first row.
    """
    # Seed first so that both draws below are reproducible.
    np.random.seed(seed)

    user_labels = [f"User_{row + 1}" for row in range(n_users)]
    item_labels = [f"Item_{col + 1}" for col in range(m_items)]

    # Integer ratings in [-1, 1], cast to float so the table can hold NaN.
    ratings = np.random.randint(-1, 2, size=(n_users, m_items)).astype(float)

    # Blank out one randomly chosen item for the first user.
    masked_item = np.random.choice(range(m_items))
    ratings[0, masked_item] = np.nan

    table = pd.DataFrame(ratings, index=user_labels, columns=item_labels)
    return table, masked_item


In [51]:
def calculate_similarity(data, method='cosine'):
    """Compute a user-by-user similarity matrix from a rating table.

    Any column (item) that contains at least one NaN is dropped before the
    similarities are computed, so every user is compared on the same items.

    Args:
        data: 2-D array-like of ratings, one row per user and one column per
            item (a NumPy array or anything ``np.asarray`` can convert, such
            as a numeric DataFrame).
        method: ``'cosine'`` or ``'pearson'``.

    Returns:
        A square ``(n_users, n_users)`` NumPy array. For ``'pearson'`` the
        diagonal is 1.0 (a user is perfectly correlated with itself) and
        undefined correlations (e.g. a user whose remaining ratings are all
        identical) are reported as 0.0 rather than NaN.

    Raises:
        ValueError: If ``method`` is not one of the supported names.
    """
    ratings = np.asarray(data, dtype=float)

    # Keep only the columns that every user has rated.
    complete_columns = ~np.isnan(ratings).any(axis=0)
    data_without_nan = ratings[:, complete_columns]

    if method == 'cosine':
        return cosine_similarity(data_without_nan)

    if method == 'pearson':
        n_users = data_without_nan.shape[0]
        # Start from the identity so self-similarity is 1.0, matching the
        # cosine variant.
        similarity_matrix = np.eye(n_users)
        # Pearson correlation is symmetric: compute the upper triangle once
        # and mirror it.
        for i in range(n_users):
            for j in range(i + 1, n_users):
                corr = pearsonr(data_without_nan[i], data_without_nan[j])[0]
                if np.isnan(corr):
                    # Correlation is undefined for constant input; treat it
                    # as "no similarity" so NaN does not leak downstream.
                    corr = 0.0
                similarity_matrix[i, j] = corr
                similarity_matrix[j, i] = corr
        return similarity_matrix

    raise ValueError(
        f"Unsupported similarity method: {method!r} (expected 'cosine' or 'pearson')"
    )

In [147]:
def recommend_items(user_id, data, similarity_matrix, top_n_users=3, top_n_items=2):
    """Recommend unrated items to a user from the ratings of similar users.

    For every item the target user has not rated (NaN), the score is the
    similarity-weighted average of the ratings given by the user's nearest
    neighbours, normalised by the sum of the absolute similarities.

    Args:
        user_id: 1-based user number; ``user_id - 1`` is the row position in
            ``data`` and in ``similarity_matrix``.
        data: Rating DataFrame (rows = users, columns = items, NaN = unrated).
        similarity_matrix: Square user-by-user similarity array.
        top_n_users: Maximum number of most-similar other users to consult.
        top_n_items: Maximum number of recommendations to return.

    Returns:
        A list of ``(item_position, score)`` tuples sorted by descending
        score, where ``item_position`` is the 0-based column position in
        ``data``. The list is empty when the user has no unrated items or no
        neighbour has rated any of them.

    Raises:
        IndexError: If ``user_id`` is outside ``1 .. len(data)``.
    """
    n_users = data.shape[0]
    if not 1 <= user_id <= n_users:
        # Without this check user_id <= 0 would silently wrap around to
        # another user's row through negative indexing.
        raise IndexError(f"user_id must be between 1 and {n_users}, got {user_id}")
    user_idx = user_id - 1

    similarities = np.asarray(similarity_matrix, dtype=float)[user_idx]
    ratings = data.to_numpy(dtype=float)

    # Rank users from most to least similar, then drop the user itself and any
    # neighbour whose similarity is undefined. Self is excluded by identity
    # rather than by rank, because ties or a zero diagonal can keep the user
    # from being ranked first.
    ranked = np.argsort(similarities)[::-1]
    similar_users = [
        candidate for candidate in ranked
        if candidate != user_idx and not np.isnan(similarities[candidate])
    ][:top_n_users]

    # Items the target user has not rated yet.
    unrated_items = np.flatnonzero(np.isnan(ratings[user_idx]))
    if unrated_items.size == 0:
        return []

    recommendation_scores = []
    for item in unrated_items:
        weighted_sum = 0.0
        weight_total = 0.0
        for neighbour in similar_users:
            rating = ratings[neighbour, item]
            if np.isnan(rating):
                # This neighbour has not rated the item either.
                continue
            weight = similarities[neighbour]
            weighted_sum += weight * rating
            # Normalise by |weight| so negative similarities cannot flip the
            # sign of the prediction or cancel the denominator to ~0.
            weight_total += abs(weight)
        if weight_total > 0:
            recommendation_scores.append((int(item), float(weighted_sum / weight_total)))

    # Highest predicted score first; slicing an empty list is still [].
    recommendation_scores.sort(key=lambda pair: pair[1], reverse=True)
    return recommendation_scores[:top_n_items]


In [148]:
# Generate a random 5-user x 5-item rating table and get the column position
# of the item that was left unrated (NaN) for the first user.
# NOTE(review): `seed` is not defined in any cell visible here — presumably it
# came from an earlier or since-removed cell. Define it explicitly before this
# call so the notebook survives Restart Kernel -> Run All.
random_df, random_item_index = generate_random_data_with_none(5, 5, seed)

In [149]:
# User-by-user similarity computed on the raw NumPy values of the rating table;
# no method is passed, so the function's default ('cosine') is used.
similarity_matrix = calculate_similarity(random_df.values)

In [150]:
# Recommend items for user 1 (1-based id, i.e. the first row of random_df),
# using the function's default neighbour and item counts.
top_recommendations = recommend_items(1, random_df, similarity_matrix)

In [151]:
# Display the recommendations as (0-based item column position, score) pairs.
top_recommendations

[(4, -2.6694289264767616e-17)]