## 第一步：协同过滤，基于用户相似度为用户推荐菜品

In [1]:
import pandas as pd  
import numpy as np  
from surprise import Dataset, Reader  
from surprise.model_selection import train_test_split, GridSearchCV, cross_validate  
from surprise import KNNBasic, KNNWithMeans, KNNWithZScore, SVD, SVDpp, NMF  
from surprise import accuracy  
import warnings  
warnings.filterwarnings('ignore')  

# Read the raw rating table and coerce the rating column to numbers;
# values that cannot be parsed become NaN.
file_path = 'MealRating.csv'
df = pd.read_csv(file_path)
df = df.assign(Rating=pd.to_numeric(df['Rating'], errors='coerce'))

# Keep only the (user, item, rating) triple and drop incomplete rows.
data = df.loc[:, ['UserID', 'MealID', 'Rating']].dropna()

# Wrap the cleaned triples in a Surprise dataset with a 1-5 rating scale.
reader = Reader(rating_scale=(1, 5))
dataset = Dataset.load_from_df(data, reader)

# Step 1: Compare different algorithms  
def evaluate_algorithms(data):
    """Cross-validate several Surprise algorithms and collect their mean errors.

    Every candidate is instantiated with its default parameters and run
    through 5-fold cross-validation on ``data``.

    Args:
        data: Dataset object handed unchanged to ``cross_validate``.

    Returns:
        dict: ``{algorithm name: {'RMSE': mean test RMSE, 'MAE': mean test MAE}}``
        in the order the candidates are listed below.
    """
    candidates = [
        ('KNNBasic', KNNBasic()),
        ('KNNWithMeans', KNNWithMeans()),
        ('KNNWithZScore', KNNWithZScore()),
        ('SVD', SVD()),
        ('SVDpp', SVDpp()),
        ('NMF', NMF()),
    ]

    summary = {}
    for name, estimator in candidates:
        print(f"\nEvaluating {name}...")
        folds = cross_validate(
            estimator,
            data,
            measures=['RMSE', 'MAE'],
            cv=5,
            verbose=False,
        )
        # Average the per-fold test scores into one number per metric.
        mean_rmse = folds['test_rmse'].mean()
        mean_mae = folds['test_mae'].mean()
        summary[name] = {'RMSE': mean_rmse, 'MAE': mean_mae}
    return summary

# Step 2: Parameter optimization for the best algorithm  
def optimize_svd_params(data):
    """Grid-search SVD hyper-parameters with 5-fold cross-validation.

    The grid covers ``n_epochs``, ``lr_all`` and ``reg_all``; both RMSE and
    MAE are measured, but only the RMSE winner is reported.

    Args:
        data: Dataset object passed to ``GridSearchCV.fit``.

    Returns:
        tuple: ``(best_rmse, best_params)`` where ``best_params`` is the
        parameter combination that achieved ``best_rmse``.
    """
    search_space = dict(
        n_epochs=[20, 30, 40],
        lr_all=[0.005, 0.01],
        reg_all=[0.02, 0.1, 0.4],
    )

    search = GridSearchCV(SVD, search_space, measures=['rmse', 'mae'], cv=5)
    search.fit(data)

    best_rmse = search.best_score['rmse']
    best_config = search.best_params['rmse']
    return best_rmse, best_config

# Step 3: Train final model with best parameters  
def train_final_model(data, best_params):
    """Fit an SVD model with the tuned parameters and score it on a hold-out set.

    The data is split 80/20 with a fixed ``random_state`` of 42, the model is
    trained on the larger part and evaluated on the smaller one.

    Args:
        data: Dataset object passed to ``train_test_split``.
        best_params: Mapping that provides ``'n_epochs'``, ``'lr_all'`` and
            ``'reg_all'``.

    Returns:
        tuple: ``(fitted model, RMSE on the test split, MAE on the test split)``.
    """
    trainset, testset = train_test_split(data, test_size=0.2, random_state=42)

    # Forward only the three tuned settings; any other keys are ignored.
    tuned = {key: best_params[key] for key in ('n_epochs', 'lr_all', 'reg_all')}
    algo = SVD(**tuned)
    algo.fit(trainset)

    predictions = algo.test(testset)
    # RMSE is computed before MAE, in the same order as the returned tuple.
    rmse = accuracy.rmse(predictions)
    mae = accuracy.mae(predictions)
    return algo, rmse, mae

def get_top_n_recommendations(algo, user_id, n=5):
    """Return the ``n`` meals with the highest predicted rating for one user.

    Meals that already appear together with ``user_id`` in the module-level
    ``df`` are excluded; every remaining meal is scored with ``algo.predict``.

    Args:
        algo: Fitted model exposing ``predict(user_id, item_id)`` whose result
            has an ``est`` attribute.
        user_id: Value matched against the ``UserID`` column of ``df``.
        n: Maximum number of recommendations to return.

    Returns:
        list[dict]: One dict per meal with the keys ``MealID``, ``MealName``,
        ``MealType``, ``MealPrice`` and ``PredictedRating`` (rounded to two
        decimals), ordered from highest to lowest predicted rating.
    """
    # Candidate meals = every known meal minus the ones this user already rated.
    already_rated = df.loc[df['UserID'] == user_id, 'MealID'].unique()
    catalogue = df['MealID'].unique()
    candidates = np.setdiff1d(catalogue, already_rated)

    # Score each candidate, then rank by estimate (stable sort, best first).
    scored = [
        (meal_id, algo.predict(user_id, meal_id).est)
        for meal_id in candidates
    ]
    ranked = sorted(scored, key=lambda pair: pair[1], reverse=True)

    # Attach descriptive fields taken from the first df row of each meal.
    recommended_meals = []
    for meal_id, estimate in ranked[:n]:
        first_row = df[df['MealID'] == meal_id].iloc[0]
        recommended_meals.append({
            'MealID': meal_id,
            'MealName': first_row['mealName'],
            'MealType': first_row['mealType'],
            'MealPrice': first_row['mealPrice'],
            'PredictedRating': round(estimate, 2),
        })

    return recommended_meals

# --- 1. Compare the candidate algorithms with cross-validation ---
print("Evaluating different algorithms...")
results = evaluate_algorithms(dataset)

print("\nAlgorithm Comparison Results:")
for algo, metrics in results.items():
    print(
        f"{algo}:",
        f"  RMSE: {metrics['RMSE']:.4f}",
        f"  MAE: {metrics['MAE']:.4f}",
        sep="\n",
    )

# --- 2. Tune SVD with a grid search ---
print("\nOptimizing SVD parameters...")
best_rmse, best_params = optimize_svd_params(dataset)
print(
    f"\nBest SVD parameters: {best_params}",
    f"Best RMSE: {best_rmse:.4f}",
    sep="\n",
)

# --- 3. Fit the final model and report hold-out errors ---
print("\nTraining final model with optimized parameters...")
final_algo, final_rmse, final_mae = train_final_model(dataset, best_params)
print(f"Final Model RMSE: {final_rmse:.4f}")
print(f"Final Model MAE: {final_mae:.4f}")

# --- 4. Recommend meals for the user found in the first row of df ---
test_user = df['UserID'].iloc[0]
print(f"\nGenerating recommendations for user: {test_user}")
recommendations = get_top_n_recommendations(final_algo, test_user, n=5)

print("\nTop 5 Recommended Meals:")
for i, rec in enumerate(recommendations, start=1):
    print(f"{i}. {rec['MealName']} ({rec['MealType']}) - ¥{rec['MealPrice']}")
    print(f"   Predicted Rating: {rec['PredictedRating']}")

# --- 5. Dataset statistics (computed on the raw df, not the cleaned data) ---
print("\nDataset Statistics:")
n_users = len(df['UserID'].unique())
n_meals = len(df['MealID'].unique())
n_ratings = len(df)
# Sparsity = share of the user x meal grid that has no row in df.
sparsity_pct = (1 - n_ratings / (n_users * n_meals)) * 100
print(f"Total number of users: {n_users}")
print(f"Total number of meals: {n_meals}")
print(f"Total number of ratings: {n_ratings}")
print(f"Rating sparsity: {sparsity_pct:.2f}%")

Evaluating different algorithms...

Evaluating KNNBasic...
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.

Evaluating KNNWithMeans...
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.

Evaluating KNNWithZScore...
Computing the msd similarity matrix...
Done computing similarity matrix.
Computing the msd similarity matrix...
Done computing similarity matrix.
Computin

In [2]:
import pandas as pd
from sentence_transformers import SentenceTransformer, util

# Load the pretrained SBERT sentence-embedding model.
model = SentenceTransformer('all-MiniLM-L6-v2')

# Read the shop table.
file_path_shop_info = '店铺标签_更新2.xlsx'
df_shop_info = pd.read_excel(file_path_shop_info)

# Force the score columns to numeric dtype; unparseable cells become NaN.
numeric_columns = ['口味分', '服务分', '环境分', '总分']
for score_column in numeric_columns:
    df_shop_info[score_column] = pd.to_numeric(df_shop_info[score_column], errors='coerce')

# Read the meal description table.
file_path_meal_descriptions = '阿里云菜品描述.xlsx'
df_meal_descriptions = pd.read_excel(file_path_meal_descriptions)

# Build a meal-name -> description dict for fast lookups.
description_by_name = df_meal_descriptions.set_index('菜品名称')['菜品描述']
meal_description_map = description_by_name.to_dict()

# Look up the description text of a meal by its name.
def get_meal_description(meal_name):
    """Return the description stored for ``meal_name``.

    Args:
        meal_name: Key looked up in the module-level ``meal_description_map``.

    Returns:
        The mapped description, or the fixed placeholder string
        "无法找到该菜品的描述" when the name is not in the map.
    """
    try:
        return meal_description_map[meal_name]
    except KeyError:
        return "无法找到该菜品的描述"

# Fetch the description of every recommended meal; these texts form the
# food-related part of the user input for shop matching.
user_food_descriptions = list(
    map(get_meal_description, (rec['MealName'] for rec in recommendations))
)

# The remaining parts of the user input are fixed values.
user_meal_scene = "朋友聚会"  # dining scene, e.g. family dinner, couple's date
user_focus_point = "服务"  # aspect the user cares about (口味 taste / 服务 service / 环境 environment)


# Step 1: food similarity
def calc_food_similarity(shop_description, user_description):
    """Score how similar a shop description is to a meal description.

    Both texts are embedded with the module-level ``model`` and compared with
    cosine similarity.

    Args:
        shop_description: Shop text; a missing value (NaN/None) scores 0.0.
        user_description: Meal description text to compare against.

    Returns:
        float: Cosine similarity multiplied by 10, or ``0.0`` when the shop
        description is missing.
    """
    # A missing shop description cannot be embedded; treat it as no match.
    if pd.isna(shop_description):
        return 0.0

    shop_vec = model.encode(shop_description, convert_to_tensor=True)
    user_vec = model.encode(user_description, convert_to_tensor=True)
    similarity = util.pytorch_cos_sim(shop_vec, user_vec)

    # Rescale the cosine value to a 10-point scale.
    return 10 * float(similarity[0])

# Step 2: scene similarity
def calc_scene_similarity(scene_tags, user_scene):
    """Return 1 when the user's dining scene appears in the shop's scene tags.

    Args:
        scene_tags: The shop's scene tags; a missing value (NaN/None) scores 0.
        user_scene: Scene the user asked for; tested with ``in`` against
            ``scene_tags``.

    Returns:
        int: ``1`` on a match, otherwise ``0``.
    """
    if pd.isna(scene_tags):
        return 0
    return 1 if user_scene in scene_tags else 0

# Step 3: focus-point similarity
def calc_focus_similarity(focus_score, focus_point):
    """Return 1 when the shop's score on the user's focus aspect exceeds 4.

    Args:
        focus_score: Shop score for the chosen aspect; a missing value scores 0.
        focus_point: Name of the aspect. Accepted for call-site symmetry but
            not used in the computation.

    Returns:
        int: ``1`` if ``focus_score`` is present and strictly greater than 4,
        otherwise ``0``.
    """
    if pd.isna(focus_score):
        return 0
    return 1 if focus_score > 4 else 0

# Step 4: overall score
def calc_overall_score(total_score):
    """Map a shop's total score onto a three-level bonus.

    Args:
        total_score: The shop's total score; a missing value scores 0.

    Returns:
        ``1`` when the score is above 4.5, ``0.5`` when it is above 4.3 (and
        at most 4.5), otherwise ``0``.
    """
    if pd.isna(total_score):
        return 0

    # Tiers are checked from the highest threshold down; first hit wins.
    for threshold, bonus in ((4.5, 1), (4.3, 0.5)):
        if total_score > threshold:
            return bonus
    return 0

# Shop-ranking pipeline.
#
# Stage A: for every meal description, score all shops by food similarity and
#          keep the 20 best shops.
# Stage B: on each 20-shop shortlist, add the scene / focus-point / overall
#          scores, combine them into a recommendation score, and keep the
#          single best shop.
# Stage C: merge the per-description winners, drop duplicate shops and keep
#          the 5 with the highest recommendation score.

# Placeholder that get_meal_description() returns for unknown meals. It is not
# a real description, so matching shops against it would yield a meaningless
# recommendation. Must stay identical to the fallback string used there.
MISSING_DESCRIPTION = "无法找到该菜品的描述"

# Columns reported for every recommended shop.
RESULT_COLUMNS = ['店铺名称', '菜品相似度得分', '场景相似度得分', '关注点相似度得分', '总体评分', '推荐分']

# Stage A: food-similarity score of every shop per description, top 20 kept.
top_shops_per_description = {}
for description in user_food_descriptions:
    # Skip meals without a usable description (placeholder text or an empty
    # spreadsheet cell that was read as NaN).
    if pd.isna(description) or description == MISSING_DESCRIPTION:
        continue
    # The score column on df_shop_info is scratch space that is overwritten on
    # every iteration; the shortlist is copied so it survives the next pass.
    df_shop_info['菜品相似度得分'] = df_shop_info['店铺描述'].apply(lambda x: calc_food_similarity(x, description))
    top_20_shops = df_shop_info.nlargest(20, '菜品相似度得分').copy()
    top_shops_per_description[description] = top_20_shops

# Stage B: remaining scores on each shortlist, then pick its best shop.
focus_column = user_focus_point + '分'
recommended_shops = []
for description, top_20_shops in top_shops_per_description.items():
    # Tags are stringified before the substring test, as in the original flow.
    top_20_shops['场景相似度得分'] = top_20_shops['场景标签'].apply(
        lambda tags: calc_scene_similarity(str(tags), user_meal_scene)
    )
    top_20_shops['关注点相似度得分'] = top_20_shops[focus_column].apply(
        lambda score: calc_focus_similarity(score, user_focus_point)
    )
    top_20_shops['总体评分'] = top_20_shops['总分'].apply(calc_overall_score)

    # Weighted recommendation score.
    # NOTE(review): the weights add up to 1.4 and the food score is on a
    # 10-point scale while the other three are 0/1 — confirm this is intended.
    top_20_shops['推荐分'] = (
        top_20_shops['菜品相似度得分'] * 0.4 +
        top_20_shops['场景相似度得分'] * 0.4 +
        top_20_shops['关注点相似度得分'] * 0.5 +
        top_20_shops['总体评分'] * 0.1
    )

    # Keep only the highest-scoring shop of this shortlist.
    best_shop = top_20_shops.nlargest(1, '推荐分')
    recommended_shops.append(best_shop[RESULT_COLUMNS])

# Stage C: merge the winners, de-duplicate by shop name, keep the top 5.
if recommended_shops:
    final_recommendations = (
        pd.concat(recommended_shops)
        .drop_duplicates(subset=['店铺名称'])
        .nlargest(5, '推荐分')
    )
else:
    # pd.concat() raises ValueError on an empty list; fall back to an empty
    # result with the expected columns when no meal had a usable description.
    final_recommendations = pd.DataFrame(columns=RESULT_COLUMNS)

print("最终推荐度最高的5个店铺及其各项得分：")
print(final_recommendations[RESULT_COLUMNS])

最终推荐度最高的5个店铺及其各项得分：
                        店铺名称   菜品相似度得分  场景相似度得分  关注点相似度得分  总体评分       推荐分
3              洪鸭纪干锅鸭头(定福庄店)  9.299961        1         1   0.5  4.669985
203                 Ditto儿咖啡  8.495572        1         1   0.5  4.348229
257      锅里满口香锅贴水饺家常菜(传媒大学店)  8.258291        1         1   0.0  4.203316
18   草原黑森林碳烤羊腿·定福庄扛把子(朝阳北路店)  7.928850        1         1   1.0  4.171540
667              泉州面线糊(财满街店)  8.106827        1         1   0.0  4.142731
