安裝 XGBoost

In [24]:
!pip install xgboost



In [25]:
from google.colab import drive

# Mount Google Drive; the dataset is later read from a path under this mount point.
mount_point = '/content/drive'
drive.mount(mount_point)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


匯入 + Preprocessing

In [35]:
# 匯入套件
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler, LabelEncoder
from xgboost import XGBRegressor
import torch

# 1. Load the recipe table and keep only the first row for each recipe name.
file_path = "/content/drive/MyDrive/DS/Final/Data/processed_recipes.csv"  # change to the path you uploaded the file to
data = pd.read_csv(file_path).drop_duplicates(subset='name', keep='first')

# 2. Prepare inputs and target.
# Map every recipe name to an integer label.
label_encoder = LabelEncoder()
data['name_encoded'] = label_encoder.fit_transform(data['name'])

# Nutrient columns used as model inputs (this order is reused for user input later).
features = ['Calories_Kcal', 'Carbs_g', 'Fats_g', 'Fiber_g', 'Protein_g', 'Sugars_g']
X = data[features]

# Target: the integer label of the recipe name.
y = data['name_encoded']

# Rescale every feature to [0, 1] using the min/max observed in X.
scaler = MinMaxScaler().fit(X)
X_scaled = scaler.transform(X)

# Hold out 20% of the rows for evaluation; fixed seed for a reproducible split.
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled,
    y,
    test_size=0.2,
    random_state=42,
)



Build Model

In [36]:
# Hyperparameters for the gradient-boosted regressor.
xgb_params = {
    'objective': 'reg:squarederror',
    'n_estimators': 100,
    'learning_rate': 0.1,
    'max_depth': 5,
}

# NOTE(review): y_train is the LabelEncoder id of each (de-duplicated) recipe
# name, so the regression target is an arbitrary ordering of names; the
# captured test score of -0.11 is consistent with that. Confirm this target
# is really what the model should learn.
model = XGBRegressor(**xgb_params)
model.fit(X_train, y_train)

Test

In [37]:
import html

import numpy as np
import pandas as pd
from IPython.display import Image, display, HTML

# Assumes the earlier cells have been run, providing:
#   model    - the fitted XGBoost regressor
#   scaler   - the fitted MinMaxScaler
#   features - the list of nutrient column names the scaler was fitted on
#   X        - the unscaled nutrient columns of `data`
#   data     - the recipe table, including 'name', 'img_src' and 'url'

# Model evaluation (for a regressor, .score() reports R^2).
train_score = model.score(X_train, y_train)
test_score = model.score(X_test, y_test)
print(f"Train Score: {train_score:.2f}")
print(f"Test Score: {test_score:.2f}")

# Simulated nutrient targets from a user.
user_input = np.array([[200, 50, 20, 15, 30, 10]])  # [Calories, Carbs, Fats, Fiber, Protein, Sugar]

# Scale the user's input with the same scaler as the recipes. It is wrapped in
# a DataFrame carrying the fitted column names so the values are matched to
# the right features (and no feature-name warning is raised).
user_input_df = pd.DataFrame(user_input, columns=features)
user_input_scaled = scaler.transform(user_input_df)

# Scale the nutrient features of every recipe.
data_features_scaled = scaler.transform(X)

# Score each recipe by how close its scaled nutrients are to the user's target.
# The regressor's output is an encoded name id, not a similarity measure, and
# multiplying every recipe's prediction by one scalar would give the same
# ranking for any user input - so the ranking uses distance instead.
distances = np.linalg.norm(data_features_scaled - user_input_scaled, axis=1)
similarity_scores = 1.0 / (1.0 + distances)  # in (0, 1]; 1.0 means an exact match

# Attach the scores to the recipe table.
data['match_score'] = similarity_scores

# Pick the five best-matching recipes.
top_5 = data.nlargest(5, 'match_score')

# Show name, image and link for each recommendation. Values come from the CSV,
# so they are HTML-escaped before being interpolated into markup.
print("推薦的 Top 5 餐點：")
for _, row in top_5.iterrows():
    display(HTML(f"<h3>{html.escape(str(row['name']))}</h3>"))  # recipe name
    if pd.notna(row['img_src']):
        display(Image(url=row['img_src'], width=300))  # recipe image
    if pd.notna(row['url']):
        safe_url = html.escape(str(row['url']), quote=True)
        display(HTML(f"<a href='{safe_url}' target='_blank'>查看完整食譜</a>"))  # link to the full recipe
    print("-" * 30)

Train Score: 0.62
Test Score: -0.11
推薦的 Top 5 餐點：




------------------------------


------------------------------


------------------------------


------------------------------


------------------------------


保存model

In [29]:
import joblib

# Serialize the fitted model to a file in the current working directory.
model_filename = "xgboost_recommendation_model.pkl"
joblib.dump(model, model_filename)

['xgboost_recommendation_model.pkl']

In [30]:
# 3. Export the fitted scaler so the same feature scaling can be reapplied
# wherever the saved model is loaded.
scaler_filename = "scaler.pkl"
joblib.dump(scaler, filename=scaler_filename)

['scaler.pkl']