In [19]:
!pip install -q tensorflow-recommenders
!pip install -q --upgrade tensorflow-datasets
!pip install -q scann

In [20]:
import pandas as pd
import numpy as np
import json
import tensorflow as tf
import os
os.environ['TF_USE_LEGACY_KERAS'] = '1'
import pprint
import tempfile
import tensorflow_datasets as tfds
import tensorflow_recommenders as tfrs

from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import OneHotEncoder
from typing import Dict, Text


In [21]:
# Mount Google Drive
from google.colab import drive
drive.mount('/content/drive')

# Rest of your code to read the CSV file
file = '/content/drive/My Drive/sugar.csv'
df = pd.read_csv(file)
df.head()


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


Unnamed: 0,User ID,Date,Dish Name,Sugar
0,1,2014-09-15,"my - McDonalds Espresso Pronto® Flat White, 2 ...",29
1,1,2014-09-15,"Uncle Tobys Australia - Vita Brits, 3 Biscuits...",0
2,1,2014-09-15,"Pauls - Smarter White Milk, 600 ml",34
3,1,2014-09-15,"Quick Added Calories, 500 calories",0
4,1,2014-09-15,"Quick Added Calories, 1,000 calories",0


In [22]:
df['Rating'] = np.random.randint(1, 6, size=len(df))
df.to_csv('ranking.csv', index=False)

In [26]:
df["User ID"] = df["User ID"].astype(str)
df["Rating"] = df["Rating"].replace({',': '.'}, regex=True).astype(float)
df["Dish Name"] = df["Dish Name"].astype(str)

final_df = tf.data.Dataset.from_tensor_slices({
    "User ID": df["User ID"].values,
    "Dish Name": df["Dish Name"].values,
    "Rating": df["Rating"].values,
})

In [27]:
# Shuffle and split the dataset
tf.random.set_seed(42)
shuffled = final_df.shuffle(100_000, seed=42, reshuffle_each_iteration=False)

# Split into train and test
train = shuffled.take(80_000)
test = shuffled.skip(80_000).take(20_000)


In [28]:
user_ids = final_df.batch(10_000).map(lambda x: x["User ID"])
dish_names = final_df.batch(1_000).map(lambda x: x["Dish Name"])

unique_user_ids = np.unique(np.concatenate(list(user_ids)))
unique_dish_names = np.unique(np.concatenate(list(dish_names)))

In [29]:
class LowSugarRecommendationModel(tf.keras.Model):

    def __init__(self):
        super().__init__()
        embedding_dimension = 32

        # Compute embeddings for users.
        self.user_embeddings = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_user_ids, mask_token=None),
            tf.keras.layers.Embedding(len(unique_user_ids) + 1, embedding_dimension)
        ])

        # Compute embeddings for dishes.
        self.dish_embeddings = tf.keras.Sequential([
            tf.keras.layers.StringLookup(
                vocabulary=unique_dish_names, mask_token=None),
            tf.keras.layers.Embedding(len(unique_dish_names) + 1, embedding_dimension)
        ])

        # Final layer for predicting sugar content (rating).
        self.rating_content_layer = tf.keras.Sequential([
            tf.keras.layers.Dense(256, activation="relu"),
            tf.keras.layers.Dense(64, activation="relu"),
            tf.keras.layers.Dense(1)  # Output layer to predict rating from usr
        ])

    def call(self, inputs):
        user_id, dish_name = inputs

        # Get the user and dish embeddings
        user_embedding = self.user_embeddings(user_id)
        dish_embedding = self.dish_embeddings(dish_name)

        # Concatenate the embeddings and predict rating
        concatenated_embeddings = tf.concat([user_embedding, dish_embedding], axis=1)

        # Predict rating using the final layer
        return self.rating_content_layer(concatenated_embeddings)


In [30]:
task = tfrs.tasks.Ranking(
  loss = tf.keras.losses.MeanSquaredError(),
  metrics=[tf.keras.metrics.RootMeanSquaredError()]
)

In [31]:
class LowSugarFoodModel(tfrs.models.Model):

    def __init__(self):
        super().__init__()
        # Initialize the ranking model that predicts sugar content.
        self.ranking_model: tf.keras.Model = LowSugarRecommendationModel()

        # The task for ranking food items based on predicted sugar content.
        self.task: tf.keras.layers.Layer = task

    def call(self, features: Dict[str, tf.Tensor]) -> tf.Tensor:
        # Call the ranking model (low-sugar food recommendation).
        return self.ranking_model(
            (features["User ID"], features["Dish Name"])
        )

    def compute_loss(self, features: Dict[Text, tf.Tensor], training=False) -> tf.Tensor:
        """

        Args:
          features:
          training:

        Returns:

        """
        # Pop the target value, which is rating.
        labels = features.pop("Rating")

        # Get the predictions from the ranking model.
        sugar_predictions = self(features)

        # Compute and return the loss using the ranking task.
        return self.task(labels=labels, predictions=sugar_predictions)

In [32]:
model = LowSugarFoodModel()
model.compile(optimizer=tf.keras.optimizers.Adagrad(learning_rate=0.1))

In [33]:
cached_train = train.shuffle(100_000).batch(8192).cache()
cached_test = test.batch(4096).cache()

In [34]:
model.fit(cached_train, epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


<tf_keras.src.callbacks.History at 0x7dc04ea3bf70>

In [35]:
model.evaluate(cached_test, return_dict=True)



{'root_mean_squared_error': 1.4108062982559204,
 'loss': 1.9377663135528564,
 'regularization_loss': 0,
 'total_loss': 1.9377663135528564}

In [39]:
# Example test dishes (correcting the format)
test_dishes = [
    "chicken and butterbean casserole, 1 serving",
    "#1 Gyro - Greek Lamb Salad, 2 cup",
    "Favorite Dinner, 1 cup serving, 2 servings"
]

# Dictionary to store predicted sugar content for each dish
test_ratings = {}

# Predict the sugar content for each dish for user 42
for dish_name in test_dishes:
    test_ratings[dish_name] = model({
        "User ID": np.array(["85"]),  # User ID (could be any user)
        "Dish Name": np.array([dish_name])  # The name of the dish
    })

# Sort dishes by predicted sugar content (from lowest to highest)
sorted_dishes = sorted(test_ratings.items(), key=lambda x: x[1].numpy()[0][0])

# Print the recommendations based on the lowest sugar content
print("Recommended Dishes (for user 85):")
for dish, rating in sorted_dishes:
    print(f"{dish}: {rating.numpy()[0][0]:.2f} rating")  # Access the scalar value for sugar content


Recommended Dishes (for user 85):
chicken and butterbean casserole, 1 serving: 2.82 rating
Favorite Dinner, 1 cup serving, 2 servings: 2.82 rating
#1 Gyro - Greek Lamb Salad, 2 cup: 2.86 rating


In [43]:

# The User ID you want to predict for
user_id = 42  # Replace with the desired user ID

# Filter out the dishes the user has already eaten
user_history = df[df['User ID'] == user_id]['Dish Name'].unique()

# List of all dishes to be considered (excluding those the user has already eaten)
remaining_dishes = df[~df['Dish Name'].isin(user_history)]

# Dictionary to store predicted ratings for each dish
predicted_ratings = {}

# Predict ratings for each remaining dish (assuming the 'model' is defined)
for dish_name in remaining_dishes['Dish Name'].unique():
    # Ensure both user_id and dish_name are passed as strings if the model expects them as strings
    predicted_ratings[dish_name] = model({
        "User ID": np.array([str(user_id)]),  # Convert user_id to string
        "Dish Name": np.array([dish_name])  # dish_name is already a string
    })

# Sort dishes by predicted ratings (from highest to lowest)
sorted_dishes = sorted(predicted_ratings.items(), key=lambda x: x[1].numpy()[0][0], reverse=True)

# Print the top 10 dishes with the highest predicted ratings
print(f"Top 10 recommended dishes for user {user_id} (excluding already eaten dishes):")
for i, (dish, rating) in enumerate(sorted_dishes[:10]):
    print(f"{i+1}. {dish}: {rating.numpy()[0][0]:.2f} rating")  # Access the scalar value for the predicted rating


Top 10 recommended dishes for user 42 (excluding already eaten dishes):
1. Monterey - Sliced Baby Bellas, 30 g: 3.03 rating
2. Oliver's - Banana, 1 banana: 3.03 rating
3. fresh - vegetables , 0.5 cup: 3.02 rating
4. Homemade - Sauteed Veggies (Carrots, Green Beans, Brocolli, Cauliflower, Squash, Zucchini, Onion, Green Pepper), 2 c Cooked: 3.02 rating
5. Eat Well/embrace Life - White Bean Hummus, 2 tbs: 3.02 rating
6. 1 Stick Large - Celery, 2 stick 10" long: 3.01 rating
7. Scrambled Eggs (Aps) - 2 Large Eggs Scrambled In Coconut Oil, 1 eggs: 3.01 rating
8. Phd Diet Whey - Whey, 1 scoop: 3.01 rating
9. Panera - Baked Potato Soup (Pick 2), 1 cup: 3.01 rating
10. Hershey's - Milk Chocolate Snack Size, 0.5 pieces: 3.01 rating


In [44]:
# Get the top 10 recommended dishes
top_10_dishes = sorted_dishes[:10]

# Now, sort the top 10 dishes by sugar content (from lowest to highest)
top_10_dishes_with_sugar = [(dish, rating, df[df['Dish Name'] == dish]['Sugar'].values[0]) for dish, rating in top_10_dishes]

# Sort the top 10 dishes by sugar content
sorted_by_sugar = sorted(top_10_dishes_with_sugar, key=lambda x: x[2])  # Sort by sugar content (x[2] is sugar)

# Print the top 3 dishes with the least sugar
print(f"Top 3 recommended dishes for user {user_id} (lowest sugar content):")
for i, (dish, rating, sugar) in enumerate(sorted_by_sugar[:3]):
    print(f"{i+1}. {dish}: {rating.numpy()[0][0]:.2f} rating, {sugar:.2f} sugar")

Top 3 recommended dishes for user 42 (lowest sugar content):
1. Monterey - Sliced Baby Bellas, 30 g: 3.03 rating, 0.00 sugar
2. fresh - vegetables , 0.5 cup: 3.02 rating, 0.00 sugar
3. Eat Well/embrace Life - White Bean Hummus, 2 tbs: 3.02 rating, 0.00 sugar
