### Libraries Required

In [1]:
import pandas as pd
from pyspark import SparkContext
from pyspark.sql.session import SparkSession
from pyspark.sql.types import IntegerType, StringType
from pyspark.ml.evaluation import RegressionEvaluator
from pyspark.ml.recommendation import ALS

### Initiate Spark Session

In [2]:
# Build (or reuse) the Spark session for this notebook.
# SparkContext.getOrCreate() never returns None, so the previous
# `if sc is None` fallback could never run and the master / app name were
# silently ignored. The builder applies them when a new session is created
# and reuses the running session/context when one already exists.
spark = (
    SparkSession.builder
    .master("local[*]")
    .appName("Meal Recipe Collaborative Filtering")
    .getOrCreate()
)

# Keep `sc` available for any code that talks to the SparkContext directly.
sc = spark.sparkContext

22/10/05 19:55:20 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties
Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).


In [14]:
# Load the wide ratings table (presumably one row per recipe and one column
# per user, plus a 'Title' column -- confirm against the CSV).
# `sep` is passed by keyword: the positional form is deprecated and raises a
# TypeError on pandas >= 2.0.
test_user_data = pd.read_csv('user_rating_test_data.csv', sep=',')

# The row position doubles as the recipe id used by the rest of the notebook.
test_user_data['Recipe_Index'] = test_user_data.index

# NOTE: missing ratings are intentionally NOT dropped here; dropping rows at
# this stage would discard every recipe that any single user has not rated.
# The title is not a rating column, so remove it before reshaping.
test_user_data = test_user_data.drop(columns='Title')

  exec(code_obj, self.user_global_ns, self.user_ns)


### Input Format

The input should be a table in the form shown below, with the columns User_ID, Recipe_Index and Rating (the individual's rating of that recipe).

In [15]:
# Reshape wide -> long: every former column label becomes a User_ID value and
# every cell becomes one Rating row; rows without a rating are discarded.
test_user_data = (
    test_user_data
    .melt(id_vars='Recipe_Index', var_name='User_ID', value_name='Rating')
    .loc[:, ['User_ID', 'Recipe_Index', 'Rating']]
    .dropna()
)

test_user_data.head(n=5)

Unnamed: 0,User_ID,Recipe_Index,Rating
4,0,4,4.0
6,0,6,5.0
8,0,8,4.0
9,0,9,5.0
22,0,22,1.0


In [16]:
# Hand the long-format pandas ratings over to Spark for model training.
test_user_df = spark.createDataFrame(test_user_data)

# Cast User_ID to an integer column (it arrives from pandas as the melted
# column labels); ALS is given this column as its numeric user id.
test_user_df = test_user_df.withColumn(
    'User_ID',
    test_user_df.User_ID.cast(IntegerType()),
)

### Model Training

In [17]:
# Fixed seed so the split, the fitted factors and the reported RMSE are
# repeatable across "Restart & Run All" instead of changing on every run.
SEED = 42

# 80 / 20 train / test split of the rating rows.
train, test = test_user_df.randomSplit([0.8, 0.2], seed=SEED)

# Define the ALS model hyperparameters.
# coldStartStrategy="drop" removes predictions for users/recipes that were not
# seen during training, so the evaluation metric is not polluted by NaN.
als = ALS(
    maxIter=4,
    regParam=0.1,
    userCol="User_ID",
    itemCol="Recipe_Index",
    ratingCol="Rating",
    coldStartStrategy="drop",
    seed=SEED,
)
model = als.fit(train)

#### Model Performance

In [7]:
# Score the held-out ratings with the trained model.
predictions = model.transform(test)

# RMSE between the true 'Rating' and the model's 'prediction' column.
# The evaluator is named `evaluator` rather than `eval` so that the Python
# builtin is not shadowed for the rest of the kernel session.
evaluator = RegressionEvaluator(metricName="rmse", labelCol="Rating", predictionCol="prediction")
rmse = evaluator.evaluate(predictions)
print("Root-mean-square error = " + str(rmse))



Root-mean-square error = 3.9565706811683357


                                                                                

The RMSE is quite poor, suggesting that more data is required to provide more reliable predictions.<br>
Ideally the RMSE should be below 1.0.

### Extracting Features (Pandas)

In [8]:
# Ask the model for the 50 highest-scoring recipes per user. Presumably more
# than will be shown, so enough remain once already-rated recipes are removed.
userRecs = model.recommendForAllUsers(numItems=50)

In [9]:
# Do not truncate wide cells (the recommendation lists) when displaying frames.
pd.set_option('display.max_colwidth', None)

# Collect the per-user recommendations into pandas and store User_ID as a
# string, so lookups use the same string ids as the pandas ratings table.
user_predictions = userRecs.toPandas().astype({'User_ID': str})

                                                                                

### Extract Recommendations

Run this function to get the N-number of recommendations that should be shown to the user

extractRecommendations(ratings_df, predictions_df, user_id, num_of_recommendations)<br>

Where:<br><br>
<b>ratings_df</b><br> refers to the user ratings table from the database; it must contain the columns User_ID and Recipe_Index (alongside each recipe's rating)<br>

<b>predictions_df</b><br> is the table of recommendations for each user, produced by the trained model<br>

<b>user_id</b><br> being the unique user identifying number<br>

<b>num_of_recommendations</b><br> being the number of recommendations you wish to output<br>

In [10]:
def extractRecommendations(ratings_df, predictions_df, user_id, num_of_recommendations):
    """Return the top recommended recipes a user has not rated yet.

    Parameters
    ----------
    ratings_df : pandas.DataFrame
        Existing ratings; must contain the columns 'User_ID' and 'Recipe_Index'.
    predictions_df : pandas.DataFrame
        One row per user with the columns 'User_ID' and 'recommendations',
        where 'recommendations' is a sequence of items whose first element
        is the recipe id (in the order produced by the model).
    user_id
        Id of the user to look up; compared with ``==`` against 'User_ID' in
        both frames, so it must use the same type as those columns.
    num_of_recommendations : int
        Maximum number of recipe ids to return.

    Returns
    -------
    list
        Up to ``num_of_recommendations`` recipe ids, in recommendation order,
        excluding recipes the user has already rated. An empty list is
        returned when ``predictions_df`` has no row for ``user_id``.
    """
    user_rows = predictions_df.loc[predictions_df['User_ID'] == user_id, 'recommendations']
    if user_rows.empty:
        # No prediction row for this user (unknown id, or the model produced
        # nothing for them): return nothing instead of raising IndexError.
        return []

    # A set makes the "already rated?" check O(1) per candidate recipe.
    already_rated = set(
        ratings_df.loc[ratings_df['User_ID'] == user_id, 'Recipe_Index'].tolist()
    )

    # Only the first matching row is used, as before.
    unseen_recipes = [
        item[0] for item in user_rows.iloc[0] if item[0] not in already_rated
    ]
    return unseen_recipes[:num_of_recommendations]
    
    
# Example: the five best not-yet-rated recipes for user '11'.
print(
    extractRecommendations(
        ratings_df=test_user_data,
        predictions_df=user_predictions,
        user_id='11',
        num_of_recommendations=5,
    )
)

[168, 77, 162, 161, 174]
