In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# User_functions.py (loading all DFs)

In [2]:
%cd /content/drive/MyDrive/Grocery_Recommendation/flask_app

/content/drive/MyDrive/Grocery_Recommendation/flask_app


In [3]:
!pip install surprise
import nltk
nltk.download('punkt')

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting surprise
  Downloading surprise-0.1-py2.py3-none-any.whl (1.8 kB)
Collecting scikit-surprise
  Downloading scikit-surprise-1.1.1.tar.gz (11.8 MB)
[K     |████████████████████████████████| 11.8 MB 3.5 MB/s 
Building wheels for collected packages: scikit-surprise
  Building wheel for scikit-surprise (setup.py) ... [?25l[?25hdone
  Created wheel for scikit-surprise: filename=scikit_surprise-1.1.1-cp37-cp37m-linux_x86_64.whl size=1633984 sha256=9d2c8cf91a1cf17f2943fbafbddc0e3a789b1ea1d8573aeff9a0e3c96847ad04
  Stored in directory: /root/.cache/pip/wheels/76/44/74/b498c42be47b2406bd27994e16c5188e337c657025ab400c1c
Successfully built scikit-surprise
Installing collected packages: scikit-surprise, surprise
Successfully installed scikit-surprise-1.1.1 surprise-0.1


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.


True

In [4]:
import time
from flask import Flask, request, render_template
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer
# import nltk
from nltk.stem.snowball import SnowballStemmer
import pandas as pd
import numpy as np
import pickle
import sys
from surprise import Dataset
from surprise import Reader
from surprise import SVD


rec_columns = pickle.load(open('Pickle/rec_columns.p','rb'))
rec_index = pickle.load(open('Pickle/rec_index.p', 'rb'))
rec_user = pickle.load(open('Pickle/rec_user.p', 'rb'))
rec_rating = pickle.load(open('Pickle/rec_rating.p', 'rb'))
rec_prod_id = pickle.load(open('Pickle/rec_prod_id.p', 'rb'))
new_rec_df = pd.DataFrame(np.column_stack([rec_user, rec_prod_id, rec_rating]), index=rec_index, columns=rec_columns)

short_head = pickle.load(open("Pickle/short_head.p", "rb"))
reader = pickle.load(open("Pickle/reader.p", "rb"))
new_stem_count_vec = pickle.load(open("Pickle/new_stem_count_vec.p", "rb"))
new_stem_count_vec_matrix = pickle.load(open("Pickle/new_stem_count_vec_matrix.p", "rb"))
stemmer = SnowballStemmer("english")   

products_desc_stemmed = pd.read_pickle("Pickle/products_desc_stemmed.p")
prod_columns = pickle.load(open("Pickle/prod_columns.p", "rb"))
prod_index = pickle.load(open("Pickle/prod_index.p", "rb"))
prod_name = pickle.load(open("Pickle/prod_name.p", "rb"))
prod_aisle = pickle.load(open("Pickle/prod_aisle.p", "rb"))
prod_id = pickle.load(open("Pickle/prod_id.p", "rb"))
products_desc = pd.DataFrame(np.column_stack([prod_name, prod_aisle, prod_id]), index=prod_index, columns=['Product Name', 'Aisle', 'Product ID'])


# stem_and_vectorize_products_based_on_metadata

In [5]:

def stem_and_vectorize_products_based_on_metadata(product_input):

    word_list = nltk.word_tokenize(product_input)
    input_stemmed = ' '.join([stemmer.stem(word) for word in word_list])
    vec = new_stem_count_vec.transform(np.array(input_stemmed).reshape(1,))
    
    simil = cosine_similarity(vec, new_stem_count_vec_matrix)
    simil_shape = simil.reshape(new_stem_count_vec_matrix.shape[0],)
    simil_scores = pd.DataFrame(data=simil_shape, index=prod_index, columns=['score'])

    # Don't return scores of zero, only as many positive scores as exist
    non_zero_scores = simil_scores[simil_scores['score'] > 0]
    
    if len(non_zero_scores) == 0:
        return 0, 'None'
    
    if len(non_zero_scores) < 10:
        item_count = len(non_zero_scores)
    else:
        item_count = 10
    
    similarity_scores = simil_scores.sort_values(['score'], ascending=False)[:item_count]

    return item_count, (products_desc.iloc[list(similarity_scores.index)]).to_html(index=False, justify='center', classes='table1', border=2)


# get_sample_product

In [6]:
def get_sample_product(aisle=None):
    try: 
        if aisle:
            product = products_desc[products_desc['Aisle'].str.contains(aisle)].sample(1)
        else:
            product = products_desc.sample(1)
    except:
        product = products_desc.sample(1)
    name = product['Product Name'].iloc[0]
    prod_aisle = product['Aisle'].iloc[0]
    prod_id = product['Product ID'].iloc[0]  
    return name, prod_aisle, prod_id

In [7]:
# products_desc.sample(1)

In [8]:
# get_sample_product(aisle='instant foods')

# recommend_diverse_products

In [9]:
# return the top n diverse recommendations 
def recommend_diverse_products(ranked_products, n, aisle=None, percent_diverse=.20):
    print('recommend diverse products called')
    num_diverse = round(n * percent_diverse)
    recs = []
    
    if n < 1:
        print('Number of recommended products must be 1 or more')
        return 0, "None"
    
    for idx, rec in enumerate(ranked_products):
        
        if n == 0:
            recommendation = pd.DataFrame(recs, columns=['Product ID', 'Rating', 'Product Name', 'Aisle'])
            return len(recs), recommendation.to_html(index=False, justify='center', classes='table1', border=2)
            
        prod_id, _, prod_name, aisle_name = [*rec]


        if aisle:                                    # Did we specify an aisle? 
            if aisle in aisle_name:                  # Is it in the aisle we want?
                if n > num_diverse:                  # Are we looking for a long tail product? No
                    name = prod_name
                    print('Recommendation # ', idx+1, ': ', name, '\n')
                    recs.append(rec)
                    n-= 1
                else:                                 # Are we looking for a long tail product? Yes
                    if prod_id not in short_head:     # Is it NOT in the short_head list?
                        name = prod_name
                        print('Recommendation # ', idx+1, ': ', name, '\n')
                        recs.append(rec)
                        n-= 1
                    else:
                        continue
            elif idx == len(ranked_products)-1:
                print('No recommended products found')
                continue
        else:
            if n > num_diverse:                  # Are we looking for a long tail product? No
                name = prod_name
                print('Recommendation # ', idx+1, ': ', name, '\n')
                recs.append(rec)
                n-= 1
            else:                                 # Are we looking for a long tail product? Yes
                if prod_id not in short_head:     # Is it NOT in the short_head list?
                    name = prod_name
                    print('Recommendation # ', idx+1, ': ', name, '\n')
                    recs.append(rec)
                    n-= 1
                else:
                    continue
    return 0, "None"


In [10]:
# num_results, svd_recs = recommend_diverse_products(ranked_products, n = 5, aisle=0, percent_diverse=0.2)

# generate_recs

In [11]:
def generate_recs(ratings_list, n_to_rec, percent_diverse, rec_aisle=None):
    # Convert ratings list to user_ratings
    print("I'm in")
    print(ratings_list)
    userID = 300000
    user_rating =[]
    for product, rating in ratings_list:
        rating_one_product = {'user_id':userID,'product_id':product,'rating':rating}
        user_rating.append(rating_one_product) 

    # add the new ratings to the original ratings DataFrame
    print('Creating ratings dataset...')
    new_ratings_df = new_rec_df.append(user_rating, ignore_index=True)
    new_data = Dataset.load_from_df(new_ratings_df, reader)
    
    # train a model using the new combined DataFrame
    print('Training recommendation model...')
    new_user_svd = SVD(n_factors = 20, n_epochs = 10, lr_all = 0.005, reg_all = 0.4)
    new_user_svd.fit(new_data.build_full_trainset())
    
    # make predictions for the user
    print('Making predictions...')
    list_of_products = []
    tp=0
    # start = time.time()
    products_desc.set_index('Product ID', inplace = True)
    for product in new_ratings_df['product_id'].unique():
        # if(tp>49000):
          # print(tp, product)
          # print
        # tp=tp+1
        try:
            product_name = products_desc.iloc[product]['Product Name'] #products_desc[products_desc['Product ID'] == product]['Product Name'].iloc[0]
            product_aisle = products_desc.iloc[product]['Aisle'] #products_desc[products_desc['Product ID'] == product]['Aisle'].iloc[0]
            list_of_products.append((product, round(new_user_svd.predict(300000, product)[3], 3), product_name, product_aisle))
        except:
            continue
        # list_of_products.append((11,round(new_user_svd.predict(300000, product)[3], 3),3))
        
    # end = time.time()
    # print(end - start)
    products_desc.reset_index(inplace = True)


    # list_of_products = []
    # for product in new_ratings_df['product_id'].unique():
    #     product_name = products_desc[products_desc['Product ID'] == product]['Product Name'].iloc[0]
    #     product_aisle = products_desc[products_desc['Product ID'] == product]['Aisle'].iloc[0]
    #     list_of_products.append((product, round(new_user_svd.predict(300000, product)[3], 3), product_name, product_aisle))
    
    # order the predictions from highest to lowest rated
    ranked_products = sorted(list_of_products, key=lambda x:x[1], reverse=True)
    print("sorted ranked products got")
    # return the top n recommendation
    print(len(ranked_products))
    # print(ranked_products)
    return ranked_products
    # num_results, svd_recs = recommend_diverse_products(ranked_products, n_to_rec, aisle=rec_aisle, percent_diverse=percent_diverse)
    # print('Complete')
    # return num_results, svd_recs

In [12]:
# ratings_list = [[43812,5.0],[10307,1.0]]

In [13]:
# ranked_products = generate_recs(ratings_list, n_to_rec=5, percent_diverse = 0.2, rec_aisle=None)

In [14]:
# svd_recs

In [15]:
# def grocery_rater(df, num, aisle=None):
#     userID = 300000
#     rating_list = []
#     while num > 0:
#         if aisle:
#             product = df[df['Aisle'].str.contains(aisle)].sample(1)
#         else:
#             product = df.sample(1)
#         print('\n', product['Product Name'].iloc[0])
#         rating = input('How do you rate this product on a scale of 1-5, choose 0 to rate a different product:\n')
#         if rating == '0':
#             continue
#         else:
#             rating_one_product = {'user_id':userID,'product_id':product['Product ID'].iloc[0],'rating':int(rating)}
#             rating_list.append(rating_one_product) 
#             num -= 1
#     return rating_list


# def generate_new_user_recommendations(n_to_rate, n_to_rec, percent_diverse, 
#                                       rate_aisle=None, rec_aisle=None):
#     # Get user ratings
#     user_rating = grocery_rater(products_desc, n_to_rate, aisle=rate_aisle)

#     # add the new ratings to the original ratings DataFrame
#     print('Creating ratings dataset...')
#     new_ratings_df = new_rec_df.append(user_rating, ignore_index=True)
#     new_data = Dataset.load_from_df(new_ratings_df, reader)
    
#     # train a model using the new combined DataFrame
#     print('Training recommendation model...')
#     new_user_svd = SVD(n_factors = 20, n_epochs = 10, lr_all = 0.005, reg_all = 0.4)
#     new_user_svd.fit(new_data.build_full_trainset())
#     # return new_user_svd, new_ratings_df

#     # make predictions for the user
#     print('Making predictions...')
#     list_of_products = []
#     tp=0
#     # start = time.time()
#     products_desc.set_index('Product ID', inplace = True)
#     for product in new_ratings_df['product_id'].unique():

#         try:
#             product_name = products_desc.iloc[product]['Product Name'] #products_desc[products_desc['Product ID'] == product]['Product Name'].iloc[0]
#             product_aisle = products_desc.iloc[product]['Aisle'] #products_desc[products_desc['Product ID'] == product]['Aisle'].iloc[0]
#             list_of_products.append((product, round(new_user_svd.predict(300000, product)[3], 3), product_name, product_aisle))
#         except:
#             continue
#         # list_of_products.append((11,round(new_user_svd.predict(300000, product)[3], 3),3))
#         if(tp%1000==0):
#           print(tp)
#           # print
#         tp=tp+1
#     # end = time.time()
#     # print(end - start)
#     products_desc.reset_index(inplace = True)

#     '''list_of_products = []
#     tp=0
#     start = time.time()
#     for product in new_ratings_df['product_id'].unique():
#         # product_name = products_desc[products_desc['Product ID'] == product]['Product Name'].iloc[0]
#         # product_aisle = products_desc[products_desc['Product ID'] == product]['Aisle'].iloc[0]
#         # list_of_products.append((product, round(new_user_svd.predict(300000, product)[3], 3), product_name, product_aisle))
#         list_of_products.append((11,23))
#         if(tp%1000==0):
#           print(tp)
#           # print
#         tp=tp+1
#     end = time.time()
#     print(end - start)'''
#     # order the predictions from highest to lowest rated
#     ranked_products = sorted(list_of_products, key=lambda x:x[1], reverse=True)
#     print("sorted ranked products got")
#     # return the top n recommendation
#     num_results, svd_recs = recommend_diverse_products( ranked_products, n_to_rec, aisle=rec_aisle , percent_diverse = percent_diverse )
#     return num_results, svd_recs

In [16]:
# nnum_results, svd_recs  = generate_new_user_recommendations( n_to_rate=1, n_to_rec=6, 
#                                   percent_diverse=1, rate_aisle='candy', rec_aisle=None)

In [17]:
# p = loop_thing(new_user_svd, new_ratings_df) 

In [18]:
# products_desc_copy = products_desc

In [19]:
# len(new_ratings_df.product_id.unique())

In [20]:
# products_desc.head()

In [21]:
# products_desc.reset_index(inplace = True)

In [22]:
# products_desc.set_index('Product ID', inplace = True)

In [23]:
# products_desc.iloc[2]['Aisle']

In [24]:
# products_desc.shape

In [25]:
# # %%timeit
# product=49688
# # products_desc[products_desc['Product ID'] == product]['Product Name'].iloc[0]
# # product_aisle = products_desc[products_desc['Product ID'] == product]['Aisle'].iloc[0]
# products_desc.iloc[product]['Product Name']
# products_desc.iloc[product]['Aisle']

In [26]:
# products_desc.shape

In [27]:
# for product in new_ratings_df['product_id'].unique():
#         product_name = products_desc[products_desc['Product ID'] == product]['Product Name'].iloc[0]
#         product_aisle = products_desc[products_desc['Product ID'] == product]['Aisle'].iloc[0]
#         list_of_products.append((product, round(new_user_svd.predict(300000, product)[3], 3), product_name, product_aisle))
#         break
#         if(tp%100==0):
#           print(tp)
#           # print
#         tp=tp+1

# App.py

In [28]:
!pip install pyngrok

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting pyngrok
  Downloading pyngrok-5.1.0.tar.gz (745 kB)
[K     |████████████████████████████████| 745 kB 2.6 MB/s 
Building wheels for collected packages: pyngrok
  Building wheel for pyngrok (setup.py) ... [?25l[?25hdone
  Created wheel for pyngrok: filename=pyngrok-5.1.0-py3-none-any.whl size=19007 sha256=ef9372804ac6ec605b12a97238a16c69b35e45964f34277098ea7294a7d5881e
  Stored in directory: /root/.cache/pip/wheels/bf/e6/af/ccf6598ecefecd44104069371795cb9b3afbcd16987f6ccfb3
Successfully built pyngrok
Installing collected packages: pyngrok
Successfully installed pyngrok-5.1.0


In [29]:
from pyngrok import ngrok
import nltk
nltk.download('punkt')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [None]:
from flask import Flask, request, render_template, session
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import CountVectorizer

from nltk.stem.snowball import SnowballStemmer
# from user_functions import stem_and_vectorize_products_based_on_metadata, generate_new_user_recommendations, generate_recs, get_sample_product
import pandas as pd
import numpy as np
import pickle

port_no = 4000
template_dir = 'templates'
static_dir = 'static'
app = Flask(__name__, template_folder=template_dir, static_folder=static_dir)
# ngrok.set_auth_token("23H0IY10fqeKMIW7kG05JhKZMae_3Zabr2iqkU9AUcZ7CrRTP") 
ngrok.set_auth_token("2BWuK5kiY9aU6289pnRKPsAYQdl_4u2A1XxVgtvoEcuGJWyVJ")
public_url =  ngrok.connect(port_no).public_url


# app = Flask(__name__)
app.secret_key = 'any random string'

@app.route('/', methods=['GET', 'POST'])
def rootpage():
    return render_template('index.html')

@app.route('/nlp', methods=['GET', 'POST'])
def nlppage():
    nlp = ''
    num_results = 0
    if request.method == 'POST' and 'searchwords' in request.form:
        num_results, nlp = stem_and_vectorize_products_based_on_metadata(request.form.get('searchwords')) 
    return render_template('nlp.html',
                           nlp=nlp, 
                           num_results=num_results)

@app.route('/svd', methods=['GET', 'POST'])
def svdpage():
    print("1")
    svd_recs = ''
    print("2")
    num_results = 0
    print("3")
    session['n_left_to_rate'] = None
    print(request.method == 'POST')
    print(request.form.get('num_to_rate'))
    if request.method == 'POST' and request.form.get('num_to_rate'):
        print("5")
        session['rate_aisle'] = request.form.get('rate_aisle')
        print("6")
        session['n_to_rate'] = float(request.form.get('num_to_rate'))
        print("7")
        session['rec_aisle'] = request.form.get('rec_aisle')
        print("10")
        session['n_to_rec'] = float(request.form.get('num_to_rec'))
        print("11")
        session['percent_diverse'] = float(request.form.get('diversity_index'))
        print("12")
        session['prod_name'], session['prod_aisle'], session['prod_id'] = get_sample_product(session['rate_aisle'])
        print("13")
        session['n_left_to_rate'] = session['n_to_rate']
        print("14")
        session['ratings_list'] = []
        print("15")
        return render_template('rating.html')
    else:
        print("16")
        print(request.method=='POST')
        print(request.form.get('num_to_rate'))
        # return render_template('rating.html')
        return render_template('svd.html',
                            svd_recs=svd_recs,
                            num_results=num_results)                                                                                                                   
                        
@app.route('/rating', methods=['GET', 'POST'])
def ratingpage():
    if session['n_to_rate'] == None:
        return render_template('svd.html',
                                svd_recs='',
                                num_results=0)     
    
    if session['n_left_to_rate'] == 0:
         ranked_products = generate_recs(session['ratings_list'], session['n_to_rec'], session['percent_diverse'], rec_aisle=session['rec_aisle'])
         print("check length of ranked_products: ",len(ranked_products))
         num_results, svd_recs = recommend_diverse_products(ranked_products, session['n_to_rec'], aisle=session['rec_aisle'], percent_diverse=session['percent_diverse'])
         return render_template('svd.html', svd_recs=svd_recs,num_results=num_results)
    elif 'rate_product' in request.form:
        rating = float(request.form.get('rate_product'))
        session['ratings_list'].append([session['prod_id'], rating])
        session['n_left_to_rate'] -= 1
        session['prod_name'], session['prod_aisle'], session['prod_id'] = get_sample_product(session['rate_aisle'])
        return render_template('rating.html')
    else:
        return render_template('rating.html')    


# if __name__ == "__main__":
#     app.run(debug=True)
public_url = public_url[:4] + "s" + public_url[4:]
print(f"To acces the Gloable link please click {public_url}")
app.run(port=port_no)

To acces the Gloable link please click https://33e8-35-237-161-82.ngrok.io
 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: off


 * Running on http://127.0.0.1:4000/ (Press CTRL+C to quit)
INFO:werkzeug: * Running on http://127.0.0.1:4000/ (Press CTRL+C to quit)
127.0.0.1 - - [05/Aug/2022 15:20:09] "[37mGET / HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [05/Aug/2022 15:20:09] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [05/Aug/2022 15:20:09] "[37mGET /static/style.css HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [05/Aug/2022 15:20:09] "[37mGET /static/style.css HTTP/1.1[0m" 200 -
127.0.0.1 - - [05/Aug/2022 15:20:10] "[37mGET /static/grocery-shop-safely.jpg HTTP/1.1[0m" 200 -
INFO:werkzeug:127.0.0.1 - - [05/Aug/2022 15:20:10] "[37mGET /static/grocery-shop-safely.jpg HTTP/1.1[0m" 200 -
127.0.0.1 - - [05/Aug/2022 15:20:10] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
INFO:werkzeug:127.0.0.1 - - [05/Aug/2022 15:20:10] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
[2022-08-05 15:20:11,615] ERROR in app: Exception on /svd [POST]
Traceback (most recent call last):
  File "/usr/local/lib/python3.7/dist-packag

1
2
3
True
None
5
6
