In [3]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
import random

import time

spark = SparkSession \
        .builder \
        .appName("Phone_Similarity") \
        .master("local[*]") \
        .getOrCreate()

spark.sparkContext.setLogLevel("ERROR")

In [4]:
db = 'myFirstDatabase'
tab = 'products'
df = spark.read.format('com.mongodb.spark.sql.DefaultSource')\
               .option('spark.mongodb.input.uri','mongodb://admin:123@project-shard-00-00.u9pno.mongodb.net:27017,project-shard-00-01.u9pno.mongodb.net:27017,project-shard-00-02.u9pno.mongodb.net:27017/myFirstDatabase?ssl=true&replicaSet=atlas-pna2hx-shard-0&authSource=admin&retryWrites=true&w=majority') \
               .option('spark.mongodb.input.database',db) \
               .option('spark.mongodb.input.collection',tab).load()

In [5]:
from pyspark.sql.functions import col,isnan, when, count
from pyspark.sql.types import *
df1 = df.withColumn('_id', col('_id').cast(StringType()))
data = df1.withColumn('_id', split(col('_id'), ']').getItem(0))
data = data.withColumn('_id', expr("substring(_id, 2, length(_id))"))
phone_data = data.select('_id',
                                'title',
                                'category',
                                'color',
                                'memory',
                                'pin',
                                'ram',
                                'screenSize',
                                'status',
                                'price')

In [6]:
import pyspark.sql.functions as F 
categ = phone_data.select('category').distinct().rdd.flatMap(lambda x:x).collect()
exprs = [F.when(F.col('category') == cat,1).otherwise(0).alias(str(cat)) for cat in categ]
phone_data = phone_data.select(exprs + phone_data.columns)

In [7]:
categ = phone_data.select('color').distinct().rdd.flatMap(lambda x:x).collect()
exprs = [F.when(F.col('color') == cat,1).otherwise(0).alias(str(cat)) for cat in categ]
phone_data = phone_data.select(exprs + phone_data.columns)
phone_data.columns

['Shiny Black',
 'Turquoise',
 'Silver',
 'Green',
 'Purple',
 'Blue',
 'White',
 'Gold',
 'Mint Green',
 'Black',
 'Red',
 'Pink',
 '6194877b0327b0eef3a53fe9',
 '61947f86613ccbeacb59e5b8',
 '619487730327b0eef3a53fe4',
 '61947f8e613ccbeacb59e5bd',
 '_id',
 'title',
 'category',
 'color',
 'memory',
 'pin',
 'ram',
 'screenSize',
 'status',
 'price']

In [8]:
categ = phone_data.select('status').distinct().rdd.flatMap(lambda x:x).collect()
exprs = [F.when(F.col('status') == cat,1).otherwise(0).alias(str(cat)) for cat in categ]
phone_data = phone_data.select(exprs + phone_data.columns)
phone_data.columns

['99',
 'New',
 'Shiny Black',
 'Turquoise',
 'Silver',
 'Green',
 'Purple',
 'Blue',
 'White',
 'Gold',
 'Mint Green',
 'Black',
 'Red',
 'Pink',
 '6194877b0327b0eef3a53fe9',
 '61947f86613ccbeacb59e5b8',
 '619487730327b0eef3a53fe4',
 '61947f8e613ccbeacb59e5bd',
 '_id',
 'title',
 'category',
 'color',
 'memory',
 'pin',
 'ram',
 'screenSize',
 'status',
 'price']

In [9]:
from pyspark.sql.types import DoubleType
changedTypedf = phone_data.withColumn("screenSize", phone_data["screenSize"].cast(DoubleType()))
changedTypedf.head(5)

[Row(99=0, New=1, Shiny Black=0, Turquoise=0, Silver=0, Green=0, Purple=0, Blue=0, White=0, Gold=0, Mint Green=0, Black=0, Red=0, Pink=1, 6194877b0327b0eef3a53fe9=0, 61947f86613ccbeacb59e5b8=1, 619487730327b0eef3a53fe4=0, 61947f8e613ccbeacb59e5bd=0, _id='6194895c30e6b7130bb06add', title='iphone 13 pink', category='61947f86613ccbeacb59e5b8', color='Pink', memory=128, pin=3095, ram=6, screenSize=6.1, status='New', price=1100),
 Row(99=0, New=1, Shiny Black=0, Turquoise=0, Silver=0, Green=0, Purple=0, Blue=0, White=1, Gold=0, Mint Green=0, Black=0, Red=0, Pink=0, 6194877b0327b0eef3a53fe9=0, 61947f86613ccbeacb59e5b8=1, 619487730327b0eef3a53fe4=0, 61947f8e613ccbeacb59e5bd=0, _id='61948b652d9fa1d9e7da2d3a', title='iphone 13 pro max white', category='61947f86613ccbeacb59e5b8', color='White', memory=256, pin=3300, ram=6, screenSize=6.3, status='New', price=1300),
 Row(99=0, New=1, Shiny Black=0, Turquoise=0, Silver=0, Green=0, Purple=0, Blue=0, White=0, Gold=0, Mint Green=0, Black=1, Red=0, Pi

In [10]:
from pyspark.ml.feature import VectorAssembler
assemble=VectorAssembler(inputCols=['99',
 'New',
 'Shiny Black',
 'Turquoise',
 'Silver',
 'Green',
 'Purple',
 'Blue',
 'White',
 'Gold',
 'Mint Green',
 'Black',
 'Red',
 'Pink',
 '6194877b0327b0eef3a53fe9',
 '61947f86613ccbeacb59e5b8',
 '619487730327b0eef3a53fe4',
 '61947f8e613ccbeacb59e5bd',
 'memory',
 'pin',
 'ram',
 'screenSize',
 'price'], outputCol='features')
assembled_data=assemble.transform(changedTypedf)

In [11]:
from pyspark.ml.feature import StandardScaler

scale=StandardScaler(inputCol='features',outputCol='standardized')

data_scale=scale.fit(assembled_data)
data_scale_output=data_scale.transform(assembled_data)

data_scale_output.show(2)

+---+---+-----------+---------+------+-----+------+----+-----+----+----------+-----+---+----+------------------------+------------------------+------------------------+------------------------+--------------------+--------------------+--------------------+-----+------+----+---+----------+------+-----+--------------------+--------------------+
| 99|New|Shiny Black|Turquoise|Silver|Green|Purple|Blue|White|Gold|Mint Green|Black|Red|Pink|6194877b0327b0eef3a53fe9|61947f86613ccbeacb59e5b8|619487730327b0eef3a53fe4|61947f8e613ccbeacb59e5bd|                 _id|               title|            category|color|memory| pin|ram|screenSize|status|price|            features|        standardized|
+---+---+-----------+---------+------+-----+------+----+-----+----+----------+-----+---+----+------------------------+------------------------+------------------------+------------------------+--------------------+--------------------+--------------------+-----+------+----+---+----------+------+-----+--------

In [12]:
datad = data_scale_output.select('_id', 'title', 'category', 'color', 'memory', 'pin', 'ram', 'screenSize', 'status', 'price', 'standardized')
datf = datad.toPandas()

In [None]:
#RMSE
import numpy as np, pandas as pd
import matplotlib.pyplot as plt, seaborn as sns
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

class PhoneSimilarity():
    def __init__(self, all_Data):
        self.all_Data_ = all_Data
    
    def phone_similarity(self, phone_id, amount=1):
        distances = []
        phone = self.all_Data_[(self.all_Data_._id == phone_id)].head(1).values[0]
        phone_row = self.all_Data_[(self.all_Data_._id == phone_id)].head(1)
        phone_row['distance'] = 0
        current_standardized_vector = phone[10].toArray()
        res_data = self.all_Data_[self.all_Data_._id != phone_id]
        countElement = 23 #23 of vector and 1 of predict
        for r_phone in tqdm(res_data.values):
            dist = 0
            standardized_vector = r_phone[10].toArray()
            for col in np.arange(23):
                dist = dist + np.square(float(current_standardized_vector[col]) - float(standardized_vector[col]))
            dist = dist / countElement
            dist = np.sqrt(dist)
            distances.append(dist)
        res_data['distance'] = distances
        res_data = res_data.sort_values('distance')
        bigdata = pd.concat([phone_row, res_data], ignore_index=True, sort=False)
        columns = ['_id', 'title', 'category', 'color', 'memory', 'pin', 'ram', 'screenSize', 'status', 'price','distance']
        return bigdata[columns][:amount]

In [13]:
#euclidean
import numpy as np, pandas as pd
import matplotlib.pyplot as plt, seaborn as sns
from tqdm import tqdm
import warnings
warnings.filterwarnings("ignore")

class PhoneSimilarity():
    def __init__(self, all_Data):
        self.all_Data_ = all_Data
    
    def phone_similarity(self, phone_id, amount=1):
        amount = amount + 1
        distances = []
        phone = self.all_Data_[(self.all_Data_._id == phone_id)].head(1).values[0]
        phone_row = self.all_Data_[(self.all_Data_._id == phone_id)].head(1)
        current_standardized_vector = np.array(phone[10].toArray())
        res_data = self.all_Data_[self.all_Data_._id != phone_id]
        countElement = 23 #23 of vector and 1 of predict
        for r_phone in tqdm(res_data.values):
            dist = 0
            standardized_vector = np.array(r_phone[10].toArray())
            dist = np.linalg.norm(current_standardized_vector-standardized_vector)
            distances.append(dist)
        res_data['distance'] = distances
        phone_row['distance'] = 0
        res_data = res_data.sort_values('distance')
        bigdata = pd.concat([phone_row, res_data], ignore_index=True, sort=False)
        columns = ['_id', 'title', 'category', 'color', 'memory', 'pin', 'ram', 'screenSize', 'status', 'price','distance']
        return bigdata[columns][:amount]

In [14]:
#test PhoneSimilarity
similarity = PhoneSimilarity(datf)
x = '6194c722bb6b5b34d3a62769'
similarity_phones = similarity.phone_similarity(x, 10)

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 14509.70it/s]


In [15]:
import kafka
kafka_topic_name = "clickcount"
kafka_bootstrap_servers = 'localhost:9092'

# Construct a streaming DataFrame that reads from topic
flower_df = spark \
        .readStream \
        .format("kafka") \
        .option("kafka.bootstrap.servers", kafka_bootstrap_servers) \
        .option("subscribe", kafka_topic_name) \
        .option("startingOffsets", "latest") \
        .load()

In [16]:
def get_database():
    from pymongo import MongoClient
    import pymongo

    # Provide the mongodb atlas url to connect python to mongodb using pymongo
    CONNECTION_STRING = "mongodb://admin:123@project-shard-00-00.u9pno.mongodb.net:27017,project-shard-00-01.u9pno.mongodb.net:27017,project-shard-00-02.u9pno.mongodb.net:27017/myFirstDatabase?ssl=true&replicaSet=atlas-pna2hx-shard-0&authSource=admin&retryWrites=true&w=majority"

    # Create a connection using MongoClient. You can import MongoClient or use pymongo.MongoClient
    from pymongo import MongoClient
    client = MongoClient(CONNECTION_STRING)

    # Create the database for our example (we will use the same database throughout the tutorial
    return client['myFirstDatabase']

# Get the database
dbname = get_database()
similarities_collection = dbname["similarities"]
similarities_collection

Collection(Database(MongoClient(host=['project-shard-00-00.u9pno.mongodb.net:27017', 'project-shard-00-01.u9pno.mongodb.net:27017', 'project-shard-00-02.u9pno.mongodb.net:27017'], document_class=dict, tz_aware=False, connect=True, replicaset='atlas-pna2hx-shard-0', authsource='admin', retrywrites=True, w='majority', tls=True), 'myFirstDatabase'), 'similarities')

In [26]:
def foreach_batch_function(df, epoch_id,similarity_f,similarities_coll):
    if df.first() is not None:
        row = df.first()
        value = row['value'].decode("utf-8")
        first_element = value.split(',')[0]
        similarity_phones = similarity_f.phone_similarity(first_element, 10)
        listId = []
        for i in range(1,6):
            listId.append(similarity_phones['_id'][i])
        query = {"idProduct":first_element}
        dict1 = {"idProduct":first_element, "listId":listId}
#         similarities_collection.insert_one(dict1)
        update = {"$set": dict1}
        similarities_coll.update_one(query,update,upsert=True)
        print(dict1)
        print(similarity_phones)
    pass

# query3 = flower_df.writeStream.foreachBatch(foreach_batch_function).start()
query3 = flower_df.writeStream.foreachBatch(lambda df,epochId: foreach_batch_function(df,epochId,similarity,similarities_collection)).start()
#streamingDF.writeStream.foreachBatch(lambda df,epochId: foreach_batch_function(df,epochId,similarity_f,similarities_collection)).start()

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 30062.98it/s]


{'idProduct': '6194ca4ebb6b5b34d3a6278e', 'listId': ['6194c9f0bb6b5b34d3a62786', '6194c722bb6b5b34d3a62769', '6194c8cebb6b5b34d3a62776', '6194c79cbb6b5b34d3a6276f', '6194c46cbb6b5b34d3a6274d']}
                         _id                             title  \
0   6194ca4ebb6b5b34d3a6278e            oppo reno6 z 5g silver   
1   6194c9f0bb6b5b34d3a62786             oppo reno6 z 5g black   
2   6194c722bb6b5b34d3a62769               oppo reno6 5g black   
3   6194c8cebb6b5b34d3a62776                 oppo reno5 silver   
4   6194c79cbb6b5b34d3a6276f           oppo reno6 5g turquoise   
5   6194c46cbb6b5b34d3a6274d  samsung galaxy z fold3 5g sliver   
6   6194bce4bb6b5b34d3a626e4          samsung galaxy a72 white   
7   6194b8b0bb6b5b34d3a626b1         samsung galaxy z flip3 5g   
8   6194cc8cbb6b5b34d3a627a3       xiaomi mi 11 lite 5g yellow   
9   6194c059bb6b5b34d3a6270c            iphone 13 pro max gold   
10  6194c0e7bb6b5b34d3a62713            iphone 13 pro max gold   

             

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 29022.86it/s]


{'idProduct': '6194c696bb6b5b34d3a62763', 'listId': ['6194c3b0bb6b5b34d3a62747', '61948b652d9fa1d9e7da2d3a', '6194c14cbb6b5b34d3a6271a', '6194bce4bb6b5b34d3a626e4', '6194c0e7bb6b5b34d3a62713']}
                         _id                      title  \
0   6194c696bb6b5b34d3a62763    iphone 13 pro max white   
1   6194c3b0bb6b5b34d3a62747    iphone 13 pro max white   
2   61948b652d9fa1d9e7da2d3a    iphone 13 pro max white   
3   6194c14cbb6b5b34d3a6271a     iphone 13 pro max gold   
4   6194bce4bb6b5b34d3a626e4   samsung galaxy a72 white   
5   6194c0e7bb6b5b34d3a62713     iphone 13 pro max gold   
6   6194c059bb6b5b34d3a6270c     iphone 13 pro max gold   
7   6194bfd8bb6b5b34d3a62706     iphone 13 pro max blue   
8   6194c2cdbb6b5b34d3a62726     iphone 13 pro max blue   
9   6194b8b0bb6b5b34d3a626b1  samsung galaxy z flip3 5g   
10  6194c722bb6b5b34d3a62769        oppo reno6 5g black   

                    category  color  memory   pin  ram  screenSize status  \
0   61947f86613ccbea

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<?, ?it/s]


{'idProduct': '61a363be0813e6445a3f7fa3', 'listId': ['6194cc8cbb6b5b34d3a627a3', '6194cbb3bb6b5b34d3a62795', '6194c233bb6b5b34d3a62720', '6194c9f0bb6b5b34d3a62786', '6194c722bb6b5b34d3a62769']}
                         _id                        title  \
0   61a363be0813e6445a3f7fa3         xiaomi mi 11 lite 5g   
1   6194cc8cbb6b5b34d3a627a3  xiaomi mi 11 lite 5g yellow   
2   6194cbb3bb6b5b34d3a62795   xiaomi mi 11 lite 5g black   
3   6194c233bb6b5b34d3a62720         xiaomi mi 11 lite 4g   
4   6194c9f0bb6b5b34d3a62786        oppo reno6 z 5g black   
5   6194c722bb6b5b34d3a62769          oppo reno6 5g black   
6   6194b8b0bb6b5b34d3a626b1    samsung galaxy z flip3 5g   
7   6194bce4bb6b5b34d3a626e4     samsung galaxy a72 white   
8   6194ca4ebb6b5b34d3a6278e       oppo reno6 z 5g silver   
9   6194c059bb6b5b34d3a6270c       iphone 13 pro max gold   
10  6194c0e7bb6b5b34d3a62713       iphone 13 pro max gold   

                    category        color  memory   pin  ram  screenSize 

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<?, ?it/s]


{'idProduct': '61a363be0813e6445a3f7fa3', 'listId': ['6194cc8cbb6b5b34d3a627a3', '6194cbb3bb6b5b34d3a62795', '6194c233bb6b5b34d3a62720', '6194c9f0bb6b5b34d3a62786', '6194c722bb6b5b34d3a62769']}
                         _id                        title  \
0   61a363be0813e6445a3f7fa3         xiaomi mi 11 lite 5g   
1   6194cc8cbb6b5b34d3a627a3  xiaomi mi 11 lite 5g yellow   
2   6194cbb3bb6b5b34d3a62795   xiaomi mi 11 lite 5g black   
3   6194c233bb6b5b34d3a62720         xiaomi mi 11 lite 4g   
4   6194c9f0bb6b5b34d3a62786        oppo reno6 z 5g black   
5   6194c722bb6b5b34d3a62769          oppo reno6 5g black   
6   6194b8b0bb6b5b34d3a626b1    samsung galaxy z flip3 5g   
7   6194bce4bb6b5b34d3a626e4     samsung galaxy a72 white   
8   6194ca4ebb6b5b34d3a6278e       oppo reno6 z 5g silver   
9   6194c059bb6b5b34d3a6270c       iphone 13 pro max gold   
10  6194c0e7bb6b5b34d3a62713       iphone 13 pro max gold   

                    category        color  memory   pin  ram  screenSize 

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 28871.31it/s]


{'idProduct': '6194c696bb6b5b34d3a62763', 'listId': ['6194c3b0bb6b5b34d3a62747', '61948b652d9fa1d9e7da2d3a', '6194c14cbb6b5b34d3a6271a', '6194bce4bb6b5b34d3a626e4', '6194c0e7bb6b5b34d3a62713']}
                         _id                      title  \
0   6194c696bb6b5b34d3a62763    iphone 13 pro max white   
1   6194c3b0bb6b5b34d3a62747    iphone 13 pro max white   
2   61948b652d9fa1d9e7da2d3a    iphone 13 pro max white   
3   6194c14cbb6b5b34d3a6271a     iphone 13 pro max gold   
4   6194bce4bb6b5b34d3a626e4   samsung galaxy a72 white   
5   6194c0e7bb6b5b34d3a62713     iphone 13 pro max gold   
6   6194c059bb6b5b34d3a6270c     iphone 13 pro max gold   
7   6194bfd8bb6b5b34d3a62706     iphone 13 pro max blue   
8   6194c2cdbb6b5b34d3a62726     iphone 13 pro max blue   
9   6194b8b0bb6b5b34d3a626b1  samsung galaxy z flip3 5g   
10  6194c722bb6b5b34d3a62769        oppo reno6 5g black   

                    category  color  memory   pin  ram  screenSize status  \
0   61947f86613ccbea

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 29085.32it/s]


{'idProduct': '6194c696bb6b5b34d3a62763', 'listId': ['6194c3b0bb6b5b34d3a62747', '61948b652d9fa1d9e7da2d3a', '6194c14cbb6b5b34d3a6271a', '6194bce4bb6b5b34d3a626e4', '6194c0e7bb6b5b34d3a62713']}
                         _id                      title  \
0   6194c696bb6b5b34d3a62763    iphone 13 pro max white   
1   6194c3b0bb6b5b34d3a62747    iphone 13 pro max white   
2   61948b652d9fa1d9e7da2d3a    iphone 13 pro max white   
3   6194c14cbb6b5b34d3a6271a     iphone 13 pro max gold   
4   6194bce4bb6b5b34d3a626e4   samsung galaxy a72 white   
5   6194c0e7bb6b5b34d3a62713     iphone 13 pro max gold   
6   6194c059bb6b5b34d3a6270c     iphone 13 pro max gold   
7   6194bfd8bb6b5b34d3a62706     iphone 13 pro max blue   
8   6194c2cdbb6b5b34d3a62726     iphone 13 pro max blue   
9   6194b8b0bb6b5b34d3a626b1  samsung galaxy z flip3 5g   
10  6194c722bb6b5b34d3a62769        oppo reno6 5g black   

                    category  color  memory   pin  ram  screenSize status  \
0   61947f86613ccbea

100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<?, ?it/s]


{'idProduct': '6194bae9bb6b5b34d3a626ce', 'listId': ['6194c233bb6b5b34d3a62720', '6194bce4bb6b5b34d3a626e4', '6194b9aebb6b5b34d3a626b7', '6194bc37bb6b5b34d3a626da', '6194ba78bb6b5b34d3a626c8']}
                         _id                           title  \
0   6194bae9bb6b5b34d3a626ce       samsung galaxy a72 purple   
1   6194c233bb6b5b34d3a62720            xiaomi mi 11 lite 4g   
2   6194bce4bb6b5b34d3a626e4        samsung galaxy a72 white   
3   6194b9aebb6b5b34d3a626b7  samsung galaxy a72 shiny black   
4   6194bc37bb6b5b34d3a626da     samsung galaxy s21 ultra 5g   
5   6194ba78bb6b5b34d3a626c8    samsung galaxy a72 turquoise   
6   6194b8b0bb6b5b34d3a626b1       samsung galaxy z flip3 5g   
7   6194c9f0bb6b5b34d3a62786           oppo reno6 z 5g black   
8   6194c722bb6b5b34d3a62769             oppo reno6 5g black   
9   6194c5aebb6b5b34d3a62759        galaxy z fold 3 5g black   
10  6194c059bb6b5b34d3a6270c          iphone 13 pro max gold   

                    category        c

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 30011.06it/s]


{'idProduct': '6194c0e7bb6b5b34d3a62713', 'listId': ['6194c059bb6b5b34d3a6270c', '6194c14cbb6b5b34d3a6271a', '6194cc8cbb6b5b34d3a627a3', '6194c696bb6b5b34d3a62763', '61948b652d9fa1d9e7da2d3a']}
                         _id                        title  \
0   6194c0e7bb6b5b34d3a62713       iphone 13 pro max gold   
1   6194c059bb6b5b34d3a6270c       iphone 13 pro max gold   
2   6194c14cbb6b5b34d3a6271a       iphone 13 pro max gold   
3   6194cc8cbb6b5b34d3a627a3  xiaomi mi 11 lite 5g yellow   
4   6194c696bb6b5b34d3a62763      iphone 13 pro max white   
5   61948b652d9fa1d9e7da2d3a      iphone 13 pro max white   
6   6194bfd8bb6b5b34d3a62706       iphone 13 pro max blue   
7   6194c3b0bb6b5b34d3a62747      iphone 13 pro max white   
8   6194b8b0bb6b5b34d3a626b1    samsung galaxy z flip3 5g   
9   6194c722bb6b5b34d3a62769          oppo reno6 5g black   
10  6194c9f0bb6b5b34d3a62786        oppo reno6 z 5g black   

                    category  color  memory   pin  ram  screenSize status

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 29009.02it/s]


{'idProduct': '6194c53bbb6b5b34d3a62753', 'listId': ['6194c5aebb6b5b34d3a62759', '6194c46cbb6b5b34d3a6274d', '6194bc37bb6b5b34d3a626da', '6194b8b0bb6b5b34d3a626b1', '6194bce4bb6b5b34d3a626e4']}
                         _id                             title  \
0   6194c53bbb6b5b34d3a62753          galaxy z fold 3 5g green   
1   6194c5aebb6b5b34d3a62759          galaxy z fold 3 5g black   
2   6194c46cbb6b5b34d3a6274d  samsung galaxy z fold3 5g sliver   
3   6194bc37bb6b5b34d3a626da       samsung galaxy s21 ultra 5g   
4   6194b8b0bb6b5b34d3a626b1         samsung galaxy z flip3 5g   
5   6194bce4bb6b5b34d3a626e4          samsung galaxy a72 white   
6   6194b9aebb6b5b34d3a626b7    samsung galaxy a72 shiny black   
7   6194bae9bb6b5b34d3a626ce         samsung galaxy a72 purple   
8   6194ba78bb6b5b34d3a626c8      samsung galaxy a72 turquoise   
9   6194c0e7bb6b5b34d3a62713            iphone 13 pro max gold   
10  6194c059bb6b5b34d3a6270c            iphone 13 pro max gold   

             

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 29050.59it/s]


{'idProduct': '6194c722bb6b5b34d3a62769', 'listId': ['6194c9f0bb6b5b34d3a62786', '6194b8b0bb6b5b34d3a626b1', '6194ca4ebb6b5b34d3a6278e', '6194bc37bb6b5b34d3a626da', '6194c79cbb6b5b34d3a6276f']}
                         _id                        title  \
0   6194c722bb6b5b34d3a62769          oppo reno6 5g black   
1   6194c9f0bb6b5b34d3a62786        oppo reno6 z 5g black   
2   6194b8b0bb6b5b34d3a626b1    samsung galaxy z flip3 5g   
3   6194ca4ebb6b5b34d3a6278e       oppo reno6 z 5g silver   
4   6194bc37bb6b5b34d3a626da  samsung galaxy s21 ultra 5g   
5   6194c79cbb6b5b34d3a6276f      oppo reno6 5g turquoise   
6   6194bce4bb6b5b34d3a626e4     samsung galaxy a72 white   
7   6194c059bb6b5b34d3a6270c       iphone 13 pro max gold   
8   6194cc8cbb6b5b34d3a627a3  xiaomi mi 11 lite 5g yellow   
9   6194c5aebb6b5b34d3a62759     galaxy z fold 3 5g black   
10  6194c0e7bb6b5b34d3a62713       iphone 13 pro max gold   

                    category      color  memory   pin  ram  screenSize st

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<00:00, 29878.36it/s]


{'idProduct': '6194c53bbb6b5b34d3a62753', 'listId': ['6194c5aebb6b5b34d3a62759', '6194c46cbb6b5b34d3a6274d', '6194bc37bb6b5b34d3a626da', '6194b8b0bb6b5b34d3a626b1', '6194bce4bb6b5b34d3a626e4']}
                         _id                             title  \
0   6194c53bbb6b5b34d3a62753          galaxy z fold 3 5g green   
1   6194c5aebb6b5b34d3a62759          galaxy z fold 3 5g black   
2   6194c46cbb6b5b34d3a6274d  samsung galaxy z fold3 5g sliver   
3   6194bc37bb6b5b34d3a626da       samsung galaxy s21 ultra 5g   
4   6194b8b0bb6b5b34d3a626b1         samsung galaxy z flip3 5g   
5   6194bce4bb6b5b34d3a626e4          samsung galaxy a72 white   
6   6194b9aebb6b5b34d3a626b7    samsung galaxy a72 shiny black   
7   6194bae9bb6b5b34d3a626ce         samsung galaxy a72 purple   
8   6194ba78bb6b5b34d3a626c8      samsung galaxy a72 turquoise   
9   6194c0e7bb6b5b34d3a62713            iphone 13 pro max gold   
10  6194c059bb6b5b34d3a6270c            iphone 13 pro max gold   

             

In [25]:

query3.stop()


In [23]:
similarities_collection.delete_many({})

<pymongo.results.DeleteResult at 0x250eddc73c8>

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<?, ?it/s]


{'idProduct': '61a363be0813e6445a3f7fa3', 'listId': ['6194cc8cbb6b5b34d3a627a3', '6194cbb3bb6b5b34d3a62795', '6194c233bb6b5b34d3a62720', '6194c9f0bb6b5b34d3a62786', '6194c722bb6b5b34d3a62769']}
                         _id                        title  \
0   61a363be0813e6445a3f7fa3         xiaomi mi 11 lite 5g   
1   6194cc8cbb6b5b34d3a627a3  xiaomi mi 11 lite 5g yellow   
2   6194cbb3bb6b5b34d3a62795   xiaomi mi 11 lite 5g black   
3   6194c233bb6b5b34d3a62720         xiaomi mi 11 lite 4g   
4   6194c9f0bb6b5b34d3a62786        oppo reno6 z 5g black   
5   6194c722bb6b5b34d3a62769          oppo reno6 5g black   
6   6194b8b0bb6b5b34d3a626b1    samsung galaxy z flip3 5g   
7   6194bce4bb6b5b34d3a626e4     samsung galaxy a72 white   
8   6194ca4ebb6b5b34d3a6278e       oppo reno6 z 5g silver   
9   6194c059bb6b5b34d3a6270c       iphone 13 pro max gold   
10  6194c0e7bb6b5b34d3a62713       iphone 13 pro max gold   

                    category        color  memory   pin  ram  screenSize 

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 29/29 [00:00<?, ?it/s]


{'idProduct': '6194bbacbb6b5b34d3a626d4', 'listId': ['61948b652d9fa1d9e7da2d3a', '6194c059bb6b5b34d3a6270c', '6194c0e7bb6b5b34d3a62713', '6194c696bb6b5b34d3a62763', '6194c14cbb6b5b34d3a6271a']}
                         _id                      title  \
0   6194bbacbb6b5b34d3a626d4              iphone 13 red   
1   61948b652d9fa1d9e7da2d3a    iphone 13 pro max white   
2   6194c059bb6b5b34d3a6270c     iphone 13 pro max gold   
3   6194c0e7bb6b5b34d3a62713     iphone 13 pro max gold   
4   6194c696bb6b5b34d3a62763    iphone 13 pro max white   
5   6194c14cbb6b5b34d3a6271a     iphone 13 pro max gold   
6   6194b8b0bb6b5b34d3a626b1  samsung galaxy z flip3 5g   
7   6194bfd8bb6b5b34d3a62706     iphone 13 pro max blue   
8   6194c722bb6b5b34d3a62769        oppo reno6 5g black   
9   6194c9f0bb6b5b34d3a62786      oppo reno6 z 5g black   
10  6194bdd5bb6b5b34d3a626f0    iphone 12 pro max black   

                    category  color  memory   pin  ram  screenSize status  \
0   61947f86613ccbea