In [18]:
!docker-compose down

Stopping mongodb_container ... 
[1Bping mongodb_container ... [32mdone[0mRemoving mongodb_container ... 
[1BRemoving network mongo_default2mdone[0m


In [1]:
!docker-compose up -d

Creating network "mongo_default" with the default driver
Creating mongodb_container ... 
[1Bting mongodb_container ... [32mdone[0m

In [2]:
!docker exec -it mongodb_container bash -c "cat /scripts/init_db.js | mongosh"

]0;mongosh mongodb://127.0.0.1:27017/?directConnection=true&serverSelectionTimeoutMS=2000Current Mongosh Log ID:	62cc4a81a762168fcce6fb63
Connecting to:		[1m[32mmongodb://127.0.0.1:27017/?directConnection=true&serverSelectionTimeoutMS=2000&appName=mongosh+1.5.0[0m
Using MongoDB:		5.0.9
[1m[33mUsing Mongosh[0m:		1.5.0

For mongosh info see: [1mhttps://docs.mongodb.com/mongodb-shell/[0m

[1G[0J [1G[1m[33m------[0m
   2022-07-11T16:06:24.677+00:00: Using the XFS filesystem is strongly recommended with the WiredTiger storage engine. See http://dochub.mongodb.org/core/prodnotes-filesystem
   2022-07-11T16:06:25.254+00:00: Access control is not enabled for the database. Read and write access to data and configuration is unrestricted
[1m[33m------[0m

[1m[33m------[0m
   Enable MongoDB's free cloud-based monitoring service, which will then receive and display
   metrics about your deployment (disk utilization, CPU, operation statistics, etc).
   
   The monitoring data w

In [3]:
from pymongo import MongoClient
uri = 'mongodb://localhost:27017/'
client = MongoClient(uri)
db = client.get_database("default")

In [4]:
import time
import statistics

class profile_code():
    def __init__(self):
        self.start = time.time()
        self.all_durations = []
        
    def setup_start_time(self):
        self.start = time.time()

    def checkpoint(self):
        end_time = time.time()
        self.all_durations.append(end_time - self.start)
        self.start = end_time

    def __enter__(self):
        return self

    def __exit__(self, type, value, traceback):
        print(f"Median - {statistics.median(self.all_durations)}")
        print(f"Average - {statistics.mean(self.all_durations)}")
        print(f"Summary - {sum(self.all_durations)}")

In [5]:
# Score from user

In [6]:
import uuid
from faker import Faker
import random

def generate_rating(num_rows_per_chunk: int, iteration: int):
    chunk = []
    fake = Faker()

    for i in range(iteration*num_rows_per_chunk):
        chunk.append(
            {
                '_id': str(uuid.uuid4()),
                'user_id': str(uuid.uuid4()),
                'film_id': str(uuid.uuid4()),
                'score': random.randint(0, 9),
                'event_time': fake.date_time_between()
            }
        )

        if len(chunk) == num_rows_per_chunk:
            yield chunk
            chunk = []

In [7]:
# Insert chunks of 500
rating = db.get_collection('rating')
rating.delete_many({})

with profile_code() as profiler:
    for chunk in generate_rating(500, 2000):
        profiler.setup_start_time()
        rating.insert_many(chunk)
        profiler.checkpoint()

Median - 0.01317298412322998
Average - 0.015365281462669373
Summary - 30.730562925338745


In [8]:
# Insert one record
rating = db.get_collection('rating')
with profile_code() as profiler:
    for chunk in generate_rating(1, 2000):
        profiler.setup_start_time()
        rating.insert_one(chunk[0])
        profiler.checkpoint()

Median - 0.0009522438049316406
Average - 0.001004459023475647
Summary - 2.008918046951294


In [9]:
# Bookmarks

In [10]:
import uuid
from faker import Faker
import random

def generate_bookmarks(num_rows_per_chunk: int, iteration: int):
    chunk = []
    fake = Faker()

    for i in range(iteration*num_rows_per_chunk):
        chunk.append(
            {
                '_id': str(uuid.uuid4()),
                'user_id': str(uuid.uuid4()),
                'film_id': str(uuid.uuid4()),
                'bookmark': random.randint(0, 10000000),
                'event_time': fake.date_time_between()
            }
        )

        if len(chunk) == num_rows_per_chunk:
            yield chunk
            chunk = []

In [11]:
# Insert chunks of 500
bookmark = db.get_collection('bookmark')
bookmark.delete_many({})

with profile_code() as profiler:
    for chunk in generate_bookmarks(500, 2000):
        profiler.setup_start_time()
        bookmark.insert_many(chunk)
        profiler.checkpoint()

Median - 0.01306307315826416
Average - 0.014295762419700623
Summary - 28.591524839401245


In [12]:
# Insert one record
bookmark = db.get_collection('bookmark')

with profile_code() as profiler:
    for chunk in generate_bookmarks(1, 2000):
        profiler.setup_start_time()
        bookmark.insert_one(chunk[0])
        profiler.checkpoint()

Median - 0.0009744167327880859
Average - 0.001048288345336914
Summary - 2.096576690673828


In [13]:
# Average film score

In [14]:
rating = db.get_collection('rating')
records = rating.find().limit(100)
with profile_code() as profiler:
    for r in records:
        profiler.setup_start_time()
        rating.aggregate([{"$group": {"_id":r["_id"], "avg_val":{"$avg":"$score"}}}])
        profiler.checkpoint()

Median - 0.9462798833847046
Average - 0.962671811580658
Summary - 96.2671811580658


In [15]:
# Real-time selecting and inserting

In [26]:
rating = db.get_collection('rating')
num_records = []
with profile_code() as profiler:
    for chunk in generate_rating(1, 2000):
        rating.insert_many(chunk)
        random_record = rating.find().limit(1)[0]
        profiler.setup_start_time()
        res = rating.find({"film_id": random_record["film_id"]})
        data = [r for r in res]
        profiler.checkpoint()

Median - 0.0009720325469970703
Average - 0.001012405753135681
Summary - 2.0248115062713623
