Pagination: Incorporated page and per_page arguments to control the output. Queries now compute the offset based on the page number and items per page.
Timestamp Filtering: Added optional start_time and end_time parameters to restrict results to a specific time range. The parsed date range is built only when both values are supplied and both parse successfully.
Cache Efficiency: Included page number in the cache key to cache each page of results separately, enhancing cache relevance and utility.

In [92]:
%run './DatabaseConnection_Sarthak.ipynb'
import time
from dateutil import parser
from pprint import pprint

# Convert string dates to MongoDB datetime objects
def parse_dates(start, end):
    """Build a MongoDB range condition from two date strings.

    Args:
        start: Lower bound as a date string, or a falsy value for "not given".
        end: Upper bound as a date string, or a falsy value for "not given".

    Returns:
        ``{'$gte': <start datetime>, '$lte': <end datetime>}`` when both
        bounds are given and parse successfully; otherwise an empty dict.
        A parse failure is reported on stdout rather than raised.
    """
    # A range needs both ends; with either one missing there is nothing to build.
    if not start or not end:
        return {}

    try:
        lower_bound = parser.parse(start)
        upper_bound = parser.parse(end)
    except Exception as err:
        # Best effort: report the problem and fall back to "no filter".
        print(f"Error parsing dates: {err}")
        return {}

    return {'$gte': lower_bound, '$lte': upper_bound}

def perform_search(query_type, query, page=1, per_page=10, start_time=None, end_time=None):
    """Run a cached, paginated search over users, tweet text, or hashtags.

    Args:
        query_type: One of
            'user'    - MySQL substring match on ``Users.screen_name``; each
                        returned user row gets a 'tweets' list attached,
            'text'    - MongoDB ``$text`` search,
            'hashtag' - match on the ``hashtags`` field.
            Any other value produces an empty list.
        query: The search term.
        page: 1-based page number.
        per_page: Number of results per page.
        start_time: Optional lower bound (date string) for tweet ``created_at``.
        end_time: Optional upper bound (date string) for tweet ``created_at``.
            The date filter is applied only when ``parse_dates`` returns a
            range, i.e. when both bounds are given and parse successfully.

    Returns:
        A list of result dicts, served from the cache when an entry exists
        for the same arguments.
    """
    # Every argument that shapes the result belongs in the key; otherwise a
    # call with a different page size or date range would be handed another
    # query's cached page.
    cache_key = f"{query_type}:{query}:{page}:{per_page}:{start_time}:{end_time}"
    start_cache_time = time.time()
    cache_result = cache.get(cache_key)
    end_cache_time = time.time()
    if cache_result:
        elapsed_time = end_cache_time - start_cache_time
        print(f"Cache hit for {query}, fetched in {elapsed_time:.2f} seconds")
        return cache_result

    print(f"Cache miss for {query}")
    search_start_time = time.time()

    # parse_dates returns bare range operators ({'$gte': ..., '$lte': ...}).
    # They must be nested under the field they constrain; spreading them into
    # the top level of a query document is not a valid MongoDB filter.
    date_filter = parse_dates(start_time, end_time)
    date_clause = {'created_at': date_filter} if date_filter else {}

    result = []
    if query_type == 'user':
        user_query = """
            SELECT * FROM Users WHERE screen_name LIKE %s 
            ORDER BY followers_count DESC LIMIT %s, %s
        """
        offset = (page - 1) * per_page
        user_data = mysql_query(user_query, (f"%{query}%", offset, per_page))

        # Screen names of this page's users drive a single `$in` tweet lookup.
        screen_names = [user['screen_name'] for user in user_data]

        tweets = []
        if screen_names:
            # Use the parsed datetimes (not the raw input strings) so the
            # comparison is against real dates, same as the other branches.
            tweet_query = {'user.screen_name': {'$in': screen_names}, **date_clause}
            # NOTE: the limit caps the combined tweet count at 10 per matched
            # user on average; it does not guarantee 10 tweets for each user.
            tweets = mongo_query(tweet_query, 'favorite_count', 10 * len(screen_names))

        # Group tweets by author once instead of rescanning the whole tweet
        # list for every user; per-user tweet order is preserved.
        tweets_by_author = {}
        for tweet in tweets:
            tweets_by_author.setdefault(tweet['user']['screen_name'], []).append(tweet)

        for user in user_data:
            user['tweets'] = tweets_by_author.get(user['screen_name'], [])
            result.append(user)

    elif query_type == 'text':
        text_query = {'$text': {'$search': query}, **date_clause}
        result = mongo_query(text_query, 'favorite_count', per_page, page)

    elif query_type == 'hashtag':
        hashtag_query = {'hashtags': query, **date_clause}
        result = mongo_query(hashtag_query, 'favorite_count', per_page, page)

    search_end_time = time.time()
    search_duration = search_end_time - search_start_time
    print(f"Search completed in {search_duration:.2f} seconds")
    cache.put(cache_key, result)
    return result

# Function to get top 10 users based on follower count
def top_10_users():
    """Return the ten Users rows with the highest followers_count."""
    no_params = ()
    return mysql_query(
        "SELECT * FROM Users ORDER BY followers_count DESC LIMIT 10",
        no_params,
    )

# Function to get top 10 tweets based on favorite count
def top_10_tweets():
    """Return the ten tweets with the highest favorite_count.

    Each result carries only tweet_id, text and favorite_count (plus
    MongoDB's default _id).
    """
    keep_fields = {'$project': {'tweet_id': 1, 'text': 1, 'favorite_count': 1}}
    most_liked_first = {'$sort': {'favorite_count': -1}}
    first_ten = {'$limit': 10}
    return mongo_aggregate([keep_fields, most_liked_first, first_ten])

# Function for drill-down on tweet metadata from MySQL
def tweet_metadata(tweet_id):
    """Look up the MySQL Tweets row(s) for the given tweet_id."""
    params = (tweet_id,)
    return mysql_query("SELECT * FROM Tweets WHERE tweet_id = %s", params)

# Function for drill-down on user activity based on screen name
def user_activity(screen_name, limit=10):
    """Return one user's profile row with that user's tweets attached.

    Args:
        screen_name: Exact screen name to look up.
        limit: Maximum number of tweets requested from MongoDB.

    Returns:
        The user's row dict with an added 'tweets' key, or None when no user
        has that screen name.
    """
    user_query = "SELECT * FROM Users WHERE screen_name = %s"
    rows = mysql_query(user_query, (screen_name,))

    # mysql_query results are iterated as a sequence of row dicts elsewhere in
    # this file, so assigning a key on the result itself would raise
    # TypeError. Take the first matching row instead.
    user_data = next(iter(rows or ()), None)
    if user_data is None:
        return None

    tweet_query = {
        'user.screen_name': screen_name
    }
    user_data['tweets'] = mongo_query(tweet_query, 'favorite_count', limit)

    return user_data


In [None]:
#cache.restore()

In [81]:
# Example search by text for "CoronaVirus"
# (page and per_page are left at their defaults of 1 and 10)
search_results_text = perform_search(
    query_type='text', 
    query='CoronaVirus' 
)
print("Text Search Results:")
pprint(search_results_text)

Cache miss for CoronaVirus
Search completed in 0.57 seconds
Text Search Results:
[{'_id': ObjectId('6628685b57e2ad708cc33802'),
  'created_at': datetime.datetime(2020, 4, 25, 14, 39, 32),
  'favorite_count': 331618,
  'hashtags': [],
  'retweet_count': 119601,
  'text': 'This is Dr. Usama Riaz. He spent past weeks screening and treating '
          'patients with Corona Virus in Pakistan. \n'
          '\n'
          'He knew there was no PPE. He persisted anyways.\n'
          '\n'
          'Today he lost his own battle with coronavirus but he gave life and '
          'hope to so many more. \n'
          '\n'
          'KNOW HIS NAME 😭❤ https://t.co/flSwhLCPmx',
  'tweet_id': 1254057458852679680,
  'user': {'name': 'ℑ𝔤𝔤𝔶',
           'screen_name': 'ialixooxo',
           'user_id': 936625453594488832}},
 {'_id': ObjectId('6628551f57e2ad708cc2213a'),
  'created_at': datetime.datetime(2020, 4, 25, 12, 58, 7),
  'favorite_count': 165110,
  'hashtags': [],
  'retweet_count': 18093,
  '

In [73]:
# Example search by hashtag for "COVID19InTurkeysPrisons"
# (page and per_page are left at their defaults of 1 and 10)
search_results_hashtag = perform_search(
    query_type='hashtag', 
    query='COVID19InTurkeysPrisons'
)
print("\nHashtag Search Results:")
pprint(search_results_hashtag)

Cache miss for COVID19InTurkeysPrisons


INFO:root:MongoDB connection closed.


Search completed in 0.53 seconds

Hashtag Search Results:
[{'_id': ObjectId('66284e5b57e2ad708cc1bf7a'),
  'created_at': datetime.datetime(2020, 4, 12, 18, 46, 20),
  'favorite_count': 118,
  'hashtags': ['COVID19InTurkeysPrisons'],
  'retweet_count': 208,
  'text': 'Waiting for the evacuation, there are\n'
          '\n'
          '✅780 babies\n'
          '✅2,500 children\n'
          '✅Thousands of political prisoners\n'
          '✅1,333 Patients\n'
          '✅457 Severe patients\n'
          '\n'
          'Let these people be evacuated before they are infected by corona '
          'virus.\n'
          '\n'
          '#COVID19InTurkeysPrisons https://t.co/27PF0A1NZR',
  'tweet_id': 1249408528730767367,
  'user': {'name': 'Nihal Sema',
           'screen_name': 'NihalSema1',
           'user_id': 825077506441097216}},
 {'_id': ObjectId('66284e6957e2ad708cc1c047'),
  'created_at': datetime.datetime(2020, 4, 12, 18, 46, 34),
  'favorite_count': 118,
  'hashtags': ['COVID19InTurkeys

In [93]:
# Example search by user for "Deeksha"
# The user search is a substring match: any screen_name containing the term
# is returned, ordered by followers_count, with that user's tweets attached.
search_results_user = perform_search(
    query_type='user', 
    query='Deeksha'
)
print("\nUser Search Results:")
pprint(search_results_user)

Cache miss for Deeksha


INFO:root:MongoDB connection closed.


Search completed in 0.89 seconds

User Search Results:
[{'created_at': datetime.datetime(2018, 8, 5, 9, 10, 10),
  'followers_count': 2104,
  'friends_count': 262,
  'location': 'India',
  'name': 'Deeksha Thakurr',
  'screen_name': 'thakur_deekshaa',
  'statuses_count': 82692,
  'tweets': [{'_id': ObjectId('66285b4c57e2ad708cc27b31'),
              'created_at': datetime.datetime(2020, 4, 25, 13, 31, 16),
              'favorite_count': 172,
              'hashtags': [],
              'retweet_count': 80,
              'text': 'If that’s how they are following lockdown/social  '
                      'distancing then Mumbai is sitting on Corona bomb.\n'
                      'This is Mumbra Market yesterday afternoon👇 '
                      'https://t.co/LGuPJjIua5',
              'tweet_id': 1254040278710829061,
              'user': {'name': 'Deeksha Thakurr',
                       'screen_name': 'thakur_deekshaa',
                       'user_id': 1026032604124868608}}],
  'user_

In [83]:
cache.print_cache()

Cache contents:
Key: hashtag:Covid19:1, Data: [{'_id': ObjectId('66284b1357e2ad708cc18fe8'), 'tweet_id': 1249405207156424704, 'text': 'राबी की फसल खेतों में तैयार खड़ी है लेकिन #Covid19 लॉक्डाउन में कटाई का काम मुश्किल है।सैकड़ों किसानों की जीविका ख़तरे में है।देश के अन्नदाता किसान आज इस संकट में दोहरी मुसीबत में हैं। कटाई के लिए सुरक्षित तरीक़े से लॉक्डाउन में ढील देना एकमात्र रास्ता है।\nhttps://t.co/fOc2cUA10D', 'hashtags': ['Covid19'], 'user': {'user_id': 3315155226, 'name': 'Dr.Sanjeev Rajpurohit', 'screen_name': 'DrSanjeevRajp4'}, 'retweet_count': 9225, 'favorite_count': 43655, 'created_at': datetime.datetime(2020, 4, 12, 18, 33, 8)}, {'_id': ObjectId('66284c2957e2ad708cc19f92'), 'tweet_id': 1249406245645307905, 'text': 'राबी की फसल खेतों में तैयार खड़ी है लेकिन #Covid19 लॉक्डाउन में कटाई का काम मुश्किल है।सैकड़ों किसानों की जीविका ख़तरे में है।देश के अन्नदाता किसान आज इस संकट में दोहरी मुसीबत में हैं। कटाई के लिए सुरक्षित तरीक़े से लॉक्डाउन में ढील देना एकमात्र रास्ता है।\nhttps

In [24]:
# Fetch the 10 users with the highest followers_count and pretty-print
# each returned row on its own
top_users = top_10_users()
print("\nTop 10 Users:")
for user in top_users:
    pprint(user)


Top 10 Users:
{'created_at': datetime.datetime(2009, 8, 27, 3, 3, 5),
 'followers_count': 15884929,
 'friends_count': 28,
 'location': 'Jakarta, Indonesia',
 'name': 'detikcom',
 'screen_name': 'detikcom',
 'statuses_count': 1631924,
 'user_id': 69183155}
{'created_at': datetime.datetime(2009, 8, 3, 13, 23, 45),
 'followers_count': 14608046,
 'friends_count': 721,
 'location': 'Scotland',
 'name': 'J.K. Rowling',
 'screen_name': 'jk_rowling',
 'statuses_count': 12592,
 'user_id': 62513246}
{'created_at': datetime.datetime(2009, 5, 26, 11, 31),
 'followers_count': 9704885,
 'friends_count': 416,
 'location': 'India',
 'name': 'AajTak',
 'screen_name': 'aajtak',
 'statuses_count': 398484,
 'user_id': 42606652}
{'created_at': datetime.datetime(2009, 5, 11, 12, 25, 51),
 'followers_count': 9562582,
 'friends_count': 248,
 'location': 'India',
 'name': 'ABP News',
 'screen_name': 'ABPNews',
 'statuses_count': 236879,
 'user_id': 39240673}
{'created_at': datetime.datetime(2011, 1, 20, 12, 1

In [31]:
# Fetch the 10 tweets with the highest favorite_count and pretty-print
# each one (only tweet_id, text and favorite_count are projected)
top_tweets = top_10_tweets()
print("\nTop 10 Tweets:")
for tweet in top_tweets:
    pprint(tweet)


Top 10 Tweets:
{'_id': ObjectId('66284eae57e2ad708cc1c432'),
 'favorite_count': 1128502,
 'text': 'ALERT‼️‼️‼️\n'
         'The corona virus can be spread through money. If you have any money '
         'at home, put on some gloves, put all the money in to a plastic bag '
         "and put it outside the front door tonight. I'm collecting all the "
         'plastic bags tonight for safety. Think of your health.',
 'tweet_id': 1249408877554270208}
{'_id': ObjectId('66285a0257e2ad708cc26875'),
 'favorite_count': 811062,
 'text': '*corona virus enters my body*\n'
         '\n'
         'The 4 Flintstone gummies I ate in 2005: https://t.co/3STfdIQtaT',
 'tweet_id': 1254038604927643653}
{'_id': ObjectId('6628593d57e2ad708cc25d55'),
 'favorite_count': 764405,
 'text': 'When this Corona shit passes we have to promise each other that '
         'we’re going to tell our kids that we survived a zombie apocalypse in '
         '2020',
 'tweet_id': 1254037586265411584}
{'_id': ObjectId('6628577b

In [84]:
# Example search by text for "India" - first call
# Nothing is cached for this key yet, so this call queries the database
# and stores the result in the cache.
first_search_results_india = perform_search(
    query_type='text', 
    query='India', 
    page=1, 
    per_page=10
)
print("First Search Results for 'India':")
print(first_search_results_india)

# Example search by text for "India" - second call, expecting a cache hit
# The arguments are identical to the first call, so the same cache key is
# built and the stored result should be returned without a database query.
second_search_results_india = perform_search(
    query_type='text', 
    query='India', 
    page=1, 
    per_page=10
)
print("\nSecond Search Results for 'India' (should be from cache):")
print(second_search_results_india)

Cache miss for India
Search completed in 0.64 seconds
First Search Results for 'India':
[{'_id': ObjectId('66284b6957e2ad708cc194c5'), 'tweet_id': 1249405547129925632, 'text': 'When Corona Virus  is over, let’s spend our holidays in India, eat in local restaurants, buy local meats and veggies , buy clothes and shoes from indian brands and support local businesses. These businesses are going to find it very difficult to survive without our help.', 'hashtags': [], 'user': {'user_id': 164932759, 'name': 'Raj jha', 'screen_name': 'Rajjha07'}, 'retweet_count': 21125, 'favorite_count': 92954, 'created_at': datetime.datetime(2020, 4, 12, 18, 34, 29)}, {'_id': ObjectId('66285f0757e2ad708cc2b13b'), 'tweet_id': 1254045330556940288, 'text': 'Namaste india 🙏 hum sab corona virus ko harane mein ek saath hai , hum sab apne apne sarkar ki baat ka nirdes kare aur ghar me kuch Dino ke liye rahe , yeh samay hai hosiyaar rahene ka .App sabhi ko der sara pyaar 💕\n\nMy Hindi teacher - @shreevats1 🙏🏻', 'has

In [85]:
cache.persist()