# Search Application

In [1]:
# importing required libraries
import psycopg2
from pymongo import MongoClient
from fastapi import HTTPException
from exceptions import *

## Connecting to the Databases Storing the Information

In [3]:
# connecting to the PostgreSQL database
try:
    p_conn = psycopg2.connect(
        dbname = "twitter",
        user = "varshiniyanamandra",
        password = "",
        host = "localhost",
        port = "5432"
    )
except psycopg2.OperationalError as e:
    # report the failure and re-raise it: without a connection nothing below can
    # run, and falling through would only fail on the next line with a NameError
    # on p_conn that hides the real cause
    print(f"Unable to connect to PostgreSQL: {e}")
    raise

# opening a cursor to perform database operations
# (this single cursor is shared by the query functions defined later in the file)
p_cur = p_conn.cursor()

# print the PostgreSQL connection (DSN) parameters to confirm which database is in use
print(p_conn.get_dsn_parameters(), "\n")

{'user': 'varshiniyanamandra', 'passfile': '/Users/varshiniyanamandra/.pgpass', 'channel_binding': 'prefer', 'dbname': 'twitter', 'host': 'localhost', 'port': '5432', 'options': '', 'sslmode': 'prefer', 'sslcompression': '0', 'sslsni': '1', 'ssl_min_protocol_version': 'TLSv1.2', 'gssencmode': 'prefer', 'krbsrvname': 'postgres', 'target_session_attrs': 'any'} 



In [None]:
# create a client for the local MongoDB server, then pick the database and the
# collection that hold the tweets
mongo_conn = MongoClient('mongodb://localhost:27017/')
mongo_db = mongo_conn.get_database('twitter_data')
tweets_collection = mongo_db.get_collection('tweets')

## Building the Search Application

### Defining Utility Functions

In [None]:
# function to get user information
def get_user_info(user_id: str):
    """
        This function looks up a Twitter user in the PostgreSQL TwitterUser table
        and returns the user's information as a dictionary.
        Input:
            user_id (str): Twitter user ID which we want to look up
        Output:
            user_out (dict): user information corresponding to user_id
        Raises:
            HTTPException: built from UserNotFoundError's code and description
                when no row matches user_id
            psycopg2.Error: re-raised unchanged if the query itself fails
    """
    try:
        # pass user_id as a bound parameter instead of formatting it into the SQL
        # text, so a crafted ID cannot alter the statement (SQL injection)
        p_cur.execute("SELECT * FROM TwitterUser WHERE id = %s;", (user_id,))
    except psycopg2.Error:
        # a failed statement leaves the shared connection in an aborted
        # transaction; roll back so later queries keep working, then re-raise
        p_conn.rollback()
        raise

    user_info = p_cur.fetchone()
    if user_info is None:
        # raise an exception if the user doesn't exist in the database
        raise HTTPException(status_code = UserNotFoundError.code, detail = UserNotFoundError.description)

    # NOTE(review): the positional indexes below assume the TwitterUser columns
    # are ordered exactly as listed here -- confirm against the table definition
    user_out = {
        'id': user_info[0],
        'name': user_info[1],
        'screen_name': user_info[2],
        'location': user_info[3],
        'created_at': user_info[4],
        'followers_count': user_info[5],
        'friends_count': user_info[6],
        'statuses_count': user_info[7],
        'favorites_count': user_info[8]
    }

    return user_out

In [None]:
# function to search tweets containing a specified keyword
def search_tweets_keyword(keyword: str):
    """
        Function to get the information of tweets based on a user-specified keyword.
        The keyword is matched with a MongoDB $text query against the tweets collection.
        Input:
            keyword (str): user-specified keyword
        Output:
            out (list): list of tweet dictionaries containing the keyword, ordered
                from newest to oldest (empty list when nothing matches)
    """
    out = []
    query = {'$text': {'$search': keyword}}
    tweets_match = tweets_collection.find(query) # we can add .limit(PAGE_LIMIT) here, if needed
    for result in tweets_match:
        tweet = {
            'id': result['_id'],
            'text': result['text'],
            'user_id': result['user_id'],
            'quote_count': result['quote_count'],
            'reply_count': result['reply_count'],
            'retweet_count': result['retweet_count'],
            'favorite_count': result['favorite_count'],
            'created_at': result['timestamp'],
            'coordinates': result['coordinates']
        }
        # add information on whether the tweet is a retweet
        tweet['retweet'] = "Yes" if 'retweet' in result else "No"

        out.append(tweet)

    def _recency(tweet):
        # The stored timestamp is exposed under 'created_at'; the tweet dictionaries
        # have no 'timestamp_ms' key. Numeric strings are compared as integers.
        # NOTE(review): assumes every stored timestamp has the same type
        # (all numeric strings, all numbers or all datetimes) -- confirm with the loader
        stamp = tweet['created_at']
        if isinstance(stamp, str) and stamp.isdigit():
            return int(stamp)
        return stamp

    # sort the results from newest to oldest before returning;
    # list.sort() works in place and returns None, so return the list itself
    out.sort(key = _recency, reverse = True)
    return out

In [None]:
# function to search tweets based on tweet id
def retrieve_tweet(tweet_id: str):
    """
        Function to fetch a single tweet from the MongoDB tweets collection.
        Input:
            tweet_id (str): user-specified tweet ID, matched against the 'id' field
        Output:
            tweet (dict): information of the tweet corresponding to tweet_id
        Raises:
            HTTPException: built from TweetNotFoundError's code and description
                when no document matches tweet_id
    """
    # NOTE(review): the lookup filters on 'id' while the returned 'id' value is
    # read from '_id' -- confirm the stored documents really carry both fields
    document = tweets_collection.find_one({'id': tweet_id})

    # guard clause: stop here if the tweet doesn't exist in the database
    if document is None:
        raise HTTPException(status_code = TweetNotFoundError.code, detail = TweetNotFoundError.description)

    # fields copied into the output under the same name they have in the document
    same_name_fields = (
        'text',
        'user_id',
        'quote_count',
        'reply_count',
        'retweet_count',
        'favorite_count',
    )

    tweet = {'id': document['_id']}
    for field in same_name_fields:
        tweet[field] = document[field]
    tweet['created_at'] = document['timestamp']
    tweet['coordinates'] = document['coordinates']

    # a document is reported as a retweet when it carries a 'retweet' key
    tweet['retweet'] = "Yes" if 'retweet' in document else "No"

    return tweet