# Search Query In Twitter Collection
-------

# Import Libraries

In [1]:
from pymongo import MongoClient
import pymongo
import json
from bson import json_util
import csv
import os
import logging
import time
import pandas as pd
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning)

# Read Config File

In [2]:
import configparser

# Load connection and run-time settings from the [DEFAULT] section of
# config.ini (looked up relative to the current working directory).
config = configparser.ConfigParser()
config.read('config.ini')
ip = config['DEFAULT']['IP']
port = config['DEFAULT']['MongoDB-Port']
db_name = config['DEFAULT']['DB-Name']
user_name = config['DEFAULT']['User-Name']
psword = config['DEFAULT']['Psword']
# Parse Debug as a real boolean. Kept as a raw string, any non-empty value
# (including "False", "no" or "0") is truthy, so `if debug:` could never be
# switched off. getboolean() accepts 1/0, yes/no, true/false, on/off and
# raises ValueError for anything else.
debug = config.getboolean('DEFAULT', 'Debug')
query_input = config['DEFAULT']['Query-Input']

# Setup Log File

In [3]:
# Route every record from DEBUG upwards to debug.log, one line per record in
# the form "timestamp | level | message".
logging.basicConfig(
    filename='debug.log',
    level=logging.DEBUG,
    format='%(asctime)s | %(levelname)s | %(message)s',
)

# Read Query List

In [4]:
# Load the header-less query file named by query_input; the first column
# holds the search terms, one per row.
query_pd = pd.read_csv(
    query_input,
    header=None,
    encoding="UTF-8",
)
query_list = query_pd.iloc[:, 0].tolist()

# Connect MongoDB

In [5]:
# Create the MongoDB client from the host, port and credentials read above;
# the port is converted to int because it is stored as text.
client = MongoClient(
    host=ip,
    port=int(port),
    username=user_name,
    password=psword,
)

# Get Collection Names

In [6]:
# Select the configured database and list the names of its collections.
# list_collection_names() is the supported replacement for
# Database.collection_names(), which PyMongo deprecated and later removed.
db_twitter = client[db_name]
collections_twitter = db_twitter.list_collection_names()

# Supporting Functions

In [7]:
# create folder if not exist
def create_folder(folder):
    """Ensure *folder* exists and hand the same path back to the caller.

    Missing intermediate directories are created as well. A path that
    already exists is left untouched.
    """
    if os.path.exists(folder):
        return folder
    os.makedirs(folder)
    return folder

In [8]:
# export to csv
def write_csv(output_file, row_list, func):
    """Write a single CSV row to *output_file*.

    Args:
        output_file: Path of the CSV file.
        row_list: Sequence of field values forming one row.
        func: Mode string passed to open(), e.g. "w" to start a new file
            or "a+" to append to an existing one.
    """
    # newline='' lets the csv module control line endings itself; without it
    # fields containing line breaks are mangled and Windows gets blank rows.
    # UTF-8 is forced so non-ASCII text does not depend on the platform's
    # default encoding.
    with open(output_file, func, newline='', encoding='utf-8') as f:
        writer = csv.writer(f, quoting=csv.QUOTE_MINIMAL)
        writer.writerow(row_list)

# Create Result Folders

In [10]:
# Make sure both output roots exist before exporting: CSV files go under
# csv_result/ and per-tweet JSON files under json_result/. create_folder()
# leaves an already-existing path untouched and returns the path it was
# given, which is why the notebook echoes the last call's value.
create_folder("csv_result/")
create_folder("json_result/")

'json_result/'

# Write Into CSV & JSON

In [11]:
# Run every query against every collection. Each (collection, query) pair
# produces one CSV file of selected fields under csv_result/ and one JSON
# file per matching tweet under json_result/<collection>/<query>/.
start = time.time()
for c in sorted(collections_twitter):
    create_folder("json_result/{}/".format(c))

    for q in query_list:
        if debug:
            logging.info("---------------------------------------")
            logging.info("Finding query '{}' in collection {}...".format(q, c))

        count = 0
        create_folder("json_result/{}/{}".format(c, q))
        # The header and every row go to the same file; build the path once.
        csv_path = "csv_result/{}_{}.csv".format(c, q)
        write_csv(csv_path, ["tweet_id", "created_at", "text", "user_screen_name", "user_location"], "w")

        # MongoDB's $text operator needs a text index on the collection.
        cursor = db_twitter[c].find({"$text": {"$search": q}})

        for data in cursor:
            # export selected fields to csv
            tweet_id = data["id_str"]
            text = data["text"]
            # Resolve the optional fields per tweet, defaulting to "". They
            # were previously initialised once before this loop, so a tweet
            # lacking a field silently inherited the previous tweet's value.
            created_at = data.get("created_at", "")
            user = data.get("user") or {}
            screen_name = user.get("screen_name", "")
            location = user.get("location", "")
            write_csv(csv_path, [tweet_id, created_at, text, screen_name, location], "a+")

            # export each record as json
            json_path = 'json_result/{}/{}/{}.json'.format(c, q, tweet_id.replace(" ", "_"))
            with open(json_path, 'w') as f:
                # Drop the MongoDB-assigned _id before export; json_util
                # converts the remaining BSON values to JSON-safe ones.
                del data["_id"]
                json.dump(json.loads(json_util.dumps(data)), f)

            count += 1
        if debug:
            logging.info("The program has been running for " + str(time.time() - start) + " seconds")
            logging.info("Found {} records for query '{}'".format(count, q))