## Import Libraries

In [2]:
import sys
import json
import warnings
# Suppress DeprecationWarning messages for the rest of the run.
warnings.filterwarnings("ignore", category=DeprecationWarning)

# The collection to report on is taken from the first command-line argument.
# Exit with a usage message instead of a bare IndexError when it is missing.
if len(sys.argv) < 2:
    raise SystemExit(
        "usage: {} <collection_name>".format(sys.argv[0] if sys.argv else "script")
    )
collection_name = sys.argv[1]

## Read Config Files

In [3]:
import configparser 
config = configparser.ConfigParser()
# ConfigParser.read() silently skips files it cannot open and returns the list
# of files it actually parsed. Check that list so a missing config.ini is
# reported as such rather than as a KeyError on the first option lookup below.
if not config.read('config.ini'):
    raise FileNotFoundError(
        "config.ini could not be read from the current working directory"
    )
# Connection settings live in the DEFAULT section. They are kept as strings;
# the port is converted to int where the MongoDB client is created.
ip = config['DEFAULT']['IP']
port = config['DEFAULT']['MongoDB-Port']
db_name = config['DEFAULT']['DB-Name']

## Connect to MongoDB

In [4]:
from pymongo import MongoClient
# Open a client against the configured host; the port is stored as text in the
# config file, so it is converted to an integer here.
client = MongoClient(host=ip, port=int(port))

In [5]:
# Handle to the database named in the configuration.
db = client.get_database(db_name)

## Pipeline

In [6]:
# Aggregation pipeline for the "lang" field: keep documents whose "lang" is
# "en", then emit their count under the key "number_english".
_match_english = {"$match": {"lang": "en"}}
_count_english = {"$count": "number_english"}
pipeline_lang = [_match_english, _count_english]

In [7]:
# Aggregation pipeline for the "geoname" field: keep documents that have a
# "geoname" field at all, then emit their count under the key "number_geoname".
_match_has_geoname = {"$match": {"geoname": {"$exists": True}}}
_count_geoname = {"$count": "number_geoname"}
pipeline_geoname = [_match_has_geoname, _count_geoname]

## Aggregation

In [13]:
# Run the language pipeline and materialise its result documents.
lang = list(db[collection_name].aggregate(pipeline_lang, allowDiskUse=True))
# If the aggregation yields no documents, the count defaults to 0.
number_lang = lang[0]["number_english"] if lang else 0

In [14]:
# Run the geoname pipeline and materialise its result documents.
geoname = list(db[collection_name].aggregate(pipeline_geoname, allowDiskUse=True))
# If the aggregation yields no documents, the count defaults to 0.
number_geoname = geoname[0]["number_geoname"] if geoname else 0

## Collect Report Values

In [15]:
# Total number of documents in the collection.
# Collection.count() was deprecated in PyMongo 3.7 and removed in 4.0;
# count_documents({}) is the supported replacement and returns an exact count.
# (estimated_document_count() is a faster, metadata-based alternative if an
# approximate figure is acceptable.)
count = db[collection_name].count_documents({})

In [16]:
# Fetch the index metadata of the collection being reported on.
_collection = db[collection_name]
index = _collection.index_information()

In [17]:
# Assemble the report from the values gathered above.
report = {
    "record_count": count,
    # The index information is wrapped in a single-element list.
    "Indexes": [index],
    "en_tweets_count": number_lang,
    "geoname_count": number_geoname,
}

## Write Report to JSON

In [18]:
# Serialise the report to JSON text, then parse that text back, so that
# `datastore` holds only plain JSON-compatible values.
json_string = json.dumps(obj=report)
datastore = json.loads(s=json_string)

In [19]:
# The report is written to "<collection_name>.json" in the working directory.
_report_path = "{}{}".format(collection_name, ".json")
with open(_report_path, 'w') as f:
    f.write(json.dumps(datastore))