In [1]:
import numpy as np
import pandas as pd
from pymongo import MongoClient
np.set_printoptions(suppress=True)
# Have pandas treat +inf and -inf as missing values ("NA") in computations.
pd.set_option("mode.use_inf_as_na", True)

# Handles for the `tweets` collection of the `IF29` database, reached through
# a MongoDB server on localhost:27017.
client = MongoClient(host="localhost", port=27017)
db = client["IF29"]
collection = db["tweets"]

In [2]:
# Aggregation pipeline: a single $group stage keyed on the tweet author's id
# that pushes a trimmed copy of each matching tweet into a `tweets` array.
pipeline = [
    {
        "$group": {
            "_id": "$user.id",
            "tweets": {
                "$push": {
                    "timestamp_ms": "$timestamp_ms",
                    # Profile fields copied from the embedded `user` document.
                    "user": {
                        field: f"$user.{field}"
                        for field in (
                            "followers_count",
                            "friends_count",
                            "verified",
                            "created_at",
                            "favourites_count",
                            "statuses_count",
                            "listed_count",
                        )
                    },
                    "retweet_count": "$retweet_count",
                    # Entity lists copied from the embedded `entities` document.
                    "entities": {
                        kind: f"$entities.{kind}"
                        for kind in ("urls", "hashtags", "user_mentions")
                    },
                    "text": "$text",
                    "reply_count": "$reply_count",
                    "created_date": "$created_date",
                    "created_at": "$created_at",
                }
            },
        }
    }
]

# Execute the aggregation query.
# The $group stage accumulates every tweet of every user, which can exceed the
# server's per-stage memory limit on a large collection; allowDiskUse lets the
# server spill to temporary files instead of aborting the query.
tweets = collection.aggregate(pipeline, allowDiskUse=True)



In [4]:
# Consume the iterable held in `tweets` into an in-memory list so the grouped
# results can be indexed and traversed more than once.
tweets = [user_group for user_group in tweets]

In [5]:
# Preview only the first two user groups: echoing the whole list floods the
# notebook with the entire dataset (including raw user text).
tweets[:2]

[{'_id': 2620092884,
  'tweets': [{'timestamp_ms': '1528944492131',
    'user': {'followers_count': 748,
     'friends_count': 93,
     'verified': False,
     'created_at': 'Sat Jul 12 01:54:43 +0000 2014',
     'favourites_count': 444,
     'statuses_count': 34250,
     'listed_count': 4},
    'retweet_count': 0,
    'entities': {'urls': [],
     'hashtags': [{'indices': [84, 88], 'text': 'KSA'},
      {'indices': [89, 98], 'text': 'WorldCup'}],
     'user_mentions': [{'indices': [3, 17],
       'screen_name': 'Almoj_alazra8',
       'id_str': '379309259',
       'name': 'قروب الموج الأزرق',
       'id': 379309259}]},
    'text': 'RT @Almoj_alazra8: إيكر كاسياس :\nأتوقع فوز السعودية على روسيا في إفتتاح المونديال. \n#KSA\n#WorldCup https://t.co/1XVXP1KJ9m',
    'reply_count': 0,
    'created_date': '2018-06-14 04:48:12',
    'created_at': 'Thu Jun 14 02:48:12 +0000 2018'}]},
 {'_id': 3700160833,
  'tweets': [{'timestamp_ms': '1528943948565',
    'user': {'followers_count': 180,
     '

In [6]:
import json

# Serialise the grouped tweets to output.json in the current working directory.
with open('output.json', mode='w') as out_file:
    json.dump(tweets, fp=out_file)
