In [1]:
import pymongo
import datetime
import mysql.connector
import time
import pandas as pd

In [2]:
#connect mysql
mydb = mysql.connector.connect(
    host="localhost",       # host
    user="root",    # username
    passwd="password",   # password 
    database= "stackexchange_cs", # database
)


In [3]:
#connect mongodb
mongo_client = pymongo.MongoClient("mongodb://localhost:27017/")
mongodb = mongo_client["cs_stackexchange"]


In [4]:
# get the sql query result
def getSQLResult(query):
    cursor = mydb.cursor()
    start = time.perf_counter()
    cursor.execute(query)
    cost = time.perf_counter() - start
    result = cursor.fetchall()
    df = pd.DataFrame(result, columns = cursor.column_names)
    cursor.close()

    return df, cost

In [5]:
def getMongoResult(query):
    start = time.perf_counter()
    result = query()
    cost = time.perf_counter() - start

    list_result = list(result)
    df = pd.DataFrame(list_result)

    return df, cost

### Get monthly new users

In [6]:
sql = '''
    SELECT DATE_FORMAT(CreationDate, '%Y-%m') AS date, COUNT(*) AS count
    FROM Users
    GROUP BY date
    ORDER BY date;
'''

In [7]:
sql_df, sql_cost = getSQLResult(sql)

In [8]:
sql_df.head()

Unnamed: 0,date,count
0,2012-03,820
1,2012-04,443
2,2012-05,359
3,2012-06,303
4,2012-07,282


In [9]:
sql_cost

0.31984749999999984

In [10]:
query = lambda: mongodb.Users.aggregate( [
    {
    '$group': {
      '_id': {
        '$dateToString': {
          'format': "%Y-%m",
          'date': "$CreationDate",
        },
      },
      'count': {
        '$sum': 1,
      },
    },
  },
  {
    '$sort': {
      '_id': 1,
    },
  }])

In [11]:
mongo_df, mongo_cost = getMongoResult(query)

In [12]:
mongo_df.head()

Unnamed: 0,_id,count
0,2012-03,820
1,2012-04,443
2,2012-05,359
3,2012-06,303
4,2012-07,282


In [13]:
mongo_cost

0.8562171000000003

### Get daily new users

In [14]:
sql = '''
    SELECT DATE_FORMAT(CreationDate, '%Y-%m-%d') AS date, COUNT(*) AS count
    FROM Users
    GROUP BY date
    ORDER BY date;
'''

In [15]:
sql_df, sql_cost = getSQLResult(sql)

In [16]:
sql_df.head()

Unnamed: 0,date,count
0,2012-03-06,100
1,2012-03-07,42
2,2012-03-08,15
3,2012-03-09,6
4,2012-03-10,16


In [17]:
sql_cost

0.1967863999999997

In [18]:
query = lambda: mongodb['Users'].aggregate([
    {
    '$group': {
      '_id': {
        '$dateToString': {
          'format': "%Y-%m-%d",
          'date': "$CreationDate",
        },
      },
      'count': {
        '$sum': 1,
      },
    },
  },
  {
    '$sort': {
      '_id': 1,
    },
  }]) 

In [19]:
mongo_df, mongo_cost = getMongoResult(query)

In [20]:
mongo_df.head()

Unnamed: 0,_id,count
0,2012-03-06,100
1,2012-03-07,42
2,2012-03-08,15
3,2012-03-09,6
4,2012-03-10,16


In [21]:
mongo_cost

0.37692050000000066

### Select posts by last activity time

In [22]:
#Take '2012-03-06' as an example
sql = '''
    SELECT *
    FROM Posts
    WHERE DATE_FORMAT(LastActivityDate, '%Y-%m-%d') = '2012-03-06';
'''

In [23]:
sql_df, sql_cost = getSQLResult(sql)

In [24]:
sql_df.head()

Unnamed: 0,Id,PostTypeId,AcceptedAnswerId,ParentId,CreationDate,DeletionDate,Score,ViewCount,Body,OwnerUserId,...,LastEditDate,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount,ClosedDate,CommunityOwnedDate,ContentLicense
0,6,2,,3.0,2012-03-06 19:19:20,,24,,<p>In comparison to other comparison-based sor...,41.0,...,2012-03-06 22:18:33,2012-03-06 22:18:33,,,,13,,,,CC BY-SA 3.0
1,7,1,,,2012-03-06 19:34:23,,15,376.0,<p>When placing geometric objects in a quadtre...,11.0,...,2012-03-06 19:47:07,2012-03-06 20:22:06,Which method is preferred for storing large ge...,<graphics><data-structures><computational-geom...,2.0,5,,,,CC BY-SA 3.0
2,9,2,,7.0,2012-03-06 19:44:18,,8,,"<p>Assuming you are storing a reference, not t...",44.0,...,NaT,2012-03-06 19:44:18,,,,1,,,,CC BY-SA 3.0
3,10,2,,3.0,2012-03-06 19:48:00,,41,,<p>I think one of the main reasons why QuickSo...,29.0,...,NaT,2012-03-06 19:48:00,,,,6,,,,CC BY-SA 3.0
4,12,2,,5.0,2012-03-06 19:55:00,,15,,"<p>In a truly ""cooperative"" setting, and if th...",43.0,...,NaT,2012-03-06 19:55:00,,,,0,,,,CC BY-SA 3.0


In [25]:
sql_cost

0.015488899999999362

In [26]:
query = lambda: mongodb.Posts.find(
            {
          "$expr": {
            "$regexMatch": {
              "input":{ '$dateToString': {
                      'format': "%Y-%m-%d",
                      'date': "$LastActivityDate",
                    }},
              "regex": '2012-03-06'
            }
          }
        })

In [27]:
mongo_df, mongo_cost = getMongoResult(query)

In [28]:
mongo_df.head()

Unnamed: 0,_id,PostId,PostTypeId,AcceptedAnswerId,ParentId,CreationDate,Score,ViewCount,Body,OwnerUserId,...,LastEditDate,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount,ClosedDate,CommunityOwnedDate,Comments
0,6551e8acdb0b505f45a61a88,6,2,,3.0,2012-03-06 19:19:20.237,24,,<p>In comparison to other comparison-based sor...,41.0,...,2012-03-06 22:18:33.027,2012-03-06 22:18:33.027,,[],,13,,,,"[{'CommentId': 14, 'Score': 0, 'Text': 'And ye..."
1,6551e8acdb0b505f45a61a89,7,1,,,2012-03-06 19:34:22.793,15,376.0,<p>When placing geometric objects in a quadtre...,11.0,...,2012-03-06 19:47:07.427,2012-03-06 20:22:05.510,Which method is preferred for storing large ge...,"[graphics, data-structures, computational-geom...",2.0,5,,,,"[{'CommentId': 26, 'Score': 1, 'Text': 'Certai..."
2,6551e8acdb0b505f45a61a8a,9,2,,7.0,2012-03-06 19:44:17.600,8,,"<p>Assuming you are storing a reference, not t...",44.0,...,NaT,2012-03-06 19:44:17.600,,[],,1,,,,"[{'CommentId': 29, 'Score': 0, 'Text': 'Thanks..."
3,6551e8acdb0b505f45a61a8b,10,2,,3.0,2012-03-06 19:48:00.343,41,,<p>I think one of the main reasons why QuickSo...,29.0,...,NaT,2012-03-06 19:48:00.343,,[],,6,,,,"[{'CommentId': 93, 'Score': 6, 'Text': 'That's..."
4,6551e8acdb0b505f45a61a8d,12,2,,5.0,2012-03-06 19:54:59.633,15,,"<p>In a truly ""cooperative"" setting, and if th...",43.0,...,NaT,2012-03-06 19:54:59.633,,[],,0,,,,


In [29]:
mongo_cost

0.00045659999999969614

### Select posts by creation time

In [30]:
#Take '2012-03-06' as an example
sql = '''
    SELECT *
    FROM Posts
    WHERE DATE_FORMAT(CreationDate, '%Y-%m-%d') = '2012-03-06';
'''

In [31]:
sql_df, sql_cost = getSQLResult(sql)

In [32]:
sql_df.head()

Unnamed: 0,Id,PostTypeId,AcceptedAnswerId,ParentId,CreationDate,DeletionDate,Score,ViewCount,Body,OwnerUserId,...,LastEditDate,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount,ClosedDate,CommunityOwnedDate,ContentLicense
0,2,1,28.0,,2012-03-06 19:06:06,,19,1343.0,"<p>The set difference operator (e.g., <code>EX...",5.0,...,2012-04-02 15:35:06,2013-05-29 00:50:35,Does the 'difference' operation add expressive...,<database-theory><relational-algebra><finite-m...,2.0,2,,,,CC BY-SA 3.0
1,3,1,90.0,,2012-03-06 19:11:07,,380,344624.0,<p>In a standard algorithms course we are taug...,24.0,...,2020-07-30 12:01:28,2020-07-31 22:29:47,Why is quicksort better than other sorting alg...,<algorithms><sorting>,12.0,18,,,,CC BY-SA 3.0
2,5,1,12.0,,2012-03-06 19:17:48,,19,1736.0,<p>Many operating systems references say that ...,40.0,...,2012-04-07 13:42:43,2012-04-09 22:15:05,Does cooperative scheduling suspend processes ...,<operating-systems><process-scheduling>,4.0,2,0.0,,,CC BY-SA 3.0
3,6,2,,3.0,2012-03-06 19:19:20,,24,,<p>In comparison to other comparison-based sor...,41.0,...,2012-03-06 22:18:33,2012-03-06 22:18:33,,,,13,,,,CC BY-SA 3.0
4,7,1,,,2012-03-06 19:34:23,,15,376.0,<p>When placing geometric objects in a quadtre...,11.0,...,2012-03-06 19:47:07,2012-03-06 20:22:06,Which method is preferred for storing large ge...,<graphics><data-structures><computational-geom...,2.0,5,,,,CC BY-SA 3.0


In [33]:
sql_cost

0.0014447999999998018

In [34]:
query = lambda: mongodb.Posts.find({
          "$expr": {
            "$regexMatch": {
              "input":{ '$dateToString': {
                      'format': "%Y-%m-%d",
                      'date': "$CreationDate",
                    }},
              "regex": '2012-03-06'
            }
          }
        })

In [35]:
mongo_df, mongo_cost = getMongoResult(query)

In [36]:
mongo_df.head()

Unnamed: 0,_id,PostId,PostTypeId,AcceptedAnswerId,ParentId,CreationDate,Score,ViewCount,Body,OwnerUserId,...,LastEditDate,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount,ClosedDate,CommunityOwnedDate,Comments
0,6551e8acdb0b505f45a61a85,2,1,28.0,,2012-03-06 19:06:05.667,19,1343.0,"<p>The set difference operator (e.g., <code>EX...",5.0,...,2012-04-02 15:35:05.827,2013-05-29 00:50:34.590,Does the 'difference' operation add expressive...,"[database-theory, relational-algebra, finite-m...",2.0,2,,,,"[{'CommentId': 8, 'Score': 2, 'Text': 'To show..."
1,6551e8acdb0b505f45a61a86,3,1,90.0,,2012-03-06 19:11:07.127,380,344624.0,<p>In a standard algorithms course we are taug...,24.0,...,2020-07-30 12:01:27.630,2020-07-31 22:29:46.523,Why is quicksort better than other sorting alg...,"[algorithms, sorting]",12.0,18,,,,"[{'CommentId': 11, 'Score': 4, 'Text': 'Merge ..."
2,6551e8acdb0b505f45a61a87,5,1,12.0,,2012-03-06 19:17:48.460,19,1736.0,<p>Many operating systems references say that ...,40.0,...,2012-04-07 13:42:43.093,2012-04-09 22:15:05.277,Does cooperative scheduling suspend processes ...,"[operating-systems, process-scheduling]",4.0,2,0.0,,,"[{'CommentId': 104, 'Score': 0, 'Text': 'This ..."
3,6551e8acdb0b505f45a61a88,6,2,,3.0,2012-03-06 19:19:20.237,24,,<p>In comparison to other comparison-based sor...,41.0,...,2012-03-06 22:18:33.027,2012-03-06 22:18:33.027,,[],,13,,,,"[{'CommentId': 14, 'Score': 0, 'Text': 'And ye..."
4,6551e8acdb0b505f45a61a89,7,1,,,2012-03-06 19:34:22.793,15,376.0,<p>When placing geometric objects in a quadtre...,11.0,...,2012-03-06 19:47:07.427,2012-03-06 20:22:05.510,Which method is preferred for storing large ge...,"[graphics, data-structures, computational-geom...",2.0,5,,,,"[{'CommentId': 26, 'Score': 1, 'Text': 'Certai..."


In [37]:
mongo_cost

0.0001265999999997547

### Select posts by viewcount

In [38]:
#Take '68' as an example
sql = '''
    SELECT *
    FROM Posts
    WHERE ViewCount = 68;
'''

In [39]:
sql_df, sql_cost = getSQLResult(sql)

In [40]:
sql_df.head()

Unnamed: 0,Id,PostTypeId,AcceptedAnswerId,ParentId,CreationDate,DeletionDate,Score,ViewCount,Body,OwnerUserId,...,LastEditDate,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount,ClosedDate,CommunityOwnedDate,ContentLicense
0,2064,1,2072.0,,2012-05-25 04:33:49,,3,68,<p>I need to recover a data block from a repea...,1642.0,...,2012-05-25 11:53:59,2012-05-25 11:53:59,Block detection in repeated stream,<algorithms><online-algorithms><communication-...,1,0,,NaT,,CC BY-SA 3.0
1,9306,1,9316.0,,2013-01-30 13:25:23,,0,68,<p>So say I have a task like this:</p>&#xA;&#x...,5281.0,...,NaT,2013-01-30 16:26:46,Clarification for a class of Turing machine pr...,<turing-machines>,1,5,,NaT,,CC BY-SA 3.0
2,10482,1,,,2013-03-12 13:46:05,,0,68,"<p>Suppose that there is some graph, with $n$ ...",7243.0,...,NaT,2013-03-12 13:46:05,"The name of ""finding the path of a graph that ...",<complexity-theory><graphs>,0,3,,2016-07-05 08:17:58,,CC BY-SA 3.0
3,11872,1,,,2013-05-08 00:29:13,,1,68,"<p>Is there a common code metric for ""code red...",1829.0,...,2013-05-09 01:46:25,2013-05-09 01:46:25,Code metric for code redundancy or code cloning,<reference-request><empirical-research>,0,5,,NaT,,CC BY-SA 3.0
4,13418,1,13430.0,,2013-07-24 14:07:52,,2,68,<p>I'm trying to create simple tokenizer to tr...,9325.0,...,2013-07-24 15:48:59,2013-07-25 08:41:27,Tokenizer and complex operators,<parsers>,1,3,,NaT,,CC BY-SA 3.0


In [41]:
sql_cost

0.07485959999999992

In [42]:
query = lambda: mongodb.Posts.find({"ViewCount" : 68})

In [43]:
mongo_df, mongo_cost = getMongoResult(query)

In [44]:
mongo_df.head()

Unnamed: 0,_id,PostId,PostTypeId,AcceptedAnswerId,ParentId,CreationDate,Score,ViewCount,Body,OwnerUserId,...,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount,ClosedDate,CommunityOwnedDate,Comments,PostLinks
0,6551e8acdb0b505f45a621b1,2064,1,2072.0,,2012-05-25 04:33:48.567,3,68,<p>I need to recover a data block from a repea...,1642.0,...,2012-05-25 11:53:58.550,Block detection in repeated stream,"[algorithms, online-algorithms, communication-...",1,0,,NaT,,,
1,6551e8acdb0b505f45a62ff7,9306,1,9316.0,,2013-01-30 13:25:23.467,0,68,<p>So say I have a task like this:</p>\n\n<blo...,5281.0,...,2013-01-30 16:26:46.157,Clarification for a class of Turing machine pr...,[turing-machines],1,5,,NaT,,"[{'CommentId': 17893, 'Score': 1, 'Text': 'The...",
2,6551e8acdb0b505f45a633db,10482,1,,,2013-03-12 13:46:05.437,0,68,"<p>Suppose that there is some graph, with $n$ ...",7243.0,...,2013-03-12 13:46:05.437,"The name of ""finding the path of a graph that ...","[complexity-theory, graphs]",0,3,,2016-07-05 08:17:58.053,,"[{'CommentId': 20559, 'Score': 0, 'Text': 'oh ...","[{'PostLinkId': 174965, 'CreationDate': 2013-0..."
3,6551e8acdb0b505f45a63858,11872,1,,,2013-05-08 00:29:13.067,1,68,"<p>Is there a common code metric for ""code red...",1829.0,...,2013-05-09 01:46:25.000,Code metric for code redundancy or code cloning,"[reference-request, empirical-research]",0,5,,NaT,,"[{'CommentId': 24799, 'Score': 1, 'Text': 'loo...",
4,6551e8acdb0b505f45a63d21,13418,1,13430.0,,2013-07-24 14:07:52.047,2,68,<p>I'm trying to create simple tokenizer to tr...,9325.0,...,2013-07-25 08:41:26.530,Tokenizer and complex operators,[parsers],1,3,,NaT,,"[{'CommentId': 28411, 'Score': 2, 'Text': 'Up ...",


In [45]:
mongo_cost

0.00010890000000074451

### Select posts by score

In [46]:
#Take '2' as an example
sql = '''
    SELECT *
    FROM Posts
    WHERE Score = 2;
'''

In [47]:
sql_df, sql_cost = getSQLResult(sql)

In [48]:
sql_df.head()

Unnamed: 0,Id,PostTypeId,AcceptedAnswerId,ParentId,CreationDate,DeletionDate,Score,ViewCount,Body,OwnerUserId,...,LastEditDate,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount,ClosedDate,CommunityOwnedDate,ContentLicense
0,30,2,,27.0,2012-03-06 22:09:53,,2,,"<p>You may want to have a look at <a href=""htt...",21.0,...,NaT,2012-03-06 22:09:53,,,,2,,NaT,NaT,CC BY-SA 3.0
1,193,2,,163.0,2012-03-10 12:08:48,,2,,<p>Since permuting two rows within a single bl...,24.0,...,2012-03-10 12:20:38,2012-03-10 12:20:38,,,,0,,NaT,NaT,CC BY-SA 3.0
2,211,2,,196.0,2012-03-10 20:56:45,,2,,"<p>This doesn't fit case 3 exactly, but I don'...",71.0,...,NaT,2012-03-10 20:56:45,,,,2,,NaT,NaT,CC BY-SA 3.0
3,228,2,,227.0,2012-03-12 00:06:03,,2,,<p>You get fewer special cases. In many situa...,15.0,...,2012-03-12 18:06:47,2012-03-12 18:06:47,,,,7,,NaT,NaT,CC BY-SA 3.0
4,229,2,,227.0,2012-03-12 01:31:42,,2,,<p>Having links to the parent directory makes ...,29.0,...,NaT,2012-03-12 01:31:42,,,,3,,NaT,NaT,CC BY-SA 3.0


In [49]:
sql_cost

0.008162499999999184

In [50]:
query = lambda: mongodb.Posts.find({"Score" : 2})

In [51]:
mongo_df, mongo_cost = getMongoResult(query)

In [52]:
mongo_df.head()

Unnamed: 0,_id,PostId,PostTypeId,AcceptedAnswerId,ParentId,CreationDate,Score,ViewCount,Body,OwnerUserId,...,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount,ClosedDate,CommunityOwnedDate,Comments,PostLinks
0,6551e8acdb0b505f45a61a9e,30,2,,27.0,2012-03-06 22:09:52.677,2,,"<p>You may want to have a look at <a href=""htt...",21.0,...,2012-03-06 22:09:52.677,,[],,2,,NaT,NaT,"[{'CommentId': 76, 'Score': 2, 'Text': 'And ho...",
1,6551e8acdb0b505f45a61b38,193,2,,163.0,2012-03-10 12:08:48.173,2,,<p>Since permuting two rows within a single bl...,24.0,...,2012-03-10 12:20:37.923,,[],,0,,NaT,NaT,,
2,6551e8acdb0b505f45a61b48,211,2,,196.0,2012-03-10 20:56:44.693,2,,"<p>This doesn't fit case 3 exactly, but I don'...",71.0,...,2012-03-10 20:56:44.693,,[],,2,,NaT,NaT,"[{'CommentId': 542, 'Score': 1, 'Text': 'Writt...",
3,6551e8acdb0b505f45a61b58,228,2,,227.0,2012-03-12 00:06:02.723,2,,<p>You get fewer special cases. In many situa...,15.0,...,2012-03-12 18:06:47.050,,[],,7,,NaT,NaT,"[{'CommentId': 566, 'Score': 3, 'Text': 'If th...",
4,6551e8acdb0b505f45a61b59,229,2,,227.0,2012-03-12 01:31:42.280,2,,<p>Having links to the parent directory makes ...,29.0,...,2012-03-12 01:31:42.280,,[],,3,,NaT,NaT,"[{'CommentId': 571, 'Score': 3, 'Text': 'Same ...",


In [53]:
mongo_cost

6.319999999959691e-05

### Select posts which do not have an answer.

In [54]:
#Take '2' as an example
sql = '''
    SELECT *
    FROM Posts
    WHERE AnswerCount = 0;
'''

In [55]:
sql_df, sql_cost = getSQLResult(sql)

In [56]:
sql_df.head()

Unnamed: 0,Id,PostTypeId,AcceptedAnswerId,ParentId,CreationDate,DeletionDate,Score,ViewCount,Body,OwnerUserId,...,LastEditDate,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount,ClosedDate,CommunityOwnedDate,ContentLicense
0,81,1,,,2012-03-07 09:27:48,,10,133,"<p>I am reading <a href=""http://www.google.ch/...",125.0,...,NaT,2012-03-07 09:27:48,Applying the graph mining algorithm Leap Searc...,<data-mining>,0,3,,NaT,,CC BY-SA 3.0
1,933,1,,,2012-04-01 04:01:58,,4,111,<p>I have asked a series of questions concerni...,69.0,...,2017-04-13 12:48:34,2012-04-01 13:42:33,Computational power of nondeterministic type-2...,<formal-languages><automata>,0,3,,NaT,,CC BY-SA 3.0
2,1050,1,,,2012-04-05 02:59:27,,0,121,<blockquote>&#xA; <p><strong>Possible Duplica...,,...,2017-04-13 12:48:33,2012-04-05 03:20:46,Multiples of n is a regular language,<formal-languages><regular-languages>,0,2,,2012-04-05 14:34:37,,CC BY-SA 3.0
3,1502,1,,,2012-04-25 13:44:46,,8,582,<p>Assume I want to insert elements $1$ to $n$...,220.0,...,2013-05-05 15:55:44,2013-05-05 15:55:44,Predecessor query where the insertion order is...,<data-structures><runtime-analysis>,0,6,,NaT,,CC BY-SA 3.0
4,1602,1,,,2012-04-30 23:20:10,,8,84,<p>My organization wants to maintain multiple ...,1038.0,...,2012-05-01 01:46:08,2012-05-01 01:46:08,Distributed Storage for Access and Preservation,<digital-preservation><distributed-systems><st...,0,1,,NaT,,CC BY-SA 3.0


In [57]:
sql_cost

0.01651599999999931

In [58]:
query = lambda: mongodb.Posts.find({"AnswerCount" : 0})

In [59]:
mongo_df, mongo_cost = getMongoResult(query)

In [60]:
mongo_df.head()

Unnamed: 0,_id,PostId,PostTypeId,AcceptedAnswerId,ParentId,CreationDate,Score,ViewCount,Body,OwnerUserId,...,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount,ClosedDate,CommunityOwnedDate,Comments,PostLinks
0,6551e8acdb0b505f45a61acf,81,1,,,2012-03-07 09:27:48.330,10,133,"<p>I am reading <a href=""http://www.google.ch/...",125.0,...,2012-03-07 09:27:48.330,Applying the graph mining algorithm Leap Searc...,[data-mining],0,3,,NaT,,"[{'CommentId': 173, 'Score': 0, 'Text': 'If I ...",
1,6551e8acdb0b505f45a61de4,933,1,,,2012-04-01 04:01:58.047,4,111,<p>I have asked a series of questions concerni...,69.0,...,2012-04-01 13:42:32.693,Computational power of nondeterministic type-2...,"[formal-languages, automata]",0,3,,NaT,,"[{'CommentId': 2204, 'Score': 0, 'Text': 'We a...","[{'PostLinkId': 7904, 'CreationDate': 2012-04-..."
2,6551e8acdb0b505f45a61e4e,1050,1,,,2012-04-05 02:59:27.360,0,121,<blockquote>\n <p><strong>Possible Duplicate:...,,...,2012-04-05 03:20:45.850,Multiples of n is a regular language,"[formal-languages, regular-languages]",0,2,,2012-04-05 14:34:36.797,,"[{'CommentId': 2500, 'Score': 6, 'Text': 'poss...","[{'PostLinkId': 9369, 'CreationDate': 2012-04-..."
3,6551e8acdb0b505f45a61fcc,1502,1,,,2012-04-25 13:44:46.220,8,582,<p>Assume I want to insert elements $1$ to $n$...,220.0,...,2013-05-05 15:55:44.163,Predecessor query where the insertion order is...,"[data-structures, runtime-analysis]",0,6,,NaT,,"[{'CommentId': 3860, 'Score': 1, 'Text': 'Plea...",
4,6551e8acdb0b505f45a6201f,1602,1,,,2012-04-30 23:20:10.173,8,84,<p>My organization wants to maintain multiple ...,1038.0,...,2012-05-01 01:46:07.983,Distributed Storage for Access and Preservation,"[digital-preservation, distributed-systems, st...",0,1,,NaT,,"[{'CommentId': 300906, 'Score': 0, 'Text': 'Th...",


In [61]:
mongo_cost

9.070000000122036e-05

### Select posts which do not have an accepted answer.

In [62]:
#Take '2' as an example
sql = '''
        SELECT *
        FROM Posts
        WHERE AcceptedAnswerId IS NULL;
'''

In [63]:
sql_df, sql_cost = getSQLResult(sql)

In [64]:
sql_df.head()

Unnamed: 0,Id,PostTypeId,AcceptedAnswerId,ParentId,CreationDate,DeletionDate,Score,ViewCount,Body,OwnerUserId,...,LastEditDate,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount,ClosedDate,CommunityOwnedDate,ContentLicense
0,6,2,,3.0,2012-03-06 19:19:20,,24,,<p>In comparison to other comparison-based sor...,41.0,...,2012-03-06 22:18:33,2012-03-06 22:18:33,,,,13,,NaT,NaT,CC BY-SA 3.0
1,7,1,,,2012-03-06 19:34:23,,15,376.0,<p>When placing geometric objects in a quadtre...,11.0,...,2012-03-06 19:47:07,2012-03-06 20:22:06,Which method is preferred for storing large ge...,<graphics><data-structures><computational-geom...,2.0,5,,NaT,NaT,CC BY-SA 3.0
2,9,2,,7.0,2012-03-06 19:44:18,,8,,"<p>Assuming you are storing a reference, not t...",44.0,...,NaT,2012-03-06 19:44:18,,,,1,,NaT,NaT,CC BY-SA 3.0
3,10,2,,3.0,2012-03-06 19:48:00,,41,,<p>I think one of the main reasons why QuickSo...,29.0,...,NaT,2012-03-06 19:48:00,,,,6,,NaT,NaT,CC BY-SA 3.0
4,12,2,,5.0,2012-03-06 19:55:00,,15,,"<p>In a truly ""cooperative"" setting, and if th...",43.0,...,NaT,2012-03-06 19:55:00,,,,0,,NaT,NaT,CC BY-SA 3.0


In [65]:
sql_cost

0.0023933999999989908

In [66]:
query = lambda: mongodb.Posts.find({"AcceptedAnswerId" : None})

In [67]:
mongo_df, mongo_cost = getMongoResult(query)

In [68]:
mongo_df.head()

Unnamed: 0,_id,PostId,PostTypeId,AcceptedAnswerId,ParentId,CreationDate,Score,ViewCount,Body,OwnerUserId,...,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount,ClosedDate,CommunityOwnedDate,Comments,PostLinks
0,6551e8acdb0b505f45a61a88,6,2,,3.0,2012-03-06 19:19:20.237,24,,<p>In comparison to other comparison-based sor...,41.0,...,2012-03-06 22:18:33.027,,[],,13,,NaT,NaT,"[{'CommentId': 14, 'Score': 0, 'Text': 'And ye...",
1,6551e8acdb0b505f45a61a89,7,1,,,2012-03-06 19:34:22.793,15,376.0,<p>When placing geometric objects in a quadtre...,11.0,...,2012-03-06 20:22:05.510,Which method is preferred for storing large ge...,"[graphics, data-structures, computational-geom...",2.0,5,,NaT,NaT,"[{'CommentId': 26, 'Score': 1, 'Text': 'Certai...",
2,6551e8acdb0b505f45a61a8a,9,2,,7.0,2012-03-06 19:44:17.600,8,,"<p>Assuming you are storing a reference, not t...",44.0,...,2012-03-06 19:44:17.600,,[],,1,,NaT,NaT,"[{'CommentId': 29, 'Score': 0, 'Text': 'Thanks...",
3,6551e8acdb0b505f45a61a8b,10,2,,3.0,2012-03-06 19:48:00.343,41,,<p>I think one of the main reasons why QuickSo...,29.0,...,2012-03-06 19:48:00.343,,[],,6,,NaT,NaT,"[{'CommentId': 93, 'Score': 6, 'Text': 'That's...",
4,6551e8acdb0b505f45a61a8d,12,2,,5.0,2012-03-06 19:54:59.633,15,,"<p>In a truly ""cooperative"" setting, and if th...",43.0,...,2012-03-06 19:54:59.633,,[],,0,,NaT,NaT,,


In [69]:
mongo_cost

8.379999999874599e-05

### Find recently closed posts

In [70]:
#Show 10 recently closed posts.
sql = '''
    SELECT *
    FROM Posts
    ORDER BY ClosedDate DESC
    LIMIT 10;
'''

In [71]:
sql_df, sql_cost = getSQLResult(sql)

In [72]:
sql_df.head()

Unnamed: 0,Id,PostTypeId,AcceptedAnswerId,ParentId,CreationDate,DeletionDate,Score,ViewCount,Body,OwnerUserId,...,LastEditDate,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount,ClosedDate,CommunityOwnedDate,ContentLicense
0,161781,1,,,2023-08-27 04:51:28,,-2,39,<p>The algorithm assumes the graph is 3-colora...,162670,...,2023-08-27 04:53:59,2023-08-27 04:53:59,Is this 3-colorability algorithm polynomial?,<complexity-theory><p-vs-np><3-sat><colorings>,0,2,,2023-08-28 01:29:50,,CC BY-SA 4.0
1,160197,1,,,2023-05-15 22:34:09,,1,18,<p>I'm working on implementing a puzzle board ...,160598,...,NaT,2023-05-15 22:34:09,generating solvable puzzles for a Double-Choco...,<algorithms><graphs><data-structures><optimiza...,0,2,,2023-08-28 01:28:30,,CC BY-SA 4.0
2,161778,1,,,2023-08-26 16:23:33,,0,9,<p>The input of my neural network consists of ...,162660,...,NaT,2023-08-26 16:23:33,What Model to Choose for a NN with a Very Wide...,<machine-learning><neural-networks>,0,2,,2023-08-27 00:34:40,,CC BY-SA 4.0
3,161733,1,,,2023-08-23 16:28:34,,-3,36,<p>TL;DR: How can I make my final year project...,162599,...,2023-08-24 18:30:13,2023-08-24 18:30:13,Computer Science final year project ideas / su...,<complexity-theory>,0,5,,2023-08-25 07:31:07,,CC BY-SA 4.0
4,161703,1,,,2023-08-20 13:16:33,,0,18,<p>I came across this question presented in a ...,162465,...,NaT,2023-08-20 13:16:33,"prove that the language L = { ww | w ∈ {a,b}* ...",<formal-languages><context-free><proof-techniq...,0,0,,2023-08-20 23:15:09,,CC BY-SA 4.0


In [73]:
sql_cost

0.9783551999999993

In [74]:
query = lambda: mongodb.Posts.find({"ClosedDate":{"$exists": True}}).sort('ClosedDate',pymongo.DESCENDING).limit(10)

In [75]:
mongo_df, mongo_cost = getMongoResult(query)

In [76]:
mongo_df.head()

Unnamed: 0,_id,PostId,PostTypeId,AcceptedAnswerId,ParentId,CreationDate,Score,ViewCount,Body,OwnerUserId,...,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount,ClosedDate,CommunityOwnedDate,Comments,PostLinks
0,6551e8aedb0b505f45a7ab6b,161781,1,,,2023-08-27 04:51:27.650,-2,39,<p>The algorithm assumes the graph is 3-colora...,162670,...,2023-08-27 04:53:58.727,Is this 3-colorability algorithm polynomial?,"[complexity-theory, p-vs-np, 3-sat, colorings]",0,2,,2023-08-28 01:29:50.107,,"[{'CommentId': 338296, 'Score': 0, 'Text': 'Yo...",
1,6551e8addb0b505f45a7a65d,160197,1,,,2023-05-15 22:34:08.733,1,18,<p>I'm working on implementing a puzzle board ...,160598,...,2023-05-15 22:34:08.733,generating solvable puzzles for a Double-Choco...,"[algorithms, graphs, data-structures, optimiza...",0,2,,2023-08-28 01:28:30.280,,"[{'CommentId': 338294, 'Score': 0, 'Text': 'Cr...",
2,6551e8aedb0b505f45a7ab68,161778,1,,,2023-08-26 16:23:33.213,0,9,<p>The input of my neural network consists of ...,162660,...,2023-08-26 16:23:33.213,What Model to Choose for a NN with a Very Wide...,"[machine-learning, neural-networks]",0,2,,2023-08-27 00:34:39.767,,"[{'CommentId': 338283, 'Score': 0, 'Text': 'Cr...",
3,6551e8aedb0b505f45a7ab3f,161733,1,,,2023-08-23 16:28:34.433,-3,36,<p>TL;DR: How can I make my final year project...,162599,...,2023-08-24 18:30:13.040,Computer Science final year project ideas / su...,[complexity-theory],0,5,,2023-08-25 07:31:07.220,,"[{'CommentId': 338243, 'Score': 2, 'Text': 'I ...",
4,6551e8aedb0b505f45a7ab28,161703,1,,,2023-08-20 13:16:33.107,0,18,<p>I came across this question presented in a ...,162465,...,2023-08-20 13:16:33.107,"prove that the language L = { ww | w ∈ {a,b}* ...","[formal-languages, context-free, proof-techniq...",0,0,,2023-08-20 23:15:08.920,,,"[{'PostLinkId': 2998333, 'CreationDate': 2023-..."


In [77]:
mongo_cost

0.0007044999999976653

### Select posts by tag

In [78]:
#Take 'cpu-pipelines' as an example
sql = '''
    SELECT p.*
    FROM Posts p
    JOIN post_tags pt
    ON p.Id = pt.PostId
    JOIN tags t
    ON pt.TagId = t.Id
    WHERE t.TagName = 'cpu-pipelines';
'''

In [79]:
sql_df, sql_cost = getSQLResult(sql)

In [80]:
sql_df.head()

Unnamed: 0,Id,PostTypeId,AcceptedAnswerId,ParentId,CreationDate,DeletionDate,Score,ViewCount,Body,OwnerUserId,...,LastEditDate,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount,ClosedDate,CommunityOwnedDate,ContentLicense
0,73,1,76.0,,2012-03-07 05:11:44,,11,654,<p>I have observed that there are two differen...,,...,2012-03-07 05:32:21,2012-11-14 00:40:54,Which kind of branch prediction is more import...,<cpu-pipelines><computer-architecture>,2,1,,NaT,,CC BY-SA 3.0
1,1936,1,6646.0,,2012-05-19 20:49:42,,8,1866,<p>I'm looking for some relatively simple exam...,1554.0,...,2012-05-19 21:12:40,2012-11-13 14:32:06,When do structural hazards occur in pipelined ...,<computer-architecture><cpu-pipelines>,1,0,,NaT,,CC BY-SA 3.0
2,19668,1,19681.0,,2014-01-12 14:04:37,,5,12063,<p>I'm a little confused about the difference ...,12774.0,...,2014-01-13 07:08:01,2014-08-07 14:26:06,Difference between memory access and write-bac...,<computer-architecture><cpu-pipelines>,3,2,,NaT,,CC BY-SA 3.0
3,20093,1,,,2014-01-30 03:29:25,,3,7085,<p><strong>NOTE</strong>: Let me point out tha...,6569.0,...,2014-01-30 14:48:47,2018-01-06 10:14:10,How are the control signals derived in the MIP...,<computer-architecture><cpu-pipelines>,1,4,,NaT,,CC BY-SA 3.0
4,21924,1,,,2014-02-22 16:04:48,,0,11557,<p>I was trying to solve a question dealing wi...,14939.0,...,2014-03-27 18:36:41,2014-03-27 18:36:41,Execution time of an uneven pipeline,<computer-architecture><cpu-pipelines>,3,1,,NaT,,CC BY-SA 3.0


In [81]:
sql_cost

0.014892700000000758

In [82]:
query = lambda: mongodb.Posts.find({"Tags" : 'cpu-pipelines'})

In [83]:
mongo_df, mongo_cost = getMongoResult(query)

In [84]:
mongo_df.head()

Unnamed: 0,_id,PostId,PostTypeId,AcceptedAnswerId,ParentId,CreationDate,Score,ViewCount,Body,OwnerUserId,...,LastActivityDate,Title,Tags,AnswerCount,CommentCount,FavoriteCount,ClosedDate,CommunityOwnedDate,Comments,PostLinks
0,6551e8acdb0b505f45a61ac9,73,1,76.0,,2012-03-07 05:11:43.907,11,654,<p>I have observed that there are two differen...,,...,2012-11-14 00:40:54.050,Which kind of branch prediction is more import...,"[cpu-pipelines, computer-architecture]",2,1,,NaT,,"[{'CommentId': 143, 'Score': 1, 'Text': 'Maybe...",
1,6551e8acdb0b505f45a62144,1936,1,6646.0,,2012-05-19 20:49:42.443,8,1866,<p>I'm looking for some relatively simple exam...,1554.0,...,2012-11-13 14:32:05.693,When do structural hazards occur in pipelined ...,"[computer-architecture, cpu-pipelines]",1,0,,NaT,,,
2,6551e8acdb0b505f45a64975,19668,1,19681.0,,2014-01-12 14:04:36.507,5,12063,<p>I'm a little confused about the difference ...,12774.0,...,2014-08-07 14:26:06.260,Difference between memory access and write-bac...,"[computer-architecture, cpu-pipelines]",3,2,,NaT,,"[{'CommentId': 39958, 'Score': 0, 'Text': 'Whe...",
3,6551e8acdb0b505f45a64a9b,20093,1,,,2014-01-30 03:29:24.877,3,7085,<p><strong>NOTE</strong>: Let me point out tha...,6569.0,...,2018-01-06 10:14:09.823,How are the control signals derived in the MIP...,"[computer-architecture, cpu-pipelines]",1,4,,NaT,,"[{'CommentId': 41065, 'Score': 0, 'Text': 'You...",
4,6551e8addb0b505f45a64ceb,21924,1,,,2014-02-22 16:04:48.137,0,11557,<p>I was trying to solve a question dealing wi...,14939.0,...,2014-03-27 18:36:40.790,Execution time of an uneven pipeline,"[computer-architecture, cpu-pipelines]",3,1,,NaT,,"[{'CommentId': 43358, 'Score': 1, 'Text': 'Wha...",


In [85]:
mongo_cost

8.289999999888664e-05