In [1]:
import numpy as np
import pandas as pd
import re
from unidecode import unidecode

In [2]:
def clean_string(s):
    s = unidecode(s)
    #Replace newlines with spaces
    s = s.replace("\n"," ")
    #Delete handles
    s = re.sub(r"@(.+?)\b:*","",s)
    #Delete "RT"
    s = re.sub(r"RT","",s)
    #Delete hashtags
    s = re.sub(r"#(.+?)\b","",s)
    #Delete hyperlinks
    s = re.sub(r"http(.+?)(?:$|\s)","",s)
    s = s.strip()
    return s

In [3]:
def cleanDataframe(df):
    print(f"Before:\t{df.shape}")
    df = df.copy()

    #Remove unnecessary columns
    df=df[["created_at","text","truncated","lang"]]

    #Drop rows with missing text
    df.dropna(subset=["text"], inplace=True)

    #Drop rows with truncated text
    df = df.loc[df["truncated"]==0]
    df.shape

    #Drop all non-English entries
    df = df.loc[df["lang"]=="en"]
    df.shape

    #Drop the now-unnecessary columns
    df = df[["created_at","text"]]

    #Clean the strings
    df["text"] = df["text"].apply(clean_string)
    df.sample(10, random_state=11)

    #Reset the index
    df.reset_index(drop=True,inplace=True)
    
    print(f"After:\t{df.shape}")
    return df

In [7]:
import os

#Select 4 hours of tweets: hours 0, 6, 12, and 18
basePath = "2020_06_11"
directories = ["00", "06", "12", "18"]
paths = [os.path.join(basePath,d) for d in directories]

df_big = pd.DataFrame()
for path in paths:
    for file_name in os.listdir(path):
        print(os.path.join(path,file_name))
        df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
        df_big = pd.concat([df_big, cleanDataframe(df)])
        print(f"Full shape: {df_big.shape}")

2020_06_11/00/30.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3239, 37)
After:	(790, 2)
Full shape: (790, 2)
2020_06_11/00/51.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3468, 38)
After:	(752, 2)
Full shape: (1542, 2)
2020_06_11/00/54.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3018, 37)
After:	(732, 2)
Full shape: (2274, 2)
2020_06_11/00/55.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(2947, 37)
After:	(728, 2)
Full shape: (3002, 2)
2020_06_11/00/58.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(2974, 37)
After:	(726, 2)
Full shape: (3728, 2)
2020_06_11/00/33.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3295, 38)
After:	(773, 2)
Full shape: (4501, 2)
2020_06_11/00/48.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(2906, 37)
After:	(757, 2)
Full shape: (5258, 2)
2020_06_11/00/56.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(2868, 38)
After:	(725, 2)
Full shape: (5983, 2)
2020_06_11/00/43.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3241, 37)
After:	(694, 2)
Full shape: (6677, 2)
2020_06_11/00/47.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(2960, 37)
After:	(774, 2)
Full shape: (7451, 2)
2020_06_11/00/42.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3380, 37)
After:	(747, 2)
Full shape: (8198, 2)
2020_06_11/00/31.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3005, 37)
After:	(755, 2)
Full shape: (8953, 2)
2020_06_11/00/52.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3016, 37)
After:	(682, 2)
Full shape: (9635, 2)
2020_06_11/00/35.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3052, 37)
After:	(724, 2)
Full shape: (10359, 2)
2020_06_11/00/40.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(2919, 37)
After:	(743, 2)
Full shape: (11102, 2)
2020_06_11/00/46.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3004, 37)
After:	(717, 2)
Full shape: (11819, 2)
2020_06_11/00/49.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3020, 37)
After:	(709, 2)
Full shape: (12528, 2)
2020_06_11/00/59.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(2882, 37)
After:	(742, 2)
Full shape: (13270, 2)
2020_06_11/00/45.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3049, 37)
After:	(720, 2)
Full shape: (13990, 2)
2020_06_11/00/53.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3185, 38)
After:	(758, 2)
Full shape: (14748, 2)
2020_06_11/00/50.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3396, 37)
After:	(747, 2)
Full shape: (15495, 2)
2020_06_11/00/29.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3326, 38)
After:	(810, 2)
Full shape: (16305, 2)
2020_06_11/00/34.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3471, 38)
After:	(778, 2)
Full shape: (17083, 2)
2020_06_11/00/41.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3374, 37)
After:	(705, 2)
Full shape: (17788, 2)
2020_06_11/00/44.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3291, 37)
After:	(710, 2)
Full shape: (18498, 2)
2020_06_11/00/57.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(2865, 37)
After:	(701, 2)
Full shape: (19199, 2)
2020_06_11/00/37.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3042, 37)
After:	(751, 2)
Full shape: (19950, 2)
2020_06_11/00/36.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3039, 37)
After:	(766, 2)
Full shape: (20716, 2)
2020_06_11/00/32.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3162, 37)
After:	(791, 2)
Full shape: (21507, 2)
2020_06_11/00/39.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3018, 37)
After:	(741, 2)
Full shape: (22248, 2)
2020_06_11/00/38.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(2988, 37)
After:	(725, 2)
Full shape: (22973, 2)
2020_06_11/06/08.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4495, 38)
After:	(921, 2)
Full shape: (23894, 2)
2020_06_11/06/28.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4695, 38)
After:	(989, 2)
Full shape: (24883, 2)
2020_06_11/06/26.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4621, 37)
After:	(926, 2)
Full shape: (25809, 2)
2020_06_11/06/30.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4735, 38)
After:	(1053, 2)
Full shape: (26862, 2)
2020_06_11/06/04.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4471, 37)
After:	(905, 2)
Full shape: (27767, 2)
2020_06_11/06/06.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4413, 38)
After:	(921, 2)
Full shape: (28688, 2)
2020_06_11/06/51.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4527, 37)
After:	(1038, 2)
Full shape: (29726, 2)
2020_06_11/06/54.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4659, 37)
After:	(1051, 2)
Full shape: (30777, 2)
2020_06_11/06/55.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4555, 38)
After:	(1055, 2)
Full shape: (31832, 2)
2020_06_11/06/58.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4913, 37)
After:	(1013, 2)
Full shape: (32845, 2)
2020_06_11/06/33.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4731, 37)
After:	(967, 2)
Full shape: (33812, 2)
2020_06_11/06/09.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4451, 37)
After:	(986, 2)
Full shape: (34798, 2)
2020_06_11/06/11.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4326, 37)
After:	(960, 2)
Full shape: (35758, 2)
2020_06_11/06/21.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4490, 37)
After:	(931, 2)
Full shape: (36689, 2)
2020_06_11/06/03.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4510, 37)
After:	(940, 2)
Full shape: (37629, 2)
2020_06_11/06/48.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4475, 37)
After:	(962, 2)
Full shape: (38591, 2)
2020_06_11/06/56.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4476, 37)
After:	(989, 2)
Full shape: (39580, 2)
2020_06_11/06/05.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4605, 37)
After:	(913, 2)
Full shape: (40493, 2)
2020_06_11/06/24.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4630, 37)
After:	(949, 2)
Full shape: (41442, 2)
2020_06_11/06/43.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4669, 37)
After:	(946, 2)
Full shape: (42388, 2)
2020_06_11/06/47.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4485, 37)
After:	(991, 2)
Full shape: (43379, 2)
2020_06_11/06/27.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4298, 38)
After:	(964, 2)
Full shape: (44343, 2)
2020_06_11/06/42.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4650, 37)
After:	(1033, 2)
Full shape: (45376, 2)
2020_06_11/06/19.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4455, 37)
After:	(946, 2)
Full shape: (46322, 2)
2020_06_11/06/25.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4573, 37)
After:	(973, 2)
Full shape: (47295, 2)
2020_06_11/06/01.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4583, 37)
After:	(941, 2)
Full shape: (48236, 2)
2020_06_11/06/02.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4588, 37)
After:	(886, 2)
Full shape: (49122, 2)
2020_06_11/06/00.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4721, 38)
After:	(1025, 2)
Full shape: (50147, 2)
2020_06_11/06/31.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4622, 37)
After:	(1072, 2)
Full shape: (51219, 2)
2020_06_11/06/23.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4579, 37)
After:	(920, 2)
Full shape: (52139, 2)
2020_06_11/06/52.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4554, 37)
After:	(1079, 2)
Full shape: (53218, 2)
2020_06_11/06/13.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4522, 37)
After:	(981, 2)
Full shape: (54199, 2)
2020_06_11/06/14.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4831, 37)
After:	(953, 2)
Full shape: (55152, 2)
2020_06_11/06/35.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4791, 37)
After:	(1001, 2)
Full shape: (56153, 2)
2020_06_11/06/20.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4649, 37)
After:	(962, 2)
Full shape: (57115, 2)
2020_06_11/06/40.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4727, 37)
After:	(987, 2)
Full shape: (58102, 2)
2020_06_11/06/46.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4531, 37)
After:	(1006, 2)
Full shape: (59108, 2)
2020_06_11/06/49.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4770, 37)
After:	(996, 2)
Full shape: (60104, 2)
2020_06_11/06/18.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4566, 37)
After:	(989, 2)
Full shape: (61093, 2)
2020_06_11/06/59.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4627, 37)
After:	(960, 2)
Full shape: (62053, 2)
2020_06_11/06/45.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4860, 37)
After:	(992, 2)
Full shape: (63045, 2)
2020_06_11/06/22.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4511, 37)
After:	(963, 2)
Full shape: (64008, 2)
2020_06_11/06/16.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4480, 38)
After:	(969, 2)
Full shape: (64977, 2)
2020_06_11/06/53.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4632, 37)
After:	(1025, 2)
Full shape: (66002, 2)
2020_06_11/06/15.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4546, 37)
After:	(920, 2)
Full shape: (66922, 2)
2020_06_11/06/50.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4569, 37)
After:	(1036, 2)
Full shape: (67958, 2)
2020_06_11/06/17.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4433, 37)
After:	(929, 2)
Full shape: (68887, 2)
2020_06_11/06/29.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4627, 37)
After:	(936, 2)
Full shape: (69823, 2)
2020_06_11/06/34.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4801, 37)
After:	(1007, 2)
Full shape: (70830, 2)
2020_06_11/06/12.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4477, 38)
After:	(1000, 2)
Full shape: (71830, 2)
2020_06_11/06/41.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4657, 37)
After:	(1008, 2)
Full shape: (72838, 2)
2020_06_11/06/44.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4624, 37)
After:	(987, 2)
Full shape: (73825, 2)
2020_06_11/06/57.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4649, 37)
After:	(1000, 2)
Full shape: (74825, 2)
2020_06_11/06/37.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4628, 37)
After:	(959, 2)
Full shape: (75784, 2)
2020_06_11/06/36.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4627, 37)
After:	(971, 2)
Full shape: (76755, 2)
2020_06_11/06/32.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4730, 38)
After:	(953, 2)
Full shape: (77708, 2)
2020_06_11/06/10.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4468, 37)
After:	(900, 2)
Full shape: (78608, 2)
2020_06_11/06/39.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4715, 37)
After:	(1019, 2)
Full shape: (79627, 2)
2020_06_11/06/38.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4688, 37)
After:	(987, 2)
Full shape: (80614, 2)
2020_06_11/06/07.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4587, 37)
After:	(988, 2)
Full shape: (81602, 2)
2020_06_11/12/08.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8508, 37)
After:	(1204, 2)
Full shape: (82806, 2)
2020_06_11/12/28.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4000, 38)
After:	(1230, 2)
Full shape: (84036, 2)
2020_06_11/12/26.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4270, 37)
After:	(1277, 2)
Full shape: (85313, 2)
2020_06_11/12/30.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4104, 37)
After:	(1284, 2)
Full shape: (86597, 2)
2020_06_11/12/04.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8291, 37)
After:	(1203, 2)
Full shape: (87800, 2)
2020_06_11/12/06.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8635, 37)
After:	(1278, 2)
Full shape: (89078, 2)
2020_06_11/12/51.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3885, 37)
After:	(1291, 2)
Full shape: (90369, 2)
2020_06_11/12/54.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3726, 38)
After:	(1214, 2)
Full shape: (91583, 2)
2020_06_11/12/55.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3684, 38)
After:	(1137, 2)
Full shape: (92720, 2)
2020_06_11/12/58.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3519, 37)
After:	(1177, 2)
Full shape: (93897, 2)
2020_06_11/12/33.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4283, 37)
After:	(1249, 2)
Full shape: (95146, 2)
2020_06_11/12/09.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8528, 37)
After:	(1278, 2)
Full shape: (96424, 2)
2020_06_11/12/11.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8441, 37)
After:	(1224, 2)
Full shape: (97648, 2)
2020_06_11/12/21.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(7788, 37)
After:	(1262, 2)
Full shape: (98910, 2)
2020_06_11/12/03.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8683, 37)
After:	(1313, 2)
Full shape: (100223, 2)
2020_06_11/12/48.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3885, 38)
After:	(1186, 2)
Full shape: (101409, 2)
2020_06_11/12/56.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3808, 37)
After:	(1157, 2)
Full shape: (102566, 2)
2020_06_11/12/05.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8391, 37)
After:	(1262, 2)
Full shape: (103828, 2)
2020_06_11/12/24.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(7630, 37)
After:	(1224, 2)
Full shape: (105052, 2)
2020_06_11/12/43.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3816, 37)
After:	(1277, 2)
Full shape: (106329, 2)
2020_06_11/12/47.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3661, 37)
After:	(1207, 2)
Full shape: (107536, 2)
2020_06_11/12/27.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4020, 37)
After:	(1240, 2)
Full shape: (108776, 2)
2020_06_11/12/42.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3883, 37)
After:	(1253, 2)
Full shape: (110029, 2)
2020_06_11/12/19.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8012, 37)
After:	(1273, 2)
Full shape: (111302, 2)
2020_06_11/12/25.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(5960, 37)
After:	(1241, 2)
Full shape: (112543, 2)
2020_06_11/12/01.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(6573, 38)
After:	(1245, 2)
Full shape: (113788, 2)
2020_06_11/12/02.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8571, 37)
After:	(1272, 2)
Full shape: (115060, 2)
2020_06_11/12/00.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4361, 37)
After:	(1449, 2)
Full shape: (116509, 2)
2020_06_11/12/31.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3913, 37)
After:	(1286, 2)
Full shape: (117795, 2)
2020_06_11/12/23.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(7623, 37)
After:	(1294, 2)
Full shape: (119089, 2)
2020_06_11/12/52.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3819, 38)
After:	(1330, 2)
Full shape: (120419, 2)
2020_06_11/12/13.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8441, 37)
After:	(1242, 2)
Full shape: (121661, 2)
2020_06_11/12/14.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8490, 37)
After:	(1254, 2)
Full shape: (122915, 2)
2020_06_11/12/35.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4091, 38)
After:	(1247, 2)
Full shape: (124162, 2)
2020_06_11/12/20.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8086, 37)
After:	(1234, 2)
Full shape: (125396, 2)
2020_06_11/12/40.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4001, 37)
After:	(1253, 2)
Full shape: (126649, 2)
2020_06_11/12/46.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3690, 37)
After:	(1243, 2)
Full shape: (127892, 2)
2020_06_11/12/49.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3777, 37)
After:	(1175, 2)
Full shape: (129067, 2)
2020_06_11/12/18.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8324, 37)
After:	(1244, 2)
Full shape: (130311, 2)
2020_06_11/12/59.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3648, 37)
After:	(1208, 2)
Full shape: (131519, 2)
2020_06_11/12/45.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3846, 37)
After:	(1211, 2)
Full shape: (132730, 2)
2020_06_11/12/22.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(7944, 37)
After:	(1246, 2)
Full shape: (133976, 2)
2020_06_11/12/16.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8242, 37)
After:	(1298, 2)
Full shape: (135274, 2)
2020_06_11/12/53.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3718, 37)
After:	(1238, 2)
Full shape: (136512, 2)
2020_06_11/12/15.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8481, 37)
After:	(1258, 2)
Full shape: (137770, 2)
2020_06_11/12/50.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3761, 37)
After:	(1165, 2)
Full shape: (138935, 2)
2020_06_11/12/17.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8272, 37)
After:	(1275, 2)
Full shape: (140210, 2)
2020_06_11/12/29.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4012, 37)
After:	(1261, 2)
Full shape: (141471, 2)
2020_06_11/12/34.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4175, 38)
After:	(1209, 2)
Full shape: (142680, 2)
2020_06_11/12/12.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8426, 37)
After:	(1253, 2)
Full shape: (143933, 2)
2020_06_11/12/41.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4129, 37)
After:	(1221, 2)
Full shape: (145154, 2)
2020_06_11/12/44.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3813, 37)
After:	(1213, 2)
Full shape: (146367, 2)
2020_06_11/12/57.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3696, 37)
After:	(1191, 2)
Full shape: (147558, 2)
2020_06_11/12/37.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3901, 38)
After:	(1181, 2)
Full shape: (148739, 2)
2020_06_11/12/36.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3856, 38)
After:	(1224, 2)
Full shape: (149963, 2)
2020_06_11/12/32.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3938, 37)
After:	(1241, 2)
Full shape: (151204, 2)
2020_06_11/12/10.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8682, 37)
After:	(1292, 2)
Full shape: (152496, 2)
2020_06_11/12/39.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3973, 38)
After:	(1236, 2)
Full shape: (153732, 2)
2020_06_11/12/38.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(4061, 37)
After:	(1291, 2)
Full shape: (155023, 2)
2020_06_11/12/07.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(8757, 37)
After:	(1303, 2)
Full shape: (156326, 2)
2020_06_11/18/08.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3495, 37)
After:	(1079, 2)
Full shape: (157405, 2)
2020_06_11/18/28.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3367, 37)
After:	(1011, 2)
Full shape: (158416, 2)
2020_06_11/18/26.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3501, 37)
After:	(1023, 2)
Full shape: (159439, 2)
2020_06_11/18/30.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3452, 37)
After:	(1044, 2)
Full shape: (160483, 2)
2020_06_11/18/04.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3493, 37)
After:	(1026, 2)
Full shape: (161509, 2)
2020_06_11/18/06.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3884, 37)
After:	(1046, 2)
Full shape: (162555, 2)
2020_06_11/18/51.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3327, 37)
After:	(1022, 2)
Full shape: (163577, 2)
2020_06_11/18/54.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3459, 37)
After:	(1026, 2)
Full shape: (164603, 2)
2020_06_11/18/55.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3240, 37)
After:	(977, 2)
Full shape: (165580, 2)
2020_06_11/18/58.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3369, 38)
After:	(1067, 2)
Full shape: (166647, 2)
2020_06_11/18/33.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3500, 37)
After:	(1055, 2)
Full shape: (167702, 2)
2020_06_11/18/09.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3371, 37)
After:	(939, 2)
Full shape: (168641, 2)
2020_06_11/18/11.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3408, 38)
After:	(1022, 2)
Full shape: (169663, 2)
2020_06_11/18/21.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3355, 38)
After:	(1077, 2)
Full shape: (170740, 2)
2020_06_11/18/03.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3471, 37)
After:	(1066, 2)
Full shape: (171806, 2)
2020_06_11/18/48.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3355, 37)
After:	(983, 2)
Full shape: (172789, 2)
2020_06_11/18/56.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3406, 37)
After:	(1021, 2)
Full shape: (173810, 2)
2020_06_11/18/05.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3509, 37)
After:	(1022, 2)
Full shape: (174832, 2)
2020_06_11/18/24.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3343, 37)
After:	(1040, 2)
Full shape: (175872, 2)
2020_06_11/18/43.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3469, 38)
After:	(1002, 2)
Full shape: (176874, 2)
2020_06_11/18/47.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3284, 37)
After:	(999, 2)
Full shape: (177873, 2)
2020_06_11/18/27.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3338, 37)
After:	(1032, 2)
Full shape: (178905, 2)
2020_06_11/18/42.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3491, 37)
After:	(1052, 2)
Full shape: (179957, 2)
2020_06_11/18/19.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3455, 37)
After:	(1037, 2)
Full shape: (180994, 2)
2020_06_11/18/25.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3352, 37)
After:	(1016, 2)
Full shape: (182010, 2)
2020_06_11/18/01.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3445, 37)
After:	(1000, 2)
Full shape: (183010, 2)
2020_06_11/18/02.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3629, 37)
After:	(1065, 2)
Full shape: (184075, 2)
2020_06_11/18/00.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3725, 38)
After:	(1077, 2)
Full shape: (185152, 2)
2020_06_11/18/31.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3660, 37)
After:	(1075, 2)
Full shape: (186227, 2)
2020_06_11/18/23.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3269, 38)
After:	(1003, 2)
Full shape: (187230, 2)
2020_06_11/18/52.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3367, 37)
After:	(1044, 2)
Full shape: (188274, 2)
2020_06_11/18/13.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3287, 38)
After:	(964, 2)
Full shape: (189238, 2)
2020_06_11/18/14.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3419, 37)
After:	(1019, 2)
Full shape: (190257, 2)
2020_06_11/18/35.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3510, 37)
After:	(1026, 2)
Full shape: (191283, 2)
2020_06_11/18/20.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3502, 37)
After:	(1104, 2)
Full shape: (192387, 2)
2020_06_11/18/40.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3653, 37)
After:	(1066, 2)
Full shape: (193453, 2)
2020_06_11/18/46.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3459, 37)
After:	(1051, 2)
Full shape: (194504, 2)
2020_06_11/18/49.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3388, 37)
After:	(1048, 2)
Full shape: (195552, 2)
2020_06_11/18/18.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3703, 37)
After:	(1043, 2)
Full shape: (196595, 2)
2020_06_11/18/59.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3376, 37)
After:	(995, 2)
Full shape: (197590, 2)
2020_06_11/18/45.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3367, 37)
After:	(1043, 2)
Full shape: (198633, 2)
2020_06_11/18/22.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3358, 37)
After:	(1070, 2)
Full shape: (199703, 2)
2020_06_11/18/16.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3399, 37)
After:	(1029, 2)
Full shape: (200732, 2)
2020_06_11/18/53.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3376, 37)
After:	(992, 2)
Full shape: (201724, 2)
2020_06_11/18/15.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3502, 37)
After:	(1020, 2)
Full shape: (202744, 2)
2020_06_11/18/50.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3399, 37)
After:	(1066, 2)
Full shape: (203810, 2)
2020_06_11/18/17.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3515, 37)
After:	(982, 2)
Full shape: (204792, 2)
2020_06_11/18/29.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3423, 37)
After:	(1031, 2)
Full shape: (205823, 2)
2020_06_11/18/34.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3472, 37)
After:	(1063, 2)
Full shape: (206886, 2)
2020_06_11/18/12.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3545, 37)
After:	(1043, 2)
Full shape: (207929, 2)
2020_06_11/18/41.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3452, 37)
After:	(1001, 2)
Full shape: (208930, 2)
2020_06_11/18/44.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3381, 37)
After:	(1027, 2)
Full shape: (209957, 2)
2020_06_11/18/57.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3429, 37)
After:	(1042, 2)
Full shape: (210999, 2)
2020_06_11/18/37.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3728, 37)
After:	(1009, 2)
Full shape: (212008, 2)
2020_06_11/18/36.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3655, 37)
After:	(1046, 2)
Full shape: (213054, 2)
2020_06_11/18/32.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3532, 37)
After:	(1030, 2)
Full shape: (214084, 2)
2020_06_11/18/10.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3423, 37)
After:	(984, 2)
Full shape: (215068, 2)
2020_06_11/18/39.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3383, 37)
After:	(1041, 2)
Full shape: (216109, 2)
2020_06_11/18/38.json.bz2


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


Before:	(3374, 37)
After:	(1045, 2)
Full shape: (217154, 2)
2020_06_11/18/07.json.bz2
Before:	(3521, 37)
After:	(1012, 2)
Full shape: (218166, 2)


  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])
  df = pd.read_json(os.path.join(path,file_name), lines=True, convert_dates=["created_at","timestamp_ms"])


In [8]:
df_big.to_csv("twitter_stream.csv")

In [10]:
df_big.shape

(218166, 2)