# Data Wrangling: We Rate Dogs Twitter account

In [1]:
# import 
import numpy as np
import pandas as pd
import requests
import tweepy
import json
import time

## Table of contents
* Data gathering
* Data assessment
* Data cleaning
* Analysis and visualisation
* Conclusions

## Data gathering

In the first part of this project, the required data will be gathered from different sources. 

In [2]:
# create a data frame from the provided .csv-file
df_archive = pd.read_csv('twitter-archive-enhanced.csv')
df_archive.head()

Unnamed: 0,tweet_id,in_reply_to_status_id,in_reply_to_user_id,timestamp,source,text,retweeted_status_id,retweeted_status_user_id,retweeted_status_timestamp,expanded_urls,rating_numerator,rating_denominator,name,doggo,floofer,pupper,puppo
0,892420643555336193,,,2017-08-01 16:23:56 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Phineas. He's a mystical boy. Only eve...,,,,https://twitter.com/dog_rates/status/892420643...,13,10,Phineas,,,,
1,892177421306343426,,,2017-08-01 00:17:27 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Tilly. She's just checking pup on you....,,,,https://twitter.com/dog_rates/status/892177421...,13,10,Tilly,,,,
2,891815181378084864,,,2017-07-31 00:18:03 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Archie. He is a rare Norwegian Pouncin...,,,,https://twitter.com/dog_rates/status/891815181...,12,10,Archie,,,,
3,891689557279858688,,,2017-07-30 15:58:51 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Darla. She commenced a snooze mid meal...,,,,https://twitter.com/dog_rates/status/891689557...,13,10,Darla,,,,
4,891327558926688256,,,2017-07-29 16:00:24 +0000,"<a href=""http://twitter.com/download/iphone"" r...",This is Franklin. He would like you to stop ca...,,,,https://twitter.com/dog_rates/status/891327558...,12,10,Franklin,,,,


In [3]:
# download the provided .tsv-file programmatically
r = requests.get('https://d17h27t6h515a5.cloudfront.net/topher/2017/August/599fd2ad_image-predictions/image-predictions.tsv')

In [4]:
r.status_code

200

In [5]:
# write the downloaded object into a file
with open('image-predictions.tsv', mode = 'wb') as file:
    file.write(r.content)

In [3]:
# create a data frame
df_predictions = pd.read_csv('image-predictions.tsv', sep='\t')
df_predictions.head()

Unnamed: 0,tweet_id,jpg_url,img_num,p1,p1_conf,p1_dog,p2,p2_conf,p2_dog,p3,p3_conf,p3_dog
0,666020888022790149,https://pbs.twimg.com/media/CT4udn0WwAA0aMy.jpg,1,Welsh_springer_spaniel,0.465074,True,collie,0.156665,True,Shetland_sheepdog,0.061428,True
1,666029285002620928,https://pbs.twimg.com/media/CT42GRgUYAA5iDo.jpg,1,redbone,0.506826,True,miniature_pinscher,0.074192,True,Rhodesian_ridgeback,0.07201,True
2,666033412701032449,https://pbs.twimg.com/media/CT4521TWwAEvMyu.jpg,1,German_shepherd,0.596461,True,malinois,0.138584,True,bloodhound,0.116197,True
3,666044226329800704,https://pbs.twimg.com/media/CT5Dr8HUEAA-lEu.jpg,1,Rhodesian_ridgeback,0.408143,True,redbone,0.360687,True,miniature_pinscher,0.222752,True
4,666049248165822465,https://pbs.twimg.com/media/CT5IQmsXIAAKY4A.jpg,1,miniature_pinscher,0.560311,True,Rottweiler,0.243682,True,Doberman,0.154629,True


In [19]:
# prepare for using tweepy
consumer_key = 'CONSUMER KEY'
consumer_secret = 'CONSUMER SECRET'
access_token = 'ACCESS TOKEN'
access_secret = 'ACCESS SECRET'

auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_secret)

api = tweepy.API(auth, wait_on_rate_limit = True, wait_on_rate_limit_notify = True)

In [77]:
missing_ids = [] # initialise list
for i in df_archive.tweet_id:
    start = time.time()
    try:
        # write the json file for each ID into a file
        tweet = api.get_status(i, tweet_mode = 'extended')
        tweet_json = tweet._json
        with open('tweet_json.txt', 'a') as outfile:
            json.dump(tweet_json, outfile)
            outfile.write('\n')
    except:
        # write missing IDs into list
        missing_ids.append(i)
    print('Twitter ID: ', i)
    end = time.time()
    print('Elapsed time: ', end - start, '\n')

Twitter ID:  892420643555336193
Elapsed time:  0.9466848373413086 

Twitter ID:  892177421306343426
Elapsed time:  0.46565914154052734 

Twitter ID:  891815181378084864
Elapsed time:  0.4680612087249756 

Twitter ID:  891689557279858688
Elapsed time:  0.6174278259277344 

Twitter ID:  891327558926688256
Elapsed time:  0.46570706367492676 

Twitter ID:  891087950875897856
Elapsed time:  0.5863049030303955 

Twitter ID:  890971913173991426
Elapsed time:  0.5816981792449951 

Twitter ID:  890729181411237888
Elapsed time:  0.5422000885009766 

Twitter ID:  890609185150312448
Elapsed time:  0.6892170906066895 

Twitter ID:  890240255349198849
Elapsed time:  0.44129300117492676 

Twitter ID:  890006608113172480
Elapsed time:  0.7178442478179932 

Twitter ID:  889880896479866881
Elapsed time:  0.4552040100097656 

Twitter ID:  889665388333682689
Elapsed time:  0.6945648193359375 

Twitter ID:  889638837579907072
Elapsed time:  0.5880029201507568 

Twitter ID:  889531135344209921
Elapsed time:

Twitter ID:  869596645499047938
Elapsed time:  1.4813039302825928 

Twitter ID:  869227993411051520
Elapsed time:  0.5711719989776611 

Twitter ID:  868880397819494401
Elapsed time:  0.4910457134246826 

Twitter ID:  868639477480148993
Elapsed time:  0.49472618103027344 

Twitter ID:  868622495443632128
Elapsed time:  0.4803638458251953 

Twitter ID:  868552278524837888
Elapsed time:  0.4608950614929199 

Twitter ID:  867900495410671616
Elapsed time:  0.5419089794158936 

Twitter ID:  867774946302451713
Elapsed time:  0.5583531856536865 

Twitter ID:  867421006826221569
Elapsed time:  0.5131111145019531 

Twitter ID:  867072653475098625
Elapsed time:  0.7572782039642334 

Twitter ID:  867051520902168576
Elapsed time:  0.44892215728759766 

Twitter ID:  866816280283807744
Elapsed time:  0.841317892074585 

Twitter ID:  866720684873056260
Elapsed time:  0.7595641613006592 

Twitter ID:  866686824827068416
Elapsed time:  0.8586091995239258 

Twitter ID:  866450705531457537
Elapsed time:  

Twitter ID:  846153765933735936
Elapsed time:  0.4721839427947998 

Twitter ID:  846139713627017216
Elapsed time:  0.701866865158081 

Twitter ID:  846042936437604353
Elapsed time:  0.6152520179748535 

Twitter ID:  845812042753855489
Elapsed time:  0.6151571273803711 

Twitter ID:  845677943972139009
Elapsed time:  0.4341318607330322 

Twitter ID:  845459076796616705
Elapsed time:  0.4348769187927246 

Twitter ID:  845397057150107648
Elapsed time:  0.5061249732971191 

Twitter ID:  845306882940190720
Elapsed time:  0.4843459129333496 

Twitter ID:  845098359547420673
Elapsed time:  0.5960569381713867 

Twitter ID:  844979544864018432
Elapsed time:  0.5532739162445068 

Twitter ID:  844973813909606400
Elapsed time:  0.7446529865264893 

Twitter ID:  844704788403113984
Elapsed time:  0.4652290344238281 

Twitter ID:  844580511645339650
Elapsed time:  0.49811291694641113 

Twitter ID:  844223788422217728
Elapsed time:  0.4663691520690918 

Twitter ID:  843981021012017153
Elapsed time:  0

Twitter ID:  829374341691346946
Elapsed time:  0.431445837020874 

Twitter ID:  829141528400556032
Elapsed time:  0.4954550266265869 

Twitter ID:  829011960981237760
Elapsed time:  0.4648411273956299 

Twitter ID:  828801551087042563
Elapsed time:  0.4323139190673828 

Twitter ID:  828770345708580865
Elapsed time:  1.2515079975128174 

Twitter ID:  828708714936930305
Elapsed time:  0.453279972076416 

Twitter ID:  828650029636317184
Elapsed time:  0.5791528224945068 

Twitter ID:  828409743546925057
Elapsed time:  0.4795811176300049 

Twitter ID:  828408677031882754
Elapsed time:  0.5499088764190674 

Twitter ID:  828381636999917570
Elapsed time:  0.46604108810424805 

Twitter ID:  828376505180889089
Elapsed time:  0.4260869026184082 

Twitter ID:  828372645993398273
Elapsed time:  0.4742240905761719 

Twitter ID:  828361771580813312
Elapsed time:  0.5202038288116455 

Twitter ID:  828046555563323392
Elapsed time:  0.4516868591308594 

Twitter ID:  828011680017821696
Elapsed time:  0.

Twitter ID:  814638523311648768
Elapsed time:  2.680560827255249 

Twitter ID:  814578408554463233
Elapsed time:  0.549098014831543 

Twitter ID:  814530161257443328
Elapsed time:  0.4819490909576416 

Twitter ID:  814153002265309185
Elapsed time:  0.4401078224182129 

Twitter ID:  813944609378369540
Elapsed time:  0.44078898429870605 

Twitter ID:  813910438903693312
Elapsed time:  0.4577207565307617 

Twitter ID:  813812741911748608
Elapsed time:  0.6992666721343994 

Twitter ID:  813800681631023104
Elapsed time:  0.42391109466552734 

Twitter ID:  813217897535406080
Elapsed time:  0.6014788150787354 

Twitter ID:  813202720496779264
Elapsed time:  0.7426228523254395 

Twitter ID:  813187593374461952
Elapsed time:  0.45744800567626953 

Twitter ID:  813172488309972993
Elapsed time:  0.510385274887085 

Twitter ID:  813157409116065792
Elapsed time:  0.5176949501037598 

Twitter ID:  813142292504645637
Elapsed time:  0.4638798236846924 

Twitter ID:  813130366689148928
Elapsed time:  0

Twitter ID:  798576900688019456
Elapsed time:  0.4375648498535156 

Twitter ID:  798340744599797760
Elapsed time:  0.47386693954467773 

Twitter ID:  798209839306514432
Elapsed time:  0.4278740882873535 

Twitter ID:  797971864723324932
Elapsed time:  0.5102388858795166 

Twitter ID:  797545162159308800
Elapsed time:  0.5370333194732666 

Twitter ID:  797236660651966464
Elapsed time:  1.7109839916229248 

Twitter ID:  797165961484890113
Elapsed time:  0.4533679485321045 

Twitter ID:  796904159865868288
Elapsed time:  0.4452688694000244 

Twitter ID:  796865951799083009
Elapsed time:  0.45709896087646484 

Twitter ID:  796759840936919040
Elapsed time:  0.4390408992767334 

Twitter ID:  796563435802726400
Elapsed time:  0.6653599739074707 

Twitter ID:  796484825502875648
Elapsed time:  0.4367377758026123 

Twitter ID:  796387464403357696
Elapsed time:  0.5835456848144531 

Twitter ID:  796177847564038144
Elapsed time:  0.8556680679321289 

Twitter ID:  796149749086875649
Elapsed time: 

Twitter ID:  782598640137187329
Elapsed time:  1.487785816192627 

Twitter ID:  782305867769217024
Elapsed time:  0.46359705924987793 

Twitter ID:  782021823840026624
Elapsed time:  0.4407072067260742 

Twitter ID:  781955203444699136
Elapsed time:  0.5025410652160645 

Twitter ID:  781661882474196992
Elapsed time:  0.4726111888885498 

Twitter ID:  781655249211752448
Elapsed time:  0.4418628215789795 

Twitter ID:  781524693396357120
Elapsed time:  0.43051600456237793 

Twitter ID:  781308096455073793
Elapsed time:  0.9347579479217529 

Twitter ID:  781251288990355457
Elapsed time:  0.4670279026031494 

Twitter ID:  781163403222056960
Elapsed time:  0.7058560848236084 

Twitter ID:  780931614150983680
Elapsed time:  0.8372678756713867 

Twitter ID:  780858289093574656
Elapsed time:  0.40699291229248047 

Twitter ID:  780800785462489090
Elapsed time:  0.8441989421844482 

Twitter ID:  780601303617732608
Elapsed time:  0.4376800060272217 

Twitter ID:  780543529827336192
Elapsed time: 

Twitter ID:  766078092750233600
Elapsed time:  0.6137750148773193 

Twitter ID:  766069199026450432
Elapsed time:  0.4350910186767578 

Twitter ID:  766008592277377025
Elapsed time:  0.4319119453430176 

Twitter ID:  765719909049503744
Elapsed time:  0.7707910537719727 

Twitter ID:  765669560888528897
Elapsed time:  0.4314260482788086 

Twitter ID:  765395769549590528
Elapsed time:  0.4424099922180176 

Twitter ID:  765371061932261376
Elapsed time:  0.6418349742889404 

Twitter ID:  765222098633691136
Elapsed time:  0.47185373306274414 

Twitter ID:  764857477905154048
Elapsed time:  0.43788814544677734 

Twitter ID:  764259802650378240
Elapsed time:  0.43911004066467285 

Twitter ID:  763956972077010945
Elapsed time:  0.45131921768188477 

Twitter ID:  763837565564780549
Elapsed time:  0.43936800956726074 

Twitter ID:  763183847194451968
Elapsed time:  0.5430638790130615 

Twitter ID:  763167063695355904
Elapsed time:  0.41883420944213867 

Twitter ID:  763103485927849985
Elapsed ti

Rate limit reached. Sleeping for: 40


Twitter ID:  759446261539934208
Elapsed time:  45.814329862594604 

Twitter ID:  759197388317847553
Elapsed time:  2.078723192214966 

Twitter ID:  759159934323924993
Elapsed time:  0.8227071762084961 

Twitter ID:  759099523532779520
Elapsed time:  0.6104528903961182 

Twitter ID:  759047813560868866
Elapsed time:  0.6406972408294678 

Twitter ID:  758854675097526272
Elapsed time:  0.45558595657348633 

Twitter ID:  758828659922702336
Elapsed time:  0.5389621257781982 

Twitter ID:  758740312047005698
Elapsed time:  0.6138389110565186 

Twitter ID:  758474966123810816
Elapsed time:  0.6888430118560791 

Twitter ID:  758467244762497024
Elapsed time:  0.7468039989471436 

Twitter ID:  758405701903519748
Elapsed time:  0.6742079257965088 

Twitter ID:  758355060040593408
Elapsed time:  0.455930233001709 

Twitter ID:  758099635764359168
Elapsed time:  0.4382779598236084 

Twitter ID:  758041019896193024
Elapsed time:  0.683035135269165 

Twitter ID:  757741869644341248
Elapsed time:  0.6

Twitter ID:  747204161125646336
Elapsed time:  0.48436594009399414 

Twitter ID:  747103485104099331
Elapsed time:  0.6021976470947266 

Twitter ID:  746906459439529985
Elapsed time:  1.6284480094909668 

Twitter ID:  746872823977771008
Elapsed time:  0.4523329734802246 

Twitter ID:  746818907684614144
Elapsed time:  1.4175827503204346 

Twitter ID:  746790600704425984
Elapsed time:  0.4899320602416992 

Twitter ID:  746757706116112384
Elapsed time:  0.7250399589538574 

Twitter ID:  746726898085036033
Elapsed time:  0.5193610191345215 

Twitter ID:  746542875601690625
Elapsed time:  0.4533970355987549 

Twitter ID:  746521445350707200
Elapsed time:  0.6757607460021973 

Twitter ID:  746507379341139972
Elapsed time:  0.8175609111785889 

Twitter ID:  746369468511756288
Elapsed time:  0.6141500473022461 

Twitter ID:  746131877086527488
Elapsed time:  1.2304019927978516 

Twitter ID:  746056683365994496
Elapsed time:  0.6130208969116211 

Twitter ID:  745789745784041472
Elapsed time:  

Twitter ID:  728409960103686147
Elapsed time:  0.5526981353759766 

Twitter ID:  728387165835677696
Elapsed time:  0.6383650302886963 

Twitter ID:  728046963732717569
Elapsed time:  0.4422578811645508 

Twitter ID:  728035342121635841
Elapsed time:  0.598052978515625 

Twitter ID:  728015554473250816
Elapsed time:  0.7807581424713135 

Twitter ID:  727685679342333952
Elapsed time:  0.6126251220703125 

Twitter ID:  727644517743104000
Elapsed time:  0.6144402027130127 

Twitter ID:  727524757080539137
Elapsed time:  0.42114901542663574 

Twitter ID:  727314416056803329
Elapsed time:  0.6026129722595215 

Twitter ID:  727286334147182592
Elapsed time:  0.6195650100708008 

Twitter ID:  727175381690781696
Elapsed time:  0.6099817752838135 

Twitter ID:  727155742655025152
Elapsed time:  0.4557509422302246 

Twitter ID:  726935089318363137
Elapsed time:  0.7723410129547119 

Twitter ID:  726887082820554753
Elapsed time:  0.6133661270141602 

Twitter ID:  726828223124897792
Elapsed time:  0

Twitter ID:  710588934686908417
Elapsed time:  0.4308297634124756 

Twitter ID:  710296729921429505
Elapsed time:  0.6512472629547119 

Twitter ID:  710283270106132480
Elapsed time:  0.49001026153564453 

Twitter ID:  710272297844797440
Elapsed time:  0.44459009170532227 

Twitter ID:  710269109699739648
Elapsed time:  0.49510884284973145 

Twitter ID:  710153181850935296
Elapsed time:  0.490811824798584 

Twitter ID:  710140971284037632
Elapsed time:  0.5470070838928223 

Twitter ID:  710117014656950272
Elapsed time:  0.6241819858551025 

Twitter ID:  709918798883774466
Elapsed time:  0.5796818733215332 

Twitter ID:  709901256215666688
Elapsed time:  0.5848462581634521 

Twitter ID:  709852847387627521
Elapsed time:  0.46594810485839844 

Twitter ID:  709566166965075968
Elapsed time:  0.7051088809967041 

Twitter ID:  709556954897764353
Elapsed time:  0.5035669803619385 

Twitter ID:  709519240576036864
Elapsed time:  0.4556100368499756 

Twitter ID:  709449600415961088
Elapsed time:

Twitter ID:  701601587219795968
Elapsed time:  0.6135330200195312 

Twitter ID:  701570477911896070
Elapsed time:  0.5417799949645996 

Twitter ID:  701545186879471618
Elapsed time:  0.6880338191986084 

Twitter ID:  701214700881756160
Elapsed time:  0.6132142543792725 

Twitter ID:  700890391244103680
Elapsed time:  0.49828600883483887 

Twitter ID:  700864154249383937
Elapsed time:  0.4902768135070801 

Twitter ID:  700847567345688576
Elapsed time:  0.5984811782836914 

Twitter ID:  700796979434098688
Elapsed time:  0.6653549671173096 

Twitter ID:  700747788515020802
Elapsed time:  0.44429898262023926 

Twitter ID:  700518061187723268
Elapsed time:  0.45421695709228516 

Twitter ID:  700505138482569216
Elapsed time:  0.6129550933837891 

Twitter ID:  700462010979500032
Elapsed time:  0.4850497245788574 

Twitter ID:  700167517596164096
Elapsed time:  0.6637377738952637 

Twitter ID:  700151421916807169
Elapsed time:  0.6128740310668945 

Twitter ID:  700143752053182464
Elapsed time:

Twitter ID:  692417313023332352
Elapsed time:  0.6982388496398926 

Twitter ID:  692187005137076224
Elapsed time:  1.229151964187622 

Twitter ID:  692158366030913536
Elapsed time:  1.6395971775054932 

Twitter ID:  692142790915014657
Elapsed time:  1.8464889526367188 

Twitter ID:  692041934689402880
Elapsed time:  0.9191679954528809 

Twitter ID:  692017291282812928
Elapsed time:  0.6771678924560547 

Twitter ID:  691820333922455552
Elapsed time:  0.6057860851287842 

Twitter ID:  691793053716221953
Elapsed time:  1.748872995376587 

Twitter ID:  691756958957883396
Elapsed time:  1.7765121459960938 

Twitter ID:  691675652215414786
Elapsed time:  0.45767712593078613 

Twitter ID:  691483041324204033
Elapsed time:  0.45604562759399414 

Twitter ID:  691459709405118465
Elapsed time:  0.6008038520812988 

Twitter ID:  691444869282295808
Elapsed time:  0.5330061912536621 

Twitter ID:  691416866452082688
Elapsed time:  0.6166698932647705 

Twitter ID:  691321916024623104
Elapsed time:  1

Twitter ID:  684959798585110529
Elapsed time:  0.8220961093902588 

Twitter ID:  684940049151070208
Elapsed time:  0.5379283428192139 

Twitter ID:  684926975086034944
Elapsed time:  0.4807248115539551 

Twitter ID:  684914660081053696
Elapsed time:  0.6148579120635986 

Twitter ID:  684902183876321280
Elapsed time:  0.5102620124816895 

Twitter ID:  684880619965411328
Elapsed time:  0.7183012962341309 

Twitter ID:  684830982659280897
Elapsed time:  0.6138648986816406 

Twitter ID:  684800227459624960
Elapsed time:  0.7058582305908203 

Twitter ID:  684594889858887680
Elapsed time:  0.7249429225921631 

Twitter ID:  684588130326986752
Elapsed time:  0.5011579990386963 

Twitter ID:  684567543613382656
Elapsed time:  0.7313759326934814 

Twitter ID:  684538444857667585
Elapsed time:  0.613400936126709 

Twitter ID:  684481074559381504
Elapsed time:  0.8178942203521729 

Twitter ID:  684460069371654144
Elapsed time:  0.6147139072418213 

Twitter ID:  684241637099323392
Elapsed time:  0.

Twitter ID:  679503373272485890
Elapsed time:  0.5658121109008789 

Twitter ID:  679475951516934144
Elapsed time:  0.867913007736206 

Twitter ID:  679462823135686656
Elapsed time:  0.6131188869476318 

Twitter ID:  679405845277462528
Elapsed time:  0.501091718673706 

Twitter ID:  679158373988876288
Elapsed time:  0.7282202243804932 

Twitter ID:  679148763231985668
Elapsed time:  1.2250890731811523 

Twitter ID:  679132435750195208
Elapsed time:  0.7000761032104492 

Twitter ID:  679111216690831360
Elapsed time:  0.8854267597198486 

Twitter ID:  679062614270468097
Elapsed time:  0.47825002670288086 

Twitter ID:  679047485189439488
Elapsed time:  0.5160319805145264 

Twitter ID:  679001094530465792
Elapsed time:  0.5769069194793701 

Twitter ID:  678991772295516161
Elapsed time:  0.761012077331543 

Twitter ID:  678969228704284672
Elapsed time:  0.7952640056610107 

Twitter ID:  678800283649069056
Elapsed time:  0.8171441555023193 

Twitter ID:  678798276842360832
Elapsed time:  0.6

Rate limit reached. Sleeping for: 262


Twitter ID:  677331501395156992
Elapsed time:  268.11939668655396 

Twitter ID:  677328882937298944
Elapsed time:  0.9778969287872314 

Twitter ID:  677314812125323265
Elapsed time:  0.7667498588562012 

Twitter ID:  677301033169788928
Elapsed time:  0.48630571365356445 

Twitter ID:  677269281705472000
Elapsed time:  0.7455189228057861 

Twitter ID:  677228873407442944
Elapsed time:  0.611518144607544 

Twitter ID:  677187300187611136
Elapsed time:  0.4882068634033203 

Twitter ID:  676975532580409345
Elapsed time:  0.48238277435302734 

Twitter ID:  676957860086095872
Elapsed time:  0.6109180450439453 

Twitter ID:  676949632774234114
Elapsed time:  0.4571809768676758 

Twitter ID:  676948236477857792
Elapsed time:  3.7016520500183105 

Twitter ID:  676946864479084545
Elapsed time:  0.43296098709106445 

Twitter ID:  676942428000112642
Elapsed time:  0.49195122718811035 

Twitter ID:  676936541936185344
Elapsed time:  0.44837188720703125 

Twitter ID:  676916996760600576
Elapsed time

Twitter ID:  674330906434379776
Elapsed time:  1.0847370624542236 

Twitter ID:  674318007229923329
Elapsed time:  0.8187170028686523 

Twitter ID:  674307341513269249
Elapsed time:  0.5806188583374023 

Twitter ID:  674291837063053312
Elapsed time:  0.6578800678253174 

Twitter ID:  674271431610523648
Elapsed time:  0.606112003326416 

Twitter ID:  674269164442398721
Elapsed time:  0.6131703853607178 

Twitter ID:  674265582246694913
Elapsed time:  0.4512948989868164 

Twitter ID:  674262580978937856
Elapsed time:  0.7854247093200684 

Twitter ID:  674255168825880576
Elapsed time:  2.0983171463012695 

Twitter ID:  674082852460433408
Elapsed time:  1.4406249523162842 

Twitter ID:  674075285688614912
Elapsed time:  0.4528031349182129 

Twitter ID:  674063288070742018
Elapsed time:  1.732447862625122 

Twitter ID:  674053186244734976
Elapsed time:  1.0288817882537842 

Twitter ID:  674051556661161984
Elapsed time:  0.6553359031677246 

Twitter ID:  674045139690631169
Elapsed time:  0.7

Twitter ID:  671735591348891648
Elapsed time:  0.8206307888031006 

Twitter ID:  671729906628341761
Elapsed time:  0.6109879016876221 

Twitter ID:  671561002136281088
Elapsed time:  0.4993431568145752 

Twitter ID:  671550332464455680
Elapsed time:  0.46262598037719727 

Twitter ID:  671547767500775424
Elapsed time:  0.5625219345092773 

Twitter ID:  671544874165002241
Elapsed time:  0.596240758895874 

Twitter ID:  671542985629241344
Elapsed time:  0.43759584426879883 

Twitter ID:  671538301157904385
Elapsed time:  1.3309357166290283 

Twitter ID:  671536543010570240
Elapsed time:  0.510775089263916 

Twitter ID:  671533943490011136
Elapsed time:  0.5632913112640381 

Twitter ID:  671528761649688577
Elapsed time:  0.5688812732696533 

Twitter ID:  671520732782923777
Elapsed time:  0.6091530323028564 

Twitter ID:  671518598289059840
Elapsed time:  1.2307300567626953 

Twitter ID:  671511350426865664
Elapsed time:  0.6154420375823975 

Twitter ID:  671504605491109889
Elapsed time:  0

Twitter ID:  669597912108789760
Elapsed time:  0.4831109046936035 

Twitter ID:  669583744538451968
Elapsed time:  0.5464351177215576 

Twitter ID:  669573570759163904
Elapsed time:  0.4737548828125 

Twitter ID:  669571471778410496
Elapsed time:  0.5698351860046387 

Twitter ID:  669567591774625800
Elapsed time:  0.6349518299102783 

Twitter ID:  669564461267722241
Elapsed time:  0.6041936874389648 

Twitter ID:  669393256313184256
Elapsed time:  0.6274089813232422 

Twitter ID:  669375718304980992
Elapsed time:  0.6357192993164062 

Twitter ID:  669371483794317312
Elapsed time:  0.5890626907348633 

Twitter ID:  669367896104181761
Elapsed time:  0.6152639389038086 

Twitter ID:  669363888236994561
Elapsed time:  0.7386670112609863 

Twitter ID:  669359674819481600
Elapsed time:  0.5819332599639893 

Twitter ID:  669354382627049472
Elapsed time:  0.9318552017211914 

Twitter ID:  669353438988365824
Elapsed time:  0.6148488521575928 

Twitter ID:  669351434509529089
Elapsed time:  0.52

Twitter ID:  667437278097252352
Elapsed time:  0.831226110458374 

Twitter ID:  667435689202614272
Elapsed time:  0.6038641929626465 

Twitter ID:  667405339315146752
Elapsed time:  0.6246049404144287 

Twitter ID:  667393430834667520
Elapsed time:  0.5643231868743896 

Twitter ID:  667369227918143488
Elapsed time:  0.6637911796569824 

Twitter ID:  667211855547486208
Elapsed time:  0.821368932723999 

Twitter ID:  667200525029539841
Elapsed time:  0.6135227680206299 

Twitter ID:  667192066997374976
Elapsed time:  0.4670882225036621 

Twitter ID:  667188689915760640
Elapsed time:  0.46605896949768066 

Twitter ID:  667182792070062081
Elapsed time:  0.4780259132385254 

Twitter ID:  667177989038297088
Elapsed time:  0.842134952545166 

Twitter ID:  667176164155375616
Elapsed time:  0.6129879951477051 

Twitter ID:  667174963120574464
Elapsed time:  0.6118748188018799 

Twitter ID:  667171260800061440
Elapsed time:  0.825484037399292 

Twitter ID:  667165590075940865
Elapsed time:  0.60

In [81]:
with open('missing_ids.txt', mode = 'w') as file:
    file.write('missing_ids')

In [78]:
df = []
with open('tweet_json.txt') as file:
    for line in file:
        df.append(json.loads(line))

JSONDecodeError: Extra data: line 1 column 4037 (char 4036)