In [1]:
import pandas as pd
import sqlite3

In [2]:
import numpy as np
import gc

In [3]:
# Load the per-asset records from the processed HTTP Archive database
# (the 2020-01 snapshot, per the path) into a single DataFrame.
# `securityDetails` is aliased to `tls_version` for the rest of the notebook.
#
# A failing query is deliberately NOT caught here: every later cell needs
# `assets_df`, so swallowing the error would only surface later as a
# confusing NameError. The `finally` clause still guarantees that the
# connection is closed, whether or not the query succeeds.
conn = sqlite3.connect('[...PATH...]/httparchive_processed/2020-01/assets.db')
try:
    assets_df = pd.read_sql('''
                            select  base_page, asset_url,
                                    protocol, securityDetails AS tls_version,
                                    mime_type,
                                    is_ad, is_tracker,
                                    hybrid_ident
                            from    assets_processed
                            ''',
                            con=conn)
finally:
    conn.close()

In [4]:
# Report CDN product names under their parent company's name
# ('Amazon CloudFront' -> 'Amazon', 'Microsoft Azure' -> 'Microsoft').
cf_ms_names = dict([
    ('Amazon CloudFront', 'Amazon'),
    ('Microsoft Azure', 'Microsoft'),
])

# Labels that are not keys of the mapping are left unchanged.
assets_df['hybrid_ident'] = assets_df['hybrid_ident'].replace(to_replace=cf_ms_names)

In [5]:
# List the distinct provider labels left after the name normalization.
# None presumably marks assets for which no provider was identified - TODO confirm.
assets_df['hybrid_ident'].unique()

array(['Akamai', 'Fastly', None, 'Google', 'Amazon', 'Edgecast',
       'Microsoft', 'CDN77', 'Cloudflare', 'Twitter', 'Facebook',
       'Highwinds', 'StackPath', 'NetDNA', 'jsDelivr', 'Yahoo', 'Cedexis',
       'CDNetworks', 'Netlify', 'LeaseWeb CDN', 'Level3', 'Internap',
       'section.io', 'Rackspace', 'CDNvideo', 'Instart Logic',
       'WordPress', 'KeyCDN', 'Taobao', 'AT&T', 'Limelight',
       'Sucuri Firewall', 'BelugaCDN', 'OVH CDN', 'Reflected Networks',
       'ChinaNetCenter', 'Incapsula', 'BunnyCDN', 'Reapleaf', 'Yottaa',
       'Cachefly', 'Level 3', 'CDN', 'Zenedge', 'CacheFly', 'Medianova',
       'ChinaCache', 'Rocket CDN', 'Yunjiasu', 'Azion', 'OnApp',
       'KINX CDN', 'Universal CDN', 'TRBCDN', 'GoCache', 'Pressable CDN',
       'SFR', 'Sirv CDN', 'CDNsun', 'NGENIX', 'QUIC.cloud',
       'Myra Security CDN', 'Telenor', 'SwiftCDN', 'BitGravity', 'Aryaka',
       'VegaCDN', 'Airee', 'Advanced Hosters CDN', 'Optimal CDN', 'Naver',
       'XLabs Security', 'PageCDN'

In [6]:
# Sanity check of the column names delivered by the SQL query
# (securityDetails shows up under its alias tls_version).
assets_df.columns

Index(['base_page', 'asset_url', 'protocol', 'tls_version', 'mime_type',
       'is_ad', 'is_tracker', 'hybrid_ident'],
      dtype='object')

In [7]:
# Run a full garbage-collection pass; the displayed value is the number of
# unreachable objects the collector found.
gc.collect()

0

In [8]:
# Preview the asset table; pandas truncates the display of a frame this
# large to its first and last rows.
assets_df

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
0,https://www.thejamesroom.com/,https://use.typekit.net/ik/j6yHdzj79DugkCmKnZA...,HTTP/2,TLS 1.3,javascript,0,0,Akamai
1,https://www.thejamesroom.com/,https://assets.squarespace.com/universal/scrip...,HTTP/2,TLS 1.2,javascript,0,0,Fastly
2,https://www.thejamesroom.com/,https://assets.squarespace.com/universal/scrip...,HTTP/2,,javascript,0,0,Fastly
3,https://www.thejamesroom.com/,https://static1.squarespace.com/static/sitecss...,HTTP/2,TLS 1.3,text,0,0,Fastly
4,https://www.thejamesroom.com/,https://static1.squarespace.com/static/598a54e...,HTTP/2,,image,0,0,
...,...,...,...,...,...,...,...,...
392322368,https://vsmart.net/,https://lg1.logging.admicro.vn/cpa2?tid=-1&pat...,http/1.1,,image,1,0,
392322369,https://vsmart.net/,https://lg1.logging.admicro.vn/_tracking1.gif?...,http/1.1,TLS 1.2,html,1,1,
392322370,https://vsmart.net/,https://lg1.logging.admicro.vn/rtg_bn?url=http...,http/1.1,TLS 1.2,image,1,0,
392322371,https://vsmart.net/,https://media1.admicro.vn/core/persist.js,,,javascript,1,0,


### Embedded Videos

In [9]:
# Coarse first pass: keep every asset whose URL contains the literal text
# "youtube". This over-matches (e.g. "youtube-download" somewhere in the URL).
youtube_df = assets_df.loc[assets_df['asset_url'].str.contains('youtube', regex=False)]

In [10]:
# Preview the coarse "youtube" selection (truncated to head and tail rows).
youtube_df

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
646,http://tejidosparedes.net/,https://www.youtube.com/embed/P_M0nIy1wlg?rel=0,HTTP/2,TLS 1.3,html,0,0,Google
647,http://tejidosparedes.net/,https://www.youtube.com/yts/cssbin/www-player-...,HTTP/2,,text,0,0,Google
648,http://tejidosparedes.net/,https://www.youtube.com/yts/jsbin/www-embed-pl...,HTTP/2,,javascript,0,0,Google
649,http://tejidosparedes.net/,https://www.youtube.com/yts/jsbin/player_ias-v...,HTTP/2,,javascript,0,0,Google
665,http://tejidosparedes.net/,https://www.youtube.com/yts/jsbin/player_ias-v...,HTTP/2,,javascript,0,0,Google
...,...,...,...,...,...,...,...,...
392320710,http://kgfamily.co.kr/,https://www.youtube.com/yts/jsbin/www-embed-pl...,HTTP/2,,javascript,0,0,Google
392320711,http://kgfamily.co.kr/,https://www.youtube.com/yts/jsbin/player_ias-v...,HTTP/2,,javascript,0,0,Google
392320719,http://kgfamily.co.kr/,https://www.youtube.com/yts/jsbin/player_ias-v...,HTTP/2,,javascript,0,0,Google
392320720,http://kgfamily.co.kr/,https://www.youtube.com/yts/jsbin/player_ias-v...,HTTP/2,,javascript,0,0,Google


In [106]:
# Keep only the embed requests themselves. An embedded video also pulls in
# JavaScript, CSS and similar assets for the player; URLs without an
# '/embed/' path on one of the two YouTube hosts are dropped here.
embedded_yt = youtube_df.loc[
    lambda frame: frame['asset_url'].str.contains('youtube.com/embed/', regex=False)
    | frame['asset_url'].str.contains('youtube-nocookie.com/embed/', regex=False)
]

In [107]:
# Preview the YouTube embed requests (truncated to head and tail rows).
embedded_yt

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
646,http://tejidosparedes.net/,https://www.youtube.com/embed/P_M0nIy1wlg?rel=0,HTTP/2,TLS 1.3,html,0,0,Google
2000,http://pcarmall.com/,https://www.youtube.com/embed/o2TMYvTWQDo?rel=0&autoplay=1&mute=1,HTTP/2,TLS 1.3,html,0,0,Google
2001,http://pcarmall.com/,https://www.youtube.com/embed/QbYsqZaVYy0,HTTP/2,,html,0,0,Google
2002,http://pcarmall.com/,https://www.youtube.com/embed/DJI3BYZ26e0,HTTP/2,,html,0,0,Google
3528,https://www.lastresortrecovery.com/,https://www.youtube.com/embed/2QTSP3zBfnM,HTTP/2,,html,0,0,Google
...,...,...,...,...,...,...,...,...
392319842,https://support.crowdrise.com/,https://www.youtube.com/embed/N9vjTXY7i3k?start=2,HTTP/2,,html,0,0,Google
392319843,https://support.crowdrise.com/,https://www.youtube.com/embed/zBavxQ2-SAQ?start=4,HTTP/2,,html,0,0,Google
392319844,https://support.crowdrise.com/,https://www.youtube.com/embed/e6SgQwGOOxU?start=1,HTTP/2,,html,0,0,Google
392320445,https://denver.woodhousespas.com/,https://www.youtube.com/embed/V_p6ekd7Uf8?rel=0&controls=0&showinfo=0,HTTP/2,TLS 1.3,html,0,0,Google


In [19]:
# Distribution of MIME types among the YouTube embed requests, most
# frequent first.
embedded_yt['mime_type'].value_counts()

html            654570
unidentified    70351 
no type         7237  
javascript      19    
text            14    
image           13    
application     2     
Name: mime_type, dtype: int64

In [108]:
# Inspect only the embeds whose URL contains "youtube-nocookie".
embedded_yt.loc[lambda frame: frame['asset_url'].str.contains('youtube-nocookie', regex=False)]

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
55010,https://bestfunnyclannames.com/,https://www.youtube-nocookie.com/embed/tRTmI8t6vuo?rel=0&controls=0&showinfo=0,HTTP/2,TLS 1.3,html,0,0,Google
101646,https://www.knuth.com.ro/,https://www.youtube-nocookie.com/embed/MMH75jCvlzs?modestbranding=1&showinfo=0&rel=0&controls=1&autoplay=1&loop=1&mute=1,HTTP/2,,html,0,0,Google
139045,https://lcu.edu/,https://www.youtube-nocookie.com/embed/VUTlLBTCvpU?rel=0&cc_load_policy=1,HTTP/2,TLS 1.3,html,0,0,Google
235562,https://www.mlpd.de/,https://www.youtube-nocookie.com/embed/OZIZFQmLg4w?rel=0&amp;start=0,HTTP/2,TLS 1.3,html,0,0,Google
241902,http://www.tarjetas-de-cumpleanios.com/,https://www.youtube-nocookie.com/embed/WaxonrzlQoM?rel=0&controls=0&showinfo=0,HTTP/2,TLS 1.3,html,0,0,Google
...,...,...,...,...,...,...,...,...
392314076,https://avamax.com/,https://www.youtube-nocookie.com/embed/wBmWvvTV0P4,HTTP/2,TLS 1.3,html,0,0,Google
392314077,https://avamax.com/,https://www.youtube-nocookie.com/embed/SxGLPVvNjvY,HTTP/2,,html,0,0,Google
392314078,https://avamax.com/,https://www.youtube-nocookie.com/embed/PjelIssAltY,HTTP/2,,html,0,0,Google
392314079,https://avamax.com/,https://www.youtube-nocookie.com/embed/3AKPaq0IaDk,HTTP/2,,html,0,0,Google


--> Note: the `youtube-nocookie.com` embed variant behaves slightly differently from the regular `youtube.com/embed/` variant.

In [21]:
# Number of distinct base pages that issued at least one YouTube embed request.
embedded_yt['base_page'].nunique()

369063

In [28]:
# Fraction of pages with a YouTube embed.
# NOTE(review): 4278626 is presumably the total number of base pages in this
# crawl - confirm, and consider deriving or naming it instead of hardcoding.
embedded_yt['base_page'].nunique()/4278626

0.08625736392944838

In [26]:
# Example page: show every "youtube" asset requested by one page with an embed.
youtube_df.loc[youtube_df['base_page'].eq('http://tejidosparedes.net/')]

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
646,http://tejidosparedes.net/,https://www.youtube.com/embed/P_M0nIy1wlg?rel=0,HTTP/2,TLS 1.3,html,0,0,Google
647,http://tejidosparedes.net/,https://www.youtube.com/yts/cssbin/www-player-webp-vflrXpvQF.css,HTTP/2,,text,0,0,Google
648,http://tejidosparedes.net/,https://www.youtube.com/yts/jsbin/www-embed-player-vflq69b7D/www-embed-player.js,HTTP/2,,javascript,0,0,Google
649,http://tejidosparedes.net/,https://www.youtube.com/yts/jsbin/player_ias-vflMn34bn/en_US/base.js,HTTP/2,,javascript,0,0,Google
665,http://tejidosparedes.net/,https://www.youtube.com/yts/jsbin/player_ias-vflMn34bn/en_US/remote.js,HTTP/2,,javascript,0,0,Google
669,http://tejidosparedes.net/,https://www.youtube.com/yts/jsbin/player_ias-vflMn34bn/en_US/embed.js,HTTP/2,,javascript,0,0,Google
676,http://tejidosparedes.net/,https://www.youtube.com/generate_204?uwe7FA,HTTP/2,,no type,0,0,Google


In [27]:
# Second example page, for comparison with the previous one.
youtube_df.loc[youtube_df['base_page'].eq('http://kgfamily.co.kr/')]

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
392320708,http://kgfamily.co.kr/,https://www.youtube.com/embed/THjVEuBLmCw,HTTP/2,TLS 1.3,html,0,0,Google
392320709,http://kgfamily.co.kr/,https://www.youtube.com/yts/cssbin/www-player-webp-vfly-boRU.css,HTTP/2,,text,0,0,Google
392320710,http://kgfamily.co.kr/,https://www.youtube.com/yts/jsbin/www-embed-player-vfl19rSh2/www-embed-player.js,HTTP/2,,javascript,0,0,Google
392320711,http://kgfamily.co.kr/,https://www.youtube.com/yts/jsbin/player_ias-vfl22ubNH/en_US/base.js,HTTP/2,,javascript,0,0,Google
392320719,http://kgfamily.co.kr/,https://www.youtube.com/yts/jsbin/player_ias-vfl22ubNH/en_US/remote.js,HTTP/2,,javascript,0,0,Google
392320720,http://kgfamily.co.kr/,https://www.youtube.com/yts/jsbin/player_ias-vfl22ubNH/en_US/embed.js,HTTP/2,,javascript,0,0,Google
392320734,http://kgfamily.co.kr/,https://www.youtube.com/generate_204?bbwztw,HTTP/2,,no type,0,0,Google


In [42]:
# Coarse first pass for Vimeo: any asset URL containing the literal text
# "vimeo", including unrelated hits such as a site's own "vimeo.png" icon.
vimeo_df = assets_df.loc[assets_df['asset_url'].str.contains('vimeo', regex=False)]
vimeo_df

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
4983,https://freewheely.com/,https://freewheely.com/wp-content/themes/arrasjb/images/social/vimeo.png,HTTP/2,,image,0,0,
8815,https://www.fammed.wisc.edu/,https://www.fammed.wisc.edu/wp-content/themes/Avada/includes/lib/assets/min/js/library/vimeoPlayer.js,http/1.1,,javascript,0,0,
10574,https://journal.ccas.fr/,https://i.vimeocdn.com/video/715131583.jpg?mw=80&q=85,HTTP/2,,image,0,0,Fastly
10575,https://journal.ccas.fr/,https://f.vimeocdn.com/p/3.15.7/css/player.css,HTTP/2,,text,0,0,Fastly
10576,https://journal.ccas.fr/,https://f.vimeocdn.com/js_opt/modules/utils/vuid.min.js,HTTP/2,,javascript,0,0,Fastly
...,...,...,...,...,...,...,...,...
392305504,https://valq.com/,https://i.vimeocdn.com/video/779940162_960.jpg,HTTP/2,,image,0,0,Fastly
392305505,https://valq.com/,https://i.vimeocdn.com/video/682936310_960.jpg,HTTP/2,,image,0,0,Fastly
392305507,https://valq.com/,https://i.vimeocdn.com/video/838859586_960.jpg,HTTP/2,,image,0,0,Fastly
392305509,https://valq.com/,https://player.vimeo.com/video/361787457?dnt=1&app_id=122963,http/1.1,TLS 1.2,html,0,0,Fastly


In [39]:
# Provider labels of the coarse "vimeo" selection, most frequent first.
# value_counts() skips rows whose hybrid_ident is missing.
vimeo_df['hybrid_ident'].value_counts()

Fastly             268763
Google             26226 
Akamai             14898 
Cloudflare         5112  
Amazon             2736  
NetDNA             1590  
WordPress          884   
Sucuri Firewall    812   
Microsoft          426   
Facebook           297   
Rackspace          172   
LeaseWeb CDN       166   
Incapsula          157   
KeyCDN             132   
StackPath          108   
OVH CDN            84    
CDN                61    
Edgecast           61    
Highwinds          60    
jsDelivr           41    
Pressable CDN      41    
BunnyCDN           33    
AT&T               21    
Yahoo              20    
Twitter            14    
Level3             13    
Internap           12    
GoCache            8     
CDNetworks         8     
Taobao             7     
CDN77              7     
Netlify            6     
Telenor            5     
CDNvideo           5     
Level 3            4     
Azion              3     
BelugaCDN          3     
Limelight          2     
section.io  

In [38]:
# Example page: list all "vimeo" assets requested by a single page.
vimeo_df.loc[vimeo_df['base_page'].eq('https://journal.ccas.fr/')]

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
10574,https://journal.ccas.fr/,https://i.vimeocdn.com/video/715131583.jpg?mw=80&q=85,HTTP/2,,image,0,0,Fastly
10575,https://journal.ccas.fr/,https://f.vimeocdn.com/p/3.15.7/css/player.css,HTTP/2,,text,0,0,Fastly
10576,https://journal.ccas.fr/,https://f.vimeocdn.com/js_opt/modules/utils/vuid.min.js,HTTP/2,,javascript,0,0,Fastly
10577,https://journal.ccas.fr/,https://f.vimeocdn.com/p/3.15.7/js/player.js,HTTP/2,,javascript,0,0,Fastly
10584,https://journal.ccas.fr/,https://i.vimeocdn.com/video/715131583.webp?mw=700&mh=394,HTTP/2,,no type,0,0,Fastly
10585,https://journal.ccas.fr/,https://vimeo.com/ablincoln/vuid?pid=943ecb652bfa7b9817ae34a077f12d68caff1cff1578301692,,,html,0,0,Fastly
10586,https://journal.ccas.fr/,https://player.vimeo.com/video/280962127,http/1.1,,unidentified,0,0,Fastly
10587,https://journal.ccas.fr/,https://fresnel.vimeocdn.com/add/player-stats?beacon=1&session-id=943ecb652bfa7b9817ae34a077f12d68caff1cff1578301692,HTTP/2,,no type,0,1,Google
10588,https://journal.ccas.fr/,https://i.vimeocdn.com/video/715131583.webp?mw=700&mh=393,HTTP/2,,image,0,0,Fastly


In [109]:
# Restrict the coarse selection to URLs that contain a vimeocdn.com subdomain
# or the player.vimeo.com host (plain substring match).
refined_vimeo = vimeo_df.loc[
    lambda frame: frame['asset_url'].str.contains('.vimeocdn.com/', regex=False)
    | frame['asset_url'].str.contains('player.vimeo.com/', regex=False)
]
refined_vimeo

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
10574,https://journal.ccas.fr/,https://i.vimeocdn.com/video/715131583.jpg?mw=80&q=85,HTTP/2,,image,0,0,Fastly
10575,https://journal.ccas.fr/,https://f.vimeocdn.com/p/3.15.7/css/player.css,HTTP/2,,text,0,0,Fastly
10576,https://journal.ccas.fr/,https://f.vimeocdn.com/js_opt/modules/utils/vuid.min.js,HTTP/2,,javascript,0,0,Fastly
10577,https://journal.ccas.fr/,https://f.vimeocdn.com/p/3.15.7/js/player.js,HTTP/2,,javascript,0,0,Fastly
10584,https://journal.ccas.fr/,https://i.vimeocdn.com/video/715131583.webp?mw=700&mh=394,HTTP/2,,no type,0,0,Fastly
...,...,...,...,...,...,...,...,...
392305503,https://valq.com/,https://i.vimeocdn.com/video/717145851_960.jpg,HTTP/2,,image,0,0,Fastly
392305504,https://valq.com/,https://i.vimeocdn.com/video/779940162_960.jpg,HTTP/2,,image,0,0,Fastly
392305505,https://valq.com/,https://i.vimeocdn.com/video/682936310_960.jpg,HTTP/2,,image,0,0,Fastly
392305507,https://valq.com/,https://i.vimeocdn.com/video/838859586_960.jpg,HTTP/2,,image,0,0,Fastly


In [46]:
# Provider labels of the refined Vimeo selection, most frequent first.
refined_vimeo['hybrid_ident'].value_counts()

Fastly        213840
Google        23406 
Akamai        803   
Amazon        54    
Cloudflare    28    
WordPress     21    
OVH CDN       4     
Microsoft     1     
Name: hybrid_ident, dtype: int64

In [49]:
# Distribution of MIME types in the refined Vimeo selection.
refined_vimeo['mime_type'].value_counts()

html            71904
image           64606
javascript      47556
unidentified    22965
no type         20034
text            15068
application     173  
video           58   
font            4    
Name: mime_type, dtype: int64

In [110]:
# Keep only the requests whose URL contains player.vimeo.com/; the
# vimeocdn.com matches from the refined selection are dropped.
embedded_vimeo = refined_vimeo.loc[
    refined_vimeo['asset_url'].str.contains('player.vimeo.com/', regex=False)
]
embedded_vimeo

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
10586,https://journal.ccas.fr/,https://player.vimeo.com/video/280962127,http/1.1,,unidentified,0,0,Fastly
27094,https://atheneainstitute.com/,https://player.vimeo.com/video/220764168?dnt=1&app_id=122963,http/1.1,,unidentified,0,0,Fastly
27095,https://atheneainstitute.com/,https://player.vimeo.com/video/347021590?dnt=1&app_id=122963,http/1.1,,unidentified,0,0,Fastly
44087,https://partners.artsy.net/,https://player.vimeo.com/video/288599625?title=0&byline=0&portrait=0&rel=0,http/1.1,TLS 1.2,html,0,0,Fastly
44089,https://partners.artsy.net/,https://player.vimeo.com/video/304211321?title=0&byline=0&portrait=0&rel=0,http/1.1,TLS 1.2,html,0,0,Fastly
...,...,...,...,...,...,...,...,...
392305495,https://valq.com/,https://player.vimeo.com/video/361969472?dnt=1&app_id=122963,http/1.1,,unidentified,0,0,Fastly
392305496,https://valq.com/,https://player.vimeo.com/video/255509480?dnt=1&app_id=122963,http/1.1,TLS 1.2,unidentified,0,0,Fastly
392305497,https://valq.com/,https://player.vimeo.com/video/378942982?dnt=1&app_id=122963,http/1.1,TLS 1.2,unidentified,0,0,Fastly
392305499,https://valq.com/,https://player.vimeo.com/video/342202161?dnt=1&app_id=122963,http/1.1,TLS 1.2,html,0,0,Fastly


In [47]:
# Number of distinct base pages that issued at least one Vimeo player request.
embedded_vimeo['base_page'].nunique()

58369

In [50]:
# Fraction of pages with a Vimeo player request.
# NOTE(review): 4278626 is presumably the total number of base pages in this
# crawl - confirm, and consider deriving or naming it instead of hardcoding.
embedded_vimeo['base_page'].nunique()/4278626

0.0136419962857235

In [111]:
# Coarse first pass for Dailymotion: any asset URL containing "dailymotion".
dailymotion_df = assets_df.loc[
    lambda frame: frame['asset_url'].str.contains('dailymotion', regex=False)
]

In [112]:
# Require a dailymotion.com subdomain followed by a path separator.
refined_dailymotion = dailymotion_df.loc[
    dailymotion_df['asset_url'].str.contains('.dailymotion.com/', regex=False)
]

In [113]:
# Keep only the embed requests (an /embed/ path on a dailymotion.com subdomain).
embedded_dailymotion = refined_dailymotion.loc[
    refined_dailymotion['asset_url'].str.contains('.dailymotion.com/embed/', regex=False)
]

In [114]:
# Total number of Dailymotion embed requests (rows, not pages).
len(embedded_dailymotion)

5521

In [115]:
# Number of distinct base pages that issued at least one Dailymotion embed request.
embedded_dailymotion['base_page'].nunique()

2280

In [116]:
# Provider labels of the Dailymotion embeds. The recorded result is empty,
# i.e. none of these rows carries a hybrid_ident value.
embedded_dailymotion['hybrid_ident'].value_counts()

Series([], Name: hybrid_ident, dtype: int64)

In [117]:
# Coarse first pass for Facebook: URLs containing "facebook.com/" or "fbcdn".
facebook_df = assets_df.loc[
    lambda frame: frame['asset_url'].str.contains('facebook.com/', regex=False)
    | frame['asset_url'].str.contains('fbcdn', regex=False)
]

In [118]:
# Preview the coarse Facebook selection (truncated to head and tail rows).
facebook_df

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
750,http://bosorkaplzen.cz/,https://staticxx.facebook.com/connect/xd_arbiter.php?version=45,HTTP/2,,html,0,0,Facebook
751,http://bosorkaplzen.cz/,https://www.facebook.com/x/oauth/status?client_id=334341610034299&input_token&origin=1&redirect_uri=http%3A%2F%2Fbosorkaplzen.cz%2F&sdk=joey&wants_cookie_data=false,HTTP/2,TLS 1.3,text,0,0,Facebook
775,http://bosorkaplzen.cz/,https://www.facebook.com/plugins/like_box.php?app_id=334341610034299&channel=https%3A%2F%2Fstaticxx.facebook.com%2Fconnect%2Fxd_arbiter.php%3Fversion%3D45%23cb%3Df2f041a3ff0391%26domain%3Dbosorkaplzen.cz%26origin%3Dhttp%253A%252F%252Fbosorkaplzen.cz%252Ff39da72962551c%26relation%3Dparent.parent&color_scheme=light&container_width=253&header=false&href=https%3A%2F%2Fwww.facebook.com%2Fbosorkaplzen%2F&locale=en_US&sdk=joey&show_border=false&show_faces=true&stream=false,HTTP/2,,html,0,0,Facebook
776,http://bosorkaplzen.cz/,"https://www.facebook.com/rsrc.php/v3/yE/l/0,cross/w00K6gP5Oh2.css?_nc_x=Ij3Wp8lg5Kz",HTTP/2,,text,0,0,Facebook
777,http://bosorkaplzen.cz/,https://www.facebook.com/rsrc.php/v3/yo/r/wr8T68pTljZ.js?_nc_x=Ij3Wp8lg5Kz,HTTP/2,,javascript,0,0,Facebook
...,...,...,...,...,...,...,...,...
392321990,https://agencepro.orange.fr/,https://www.facebook.com/tr/?id=1094611494063509&ev=Microdata&dl=https%3A%2F%2Fagencepro.orange.fr%2F&rl=&if=false&ts=1578710475295&cd[DataLayer]=%5B%5D&cd[Meta]=%7B%22title%22%3A%22Trouver%20une%20boutique%20Orange%20pro%20%3A%20agence%20pro%2C%20horaires%2C%20adresses%20-%20Magasin%20Orange%22%2C%22meta%3Adescription%22%3A%22Localisez%20votre%20boutique%20Orange%20disposant%20d%27un%20espace%20d%C3%A9di%C3%A9%20aux%20professionnels%20la%20plus%20proche%20de%20chez%20vous%20%3A%20adresses%2C%20horaires%2C%20prise%20de%20RDV%20avec%20un%20conseiller%E2%80%A6%22%7D&cd[OpenGraph]=%7B%22og%3Atype%22%3A%22website%22%2C%22og%3Atitle%22%3A%22Boutique%20Orange%20Pro%20%20%3A%20Venez%20rencontrer%20nos%20conseillers%22%2C%22og%3Adescription%22%3A%22Nos%20conseillers%20d%C3%A9di%C3%A9s%20pro%20vous%20accueillent%20sur%20rendez-vous%20et%20vous%20conseillent%20sur%20les%20solutions%20adapt%C3%A9es%20%C3%A0%20votre%20m%C3%A9tier%22%2C%22og%3Aurl%22%3A%22https%3A%2F%2Fagencepro.orange.fr%2F%22%2C%22og%3Aimage%22%3A%22https%3A%2F%2Fagencepro.orange.frhttps%3A%2F%2Fd1nuj3f2uuhf3u.cloudfront.net%2Ftemplates%2Forange%2Forange-pro%2Fversions%2F5193%2Fimages%2Fpos-image.png%22%7D&cd[Schema.org]=%5B%7B%22dimensions%22%3A%7B%22h%22%3A0%2C%22w%22%3A0%7D%2C%22properties%22%3A%7B%7D%2C%22subscopes%22%3A%5B%5D%2C%22type%22%3A%22https%3A%2F%2Fschema.org%2FWebsite%22%7D%5D&cd[JSON-LD]=%5B%5D&sw=1920&sh=1200&v=2.9.15&r=stable&a=tmtealium&ec=1&o=30&fbp=fb.1.1578710473921.184726943&it=1578710473068&coo=false&es=automatic&tm=3&rqm=GET,HTTP/2,,text,0,0,Facebook
392322090,https://www.townofbreckenridge.com/,https://www.facebook.com/tr/?id=2009811592678427&ev=PageView&dl=https%3A%2F%2Fwww.townofbreckenridge.com%2F&rl=&if=false&ts=1579186160766&sw=1920&sh=1200&v=2.9.5&r=c2&ec=0&o=30&fbp=fb.1.1579186160763.359785203&it=1579186160282&coo=false&rqm=GET,HTTP/2,,image,0,0,Facebook
392322091,https://www.townofbreckenridge.com/,https://www.facebook.com/tr/?id=2009811592678427&ev=Microdata&dl=https%3A%2F%2Fwww.townofbreckenridge.com%2F&rl=&if=false&ts=1579186162273&cd[DataLayer]=%5B%5D&cd[Meta]=%7B%22title%22%3A%22%5Cn%5Ct%5Cn%20%20%20%20Breckenridge%2C%20CO%20%7C%20Home%5Cn%5Cn%22%2C%22meta%3Adescription%22%3A%22%22%2C%22meta%3Akeywords%22%3A%22%22%7D&cd[OpenGraph]=%7B%7D&cd[Schema.org]=%5B%5D&cd[JSON-LD]=%5B%5D&sw=1920&sh=1200&v=2.9.5&r=c2&ec=1&o=30&fbp=fb.1.1579186160763.359785203&it=1579186160282&coo=false&es=automatic&rqm=GET,HTTP/2,,image,0,0,Facebook
392322334,https://vsmart.net/,https://www.facebook.com/tr/?id=2297236480534310&ev=PageView&dl=https%3A%2F%2Fvsmart.net%2Fvn-vi&rl=&if=false&ts=1577943159313&sw=1920&sh=1200&v=2.9.15&r=stable&ec=0&o=30&fbp=fb.1.1577943159310.249709653&it=1577943157818&coo=false&rqm=GET,HTTP/2,TLS 1.3,image,0,0,Facebook


In [119]:
# Narrow the Facebook selection to video-related URLs: a '/videos/' path
# segment, a 'video.php?' endpoint, or the text 'mp4' anywhere in the URL.
# All three are plain substring matches, so hits inside a query string count too.
refined_facebook = facebook_df.loc[
    lambda frame: frame['asset_url'].str.contains('/videos/', regex=False)
    | frame['asset_url'].str.contains('video.php?', regex=False)
    | frame['asset_url'].str.contains('mp4', regex=False)
]

In [120]:
# Preview the video-related Facebook requests (truncated to head and tail rows).
refined_facebook

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
32860,http://www.937themountain.com/,https://scontent-sea1-1.xx.fbcdn.net/v/t42.1790-29/10000000_126289718860956_4227745465434243685_n.mp4?_nc_cat=104&efg=eyJ2ZW5jb2RlX3RhZyI6ImRhc2hfbGl2ZV9tZF9mcmFnXzJfdmlkZW8ifQ%3D%3D&_nc_ohc=LwoNXcSTtRQAQkLtKJGcIwoqzs4pZqRgnsAsfu_WhDLGVAjPRt_muY2kA&_nc_ht=scontent-sea1-1.xx&oh=75d2ef47f60d3297e013d304a3175ce4&oe=5E1BF68A&bytestart=0&byteend=905,HTTP/2,,video,0,0,
32861,http://www.937themountain.com/,https://scontent-sea1-1.xx.fbcdn.net/v/t42.1790-29/10000000_126289718860956_4227745465434243685_n.mp4?_nc_cat=104&efg=eyJ2ZW5jb2RlX3RhZyI6ImRhc2hfbGl2ZV9tZF9mcmFnXzJfdmlkZW8ifQ%3D%3D&_nc_ohc=LwoNXcSTtRQAQkLtKJGcIwoqzs4pZqRgnsAsfu_WhDLGVAjPRt_muY2kA&_nc_ht=scontent-sea1-1.xx&oh=75d2ef47f60d3297e013d304a3175ce4&oe=5E1BF68A&bytestart=906&byteend=2329,HTTP/2,,video,0,0,Facebook
32862,http://www.937themountain.com/,https://scontent-sea1-1.xx.fbcdn.net/v/t42.1790-29/10000000_126289718860956_4227745465434243685_n.mp4?_nc_cat=104&efg=eyJ2ZW5jb2RlX3RhZyI6ImRhc2hfbGl2ZV9tZF9mcmFnXzJfdmlkZW8ifQ%3D%3D&_nc_ohc=LwoNXcSTtRQAQkLtKJGcIwoqzs4pZqRgnsAsfu_WhDLGVAjPRt_muY2kA&_nc_ht=scontent-sea1-1.xx&oh=75d2ef47f60d3297e013d304a3175ce4&oe=5E1BF68A&bytestart=2330&byteend=177484,HTTP/2,,video,0,0,Facebook
32863,http://www.937themountain.com/,https://scontent-sea1-1.xx.fbcdn.net/v/t42.1790-29/82810904_149662029791007_905336144741050619_n.mp4?_nc_cat=103&efg=eyJ2ZW5jb2RlX3RhZyI6ImRhc2hfbGl2ZV9tZF9mcmFnXzJfYXVkaW8ifQ%3D%3D&_nc_ohc=OgXFGGY_DuIAQmiyD8dZ2L_sEv5w_VFy7-AQnOiIR2AkpnSAiugfbyZSg&_nc_ht=scontent-sea1-1.xx&oh=f58678ae460543075a6e3288e004a7ce&oe=5E1C0703&bytestart=0&byteend=853,HTTP/2,,video,0,0,Facebook
32864,http://www.937themountain.com/,https://scontent-sea1-1.xx.fbcdn.net/v/t42.1790-29/82810904_149662029791007_905336144741050619_n.mp4?_nc_cat=103&efg=eyJ2ZW5jb2RlX3RhZyI6ImRhc2hfbGl2ZV9tZF9mcmFnXzJfYXVkaW8ifQ%3D%3D&_nc_ohc=OgXFGGY_DuIAQmiyD8dZ2L_sEv5w_VFy7-AQnOiIR2AkpnSAiugfbyZSg&_nc_ht=scontent-sea1-1.xx&oh=f58678ae460543075a6e3288e004a7ce&oe=5E1C0703&bytestart=854&byteend=2349,HTTP/2,,video,0,0,Facebook
...,...,...,...,...,...,...,...,...
392295032,https://vbautoparts.com/,https://video-sea1-1.xx.fbcdn.net/v/t39.25447-2/81967304_104622264316539_4533733582025209670_n.mp4?_nc_cat=104&efg=eyJ2ZW5jb2RlX3RhZyI6ImRhc2hfcGFzc3Rocm91Z2hhbGlnbmVkX2hxMV9mcmFnXzJfdmlkZW8ifQ%3D%3D&_nc_ohc=aNmgdjrKTD4AQls37qWR_YutA2OenDVAXGHyUgGLCgEB69WKJ8_XG2iMg&_nc_ht=video-sea1-1.xx&oh=0fce7688034fb2687b8fc0d7fd9ebf62&oe=5EAFA5E4&bytestart=0&byteend=950,HTTP/2,,video,0,0,Facebook
392295033,https://vbautoparts.com/,https://video-sea1-1.xx.fbcdn.net/v/t39.25447-2/82016061_2821519461240863_3861404299498495517_n.mp4?_nc_cat=110&efg=eyJ2ZW5jb2RlX3RhZyI6ImRhc2hfcGFzc3Rocm91Z2hhbGlnbmVkX2hxNF9mcmFnXzJfdmlkZW8ifQ%3D%3D&_nc_ohc=0a9VobPtfGEAQmVH_4rtKiC1lc-ZkgNrb9xuF36aTdgFFL_4GbaJTTFeg&_nc_ht=video-sea1-1.xx&oh=86a4bd64489fe3e48cc6b9658ab9c40f&oe=5E67BD68&bytestart=951&byteend=1078,HTTP/2,,video,0,0,Facebook
392295034,https://vbautoparts.com/,https://video-sea1-1.xx.fbcdn.net/v/t39.25447-2/82016061_2821519461240863_3861404299498495517_n.mp4?_nc_cat=110&efg=eyJ2ZW5jb2RlX3RhZyI6ImRhc2hfcGFzc3Rocm91Z2hhbGlnbmVkX2hxNF9mcmFnXzJfdmlkZW8ifQ%3D%3D&_nc_ohc=0a9VobPtfGEAQmVH_4rtKiC1lc-ZkgNrb9xuF36aTdgFFL_4GbaJTTFeg&_nc_ht=video-sea1-1.xx&oh=86a4bd64489fe3e48cc6b9658ab9c40f&oe=5E67BD68&bytestart=0&byteend=950,HTTP/2,,video,0,0,Facebook
392295035,https://vbautoparts.com/,https://video-sea1-1.xx.fbcdn.net/v/t42.1790-2/82283615_2550692015147535_3997187304878768128_n.mp4?_nc_cat=105&efg=eyJ2ZW5jb2RlX3RhZyI6ImRhc2hfdjRhdWRpb2ZvcnNvdXJjZXBpcGVsaW5lXzEyOF9jcmZfMjNfbWFpbl8zLjBfZnJhZ18yX2F1ZGlvIn0%3D&_nc_ohc=bFCdudTA4g0AQmguodYBE45KcEPwAYnENK950e3--7XEpGDY9i66xG3kg&_nc_ht=video-sea1-1.xx&oh=5ed78f2820ad6a12741bc626e1f734b7&oe=5E13707E&bytestart=25767&byteend=38060,HTTP/2,,video,0,0,Facebook


In [122]:
# Distribution of MIME types among the video-related Facebook requests.
refined_facebook['mime_type'].value_counts()

video           649880
html            8924  
unidentified    2431  
image           666   
text            125   
javascript      83    
no type         41    
application     3     
Name: mime_type, dtype: int64

In [123]:
# Provider labels of the video-related Facebook requests; labels other than
# 'Facebook' hint at URLs that matched only through their query string.
refined_facebook['hybrid_ident'].value_counts()

Facebook      655168
Cloudflare    11    
Google        10    
Amazon        6     
StackPath     3     
Highwinds     3     
Microsoft     2     
NetDNA        2     
Fastly        1     
Name: hybrid_ident, dtype: int64

In [124]:
# Inspect the matches that are labelled 'Cloudflare' rather than 'Facebook'.
refined_facebook.loc[refined_facebook['hybrid_ident'].eq('Cloudflare')]

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
4017717,http://www.longmontpublichouse.com/,http://cdn.embedly.com/widgets/media.html?type=text%2Fhtml&key=96f1f04c5f4143bcb0f2e68c87d65feb&schema=facebook&url=https%3A//www.facebook.com/LongmontPublicHouse/videos/2172595419709990/&image=https%3A//scontent-iad3-1.xx.fbcdn.net/v/t15.5256-10/p200x200/74526855_2172596889709843_1147163224773230592_n.jpg%3F_nc_cat%3D110%26_nc_ohc%3D6cX6RTtl4J8AQmhn8IwH0VruETS54lWBa_sGPCpQztS5Aoe70W-QKDNcA%26_nc_ht%3Dscontent-iad3-1.xx%26oh%3D0ffb5786afcee888a9815d0dd6443d0f%26oe%3D5E7296E9,http/1.1,,html,0,0,Cloudflare
5720636,https://www.mainstreethanford.com/,https://cdn.embedly.com/widgets/media.html?src=https%3A%2F%2Fscontent-iad3-1.xx.fbcdn.net%2Fv%2Ft42.9040-4%2F34320094_1886722144960957_6392879891758448640_n.mp4%3F_nc_cat%3D0%26efg%3DeyJ2ZW5jb2RlX3RhZyI6InN2ZV9zZCJ9%26oh%3D7db6331588db646338acc130a7bbeb1d%26oe%3D5B15A33D&src_secure=1&url=https%3A%2F%2Fwww.facebook.com%2F1125403737491507%2Fvideos%2F1886987111333162%2F&image=https%3A%2F%2Fscontent-iad3-1.xx.fbcdn.net%2Fv%2Ft15.0-10%2Fp720x720%2F30820449_1539996899460734_524330973295280128_n.jpg%3F_nc_cat%3D0%26oh%3D97bef1dbf59aa216e02af4731be0a25c%26oe%3D5BB83E86&key=61d05c9d54e8455ea7a9677c366be814&type=video%2Fmp4&schema=facebook&wmode=opaque,http/1.1,TLS 1.2,html,0,0,Cloudflare
68400919,https://www.pitchersbardc.com/,https://cdn.embedly.com/widgets/media.html?src=https%3A%2F%2Fvideo-iad3-1.xx.fbcdn.net%2Fv%2Ft42.9040-2%2F34581263_2071283449566415_5827837881242091520_n.mp4%3F_nc_cat%3D0%26efg%3DeyJybHIiOjM2OSwicmxhIjo2MzUsInZlbmNvZGVfdGFnIjoic3ZlX3NkIn0%253D%26rl%3D369%26vabr%3D205%26oh%3D7f8f7c3a606366a8adf000e92bbe927e%26oe%3D5B2AA839&src_secure=1&url=https%3A%2F%2Fwww.facebook.com%2Fkiddoshea%2Fvideos%2F1704079163018580%2F&image=https%3A%2F%2Fscontent-iad3-1.xx.fbcdn.net%2Fv%2Ft15.0-10%2F29784902_1704085803017916_6901716130583281664_n.jpg%3F_nc_cat%3D0%26oh%3Dc8bc0f0c4c71830669f5a4f2df97ad16%26oe%3D5BAC3B71&key=61d05c9d54e8455ea7a9677c366be814&type=video%2Fmp4&schema=facebook&wmode=opaque,http/1.1,TLS 1.2,html,0,0,Cloudflare
152044124,https://www.forkandspoonkitchen.org/,https://cdn.embedly.com/widgets/media.html?src=https%3A%2F%2Fvideo-iad3-1.xx.fbcdn.net%2Fv%2Ft42.9040-2%2F34339322_193741314787643_5898392541669097472_n.mp4%3F_nc_cat%3D0%26efg%3DeyJybHIiOjM2OSwicmxhIjo1MTIsInZlbmNvZGVfdGFnIjoic3ZlX3NkIn0%253D%26rl%3D369%26vabr%3D205%26oh%3D5ae662c62a3c39f57c7ee4f8068b6e94%26oe%3D5B47D0DE&src_secure=1&url=https%3A%2F%2Fwww.facebook.com%2Fforkandspoonkitchen%2Fvideos%2F1619321148166340%2F&image=https%3A%2F%2Fscontent-iad3-1.xx.fbcdn.net%2Fv%2Ft15.0-10%2F30851940_378355309342961_261289347678994432_n.jpg%3F_nc_cat%3D0%26oh%3D3ee9bea529975f86bd2cbb3424dd4de6%26oe%3D5BE16B45&key=61d05c9d54e8455ea7a9677c366be814&type=video%2Fmp4&schema=facebook&wmode=opaque,http/1.1,TLS 1.2,html,0,0,Cloudflare
234235474,http://www.peacecontests.org/,http://cdn.embedly.com/widgets/media.html?type=text%2Fhtml&key=96f1f04c5f4143bcb0f2e68c87d65feb&schema=facebook&url=https%3A//www.facebook.com/2019video/videos/2376461812678023/&image=https%3A//scontent-iad3-1.xx.fbcdn.net/v/t15.13418-10/55719878_2356893124355838_333462497812021248_n.jpg%3F_nc_cat%3D100%26_nc_ht%3Dscontent-iad3-1.xx%26oh%3Dfabfce3ee396b896b12390557cd63c17%26oe%3D5D2F2AF8,http/1.1,,html,0,0,Cloudflare
234235475,http://www.peacecontests.org/,http://cdn.embedly.com/widgets/media.html?type=text%2Fhtml&key=96f1f04c5f4143bcb0f2e68c87d65feb&schema=facebook&url=https%3A//www.facebook.com/2019video/videos/1514879685310512/&image=https%3A//scontent-iad3-1.xx.fbcdn.net/v/t15.13418-10/54246535_342643836395038_1042313727229034496_n.jpg%3F_nc_cat%3D102%26_nc_ht%3Dscontent-iad3-1.xx%26oh%3Dcae314a26d32e26235ce40190765a40c%26oe%3D5D459D55,http/1.1,,html,0,0,Cloudflare
234235476,http://www.peacecontests.org/,http://cdn.embedly.com/widgets/media.html?type=text%2Fhtml&key=96f1f04c5f4143bcb0f2e68c87d65feb&schema=facebook&url=https%3A//www.facebook.com/2019video/videos/1997622673865947/&image=https%3A//scontent-iad3-1.xx.fbcdn.net/v/t15.13418-10/53918466_259811278296545_5607286637668073472_n.jpg%3F_nc_cat%3D110%26_nc_ht%3Dscontent-iad3-1.xx%26oh%3D42a3d797a937527f0ac766fbee12567f%26oe%3D5D37CF8C,http/1.1,,html,0,0,Cloudflare
234235477,http://www.peacecontests.org/,http://cdn.embedly.com/widgets/media.html?type=text%2Fhtml&key=96f1f04c5f4143bcb0f2e68c87d65feb&schema=facebook&url=https%3A//www.facebook.com/2019video/videos/444956579412043/&image=https%3A//scontent-iad3-1.xx.fbcdn.net/v/t15.5256-10/53637755_444959349411766_5842456850406047744_n.jpg%3F_nc_cat%3D103%26_nc_ht%3Dscontent-iad3-1.xx%26oh%3Ded224f08913ec993b18b2837fcd18611%26oe%3D5D453306,http/1.1,,html,0,0,Cloudflare
234235478,http://www.peacecontests.org/,http://cdn.embedly.com/widgets/media.html?type=text%2Fhtml&key=96f1f04c5f4143bcb0f2e68c87d65feb&schema=facebook&url=https%3A//www.facebook.com/2019video/videos/2400323363588210/&image=https%3A//scontent-iad3-1.xx.fbcdn.net/v/t15.5256-10/54246735_2400348093585737_2393704188820324352_n.jpg%3F_nc_cat%3D110%26_nc_ht%3Dscontent-iad3-1.xx%26oh%3D035c90fa9e080f61bb4f41fd605a0259%26oe%3D5D3D768E,http/1.1,,html,0,0,Cloudflare
338393414,https://www.nisahomes.com/,https://cdn.embedly.com/widgets/media.html?src=https%3A%2F%2Fvideo-iad3-1.xx.fbcdn.net%2Fv%2Ft42.9040-2%2F33680441_234638267293100_6649367504655220736_n.mp4%3F_nc_cat%3D0%26efg%3DeyJybHIiOjM0NywicmxhIjo1MjIsInZlbmNvZGVfdGFnIjoic3ZlX3NkIn0%253D%26rl%3D347%26vabr%3D193%26oh%3D4379fafc78f92e5e1a2b121a24402761%26oe%3D5B0F15FC&src_secure=1&url=https%3A%2F%2Fwww.facebook.com%2Fnisahomes%2Fvideos%2F1834946803473244%2F&image=https%3A%2F%2Fscontent-iad3-1.xx.fbcdn.net%2Fv%2Ft15.0-10%2Fp720x720%2F31932112_1834948380139753_1509059836308881408_n.jpg%3F_nc_cat%3D0%26oh%3D190b2a889c9d775d9f69d8eca06a0a88%26oe%3D5B8BF9D0&key=61d05c9d54e8455ea7a9677c366be814&type=video%2Fmp4&schema=facebook&wmode=opaque,http/1.1,TLS 1.2,html,0,0,Cloudflare


In [125]:
# Rows of refined_facebook whose asset URL contains the literal text 'embed'.
# regex=False keeps this a plain substring test, like the other URL filters in this
# notebook. The test runs over the whole URL, so query-string hits (e.g. 'embedUrl'
# parameters or '...%2Fembed%2F...' paths of other players) come back together with
# cdn.embedly.com hosts.
# NOTE(review): if only embedded-player hosts are wanted, filter on the hostname instead.
refined_facebook[refined_facebook['asset_url'].str.contains('embed', regex=False)]

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
4017717,http://www.longmontpublichouse.com/,http://cdn.embedly.com/widgets/media.html?type=text%2Fhtml&key=96f1f04c5f4143bcb0f2e68c87d65feb&schema=facebook&url=https%3A//www.facebook.com/LongmontPublicHouse/videos/2172595419709990/&image=https%3A//scontent-iad3-1.xx.fbcdn.net/v/t15.5256-10/p200x200/74526855_2172596889709843_1147163224773230592_n.jpg%3F_nc_cat%3D110%26_nc_ohc%3D6cX6RTtl4J8AQmhn8IwH0VruETS54lWBa_sGPCpQztS5Aoe70W-QKDNcA%26_nc_ht%3Dscontent-iad3-1.xx%26oh%3D0ffb5786afcee888a9815d0dd6443d0f%26oe%3D5E7296E9,http/1.1,,html,0,0,Cloudflare
4033264,https://www.childrenshospital.org/,https://www.facebook.com/tr/?id=486220931531947&ev=Microdata&dl=https%3A%2F%2Fwww.childrenshospital.org%2F&rl=&if=false&ts=1579145438083&cd[DataLayer]=%5B%5D&cd[Meta]=%7B%22title%22%3A%22Boston%20Children%27s%20Hospital%22%2C%22meta%3Akeywords%22%3A%22%22%2C%22meta%3Adescription%22%3A%22Learn%20about%20Boston%20Children%E2%80%99s%20Hospital%2C%20ranked%20the%20%231%20pediatric%20hospital%20in%20the%20country%20by%20U.S.%20News%20and%20World%20Report.%22%7D&cd[OpenGraph]=%7B%7D&cd[Schema.org]=%5B%5D&cd[JSON-LD]=%5B%5B%7B%22%40context%22%3A%22http%3A%2F%2Fschema.org%22%2C%22%40type%22%3A%22VideoObject%22%2C%22name%22%3A%22Bienvenido%20al%20Hospital%20de%20Ni%C3%B1os%20de%20Boston%20%7C%20Boston%20Children%27s%20Hospital%22%2C%22description%22%3A%22Una%20gu%C3%ADa%20de%20apoyo%20para%20la%20experiencia%20de%20los%20pacientes.%22%2C%22thumbnailUrl%22%3A%22https%3A%2F%2Ff1.media.brightcove.com%2F8%2F5498268461001%2F5498268461001_5751230488001_5750036198001-th.jpg%3FpubId%3D5498268461001%26videoId%3D5750036198001%22%2C%22dateCreated%22%3A%222018-03-12T21%3A02%3A32.835Z%22%2C%22dateModified%22%3A%222018-03-14T13%3A08%3A18.929Z%22%2C%22uploadDate%22%3A%222018-03-12T21%3A02%3A32.835Z%22%2C%22duration%22%3A%22P221.268S%22%2C%22contentUrl%22%3A%22http%3A%2F%2Ff1.media.brightcove.com%2F4%2F5498268461001%2F5498268461001_5750038836001_5750036198001.mp4%3FpubId%3D5498268461001%26videoId%3D5750036198001%22%2C%22embedUrl%22%3A%22https%3A%2F%2Fwww.childrenshospital.org%2F%3FvideoId%3D5750036198001%22%2C%22keywords%22%3A%22%22%7D%5D%5D&sw=1920&sh=1200&v=2.9.15&r=stable&ec=1&o=30&fbp=fb.1.1579145437546.1018193286&it=1579145435741&coo=false&es=automatic&tm=3&rqm=GET,HTTP/2,,image,0,0,Facebook
5720636,https://www.mainstreethanford.com/,https://cdn.embedly.com/widgets/media.html?src=https%3A%2F%2Fscontent-iad3-1.xx.fbcdn.net%2Fv%2Ft42.9040-4%2F34320094_1886722144960957_6392879891758448640_n.mp4%3F_nc_cat%3D0%26efg%3DeyJ2ZW5jb2RlX3RhZyI6InN2ZV9zZCJ9%26oh%3D7db6331588db646338acc130a7bbeb1d%26oe%3D5B15A33D&src_secure=1&url=https%3A%2F%2Fwww.facebook.com%2F1125403737491507%2Fvideos%2F1886987111333162%2F&image=https%3A%2F%2Fscontent-iad3-1.xx.fbcdn.net%2Fv%2Ft15.0-10%2Fp720x720%2F30820449_1539996899460734_524330973295280128_n.jpg%3F_nc_cat%3D0%26oh%3D97bef1dbf59aa216e02af4731be0a25c%26oe%3D5BB83E86&key=61d05c9d54e8455ea7a9677c366be814&type=video%2Fmp4&schema=facebook&wmode=opaque,http/1.1,TLS 1.2,html,0,0,Cloudflare
5720642,https://www.mainstreethanford.com/,https://www.facebook.com/v2.3/plugins/video.php?app_id=52049637695&channel=https%3A%2F%2Fstaticxx.facebook.com%2Fconnect%2Fxd_arbiter.php%3Fversion%3D45%23cb%3Df455decedb0ab4%26domain%3Dcdn.embedly.com%26origin%3Dhttps%253A%252F%252Fcdn.embedly.com%252Ff6fd583af100bc%26relation%3Dparent.parent&container_width=661&href=https%3A%2F%2Fwww.facebook.com%2F1125403737491507%2Fvideos%2F1886987111333162%2F&locale=en_US&sdk=joey,HTTP/2,,html,0,0,Facebook
8600020,https://www.ingresos.tv/,https://www.facebook.com/tr/?id=711964975637818&ev=Microdata&dl=https%3A%2F%2Fwww.ingresos.tv%2F&rl=&if=false&ts=1578335479854&cd[DataLayer]=%5B%5D&cd[Meta]=%7B%22title%22%3A%22%5Cn%20%20%20%20%20%20%5Cn%20%20%20%20%20%20%20%20Ingresos.TV%20-%20Negocio%20Simple%20Vida%20Simple%5Cn%20%20%20%20%20%20%5Cn%20%20%20%20%22%7D&cd[OpenGraph]=%7B%22og%3Atype%22%3A%22website%22%2C%22og%3Aurl%22%3A%22http%3A%2F%2Fwww.ingresos.tv%2F%22%2C%22og%3Atitle%22%3A%22Ingresos.TV%20-%20Negocio%20Simple%20Vida%20Simple%22%2C%22og%3Aimage%22%3A%22https%3A%2F%2Fkajabi-storefronts-production.global.ssl.fastly.net%2Fkajabi-storefronts-production%2Fsites%2F19719%2Fimages%2FNCBZiw7QVyfv3hMJeS8b_Contrast_blue_r006-BIG_I_NGRESOS_TV_LOGO.png%22%7D&cd[Schema.org]=%5B%5D&cd[JSON-LD]=%5B%7B%22%40context%22%3A%22http%3A%2F%2Fschema.org%2F%22%2C%22%40id%22%3A%22https%3A%2F%2Ffast.wistia.net%2Fembed%2Fiframe%2Fhphh6zqysd%22%2C%22%40type%22%3A%22VideoObject%22%2C%22duration%22%3A%22PT18S%22%2C%22name%22%3A%22sites%2F19719%2Fvideo%2FBtyhJqTiTcZEwTXRuA5U_Discussion_-_8252_1_.mp4%22%2C%22thumbnailUrl%22%3A%22blank.jpg%22%2C%22embedUrl%22%3A%22https%3A%2F%2Ffast.wistia.net%2Fembed%2Fiframe%2Fhphh6zqysd%22%2C%22uploadDate%22%3A%222018-03-20%22%2C%22description%22%3A%22a%20Kajabi%20Site%2019719%20video%22%7D%5D&sw=1920&sh=1200&ud[ct]=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855&ud[fn]=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855&ud[ln]=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855&ud[pn]=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855&ud[zp]=e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855&v=2.9.15&r=stable&a=plkajabi&ec=1&o=30&fbp=fb.1.1578335479339.139119013&it=1578335478368&coo=false&es=automatic&tm=3&rqm=GET,HTTP/2,,image,0,0,Facebook
...,...,...,...,...,...,...,...,...
390581648,http://www.roaddday.com/,https://www.facebook.com/v3.3/plugins/video.php?allowfullscreen=true&app_id=601787263304111&channel=https%3A%2F%2Fstaticxx.facebook.com%2Fconnect%2Fxd_arbiter.php%3Fversion%3D45%23cb%3Df7135084850c34%26domain%3Dwww.roaddday.com%26origin%3Dhttps%253A%252F%252Fwww.roaddday.com%252Ff245b01d8a0fa3c%26relation%3Dparent.parent&container_width=0&href=https%3A%2F%2Fwww.roaddday.com%2F%253Ciframe%2520width%3D%2522560%2522%2520height%3D%2522315%2522%2520src%3D%2522https%3A%2F%2Fwww.youtube.com%2Fembed%2FCL_MltwLYEA%2522%2520frameborder%3D%25220%2522%2520allow%3D%2522autoplay%3B%2520encrypted-media%2522%2520allowfullscreen%253E%253C%2Fiframe%253E&locale=en_US&sdk=joey&width=0,HTTP/2,,unidentified,0,0,Facebook
390581654,http://www.roaddday.com/,https://www.facebook.com/v3.3/plugins/video.php?allowfullscreen=true&app_id=601787263304111&channel=https%3A%2F%2Fstaticxx.facebook.com%2Fconnect%2Fxd_arbiter.php%3Fversion%3D45%23cb%3Df1828944f37b94%26domain%3Dwww.roaddday.com%26origin%3Dhttps%253A%252F%252Fwww.roaddday.com%252Ff245b01d8a0fa3c%26relation%3Dparent.parent&container_width=0&href=https%3A%2F%2Fwww.roaddday.com%2F%253Ciframe%2520width%3D%2522560%2522%2520height%3D%2522315%2522%2520src%3D%2522https%3A%2F%2Fwww.youtube.com%2Fembed%2F6XtI4m0QYPo%2522%2520frameborder%3D%25220%2522%2520allow%3D%2522autoplay%3B%2520encrypted-media%2522%2520allowfullscreen%253E%253C%2Fiframe%253E&locale=en_US&sdk=joey&width=0,HTTP/2,,unidentified,0,0,Facebook
390683655,https://expertsecretsacademy.com/,https://www.facebook.com/tr/?id=1423340474575422&ev=Microdata&dl=https%3A%2F%2Fexpertsecretsacademy.com%2F&rl=&if=false&ts=1579093827488&cd[DataLayer]=%5B%5D&cd[Meta]=%7B%22title%22%3A%22Homepage%20%7C%20Expert%20Secrets%20Academy%22%7D&cd[OpenGraph]=%7B%22og%3Atitle%22%3A%22Homepage%22%2C%22og%3Atype%22%3A%22website%22%2C%22og%3Aimage%22%3A%22https%3A%2F%2Fwww.filepicker.io%2Fapi%2Ffile%2FGUHjwT9KQiWTOrRgFqR9%22%2C%22og%3Aurl%22%3A%22https%3A%2F%2Fexpertsecretsacademy.com%2Fp%2Fhome%22%7D&cd[Schema.org]=%5B%5D&cd[JSON-LD]=%5B%7B%22%40context%22%3A%22http%3A%2F%2Fschema.org%2F%22%2C%22%40id%22%3A%22https%3A%2F%2Ffast.wistia.net%2Fembed%2Fiframe%2F7nid03260i%22%2C%22%40type%22%3A%22VideoObject%22%2C%22duration%22%3A%22PT1M52S%22%2C%22name%22%3A%22video%20promocional.mp4%22%2C%22thumbnailUrl%22%3A%22https%3A%2F%2Fwww.filepicker.io%2Fapi%2Ffile%2FyJnL8U13Q9ySDk8Lnh6P%22%2C%22embedUrl%22%3A%22https%3A%2F%2Ffast.wistia.net%2Fembed%2Fiframe%2F7nid03260i%22%2C%22uploadDate%22%3A%222018-03-13%22%2C%22description%22%3A%22a%20school_170659%20video%22%7D%5D&sw=1920&sh=1200&v=2.9.15&r=stable&ec=1&o=30&fbp=fb.1.1579093826979.1134121031&it=1579093825988&coo=false&es=automatic&tm=3&rqm=GET,HTTP/2,,image,0,0,Facebook
391060667,https://www.jenhatmakerbookclub.com/,https://www.facebook.com/tr/?id=2315638071830517&ev=Microdata&dl=https%3A%2F%2Fwww.jenhatmakerbookclub.com%2F&rl=&if=false&ts=1579207266160&cd[DataLayer]=%5B%5D&cd[Meta]=%7B%22title%22%3A%22%5Cn%20%20%20%20%20%20%5Cn%20%20%20%20%20%20%20%20The%20Jen%20Hatmaker%20Book%20Club%20%E2%80%93%C2%A0Online%2C%20Virtual%20Book%20Club%20for%20Women%5Cn%20%20%20%20%20%20%5Cn%20%20%20%20%22%7D&cd[OpenGraph]=%7B%22og%3Atype%22%3A%22website%22%2C%22og%3Aurl%22%3A%22https%3A%2F%2Fwww.jenhatmakerbookclub.com%2F%22%2C%22og%3Atitle%22%3A%22The%20Jen%20Hatmaker%20Book%20Club%20%E2%80%93%C2%A0Online%2C%20Virtual%20Book%20Club%20for%20Women%22%2C%22og%3Aimage%22%3A%22https%3A%2F%2Fkajabi-storefronts-production.global.ssl.fastly.net%2Fkajabi-storefronts-production%2Fsites%2F49933%2Fimages%2FcDBs7Qu3Sg6jQ2LNeZlA_hatmaker-book-club-logo-facebook.jpg%22%7D&cd[Schema.org]=%5B%5D&cd[JSON-LD]=%5B%7B%22%40context%22%3A%22http%3A%2F%2Fschema.org%2F%22%2C%22%40id%22%3A%22https%3A%2F%2Ffast.wistia.net%2Fembed%2Fiframe%2Fixoj9ukogf%22%2C%22%40type%22%3A%22VideoObject%22%2C%22duration%22%3A%22PT1M3S%22%2C%22name%22%3A%22sites%2F49933%2Fvideo%2FKHA2Ek4jQTiFz5LrRR90_JHBC_Primary_LP_Promo_Video.mp4%22%2C%22thumbnailUrl%22%3A%22https%3A%2F%2Fkajabi-storefronts-production.global.ssl.fastly.net%2Fkajabi-storefronts-production%2Fthemes%2F876625%2Fsettings_images%2F21IP7yJ0S22wqaVx6gB9_Jen.png%22%2C%22embedUrl%22%3A%22https%3A%2F%2Ffast.wistia.net%2Fembed%2Fiframe%2Fixoj9ukogf%22%2C%22uploadDate%22%3A%222019-05-03%22%2C%22description%22%3A%22a%20Kajabi%20Site%2049933%20video%22%7D%5D&sw=1920&sh=1200&v=2.9.15&r=stable&a=plkajabi&ec=1&o=30&fbp=fb.1.1579207264742.578391520&it=1579207263450&coo=false&es=automatic&tm=3&rqm=GET,HTTP/2,,image,0,0,Facebook


In [126]:
# Number of distinct base pages that have at least one row in refined_facebook.
refined_facebook['base_page'].nunique()

37035

In [121]:
# Every asset whose URL mentions 'instagram.com' anywhere. This is a literal substring
# test, not a hostname test, so *.cdninstagram.com hosts and third-party URLs that carry
# an instagram.com link in their query string are included as well.
tmp_df = assets_df.loc[
    lambda frame: frame['asset_url'].str.contains('instagram.com', regex=False)
]

In [128]:
# Show the rows of assets_df whose asset_url contains 'instagram.com' (truncated by pandas).
tmp_df

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
2135,http://pcarmall.com/,https://api.instagram.com/v1/users/6069247473/media/recent?access_token=6069247473.e3cc986.cef278a9ebe643a2a8bd8bda5fabf514&count=8&callback=instafeedCache6f2ee509ea78cb75.parse,HTTP/2,TLS 1.3,javascript,0,0,Facebook
2171,http://pcarmall.com/,http://scontent.cdninstagram.com/v/t51.2885-15/e35/s150x150/79169355_128194035317990_6514362458091921600_n.jpg?_nc_ht=scontent.cdninstagram.com&_nc_ohc=0OAoLXhJuYAAX8ODDWy&oh=9668d24cbdfd5e27e0eec820d6172cb8&oe=5EB06FC0,http/1.1,,image,0,0,Facebook
2172,http://pcarmall.com/,http://scontent.cdninstagram.com/v/t51.2885-15/e35/c121.0.837.837a/s150x150/79387162_452136028811857_702905703010022053_n.jpg?_nc_ht=scontent.cdninstagram.com&_nc_ohc=h7hz4MccaBkAX8HVPWk&oh=31714e7ea34e825c2c4374fc525038cf&oe=5EA37705,http/1.1,,image,0,0,Facebook
2173,http://pcarmall.com/,http://scontent.cdninstagram.com/v/t51.2885-15/e35/c236.0.608.608a/s150x150/80497016_167126341320843_8628863971598076777_n.jpg?_nc_ht=scontent.cdninstagram.com&_nc_ohc=ob-F_4dODeQAX9abrKj&oh=b4c78f15fe0bb43955d10d248bed6ac2&oe=5E940045,http/1.1,,image,0,0,Facebook
2174,http://pcarmall.com/,http://scontent.cdninstagram.com/v/t51.2885-15/e35/c180.0.719.719a/s150x150/79390022_474164543301976_8805773158475615966_n.jpg?_nc_ht=scontent.cdninstagram.com&_nc_ohc=jSEsIw4UxjIAX9t8G8C&oh=308cb0712f124cdd17d71b6a28384254&oe=5E8D33DA,http/1.1,,image,0,0,Facebook
...,...,...,...,...,...,...,...,...
392321712,https://us.mamonde.com/,https://api.bazaarvoice.com/curations/c3/imagingAPI?passkey=kuuqd395w5u7gv43987gxshh&url=https%3A%2F%2Fscontent-lga3-1.cdninstagram.com%2Fvp%2F326c6fa1ece34f75b5567d37bf3ec995%2F5CF229AD%2Ft51.2885-19%2Fs150x150%2F44910224_2196180740702952_779745857980334080_n.jpg&media_type=avatar&permalink_url=https%3A%2F%2Fwww.instagram.com%2Fp%2FBtjfh__HJhl%2F&client=mamonde&contentId=5c5b69799344be605985aff4,http/1.1,,image,0,0,Amazon
392321713,https://us.mamonde.com/,https://api.bazaarvoice.com/curations/c3/imagingAPI?passkey=kuuqd395w5u7gv43987gxshh&url=https%3A%2F%2Fscontent-iad3-1.cdninstagram.com%2Fvp%2F7c4c6bf9348fc1bd36e3f4400d51167e%2F5D975B85%2Ft51.2885-15%2Fsh0.08%2Fe35%2Fp640x640%2F51536040_239395090345840_1198967531954345213_n.jpg&media_type=photo&permalink_url=https%3A%2F%2Fwww.instagram.com%2Fp%2FBthYCdoHYp6%2F&client=mamonde&contentId=5c5b69799344be66a085b08b&width=540&height=540&exact=true,http/1.1,,image,0,0,Amazon
392321714,https://us.mamonde.com/,https://api.bazaarvoice.com/curations/c3/imagingAPI?passkey=kuuqd395w5u7gv43987gxshh&url=https%3A%2F%2Fscontent-lga3-1.cdninstagram.com%2Fvp%2Fa8088159ec892ad754d9224b8b6df283%2F5CDE93CA%2Ft51.2885-19%2Fs150x150%2F49843689_366153454203185_195220256606650368_n.jpg&media_type=avatar&permalink_url=https%3A%2F%2Fwww.instagram.com%2Fp%2FBthYCdoHYp6%2F&client=mamonde&contentId=5c5b69799344be66a085b08b,http/1.1,,image,0,0,Amazon
392321715,https://us.mamonde.com/,https://api.bazaarvoice.com/curations/c3/imagingAPI?passkey=kuuqd395w5u7gv43987gxshh&url=https%3A%2F%2Fscontent-iad3-1.cdninstagram.com%2Fvp%2Fd8d4b2b98018369abc66b4ed9bf26570%2F5E1AFA80%2Ft51.2885-15%2Fsh0.08%2Fe35%2Fs640x640%2F50088381_2350184471882534_5895664281724675067_n.jpg&media_type=photo&permalink_url=https%3A%2F%2Fwww.instagram.com%2Fp%2FBtSTpqghPX3%2F&client=mamonde&contentId=5c5283586ba4f8127f790497&width=540&height=540&exact=true,http/1.1,,image,0,0,Amazon


In [127]:
# Restrict the Instagram-URL subset to assets classified as video.
tmp_df.loc[tmp_df['mime_type'].eq('video')]

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
231488,https://www.dairyworks.co.nz/,https://scontent.cdninstagram.com/v/t50.2886-16/82398283_109289167138682_1634779875560698456_n.mp4?_nc_ht=scontent.cdninstagram.com&_nc_ohc=98VF5STj2rwAX8zD8Bi&oe=5E299AF5&oh=52242e3a3fb51facd4bbcfb88f5ef1d0,HTTP/2,,video,0,0,Facebook
231489,https://www.dairyworks.co.nz/,https://scontent.cdninstagram.com/v/t50.2886-16/82544336_1439327669571613_5781496878392653038_n.mp4?_nc_ht=scontent.cdninstagram.com&_nc_ohc=dvuOal14B94AX-FMgpU&oe=5E2A3264&oh=600138f89fded3f09c9ce68ac14de032,HTTP/2,,video,0,0,Facebook
231492,https://www.dairyworks.co.nz/,https://scontent.cdninstagram.com/v/t50.2886-16/82963390_2414281742154823_646421123011042307_n.mp4?_nc_ht=scontent.cdninstagram.com&_nc_ohc=vn4pW4n-Z5sAX9Mt3HK&oe=5E2A1DEC&oh=75955b42283e3a5b1c5a5ea777524fa5,HTTP/2,,video,0,0,Facebook
231493,https://www.dairyworks.co.nz/,https://scontent.cdninstagram.com/v/t50.2886-16/80599493_1303817636495104_7973899353607688104_n.mp4?_nc_ht=scontent.cdninstagram.com&_nc_ohc=biAVHQ--liQAX86ahEk&oe=5E29C7A4&oh=f862ca3de90aa9b5646ea4f2e72c4954,HTTP/2,,video,0,0,Facebook
492736,https://www.madchefkitchen.com/,https://scontent.cdninstagram.com/v/t50.2886-16/82437735_2416842331962954_4707524422991902313_n.mp4?_nc_ht=scontent.cdninstagram.com&_nc_ohc=EvMtnArFdwAAX9ZQa-7&oe=5E1FCAB8&oh=ace9984b0cde59f4cb141d55c847001f,HTTP/2,,video,0,0,Facebook
...,...,...,...,...,...,...,...,...
391237284,https://forevercheese.com/,https://scontent-atl3-1.cdninstagram.com/v/t50.2886-16/80991907_138326834265110_1432239285603403112_n.mp4?_nc_ht=scontent-atl3-1.cdninstagram.com&_nc_cat=102&_nc_ohc=Azn-2byLD5IAX_xhw62&oe=5E178A95&oh=8f73c366bde91c4573fadb1bf72a0d5b,HTTP/2,,video,0,0,Facebook
391718019,https://swamphousetulsa.com/,https://scontent.cdninstagram.com/v/t50.2886-16/79773361_2418474391814261_5169445450565276869_n.mp4?_nc_ht=scontent.cdninstagram.com&_nc_ohc=6Zhwg8Sg_aEAX8oppzw&oe=5E1C412E&oh=4b5807bc345336ba45aa9973e53260df,HTTP/2,,video,0,0,Facebook
392012588,https://gnomecones.co/,https://scontent.cdninstagram.com/v/t50.2886-16/81944452_630728824333732_5111384647517325011_n.mp4?_nc_ht=scontent.cdninstagram.com&_nc_ohc=7_jKGW3nEA8AX_nva0_&oe=5E1290C1&oh=fe4e63e7a55d0596d27240a46adf2931,HTTP/2,,video,0,0,Facebook
392145159,https://www.ekitchenary.com/,https://scontent.cdninstagram.com/v/t50.2886-16/80999385_630577314146078_8858560992695144776_n.mp4?_nc_ht=scontent.cdninstagram.com&_nc_ohc=evDcWmJoFuYAX-YagSM&oe=5E1BF69F&oh=719bb19b6f51cbe490c160e9b07b9df1,HTTP/2,,video,0,0,Facebook


In [22]:
# Assets identified as served by Amazon (hybrid_ident) that are flagged as an ad,
# a tracker, or both. The masks are built inline so no frame-sized boolean Series is
# left behind as a notebook global.
amazon_ads_trackers = assets_df.loc[
    assets_df['hybrid_ident'].eq('Amazon')
    & (assets_df['is_ad'].eq(1) | assets_df['is_tracker'].eq(1))
]

In [23]:
# Show the Amazon-identified assets that are flagged as ad and/or tracker (truncated by pandas).
amazon_ads_trackers

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
96,https://www.honorstransfercouncil.org/,"https://frog.wix.com/bt?src=29&evid=3&v=1.4761.0&msid=839a079d-5123-4823-8bea-14e4b75e2bb4&isp=1&st=2&dc=96&iss=1&url=honorstransfercouncil.org%2F&et=1&event_name=Init&ts=0&tts=449&vsi=4d71abbd-15a3-4583-b4ed-162b9bde7b79&rid=1579246045.080140130217494026145&viewer_name=bolt&is_rollout=0&is_platform_loaded=1&sessionId=74ad46bf-49e9-4aab-a103-89d82a606597&vid=14d8cf74-4f07-45b0-9232-9c7d1988ea83&is_cached=false&caching=miss,miss&pn=1&sr=1920x1200&sar=1920x1200&wr=1024x652&wor=1024x768&ita=1&siterev=898-1579105706990",HTTP/2,TLS 1.2,no type,0,1,Amazon
97,https://www.honorstransfercouncil.org/,https://frog.wix.com/bolt-performance?appName=bolt-viewer&src=72&evid=21&dc=96&is_rollout=0&is_cached=false&session_id=4d71abbd-15a3-4583-b4ed-162b9bde7b79&_=0.4650327701072541,HTTP/2,,no type,0,1,Amazon
102,https://www.honorstransfercouncil.org/,https://frog.wix.com/ugc-viewer?c=1579246045789&top=1&bot=0&sbot=1&evid=361&src=42&majorVer=4&did=7eae86cb-6a83-4502-8cdc-c3728296b84a&msid=839a079d-5123-4823-8bea-14e4b75e2bb4&uid=0c62f041-ad35-45d2-9051-bbe335193918&tsp=140341381&vsi=4d71abbd-15a3-4583-b4ed-162b9bde7b79,HTTP/2,,no type,0,1,Amazon
103,https://www.honorstransfercouncil.org/,"https://frog.wix.com/bt?src=29&evid=3&v=1.4761.0&msid=839a079d-5123-4823-8bea-14e4b75e2bb4&isp=1&st=2&dc=96&iss=1&url=honorstransfercouncil.org%2F&et=4&event_name=main-r%20executed&ts=556&tts=1003&vsi=4d71abbd-15a3-4583-b4ed-162b9bde7b79&rid=1579246045.080140130217494026145&viewer_name=bolt&is_rollout=0&is_platform_loaded=1&sessionId=74ad46bf-49e9-4aab-a103-89d82a606597&vid=14d8cf74-4f07-45b0-9232-9c7d1988ea83&is_cached=false&caching=miss,miss&isjp=1&ita=1&pn=1&sr=1920x1200&sar=1920x1200&wr=1024x652&wor=1024x768&siterev=898-1579105706990",HTTP/2,,no type,0,1,Amazon
123,https://www.honorstransfercouncil.org/,"https://frog.wix.com/bt?src=29&evid=3&v=1.4761.0&msid=839a079d-5123-4823-8bea-14e4b75e2bb4&isp=1&st=2&dc=96&iss=1&url=honorstransfercouncil.org%2F&et=12&event_name=Partially%20visible&ts=1071&tts=1517&vsi=4d71abbd-15a3-4583-b4ed-162b9bde7b79&rid=1579246045.080140130217494026145&viewer_name=bolt&is_rollout=0&is_platform_loaded=1&sessionId=74ad46bf-49e9-4aab-a103-89d82a606597&vid=14d8cf74-4f07-45b0-9232-9c7d1988ea83&is_cached=false&caching=miss,miss&isjp=1&ita=1&pid=mainPage&pn=1&sr=1920x1200&sar=1920x1200&wr=1024x652&wor=1024x768&siterev=898-1579105706990&ism=1",HTTP/2,,no type,0,1,Amazon
...,...,...,...,...,...,...,...,...
392321523,https://us.mamonde.com/,https://network.bazaarvoice.com/sid.gif?_=u4j0jl,http/1.1,TLS 1.2,image,0,1,Amazon
392321624,https://us.mamonde.com/,https://network.bazaarvoice.com/sid.gif?_=eftkvl,http/1.1,,image,0,1,Amazon
392321656,https://us.mamonde.com/,https://cdn.attn.tv/mamonde/analytics.js?v=1.0.29,HTTP/2,,javascript,0,1,Amazon
392321922,https://agencepro.orange.fr/,https://akatracking.esearchvision.com/esi/esearchvisiontracking.js,HTTP/2,TLS 1.2,javascript,0,1,Amazon


In [89]:
# All assets, regardless of ad/tracker flags, whose URL contains the literal text
# 'amazon-adsystem' anywhere (substring test, not a hostname test).
amazon_adsys = assets_df.loc[
    lambda frame: frame['asset_url'].str.contains('amazon-adsystem', regex=False)
]

In [90]:
# Show the assets whose URL contains 'amazon-adsystem' (truncated by pandas).
amazon_adsys

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
972,http://cardgames.game.coocan.jp/,http://ir-jp.amazon-adsystem.com/e/ir?t=duelmasters3-22&l=as2&o=9&a=4087797074,http/1.1,,image,0,0,Amazon
973,http://cardgames.game.coocan.jp/,http://ws-fe.amazon-adsystem.com/widgets/q?_encoding=UTF8&ASIN=4087797074&Format=_SL110_&ID=AsinImage&MarketPlace=JP&ServiceVersion=20070822&WS=1&tag=duelmasters3-22,http/1.1,,no type,0,0,Amazon
1105,http://explore.gastateparks.org/,https://s.amazon-adsystem.com/iu3?d=generic&ex-fargs=%3Fid%3D9aae6990-4802-1a40-c754-1dc51224d6c0%26type%3D55%26m%3D1&ex-fch=416613&ex-src=https://gastateparks.org/&ex-hargs=v%3D1.0%3Bc%3D7620163140101%3Bp%3D9AAE6990-4802-1A40-C754-1DC51224D6C0&cb=970463064922764500,,,no type,0,0,Amazon
1106,http://explore.gastateparks.org/,https://s.amazon-adsystem.com/iu3?d=generic&ex-fargs=%3Fid%3D6a0935f9-4de8-7b2f-4cd0-7b1507099c8e%26type%3DUNKNOWN%26m%3D1&ex-fch=416613&ex-src=https://gastateparks.org/&ex-hargs=v%3D1.0%3Bc%3D7620163140101%3Bp%3D6A0935F9-4DE8-7B2F-4CD0-7B1507099C8E&cb=105642575661107250,,,no type,0,0,Amazon
1109,http://explore.gastateparks.org/,https://s.amazon-adsystem.com/iu3?d=generic&ex-fargs=%3Fid%3D9aae6990-4802-1a40-c754-1dc51224d6c0%26type%3D55%26m%3D1&ex-fch=416613&ex-src=https://gastateparks.org/&ex-hargs=v%3D1.0%3Bc%3D7620163140101%3Bp%3D9AAE6990-4802-1A40-C754-1DC51224D6C0&cb=970463064922764500&dcc=t,,,html,0,0,Amazon
...,...,...,...,...,...,...,...,...
392321178,https://equal-love.fandom.com/,https://s.amazon-adsystem.com/ecm3?id=UP5c3b40a5-35d3-11ea-9e39-02d3fc5a0d1b&ex=oath.com,,,image,0,0,Amazon
392321190,https://equal-love.fandom.com/,https://s.amazon-adsystem.com/ecm3?ex=brealtime.com&id=5833365845649844400brt57151578899250541410a9,,,image,0,0,Amazon
392321221,https://equal-love.fandom.com/,https://s.amazon-adsystem.com/ecm3?ex=dmx.com&id=eyJhbGciOiJFUzI1NiIsInR5cCI6IkpXVCJ9.eyJzaWQiOjEwMDAyLCJ1c3IiOiJxZ1llc2dZYk1WZExXRkpPU0U4NFQzSmxiakZDZUVkUlMzWjNUekJwV1dkaiJ9.nKoW6JDByEIHVLjZNE-puyCuIGRF8FJziFRpwFqJ_WZ87o2Z8fn9e7HCnT_M23pU1WKN3Uf5scCzJSlsMPSYqw,,,image,0,0,Amazon
392321286,https://equal-love.fandom.com/,https://s.amazon-adsystem.com/ecm3?id=K5C3UGXJ-5-7Y2X&ex=d-rubiconproject.com&status=ok&gdpr=0,,,image,0,0,Amazon


In [91]:
# Amazon-served ads/trackers whose URL contains 'amazon-adsystem'. The test covers the
# whole URL, so a third-party host that passes an amazon-adsystem link in its query
# string is matched too.
amazon_ads_trackers.loc[
    lambda frame: frame['asset_url'].str.contains('amazon-adsystem', regex=False)
]

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
1146,http://explore.gastateparks.org/,https://usersync.samplicio.us/amazon/pixel.gif?https://s.amazon-adsystem.com/ecm3?ex=luc.id&id=,,,unidentified,0,1,Amazon
7338,https://android-sdk.en.softonic.com/,https://c.amazon-adsystem.com/aax2/apstag.js,HTTP/2,TLS 1.2,javascript,1,0,Amazon
25369,https://worldoftanks.com/,https://usersync.samplicio.us/amazon/pixel.gif?https://s.amazon-adsystem.com/ecm3?ex=luc.id&id=,,,image,0,1,Amazon
28788,https://www.wirefly.com/,https://aax-us-east.amazon-adsystem.com/x/getad?src=330&c=100&sz=1x1&apiVersion=2.0&pj=%7B%22overwrite%22%3A%22false%22%2C%22tracking_id%22%3A%22myrateplancom%22%2C%22ad_type%22%3A%22one_tag%22%2C%22marketplace%22%3A%22amazon%22%2C%22enable_geo_redirection%22%3A%22true%22%2C%22enable_auto_tagging%22%3A%22false%22%2C%22region%22%3A%22US%22%2C%22placement%22%3A%22adunit%22%2C%22viewerCountry%22%3A%22US%22%2C%22textlinks%22%3A%22%22%2C%22debug%22%3A%22false%22%2C%22acap_publisherId%22%3A%22myrateplancom%22%2C%22slotNum%22%3A0%7D&u=https%3A%2F%2Fwww.wirefly.com%2F&jscb=amzn_assoc_jsonp_callback_adunit_0,http/1.1,TLS 1.2,javascript,1,0,Amazon
30428,http://jpmpmpw.hatenablog.com/,https://c.amazon-adsystem.com/aax2/apstag.js,HTTP/2,TLS 1.2,javascript,1,0,Amazon
...,...,...,...,...,...,...,...,...
392314413,http://www.buffettnews.com/,http://ws-na.amazon-adsystem.com/widgets/q?ServiceVersion=20070822&OneJS=1&Operation=GetAdHtml&MarketPlace=US&source=ss&ref=as_ss_li_til&ad_type=product_link&tracking_id=thefeedingfrenzy&language=en_US&marketplace=amazon&region=US&placement=B01NAPOZ7N&asins=B01NAPOZ7N&linkId=afee6078df5ba7a61776174b6b9f0e6a&show_border=true&link_opens_in_new_window=true,http/1.1,,html,1,0,Amazon
392314414,http://www.buffettnews.com/,http://ws-na.amazon-adsystem.com/widgets/q?ServiceVersion=20070822&OneJS=1&Operation=GetAdHtml&MarketPlace=US&source=ac&ref=tf_til&ad_type=product_link&tracking_id=thefeedingfrenzy&marketplace=amazon&region=US&placement=B07NBDQYL7&asins=B07NBDQYL7&linkId=53f96a06ba902725662248276ba1e6dd&show_border=true&link_opens_in_new_window=true&price_color=333333&title_color=0066c0&bg_color=ffffff,http/1.1,,html,1,0,Amazon
392314415,http://www.buffettnews.com/,http://ws-na.amazon-adsystem.com/widgets/q?ServiceVersion=20070822&OneJS=1&Operation=GetAdHtml&MarketPlace=US&source=ac&ref=qf_sp_asin_til&ad_type=product_link&tracking_id=thefeedingfrenzy&marketplace=amazon&region=US&placement=1501132555&asins=1501132555&linkId=375d62ca6fa40680fab21e606d54914c&show_border=true&link_opens_in_new_window=true&price_color=333333&title_color=0066c0&bg_color=ffffff,http/1.1,,html,1,0,Amazon
392314416,http://www.buffettnews.com/,http://ws-na.amazon-adsystem.com/widgets/q?ServiceVersion=20070822&OneJS=1&Operation=GetAdHtml&MarketPlace=US&source=ss&ref=as_ss_li_til&ad_type=product_link&tracking_id=thefeedingfrenzy&language=en_US&marketplace=amazon&region=US&placement=B01BHK4MK0&asins=B01BHK4MK0&linkId=f3b6c9c20c2cd3691abb91f5f4c0118f&show_border=true&link_opens_in_new_window=true,http/1.1,,html,1,0,Amazon


In [129]:
# URLs of Amazon-served ads/trackers that mention 'amazon-adsystem' or 'amazon.com'
# anywhere in the URL. 'amazonaws.com' is not part of this filter.
# Rows and column are selected in a single .loc call instead of chained indexing.
amazon_ads_trackers.loc[
    lambda frame: (
        frame['asset_url'].str.contains('amazon-adsystem', regex=False)
        | frame['asset_url'].str.contains('amazon.com', regex=False)
    ),
    'asset_url',
]

1146         https://usersync.samplicio.us/amazon/pixel.gif?https://s.amazon-adsystem.com/ecm3?ex=luc.id&id=                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                               
7338         https://c.amazon-adsystem.com/aax2/apstag.js                                                                                                                                                                                                                                                                                                                                   

In [92]:
# Split the Amazon-served subset by flag. The two frames are not mutually exclusive:
# a row with both is_ad == 1 and is_tracker == 1 lands in both.
amazon_ads = amazon_ads_trackers.loc[amazon_ads_trackers['is_ad'].eq(1)]
amazon_trackers = amazon_ads_trackers.loc[amazon_ads_trackers['is_tracker'].eq(1)]

In [97]:
# Number of Amazon-served rows flagged as ads.
amazon_ads.shape[0]

401946

In [142]:
# Number of Amazon-served rows flagged as trackers.
amazon_trackers.shape[0]

2466341

In [140]:
# Amazon-served ads whose URL mentions 'amazon-adsystem.com' or 'amazon.com'
# (literal substring test over the whole URL).
amazon_ads.loc[
    lambda frame: (
        frame['asset_url'].str.contains('amazon-adsystem.com', regex=False)
        | frame['asset_url'].str.contains('amazon.com', regex=False)
    )
]

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
7338,https://android-sdk.en.softonic.com/,https://c.amazon-adsystem.com/aax2/apstag.js,HTTP/2,TLS 1.2,javascript,1,0,Amazon
28788,https://www.wirefly.com/,https://aax-us-east.amazon-adsystem.com/x/getad?src=330&c=100&sz=1x1&apiVersion=2.0&pj=%7B%22overwrite%22%3A%22false%22%2C%22tracking_id%22%3A%22myrateplancom%22%2C%22ad_type%22%3A%22one_tag%22%2C%22marketplace%22%3A%22amazon%22%2C%22enable_geo_redirection%22%3A%22true%22%2C%22enable_auto_tagging%22%3A%22false%22%2C%22region%22%3A%22US%22%2C%22placement%22%3A%22adunit%22%2C%22viewerCountry%22%3A%22US%22%2C%22textlinks%22%3A%22%22%2C%22debug%22%3A%22false%22%2C%22acap_publisherId%22%3A%22myrateplancom%22%2C%22slotNum%22%3A0%7D&u=https%3A%2F%2Fwww.wirefly.com%2F&jscb=amzn_assoc_jsonp_callback_adunit_0,http/1.1,TLS 1.2,javascript,1,0,Amazon
30428,http://jpmpmpw.hatenablog.com/,https://c.amazon-adsystem.com/aax2/apstag.js,HTTP/2,TLS 1.2,javascript,1,0,Amazon
31836,https://www.ourfamilyworld.com/,https://c.amazon-adsystem.com/aax2/apstag.js,HTTP/2,TLS 1.2,javascript,1,0,Amazon
33201,https://www.twospoons.ca/,https://aax-us-east.amazon-adsystem.com/x/getad?src=330&c=100&sz=1x1&apiVersion=2.0&pj=%7B%22overwrite%22%3A%22false%22%2C%22div_name%22%3A%22amzn-assoc-ad-b68b8d17-a7d4-493e-b7b0-196be9b4af83%22%2C%22tracking_id%22%3A%22twospoons0d-20%22%2C%22ad_type%22%3A%22one_tag%22%2C%22marketplace%22%3A%22amazon%22%2C%22enable_geo_redirection%22%3A%22true%22%2C%22enable_auto_tagging%22%3A%22false%22%2C%22region%22%3A%22US%22%2C%22placement%22%3A%22adunit%22%2C%22viewerCountry%22%3A%22US%22%2C%22textlinks%22%3A%22%22%2C%22debug%22%3A%22false%22%2C%22acap_publisherId%22%3A%22twospoons0d-20%22%2C%22slotNum%22%3A0%7D&u=https%3A%2F%2Ftwospoons.ca%2F&jscb=amzn_assoc_jsonp_callback_adunit_0,http/1.1,TLS 1.2,javascript,1,0,Amazon
...,...,...,...,...,...,...,...,...
392314413,http://www.buffettnews.com/,http://ws-na.amazon-adsystem.com/widgets/q?ServiceVersion=20070822&OneJS=1&Operation=GetAdHtml&MarketPlace=US&source=ss&ref=as_ss_li_til&ad_type=product_link&tracking_id=thefeedingfrenzy&language=en_US&marketplace=amazon&region=US&placement=B01NAPOZ7N&asins=B01NAPOZ7N&linkId=afee6078df5ba7a61776174b6b9f0e6a&show_border=true&link_opens_in_new_window=true,http/1.1,,html,1,0,Amazon
392314414,http://www.buffettnews.com/,http://ws-na.amazon-adsystem.com/widgets/q?ServiceVersion=20070822&OneJS=1&Operation=GetAdHtml&MarketPlace=US&source=ac&ref=tf_til&ad_type=product_link&tracking_id=thefeedingfrenzy&marketplace=amazon&region=US&placement=B07NBDQYL7&asins=B07NBDQYL7&linkId=53f96a06ba902725662248276ba1e6dd&show_border=true&link_opens_in_new_window=true&price_color=333333&title_color=0066c0&bg_color=ffffff,http/1.1,,html,1,0,Amazon
392314415,http://www.buffettnews.com/,http://ws-na.amazon-adsystem.com/widgets/q?ServiceVersion=20070822&OneJS=1&Operation=GetAdHtml&MarketPlace=US&source=ac&ref=qf_sp_asin_til&ad_type=product_link&tracking_id=thefeedingfrenzy&marketplace=amazon&region=US&placement=1501132555&asins=1501132555&linkId=375d62ca6fa40680fab21e606d54914c&show_border=true&link_opens_in_new_window=true&price_color=333333&title_color=0066c0&bg_color=ffffff,http/1.1,,html,1,0,Amazon
392314416,http://www.buffettnews.com/,http://ws-na.amazon-adsystem.com/widgets/q?ServiceVersion=20070822&OneJS=1&Operation=GetAdHtml&MarketPlace=US&source=ss&ref=as_ss_li_til&ad_type=product_link&tracking_id=thefeedingfrenzy&language=en_US&marketplace=amazon&region=US&placement=B01BHK4MK0&asins=B01BHK4MK0&linkId=f3b6c9c20c2cd3691abb91f5f4c0118f&show_border=true&link_opens_in_new_window=true,http/1.1,,html,1,0,Amazon


In [131]:
# Share of Amazon-served ad rows whose URL mentions 'amazon-adsystem.com' or
# 'amazon.com': matching rows divided by all rows in amazon_ads.
len(
    amazon_ads.loc[
        lambda frame: (
            frame['asset_url'].str.contains('amazon-adsystem.com', regex=False)
            | frame['asset_url'].str.contains('amazon.com', regex=False)
        )
    ]
) / amazon_ads.shape[0]

0.22213431654998433

In [132]:
# Amazon-served trackers whose URL mentions 'amazon-adsystem.com' or 'amazon.com'.
# The test covers the whole URL, so third-party sync pixels that pass an
# amazon-adsystem.com link as a query argument are matched as well.
amazon_trackers.loc[
    lambda frame: (
        frame['asset_url'].str.contains('amazon-adsystem.com', regex=False)
        | frame['asset_url'].str.contains('amazon.com', regex=False)
    )
]

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
1146,http://explore.gastateparks.org/,https://usersync.samplicio.us/amazon/pixel.gif?https://s.amazon-adsystem.com/ecm3?ex=luc.id&id=,,,unidentified,0,1,Amazon
25369,https://worldoftanks.com/,https://usersync.samplicio.us/amazon/pixel.gif?https://s.amazon-adsystem.com/ecm3?ex=luc.id&id=,,,image,0,1,Amazon
180709,https://m.poolandspa.com/,https://payments.amazon.com/cs/uedata,http/1.1,,application,0,1,Amazon
680272,https://www.snakeheadvintage.com/,https://payments.amazon.com/cs/uedata,http/1.1,,application,0,1,Amazon
780315,https://www.highhorseperformance.com/,https://payments.amazon.com/cs/uedata,http/1.1,,application,0,1,Amazon
...,...,...,...,...,...,...,...,...
391951401,https://goodnessofgodministries.international/,https://fls-na.amazon.com/1/batch/1/OP/ATVPDKIKX0DER:146-1872783-8764226:R537RB6Q00730K1334XN$uedata=s:https%3A%2F%2Fread.amazon.com%2Fkp%2Fuedata%2Fuedata%3Fat%26v%3DINSTRUMENTATION_VERSION%26id%3DR537RB6Q00730K1334XN%26m%3D1%26sc%3Dadblk_no%26pc%3D1369%26at%3D1369%26t%3D1579295106816%26csmtags%3Dadblk_no%26pty%3DKindlePlayer%26spty%3DBookCard%26pti%3DB01DK0QZYG%26tid%3DR537RB6Q00730K1334XN%26aftb%3D1:1369,HTTP/2,,image,0,1,Amazon
391955557,http://stlouis.cardinals.mlb.com/,https://usersync.samplicio.us/amazon/pixel.gif?https://s.amazon-adsystem.com/ecm3?ex=luc.id&id=,,,image,0,1,Amazon
392285286,https://www.toyotaoflancasterav.com/,https://usersync.samplicio.us/amazon/pixel.gif?https://s.amazon-adsystem.com/ecm3?ex=luc.id&id=,,,image,0,1,Amazon
392289399,https://www.niveausa.com/,https://usersync.samplicio.us/amazon/pixel.gif?https://s.amazon-adsystem.com/ecm3?ex=luc.id&id=,,,image,0,1,Amazon


In [141]:
# Fraction of amazon_trackers rows whose asset_url contains 'amazon-adsystem.com'
# or 'amazon.com' as a literal substring (regex=False).
tracker_urls = amazon_trackers['asset_url']
tracker_url_hits = (
    tracker_urls.str.contains('amazon-adsystem.com', regex=False)
    | tracker_urls.str.contains('amazon.com', regex=False)
)
len(amazon_trackers[tracker_url_hits]) / len(amazon_trackers)

0.004317732219510603

In [95]:
# Rows of amazon_ads whose asset_url contains the literal substring 'amazonaws'.
ads_mentions_amazonaws = amazon_ads['asset_url'].str.contains('amazonaws', regex=False)
amazon_ads.loc[ads_mentions_amazonaws]

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
154115,http://tulsafood.com/,http://media.tulsafood.com.s3.amazonaws.com/wp-content/uploads/2017/05/JINYA-Banner-Ad-Draft-3.jpeg,http/1.1,,image,1,0,Amazon
298728,http://www2.radio24.ilsole24ore.com/,https://s3-eu-west-1.amazonaws.com/aplus-creativities/MARKETPLACE-PUBLISHER-CREATIVES/image/gif/300x250_8_.gif,http/1.1,TLS 1.2,image,1,0,Amazon
298729,http://www2.radio24.ilsole24ore.com/,https://s3-eu-west-1.amazonaws.com/aplus-creativities/MARKETPLACE-PUBLISHER-CREATIVES/image/gif/Guida_Pensioni_728x90.gif,http/1.1,TLS 1.2,image,1,0,Amazon
556723,https://www.tribunazamora.com/,https://s3-us-west-2.amazonaws.com/ads.optimizads.com/jcm-mm/6a314500d4e16e257119adcf10c5f421.png,http/1.1,,image,1,0,Amazon
651879,https://www.huntingnet.com/,https://stackadapt_public.s3.amazonaws.com/icons/adchoices/adchoices.png,http/1.1,TLS 1.2,image,1,0,Amazon
...,...,...,...,...,...,...,...,...
392052317,https://www.annapoliscountyspectator.ca/,https://paywall-ad-bucket.s3.amazonaws.com/ad_300_250.jpg,http/1.1,TLS 1.2,image,1,0,Amazon
392058620,https://www.kriti24.gr/,https://s3-eu-west-2.amazonaws.com/net22/kriti24_demo/wp-content/uploads/2016/09/18100718/2016_Kritika-Akinita_banner_300X250.png,http/1.1,,image,1,0,Amazon
392058627,https://www.kriti24.gr/,https://s3-eu-west-2.amazonaws.com/net22/kriti24_demo/wp-content/uploads/2019/11/05151341/synka_300x250_28-11.gif,http/1.1,,image,1,0,Amazon
392312072,https://spurs.vitalfootball.co.uk/,https://s3-eu-west-1.amazonaws.com/xzyvmgtxseboq/unicef_728x90.jpg,http/1.1,,image,1,0,Amazon


In [96]:
# Rows of amazon_trackers whose asset_url contains the literal substring 'amazonaws'.
trackers_mentions_amazonaws = amazon_trackers['asset_url'].str.contains('amazonaws', regex=False)
amazon_trackers.loc[trackers_mentions_amazonaws]

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
6450,http://radiokodak.com/,http://cloudfront-labs.amazonaws.com/x.png,http/1.1,,image,0,1,Amazon
18853,https://www.is-tech.co.jp/,https://juicer-vri.s3-ap-northeast-1.amazonaws.com/px.gif?u1=srn:smooothieapi:usercard:juicer:720c941b-2c73-42c7-b0f6-c8598532b948&u2=657D9D005E16CC118BD5097117E58202&r=555471,http/1.1,TLS 1.2,image,0,1,Amazon
30634,http://almezmaah.com/,http://cloudfront-labs.amazonaws.com/x.png,http/1.1,,image,0,1,Amazon
33792,http://panambinews.com/,http://cloudfront-labs.amazonaws.com/x.png,http/1.1,,image,0,1,Amazon
34642,https://www.itplus.co.jp/,https://juicer-vri.s3-ap-northeast-1.amazonaws.com/px.gif?u1=srn:smooothieapi:usercard:juicer:3e113974-ccd8-4720-9cac-4f8cb9abe9b9&u2=657D9D005E15236C00730970F3131D02&r=117563,http/1.1,TLS 1.2,image,0,1,Amazon
...,...,...,...,...,...,...,...,...
392268806,http://www.fesan-jp.com/,https://juicer-vri.s3-ap-northeast-1.amazonaws.com/px.gif?u1=srn:smooothieapi:usercard:juicer:74779350-abc2-4f0c-92e5-d405764596cb&u2=257D9D005E128203064F09A6B9D9FD02&r=727497,http/1.1,TLS 1.2,image,0,1,Amazon
392287442,http://songyuepan.pixnet.net/,http://cloudfront-labs.amazonaws.com/x.png,http/1.1,,image,0,1,Amazon
392296780,https://rollplaydnd.fandom.com/,https://s3.amazonaws.com/ki.js/52510/gQT.js,http/1.1,TLS 1.2,application,0,1,Amazon
392320023,https://www.homesicktexan.com/,https://sqs.us-east-1.amazonaws.com/397719490216/Test_oPS_Script_Loads?Action=SendMessage&MessageBody=cid%3D21%26bt%3Dnull,http/1.1,TLS 1.2,text,0,1,Amazon


In [135]:
# Rows of amazon_ads_trackers whose asset_url contains the literal substring
# 'amazon.co.uk' (regex=False, so the dots are matched literally).
mentions_amazon_co_uk = amazon_ads_trackers['asset_url'].str.contains('amazon.co.uk', regex=False)
amazon_ads_trackers.loc[mentions_amazon_co_uk]  # only a few hundred for ccTLDs

Unnamed: 0,base_page,asset_url,protocol,tls_version,mime_type,is_ad,is_tracker,hybrid_ident
8766317,https://uk.diono.com/,https://fls-eu.amazon.co.uk/1/batch/1/OP/A1F83G8C2ARO7P:257-7060151-2075639:1T889BAMFGVFQJEFD58E$uedata=s:%2Fgp%2Fuedata%3Fstaticb%26id%3D1T889BAMFGVFQJEFD58E:0,HTTP/2,,image,0,1,Amazon
15067939,http://www.thespinningimage.co.uk/,http://www.assoc-amazon.co.uk/s/ads-common.js,http/1.1,,javascript,1,0,Amazon
22487974,https://sellercentral-europe.amazon.com/,https://fls-eu.amazon.co.uk/1/batch/1/OP/A1F83G8C2ARO7P:260-9561629-9848738:WVE6TSKRA063W3E75JPH$uedata=s:%2Fmons%2Fuedata%3Fstaticb%26id%3DWVE6TSKRA063W3E75JPH:0,HTTP/2,TLS 1.2,image,0,1,Amazon
22488000,https://sellercentral-europe.amazon.com/,https://fls-eu.amazon.co.uk/1/batch/1/OP/A1F83G8C2ARO7P:260-9561629-9848738:WVE6TSKRA063W3E75JPH:sellercentral-europe.amazon.com$uedata=s:%2Fmons%2Fuedata%3Fld%26v%3D0.206330.0%26id%3DWVE6TSKRA063W3E75JPH%26sw%3D1920%26sh%3D1200%26vw%3D1009%26vh%3D652%26m%3D1%26sc%3DWVE6TSKRA063W3E75JPH%26ue%3D4%26bb%3D435%26cf%3D562%26be%3D640%26fp%3D654%26fcp%3D654%26pc%3D2898%26tc%3D-598%26na_%3D-598%26ul_%3D-1579267511690%26_ul%3D-1579267511690%26rd_%3D-1579267511690%26_rd%3D-1579267511690%26fe_%3D-583%26lk_%3D-529%26_lk%3D-529%26co_%3D-529%26_co%3D-399%26sc_%3D-492%26rq_%3D-399%26rs_%3D-43%26_rs%3D138%26dl_%3D-31%26di_%3D661%26de_%3D662%26_de%3D663%26_dc%3D2889%26ld_%3D2898%26_ld%3D-1579267511690%26ntd%3D-1%26ty%3D0%26rc%3D0%26hob%3D3%26hoe%3D5%26ld%3D2899%26t%3D1579267514589%26ctb%3D1%26rt%3Dcf%3A5-2-2-1-3-0-1__ld%3A29-7-3-9-8-0-0%26ec%3D1%26ecf%3D1%26csmtags%3Daui%7Caui%3Aaui_build_date%3A3.19.8-2020-01-13%7Cfls-eu-amazon-co-uk%26viz%3Dvisible%3A4%26pty%3Dwelcome-page%26spty%3Dundefined%26pti%3Dundefined%26tid%3DWVE6TSKRA063W3E75JPH%26aftb%3D1:2901,HTTP/2,,image,0,1,Amazon
22488001,https://sellercentral-europe.amazon.com/,https://fls-eu.amazon.co.uk/1/batch/1/OP/A1F83G8C2ARO7P:260-9561629-9848738:WVE6TSKRA063W3E75JPH:sellercentral-europe.amazon.com$uedata=s:%2Fmons%2Fuedata%3Fld%26v%3D0.206330.0%26id%3DWVE6TSKRA063W3E75JPH%26sc0%3DcsmCELLSframework%26bb0%3D649%26pc0%3D649%26ld0%3D649%26t0%3D1579267512339%26sc1%3DcsmCELLSpdm%26bb1%3D649%26pc1%3D650%26ld1%3D650%26t1%3D1579267512340%26sc2%3DcsmCELLSvpm%26bb2%3D651%26pc2%3D651%26ld2%3D651%26t2%3D1579267512341%26sc3%3DcsmCELLSfem%26bb3%3D654%26pc3%3D654%26ld3%3D655%26t3%3D1579267512345%26sc4%3Due_sushi_v1%26bb4%3D656%26pc4%3D658%26ld4%3D658%26t4%3D1579267512348%26ctb%3D1:2901,HTTP/2,,image,0,1,Amazon
...,...,...,...,...,...,...,...,...
360189713,https://music.amazon.co.uk/,https://music.amazon.co.uk/uedata/uedata?ld&v=0.205901.0&id=8744A56G4ZZY7ERSEA8E&m=1&sc=8744A56G4ZZY7ERSEA8E&ue=5&bb=1267&pc=3310&tc=-856&na_=-856&ul_=-1578625684839&_ul=-1578625684839&rd_=-1578625684839&_rd=-1578625684839&fe_=-845&lk_=-789&_lk=-789&co_=-789&_co=-413&sc_=-613&rq_=-413&rs_=-56&_rs=791&dl_=-42&di_=3087&de_=3087&_de=3203&_dc=3309&ld_=3309&_ld=-1578625684839&ntd=-1&ty=0&rc=0&hob=3&hoe=5&ld=3311&t=1578625688150&ctb=1&csmtags=aui|aui:aui_build_date:3.19.8-2019-12-13&viz=visible:5&aftb=1,http/1.1,,image,0,1,Amazon
360189720,https://music.amazon.co.uk/,https://fls-eu.amazon.co.uk/1/batch/1/OP/A1F83G8C2ARO7P:260-1837730-8425544:8744A56G4ZZY7ERSEA8E$uedata=s:%2Fuedata%2Fuedata%3Fld%26v%3D0.205901.0%26id%3D8744A56G4ZZY7ERSEA8E%26m%3D1%26sc%3D8744A56G4ZZY7ERSEA8E%26ue%3D5%26bb%3D1267%26pc%3D3310%26tc%3D-856%26na_%3D-856%26ul_%3D-1578625684839%26_ul%3D-1578625684839%26rd_%3D-1578625684839%26_rd%3D-1578625684839%26fe_%3D-845%26lk_%3D-789%26_lk%3D-789%26co_%3D-789%26_co%3D-413%26sc_%3D-613%26rq_%3D-413%26rs_%3D-56%26_rs%3D791%26dl_%3D-42%26di_%3D3087%26de_%3D3087%26_de%3D3203%26_dc%3D3309%26ld_%3D3309%26_ld%3D-1578625684839%26ntd%3D-1%26ty%3D0%26rc%3D0%26hob%3D3%26hoe%3D5%26ld%3D3311%26t%3D1578625688150%26ctb%3D1%26csmtags%3Daui%7Caui%3Aaui_build_date%3A3.19.8-2019-12-13%26viz%3Dvisible%3A5%26aftb%3D1:3312,HTTP/2,TLS 1.2,image,0,1,Amazon
381825603,https://www.aboutamazon.co.uk/,https://www.aboutamazon.co.uk/_resource/brightspot/analytics/search/SiteSearchAnalytics.js,HTTP/2,,javascript,0,1,Amazon
381825649,https://www.aboutamazon.co.uk/,https://certify.alexametrics.com/atrk.gif?frame_height=652&frame_width=1024&iframe=0&title=About%20Amazon&time=1578915924900&time_zone_offset=480&screen_params=1920x1200x24&java_enabled=0&cookie_enabled=1&ref_url=&host_url=https%3A%2F%2Fwww.aboutamazon.co.uk%2F&random_number=2225575951&sess_cookie=51a553fc16f9eb90ba33c4fbe87&sess_cookie_flag=1&user_cookie=51a553fc16f9eb90ba33c4fbe87&user_cookie_flag=1&dynamic=true&domain=aboutamazon.co.uk&account=JGgbs1Fx9f207i&jsv=20130128&user_lang=en-US,http/1.1,TLS 1.2,image,0,1,Amazon


In [None]:
# Google Fonts: 130,364 (~130k) not from fonts.gstatic.com, out of 10,813,964 (~10.8M) in total

In [136]:
# Google Fonts counts, named so the literals are not bare magic numbers.
fonts_total = 10813964        # Google Fonts, in total
fonts_not_gstatic = 130364    # Google Fonts not from fonts.gstatic.com

# Remainder: Google Fonts that do come from fonts.gstatic.com.
fonts_from_gstatic = fonts_total - fonts_not_gstatic
fonts_from_gstatic

10683600

In [137]:
# Share of Google Fonts that come from fonts.gstatic.com.
# The numerator is derived from the two counts rather than pasted from an
# earlier cell's output, so it cannot drift if the counts are updated.
fonts_total = 10813964        # Google Fonts, in total
fonts_not_gstatic = 130364    # Google Fonts not from fonts.gstatic.com

share_from_gstatic = (fonts_total - fonts_not_gstatic) / fonts_total
share_from_gstatic

0.9879448461267302

In [138]:
# Share of Google Fonts that do NOT come from fonts.gstatic.com.
fonts_total = 10813964        # Google Fonts, in total
fonts_not_gstatic = 130364    # Google Fonts not from fonts.gstatic.com

share_not_gstatic = fonts_not_gstatic / fonts_total
share_not_gstatic

0.012055153873269784