In [1]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
import urllib2
import datetime
import sys

from moztelemetry.spark import get_pings

%pylab inline

Unable to parse whitelist (/home/hadoop/anaconda2/lib/python2.7/site-packages/moztelemetry/histogram-whitelists.json). Assuming all histograms are acceptable.
Populating the interactive namespace from numpy and matplotlib




In [2]:
sc.defaultParallelism

32

In [3]:
def fmt_date(d):
    return d.strftime("%Y%m%d")
t1 = fmt_date(datetime.datetime.now() - datetime.timedelta(9)) # go back 16 days
t2 = fmt_date(datetime.datetime.now() - datetime.timedelta(2)) # go back 2 days
t1, t2

('20160525', '20160601')

In [4]:
pings = get_pings(sc, app="Firefox", channel="nightly", build_id=(t1, t2), fraction=1.0)

In [20]:
pings.first()["payload"]["histograms"] == None

False

In [12]:
pings.first()["clientId"]

u'0d83cc31-71ec-44de-a159-b6703d2780dc'

In [40]:
def parseAddons(addons):
    addons = addons.split(',')
    addons = map(urllib2.unquote, addons)
    addons = map(lambda a: tuple(a.split(':')), addons)
    return addons

def extract(ping):
    os = ping["environment"]["system"]["os"]["name"]
    clientId = ping.get("clientId", None)
    hists = ping["payload"].get("histograms", {})
    keyed = ping["payload"].get("keyedHistograms", {})
    shims = keyed.get("ADDON_SHIM_USAGE", {})
    jank = keyed.get("MISBEHAVING_ADDONS_JANK_LEVEL", {})
    cpow = keyed.get("PERF_MONITORING_SLOW_ADDON_CPOW_US", {})
    e10s = ping["environment"]["settings"]["e10sEnabled"]
    enabledAddons = parseAddons(ping["payload"]["info"].get("addons", ""))
    
    return (clientId, {'os': os, 'shims': shims, 'jank': jank,
                       'cpow': cpow, 'e10s': e10s, 'addons': enabledAddons})

bySession = pings.map(lambda ping: extract(ping))

In [22]:
bySession.count()

450333

In [23]:
clients = bySession.reduceByKey(lambda x1, x2: x2)

In [25]:
cached = clients.map(lambda (id, d): d)

In [26]:
def getShimData(d):
    result = {}
    for addonv in d['addons']:
        addon = addonv[0]
        result[addonv] = (addon in d['shims'], 1)
    return result

def collectShimData(a, b):
    for addonv in b:
        if addonv not in a:
            a[addonv] = b[addonv]
        else:
            (shim1, count1) = a[addonv]
            (shim2, count2) = b[addonv]
            a[addonv] = (shim1 or shim2, count1 + count2)
    return a

shimUsage = cached.map(getShimData).reduce(collectShimData)

In [27]:
shimUsageSorted = [ (c, a, b) for (a, (b, c)) in shimUsage.items() ]
shimUsageSorted.sort(reverse=True)

In [28]:
shimUsageFiltered = [ (c, x, y) for (c, x, y) in shimUsageSorted if c >= 50 ]

In [29]:
shimUsageShimmed = [ (c, x, y) for (c, x, y) in shimUsageFiltered if y ]

In [30]:
len(shimUsageFiltered)

320

In [31]:
len(shimUsageShimmed)

190

In [36]:
try:
    output = open('output/shim-data.json', 'w')
    json.dump(shimUsageFiltered, output)
    output.close()
except:
    pass

try:
    output = open('shim-data.json', 'w')
    json.dump(shimUsageFiltered, output)
    output.close()
except:
    pass

In [37]:
shimUsageFiltered

[(39924, (u'webcompat@mozilla.org', u'1.0'), False),
 (39850, (u'e10srollout@mozilla.org', u'1.0'), False),
 (39779, (u'{972ce4c6-7e08-4474-a285-3208198ce6fd}', u'49.0a1'), False),
 (39680, (u'firefox@getpocket.com', u'1.0.3b1'), True),
 (39492, (u'loop@mozilla.org', u'1.4.0'), True),
 (7540, (u'{d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d}', u'2.7.3'), True),
 (2813, (u'uBlock0@raymondhill.net', u'1.7.0'), True),
 (1676, (u'{e4a8a97b-f2ed-450b-b12d-ee082ba24781}', u'3.8'), True),
 (1481, (u'{b9db16a4-6edc-47ec-a1f4-b86292ed211d}', u'5.6.1'), True),
 (1103, (u'{DDC359D1-844A-42a7-9AA1-88A850A938A8}', u'2.0.19'), True),
 (914, (u'{73a6fe31-595d-460b-a920-fcc0f8843232}', u'2.9.0.11'), True),
 (878, (u'{46551EC9-40F0-4e47-8E18-8E5CF550CFB8}', u'2.0.6'), True),
 (859, (u'{b9bfaf1c-a63f-47cd-8b9a-29526ced9060}', u'1.8.7'), False),
 (827, (u'adbhelper@mozilla.org', u'0.8.7'), False),
 (820, (u'firebug@software.joehewitt.com', u'2.0.16'), False),
 (796, (u'firefox@ghostery.com', u'6.2.0'), True),
 (

In [41]:
def processForHist(d):
    def getVersion(addon):
        for addonv in d['addons']:
            if addonv[0] == addon:
                return addonv[1] if len(addonv) == 2 else None
    return [ ((addon, getVersion(addon)), h) for (addon, h) in d['cpow'].items() ]
        
cpowHists = bySession.flatMap(lambda (id, d): processForHist(d))

In [42]:
cpowHists.take(1)

[((u'{d10d0bf8-f5b5-c8b4-a8b2-2b9879e08c5d}', u'2.7.3'),
  {u'bucket_count': 20,
   u'histogram_type': 0,
   u'range': [1, 10000000],
   u'sum': 0,
   u'values': {u'0': 792, u'1': 0}})]

In [43]:
def fixHist((id, h)):
    n = sum(h['values'].values())
    s = h['sum']
    vs = { int(k): v for (k, v) in h['values'].items() }
    return (id, {'n': n, 'sum': s, 'values': vs, 'hits': 1})

def combineHists(h1, h2):
    vs = h1['values']
    for k, v in h2['values'].items():
        vs[k] = vs.get(k, 0) + v
    return {'n': h1['n'] + h2['n'],
            'sum': h1['sum'] + h2['sum'],
            'values': vs,
            'hits': h1['hits']+h2['hits']}

cpows = cpowHists.map(fixHist).reduceByKey(combineHists)

In [44]:
cpowsFiltered = cpows.filter(lambda (addon, d): d['hits'] > 50)

In [45]:
cpowsSorted = cpowsFiltered.map(lambda (addon, d): (d['sum']/d['n']*d['hits'], (addon, d))).sortByKey(False)

In [46]:
cpowsSimple = cpowsSorted.map(lambda (avg, (id, d)): (id, avg))

In [47]:
cpowTimes = cpowsSimple.collect()

In [51]:
try:
    output = open('output/cpow-data.json', 'w')
    json.dump(cpowTimes, output)
    output.close()
except:
    pass

try:
    output = open('cpow-data.json', 'w')
    json.dump(cpowTimes, output)
    output.close()
except:
    pass

In [50]:
cpowTimes

[((u'{19503e42-ca3c-4c27-b1e2-9cdb2170ee34}', u'1.5.6.13'), 1248534338L),
 ((u'support@lastpass.com', u'3.3.1'), 459027000L),
 ((u'{73a6fe31-595d-460b-a920-fcc0f8843232}', u'2.9.0.11'), 427779169L),
 ((u'{b9db16a4-6edc-47ec-a1f4-b86292ed211d}', u'5.6.1'), 367023802L),
 ((u's3google@translator', u'5.23'), 327030480L),
 ((u'YoutubeDownloader@PeterOlayev.com', u'2.4.0.4'), 326850160L),
 ((u'artur.dubovoy@gmail.com', u'13.2.1'), 325688418),
 ((u'{bee6eb20-01e0-ebd1-da83-080329fb9a3a}', u'1.83'), 291419164),
 ((u'sovetnik@metabar.ru', u'3.1.4.26'), 282367775L),
 ((u'hidecaptionplus-dp@dummy.addons.mozilla.org', u'3.0.5'), 238618555),
 ((u'{4ED1F68A-5463-4931-9384-8FFF5ED91D92}', u'5.0.169.0'), 202061682L),
 ((u'{73a6fe31-595d-460b-a920-fcc0f8843232}', u'2.9.0.12rc1'), 169859464L),
 ((u'abs@avira.com', u'1.9.2.691'), 146832207L),
 ((u'wrc@avast.com', u'10.3.3.44'), 138830076),
 ((u'{e8deb9e5-5688-4655-838a-b7a121a9f16e}', u'46.0'), 128144042),
 ((u'anttoolbar@ant.com', u'2.4.7.42'), 12716439