In [1]:
import ujson as json
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import plotly.plotly as py
import datetime
from __future__ import division

from moztelemetry import get_pings, get_pings_properties, get_one_ping_per_client

%pylab inline

Populating the interactive namespace from numpy and matplotlib


In [2]:
# Length, in days, of the window used when gathering pings. The window ends
# 2 days ago and starts (TimeWindow + 2) days ago; see run_get_pings().
TimeWindow = 14

# Additional keyword filters forwarded to get_pings().
Filters = {
    'app': 'Firefox',
    
    # Fraction of pings to sample (0.01 would be 1%). For testing, it is
    # better to use a small number here (like 0.001, i.e. 0.1%) to speed up
    # processing time.
    'fraction': 0.001,
    
    # Optionally restrict pings to a single channel.
    # 'channel': 'beta',
}

# Adapter vendor IDs as lowercase hex strings, in the same form as the
# adapter's 'vendorID' field. AMD and ATI share the same ID.
VendorIDs = {
    'Intel': '0x8086',
    'NVIDIA': '0x10de',
    'AMD': '0x1002',
    'ATI': '0x1002'
}

In [3]:
###############################
# This section gathers pings. #
###############################

def run_get_pings():
    """Fetch raw pings whose build_id falls inside the configured window.

    The window is TimeWindow days long and ends 2 days ago. Both endpoints
    are formatted as YYYYMMDD strings and passed to get_pings() as the
    build_id range, together with the keyword filters in Filters.

    Relies on the notebook-level names sc, TimeWindow and Filters.

    Returns:
        Whatever get_pings() returns for those parameters.
    """
    def fmt_date(d):
        return d.strftime("%Y%m%d")

    # Read the clock once so both endpoints derive from the same instant;
    # two separate now() calls could straddle midnight and skew the window.
    now = datetime.datetime.now()
    t1 = fmt_date(now - datetime.timedelta(TimeWindow + 2))  # window start
    t2 = fmt_date(now - datetime.timedelta(2))  # window end: 2 days ago
    return get_pings(sc, build_id=(t1, t2), **Filters)

# Get pings for the parameters in the previous step.
raw_pings = run_get_pings()

In [4]:
######################################################################
# This section takes the raw ping list, then formats and filters it. #
######################################################################

# Project each raw ping onto the properties listed below (giving a more
# readable dictionary-like form), then hand the result to
# get_one_ping_per_client. To inspect the outcome, execute "pings.take(1)".
pings = get_one_ping_per_client(
    get_pings_properties(raw_pings, [
        "clientID",
        "environment/build/version",
        "environment/system/os/name",
        "environment/system/os/version",
        "environment/system/os/servicePackMajor",
        "environment/system/os/servicePackMinor",
        "environment/system/gfx/adapters",
    ])
)

# We add two extra steps. The first rewrites the ping to have some
# information more easily accessible (like the primary adapter),
# and the second step removes any pings that don't have adapter
# information.
def rewrite_ping(p):
    """Expose the primary graphics adapter of a ping under simpler keys.

    When the ping has a non-empty 'environment/system/gfx/adapters' list,
    its first entry is stored as p['adapter']. If that adapter reports a
    driver version, the version string is also parsed into a list of
    integers stored as p['driverVersion'] (non-numeric components are
    dropped).

    Pings without adapter information are returned unchanged so that
    filter_ping can discard them afterwards.

    Args:
        p: ping dictionary; it is modified in place.

    Returns:
        The same ping dictionary.
    """
    # A missing key, None and an empty list all mean "no adapter info".
    adapters = p.get('environment/system/gfx/adapters')
    if not adapters:
        return p

    adapter = adapters[0]
    if adapter is None:
        return p
    p['adapter'] = adapter

    # Convert the version to a list of integers, e.g.
    # '8.15.10.2559' -> [8, 15, 10, 2559].
    version = adapter.get('driverVersion')
    if version is not None:
        p['driverVersion'] = [int(n) for n in version.split('.') if n.isdigit()]
    return p

def filter_ping(p):
    """Return True when the ping carries an 'adapter' entry."""
    has_adapter = 'adapter' in p
    return has_adapter

# Rewrite every ping, drop those without an adapter, and call cache() on the
# result so later cells can reuse it.
pings = pings.map(rewrite_ping).filter(filter_ping).cache()

In [5]:
# Observe the format of a single ping. This may take some time since it has to
# execute the pipeline.
# NOTE(review): take(1) presumably returns the first available element rather
# than a randomly chosen one -- confirm.
pings.take(1)

[{'adapter': {u'GPUActive': True,
   u'RAM': None,
   u'description': u'Intel(R) HD Graphics',
   u'deviceID': u'0x0046',
   u'driver': u'igdumdx32 igd10umd32',
   u'driverDate': u'10-21-2011',
   u'driverVersion': u'8.15.10.2559',
   u'subsysID': u'143d103c',
   u'vendorID': u'0x8086'},
  'clientID': u'08700825-106c-4588-9674-c64daf9f65f7',
  'driverVersion': [8, 15, 10, 2559],
  'environment/build/version': u'39.0',
  'environment/system/gfx/adapters': [{u'GPUActive': True,
    u'RAM': None,
    u'description': u'Intel(R) HD Graphics',
    u'deviceID': u'0x0046',
    u'driver': u'igdumdx32 igd10umd32',
    u'driverDate': u'10-21-2011',
    u'driverVersion': u'8.15.10.2559',
    u'subsysID': u'143d103c',
    u'vendorID': u'0x8086'}],
  'environment/system/os/name': u'Windows_NT',
  'environment/system/os/servicePackMajor': 1,
  'environment/system/os/servicePackMinor': 0,
  'environment/system/os/version': u'6.1'}]

In [44]:
# Count the total number of pings left in the filtered dataset.
# NOTE(review): the pipeline passes pings through get_one_ping_per_client, so
# this presumably counts clients rather than sessions -- confirm before
# quoting the figure.
TotalSessions = pings.count()
print('Number of sessions: {0}'.format(TotalSessions))

Number of sessions: 30837


In [40]:
##############################################
# Helper function to compare version tuples. #
##############################################
def compare_version_tuples(v1, v2):
    """Compare two version sequences component by component.

    The shorter sequence is treated as if padded with trailing zeros, so
    (1,) and (1, 0) compare equal.

    Args:
        v1, v2: sequences of integers (tuples or lists).

    Returns:
        A negative number if v1 < v2, a positive number if v1 > v2, and 0 if
        they are equal. The magnitude is the difference of the first pair of
        components that differ.
    """
    n = max(len(v1), len(v2))
    # range() rather than the Python 2-only xrange(): the loop is only a few
    # iterations long, and this keeps the helper usable on Python 3 as well.
    for i in range(n):
        x1 = v1[i] if i < len(v1) else 0
        x2 = v2[i] if i < len(v2) else 0
        if x1 != x2:
            return x1 - x2
    return 0

# Tests
assert compare_version_tuples((1, 0), (1, 1)) < 0
assert compare_version_tuples((1, 1), (1, 0)) > 0
assert compare_version_tuples((1, 1), (1, 1)) == 0
# Missing trailing components count as 0, whichever side is shorter.
assert compare_version_tuples((1,), (1, 0)) == 0
assert compare_version_tuples((1, 0), (1,)) == 0
assert compare_version_tuples((1, 0), (2, 5)) < 0

In [49]:
# Sample filter #1 - how many people are using Intel devices
# with a driver older than 8.15.10.2622? (bug 1175366).
BadVersion = (8, 15, 10, 2622)

def sample_filter_1(p):
    """Return True for Intel adapters whose parsed driver version is below BadVersion."""
    is_intel = p['adapter']['vendorID'] == VendorIDs['Intel']
    if is_intel and 'driverVersion' in p:
        return compare_version_tuples(p['driverVersion'], BadVersion) < 0
    # Either a different vendor or no parsed driver version on the ping.
    return False

sample_result_1 = pings.filter(sample_filter_1)

# Call count() once per dataset and reuse the numbers, instead of counting
# each dataset twice just to format one line.
sample_count_1 = sample_result_1.count()
total_count_1 = pings.count()
# The 100.0 factor forces float division even if the `division` future import
# is not active; an empty dataset reports 0% rather than dividing by zero.
sample_percent_1 = (100.0 * sample_count_1 / total_count_1) if total_count_1 else 0.0
print('{0} out of {1} sessions matched. ({2:.2f}%)'.format(
    sample_count_1,
    total_count_1,
    sample_percent_1))

10204 out of 30837 sessions matched. (33.09%)


In [51]:
# Sample filter #2 - how many users have either of these devices?
#   0x8086, 0x2e32 - Intel G41 express graphics
#   0x8086, 0x2a02 - Intel GM965, Intel X3100
# See bug 1116812.
#
# Note that vendor and deviceID hex digits are lowercase.
def sample_filter_2(p):
    """Return True for Intel adapters whose deviceID is 0x2e32 or 0x2a02."""
    gpu = p['adapter']
    if gpu['vendorID'] != VendorIDs['Intel']:
        return False
    return gpu['deviceID'] in ('0x2e32', '0x2a02')

sample_result_2 = pings.filter(sample_filter_2)

# Call count() once per dataset and reuse the numbers, instead of counting
# each dataset twice just to format one line.
sample_count_2 = sample_result_2.count()
total_count_2 = pings.count()
# The 100.0 factor forces float division even if the `division` future import
# is not active; an empty dataset reports 0% rather than dividing by zero.
sample_percent_2 = (100.0 * sample_count_2 / total_count_2) if total_count_2 else 0.0
print('{0} out of {1} sessions matched. ({2:.2f}%)'.format(
    sample_count_2,
    total_count_2,
    sample_percent_2))

2287 out of 30837 sessions matched. (7.42%)
