In [1]:
import json
import requests
import popelines
import os
from datetime import datetime
from main import fix_values, populating_vals, flatten, flatten_dupe_vals
    

In [2]:
# Fetch an endpoint and return its JSON-decoded body.
def read_endpoint(endpoint, headers_set, params_set=None):
    """GET ``endpoint`` and return the response body decoded from JSON.

    The HTTP status is checked *before* the body is decoded, so an error
    response with an empty or non-JSON body is reported as the HTTP error it
    is instead of as a JSON decoding failure.

    Args:
        endpoint: URL to request.
        headers_set: HTTP headers passed to ``requests.get``.
        params_set: optional query parameters passed to ``requests.get``.

    Returns:
        The decoded JSON body. On an HTTP error status the error is printed
        and the decoded error body is returned, as before.

    Raises:
        requests.exceptions.HTTPError: on an HTTP error status whose body is
            not valid JSON (previously surfaced as a JSON decoding error).
        ValueError: if a successful response does not contain valid JSON.
    """
    response = requests.get(endpoint, headers=headers_set, params=params_set)

    try:
        response.raise_for_status()
    except requests.exceptions.HTTPError as err:
        print(err)
        try:
            # Best effort, unchanged from before: hand the error payload back.
            return json.loads(response.text)
        except ValueError:
            # No usable body: surface the real cause rather than the decode error.
            raise err from None

    return json.loads(response.text)


In [3]:
# List every project visible to the authenticated account.
def generate_projects(project_endpoint, project_headers):
    """Return whatever ``read_endpoint`` yields for the projects endpoint.

    Args:
        project_endpoint: URL of the projects listing.
        project_headers: HTTP headers (including authorization) for the call.
    """
    return read_endpoint(endpoint=project_endpoint, headers_set=project_headers)


In [4]:
############################################### build the experiment tables ###########################
def generate_experiments(exp_list):
    """Split experiments into three flat row lists: single-layer fields, metrics, variations.

    The plan is to separate nested (list-valued) fields from single-layer
    fields, load them into separate tables, and join them on the BigQuery side.
    Metric rows and the ``variations`` wrapper carry the experiment id for
    that join.

    ``flatten``, ``populating_vals`` and ``flatten_dupe_vals`` are imported from
    ``main`` and are not visible here; every call below keeps its original
    arguments and order.
    NOTE(review): confirm whether those helpers mutate their inputs before
    reordering any of these calls.

    Args:
        exp_list: iterable of experiment dicts. Each must provide ``id``,
            ``metrics`` and ``variations``; each variation must provide
            ``actions`` and each action ``changes``.

    Returns:
        Tuple ``(all_singles, metrics_table, variations_table)`` of lists.

    Raises:
        KeyError: if one of the required keys above is missing, or a flattened
            metric lacks ``metrics_aggregator``, ``metrics_scope`` or
            ``metrics_winning_direction``.
    """
    all_singles = []
    metrics_table = []
    variations_table = []

    for exp in exp_list:
        print(f"Processing experiment {exp['id']}")

        # Single-layer fields: everything whose value is not a list.
        single_layer_experiment = {}
        for k, v in exp.items():
            if not isinstance(v, list):
                # Store the iterated value under the renamed key. Looking the
                # value up again via exp[renamed_key] raised KeyError for any
                # key that contained '-'.
                single_layer_experiment[k.replace('-', '_')] = v
        single_layer_experiment['upload_ts'] = str(datetime.utcnow())

        all_singles.append(flatten(single_layer_experiment, {}, ''))

        # Nested part goes into separate tables.
        # Metrics table:
        flattened_metric = list(exp['metrics'])

        updated_metric = populating_vals(outer_dict=exp, inner_flattened_list=flattened_metric, destination_key='metrics')
        new_flattened_metric = flatten_dupe_vals(vals=updated_metric, key='metrics')

        metric_list = []
        for metric in new_flattened_metric:
            metric_dict = {}
            metric_dict['metrics_aggregator'] = metric['metrics_aggregator']
            # The event id is the only metric field treated as optional.
            if 'metrics_event_id' in metric:
                metric_dict['metrics_event_id'] = metric['metrics_event_id']
            metric_dict['metrics_scope'] = metric['metrics_scope']
            metric_dict['metrics_winning_direction'] = metric['metrics_winning_direction']
            metric_dict['experiment_id'] = exp['id']
            metric_dict['upload_ts'] = str(datetime.utcnow())
            metric_list.append(metric_dict)

        metrics_table.extend(metric_list)

        # Variations table: wrapper dict handed to populating_vals as the outer level.
        variations = {
            'experiment_id': exp['id'],
            'variations': exp['variations'],
            'upload_ts': str(datetime.utcnow()),
        }

        flattened_variations = []

        for var in exp['variations']:
            flattened_actions = []

            if len(var['actions']) > 0:
                for action in var['actions']:

                    if action['changes'] != []:
                        flattened_changes = list(action['changes'])

                        # Replace old 'changes' with the flattened changes
                        updated_changes = populating_vals(outer_dict=action, inner_flattened_list=flattened_changes, destination_key='changes')
                        new_flattened_changes = flatten_dupe_vals(vals=updated_changes, key='changes')

                        update_actions = populating_vals(outer_dict=var, inner_flattened_list=new_flattened_changes, destination_key='actions')
                        flat = flatten_dupe_vals(vals=update_actions, key='actions')
                        flattened_actions.extend(flat)

                    else:
                        # Action without changes: the whole variation is handed
                        # to flatten_dupe_vals, once per such action.
                        new_actions = flatten_dupe_vals(vals=var, key='actions')
                        flattened_actions.extend(new_actions)

            else:
                # Variation without actions: copy its other fields and add an
                # empty 'actions' entry (only once a non-'actions' key exists).
                other_flat = {}
                for k, v in var.items():
                    if k != 'actions':
                        other_flat['actions'] = []
                        other_flat[k] = v
                flattened_actions.extend([other_flat])

            update_variations = populating_vals(outer_dict=variations, inner_flattened_list=flattened_actions, destination_key='variations')
            flattened_variations.extend(flatten_dupe_vals(vals=update_variations, key='variations'))

        variations_table.extend(flattened_variations)

    return all_singles, metrics_table, variations_table
  


In [5]:
############################################### flatten one experiment's result time series ########################
def generate_results(results_jayson):
    """Flatten a results/timeseries payload into a list of rows.

    Walks ``metrics -> results -> timeseries`` and, at each level, combines the
    already-flattened inner rows with the outer level via ``populating_vals``
    and ``flatten_dupe_vals``. Those helpers and ``flatten`` are imported from
    ``main`` and are not visible here, so the exact row shape is theirs to
    define.

    ``flattened_results`` is created once per metric. It used to be recreated
    for every entry of ``metric['results']``, which dropped every entry but the
    last and left the name unbound (or stale from the previous metric) when
    ``results`` was empty.

    Args:
        results_jayson: payload dict with a ``metrics`` list. Each timeseries
            element gets an ``upload_ts`` key added in place.

    Returns:
        List of flattened rows as produced by ``flatten_dupe_vals`` for the
        ``metrics`` level.
    """
    flattened_j_ts = []
    flattened_metrics = []

    for metric in results_jayson['metrics']:

        if 'results' in metric.keys():
            # One accumulator per metric, shared by all of its result entries.
            flattened_results = []

            for ts in metric['results']:
                for element in ts['timeseries']:
                    element['upload_ts'] = str(datetime.utcnow())
                    flattened_timeseries = [flatten(element, {}, '')]

                    # Replace old 'timeseries' with new 'flattened_timeseries'
                    updated_results = populating_vals(outer_dict=ts, inner_flattened_list=flattened_timeseries, destination_key='timeseries')
                    flattened_results.extend(flatten_dupe_vals(vals=updated_results, key='timeseries'))

            # Replace old 'results' with new 'flattened_results'
            update_metrics = populating_vals(outer_dict=metric, inner_flattened_list=flattened_results, destination_key='results')
            flattened_metrics.extend(flatten_dupe_vals(vals=update_metrics, key='results'))

        else:
            # NOTE(review): this replaces everything collected from earlier
            # metrics with the flattened whole payload; kept as-is, confirm intent.
            flattened_metrics = [flatten(results_jayson, {}, '')]

    update_new_j_ts = populating_vals(outer_dict=results_jayson, inner_flattened_list=flattened_metrics, destination_key='metrics')
    flattened_j_ts.extend(flatten_dupe_vals(vals=update_new_j_ts, key='metrics'))

    return flattened_j_ts


In [6]:
# ############################################### Keys and Authentication #######################################
# Path of the Google service-account key file, taken from the environment.
# gbq_key is None when GOOGLE_ACCOUNT_CREDENTIALS is not set. The disabled
# lines below used to fall back to a machine-specific key path.
# if not os.environ.get('GOOGLE_ACCOUNT_CREDENTIALS'):
#     os.environ['GOOGLE_ACCOUNT_CREDENTIALS'] = '/home/engineering/keyfile.json'
gbq_key = os.environ.get('GOOGLE_ACCOUNT_CREDENTIALS')

# The notebook uses the current working directory as its base directory (the
# disabled line is the script variant based on __file__). `directory` is used
# further down to build the ../uploads/ file paths.
# directory = str(os.path.abspath(os.path.dirname(__file__)))
directory = os.getcwd()

############################################### Instantiating Popelines #######################################
# NOTE(review): popeline receives directory='.' rather than the `directory`
# variable above; presumably equivalent while the kernel runs from the
# notebook's folder -- confirm.
pope = popelines.popeline(dataset_id='optimizely', service_key_file_loc=gbq_key, directory='.', verbose=False)


In [7]:
# Optimizely parameters
# The API token is read from the environment so that it never lands in the
# notebook or in version control: export OPTIMIZELY_API_TOKEN before starting
# the kernel. Any token that was previously committed in this cell should be
# revoked and replaced.
optimizely_token = os.environ.get('OPTIMIZELY_API_TOKEN', '')
if not optimizely_token:
    print('OPTIMIZELY_API_TOKEN is not set; Optimizely requests will be sent without a token')

headers = {
    'Authorization': f'Bearer {optimizely_token}',
}

# endpoints
project_endpoint = 'https://api.optimizely.com/v2/projects'
experiment_endpoint = 'https://api.optimizely.com/v2/experiments'


############################################### fetch all projects ##############################
# Only fetches the project list via generate_projects; nothing is uploaded here.
all_projects = generate_projects(project_endpoint, headers)


In [9]:
############################################### collect the experiment ids of every project ##############################
# project ids come from the project listing fetched earlier
project_id_list = [project['id'] for project in all_projects]

experiment_id_list = []
origin_single_table, origin_metrics_table, origin_variations_table = [], [], []

# one experiments request per project
for project_id in project_id_list:
    # project_id is required; per_page raises the page size from the default of 25.
    # NOTE(review): only one page of up to 100 experiments is requested per project.
    params = (
        ('project_id', project_id),
        ('per_page', 100),
    )

    exp_list = read_endpoint(endpoint=experiment_endpoint, headers_set=headers, params_set=params)
    exp_id_list = [exp['id'] for exp in exp_list]
    experiment_id_list.extend(exp_id_list)


In [10]:
experiment_id_list

[10756790433,
 10796678057,
 10800885728,
 10815382176,
 10945903729,
 10955300529,
 11039523400,
 11938601024,
 11966791385,
 11992110033,
 10195324920,
 10890302148,
 10488931082,
 10545851126,
 10556110251,
 10564323775,
 10605041742,
 10622090898,
 10666463868,
 10675031376,
 10681551672,
 10683631399,
 10715090552,
 10733622163,
 10797220881,
 10797550884,
 10805332324,
 10844011372,
 10854082584,
 10882272812,
 11011180459,
 11066932017,
 11085711684,
 11087580674,
 11174107321,
 11189744252,
 11191727326,
 11191905720,
 11199410199,
 11206301430,
 11308083317,
 11334340375,
 11356331084,
 11367191147,
 11378763890,
 11396076650,
 11404885421,
 11406773739,
 11432181239,
 11474010139,
 11482931694,
 11556750451,
 11562826019,
 11570380944,
 11657860282,
 11713452853,
 11720710615,
 11722600722,
 12016386767,
 12096030597,
 12117040288,
 12132980378,
 12166698246,
 12169545706,
 12239002639,
 12240020740,
 12275901710,
 12337280003,
 12850290083,
 12903040293,
 13026380185,
 13026

In [11]:
# Debug override: replaces the list collected from the API with one experiment,
# so every later cell only processes this id. Remove for a full run.
experiment_id_list = [12746930280]

In [12]:
############################################### fetch result time series for every experiment ##############################
# Loops over experiment_id_list and requests each experiment's time series.
# The request uses requests.get directly (not read_endpoint) and the raw text
# is inspected before it is decoded.
# All BigQuery uploads and the results.json write are currently commented out,
# so flattened_j_ts is computed but not persisted by this cell.
for experiment_id in experiment_id_list:
    result_endpoint = f'https://api.optimizely.com/v2/experiments/{experiment_id}/timeseries'
    response_ts = requests.get(result_endpoint, headers=headers)
    print(f"got experiment {experiment_id}")
    
    # if '' then the experiment has not started yet
    # NOTE(review): 'bad' is a plain substring test on the whole body, so any
    # payload that merely contains "bad" also lands here; presumably meant to
    # catch error responses -- consider checking response_ts.status_code.
    if response_ts.text == '' or 'bad' in response_ts.text:
        j_ts = {'experiment_id': experiment_id}
        # new_j_ts is the same dict object as j_ts, so upload_ts is set on both.
        new_j_ts = j_ts
        new_j_ts['upload_ts'] = str(datetime.utcnow())
        # NOTE(review): mode='w' presumably overwrites no_results.json on each
        # iteration, keeping only the last experiment without results -- confirm.
        pope.write_to_json(file_name=f'{directory}/../uploads/no_results.json', jayson=[new_j_ts], mode='w')
#         pope.write_to_bq(table_name='results', file_name=f'{directory}/../uploads/no_results.json', append=True, ignore_unknown_values=False, bq_schema_autodetect=False)

    else:
        j_ts = json.loads(response_ts.text)
        # fix_values comes from main; reset_key='results' is passed through to popelines.
        new_j_ts = pope.fix_json_values(callback=fix_values, obj=j_ts, reset_key='results')
        flattened_j_ts = generate_results(new_j_ts)

#         pope.write_to_json(file_name=f'{directory}/../uploads/results.json', jayson=flattened_j_ts, mode='w')
#         pope.write_to_bq(table_name='results', file_name=f'{directory}/../uploads/results.json', append=True, ignore_unknown_values=False, bq_schema_autodetect=False)
#         print(f"Successfully uploaded result time series for experiment {experiment_id}")

got experiment 12746930280


In [14]:
# Debug: pin a single experiment id for the step-by-step cells that follow.
experiment_id = 12746930280

In [15]:
# Debug: request the time series of the pinned experiment, using the same URL
# pattern and headers as the results loop.
result_endpoint = f'https://api.optimizely.com/v2/experiments/{experiment_id}/timeseries'
response_ts = requests.get(result_endpoint, headers=headers)



In [16]:
# Debug: decode the raw body; json.loads raises if the body is empty or not JSON.
j_ts = json.loads(response_ts.text)

In [17]:
# Debug: same clean-up call as in the results loop (fix_values comes from main).
# NOTE(review): presumably normalises values and resets the 'results' entries -- confirm in popelines.
new_j_ts = pope.fix_json_values(callback=fix_values, obj=j_ts, reset_key='results')

In [21]:
new_j_ts['metrics'][1].keys()

dict_keys(['aggregator', 'event_id', 'name', 'results', 'scope', 'winning_direction'])

# debug generate_results

In [22]:
# Debug: take the first metric of the cleaned payload to step through the flattening by hand.
metric = new_j_ts['metrics'][0]

In [31]:
# Debug copy of the inner loops of generate_results for a single metric.
# Here flattened_results is created once, before the loop over the result
# entries, so rows from every entry accumulate in it.
flattened_results = []
for ts in metric['results']:

    for element in ts['timeseries']:
        # One-element list per timeseries point; the point itself gets an
        # upload_ts key added in place.
        flattened_timeseries = []
        element['upload_ts'] = str(datetime.utcnow())
        flattened_timeseries.append(flatten(element, {}, ''))

        # Replace old 'timeseries' with new 'flattened_timeseries'
        updated_results = populating_vals(outer_dict=ts, inner_flattened_list=flattened_timeseries, destination_key='timeseries')
        flattened_results.extend(flatten_dupe_vals(vals=updated_results, key='timeseries'))
        
        

In [33]:
flattened_results

200

In [34]:
# Debug copy of the body of generate_results, run directly on new_j_ts.
# Differences from the function as defined above: the 'results' guard and its
# else branch are commented out, and flattened_results is created once per
# metric instead of once per result entry.
flattened_j_ts = []
flattened_metrics = []

for metric in new_j_ts['metrics']:

#     if 'results' in metric.keys():
    # Accumulates the rows of all result entries of this metric.
    flattened_results = []
    for ts in metric['results']:

        for element in ts['timeseries']:
            flattened_timeseries = []
            element['upload_ts'] = str(datetime.utcnow())
            flattened_timeseries.append(flatten(element, {}, ''))

            # Replace old 'timeseries' with new 'flattened_timeseries'
            updated_results = populating_vals(outer_dict=ts, inner_flattened_list=flattened_timeseries, destination_key='timeseries')
            flattened_results.extend(flatten_dupe_vals(vals=updated_results, key='timeseries'))

    # Replace old 'results' with new 'flattened_results'
    update_metrics = populating_vals(outer_dict=metric, inner_flattened_list=flattened_results, destination_key='results')
    flattened_metrics.extend(flatten_dupe_vals(vals=update_metrics, key='results'))

#     else:
#         flattened_metrics = [flatten(new_j_ts, {}, '')]

# Combine the per-metric rows with the top-level payload fields.
update_new_j_ts = populating_vals(outer_dict=new_j_ts, inner_flattened_list=flattened_metrics, destination_key='metrics')
flattened_j_ts.extend(flatten_dupe_vals(vals=update_new_j_ts, key='metrics'))



In [37]:
flattened_j_ts

[{'confidence_threshold': 0.95,
  'end_time': '2019-02-27T21:30:00.000000Z',
  'experiment_id': 12746930280,
  'metrics_aggregator': 'unique',
  'metrics_event_id': 12826470463,
  'metrics_name': 'CTA - View Guide Now',
  'metrics_results_is_baseline': False,
  'metrics_results_level': 'variation',
  'metrics_results_name': 'Variation #1',
  'metrics_results_results_id': '12762280184',
  'metrics_results_timeseries_lift_is_significant': False,
  'metrics_results_timeseries_lift_lift_status': 'equal',
  'metrics_results_timeseries_lift_significance': 0.0,
  'metrics_results_timeseries_lift_visitors_remaining': 9223372036854775807,
  'metrics_results_timeseries_samples': 0,
  'metrics_results_timeseries_time': '2019-01-07T23:54:30.000000Z',
  'metrics_results_timeseries_upload_ts': '2019-02-27 21:39:10.727490',
  'metrics_results_timeseries_value': 0,
  'metrics_results_timeseries_variance': 0.0,
  'metrics_results_variation_id': '12762280184',
  'metrics_scope': 'visitor',
  'metrics_wi