In [9]:
# azureml-core of version 1.0.72 or higher is required
# azureml-dataprep[pandas] of version 1.1.34 or higher is required
from azureml.core import Workspace, Dataset
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np
import matplotlib.pyplot as plt
from datetime import timedelta, datetime, timezone

subscription_id = '04b1ca0e-558c-48d0-ba28-fee796a82076'
resource_group = 'analytics2040c59b'
workspace_name = 'VitableML'

## How many days to consider
t = timedelta(days=13)
start_time = datetime.now(timezone.utc) - t
end_time = datetime.now(timezone.utc) - timedelta(days = 3)

DollarVariable = "amount"
parameterFilename = 'models/prod_quiz_parameter_recommender/source_files/vitableParameters.csv'
parameterList = np.genfromtxt(parameterFilename, delimiter=',', dtype='<U12')

  


In [31]:
def load_orders(subscription_id, resource_group, workspace_name, t):
    workspace = Workspace(subscription_id, resource_group, workspace_name)
    dataset = Dataset.get_by_name(workspace, name='tracked_user_events')
    orders = dataset.with_timestamp_columns('Created').time_recent(t).to_pandas_dataframe()

    print('Got Orders')

    return orders

def load_recommendations(subscription_id, resource_group, workspace_name, start_time, end_time):

    workspace = Workspace(subscription_id, resource_group, workspace_name)
    dataset = Dataset.get_by_name(workspace, name='parameter_set_recommendations')

    recommendations = dataset.to_pandas_dataframe()
    recommendations = recommendations[recommendations.RecommenderType == "ParameterSet"]
    recommendations['Created'] = pd.to_datetime(recommendations['Created'])
    recommendations = recommendations[(recommendations['Created'] >= start_time) & (recommendations['Created'] < end_time)]
    recommendations.info()

    print('Got recommendations')

    return recommendations

def get_relevant_recommendation_info(recommendations):
    recommendations_df = recommendations[["Created","ModelOutput","RecommendationCorrelatorId"]]
    recommendations_df.ModelOutput = recommendations_df.ModelOutput.apply(lambda x:eval(x))
    jdf = json_normalize(recommendations_df.ModelOutput).set_index(recommendations_df.index)
    recommendations_df = recommendations_df.join(jdf).drop(['ModelOutput'], axis = 1)

    return recommendations_df

def get_relevant_order_info(orders, DollarVariable):
    orders.Properties = orders.Properties.apply(lambda x:eval(x))
    jdf = json_normalize(orders.Properties).set_index(orders.index)
    orders = orders.join(jdf).drop(['Properties'], axis = 1)
    orders_df = orders[['RecommendationCorrelatorId',DollarVariable]]

    return orders_df

def get_recommendor_outcomes(recommendations_df, orders_df,DollarVariable):
    outcomes_df = recommendations_df.set_index('RecommendationCorrelatorId').join(orders_df.set_index('RecommendationCorrelatorId'))
    outcomes_df = outcomes_df.fillna(0)
    outcomes_df['Purchased'] = 0
    outcomes_df['Purchased'][outcomes_df['amount']>0] = 1
    columns_titles = ["Created","RecommendedParameters.pills_cap","RecommendedParameters.bulk_cap","RecommendedParameters.basket_value_cap","Purchased",DollarVariable]
    outcomes_df=outcomes_df.reindex(columns=columns_titles)

    return outcomes_df

def measure_performance(parameterList,outcomes_df):

    customer_array = np.array(outcomes_df)
    print(customer_array)
    N_parameters = len(parameterList)
    store_array = np.zeros([N_parameters,10])

    for i in range(N_parameters):
        split_parameter = parameterList[i].split("_")
        store_array[i,0] = int(split_parameter[0])
        store_array[i,1] = int(split_parameter[1])
        store_array[i,2] = int(split_parameter[2])
        #store_array[i,3] = int(split_parameter[3])
        #store_array[i,4] = int(split_parameter[4])

    N_customers = len(customer_array)


    store_array[:,[5,6,7,8,9]] = 0
    for n_customer in range(N_customers):
        for n_parameter in range(N_parameters):
            if int(customer_array[n_customer,1]) == store_array[n_parameter,0]:
                if int(customer_array[n_customer,2]) == store_array[n_parameter,1]:
                    if int(customer_array[n_customer,3]) == store_array[n_parameter,2]:
                        #if int(customer_array[n_customer,3]) == store_array[n_parameter,3]:
                        #    if int(customer_array[n_customer,4]) == store_array[n_parameter,4]:

                        store_array[n_parameter,5] = store_array[n_parameter,5] + 1 ## Someone accepted the recommendation

                        if int(customer_array[n_customer,4]) == 1:  ## Someone purchased the recommendation
                            store_array[n_parameter,6] = store_array[n_parameter,6] + 1
                            store_array[n_parameter,7] = store_array[n_parameter,7] + int(customer_array[n_customer,5])

    ## Store array 8 is average revenue per invoked recommendation 
    store_array[:,8] = store_array[:,7]/store_array[:,5]
    store_array = np.nan_to_num(store_array).copy()
    
    ## Pull out the total average revenue per invoked recommendation
    performance = np.sum(store_array[:,7])/np.sum(store_array[:,5]) 

    return performance

In [32]:
#orders = load_orders(subscription_id, resource_group, workspace_name, t)
#recommendations = load_recommendations(subscription_id, resource_group, workspace_name, start_time, end_time)
#recommendations_df = get_relevant_recommendation_info(recommendations)
#orders_df = get_relevant_order_info(orders, DollarVariable)
#outcomes_df = get_recommendor_outcomes(recommendations_df, orders_df, DollarVariable)
performance = measure_performance(parameterList,outcomes_df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


[[Timestamp('2021-08-14 04:25:38.726666700+0000', tz='UTC') 10 2 120 0
  0.0]
 [Timestamp('2021-08-14 04:26:10.546666700+0000', tz='UTC') 10 2 70 0 0.0]
 [Timestamp('2021-08-14 04:29:00.910000+0000', tz='UTC') 4 2 100 0 0.0]
 ...
 [Timestamp('2021-08-14 04:20:54.540000+0000', tz='UTC') 4 2 140 0 0.0]
 [Timestamp('2021-08-14 04:22:31.713333300+0000', tz='UTC') 10 2 100 0
  0.0]
 [Timestamp('2021-08-14 04:24:04.153333300+0000', tz='UTC') 4 2 100 0 0.0]]
4678.0
4578.0
