In [1]:
# Import necessary libraries
from google.cloud import bigquery
import pandas as pd

# Initialize a BigQuery client
client = bigquery.Client()

# Define your SQL query
query = """
    SELECT *
    FROM `dev_gsokolov.deal_cnt_tw`
"""

# Execute the query and convert the result to a pandas DataFrame
df = client.query(query).to_dataframe()

# Display the DataFrame
display(df)



Unnamed: 0,user_id,deal_count
0,16798787,106927
1,12306957,47717
2,909086,21773
3,35716982,13252
4,25163307,12166
...,...,...
78169,22690424,1
78170,18831466,1
78171,10455219,1
78172,25987599,1


In [5]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression

# Create a binary target variable: 1 if user made at least 5 deals, 0 otherwise
df['made_at_least_5_deals'] = (df['deal_count'] >= 5).astype(int)

# Fit logistic regression model
X = df[['deal_count']]
y = df['made_at_least_5_deals']
model = LogisticRegression()
model.fit(X, y)

# Predict the probability of making at least 5 deals for each user
df['probability_of_at_least_5_deals'] = model.predict_proba(X)[:, 1]

# Since winning is certain if at least 5 deals are made, probability of winning is the same
df['probability_of_winning'] = df['probability_of_at_least_5_deals']

# Calculate the overall probability of winning
overall_probability_of_winning = df['probability_of_winning'].mean()

display(df)
print(f"Overall Probability of Winning: {overall_probability_of_winning}")

Unnamed: 0,user_id,deal_count,made_at_least_5_deals,probability_of_at_least_5_deals,probability_of_winning
0,16798787,106927,1,1.000000e+00,1.000000e+00
1,12306957,47717,1,1.000000e+00,1.000000e+00
2,909086,21773,1,1.000000e+00,1.000000e+00
3,35716982,13252,1,1.000000e+00,1.000000e+00
4,25163307,12166,1,1.000000e+00,1.000000e+00
...,...,...,...,...,...
78169,22690424,1,0,2.130930e-16,2.130930e-16
78170,18831466,1,0,2.130930e-16,2.130930e-16
78171,10455219,1,0,2.130930e-16,2.130930e-16
78172,25987599,1,0,2.130930e-16,2.130930e-16


Overall Probability of Winning: 0.8734617647812162


In [8]:
# Given constants
gift_price = 10  # Example gift price
email_open_rate = 0.2  # Example probability of opening the email
email_click_rate = 0.01  # Example probability of clicking the promotion

# Calculate the probability of winning adjusted for the email open rate and click rate
df['adjusted_probability_of_winning'] = df['probability_of_at_least_5_deals'] * email_open_rate * email_click_rate

# Calculate expected spending for each user
df['expected_spending'] = df['adjusted_probability_of_winning'] * gift_price

# Calculate total expected spending
total_expected_spending = df['expected_spending'].sum()

In [9]:
total_expected_spending

1365.6400000001358

In [10]:
from scipy.stats import norm
# Calculate the 95% confidence interval lower bound for each probability
# Predict the probability of making at least 5 deals for each user
prob_predictions = model.predict_proba(X)[:, 1]

confidence_level = 0.95
z_score = norm.ppf((1 + confidence_level) / 2)
std_error = np.sqrt((prob_predictions * (1 - prob_predictions)) / len(df))  # standard error of probability
lower_bounds = prob_predictions - z_score * std_error

In [11]:
lower_bounds

array([ 1.00000000e+00,  1.00000000e+00,  1.00000000e+00, ...,
       -1.02329437e-10, -1.02329437e-10, -1.02329437e-10])

In [14]:
df['adjusted_probability_of_winning_lower_bound'] = lower_bounds * email_open_rate * email_click_rate
# Calculate expected spending for each user using the lower bounds
df['expected_spending_lower_bound'] = df['adjusted_probability_of_winning_lower_bound'] * gift_price
total_expected_spending_lower_bound = df['expected_spending_lower_bound'].sum()
total_expected_spending_lower_bound

1365.6020673399892

In [15]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from scipy.stats import norm

# Define your SQL query
query = """
    SELECT *
    FROM `dev_gsokolov.deal_cnt_tw`
"""

# Execute the query and convert the result to a pandas DataFrame
df = client.query(query).to_dataframe()

# Create a binary target variable: 1 if user made at least 5 deals, 0 otherwise
df['made_at_least_5_deals'] = (df['deal_count'] >= 5).astype(int)

# Fit logistic regression model
X = df[['deal_count']]
y = df['made_at_least_5_deals']
model = LogisticRegression()
model.fit(X, y)

# Predict the probability of making at least 5 deals for each user
prob_predictions = model.predict_proba(X)[:, 1]

# Calculate the 95% confidence interval lower bound for each probability
confidence_level = 0.95
z_score = norm.ppf((1 + confidence_level) / 2)
std_error = np.sqrt((prob_predictions * (1 - prob_predictions)) / len(df))  # standard error of probability
lower_bounds = prob_predictions - z_score * std_error

# Replace negative probabilities with zero (since probabilities cannot be negative)
lower_bounds = np.maximum(lower_bounds, 0)

# Given constants
gift_price = 10  # Example gift price
email_open_rate = 0.2  # Example probability of opening the email
email_click_rate = 0.01  # Example probability of clicking the promotion

# Calculate the probability of winning adjusted for the email open rate and click rate using the lower bounds
df['adjusted_probability_of_winning_lower_bound'] = lower_bounds * email_open_rate * email_click_rate

# Calculate expected spending for each user using the lower bounds
df['expected_spending_lower_bound'] = df['adjusted_probability_of_winning_lower_bound'] * gift_price

# Calculate total expected spending using the lower bounds
total_expected_spending_lower_bound = df['expected_spending_lower_bound'].sum()

display(df)
print(f"Total Expected Spending on Gifts (Lower Bound): {total_expected_spending_lower_bound:.2f}")

Unnamed: 0,user_id,deal_count,made_at_least_5_deals,adjusted_probability_of_winning_lower_bound,expected_spending_lower_bound
0,16798787,78193,1,0.002,0.02
1,12306957,32491,1,0.002,0.02
2,909086,12944,1,0.002,0.02
3,7545452,10196,1,0.002,0.02
4,25163307,9924,1,0.002,0.02
...,...,...,...,...,...
78169,4509711,0,0,0.000,0.00
78170,22948052,0,0,0.000,0.00
78171,15432788,0,0,0.000,0.00
78172,30464031,0,0,0.000,0.00


Total Expected Spending on Gifts (Lower Bound): 1325.66
