In [1]:
import numpy as np
import pandas as pd
from sklearn.utils import resample
import matplotlib.pyplot as plt
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.model_selection import GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve
from sklearn import ensemble
from sklearn.ensemble import GradientBoostingClassifier

import warnings
# Gotcha: this silences every warning category for the rest of the session,
# including numeric ones (e.g. division by zero) raised by numpy/pandas.
warnings.filterwarnings("ignore")

%matplotlib inline

# Raise the pandas display limits so wide/long frames are not truncated.
# With limits this large, displaying a full frame (without .head()) renders
# every row, so keep using .head()/.shape on the ~300k-row tables below.
pd.options.display.max_columns = 10000
pd.options.display.max_rows = 10000000

# 1. Importing Data

In [2]:
# Validation log; later cells read its `click`, `payprice` and `bidid` columns.
valid = pd.read_csv("validation.csv") 

In [3]:
# (rows, columns) of the validation log
valid.shape

(303925, 25)

In [4]:
# Per-bid table keyed by `bidid` with one 0/1-valued column per strategy
# (LR*/GBC*). NOTE(review): presumably a flag marking which strategy won the
# impression -- confirm against the notebook that produced win_final.csv.
win = pd.read_csv("win_final.csv")
win.head()

Unnamed: 0,bidid,LRLINEAR,LRQUAD,LRORTB,LREXP,GBCLINEAR,GBCQUAD,GBCORTB,GBCEXP,GBCCUBE,GBCFOUR,GBCFIF,GBCKAN
0,bbcb813b6166538503d8b33a5602d7d72f6019dc,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,5a07316c49477cb5d9b4d5aa39c27d6c3be7f92d,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,f6ece71dae81d6b16bfb24ad6dd5611472d4c673,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,b4d5c57c9b38ff5a12954fa01e11931b4e6bfbbb,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,0899bf144249458ea9c89188473694bf44c7ca15,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [5]:
# (rows, columns); the row count should match `valid` for the merge below
win.shape

(303925, 13)

In [6]:
# merge1 = pd.merge(valid, win, on='bidid')
# valid_w = merge1.loc[merge1['GBC_ORTB'] == 1]
# valid_w.head()

In [7]:
# Average CTR of the validation log: total clicks divided by number of rows.
avgCTR = valid["click"].sum() / valid.shape[0]

# Predicted CTR per validation row, loaded from disk.
pCTR_valid_gbc = pd.read_csv("pCTR_valid.csv")

In [8]:
# (rows, columns) of the predicted-CTR table
pCTR_valid_gbc.shape

(303925, 1)

In [9]:
# Give the single prediction column a stable name (renames in place).
pCTR_valid_gbc.columns = ["p_valid"]

# 4. Bidding

## 4.1 Bidding Mechanism

In [10]:
def bidding(bidprice, budget=6250, data=None):
    """Replay the logged auctions under a fixed budget and summarise the outcome.

    Rows are processed in order. A row is skipped when the remaining budget is
    smaller than its cost (``payprice / 1000``). Otherwise the bid wins when
    ``bidprice >= payprice``; a win deducts the cost from the budget, counts
    one impression, and counts one click when the row's ``click`` equals 1.

    Parameters
    ----------
    bidprice : sequence of numbers (list, ndarray or Series)
        One bid per row of ``data``, matched to rows by position.
    budget : number, default 6250
        Total amount that may be spent, in the same unit as ``payprice / 1000``.
    data : pandas.DataFrame, optional
        Auction log with ``payprice`` and ``click`` columns. Defaults to the
        notebook-level ``valid`` frame.

    Returns
    -------
    tuple
        ``(click, imps, spend, CTR, CPM, eCPC)`` where ``CTR = click / imps``,
        ``CPM = spend * 1000 / imps`` and ``eCPC = spend / click``.
        ``CTR`` and ``CPM`` are NaN when no impression is won. ``eCPC`` is
        ``inf`` when money was spent without any click and NaN when nothing
        was spent.

    Raises
    ------
    ValueError
        If ``bidprice`` does not have exactly one entry per row of ``data``.
    """
    if data is None:
        data = valid  # notebook-level validation log

    # Plain arrays avoid a pandas label lookup per row, which dominates the
    # run time on a log with hundreds of thousands of rows.
    bids = np.asarray(bidprice, dtype=float)
    payprice = data["payprice"].to_numpy()
    clicked = data["click"].to_numpy() == 1

    if bids.shape != payprice.shape:
        raise ValueError(
            "bidprice must have one entry per auction row: got %d bids for %d rows"
            % (bids.size, payprice.size)
        )

    # Rows where the bid is too low never change any state, so only the
    # potentially winning rows need to be walked sequentially.
    candidates = np.flatnonzero(bids >= payprice)
    costs = (payprice[candidates] / 1000).tolist()
    clicks_on_win = clicked[candidates].tolist()

    remaining = budget
    imps = 0
    click = 0
    for cost, got_click in zip(costs, clicks_on_win):
        if remaining < cost:
            # Cannot afford this impression; a cheaper one later may still fit.
            continue
        remaining -= cost
        imps += 1
        if got_click:
            click += 1

    spend = float(budget - remaining)
    undefined = float("nan")

    # Guard the ratios so a strategy that wins nothing (or wins without
    # clicks) yields a reportable value instead of raising.
    CTR = click / imps if imps else undefined
    CPM = spend * 1000 / imps if imps else undefined
    if click:
        eCPC = spend / click
    else:
        eCPC = float("inf") if spend > 0 else undefined

    return click, imps, spend, CTR, CPM, eCPC

In [11]:
# Pair every validation bid id with its predicted CTR (aligned on row index).
pd_pctr = pCTR_valid_gbc[["p_valid"]].rename(columns={"p_valid": "pCTR"})
pd_pctr.insert(0, "bidid", valid.bidid)

pd_pctr.head()

Unnamed: 0,bidid,pCTR
0,bbcb813b6166538503d8b33a5602d7d72f6019dc,0.000175
1,5a07316c49477cb5d9b4d5aa39c27d6c3be7f92d,0.000147
2,f6ece71dae81d6b16bfb24ad6dd5611472d4c673,0.000108
3,b4d5c57c9b38ff5a12954fa01e11931b4e6bfbbb,0.000111
4,0899bf144249458ea9c89188473694bf44c7ca15,0.000143


In [12]:
# (rows, columns) of the bidid/pCTR table
pd_pctr.shape

(303925, 2)

In [13]:
# Attach the per-strategy flags from `win` to each bid's predicted CTR.
# Later cells combine the merged rows positionally with `valid` (the bid
# prices derived from this frame are indexed row-by-row against
# valid.payprice), so the merge must keep exactly one row per pd_pctr row, in
# the same order:
#   * how='left' keeps every pd_pctr row even when `win` has no match,
#   * validate='one_to_one' raises MergeError if `bidid` is duplicated on
#     either side instead of silently multiplying rows.
merge = pd.merge(pd_pctr, win, on='bidid', how='left', validate='one_to_one')
merge.head()

Unnamed: 0,bidid,pCTR,LRLINEAR,LRQUAD,LRORTB,LREXP,GBCLINEAR,GBCQUAD,GBCORTB,GBCEXP,GBCCUBE,GBCFOUR,GBCFIF,GBCKAN
0,bbcb813b6166538503d8b33a5602d7d72f6019dc,0.000175,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,5a07316c49477cb5d9b4d5aa39c27d6c3be7f92d,0.000147,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,f6ece71dae81d6b16bfb24ad6dd5611472d4c673,0.000108,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,b4d5c57c9b38ff5a12954fa01e11931b4e6bfbbb,0.000111,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,0899bf144249458ea9c89188473694bf44c7ca15,0.000143,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [14]:
# Keep only the per-strategy flag columns.
merge2 = merge.drop(['pCTR', 'bidid'], axis="columns")
merge2.head()

Unnamed: 0,LRLINEAR,LRQUAD,LRORTB,LREXP,GBCLINEAR,GBCQUAD,GBCORTB,GBCEXP,GBCCUBE,GBCFOUR,GBCFIF,GBCKAN
0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0


In [24]:
# Row-wise total of the GBCEXP and GBCFIF flags; used below as a multiplier
# on pCTR. NOTE(review): a row flagged in both columns gets 2, which would
# double its pCTR -- confirm the two flags are mutually exclusive.
summ = merge2['GBCEXP'].add(merge2["GBCFIF"])

In [26]:
# Only the print below produces output: the bare summ.head() is not the last
# expression of the cell, so its value is discarded.
summ.head()
print(summ.shape)

(303925,)


In [16]:
# merge1 = merge.drop(columns = ['LR_ORTB','GBC_NoBase','GBC_Base40'])
# merge1.head()

In [17]:
#new_pCTR = merge1.pCTR * merge1.GBC_ORTB

In [27]:
# Scale each row's predicted CTR by its flag total, so rows with neither
# GBCEXP nor GBCFIF set end up with a pCTR of 0.
new_pCTR = merge["pCTR"].mul(summ)
new_pCTR.head()

0    0.000000
1    0.000000
2    0.000000
3    0.000111
4    0.000143
dtype: float64

In [28]:
# Length must equal the number of rows in `valid`, which bidding() iterates over
new_pCTR.shape

(303925,)

## ----------------- GBC + Exp -------------------

In [29]:
# Sweep the base bid of the exponential strategy
#     bidprice = base_bid * exp(pCTR / avgCTR)
# and record the auction outcome of every setting in `Exp1`
# (one row per base bid, indexed by iteration number starting at 1).
base_bids = np.arange(3, 303, 3)
total_iterations = len(base_bids)
result_columns = ['Base_Bid', 'Imps', 'Spend', 'Click', 'CTR', 'CPM', 'eCPC']
records = []

try:
    for iteration, base_bid in enumerate(base_bids, start=1):
        # Non-linear strategy inputs to get bid price
        bidprice = base_bid * np.exp(new_pCTR/avgCTR)
        # bidding mechanism
        click, imps, spend, CTR, CPM, eCPC = bidding(bidprice)
        # Collect one record per setting; the frame is built once at the end
        # rather than being grown cell-by-cell with .loc.
        # NOTE(review): bidding() already returns CPM as spend*1000/imps, so
        # the extra *1000 below reports cost per million impressions --
        # confirm the intended unit before relying on this column.
        records.append({
            'Base_Bid': base_bid,
            'Imps': imps,
            'Spend': round(spend, 2),
            'Click': click,
            'CTR': round(CTR, 5),
            'CPM': round(CPM*1000, 2),
            'eCPC': round(eCPC, 2),
        })
        print('Iteration', iteration, '/', total_iterations, ': | Base Bid =', base_bid,
              'Click = ', click, '| Spend = ', spend,
              '| CTR = ', CTR, '| CPM = ', CPM*1000, '| eCPC = ', eCPC)
finally:
    # Build the table even when the sweep is interrupted, so the settings
    # evaluated so far are not lost.
    Exp1 = pd.DataFrame(records, columns=result_columns,
                        index=pd.RangeIndex(1, len(records) + 1))

Iteration 1 / 96.0 : | Base Bid = 3 Click =  104 | Spend =  638.424000000552 | CTR =  0.009003549476235823 | CPM =  55270.01991174375 | eCPC =  6.138692307697616
Iteration 2 / 96.0 : | Base Bid = 6 Click =  108 | Spend =  877.8570000013342 | CTR =  0.00484543945443941 | CPM =  39385.2124366878 | eCPC =  8.12830555556791
Iteration 3 / 96.0 : | Base Bid = 9 Click =  115 | Spend =  1081.7730000009024 | CTR =  0.003938625933283101 | CPM =  37049.55818894796 | eCPC =  9.406721739138282
Iteration 4 / 96.0 : | Base Bid = 12 Click =  124 | Spend =  1312.0280000016546 | CTR =  0.003285725641908901 | CPM =  34765.83905248297 | eCPC =  10.58087096775528
Iteration 5 / 96.0 : | Base Bid = 15 Click =  129 | Spend =  1563.46500000196 | CTR =  0.0028522154417617404 | CPM =  34568.51950123729 | eCPC =  12.119883720945428
Iteration 6 / 96.0 : | Base Bid = 18 Click =  130 | Spend =  1913.9670000016495 | CTR =  0.00230467849734962 | CPM =  33931.37376569662 | eCPC =  14.722823076935764
Iteration 7 / 96.0 

KeyboardInterrupt: 

In [39]:
# Final bid table for the exponential strategy with a base bid of 51
# (presumably the value picked from the sweep above -- confirm).
RETRAINEDEXP1 = (51 * np.exp(new_pCTR/avgCTR)).to_frame('bidprice')

# Put the bid id first; it is aligned to the bid prices on the row index.
RETRAINEDEXP1.insert(0, 'bidid', valid.bidid)

RETRAINEDEXP1.head()

Unnamed: 0,bidid,bidprice
0,bbcb813b6166538503d8b33a5602d7d72f6019dc,51.0
1,5a07316c49477cb5d9b4d5aa39c27d6c3be7f92d,51.0
2,f6ece71dae81d6b16bfb24ad6dd5611472d4c673,51.0
3,b4d5c57c9b38ff5a12954fa01e11931b4e6bfbbb,60.285657
4,0899bf144249458ea9c89188473694bf44c7ca15,63.203747


In [40]:
# Persist the bidid/bidprice table; index=False keeps the row index out of the file.
RETRAINEDEXP1.to_csv("retrained_gbcexp.csv",index=False)