## This part : Basic Bidding Strategy
### Consists of 3 parts:
### (1) The Constant Bidding
### (2) The Random Bidding
### (3) The Gaussian Random Bidding

In [1]:
# Library
import pandas as pd
import os
from sklearn import linear_model, datasets, preprocessing, metrics
from sklearn.metrics import accuracy_score
from sklearn.feature_extraction import DictVectorizer
import numpy as np
from sklearn.model_selection import KFold

In [2]:
# load data set
train = pd.read_csv('train.csv')
val = pd.read_csv('validation.csv')
test = pd.read_csv('test.csv')
print(train.shape, val.shape, test.shape)

(2430981, 25) (303925, 25) (303375, 22)


In [3]:
# now get rid of all the cases that payprice equals zero, which is the failed bid 
train1 = train[train.payprice>0]
# now get rid of all the bidprice = payprice cases, not win cases
train2 = train1[train1.bidprice>train1.payprice]

# now get rid of all the cases that payprice equals zero, which is the failed bid 
val1 = val[val.payprice>0]
# now get rid of all the bidprice = payprice cases, not win cases
val2 = val1[val1.bidprice>val1.payprice]

train2.shape,val2.shape

((2427741, 25), (303507, 25))

## Part1: Constant Bidding

In [40]:
#calculating constant bid stategy

bids = pd.DataFrame()
bids['constants'] = [6,7,70,77,78,78.1,78.2,78.3,78.4,78.5,79,80,100]

# from data exploration found that higher CTR while after 15;00
val_new = val2
#val_new = val2[val2['hour'] >15]

def constant_bidding(bid):
   
    #budget = 100000000 #assume infinity budget here
    budget = 6250       # 6250 CNY budget
    clicks = 0          # clicks is integer initial 0
    cost = 0.0          # cost is flot initial at 0.0
    impression = 0      # impression is integer initial 0
    
   
    for click, pay_price in val_new[['click','payprice']].values:
            if bid > pay_price:
                impression += 1
                clicks += click
                cost += pay_price/1000
            if cost+bid >= budget:     # since we do not want to exceed the budget, thus last bid added must below budget
                break
    return impression, clicks, cost

# in order to differ from data, capitalize first letter
def result_table(bids):   
    Impression = []
    Click = []
    Costs = []
    for i in bids['constants']:
        [impression, clicks, cost] = constant_bidding(i)
        Impression.append(impression)
        Click.append(clicks)
        Costs.append(cost)
    bids['impression_won'] = Impression
    bids['clicks'] = Click
    bids['cost'] = Costs
    bids['CTR'] = ((bids.clicks/bids.impression_won)*100).round(4).astype(str)+"%"
    bids['CPM'] = (bids.cost/bids.impression_won).round(3)
    bids['CPC'] = (bids.cost/bids.clicks)
    return bids
result_table(bids)

constant_bid = bids.sort_values("clicks",ascending = False)
constant_bid

Unnamed: 0,constants,impression_won,clicks,cost,CTR,CPM,CPC
4,78.0,144954,66,6172.039,0.0455%,0.043,93.515742
11,80.0,144004,66,6170.013,0.0458%,0.043,93.485045
5,78.1,144418,65,6171.935,0.045%,0.043,94.952846
6,78.2,144416,65,6171.822,0.045%,0.043,94.951108
7,78.3,144414,65,6171.722,0.045%,0.043,94.949569
8,78.4,144412,65,6171.602,0.045%,0.043,94.947723
9,78.5,144410,65,6171.512,0.045%,0.043,94.946338
10,79.0,144399,65,6171.005,0.045%,0.043,94.938538
12,100.0,124733,62,6150.033,0.0497%,0.049,99.194081
3,77.0,151373,59,6173.027,0.039%,0.041,104.627576


In [44]:
#calculating constant bid stategy

bids = pd.DataFrame()
bids['constants'] = [6,7,120,130,132,135,137,140,200]

# from data exploration found that higher CTR while after 15;00
val_new = val2[val2['hour'] >15]

def constant_bidding(bid):
   
    #budget = 100000000 #assume infinity budget here
    budget = 6250       # 6250 CNY budget
    clicks = 0          # clicks is integer initial 0
    cost = 0.0          # cost is flot initial at 0.0
    impression = 0      # impression is integer initial 0
    
   
    for click, pay_price in val_new[['click','payprice']].values:
            if bid > pay_price:
                impression += 1
                clicks += click
                cost += pay_price/1000
            if cost+bid >= budget:     # since we do not want to exceed the budget, thus last bid added must below budget
                break
    return impression, clicks, cost

In [45]:
result_table(bids)
constant_bid = bids.sort_values("clicks",ascending = False)
constant_bid

Unnamed: 0,constants,impression_won,clicks,cost,CTR,CPM,CPC
4,132,109550,77,5997.067,0.0703%,0.055,77.883987
5,135,110322,77,6099.644,0.0698%,0.055,79.216156
3,130,109185,76,5949.462,0.0696%,0.054,78.282395
6,137,110052,76,6113.07,0.0691%,0.056,80.435132
7,140,109367,75,6110.027,0.0686%,0.056,81.467027
2,120,106629,73,5631.017,0.0685%,0.053,77.137219
8,200,89887,65,6050.092,0.0723%,0.067,93.078338
1,7,4882,4,23.249,0.0819%,0.005,5.81225
0,6,3822,0,16.889,0.0%,0.004,inf


In [6]:
constant_bid.to_csv("constant_bid.csv")

## Analysis of Constant Bidding Strategy
### Case 1: Infinity budget
If we have infinite budget, then for the constant bidding, the higher the bid price the better clicks and CTR we will have
### Case 2: fixed budget

If we have only 6250 CNY fen, only constant bid price in greater than or equal to 7 will win a click, for advertisemnt consideration, the bid price would be 132 such that obtain highest number of clicks, and CTR with relative lower cost.


## Part2: Random Bidding

In [32]:
#calculate random bids from a list of bids

bids = pd.DataFrame()
bids['random'] =[100,150,180,200,250,300,400]

# from data exploration found that higher CTR while after 15;00
#val_new = val2[val2['hour'] >15]
val_new = val2

from random import randrange
def random_bid(bid):
    # random bidding strategy differs from constant bidding: we choose random bid price other than constant
    
    #budget = 100000000 #assume infinity budget here
    budget = 6250       # 6250 CNY budget
    clicks = 0          # clicks is integer initial 0
    cost = 0.0          # cost is flot initial at 0.0
    impression = 0      # impression is integer initial 0
    #average_bid =0     

    for click, pay_price in val_new[['click','payprice']].values:
        # set the random bid price
        bid_r = randrange(0,bid)

        if bid_r > pay_price:
            impression += 1
            clicks += click
            cost += pay_price/1000
        if cost+bid_r >= budget:     # since we do not want to exceed the budget, thus last bid added must below budget
            break
    return impression, clicks, cost

# in order to differ from data, capitalize first letter
Impression = []
Click = []
Costs = []
true_random_bid = []
for i in bids['random']:
    [impression, clicks, cost] = random_bid(i)
    Impression.append(impression)
    Click.append(clicks)
    Costs.append(cost)
bids['impression_won'] = Impression
bids['clicks'] = Click
bids['cost'] = Costs
bids['CTR'] = ((bids.clicks/bids.impression_won)*100).round(3).astype(str)+"%"
bids['CPM'] = (bids.cost/bids.impression_won ).round(3)
bids['CPC'] = (bids.cost/bids.clicks).round(2)
random_bid =bids.sort_values("clicks",ascending= False)
random_bid

Unnamed: 0,random,impression_won,clicks,cost,CTR,CPM,CPC
1,150,134959,77,6102.556,0.057%,0.045,79.25
2,180,122907,56,6072.602,0.046%,0.049,108.44
5,300,96360,56,5952.063,0.058%,0.062,106.29
4,250,104215,55,6005.349,0.053%,0.058,109.19
3,200,116599,54,6054.261,0.046%,0.052,112.12
6,400,87543,49,5860.041,0.056%,0.067,119.59
0,100,113125,45,4040.67,0.04%,0.036,89.79


In [24]:
#calculate random bids from a list of bids

bids = pd.DataFrame()
bids['random'] =[100,150,200,250,300,400]

# from data exploration found that higher CTR while after 15;00
val_new = val2[val2['hour'] >15]

from random import randrange
def random_bid(bid):
    # random bidding strategy differs from constant bidding: we choose random bid price other than constant
    
    #budget = 100000000 #assume infinity budget here
    budget = 6250       # 6250 CNY budget
    clicks = 0          # clicks is integer initial 0
    cost = 0.0          # cost is flot initial at 0.0
    impression = 0      # impression is integer initial 0
    #average_bid =0     

    for click, pay_price in val_new[['click','payprice']].values:
        # set the random bid price
        bid_r = randrange(0,bid)

        if bid_r > pay_price:
            impression += 1
            clicks += click
            cost += pay_price/1000
        if cost+bid_r >= budget:     # since we do not want to exceed the budget, thus last bid added must below budget
            break
    return impression, clicks, cost

# in order to differ from data, capitalize first letter
Impression = []
Click = []
Costs = []
true_random_bid = []
for i in bids['random']:
    [impression, clicks, cost] = random_bid(i)
    Impression.append(impression)
    Click.append(clicks)
    Costs.append(cost)
bids['impression_won'] = Impression
bids['clicks'] = Click
bids['cost'] = Costs
bids['CTR'] = ((bids.clicks/bids.impression_won)*100).round(3).astype(str)+"%"
bids['CPM'] = (bids.cost/bids.impression_won ).round(3)
bids['CPC'] = (bids.cost/bids.clicks).round(2)

In [25]:
random_bid =bids.sort_values("clicks",ascending= False)
random_bid

Unnamed: 0,random,impression_won,clicks,cost,CTR,CPM,CPC
4,300,97053,83,5951.132,0.086%,0.061,71.7
3,250,91214,67,5219.822,0.073%,0.057,77.91
5,400,88088,65,5862.188,0.074%,0.067,90.19
2,200,81759,60,4221.249,0.073%,0.052,70.35
1,150,69117,40,3090.787,0.058%,0.045,77.27
0,100,50110,29,1776.919,0.058%,0.035,61.27


In [9]:
random_bid.to_csv("random_bid.csv")

## Analysis of Random Bidding Strategy
### Case 1: Infinity budget
If we have infinite budget, then for the random bidding, the higher the bid price the better clicks and CTR we will have
### Case 2: fixed budget
If we have only 6250 CNY fen, the bid price upper bound would be 300 such that obtain highest number of impressions, highest number of clicks.


In [22]:
s = np.random.normal(80,20,10)

In [23]:
s

array([55.73064093, 74.07899706, 99.09279609, 91.80163429, 64.25093552,
       80.82744469, 82.27116582, 54.1526843 , 54.55891392, 54.75542427])

# Part 3 Guassian random

In [68]:
#calculate random bids from a list of bids

bids = pd.DataFrame()
bids['random'] =[70,80,90,100,130,140]

# from data exploration found that higher CTR while after 15;00
# because of the the limited budget, shorten the bid range will increase result
val_new = val2[val2['hour'] >15]
#val_new = val2
from random import randrange
def random_bid(bid):
    # random bidding strategy differs from constant bidding: we choose random bid price other than constant
    
    #budget = 100000000 #assume infinity budget here
    budget = 6250       # 6250 CNY budget
    clicks = 0          # clicks is integer initial 0
    cost = 0.0          # cost is flot initial at 0.0
    impression = 0      # impression is integer initial 0
    #average_bid =0     

    for click, pay_price in val_new[['click','payprice']].values:
        # set the random bid price
        bid_r = np.random.normal(bid,20)

        if bid_r > pay_price:
            impression += 1
            clicks += click
            cost += pay_price/1000
        if cost+bid_r >= budget:     # since we do not want to exceed the budget, thus last bid added must below budget
            break
    return impression, clicks, cost

# in order to differ from data, capitalize first letter
Impression = []
Click = []
Costs = []
true_random_bid = []
for i in bids['random']:
    [impression, clicks, cost] = random_bid(i)
    Impression.append(impression)
    Click.append(clicks)
    Costs.append(cost)
bids['impression_won'] = Impression
bids['clicks'] = Click
bids['cost'] = Costs
bids['CTR'] = ((bids.clicks/bids.impression_won)*100).round(3).astype(str)+"%"
bids['CPM'] = (bids.cost/bids.impression_won ).round(3)
bids['CPC'] = (bids.cost/bids.clicks).round(2)

In [69]:
random_bid_g =bids.sort_values("clicks",ascending= False)
random_bid_g

Unnamed: 0,random,impression_won,clicks,cost,CTR,CPM,CPC
4,130,108730,82,5953.892,0.075%,0.055,72.61
5,140,106647,70,6046.795,0.066%,0.057,86.38
3,100,96778,67,4721.005,0.069%,0.049,70.46
2,90,90352,64,4177.889,0.071%,0.046,65.28
1,80,81702,53,3513.813,0.065%,0.043,66.3
0,70,71928,46,2826.745,0.064%,0.039,61.45


In [39]:
#calculate random bids from a list of bids

bids = pd.DataFrame()
bids['random'] =[78,80,82,100,150]

# from data exploration found that higher CTR while after 15;00
# because of the the limited budget, shorten the bid range will increase result
#val_new = val2[val2['hour'] >15]
val_new = val2
from random import randrange
def random_bid(bid):
    # random bidding strategy differs from constant bidding: we choose random bid price other than constant
    
    #budget = 100000000 #assume infinity budget here
    budget = 6250       # 6250 CNY budget
    clicks = 0          # clicks is integer initial 0
    cost = 0.0          # cost is flot initial at 0.0
    impression = 0      # impression is integer initial 0
    #average_bid =0     

    for click, pay_price in val_new[['click','payprice']].values:
        # set the random bid price
        bid_r = np.random.normal(bid,20)

        if bid_r > pay_price:
            impression += 1
            clicks += click
            cost += pay_price/1000
        if cost+bid_r >= budget:     # since we do not want to exceed the budget, thus last bid added must below budget
            break
    return impression, clicks, cost

# in order to differ from data, capitalize first letter
Impression = []
Click = []
Costs = []
true_random_bid = []
for i in bids['random']:
    [impression, clicks, cost] = random_bid(i)
    Impression.append(impression)
    Click.append(clicks)
    Costs.append(cost)
bids['impression_won'] = Impression
bids['clicks'] = Click
bids['cost'] = Costs
bids['CTR'] = ((bids.clicks/bids.impression_won)*100).round(3).astype(str)+"%"
bids['CPM'] = (bids.cost/bids.impression_won ).round(3)
bids['CPC'] = (bids.cost/bids.clicks).round(2)

random_bid_g =bids.sort_values("clicks",ascending= False)
random_bid_g

Unnamed: 0,random,impression_won,clicks,cost,CTR,CPM,CPC
0,78,143827,72,6124.321,0.05%,0.043,85.06
2,82,139198,70,6120.565,0.05%,0.044,87.44
1,80,141221,66,6114.11,0.047%,0.043,92.64
3,100,123961,61,6094.823,0.049%,0.049,99.92
4,150,102588,58,6049.659,0.057%,0.059,104.3
