In [1]:
%load_ext rpy2.ipython

In [2]:
%%R
# Read the test bidders and coerce the resulting data frame to a numeric matrix.
testData <- data.matrix(read.csv("test.csv", header = TRUE))

In [3]:
%%R
# Preview the first six rows of the numeric test matrix.
print(head(testData, n = 6))

     bidder_id payment_account address
[1,]      1345            2638    1338
[2,]      3105            3345    2890
[3,]      1978            1260    2650
[4,]      4322            3038    3530
[5,]      3800            2376    3774
[6,]      3895            3568    2145


In [4]:
import csv
from sets import Set

#build a set of bidder ids

print('starting ...')

#the with-block closes test.csv even if reading fails part-way through
with open('test.csv', 'rU') as fin:
    rdr = csv.reader(fin, delimiter=',')
    #the first column holds the bidder id; the header row is recognised by its
    #literal 'bidder_id' value and left out.
    #built-in set replaces the deprecated sets.Set and supports the same
    #add / len / membership operations
    bidderIdSet = set(row[0] for row in rdr if row[0] != 'bidder_id')

print(len(bidderIdSet))

starting ...
4700


In [5]:
import csv

#separate bids for test bidders
#reads bids.csv and copies into test_bids.csv the header row plus every bid
#whose bidder id (second column) is a member of bidderIdSet

print('starting ...')

#nested with-blocks guarantee both files are closed even if a row fails
with open('bids.csv', 'rU') as fin:
    with open('test_bids.csv', 'wb') as testFout:
        testWtr = csv.writer(testFout, delimiter=',')

        #write only the bidders which exist in test dataset
        for row in csv.reader(fin, delimiter=','):
            isHeader = row[0] == 'bid_id'
            if isHeader or row[1] in bidderIdSet:
                testWtr.writerow(row)

print('done')

starting ...
done


In [7]:
import csv

#process test bidders info
#aggregates test_bids.csv into one info object per bidder and writes one
#summary row per bidder to test_bidders_info.csv

#number of consecutive bids spanned by the window used for 'minInterval'
slidingWindowSize = 10

bidderIdToInfo = {} #bidderId -> info object

with open('test_bids.csv', 'rU') as fin:
    for row in csv.reader(fin, delimiter=','):
        bidderId = row[1]
        if bidderId == 'bidder_id': #skip header
            continue

        currAuction = row[2]
        currMerchandise = row[3]
        currDevice = row[4]
        currArrivalTime = row[5]
        currCountry = row[6]
        currURL = row[8]

        info = bidderIdToInfo.get(bidderId)
        if info is None:
            #first bid seen for this bidder: no interval can be computed yet,
            #so both interval fields start as empty strings
            bidderIdToInfo[bidderId] = {
                'numBids': 1,
                'arrivalTimes': [currArrivalTime],
                'avgInterval': '',
                'minInterval': '',
                'merchandises': {currMerchandise: 1},
                'auctions': {currAuction: 1},
                'urls': {currURL: 1},
                'devices': {currDevice: 1},
                'countries': {currCountry: 1},
            }
            continue

        #number of bids
        info['numBids'] += 1

        #interval
        #NOTE(review): the interval maths only makes sense if each bidder's
        #rows appear in chronological order - confirm for bids.csv
        arrivalTimesList = info['arrivalTimes']
        arrivalTimesList.append(currArrivalTime) #append the new arrival time
        length = len(arrivalTimesList)
        #span from the first to the latest bid divided by the number of gaps;
        #'//' keeps the integer (floor) result on any Python version
        info['avgInterval'] = (int(currArrivalTime) - int(arrivalTimesList[0])) // (length - 1)
        if length >= slidingWindowSize: #only once a full window of bids exists
            #average gap over the latest slidingWindowSize bids
            windowStart = int(arrivalTimesList[length - slidingWindowSize])
            currMin = (int(currArrivalTime) - windowStart) // (slidingWindowSize - 1)
            if info['minInterval'] == '' or currMin < info['minInterval']:
                info['minInterval'] = currMin

        #merchandises (value = number of bids on that merchandise)
        merchandises = info['merchandises']
        merchandises[currMerchandise] = merchandises.get(currMerchandise, 0) + 1

        #auctions, urls, devices, countries: only the distinct keys matter,
        #so an existing entry is left untouched
        info['auctions'].setdefault(currAuction, 1)
        info['urls'].setdefault(currURL, 1)
        info['devices'].setdefault(currDevice, 1)
        info['countries'].setdefault(currCountry, 1)

#write the results
with open('test_bidders_info.csv', 'wb') as fout:
    wtr = csv.writer(fout, delimiter=',')

    wtr.writerow(['bidder_id', 'number of bids', 'avg arrival interval', 'min interval', 'number of auctions', 'number of urls', 'merchandise', 'number of devices', 'number of countries'])
    for key, val in bidderIdToInfo.items():
        #there exists only one merchandise per bidder, so the first key is taken
        merchList = next(iter(val['merchandises']))
        currRowVec = [key, str(val['numBids']), val['avgInterval'], val['minInterval'], len(val['auctions']), len(val['urls']), merchList, len(val['devices']), len(val['countries'])]
        wtr.writerow(currRowVec)

print('Test bidders: ' + str(len(bidderIdToInfo)))

Test bidders: 4630


In [8]:
#reaction time for each bidder

import csv

print('starting ...')

#bidderId -> list of reaction times, one per bid that followed an earlier bid
#in the same auction
bidderIdToReactionTimeList = {}
#auction -> time of the most recent bid read so far for that auction
auctionToLastBid = {}

#the with-block closes bids.csv even if a row fails to parse
with open('bids.csv', 'rU') as fin:
    rdr = csv.reader(fin, delimiter=',')
    next(rdr, None) #skip header (no-op on an empty file)

    #NOTE(review): reaction times are differences between consecutive rows of
    #the same auction, so rows are presumably in chronological order - confirm
    for row in rdr:
        bidderId = row[1]
        auction = row[2]
        bidTime = int(row[5])

        previousBidTime = auctionToLastBid.get(auction)
        auctionToLastBid[auction] = bidTime
        if previousBidTime is None:
            #first bid of the auction: nothing to react to
            continue

        reactionTime = bidTime - previousBidTime
        bidderIdToReactionTimeList.setdefault(bidderId, []).append(reactionTime)

print(len(bidderIdToReactionTimeList))
print('done')

starting ...
6610
done


In [10]:
#attach reaction time feature with bidderIdToReactionTimeList

import numpy
def median(lst):
    """Return the median of the numbers in lst, as computed by numpy.median."""
    values = numpy.array(lst)
    return numpy.median(values)

#extend the test bidders info with reaction time columns
#copies test_bidders_info.csv to test_bidders_info_final.csv, appending the
#min and median reaction time of each bidder

import csv #imported here so the cell does not rely on an earlier cell's import

#nested with-blocks guarantee both files are closed even if a row fails
with open('test_bidders_info.csv', 'rU') as fin:
    with open('test_bidders_info_final.csv', 'wb') as fout:
        rdr = csv.reader(fin, delimiter=',')
        wtr = csv.writer(fout, delimiter=',')

        header = next(rdr, None)
        if header is not None:
            wtr.writerow(header + ['min reaction time', 'median reaction time'])

        for row in rdr:
            bidderId = row[0]
            reactionTimeList = bidderIdToReactionTimeList.get(bidderId)

            if reactionTimeList is None:
                #the bidder has no recorded reaction time (every bid it made was
                #the first one read for its auction): report the id and pad the
                #two new columns with empty values so every row has the same
                #number of fields as the header
                print(bidderId)
                wtr.writerow(row + ['', ''])
                continue

            wtr.writerow(row + [min(reactionTimeList), median(reactionTimeList)])

print("done")

7a1eed5c707439cd630acffb3b3dc298dhz6q
f7b6e7e8d3ac5b17ee7673699899e2e0dwvpa
8f434535037d1ea471a068b2354af83eihjbk
done
