In [1]:
import numpy as np
from scipy import stats

In [2]:
# Example PN tree: one line per PN, written leaf-first as
# "PN | ancestor | ... | RootNode", with lines separated by a newline.
PN_Types_Net = '\n'.join((
    'YouLike1 | Exploit | Personalized | RootNode',
    'YouLike2 | Exploit | Personalized | RootNode',
    'BecauseYouLike1 | Explore | Personalized | RootNode',
    'BecauseYouLike2 | Explore | Personalized | RootNode',
    'Supply1 | Non-Personalized | RootNode',
    'Supply2 | Non-Personalized | RootNode',
))

In [3]:
# Build both lookup directions of the tree (child -> parent and
# parent -> set of children) and collect the PN names, i.e. the first
# node of every non-empty line of PN_Types_Net.
ChildParentDict = {}
ParentChildDict = {}
PNs = []

for eachPN in PN_Types_Net.split('\n'):
    # Pieces whose raw length is 1 or less are discarded; the rest are stripped.
    path = [piece.strip() for piece in eachPN.split('|') if len(piece) > 1]
    if path:
        PNs.append(path[0])
    # Walk every consecutive (node, next node) pair along the line.
    for position in range(len(path) - 1):
        child, parent = path[position], path[position + 1]
        ChildParentDict[child] = parent
        ParentChildDict.setdefault(parent, set()).add(child)


In [4]:
# Click / impression totals observed so far for every PN.
# The outer key is the sub-population (context); only 'PopType1' is filled in
# here, and one such inner dictionary is meant to be maintained per context.
PNsAlphaBeta = {
    'PopType1': {
        'YouLike1':        {'clicks': 200, 'impressions': 10000},
        'YouLike2':        {'clicks': 25,  'impressions': 1000},
        'BecauseYouLike1': {'clicks': 100, 'impressions': 10000},
        'BecauseYouLike2': {'clicks': 15,  'impressions': 1000},
        'Supply1':         {'clicks': 20,  'impressions': 10000},
        'Supply2':         {'clicks': 400, 'impressions': 100000},
    },
}

PNsAlphaBeta


{'PopType1': {'YouLike1': {'clicks': 200, 'impressions': 10000},
  'YouLike2': {'clicks': 25, 'impressions': 1000},
  'BecauseYouLike1': {'clicks': 100, 'impressions': 10000},
  'BecauseYouLike2': {'clicks': 15, 'impressions': 1000},
  'Supply1': {'clicks': 20, 'impressions': 10000},
  'Supply2': {'clicks': 400, 'impressions': 100000}}}

In [5]:
# Global structures used by the downstream cells:
#   nodeSet         - every distinct node name that appears in the tree
#   heightOfTheTree - number of nodes on the longest path (never below 1)
#   allPaths        - one list of node names (leaf first) per non-empty line
nodeSet = set()
heightOfTheTree = 1
allPaths = []
for eachPN in PN_Types_Net.split('\n'):
    # Pieces whose raw length is 1 or less are discarded; the rest are stripped.
    path = [eNode.strip() for eNode in eachPN.split('|') if len(eNode) > 1]
    if not path:
        # A blank (or too-short) line produces no nodes. Keeping an empty path
        # in allPaths would make every later `path[0]` lookup raise IndexError,
        # so such lines are skipped.
        continue
    nodeSet.update(path)
    allPaths.append(path)
    heightOfTheTree = max(heightOfTheTree, len(path))

In [6]:
# Give every node a 1-based index used to address it in the matrices.
# Nodes are visited position by position along the paths (position 0 of every
# path first, then position 1, ...) and indices are handed out from noOfNodes
# downwards in order of first appearance.
noOfNodes = len(nodeSet)
nodeIdx = noOfNodes
nodeIdToIndex = {}
for depth in range(heightOfTheTree):
    for path in allPaths:
        if depth >= len(path):
            continue  # this path has no node at this position
        node = path[depth]
        if node in nodeIdToIndex:
            continue  # already indexed through an earlier path
        nodeIdToIndex[node] = nodeIdx
        nodeIdx -= 1
print('No Of Nodes in our reference tree:\t', noOfNodes)
print('nodeIdToIndex mapping:\t', nodeIdToIndex)

No Of Nodes in our reference tree:	 11
nodeIdToIndex mapping:	 {'YouLike1': 11, 'YouLike2': 10, 'BecauseYouLike1': 9, 'BecauseYouLike2': 8, 'Supply1': 7, 'Supply2': 6, 'Exploit': 5, 'Explore': 4, 'Non-Personalized': 3, 'Personalized': 2, 'RootNode': 1}


In [7]:
# Square matrix of click counts with one row/column per node
# (matrix position = nodeIdToIndex value - 1).
# A terminal node stores its own clicks on the diagonal; every child then
# writes the current total of its own column into its parent's column, so the
# sum of a column is the total clicks of that node's branch.
matrix_size = len(nodeIdToIndex)
clicks = np.zeros((matrix_size, matrix_size))
nodeIdToIndex_items = sorted(nodeIdToIndex.items(), key=lambda item: item[1], reverse=True)

# terminal nodes: own clicks on the diagonal
for path in allPaths:
    leaf = path[0]
    leaf_pos = nodeIdToIndex[leaf] - 1
    clicks[leaf_pos, leaf_pos] = PNsAlphaBeta['PopType1'][leaf]['clicks']

# non-terminal nodes: push each child's column total up to its parent
for path in allPaths:
    for child in path[:-1]:
        childIdx = nodeIdToIndex[child] - 1
        parentIdx = nodeIdToIndex[ChildParentDict[child]] - 1
        clicks[childIdx, parentIdx] = sum(clicks[:, childIdx])

In [8]:
# Dump the clicks matrix to disk as comma-separated values.
# NOTE(review): the destination is an absolute path on one user's machine.
clicks_csv_path = "/Users/viswanath.g/Desktop/clicks.csv"
np.savetxt(clicks_csv_path, clicks, delimiter=",")

In [9]:
# Square matrix of impression counts with one row/column per node
# (matrix position = nodeIdToIndex value - 1).
# A terminal node stores its own impressions on the diagonal; every child then
# writes the current total of its own column into its parent's column, so the
# sum of a column is the total impressions of that node's branch.
matrix_size = len(nodeIdToIndex)
impressions = np.zeros((matrix_size, matrix_size))
nodeIdToIndex_items = sorted(nodeIdToIndex.items(), key=lambda item: item[1], reverse=True)

# terminal nodes: own impressions on the diagonal
for path in allPaths:
    leaf = path[0]
    leaf_pos = nodeIdToIndex[leaf] - 1
    impressions[leaf_pos, leaf_pos] = PNsAlphaBeta['PopType1'][leaf]['impressions']

# non-terminal nodes: push each child's column total up to its parent
for path in allPaths:
    for child in path[:-1]:
        childIdx = nodeIdToIndex[child] - 1
        parentIdx = nodeIdToIndex[ChildParentDict[child]] - 1
        impressions[childIdx, parentIdx] = sum(impressions[:, childIdx])

In [10]:
# Dump the impressions matrix to disk as comma-separated values.
# NOTE(review): the destination is an absolute path on one user's machine.
impressions_csv_path = "/Users/viswanath.g/Desktop/impressions.csv"
np.savetxt(impressions_csv_path, impressions, delimiter=",")

In [11]:
# Sample a CTR for every terminal node (i.e. every PN).
# For each PN we walk from the leaf upwards through ChildParentDict; at every
# node that has a parent we draw one CTR from a Beta distribution built from
# that node's branch totals (the column sums of `clicks` and `impressions`).
# The PN's score is the mean of the draws collected along its path.
#
# The Beta distribution needs both shape parameters to be strictly positive.
# Using the raw counts makes stats.beta.rvs raise ValueError as soon as a
# branch has zero clicks (alpha == 0) or clicks == impressions (beta == 0),
# so a Beta(1, 1) (uniform) prior pseudo-count is added to both parameters.
PRIOR_ALPHA = 1.0
PRIOR_BETA = 1.0

sampled_cts = {}
for path in allPaths:
    sampled_ctrs_per_path = []
    child = path[0]
    while child in ChildParentDict:
        childIdx = nodeIdToIndex[child] - 1
        branch_clicks = sum(clicks[:, childIdx])
        branch_impressions = sum(impressions[:, childIdx])
        alpha_ = PRIOR_ALPHA + branch_clicks                        # successes
        beta_ = PRIOR_BETA + (branch_impressions - branch_clicks)   # failures
        sampled_ctrs_per_path.append(stats.beta.rvs(alpha_, beta_, size=1)[0])
        child = ChildParentDict[child]  # move one level up the tree
    sampled_cts[path[0]] = sum(sampled_ctrs_per_path) / len(sampled_ctrs_per_path)
sampled_cts

{'YouLike1': 0.016927735463418688,
 'YouLike2': 0.018208336613166686,
 'BecauseYouLike1': 0.012124057417304207,
 'BecauseYouLike2': 0.014320564347795444,
 'Supply1': 0.0028307026615382517,
 'Supply2': 0.003981454588199759}