In [None]:
import ROOT as root

# Monte Carlo Beekeeper

In this notebook we'll illustrate the use of Monte Carlo (MC) as a means of numerical optimization.  We pose the problem of a beekeeper trying to build a beehive that will permit entrance to small (docile) bees and keep out large (aggressive) bees.  To make the problem somewhat more difficult, we'll give a fraction of each population dwarfism, so that some large bees end up nearly as small as typical small bees.

First, we'll make a population of representative bees.  Then we'll study how to create an entrance door that will allow in small bees and keep out large bees.

In [None]:
# Seeded generator, so every run of the notebook produces the same bees
rand = root.TRandom(1122) # seed = 1122

# How many bees of each class to simulate
nTrials = 1E4

# Binning shared by every size histogram booked below
_N_BINS = 100
_LENGTH_RANGE = (0, 3.5)
_WIDTH_RANGE = (0, 1.4)


def _book_2d(label):
    """Book a length (x) versus width (y) histogram named and titled `label`."""
    return root.TH2D(label, label, _N_BINS, *_LENGTH_RANGE, _N_BINS, *_WIDTH_RANGE)


def _book_1d(label, axis_range):
    """Book a 1D histogram named and titled `label` over `axis_range`."""
    return root.TH1D(label, label, _N_BINS, *axis_range)


# Joint length/width distributions
h2dSmall = _book_2d("Small Bees")
h2dLarge = _book_2d("Large Bees")

# Width projections
wSmall = _book_1d("Small Bee Width", _WIDTH_RANGE)
wLarge = _book_1d("Large Bee Width", _WIDTH_RANGE)

# Length projections
lSmall = _book_1d("Small Bee Length", _LENGTH_RANGE)
lLarge = _book_1d("Large Bee Length", _LENGTH_RANGE)

In [None]:
def _random_bee(mean_width, mean_length, spread, dwarf_fraction):
    """Draw one bee from `rand` and return its (width, length).

    Width and length are each the class mean multiplied by an independent
    Gaus(1, spread) factor.  With probability `dwarf_fraction` the bee has
    dwarfism, which shrinks both dimensions to 70% of the drawn size.
    """
    # The draw order (width, length, dwarfism) is fixed so that the seeded
    # random sequence is consumed the same way on every run.
    width = mean_width*rand.Gaus(1, spread)
    length = mean_length*rand.Gaus(1, spread)
    if rand.Uniform() < dwarf_fraction:
        width *= 0.7
        length *= 0.7
    return width, length


for trial in range(int(nTrials)):
    # Make a random small bee; 10% of small bees have dwarfism
    w, l = _random_bee(0.5, 1.0, 0.2, 0.1)
    h2dSmall.Fill(l, w)
    wSmall.Fill(w)
    lSmall.Fill(l)

    # Make a random big bee; 20% of large bees have dwarfism
    w, l = _random_bee(0.75, 2, 0.25, 0.2)
    h2dLarge.Fill(l, w)
    wLarge.Fill(w)
    lLarge.Fill(l)
    

In [None]:
%jsroot on

def _normalize_to_unit_area(hist):
    """Scale `hist` in place so that its in-range bins sum to 1.

    An empty histogram is left untouched instead of dividing by zero.
    """
    total = hist.Integral()
    if total > 0:
        hist.Scale(1.0/total)


def _overlay_small_large(canvas_name, small, large, x_title):
    """Draw the small-bee (red) and large-bee (blue) 1D histograms on one canvas.

    The axis titles are set on `small` because it is drawn first and
    therefore provides the frame.  Returns the canvas so the caller can keep
    a reference to it.
    """
    canvas = root.TCanvas(canvas_name, canvas_name)
    small.SetLineColor(2)  # ROOT colour index 2: red
    large.SetLineColor(4)  # ROOT colour index 4: blue
    small.SetXTitle(x_title)
    small.SetYTitle("Fraction of bees per bin")
    small.Draw("")
    large.Draw("same")
    canvas.Draw()
    return canvas


# Normalise the 1D distributions so the two classes can be compared by shape
for hist in (wSmall, lSmall, wLarge, lLarge):
    _normalize_to_unit_area(hist)

c2 = _overlay_small_large("Bee Width", wSmall, wLarge, "Width")
c3 = _overlay_small_large("Bee Length", lSmall, lLarge, "Length")

# Joint length/width scatter of both classes (small in red, large in blue)
c1 = root.TCanvas("Size Data","Size Data")
h2dSmall.SetMarkerColor(2)
h2dLarge.SetMarkerColor(4)
h2dLarge.SetXTitle("Length")
h2dLarge.SetYTitle("Width")
h2dLarge.Draw("SCAT")
h2dSmall.Draw("SCATsame")
c1.Draw()

### Now we can see the challenge

There is an ambiguous region in both bee length and bee width in which there are both large and small bees.  One could choose a beehive door that removes all large bees, but this would also keep out almost all of the small bees.  There must be a compromise that can be made.  But which metric would be best for this?  Let's try two metrics:

1-- Small and large bee acceptance efficiencies.  Here we want to maximize small bee acceptance and minimize large bee acceptance.  This will be defined by the number of each bee class allowed into the beehive divided by the total number of bees in each class.

2-- Small bee purity.  Here we want to create the most pure sample of small bees, similar to acceptance efficiency.  This will be defined as the number of small bees allowed into the beehive divided by the total number of bees that can enter the beehive.

In [None]:
# Efficiency and purity versus door size: x is the cut value, y the metric
effW = root.TH2D("effW","effW",100,0,1.4,100,0,1.05)
purW = root.TH2D("purW","purW",100,0,1.4,100,0,1.05)

effL = root.TH2D("effL","effL",100,0,3.5,100,0,1.05)
purL = root.TH2D("purL","purL",100,0,3.5,100,0,1.05)


def _scan_efficiency_purity(small, large, eff_hist, pur_hist):
    """Fill small-bee efficiency and purity for every possible door size.

    For each bin of `small`, the door admits every bee in bins 1..bin.
    Efficiency is the admitted fraction of small bees.  Purity is that
    fraction divided by the sum of the admitted small and large fractions,
    i.e. it treats the two classes as equally populated.

    Both fractions are normalised here by each histogram's own total, so the
    result does not depend on whether `small` and `large` were already scaled
    to unit area.  An empty histogram contributes a fraction of zero.
    """
    small_total = small.Integral()
    large_total = large.Integral()
    for last_bin in range(1, small.GetNbinsX() + 1):
        cut = small.GetBinCenter(last_bin)
        small_frac = small.Integral(1, last_bin)/small_total if small_total > 0 else 0.0
        large_frac = large.Integral(1, last_bin)/large_total if large_total > 0 else 0.0
        # 1e-6 keeps the ratio defined when no bee of either class passes
        purity = small_frac / (small_frac + large_frac + 1e-6)

        eff_hist.Fill(cut, small_frac)
        pur_hist.Fill(cut, purity)


# Integrate over bee length
_scan_efficiency_purity(lSmall, lLarge, effL, purL)

# Integrate over bee width
_scan_efficiency_purity(wSmall, wLarge, effW, purW)


In [None]:
def _draw_eff_pur(canvas_name, eff_hist, pur_hist):
    """Overlay an efficiency curve (red) and a purity curve (blue) with a legend.

    Returns (canvas, legend); the caller keeps both references so the ROOT
    objects stay alive after the function returns.
    """
    canvas = root.TCanvas(canvas_name, canvas_name)
    key = root.TLegend(0.6,0.1,0.9,0.4)

    # Efficiency is drawn first and provides the frame
    eff_hist.SetStats(0)
    eff_hist.SetMarkerStyle(20)
    eff_hist.SetMarkerSize(0.9)
    eff_hist.SetMarkerColor(root.kRed)
    eff_hist.Draw("SCAT")
    key.AddEntry(eff_hist,"Efficiency")

    pur_hist.SetMarkerStyle(22)
    pur_hist.SetMarkerSize(0.9)
    pur_hist.SetMarkerColor(root.kBlue)
    pur_hist.Draw("SCATsame")
    key.AddEntry(pur_hist,"Purity")

    key.Draw("same")
    canvas.Draw()
    return canvas, key


# Length scan first, then the same plot for the width scan
c4, legend = _draw_eff_pur("Length E & P", effL, purL)
c5, legend2 = _draw_eff_pur("Width E & P", effW, purW)

### That's a good start, but now what?

We can see that both efficiency and purity provide insight into the problem, but they do not necessarily solve the problem.  Let's try looking at the problem in two new ways:

1-- Let's multiply efficiency by purity to try to see where they together form a maximum.

2-- Let's look at both efficiency and purity in two dimensions, so we can account for both length and width at the same time.

In [None]:
# Efficiency x purity versus a single cut (x is the cut value, y the product)
epW = root.TH2D("epW","epW",100,0,1.4,100,0,1.05)
epL = root.TH2D("epL","epL",100,0,3.5,100,0,1.05)

# Metrics versus a simultaneous cut on length (x) and width (y)
eff2d = root.TH2D("eff2d","eff2d",100,0,3.5,100,0,1.4)
pur2d = root.TH2D("pur2d","pur2d",100,0,3.5,100,0,1.4)
ep2d = root.TH2D("ep2d","ep2d",100,0,3.5,100,0,1.4)


def _scan_eff_times_pur(small, large, product_hist):
    """Fill efficiency x purity for every possible door size along one axis.

    For each bin of `small`, the door admits every bee in bins 1..bin.
    Efficiency is the admitted fraction of small bees; purity is that
    fraction divided by the sum of the admitted small and large fractions.

    Both fractions are normalised here by each histogram's own total, so the
    result does not depend on whether `small` and `large` were already scaled
    to unit area.  An empty histogram contributes a fraction of zero.
    """
    small_total = small.Integral()
    large_total = large.Integral()
    for last_bin in range(1, small.GetNbinsX() + 1):
        cut = small.GetBinCenter(last_bin)
        small_frac = small.Integral(1, last_bin)/small_total if small_total > 0 else 0.0
        large_frac = large.Integral(1, last_bin)/large_total if large_total > 0 else 0.0
        # 1e-6 keeps the ratio defined when no bee of either class passes
        purity = small_frac / (small_frac + large_frac + 1e-6)
        product_hist.Fill(cut, small_frac*purity)


# Integrate over bee length
_scan_eff_times_pur(lSmall, lLarge, epL)

# Integrate over bee width
_scan_eff_times_pur(wSmall, wLarge, epW)

# Two-dimensional scan: the door admits bees up to length bin `lBin` AND
# width bin `wBin`.  The 2D histograms hold raw counts, so efficiency is
# normalised explicitly by the total number of in-range small bees.
totSmall = h2dSmall.Integral()
for lBin in range(1, h2dSmall.GetNbinsX() + 1):
    for wBin in range(1, h2dSmall.GetNbinsY() + 1):
        nSmall = h2dSmall.Integral(1, lBin, 1, wBin)
        nLarge = h2dLarge.Integral(1, lBin, 1, wBin)
        purv = nSmall / (nSmall + nLarge + 1e-6)
        eff2d.SetBinContent(lBin, wBin, nSmall/totSmall)
        pur2d.SetBinContent(lBin, wBin, purv)
        ep2d.SetBinContent(lBin, wBin, nSmall*purv/totSmall)
        
def _draw_eff_pur_product(canvas_name, eff_hist, pur_hist, product_hist):
    """Overlay efficiency, purity and their product on one canvas.

    The legend is drawn exactly once, after all three entries have been
    added, so both canvases are built the same way.  Returns
    (canvas, legend); the caller keeps both references so the ROOT objects
    stay alive after the function returns.
    """
    canvas = root.TCanvas(canvas_name, canvas_name)
    key = root.TLegend(0.6,0.1,0.9,0.4)

    # Efficiency is drawn first and provides the frame, so hide its stats box
    eff_hist.SetStats(0)

    # (histogram, marker style, marker size, colour, legend label)
    layers = (
        (eff_hist, 20, 0.9, root.kRed, "Efficiency"),
        (pur_hist, 22, 0.9, root.kBlue, "Purity"),
        (product_hist, 29, 1.3, root.kGreen, "Efficiency x Purity"),
    )
    for index, (hist, marker, size, colour, label) in enumerate(layers):
        hist.SetMarkerStyle(marker)
        hist.SetMarkerSize(size)
        hist.SetMarkerColor(colour)
        hist.Draw("SCAT" if index == 0 else "SCATsame")
        key.AddEntry(hist, label)

    key.Draw("same")
    canvas.Draw()
    return canvas, key


c6, legend = _draw_eff_pur_product("ExP Length", effL, purL, epL)
c7, legend2 = _draw_eff_pur_product("ExP Width", effW, purW, epW)

In [None]:
# Colour palette used by the "colz" maps below
root.gStyle.SetPalette(root.kSunset)


def _show_colz(canvas_name, hist2d):
    """Draw `hist2d` as a colour map on its own canvas and return the canvas."""
    canvas = root.TCanvas(canvas_name, canvas_name)
    hist2d.SetStats(0)
    hist2d.Draw("colz")
    canvas.Draw()
    return canvas


# One map per metric, each as a function of the (length, width) cut
c8 = _show_colz("2D Efficiency", eff2d)
c9 = _show_colz("2D Purity", pur2d)
c10 = _show_colz("2D E x P", ep2d)