# Exploratory Data Analysis for datasets
--- 

Aim 
* Understand structure of datasets, variables and where things are.

There are 11 files supplied in total, which are in the directory `./data/`.

Only the `.pkl` files have been downloaded, which are still approximately 1.7 GB in size. 

GitHub rejects individual files larger than 100 MB, so the data files have not been committed to the repository.

In [10]:
import pandas as pd
import os

In [11]:
# Gather the names of all pickled datasets found in the data directory.
files = [name for name in os.listdir('data/') if name.endswith('.pkl')]

# Show the file names together with how many were found.
print(files, len(files))

['total_dataset.pkl', 'jpsi_mu_k_swap.pkl', 'psi2S.pkl', 'jpsi_mu_pi_swap.pkl', 'phimumu.pkl', 'pKmumu_piTop.pkl', 'signal.pkl', 'pKmumu_piTok_kTop.pkl', 'k_pi_swap.pkl', 'jpsi.pkl', 'acceptance_mc.pkl'] 11


The number of entries varies widely between datasets. The smallest, `jpsi_mu_pi_swap.pkl`, contains only 773 events. Will such a small sample be a problem when training an ML algorithm?

In [12]:
# Build a one-row-per-file table with the number of events in each dataset.
# The rows are collected in a plain list and turned into a DataFrame once:
# `DataFrame.append` was removed in pandas 2.0 and copied the whole frame on
# every call.
# NOTE: `pd.read_pickle` can execute arbitrary code while unpickling, so only
# point this at files from a trusted source.
records = []
for file in files:
    df = pd.read_pickle('data/' + file)
    records.append({'filename': file, 'size': len(df)})

# Passing `columns` keeps both headers even when no files were found.
summary = pd.DataFrame(records, columns=['filename', 'size'])
summary

Unnamed: 0,filename,size
0,total_dataset.pkl,498245
1,jpsi_mu_k_swap.pkl,1986
2,psi2S.pkl,181076
3,jpsi_mu_pi_swap.pkl,773
4,phimumu.pkl,68318
5,pKmumu_piTop.pkl,1127
6,signal.pkl,255741
7,pKmumu_piTok_kTop.pkl,1447
8,k_pi_swap.pkl,133292
9,jpsi.pkl,786776


# Total Dataset
* The `total_dataset.pkl` is the LHCb data to analyse.


In [13]:
# Load the full dataset that is to be analysed.
total = pd.read_pickle('data/total_dataset.pkl')

# Show every column when a frame is displayed. The fully qualified option name
# is required: the short form 'max_columns' also matches
# 'styler.render.max_columns' on pandas >= 1.4 and raises an OptionError.
pd.set_option('display.max_columns', None)
total

Unnamed: 0,mu_plus_MC15TuneV1_ProbNNk,mu_plus_MC15TuneV1_ProbNNpi,mu_plus_MC15TuneV1_ProbNNmu,mu_plus_MC15TuneV1_ProbNNe,mu_plus_MC15TuneV1_ProbNNp,mu_plus_P,mu_plus_PT,mu_plus_ETA,mu_plus_PHI,mu_plus_PE,mu_plus_PX,mu_plus_PY,mu_plus_PZ,mu_plus_IPCHI2_OWNPV,mu_minus_MC15TuneV1_ProbNNk,mu_minus_MC15TuneV1_ProbNNpi,mu_minus_MC15TuneV1_ProbNNmu,mu_minus_MC15TuneV1_ProbNNe,mu_minus_MC15TuneV1_ProbNNp,mu_minus_P,mu_minus_PT,mu_minus_ETA,mu_minus_PHI,mu_minus_PE,mu_minus_PX,mu_minus_PY,mu_minus_PZ,mu_minus_IPCHI2_OWNPV,K_MC15TuneV1_ProbNNk,K_MC15TuneV1_ProbNNpi,K_MC15TuneV1_ProbNNmu,K_MC15TuneV1_ProbNNe,K_MC15TuneV1_ProbNNp,K_P,K_PT,K_ETA,K_PHI,K_PE,K_PX,K_PY,K_PZ,K_IPCHI2_OWNPV,Pi_MC15TuneV1_ProbNNk,Pi_MC15TuneV1_ProbNNpi,Pi_MC15TuneV1_ProbNNmu,Pi_MC15TuneV1_ProbNNe,Pi_MC15TuneV1_ProbNNp,Pi_P,Pi_PT,Pi_ETA,Pi_PHI,Pi_PE,Pi_PX,Pi_PY,Pi_PZ,Pi_IPCHI2_OWNPV,B0_MM,B0_ENDVERTEX_CHI2,B0_ENDVERTEX_NDOF,B0_FDCHI2_OWNPV,Kstar_MM,Kstar_ENDVERTEX_CHI2,Kstar_ENDVERTEX_NDOF,Kstar_FDCHI2_OWNPV,J_psi_MM,J_psi_ENDVERTEX_CHI2,J_psi_ENDVERTEX_NDOF,J_psi_FDCHI2_OWNPV,B0_IPCHI2_OWNPV,B0_DIRA_OWNPV,B0_OWNPV_X,B0_OWNPV_Y,B0_OWNPV_Z,B0_FD_OWNPV,B0_ID,q2,phi,costhetal,costhetak,polarity,year
0,0.000303,0.282979,0.966269,2.708744e-06,2.358479e-05,22529.217656,3371.873364,2.586844,-2.463601,22529.465415,-2626.130000,-2114.940000,22275.460000,70.701008,0.002457,0.092010,0.998433,4.034561e-07,4.906135e-04,81540.897697,9651.149975,2.823654,-2.694584,81540.966151,-8702.870000,-4171.900000,80967.730000,8.875478,0.069748,0.010268,-1000.000000,9.986840e-01,0.062544,7363.169875,332.027626,3.791666,0.401218,7379.701050,305.660000,129.670000,7355.680000,14.776949,0.016157,0.859210,-1000.0,1.652386e-07,0.010583,4228.375666,517.000531,2.790918,-2.524810,4230.678518,-421.740000,-299.040000,4196.650000,11.286473,5207.656947,19.883980,5,93.424302,1141.238000,1.404714,1,25.129624,1904.696000,1.119574,1,79.203282,3.159163,0.999968,0.8398,-0.2076,103.8536,2.377813,-511,3.627847,0.687398,-0.467658,0.992306,1,2016
1,0.020258,0.003956,0.998035,8.310519e-06,2.909099e-03,161024.220000,8534.230892,3.629914,-1.540762,161024.254665,256.282917,-8530.381936,160797.905240,14.296193,0.000362,0.060204,0.990564,1.333117e-06,1.854383e-05,14411.220000,1511.469380,2.945311,-2.311160,14411.607321,-1019.571040,-1115.802213,14331.738283,384.612312,0.998938,0.001425,-1000.000000,3.024387e-07,0.007307,37167.950000,3011.735455,3.204432,-1.823306,37171.228446,-752.436891,-2916.228588,37045.727915,49.893348,0.159368,0.701000,-1000.0,3.885431e-08,0.093486,4610.670000,544.932810,2.825103,-1.653290,4612.781990,-44.902299,-543.079691,4578.354080,38.273170,5301.665969,4.817663,5,595.683158,858.348769,0.688884,1,93.627591,3703.792558,0.226903,1,529.336399,0.894800,0.999998,0.7938,-0.1812,-32.1607,6.161559,-511,13.718153,1.989341,0.814684,0.110453,-1,2016
2,0.088214,0.007898,0.998085,1.014832e-05,3.530469e-03,109393.330000,5710.313234,3.645141,0.265732,109393.381025,5509.884739,1499.615748,109244.189645,2981.436866,0.094421,0.015697,0.995131,1.037932e-06,1.544617e-03,88415.880000,7368.072478,3.176302,0.216243,88415.943132,7196.474119,1580.902369,88108.338675,3269.153291,0.982696,0.127773,-1000.000000,7.269481e-07,0.004898,46366.910000,3312.751173,3.330676,-0.083727,46369.538060,3301.146292,-277.044201,46248.416434,1542.015832,0.000128,0.987065,-1000.0,1.053780e-04,0.000009,25805.840000,2136.945667,3.182652,-0.253811,25806.217428,2068.483472,-536.575165,25717.209050,5407.187831,5272.426522,3.634679,5,38894.639606,890.802323,0.008342,1,9410.613563,3093.250321,2.101162,1,21959.653044,2.782695,1.000000,0.8126,-0.1998,40.6020,47.151129,-511,9.567142,-1.738231,0.242143,-0.598537,1,2016
3,0.000404,0.016088,0.990623,3.606953e-06,1.098000e-05,13815.260342,1236.546436,3.104590,1.340393,13815.664371,282.390000,1203.870000,13759.810000,194.296469,0.002816,0.028667,0.999300,3.555200e-06,3.011680e-05,52285.455669,4366.470496,3.174162,-1.049955,52285.562426,2172.800000,-3787.480000,52102.810000,157.873671,0.002285,0.914874,0.008754,8.521141e-09,0.000720,13326.945200,592.225030,3.806310,-0.446913,13336.085834,534.060000,-255.950000,13313.780000,9.953859,0.011112,0.976315,-1000.0,6.369709e-06,0.006169,4061.254141,300.925550,3.294156,1.282427,4063.651701,85.580000,288.500000,4050.090000,7.341118,5292.627244,16.194969,5,472.220367,928.365000,0.033066,1,16.990620,4334.095000,2.328953,1,456.161747,5.415985,0.999958,0.8506,-0.1859,-31.5099,4.679321,-511,18.784472,-0.041052,-0.832555,-0.497081,1,2016
4,0.000061,0.059106,0.997773,1.061150e-06,2.045247e-06,40205.947339,2979.715433,3.293958,-0.438803,40206.086170,2697.420000,-1265.950000,40095.380000,68.710692,0.000045,0.006776,0.999090,7.706678e-06,5.132925e-07,34640.801279,4419.134966,2.748142,-1.095364,34640.962413,2022.740000,-3929.030000,34357.770000,199.719856,0.002197,0.991869,-1000.000000,2.432451e-06,0.007109,16755.097756,1153.853597,3.367555,-2.047052,16762.369099,-528.990000,-1025.450000,16715.320000,101.363455,0.003536,0.994638,-1000.0,1.233509e-06,0.000885,6085.829664,183.916809,4.192154,-1.989760,6087.429889,-74.820000,-168.010000,6083.050000,8.507556,5262.367923,8.599249,5,545.970792,747.467000,0.601719,1,136.433679,3088.918000,0.000064,1,377.561935,3.011884,0.999983,0.7698,-0.2535,-44.0725,4.054877,511,9.541409,-0.084586,-0.006600,0.294391,1,2016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
498240,0.040453,0.001366,0.997394,1.393092e-03,1.167161e-02,211552.172468,7447.801664,4.039390,1.877470,211552.198853,-2248.410000,7100.310000,211421.030000,8.532494,0.000095,0.072946,0.997004,1.625261e-06,7.970841e-06,49317.895696,2516.896437,3.667756,2.402401,49318.008876,-1860.020000,1695.610000,49253.630000,174.498094,0.074645,0.904629,-1000.000000,1.282201e-07,0.040532,6546.325989,138.533513,4.548582,-2.372478,6564.914389,-99.540000,-96.350000,6544.860000,6.780701,0.046899,0.776018,-1000.0,6.083153e-04,0.030226,2269.403840,264.392104,2.839576,1.281165,2273.691655,75.510000,253.380000,2253.950000,102.724448,5647.038865,23.723467,5,331.385560,822.602000,0.927287,1,144.955352,2782.009000,6.048594,1,200.606594,10.078610,0.999990,0.8309,-0.1246,39.0764,10.975222,511,7.739509,-1.323554,0.598088,-0.040562,1,2016
498241,0.000047,0.020918,0.998482,1.134924e-05,6.563242e-07,40338.590000,2968.512043,3.301038,-2.905452,40338.728375,-2886.130413,-694.489016,40229.215496,15.406290,0.000089,0.062077,0.995043,2.579451e-05,9.552649e-06,14123.710000,2840.279217,2.286833,-2.968935,14124.105205,-2798.048806,-487.964044,13835.172501,99.394800,0.000166,0.997793,-1000.000000,6.525253e-09,0.000022,7401.050000,1176.328254,2.525995,-1.972228,7417.496753,-459.634861,-1082.812983,7306.968793,55.758981,0.957129,0.005701,-1000.0,1.220162e-06,0.886422,6906.630000,1353.727292,2.313021,-2.326798,6908.040083,-928.681329,-984.951049,6772.662724,73.105478,5258.249413,1.026611,5,265.208306,884.257912,0.001564,1,107.817869,3087.734352,0.121885,1,181.686012,1.376554,0.999956,0.7990,-0.1814,-76.8413,1.959396,511,9.534103,2.481835,0.410797,-0.204429,1,2016
498242,0.011117,0.139928,0.991624,8.119469e-07,2.001283e-03,59256.560000,2274.457017,3.952914,1.632646,59256.654198,-140.585028,2270.108053,59212.893430,105.480537,0.000091,0.031638,0.990296,3.487722e-06,1.016908e-06,35845.880000,2558.595589,3.331641,0.103181,35846.035718,2544.987814,263.530677,35754.450095,3210.448299,0.998607,0.001079,-1000.000000,7.052736e-07,0.048747,17419.490000,1427.616849,3.193047,1.885164,17426.484122,-441.441109,1357.652244,17360.891164,1591.550255,0.091812,0.902166,-1000.0,1.076690e-08,0.075273,3528.190000,156.656650,3.807138,0.715006,3530.949520,118.289774,102.707523,3524.710395,7.764301,5269.445569,4.849316,5,7550.686577,876.695459,0.233225,1,2242.326484,3677.636156,1.132430,1,4276.507378,1.523706,0.999999,0.7960,-0.1658,-46.5753,19.827259,-511,13.524815,-2.516108,-0.527713,0.994013,-1,2016
498243,0.014127,0.357343,0.758277,5.019459e-05,4.491047e-02,22014.330000,1115.553871,3.674847,-0.069939,22014.583554,1112.826630,-77.957246,21986.047051,235.914330,0.080263,0.400178,0.658433,1.718723e-07,4.206474e-04,48795.080000,2909.108534,3.512040,-3.110835,48795.194393,-2907.732579,-89.463479,48708.283892,108.099453,0.311602,0.299116,-1000.000000,5.119317e-06,0.154439,14234.660000,908.784395,3.443454,2.287915,14243.218116,-597.267362,684.953118,14205.620586,27.959294,0.188552,0.717613,-1000.0,1.003458e-06,0.095832,5194.130000,658.282363,2.754758,2.007133,5196.004839,-278.204499,596.605336,5152.247159,187.413624,5205.925158,5.153339,5,661.260629,864.582221,0.033529,1,234.191463,3617.565996,4.732816,1,412.795524,0.593799,0.999998,0.8533,-0.2319,-63.5059,5.819832,511,13.086508,0.965240,0.033597,-0.533498,-1,2016


In [14]:
# Print the first row of `total` (selected by position) as a column -> value listing.
print(total.iloc[0])

mu_plus_MC15TuneV1_ProbNNk     0.000303
mu_plus_MC15TuneV1_ProbNNpi    0.282979
mu_plus_MC15TuneV1_ProbNNmu    0.966269
mu_plus_MC15TuneV1_ProbNNe     0.000003
mu_plus_MC15TuneV1_ProbNNp     0.000024
                                 ...   
phi                            0.687398
costhetal                     -0.467658
costhetak                      0.992306
polarity                              1
year                               2016
Name: 0, Length: 81, dtype: object


In [15]:
# Display the column labels of `total`.
total.columns

Index(['mu_plus_MC15TuneV1_ProbNNk', 'mu_plus_MC15TuneV1_ProbNNpi',
       'mu_plus_MC15TuneV1_ProbNNmu', 'mu_plus_MC15TuneV1_ProbNNe',
       'mu_plus_MC15TuneV1_ProbNNp', 'mu_plus_P', 'mu_plus_PT', 'mu_plus_ETA',
       'mu_plus_PHI', 'mu_plus_PE', 'mu_plus_PX', 'mu_plus_PY', 'mu_plus_PZ',
       'mu_plus_IPCHI2_OWNPV', 'mu_minus_MC15TuneV1_ProbNNk',
       'mu_minus_MC15TuneV1_ProbNNpi', 'mu_minus_MC15TuneV1_ProbNNmu',
       'mu_minus_MC15TuneV1_ProbNNe', 'mu_minus_MC15TuneV1_ProbNNp',
       'mu_minus_P', 'mu_minus_PT', 'mu_minus_ETA', 'mu_minus_PHI',
       'mu_minus_PE', 'mu_minus_PX', 'mu_minus_PY', 'mu_minus_PZ',
       'mu_minus_IPCHI2_OWNPV', 'K_MC15TuneV1_ProbNNk',
       'K_MC15TuneV1_ProbNNpi', 'K_MC15TuneV1_ProbNNmu',
       'K_MC15TuneV1_ProbNNe', 'K_MC15TuneV1_ProbNNp', 'K_P', 'K_PT', 'K_ETA',
       'K_PHI', 'K_PE', 'K_PX', 'K_PY', 'K_PZ', 'K_IPCHI2_OWNPV',
       'Pi_MC15TuneV1_ProbNNk', 'Pi_MC15TuneV1_ProbNNpi',
       'Pi_MC15TuneV1_ProbNNmu', 'Pi_MC15TuneV1_ProbNN

# Standard Model Signal

* `signal.pkl` - The signal decay, simulated as per the Standard Model

In [16]:
# Load the pickled signal sample from the data directory and display it.
sig = pd.read_pickle('data/signal.pkl')
sig

Unnamed: 0,mu_plus_MC15TuneV1_ProbNNk,mu_plus_MC15TuneV1_ProbNNpi,mu_plus_MC15TuneV1_ProbNNmu,mu_plus_MC15TuneV1_ProbNNe,mu_plus_MC15TuneV1_ProbNNp,mu_plus_P,mu_plus_PT,mu_plus_ETA,mu_plus_PHI,mu_plus_PE,mu_plus_PX,mu_plus_PY,mu_plus_PZ,mu_plus_IPCHI2_OWNPV,mu_minus_MC15TuneV1_ProbNNk,mu_minus_MC15TuneV1_ProbNNpi,mu_minus_MC15TuneV1_ProbNNmu,mu_minus_MC15TuneV1_ProbNNe,mu_minus_MC15TuneV1_ProbNNp,mu_minus_P,mu_minus_PT,mu_minus_ETA,mu_minus_PHI,mu_minus_PE,mu_minus_PX,mu_minus_PY,mu_minus_PZ,mu_minus_IPCHI2_OWNPV,K_MC15TuneV1_ProbNNk,K_MC15TuneV1_ProbNNpi,K_MC15TuneV1_ProbNNmu,K_MC15TuneV1_ProbNNe,K_MC15TuneV1_ProbNNp,K_P,K_PT,K_ETA,K_PHI,K_PE,K_PX,K_PY,K_PZ,K_IPCHI2_OWNPV,Pi_MC15TuneV1_ProbNNk,Pi_MC15TuneV1_ProbNNpi,Pi_MC15TuneV1_ProbNNmu,Pi_MC15TuneV1_ProbNNe,Pi_MC15TuneV1_ProbNNp,Pi_P,Pi_PT,Pi_ETA,Pi_PHI,Pi_PE,Pi_PX,Pi_PY,Pi_PZ,Pi_IPCHI2_OWNPV,B0_MM,B0_ENDVERTEX_CHI2,B0_ENDVERTEX_NDOF,B0_FDCHI2_OWNPV,Kstar_MM,Kstar_ENDVERTEX_CHI2,Kstar_ENDVERTEX_NDOF,Kstar_FDCHI2_OWNPV,J_psi_MM,J_psi_ENDVERTEX_CHI2,J_psi_ENDVERTEX_NDOF,J_psi_FDCHI2_OWNPV,q2,phi,costhetal,costhetak,B0_IPCHI2_OWNPV,B0_DIRA_OWNPV,B0_OWNPV_X,B0_OWNPV_Y,B0_OWNPV_Z,B0_FD_OWNPV,B0_ID,polarity,year
0,0.001626,0.368621,0.965144,2.496697e-06,6.789551e-05,28796.58,2531.302013,3.122733,-2.023842,28796.773836,-1107.965812,-2275.939726,28685.109897,5200.297017,0.003853,0.129218,0.993714,2.191040e-06,3.501567e-03,36820.21,1892.582858,3.660590,3.025223,36820.361597,-1879.782709,219.742665,36771.537833,3210.481325,0.952580,0.150182,-1000.0,1.172057e-06,0.265748,18897.33,542.935290,4.242727,-1.156751,18903.777350,218.431523,-497.057741,18889.528909,122.763954,0.000466,0.994559,-1000.0,2.061752e-03,0.000080,23073.62,862.307882,3.979631,-0.556162,23074.042120,732.347283,-455.238774,23057.501274,603.692520,5284.270324,6.390714,5,13571.160252,879.282240,1.930010,1,828.361100,2807.328516,0.526095,1,12427.533366,7.881143,1.795533,-0.077694,-0.885188,4.304049,0.999999,0.7708,-0.2054,40.5605,27.879712,511,1,2016
1,0.000075,0.008453,0.998412,5.320784e-06,1.026381e-06,24673.47,2274.749879,3.074874,-1.014814,24673.696228,1200.562971,-1932.132388,24568.386899,2674.051363,0.000106,0.038999,0.997084,4.350128e-06,3.458252e-06,28591.17,4474.787157,2.541605,-2.215601,28591.365229,-2689.537777,-3576.325859,28238.825788,1181.105151,0.994033,0.001715,-1000.0,3.064377e-07,0.349174,10465.38,1785.938424,2.453914,-1.894421,10477.017492,-567.938144,-1693.228372,10311.867071,486.837980,0.080632,0.899613,-1000.0,1.440089e-08,0.031932,2641.69,584.915261,2.188366,-2.507974,2645.374433,-471.377515,-346.307812,2576.121153,2085.204114,5298.855894,6.877693,5,9942.800523,925.766975,0.207682,1,3643.076035,4000.524319,3.247928,1,7484.268965,16.004902,1.677407,0.783783,-0.133953,1.159875,0.999999,0.7926,-0.1152,-1.7938,12.678658,-511,1,2016
2,0.000051,0.007129,0.998914,6.864172e-06,1.078601e-06,28464.70,3379.198551,2.820631,-2.119273,28464.896096,-1761.872664,-2883.537335,28263.406788,555.260890,0.000142,0.011256,0.994612,1.088820e-06,8.162336e-07,13748.96,3336.596706,2.094099,-1.891526,13749.365977,-1051.892693,-3166.449011,13337.954247,3402.503430,0.987394,0.088656,-1000.0,4.170585e-07,0.018577,41004.13,4596.040121,2.878469,-1.540241,41007.101751,140.410218,-4593.894836,40745.737105,438.598188,0.045356,0.707700,-1000.0,4.235347e-03,0.033577,2250.52,137.250817,3.489323,-0.939693,2254.843699,80.982924,-110.813143,2246.330894,64.881677,5267.743449,4.914970,5,10027.645173,1046.289247,0.437560,1,582.743279,2617.422809,0.000709,1,7729.029705,6.851072,-1.776031,-0.375908,0.741662,4.786264,0.999997,0.8396,-0.2445,53.8937,12.155244,511,1,2016
3,0.006548,0.088188,0.949859,7.272984e-06,5.321462e-03,11540.26,1048.238326,3.089809,-0.829094,11540.743674,708.131068,-772.886783,11492.553993,1108.518673,0.037230,0.078754,0.999341,4.784393e-06,3.200485e-04,89285.35,4997.859709,3.575191,-0.791353,89285.412517,3512.914633,-3555.001048,89145.359514,601.806342,0.992092,0.001614,-1000.0,3.813968e-04,0.632330,18255.57,868.226176,3.738354,-2.858312,18262.243920,-833.621953,-242.674951,18234.912101,1485.301245,0.007130,0.985881,-1000.0,6.342139e-04,0.000861,18075.56,535.290729,4.212434,-2.257787,18076.098837,-339.488460,-413.864410,18067.632195,55.782131,5222.472589,3.728025,5,4369.645972,890.617733,1.077903,1,1876.064861,1173.040840,0.219702,1,1993.887355,1.376297,-2.687960,-0.616140,0.150085,2.156253,1.000000,0.8143,-0.1723,32.0894,21.083577,-511,1,2016
4,0.000130,0.041185,0.992115,3.291775e-06,4.089373e-06,8605.87,1542.434031,2.404101,-2.030454,8606.518585,-684.287965,-1382.335966,8466.516150,164.866107,0.000138,0.016025,0.983563,1.194478e-05,3.747493e-06,19259.86,2154.245354,2.880587,2.966109,19260.149815,-2121.160769,376.098442,19139.002957,51.172239,0.796452,0.117949,-1000.0,1.902171e-05,0.614987,5159.65,358.811142,3.357764,1.039066,5183.213781,181.926376,309.270479,5147.158700,12.526984,0.000079,0.997613,-1000.0,3.481603e-08,0.000007,10150.92,1159.169607,2.859732,0.823041,10151.879465,788.229321,849.922771,10084.517969,134.250163,5289.231764,6.350834,5,394.582954,936.044689,2.787296,1,140.272714,2367.164132,0.615747,1,222.766713,5.603466,0.131486,-0.016247,-0.998332,1.590714,0.999938,0.8220,-0.1640,-9.6481,1.710604,511,1,2016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
255736,0.000333,0.108626,0.987444,1.502582e-06,1.870762e-05,23313.42,2234.277666,3.035954,-0.162299,23313.659425,2204.915575,-361.031846,23206.110303,4138.493636,0.025853,0.297043,0.996462,2.256385e-10,1.711394e-02,90363.49,2722.617410,4.195167,-0.730493,90363.551771,2027.929016,-1816.631351,90322.464976,300.303014,0.899901,0.354130,-1000.0,1.130151e-04,0.000240,43956.57,2891.270575,3.413570,-0.539710,43959.342160,2480.298282,-1485.787998,43861.379374,347.924214,0.000097,0.783415,-1000.0,6.696978e-05,0.000017,32029.83,2514.577723,3.236166,-0.572358,32030.134088,2113.819743,-1361.935102,31930.970995,1517.441104,5279.290170,3.706880,5,9089.425645,839.451335,1.993100,1,2249.826850,3333.559174,0.040430,1,12178.923615,11.111750,0.868360,-0.645703,-0.611537,2.645413,1.000000,0.8647,-0.2082,27.0830,28.458499,-511,1,2016
255737,0.000133,0.251615,0.984600,3.506863e-07,2.739955e-06,24766.97,2118.363086,3.150180,2.675359,24767.195374,-1892.265554,952.256919,24676.210017,119.588167,0.000296,0.174927,0.995393,1.668922e-05,1.870787e-05,58078.13,4821.136029,3.180200,2.742648,58078.226109,-4442.539649,1872.750404,57877.679909,148.137405,0.195039,0.518709,-1000.0,2.415160e-07,0.169591,16562.89,279.523356,4.774910,-0.541275,16570.245687,239.566134,-144.018659,16560.531146,8.575089,0.020626,0.969661,-1000.0,1.724527e-05,0.012161,3860.45,402.571546,2.951084,-0.718709,3862.972177,302.998108,-265.058478,3839.402343,522.166692,5252.896225,4.485479,5,961.960582,947.304512,0.055661,1,678.697691,329.689715,2.102468,1,221.530880,0.108695,1.297744,-0.500200,-0.164196,0.238440,1.000000,0.8580,-0.1066,-26.6170,6.922365,-511,1,2016
255738,0.142958,0.057447,0.992398,2.708243e-06,8.296998e-04,73885.16,10205.654608,2.667913,-3.125113,73885.235548,-10204.268902,-168.173000,73176.919054,4160.829349,0.001498,0.099646,0.943581,6.564996e-06,1.069255e-04,12345.79,1776.598977,2.626544,3.093656,12346.242117,-1774.558138,85.131313,12217.292122,2553.796568,0.686871,0.010365,-1000.0,4.616078e-07,0.662965,29584.66,4997.318572,2.464291,-2.739205,29588.778689,-4598.176118,-1957.030734,29159.542425,4677.502341,0.234463,0.868848,-1000.0,1.491985e-05,0.000074,48024.72,7632.797821,2.526033,-2.761900,48024.922810,-7089.179075,-2828.982613,47414.281904,2903.494188,5272.782934,15.064039,5,40328.931682,914.271762,9.070844,1,7619.596349,444.172196,0.002846,1,7589.245945,0.197245,-0.159502,0.732894,-0.517692,4.281706,1.000000,0.8241,-0.2009,-18.6741,28.255551,-511,1,2016
255739,0.000063,0.009217,0.996697,4.489019e-06,6.358561e-07,30614.89,2465.913356,3.210446,1.116355,30615.072324,1082.438040,2215.639088,30515.418415,88.014414,0.000050,0.021696,0.999928,2.400704e-06,1.290786e-06,51195.68,5276.243624,2.962922,0.964512,51195.789030,3006.497963,4335.864017,50923.068485,614.836161,0.428538,0.735193,-1000.0,7.248601e-06,0.011874,90288.38,4535.119858,3.683673,0.704208,90289.729649,3456.325627,2936.175282,90174.410178,124.879514,0.000363,0.986130,-1000.0,1.299096e-06,0.000006,44087.99,2396.836799,3.604445,0.930261,44088.210919,1432.409137,1921.725970,44022.789957,14.747967,5298.700165,0.560759,5,1397.429252,1021.496506,0.009412,1,138.086662,1071.609467,0.014150,1,706.270627,1.148296,-0.135000,-0.572585,0.375601,5.515809,0.999995,0.8456,-0.1410,-18.2321,7.737330,511,1,2016


# Backgrounds simulation samples

* `jpsi.pkl` - <img src="https://latex.codecogs.com/gif.latex?B^{0}\rightarrow{}J/\psi{}K^{\ast{}0} " /> with <img src="https://latex.codecogs.com/gif.latex?J/\psi\rightarrow\mu\mu " />
* `psi2S.pkl` - <img src="https://latex.codecogs.com/gif.latex?B^{0}\rightarrow{}\psi{}(2S)K^{\ast{}0} " /> with <img src="https://latex.codecogs.com/gif.latex?\psi{}(2S)\rightarrow\mu\mu " />
* `jpsi_mu_k_swap.pkl` - <img src="https://latex.codecogs.com/gif.latex?B^{0}\rightarrow{}J/\psi{}K^{\ast{}0} " /> with the muon reconstructed as kaon and the kaon reconstructed as a muon
* `jpsi_mu_pi_swap.pkl` - <img src="https://latex.codecogs.com/gif.latex?B^{0}\rightarrow{}J/\psi{}K^{\ast{}0} " /> with the muon reconstructed as pion and the pion reconstructed as a muon
* `k_pi_swap.pkl` - signal decay but with the kaon reconstructed as a pion and the pion reconstructed as a kaon
* `phimumu.pkl` - <img src="https://latex.codecogs.com/gif.latex?B_{s}^{0}\rightarrow{}\phi\mu\mu " /> with <img src="https://latex.codecogs.com/gif.latex?\phi{}\rightarrow{}KK " /> and one of the kaons reconstructed as a pion
* `pKmumu_piTok_kTop.pkl` - <img src="https://latex.codecogs.com/gif.latex?\Lambda_{b}^{0}\rightarrow{}pK\mu\mu " /> with the proton reconstructed as a kaon and the kaon reconstructed as a pion
* `pKmumu_piTop.pkl`  - <img src="https://latex.codecogs.com/gif.latex?\Lambda_{b}^{0}\rightarrow{}pK\mu\mu " /> with the proton reconstructed as a pion

In [17]:
# Load the pickled acceptance sample from the data directory.
acc = pd.read_pickle('data/acceptance_mc.pkl')
# Keep its column labels; a later cell compares them position by position
# with `total.columns`.
cols = acc.columns
acc

Unnamed: 0,mu_plus_MC15TuneV1_ProbNNk,mu_plus_MC15TuneV1_ProbNNpi,mu_plus_MC15TuneV1_ProbNNmu,mu_plus_MC15TuneV1_ProbNNe,mu_plus_MC15TuneV1_ProbNNp,mu_plus_P,mu_plus_PT,mu_plus_ETA,mu_plus_PHI,mu_plus_PE,mu_plus_PX,mu_plus_PY,mu_plus_PZ,mu_plus_IPCHI2_OWNPV,mu_minus_MC15TuneV1_ProbNNk,mu_minus_MC15TuneV1_ProbNNpi,mu_minus_MC15TuneV1_ProbNNmu,mu_minus_MC15TuneV1_ProbNNe,mu_minus_MC15TuneV1_ProbNNp,mu_minus_P,mu_minus_PT,mu_minus_ETA,mu_minus_PHI,mu_minus_PE,mu_minus_PX,mu_minus_PY,mu_minus_PZ,mu_minus_IPCHI2_OWNPV,K_MC15TuneV1_ProbNNk,K_MC15TuneV1_ProbNNpi,K_MC15TuneV1_ProbNNmu,K_MC15TuneV1_ProbNNe,K_MC15TuneV1_ProbNNp,K_P,K_PT,K_ETA,K_PHI,K_PE,K_PX,K_PY,K_PZ,K_IPCHI2_OWNPV,Pi_MC15TuneV1_ProbNNk,Pi_MC15TuneV1_ProbNNpi,Pi_MC15TuneV1_ProbNNmu,Pi_MC15TuneV1_ProbNNe,Pi_MC15TuneV1_ProbNNp,Pi_P,Pi_PT,Pi_ETA,Pi_PHI,Pi_PE,Pi_PX,Pi_PY,Pi_PZ,Pi_IPCHI2_OWNPV,B0_MM,B0_ENDVERTEX_CHI2,B0_ENDVERTEX_NDOF,B0_FDCHI2_OWNPV,Kstar_MM,Kstar_ENDVERTEX_CHI2,Kstar_ENDVERTEX_NDOF,Kstar_FDCHI2_OWNPV,J_psi_MM,J_psi_ENDVERTEX_CHI2,J_psi_ENDVERTEX_NDOF,J_psi_FDCHI2_OWNPV,q2,phi,costhetal,costhetak,B0_IPCHI2_OWNPV,B0_DIRA_OWNPV,B0_OWNPV_X,B0_OWNPV_Y,B0_OWNPV_Z,B0_FD_OWNPV,B0_ID,polarity,year
0,0.000045,0.039243,0.999345,8.361805e-07,0.000001,47515.45,3035.832415,3.442694,-2.097649,47515.567474,-1526.463207,-2624.154822,47418.368912,4601.945001,0.010311,0.211840,0.999337,3.517750e-07,1.943322e-03,89391.14,2376.493053,4.320366,-2.883412,89391.202443,-2297.726988,-606.770073,89359.544489,819.399523,0.274679,0.455835,-1000.000000,5.190202e-03,0.283925,47350.60,852.163519,4.710623,-1.646976,47353.173466,-64.855081,-849.691993,47342.931233,63.114619,0.007086,0.967605,-1000.000000,1.816356e-03,0.012418,58883.30,1022.752011,4.746132,-2.171214,58883.465410,-577.841807,-843.872337,58874.417171,56.989358,5274.115110,1.103854,5,9226.614676,903.946079,0.614476,1,138.450118,3194.170959,0.483961,1,8884.149469,10.202749,-2.562693,0.410836,-0.325254,0.706831,1.000000,0.8757,-0.1507,-67.1291,39.395123,511,1,2016
1,0.001295,0.118154,0.992041,2.135627e-07,0.000328,7465.87,1400.820724,2.357511,-0.076334,7466.617611,1396.741531,-106.826011,7333.274586,254.712621,0.000072,0.003676,0.998752,4.283517e-05,3.550794e-07,28780.54,5945.033668,2.259448,0.999709,28780.733944,3213.572267,5001.637592,28159.830564,186.714175,0.981197,0.032259,0.000017,3.055742e-08,0.877571,5995.65,1271.487444,2.232556,0.232762,6015.940151,1237.199146,293.289265,5859.277993,56.480656,0.000193,0.988699,-1000.000000,8.663335e-06,0.000056,12620.43,2540.219043,2.285928,0.086471,12621.201734,2530.728020,219.382023,12362.141425,190.532823,5254.506808,8.990359,5,1181.668044,928.883784,1.783365,1,205.424368,2982.544337,1.431347,1,924.794184,8.895808,1.152139,-0.977628,-0.929107,0.766567,0.999973,0.8690,-0.1879,-16.4842,3.848004,-511,1,2016
2,0.018971,0.052269,0.993025,1.504784e-05,0.029875,139095.95,4196.451282,4.193844,-0.511106,139095.990129,3660.161980,-2052.661113,139032.633231,179.056756,0.140606,0.047305,0.999469,3.159617e-04,1.062209e-02,112151.79,4040.544810,4.016296,0.315368,112151.839770,3841.273952,1253.242509,112078.981071,112.395127,0.255139,0.332653,-1000.000000,1.182735e-03,0.163254,90366.08,2222.054976,4.398433,0.303542,90367.428488,2120.471036,664.176860,90338.756280,40.437413,0.107735,0.445421,-1000.000000,4.208463e-03,0.417292,69456.24,2015.057048,4.232986,0.430355,69456.380231,1831.319811,840.667981,69427.003536,93.871398,5290.822058,5.425555,5,1167.580846,818.910729,2.139121,1,148.470052,3393.746335,0.581858,1,741.906643,11.518581,-2.749286,0.776564,-0.539335,1.170415,1.000000,0.8587,-0.2014,15.6514,31.008944,511,1,2016
3,0.000231,0.259635,0.971924,6.076813e-05,0.000147,16229.29,2946.570852,2.390978,1.386500,16229.633933,539.974656,2896.671737,15959.560587,442.960107,0.000077,0.010334,0.999871,2.461603e-06,9.127081e-07,52868.81,4884.041416,3.072847,2.102541,52868.915579,-2476.394120,4209.671331,52642.731790,198.026304,0.131563,0.645387,0.004579,2.755399e-05,0.114812,5654.49,677.496581,2.811339,0.725372,5675.999836,506.938496,449.460765,5613.755921,219.181693,0.125680,0.757970,-1000.000000,1.177922e-06,0.062825,5174.13,189.386534,4.000448,1.458318,5176.012084,21.257060,188.189789,5170.662820,6.718180,5280.105583,2.904426,5,1452.760246,881.411569,0.670515,1,269.528897,3752.995191,0.622816,1,1217.370098,14.085193,-2.838959,-0.492500,0.379058,6.179803,0.999977,0.7812,-0.1659,16.0543,6.435185,-511,1,2016
4,0.005569,0.315182,0.973565,1.367964e-06,0.003432,24957.42,1914.807750,3.259226,-0.731608,24957.643654,1424.811065,-1279.219352,24883.856701,1889.072511,0.000280,0.073715,0.997247,2.656544e-06,3.548336e-06,53524.18,4042.397043,3.275014,1.018927,53524.284286,2119.347019,3442.287331,53371.311309,851.369116,0.756115,0.210010,-1000.000000,5.346630e-05,0.372541,21472.74,940.380094,3.820923,0.453369,21478.414282,845.379869,411.882991,21452.138550,12.868221,0.005672,0.928545,-1000.000000,5.037287e-04,0.004457,7394.90,623.991343,3.163772,0.130322,7396.216996,618.699937,81.089969,7368.526367,156.868869,5297.018672,4.577070,5,4399.476238,852.674126,0.133995,1,224.560677,4278.029203,0.435223,1,4190.525924,18.301773,1.171204,-0.180348,0.105453,0.620364,1.000000,0.8997,-0.1221,-50.4380,11.638834,511,1,2016
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
716854,0.000062,0.003175,0.999322,2.309370e-06,0.000001,21752.35,2737.286994,2.761919,-0.916564,21752.606607,1665.777182,-2172.078836,21579.434433,30.595262,0.000383,0.000850,0.999853,4.434693e-05,2.985865e-06,8808.64,2394.773395,1.976580,-0.316735,8809.273656,2275.651225,-745.889479,8476.862570,1966.466786,0.990928,0.001717,-1000.000000,3.406014e-06,0.388420,10510.85,570.504717,3.606052,-1.550691,10522.437204,11.469220,-570.389419,10495.355739,34.844428,0.000055,0.997894,-1000.000000,3.731128e-10,0.000007,11029.64,982.079051,3.109829,-1.960719,11030.523032,-373.304904,-908.362654,10985.830841,224.677463,5263.197537,5.809147,5,2616.053982,878.114617,1.320584,1,300.903937,2568.645195,0.411745,1,2244.310414,6.598139,-1.749968,-0.488072,-0.679964,0.145360,1.000000,0.8721,-0.1452,-29.0464,5.605457,511,1,2016
716855,0.000737,0.031372,0.995584,6.899959e-06,0.000049,34724.83,2550.932812,3.302791,1.815204,34724.990745,-617.280016,2475.120924,34631.005765,279.367672,0.000528,0.057241,0.999105,1.814907e-06,1.391922e-05,57034.57,3625.797886,3.447719,1.120697,57034.667868,1577.421817,3264.682330,56919.203831,539.482238,0.592949,0.450263,-1000.000000,1.354539e-03,0.046336,145626.79,6399.306863,3.817522,1.603830,145627.626784,-211.353505,6395.815665,145486.119054,23.236779,0.017238,0.969150,-1000.000000,2.181445e-06,0.000190,58391.95,2344.758503,3.907739,1.822541,58392.116802,-584.065824,2270.849963,58344.853521,55.845563,5273.793822,3.552075,5,1230.626575,1116.515612,0.503172,1,78.471835,2127.593080,2.326392,1,989.925005,4.526858,0.941953,-0.325766,0.104648,0.832383,1.000000,0.8011,-0.2018,-10.7847,13.046422,511,1,2016
716856,0.729794,0.035291,0.725008,1.966208e-05,0.361820,4687.27,1431.843156,1.854845,0.050745,4688.460701,1430.000019,72.627600,4463.219155,66747.941652,0.001933,0.322747,0.965763,2.250920e-07,9.677358e-05,12236.37,3656.220613,1.878023,-0.953931,12236.826160,2115.054160,-2982.364007,11677.362785,14364.033080,0.878399,0.154524,-1000.000000,8.623820e-06,0.003936,44217.55,9121.694704,2.260800,-0.730832,44220.305799,6792.190148,-6088.634270,43266.458299,661.924797,0.000043,0.991247,-1000.000000,1.791542e-06,0.001958,14886.37,3109.283366,2.248112,-0.850349,14887.024270,2051.258362,-2336.660476,14558.034508,700.957631,5307.264957,1.661639,5,229616.522179,901.291553,0.469274,1,1489.162403,2216.440517,0.009178,1,199931.595907,4.912975,-2.499693,0.253629,0.315782,0.507845,1.000000,0.8349,-0.1570,-0.9651,20.143577,-511,1,2016
716857,0.000955,0.196032,0.940839,8.133744e-06,0.000119,48537.30,2644.574006,3.602227,-2.935226,48537.415001,-2588.460964,-541.886990,48465.201120,59.389422,0.000055,0.032053,0.995548,7.179223e-07,6.378481e-07,42238.51,2066.688226,3.709933,2.626554,42238.642150,-1798.584059,1017.986053,42187.919204,119.544375,0.895348,0.013185,-1000.000000,4.760771e-05,0.834355,8993.69,426.607885,3.740997,-0.120036,9007.229141,423.538144,-51.085500,8983.566415,58.604710,0.000250,0.998113,-1000.000000,7.265398e-13,0.000010,6189.38,692.719938,2.879965,-0.699507,6190.953450,530.041243,-446.001338,6150.492978,552.409557,5274.878752,11.377882,5,994.429909,888.026684,2.290779,1,678.445141,1682.704082,7.195170,1,216.349531,2.831493,0.532486,0.059340,-0.682951,0.631402,0.999998,0.8648,-0.1829,52.1866,7.341194,511,1,2016


In [18]:
# Read the signal pickle into `df` and display its column labels.
# NOTE(review): an earlier cell already loads this same file as `sig`, so this
# looks like a redundant read — confirm whether a separate `df` is needed.
df = pd.read_pickle('data/signal.pkl')
df.columns

Index(['mu_plus_MC15TuneV1_ProbNNk', 'mu_plus_MC15TuneV1_ProbNNpi',
       'mu_plus_MC15TuneV1_ProbNNmu', 'mu_plus_MC15TuneV1_ProbNNe',
       'mu_plus_MC15TuneV1_ProbNNp', 'mu_plus_P', 'mu_plus_PT', 'mu_plus_ETA',
       'mu_plus_PHI', 'mu_plus_PE', 'mu_plus_PX', 'mu_plus_PY', 'mu_plus_PZ',
       'mu_plus_IPCHI2_OWNPV', 'mu_minus_MC15TuneV1_ProbNNk',
       'mu_minus_MC15TuneV1_ProbNNpi', 'mu_minus_MC15TuneV1_ProbNNmu',
       'mu_minus_MC15TuneV1_ProbNNe', 'mu_minus_MC15TuneV1_ProbNNp',
       'mu_minus_P', 'mu_minus_PT', 'mu_minus_ETA', 'mu_minus_PHI',
       'mu_minus_PE', 'mu_minus_PX', 'mu_minus_PY', 'mu_minus_PZ',
       'mu_minus_IPCHI2_OWNPV', 'K_MC15TuneV1_ProbNNk',
       'K_MC15TuneV1_ProbNNpi', 'K_MC15TuneV1_ProbNNmu',
       'K_MC15TuneV1_ProbNNe', 'K_MC15TuneV1_ProbNNp', 'K_P', 'K_PT', 'K_ETA',
       'K_PHI', 'K_PE', 'K_PX', 'K_PY', 'K_PZ', 'K_IPCHI2_OWNPV',
       'Pi_MC15TuneV1_ProbNNk', 'Pi_MC15TuneV1_ProbNNpi',
       'Pi_MC15TuneV1_ProbNNmu', 'Pi_MC15TuneV1_ProbNN

---
# Investigating the variables

Refer to the documentation for the variable meanings.

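The columns of `acceptance_mc.pkl` and `total_dataset.pkl` are not stored in the same order: the cell below prints every position at which the two column lists disagree. Both frames have 81 columns and the same names appear on each side of the mismatches, so the variables all appear to be present, just arranged differently.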

In [19]:
# Walk through the acceptance-sample columns and print each pair of labels
# that differs from the label at the same position in `total`.
for position, acc_label in enumerate(cols):
    total_label = total.columns[position]
    if acc_label != total_label:
        print(acc_label, total_label)

q2 B0_IPCHI2_OWNPV
phi B0_DIRA_OWNPV
costhetal B0_OWNPV_X
costhetak B0_OWNPV_Y
B0_IPCHI2_OWNPV B0_OWNPV_Z
B0_DIRA_OWNPV B0_FD_OWNPV
B0_OWNPV_X B0_ID
B0_OWNPV_Y q2
B0_OWNPV_Z phi
B0_FD_OWNPV costhetal
B0_ID costhetak


In [20]:
# Number of columns in the acceptance sample and in `total`, side by side.
len(cols), len(total.columns)

(81, 81)