In [1]:
#====================#
#  Import Packages   #
#====================#

import math 
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import gridspec

import uproot3 as uproot

In [2]:
# First, read in the currently used .txt file for before/after trigger change for run 3b as DF
run3b_before_good_runs = pd.read_csv("calc_data_pot_run_3b_before_filtered.txt", sep=" ", header=None)
run3b_after_good_runs = pd.read_csv("calc_data_pot_run_3b_after_filtered.txt", sep=" ", header=None)

run3b_before_charlie_good_runs = pd.read_csv("charlie_data_before_3b_good_runs.txt", sep=" ", header=None)
run3b_after_charlie_good_runs = pd.read_csv("charlie_data_after_3b_good_runs.txt", sep=" ", header=None)

# format the columns into run and subrun
run3b_before_good_runs.columns = ["run", "subrun"]
run3b_after_good_runs.columns = ["run", "subrun"]
run3b_before_charlie_good_runs.columns = ["run", "subrun"]
run3b_after_charlie_good_runs.columns = ["run", "subrun"]

In [3]:
run3b_before_good_runs

Unnamed: 0,run,subrun
0,15073,176
1,15286,78
2,15286,84
3,15286,86
4,15286,98
...,...,...
199660,14436,142
199661,14436,152
199662,14436,187
199663,14436,190


In [4]:
print("My run 3b good runs before trigger: %d" % len(run3b_before_good_runs.index))
print("Charlie's run 3b good runs before trigger: %d" % len(run3b_before_charlie_good_runs.index))
print("\nMy run 3b good runs after trigger: %d" % len(run3b_after_good_runs.index))
print("Charlie's run 3b good runs after trigger: %d" % len(run3b_after_charlie_good_runs.index))

num_of_xtra_runs_before = len(run3b_before_charlie_good_runs.index) - len(run3b_before_good_runs.index)
num_of_xtra_runs_after = len(run3b_after_charlie_good_runs.index) - len(run3b_after_good_runs.index)
print("\nCharlie has extra runs for both before and after trigger change, number of extra runs " +
      "are %d and %d, respectively." % (num_of_xtra_runs_before, num_of_xtra_runs_after))

My run 3b good runs before trigger: 199665
Charlie's run 3b good runs before trigger: 207385

My run 3b good runs after trigger: 54037
Charlie's run 3b good runs after trigger: 54629

Charlie has extra runs for both before and after trigger change, number of extra runs are 7720 and 592, respectively.


In [5]:
# Now compare charlie's and my lists, and get a dataframe containing different events
df_diff_before = pd.concat([run3b_before_good_runs, run3b_before_charlie_good_runs]).drop_duplicates(keep=False)
df_diff_after = pd.concat([run3b_after_good_runs, run3b_after_charlie_good_runs]).drop_duplicates(keep=False)

# combine before and after
df_diff_run3b = pd.concat([df_diff_before, df_diff_after])
df_run3b_good_runs_from_data = pd.concat([run3b_before_good_runs, run3b_after_good_runs])

In [6]:
print("Charlie and I has in total of %d and %d different runs, for before and after, respectively" % (len(df_diff_before.index),len(df_diff_after.index)))

Charlie and I has in total of 46780 and 10696 different runs, for before and after, respectively


In [7]:
# now, import the actual correct MCC9 good run list containing good runs (no subrun for this)
df_good_runs_mcc9 = pd.read_csv("goodruns_mcc9_run3_hardcoded.list", sep=",", header=None)
df_charlie_good_runs_mcc9 = pd.read_csv("charlie_mcc9_run3b_good_runs.txt", sep=",", header=None)
df_good_runs_mcc9 = df_good_runs_mcc9.T
df_charlie_good_runs_mcc9 = df_charlie_good_runs_mcc9.T

# rename the column as that's the run column
df_good_runs_mcc9.columns = ["run"]
df_charlie_good_runs_mcc9.columns = ["run"]

In [8]:
# Now, we need to compare our extra run dfs and see which ones from our extra runs doesn't match with the actual good run list
condition = df_diff_run3b["run"].isin(df_good_runs_mcc9["run"])
bad_runs_not_suppose_to_be_there = df_diff_run3b.loc[condition]

In [9]:
print(bad_runs_not_suppose_to_be_there)

         run  subrun
25     16118      19
26     16118      22
27     16118      35
28     16118      45
29     16118      51
...      ...     ...
54606  17415     198
54607  17415     199
54608  17415     269
54618  16962      57
54625  16962     386

[57476 rows x 2 columns]


In [10]:
merged_df_before = pd.merge(run3b_before_good_runs, run3b_before_charlie_good_runs, on=['run', 'subrun'], suffixes=('_w', '_c'), how='outer', indicator=True)
merged_df_after = pd.merge(run3b_after_good_runs, run3b_after_charlie_good_runs, on=['run', 'subrun'], suffixes=('_w', '_c'), how='outer', indicator=True)

In [11]:
df_common_subruns_before = merged_df_before[merged_df_before['_merge'] == 'both'].drop('_merge', axis=1)
df_common_subruns_after = merged_df_after[merged_df_after['_merge'] == 'both'].drop('_merge', axis=1)

In [12]:
df_exclusive_will_before = merged_df_before[merged_df_before['_merge'] == 'left_only'].drop('_merge', axis=1)
df_exclusive_will_after = merged_df_after[merged_df_after['_merge'] == 'left_only'].drop('_merge', axis=1)

In [13]:
df_exclusive_charlie_before = merged_df_before[merged_df_before['_merge'] == 'right_only'].drop('_merge', axis=1)
df_exclusive_charlie_after = merged_df_after[merged_df_after['_merge'] == 'right_only'].drop('_merge', axis=1)

In [14]:
# Entry number sanity check
print(len(df_common_subruns_before.index)+len(df_exclusive_will_before.index))
print(len(df_common_subruns_after.index)+len(df_exclusive_will_after.index))

199665
54037


In [15]:
# Entry number sanity check
print(len(df_common_subruns_before.index)+len(df_exclusive_charlie_before.index))
print(len(df_common_subruns_after.index)+len(df_exclusive_charlie_after.index))

207385
54629


# Entry number sanity check
print(len(df_common_subruns_before.index)+len(df_exclusive_will_before.index))
print(len(df_common_subruns_after.index)+len(df_exclusive_will_after.index))

# Entry number sanity check
print(len(df_common_subruns_before.index)+len(df_exclusive_charlie_before.index))
print(len(df_common_subruns_after.index)+len(df_exclusive_charlie_after.index))

In [16]:
# save the three subsets into .txt
df_common_subruns_before.to_csv("df_common_subruns_before.txt", sep=" ", index=False)
df_common_subruns_after.to_csv("df_common_subruns_after.txt", sep=" ", index=False)

df_exclusive_will_before.to_csv("df_exclusive_will_before.txt", sep=" ", index=False)
df_exclusive_will_after.to_csv("df_exclusive_will_after.txt", sep=" ", index=False)

df_exclusive_charlie_before.to_csv("df_exclusive_charlie_before.txt", sep=" ", index=False)
df_exclusive_charlie_after.to_csv("df_exclusive_charlie_after.txt", sep=" ", index=False)


In [18]:
df_common_subruns_before.head(-1)

Unnamed: 0,run,subrun
0,15073,176
1,15286,78
2,15286,84
3,15286,86
4,15286,98
...,...,...
199659,14436,97
199660,14436,142
199661,14436,152
199662,14436,187
