In [1]:
from datetime import datetime
import meme.archive
import pandas as pd
import pytz
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
local_time_zone = pytz.timezone('US/Pacific')
import warnings
import matplotlib.dates as mdates
warnings.filterwarnings("ignore")
import seaborn as sns

## Pull data from archive for FEL tuning study

In [2]:
from utils_archiver import convert_to_dataframe_no_filling_gap, group_sample

In [3]:
eV_to_joules = 1.60218e-19 

In [4]:
quads_inj_bctrl = ['SOLN:IN20:121:BCTRL', 'QUAD:IN20:121:BCTRL', 'QUAD:IN20:122:BCTRL', 'QUAD:IN20:361:BCTRL', 'QUAD:IN20:371:BCTRL', 
             'QUAD:IN20:425:BCTRL', 'QUAD:IN20:441:BCTRL', 'QUAD:IN20:511:BCTRL', 'QUAD:IN20:525:BCTRL']
quads_linac_bctrl = ['QUAD:LI21:201:BCTRL', 'QUAD:LI21:211:BCTRL', 'QUAD:LI21:221:BCTRL', 'QUAD:LI21:251:BCTRL', 
               'QUAD:LI21:271:BCTRL', 'QUAD:LI21:278:BCTRL', 'QUAD:LI24:740:BCTRL', 'QUAD:LI24:860:BCTRL', 
               'QUAD:LI26:201:BCTRL', 'QUAD:LI26:301:BCTRL', 'QUAD:LI26:401:BCTRL', 'QUAD:LI26:501:BCTRL', 
               'QUAD:LI26:601:BCTRL', 'QUAD:LI26:701:BCTRL', 'QUAD:LI26:801:BCTRL', 'QUAD:LI26:901:BCTRL']
quads_ltuh_bctrl = ['QUAD:LTUH:440:BCTRL', 'QUAD:LTUH:460:BCTRL', 'QUAD:LTUH:620:BCTRL', 'QUAD:LTUH:640:BCTRL', 
              'QUAD:LTUH:660:BCTRL', 'QUAD:LTUH:680:BCTRL']
quads_ltus_bctrl = ['QUAD:LTUS:BCTRL', 'QUAD:LTUS:640:BCTRL', 'QUAD:LTUS:660:BCTRL', 'QUAD:LTUS:680:BCTRL']
quads_all_bctrl = quads_inj_bctrl + quads_linac_bctrl + quads_ltuh_bctrl + quads_ltus_bctrl
# get the BACT PV lists for quads
quads_inj_bact = [quad.replace("BCTRL", "BACT") for quad in quads_inj_bctrl] 
quads_linac_bact = [quad.replace("BCTRL", "BACT") for quad in quads_linac_bctrl] 
quads_ltuh_bact = [quad.replace("BCTRL", "BACT") for quad in quads_ltuh_bctrl] 
quads_ltus_bact = [quad.replace("BCTRL", "BACT") for quad in quads_ltus_bctrl] 

vcc_profile = ['CAMR:IN20:186:XRMS', 'CAMR:IN20:186:YRMS']

RF_ampls = ['ACCL:LI21:1:L1S_S_AV', 'ACCL:LI21:180:L1X_S_AV', 'ACCL:LI22:1:ADES', 'ACCL:LI25:1:ADES']
RF_phases = ['ACCL:LI21:1:L1S_S_PV', 'ACCL:LI21:180:L1X_S_PV', 'ACCL:LI22:1:PDES', 'ACCL:LI25:1:PDES']
 
blen = ['BLEN:LI21:265:AIMAX1H', 'BLEN:LI24:886:BIMAX1H']
bcharge = ['SIOC:SYS0:ML00:CALC038', 'SIOC:SYS0:ML00:CALC252'] # at gun, after BC1 # ['BPMS:IN20:221:TMIT1H', 'BPMS:LI21:233:TMIT1H', 'BPMS:LI24:801:TMIT1H'] # at gun, BC1, BC2 
hxr_energy = ['BEND:DMPH:400:BACT','SIOC:SYS0:ML00:AO627'] # beam energy, photon energy
sxr_energy = ['BEND:DMPS:400:BDES','SIOC:SYS0:ML00:AO628']
hxr_intensity = ['GDET:FEE1:241:ENRC1H'] #, 'SIOC:SYS0:ML00:AO627'] # number of photon, photon energy
laser_iris_status = ['IRIS:LR20:130:CONFG_SEL'] # the number of states represents the iris size: 1 [1.8mm], 2 [1.6mm], 3 [1.4mm], 4 [1.2mm], 5 [1.1mm], 6 [1.0mm], 7 [0.8mm]

undh_corr_x = ['XCOR:UNDH:1380:BCTRL', 'XCOR:UNDH:1480:BCTRL', 'XCOR:UNDH:1580:BCTRL', 'XCOR:UNDH:1680:BCTRL',
                 'XCOR:UNDH:1780:BCTRL', 'XCOR:UNDH:1880:BCTRL', 'XCOR:UNDH:1980:BCTRL', 'XCOR:UNDH:2080:BCTRL',
                 'XCOR:UNDH:2180:BCTRL', 'XCOR:UNDH:2280:BCTRL', 'XCOR:UNDH:2380:BCTRL', 'XCOR:UNDH:2480:BCTRL',
                 'XCOR:UNDH:2580:BCTRL', 'XCOR:UNDH:2680:BCTRL', 'XCOR:UNDH:2780:BCTRL', 'XCOR:UNDH:2880:BCTRL',
                 'XCOR:UNDH:2980:BCTRL', 'XCOR:UNDH:3080:BCTRL', 'XCOR:UNDH:3180:BCTRL', 'XCOR:UNDH:3280:BCTRL',
                 'XCOR:UNDH:3380:BCTRL', 'XCOR:UNDH:3480:BCTRL', 'XCOR:UNDH:3580:BCTRL', 'XCOR:UNDH:3680:BCTRL',
                 'XCOR:UNDH:3780:BCTRL', 'XCOR:UNDH:3880:BCTRL', 'XCOR:UNDH:3980:BCTRL', 'XCOR:UNDH:4080:BCTRL',
                 'XCOR:UNDH:4180:BCTRL', 'XCOR:UNDH:4280:BCTRL', 'XCOR:UNDH:4380:BCTRL', 'XCOR:UNDH:4480:BCTRL',
                 'XCOR:UNDH:4580:BCTRL', 'XCOR:UNDH:4680:BCTRL', 'XCOR:UNDH:4780:BCTRL']
undh_corr_y = ['YCOR:UNDH:1380:BCTRL', 'YCOR:UNDH:1480:BCTRL', 'YCOR:UNDH:1580:BCTRL', 'YCOR:UNDH:1680:BCTRL',
                 'YCOR:UNDH:1780:BCTRL', 'YCOR:UNDH:1880:BCTRL', 'YCOR:UNDH:1980:BCTRL', 'YCOR:UNDH:2080:BCTRL',
                 'YCOR:UNDH:2180:BCTRL', 'YCOR:UNDH:2280:BCTRL', 'YCOR:UNDH:2380:BCTRL', 'YCOR:UNDH:2480:BCTRL',
                 'YCOR:UNDH:2580:BCTRL', 'YCOR:UNDH:2680:BCTRL', 'YCOR:UNDH:2780:BCTRL', 'YCOR:UNDH:2880:BCTRL',
                 'YCOR:UNDH:2980:BCTRL', 'YCOR:UNDH:3080:BCTRL', 'YCOR:UNDH:3180:BCTRL', 'YCOR:UNDH:3280:BCTRL',
                 'YCOR:UNDH:3380:BCTRL', 'YCOR:UNDH:3480:BCTRL', 'YCOR:UNDH:3580:BCTRL', 'YCOR:UNDH:3680:BCTRL',
                 'YCOR:UNDH:3780:BCTRL', 'YCOR:UNDH:3880:BCTRL', 'YCOR:UNDH:3980:BCTRL', 'YCOR:UNDH:4080:BCTRL',
                 'YCOR:UNDH:4180:BCTRL', 'YCOR:UNDH:4280:BCTRL', 'YCOR:UNDH:4380:BCTRL', 'YCOR:UNDH:4480:BCTRL',
                 'YCOR:UNDH:4580:BCTRL', 'YCOR:UNDH:4680:BCTRL', 'YCOR:UNDH:4780:BCTRL']
undh_shifter = ['PHAS:UNDH:1495:GapDes', 'PHAS:UNDH:1595:GapDes', 'PHAS:UNDH:1695:GapDes', 'PHAS:UNDH:1795:GapDes',
                 'PHAS:UNDH:1895:GapDes', 'PHAS:UNDH:1995:GapDes', 'PHAS:UNDH:2095:GapDes', 'PHAS:UNDH:2295:GapDes',
                 'PHAS:UNDH:2395:GapDes', 'PHAS:UNDH:2495:GapDes', 'PHAS:UNDH:2595:GapDes', 'PHAS:UNDH:2695:GapDes',
                 'PHAS:UNDH:2795:GapDes', 'PHAS:UNDH:2995:GapDes', 'PHAS:UNDH:3095:GapDes', 'PHAS:UNDH:3195:GapDes',
                 'PHAS:UNDH:3295:GapDes', 'PHAS:UNDH:3395:GapDes', 'PHAS:UNDH:3495:GapDes', 'PHAS:UNDH:3595:GapDes',
                 'PHAS:UNDH:3695:GapDes', 'PHAS:UNDH:3795:GapDes', 'PHAS:UNDH:3895:GapDes', 'PHAS:UNDH:3995:GapDes',
                 'PHAS:UNDH:4095:GapDes', 'PHAS:UNDH:4195:GapDes', 'PHAS:UNDH:4295:GapDes', 'PHAS:UNDH:4395:GapDes',
                 'PHAS:UNDH:4495:GapDes', 'PHAS:UNDH:4595:GapDes', 'PHAS:UNDH:4695:GapDes']

status_name = ['XRMS on VCC', 'YRMS on VCC', 'Bunch length at BC1', 'Bunch length at BC2', 'Charge at gun [pC]', 'Charge after BC1 [pC]', 
               'HXR electron energy [GeV]', 'HXR photon energy [eV]', 'laser_iris_status']
status_from_archive = vcc_profile + blen + bcharge + hxr_energy + laser_iris_status
bpm_signal = ['BPMS:DMPH:381:TMIT1H'] #signal for filtering on
final_columns = quads_all_bctrl + ['hxr_pulse_intensity'] + status_name


In [5]:
# loading the quads from lcls-live 
quads = pd.read_csv('quad_mapping.csv')
quads_list = quads['device_name'].tolist()
quads_list = [quad + ':BCTRL' for quad in quads_list]
quads_list.extend(['SOLN:IN20:111:BCTRL', 'SOLN:IN20:121:BCTRL', 'SOLN:IN20:311:BCTRL','QUAD:IN20:121:BCTRL',
                   'QUAD:IN20:122:BCTRL', 'QUAD:IN20:361:BCTRL','QUAD:IN20:371:BCTRL', 'QUAD:IN20:425:BCTRL', 
                   'QUAD:IN20:441:BCTRL', 'QUAD:IN20:511:BCTRL', 'QUAD:IN20:525:BCTRL'])


##### We used to pull all the BACT data but the dataset would become huge size when we track much more PVs now

In [6]:
start_time = datetime(2025, 3, 1, 00, 00, 00)
end_time = datetime(2025, 3, 17, 00, 00, 00)


# all_data_inj = meme.archive.get([quad.replace("BCTRL", "BACT") for quad in quads_inj_bctrl], start_time, end_time, timeout=5000)
# all_data_linac = meme.archive.get([quad.replace("BCTRL", "BACT") for quad in quads_linac_bctrl], start_time, end_time, timeout=5000)
# all_data_ltuh = meme.archive.get([quad.replace("BCTRL", "BACT") for quad in quads_ltuh_bctrl], start_time, end_time, timeout=5000)
# all_data_ltus = meme.archive.get([quad.replace("BCTRL", "BACT") for quad in quads_ltus_bctrl], start_time, end_time, timeout=5000)
# all_data_corrx = meme.archive.get([corr.replace("BACT", "BCTRL") for corr in undh_corr_x], start_time, end_time, timeout=5000)
# all_data_corry = meme.archive.get([corr.replace("BACT", "BCTRL") for corr in undh_corr_y], start_time, end_time, timeout=5000)
# all_data_shifter = meme.archive.get([shifter.replace("GapAct", "GapDes") for shifter in undh_shifter], start_time, end_time, timeout=5000)
# all_data_others = meme.archive.get(RF_ampls+RF_phases+hxr_intensity+status_from_archive+bpm_signal, start_time, end_time, timeout=5000)


all_data_quads = meme.archive.get(quads_list, start_time, end_time, timeout=5000)
all_data_others = meme.archive.get(RF_ampls+RF_phases+hxr_intensity+status_from_archive+bpm_signal, start_time, end_time, timeout=8000)

all_data = all_data_quads + all_data_others


2025-03-17T16:57:34.992003579 WARN pvxs.client.io Server 134.79.151.36:36931 no supported auth.  try to force 'anonymous'
2025-03-17T16:58:14.994115232 WARN pvxs.tcp.io connection to Server 134.79.151.36:36931 timeout
2025-03-17T16:58:15.308329962 WARN pvxs.client.io Server 134.79.151.36:36931 no supported auth.  try to force 'anonymous'
2025-03-17T16:58:55.308887255 WARN pvxs.tcp.io connection to Server 134.79.151.36:36931 timeout
2025-03-17T16:58:55.343241225 WARN pvxs.client.io Server 134.79.151.36:36931 no supported auth.  try to force 'anonymous'
2025-03-17T16:59:35.344397092 WARN pvxs.tcp.io connection to Server 134.79.151.36:36931 timeout
2025-03-17T16:59:35.378524125 WARN pvxs.client.io Server 134.79.151.36:36931 no supported auth.  try to force 'anonymous'
2025-03-17T17:00:15.380241146 WARN pvxs.tcp.io connection to Server 134.79.151.36:36931 timeout
2025-03-17T17:00:15.413603145 WARN pvxs.client.io Server 134.79.151.36:36931 no supported auth.  try to force 'anonymous'
2025-0

In [7]:
result_df = convert_to_dataframe_no_filling_gap(all_data, pv_name='GDET:FEE1:241:ENRC1H')
print('Number of total samples:', result_df.shape[0])

Number of total samples: 1267766


### Filtering out the unphysical samples 

In [8]:
# result_df['hxr_pulse_intensity'] = result_df['SIOC:SYS0:ML00:AO627'] * eV_to_joules * result_df['GDET:FEE1:241:ENRC1H'] # photon_energy_ev * eV_to_joules * number_of_photons
condition = (result_df['BPMS:DMPH:381:TMIT1H']*eV_to_joules*1e12 > 150) & (result_df['GDET:FEE1:241:ENRC1H'] > 0)
# final_df = result_df
final_df = result_df[condition]
final_df.rename(columns={'GDET:FEE1:241:ENRC1H': 'hxr_pulse_intensity'}, inplace=True)
status_mapping = dict(zip(status_from_archive, status_name))
final_df.rename(columns=status_mapping, inplace=True)
final_df = final_df.drop('BPMS:DMPH:381:TMIT1H', axis=1)
print('Number of total samples:', final_df.shape[0])

Number of total samples: 1076733


In [24]:
# We don't do it anymore
# grouping the samples that share the same settings of tuning knobs
grouped_df = group_sample(final_df, pv_signal=quads_inj_bact+quads_linac_bact+quads_ltuh_bact)
print('Number of final samples after grouping:', grouped_df.shape[0])

NameError: name 'local_time_zone' is not defined

### Save dataframe to pickle file

In [9]:
final_df.to_pickle('/sdf/data/ad/ard/u/zihanzhu/ml/lcls_fel_tuning/dataset/hxr_archiver_Mar_2025.pkl')
# grouped_df.to_pickle('/sdf/home/z/zihanzhu/lcls/fel_tuning/hxr_archiver_Sep_small.pkl')