In [None]:
import legend_data_monitor as ldm

# General LegendDataMonitor functionality

## Set up Geds Subsystem and get data

Set up the **Geds** `Subsystem` for a given experiment and period (this defines the channel map and status)

In [None]:
# Geds subsystem for experiment L200, period p02
geds = ldm.Subsystem(
    "geds",
    experiment="L200",
    period="p02",
)

The `Subsystem` object knows its channel map and status, and will use it to map channels in data (see below)

In [None]:
# Display the channel map held by the Geds Subsystem object
geds.channel_map

Get data for parameters of interest based on given dataset

In [None]:
# Load the two parameters of interest from the physics ("phy") dataset,
# restricted to the time window given by start/end
geds.get_data(
    ["baseline", "cuspEmax_ctc_cal"],
    start="2023-01-26 04:30:00",
    end="2023-01-26 07:00:00",
    type="phy",
    version="v06.00",
    path="/data1/users/marshall/prod-ref",
)
# Show what was loaded
geds.data

## Flag pulser events

In [None]:
# Pulser subsystem for the same experiment and period as the Geds one
pulser = ldm.Subsystem(
    "pulser",
    experiment="L200",
    period="p02",
)
# No parameter list is given here: load the general Pulser data (including the
# pulser event flag) for the same dataset and time window as used for Geds
pulser.get_data(
    start="2023-01-26 04:30:00",
    end="2023-01-26 07:00:00",
    type="phy",
    version="v06.00",
    path="/data1/users/marshall/prod-ref",
)
pulser.data

In [None]:
# Flag pulser events in the Geds data using the Pulser subsystem loaded above.
# After this call geds.data carries a "flag_pulser" column (used in the next cells).
geds.flag_pulser_events(pulser)
# Display the Geds data again, now including the flag
geds.data

It looks like all of them are flagged `False`, but that's only because most of the events are not pulser events.
Let's check whether there are any events flagged `True`.

In [None]:
# List the distinct values present in the flag column
geds.data["flag_pulser"].unique()

In [None]:
# keep only the rows whose pulser flag is set, i.e. pulser events
geds.data.loc[geds.data["flag_pulser"]]

In [None]:
# invert the flag to keep the non-pulser, i.e. physical, events instead
geds.data.loc[~geds.data["flag_pulser"]]

## Analysis Data examples

### Geds `cuspEmax_ctc_cal` in physical events

In [None]:
# Analysis view of the Geds data: one parameter, "phy" event type
data1 = ldm.AnalysisData(
    geds.data,
    event_type="phy",
    parameters="cuspEmax_ctc_cal",
)
data1.data

The `cuspEmax_ctc_cal_mean` column is already there for LegendDataMonitor plotting purposes.

The value is repeated for each channel, and was obtained via:

In [None]:
# per-channel mean of the energy column (column picked before aggregating)
data1.data.groupby("channel")["cuspEmax_ctc_cal"].mean()

You can also group by detector name if that's convenient

In [None]:
# same mean, keyed by detector name rather than channel
data1.data.groupby("name")["cuspEmax_ctc_cal"].mean()

In [None]:
# bar chart of the per-detector mean energy
data1.data.groupby("name")["cuspEmax_ctc_cal"].mean().plot.bar(figsize=(15, 3))

Let's look at the energy distribution for a specific detector

In [None]:
# energy histogram for one detector; rows and column are selected in a single .loc call
ax = data1.data.loc[data1.data["name"] == "V08682A", "cuspEmax_ctc_cal"].plot.hist(
    bins=100, log=True, histtype="step", linewidth=1.5
)
# labels are set on the Axes because, for some reason, passing xlabel=...
# to .plot.hist() does not work
ax.set_ylabel("")
ax.set_xlabel("Energy [keV]")
ax.set_title("V08682A")

### Geds `baseline` variation in pulser events

In [None]:
# Geds baseline in pulser events, expressed as % variation from the mean
data2 = ldm.AnalysisData(
    geds.data,
    event_type="pulser",
    parameters="baseline",
    variation=True,
)
# the baseline column now shows %, while the mean column is preserved
data2.data

Let's look at the variation trend in time for two specific detectors

In [None]:
# keep only the rows belonging to the two detectors of interest
data2_two_detectors = data2.data.loc[
    data2.data["name"].isin(["V08682A", "P00661C"])
]
# with datetime as the index it is automatically used for the x-axis;
# grouping by name plots the baseline of each detector separately
data2_two_detectors.set_index("datetime").groupby("name")["baseline"].plot(
    xlabel="UTC Time",
    ylabel="Baseline variation [%]",
    figsize=(5, 2),
    sharex=True,
    legend=True,
)

I don't like how the x-axis tick labels look - we can use `DateFormatter` from `matplotlib.dates` to fix it

But it only works with `datetime.datetime` type x-axis, while our `'datetime'` column is of type `Timestamp` specific to `pandas`
-> plot through `matplotlib.pyplot` converting to `datetime.datetime` with `.dt.to_pydatetime()`
Note: if you convert and put it back in the `DataFrame` column like this:
```python
data2_two_detectors['datetime'] = data2_two_detectors['datetime'].dt.to_pydatetime()
```
it will still be of type `Timestamp` and will not work with `DateFormatter`. Believe me, I tried.

In [None]:
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter

In [None]:
# Draw one line per detector on an explicitly created Axes, so that the data
# and all labelling/formatting below act on the same object
fig, ax = plt.subplots()
for det_name, det_data in data2_two_detectors.groupby("name"):
    # pandas Timestamps are converted to datetime.datetime before plotting
    # so that DateFormatter can be applied to the x-axis
    ax.plot(
        det_data["datetime"].dt.to_pydatetime(), det_data["baseline"], label=det_name
    )

ax.set_xlabel("UTC Time")
ax.set_ylabel("Baseline variation [%]")
ax.legend()

# tick labels on three lines: year / month/day / hour:minute
ax.xaxis.set_major_formatter(DateFormatter("%Y\n%m/%d\n%H:%M"))

### Pulser event rate

In [None]:
# Pulser event rate computed in 10-minute windows
# time_window units: M - months, D - days, H - hours, T - minutes
data3 = ldm.AnalysisData(
    pulser.data,
    event_type="pulser",
    parameters="event_rate",
    time_window="10T",
)
data3.data

In [None]:
# expected rate 1/60s ~= 0.017 Hz
expected_rate_hz = 0.017
ax = data3.data.plot(x="datetime", y="event_rate", legend=False)
# y-range limited to +- 30% around the expected rate
ax.set_ylim(expected_rate_hz * 0.7, expected_rate_hz * 1.3)
ax.set_xlabel("UTC Time")
ax.set_ylabel("Event rate [Hz]")
ax.set_title("Pulser event rate in AUX001")

# Functionality using dict/json

Instead of using arguments experiment=..., period=... etc., you can give them to the functions as a dict.

This can be convenient if you want to set up a config to read from, so that you can later remember which settings you used (much like it is done for LegendDataMonitor plots, see below)

## Separate configs

In [None]:
# experiment/period settings collected in a dict instead of separate arguments
setup = dict(experiment="L200", period="p02")

In [None]:
# Same Geds Subsystem as before, but configured through the `setup` dict
geds1 = ldm.subsystem.Subsystem("geds", setup=setup)

In [None]:
# dataset description as a dict; the time window goes under the "selection" key
dataset = dict(
    type="phy",
    path="/data1/users/marshall/prod-ref",
    version="v06.00",
    selection=dict(start="2023-01-26 04:30:00", end="2023-01-26 07:00:00"),
)

In [None]:
# Load the baseline parameter, passing the dataset description as a single dict
geds1.get_data("baseline", dataset=dataset)
# Display the loaded data
geds1.data

In [None]:
# analysis settings as a dict: what to compute, for which events, in which time window
selection = dict(parameters="event_rate", event_type="all", time_window="10T")

In [None]:
# Build the analysis data from the `selection` dict instead of keyword arguments
data4 = ldm.AnalysisData(geds1.data, selection=selection)
# Display the resulting data
data4.data

## Combined config example 1

In [None]:
# One nested config holding the three dicts used separately above
config1 = {
    # passed to Subsystem(..., setup=...)
    "setup": {"experiment": "L200", "period": "p02"},
    # passed to get_data(..., dataset=...)
    "dataset": {
        "type": "phy",
        "path": "/data1/users/marshall/prod-ref",
        "version": "v06.00",
        "selection": {"start": "2023-01-26 04:30:00", "end": "2023-01-26 07:00:00"},
    },
    # passed to AnalysisData(..., selection=...)
    "analysis": {"parameters": "event_rate", "event_type": "all", "time_window": "10T"},
}

In [None]:
# Each step receives its own sub-dict of the combined config
geds2 = ldm.subsystem.Subsystem("geds", setup=config1["setup"])
geds2.get_data("baseline", dataset=config1["dataset"])
data5 = ldm.AnalysisData(geds2.data, selection=config1["analysis"])
# Display the resulting data
data5.data

## Combined config example 2

In [None]:
# Flat variant: setup, dataset and analysis keys all live at the top level,
# and the same dict is handed to every step in the next cell
config2 = {
    # setup keys
    "experiment": "L200",
    "period": "p02",
    # dataset keys
    "type": "phy",
    "path": "/data1/users/marshall/prod-ref",
    "version": "v06.00",
    "selection": {"start": "2023-01-26 04:30:00", "end": "2023-01-26 07:00:00"},
    # analysis keys
    "parameters": "event_rate",
    "event_type": "all",
    "time_window": "10T",
}

In [None]:
# Build a fresh Subsystem from the flat config and use *that* object
# (not geds2 from example 1) for loading and analysing the data
geds3 = ldm.subsystem.Subsystem("geds", setup=config2)
geds3.get_data("baseline", dataset=config2)
# stored under its own name so the result of example 1 (data5) is not overwritten
data6 = ldm.AnalysisData(geds3.data, selection=config2)
data6.data

# LegendDataMonitor plots

In [None]:
# JSON user config file (path relative to the working directory) passed to control_plots below
user_config = "user_config_example_L200.json"

In [None]:
# Run LegendDataMonitor with the user config file
# NOTE(review): presumably this produces the plots described in the config - confirm
ldm.control_plots(user_config)