In [13]:
pip install xarray-datatree psycopg2-binary zarr

Collecting zarr
  Downloading zarr-2.14.2-py3-none-any.whl (203 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m203.3/203.3 kB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
Collecting asciitree (from zarr)
  Downloading asciitree-0.3.3.tar.gz (4.0 kB)
  Preparing metadata (setup.py) ... [?25ldone
Collecting fasteners (from zarr)
  Downloading fasteners-0.18-py3-none-any.whl (18 kB)
Collecting numcodecs>=0.10.0 (from zarr)
  Downloading numcodecs-0.11.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (6.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m6.7/6.7 MB[0m [31m36.6 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Building wheels for collected packages: asciitree
  Building wheel for asciitree (setup.py) ... [?25ldone
[?25h  Created wheel for asciitree: filename=asciitree-0.3.3-py3-none-any.whl size=5034 sha256=1ace7ace32e7068f0438bf1d651e26db665f2bbd1266f42a50754874c48d5116
  Stored in directory: /home/jovyan/.cache/pip/wheels

In [77]:
import pandas as pd
from sqlalchemy.orm import sessionmaker
from src.db_utils import connect
from datatree import open_datatree
import matplotlib.pyplot as plt
import yaml
from pathlib import Path

with Path('config.yml').open() as handle:
    config = yaml.load(handle, yaml.Loader)
    
URI = f"postgresql://dbreader:fairly-mast@{config['host']}:{config['port']}/mast_db"
metadata, engine = connect(URI)
Session = sessionmaker(bind = engine)
session = Session()

### Database Stats

In [78]:
num_shots = session.query(metadata.tables['shots']).count()
num_signals = session.query(metadata.tables['signals']).count()

print(f'Number of shots: {num_shots}')
print(f'Number of signals: {num_signals}')

Number of shots: 25556
Number of signals: 916


### Querying the Metadatabase

Query the shot table and shot the results

In [8]:
# Find shot IDs
query = (
    session.query(metadata.tables['shots'])
)

result = pd.read_sql(query.statement, con=engine.connect())
result

Unnamed: 0,shot_id,timestamp,reference_shot,signal_ids,scenario,current_range,heating,divertor_config,pellets,plasma_shape,...,cpf_vol_ipmax,cpf_vol_max,cpf_vol_truby,cpf_wmhd_ipmax,cpf_wmhd_max,cpf_wmhd_truby,cpf_zeff_ipmax,cpf_zeff_max,cpf_zeff_truby,cpf_zmag_efit
0,11695,2004-12-13 11:54:00+00:00,,,,,,Conventional,False,,...,,,,,,,,,,
1,11696,2004-12-13 12:07:00+00:00,,,,,,Conventional,False,,...,,,,,,,,,,
2,11697,2004-12-13 12:19:00+00:00,,,,,,Conventional,False,,...,,,,,,,,,,
3,11698,2004-12-13 12:31:00+00:00,,,,,,Conventional,False,,...,,,,,,,,,,
4,11699,2004-12-13 12:45:00+00:00,,,,,,Conventional,False,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
25551,30469,2013-09-27 14:39:00+00:00,30467.0,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",3.0,700 kA,SS Beam,Conventional,False,Connected Double Null,...,8.988730,9.047923,0.0,47466.250,49115.805,0.0,,,,0.015299
25552,30470,2013-09-27 15:03:00+00:00,30467.0,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",2.0,700 kA,SS Beam,Conventional,False,,...,9.687049,10.055509,0.0,17290.434,22310.516,0.0,,,,0.015164
25553,30471,2013-09-27 15:20:00+00:00,30470.0,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",2.0,700 kA,SS Beam,Conventional,False,Lower Single Null,...,8.817559,9.283702,0.0,38063.582,40906.090,0.0,,,,0.014340
25554,30472,2013-09-27 15:36:00+00:00,16492.0,"[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14...",,,Ohmic,Conventional,False,Connected Double Null,...,,,,,,,,,,0.000000


In [9]:
result['campaign'].unique()

array(['M5', 'M6', 'M7', 'M8', 'M9', 'MU1', 'MU2', 'MU3'], dtype=object)

Filtering with CPF Summary Data and shot IDS

In [84]:
shots = metadata.tables['shots']
signals = metadata.tables['signals']

q = (
    session.query(signals.c.signal_id)
          .filter(signals.c.name == 'ABM_CALIB_SHOT')
          .first()
)

q = (
    session.query(shots)
        .filter(shots.c.campaign == 'M9')
        .join(q)
)

q = pd.read_sql(q.statement, con=engine.connect())
q

InvalidRequestError: Query.select_from() being called on a Query with existing criterion. 

A more advanced query. Here we: 
 - Find shots with a given CPF value
 - Find corresponding signals
 - Filter signals by name

In [30]:
shots = metadata.tables['shots']
signals = metadata.tables['signals']
shot_signal_link = metadata.tables['shot_signal_link']

qshots = (
    session.query(shots.c.shot_id)
        .filter(shots.c.campaign == 'M9')
)

# Query for corresponding signal IDs
qshot_signal = (
    session.query(shot_signal_link.c.signal_id)
    .filter(shot_signal_link.c.shot_id.in_(qshots))
    .distinct()
)

# Query for signal data, filter only names containing 'acd'
qsignal = (
    session.query(signals)
    .filter(signals.c.signal_id.in_(qshot_signal))
    # .filter(signals.c.name.contains('AMC'))
)

result = pd.read_sql(qsignal.statement, con=engine.connect())
result

Unnamed: 0,signal_id,name,units,rank,dim_1_label,dim_2_label,dim_3_label,uri,description,signal_type,quality,doi,camera_metadata,camera
0,1,ABM_CALIB_SHOT,,1,,,,data/mast/ABM_CALIB_SHOT.zarr,Calibration Shot,Analysed,Not Checked,,,
1,2,ABM_CHANNEL_STATUS,,2,,,,data/mast/ABM_CHANNEL_STATUS.zarr,channel_status,Analysed,Not Checked,,,
2,3,ABM_CHANNEL_TYPE,,2,,,,data/mast/ABM_CHANNEL_TYPE.zarr,channel_type,Analysed,Not Checked,,,
3,4,ABM_GAIN,,2,,,,data/mast/ABM_GAIN.zarr,GAIN,Analysed,Not Checked,,,
4,5,ABM_I-BOL,W/m^2,2,,,,data/mast/ABM_I-BOL.zarr,i-bol,Analysed,Not Checked,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
911,912,ESM_V_LOOP_STATIC,V,1,,,,data/mast/ESM_V_LOOP_STATIC.zarr,static V_loop,Analysed,Not Checked,,,
912,913,ESM_W_DOT,W,1,,,,data/mast/ESM_W_DOT.zarr,W dot,Analysed,Not Checked,,,
913,914,ESM_X,W,1,,,,data/mast/ESM_X.zarr,"""stored magnetic power""",Analysed,Not Checked,,,
914,915,ESX_PASSNUMBER,Passno,1,,,,data/mast/ESX_PASSNUMBER.zarr,Passno,Analysed,Not Checked,,,


In [6]:
shots = metadata.tables['shots']
signals = metadata.tables['signals']
shot_signal_link = metadata.tables['shot_signal_link']

# Query all shots with zmag_efit > .04
qshots = (
    session.query(shots)
          .filter(shots.c.campaign == 'M9')
)
qshots = pd.read_sql(qshots.statement, con=engine.connect())
shot_ids = qshots['shot_id'].values
shot_ids = list(map(str, shot_ids))

# Query for corresponding signal IDs
qshot_signal = (
    session.query(shot_signal_link.c.signal_id)
    .filter(shot_signal_link.c.shot_id.in_(qshots.shot_id))
    .distinct()
)
qshot_signal = pd.read_sql(qshot_signal.statement, con=engine.connect())

# Query for signal data, filter only names containing 'acd'
qsignal = (
    session.query(signals)
    .filter(signals.c.signal_id.in_(qshot_signal.signal_id))
    # .filter(signals.c.name.contains('AMC'))
)

result = pd.read_sql(qsignal.statement, con=engine.connect())
result

Unnamed: 0,signal_id,name,units,rank,dim_1_label,dim_2_label,dim_3_label,uri,description,signal_type,quality,doi,camera_metadata,camera
0,1,ABM_CALIB_SHOT,,1,,,,data/mast/ABM_CALIB_SHOT.zarr,Calibration Shot,Analysed,Not Checked,,,
1,2,ABM_CHANNEL_STATUS,,2,,,,data/mast/ABM_CHANNEL_STATUS.zarr,channel_status,Analysed,Not Checked,,,
2,3,ABM_CHANNEL_TYPE,,2,,,,data/mast/ABM_CHANNEL_TYPE.zarr,channel_type,Analysed,Not Checked,,,
3,4,ABM_GAIN,,2,,,,data/mast/ABM_GAIN.zarr,GAIN,Analysed,Not Checked,,,
4,5,ABM_I-BOL,W/m^2,2,,,,data/mast/ABM_I-BOL.zarr,i-bol,Analysed,Not Checked,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
911,912,ESM_V_LOOP_STATIC,V,1,,,,data/mast/ESM_V_LOOP_STATIC.zarr,static V_loop,Analysed,Not Checked,,,
912,913,ESM_W_DOT,W,1,,,,data/mast/ESM_W_DOT.zarr,W dot,Analysed,Not Checked,,,
913,914,ESM_X,W,1,,,,data/mast/ESM_X.zarr,"""stored magnetic power""",Analysed,Not Checked,,,
914,915,ESX_PASSNUMBER,Passno,1,,,,data/mast/ESX_PASSNUMBER.zarr,Passno,Analysed,Not Checked,,,


### Loading data

Here is an example of loading the data found in the database into a dataset and plotting some time series

In [14]:
# Read data 
data = {row['name']: open_datatree(row.uri, engine='zarr') for index, row in result[['name', 'uri']].iterrows()}

dataset = data['AMC_PLASMA_CURRENT']
# Choose only relevant shots
dataset = dataset.filter(lambda x: x.name in shot_ids)
dataset = dataset.sel(time=slice(0, .5))

for shot_id, shot in dataset.items():
    data = shot['data']
    time = shot['time']
    plt.plot(time, data, label=f'Shot {shot_id}') 

plt.ylabel(f'{shot.label} ({shot.units})')
plt.xlabel('Time')
plt.legend()

GroupNotFoundError: group not found at path ''

Another example with the same data but with multi dimensional data this time. Use EFM PSI which should be an equillibrium reconstruction.

In [12]:
# Query for signal data, filter only names containing 'acd'
qsignal = (
    session.query(signals)
    .filter(signals.c.name.contains('EFM_PSI(R,Z)'))
)

result = pd.read_sql(qsignal.statement, con=engine.connect())
result

Unnamed: 0,signal_id,name,units,rank,dim_1_label,dim_2_label,dim_3_label,uri,description,signal_type,quality,doi,camera_metadata,camera
0,882,"EFM_PSI(R,Z)",Wb/rad,3,,,,/home/lhs18285/git/fair-mast/data/mast/zarr/EF...,"psi(r,z)",Analysed,Not Checked,,,


In [8]:
# Read data 
data = {row['name']: open_datatree(row.uri, engine='zarr') for index, row in result[['name', 'uri']].iterrows()}

# Choose only relevant shots
dataset = data['EFM_PSI(R,Z)']
dataset = dataset.filter(lambda x: x.name in shot_ids)
dataset = dataset.isel(time=50)

n_shots = len(dataset)
fig, axes = plt.subplots(2, 3, figsize=(10, 5))
axes = axes.flatten()

for index, (shot_id, shot) in enumerate(dataset.items()):
    data = shot['data']
    time = shot['time']
    axes[index].matshow(data, cmap='plasma')
    axes[index].set_xticks([], [])
    axes[index].set_yticks([], [])
    axes[index].set_title(f'Shot {shot_id}')
plt.suptitle(f'EFM_PSI(R,Z) ({shot.attrs["units"]})')
plt.tight_layout()

IndexError: index 50 is out of bounds for axis 0 with size 16