In [66]:
import os

import pandas as pd
import xarray as xr
from sqlalchemy import select

from src.db_utils import connect

# Connection string for the metadata database. It is taken from the
# MAST_DB_URI environment variable when that is set, so real credentials do not
# have to be written into the notebook; otherwise the local development
# default below is used.
URI = os.environ.get('MAST_DB_URI', 'postgresql://root:root@localhost:5433/mast_db')

# `connect` is a project helper; later cells use `metadata.tables[...]` and pass
# `engine` as the pandas `con=` argument, so these are presumably a SQLAlchemy
# MetaData and Engine -- confirm against src.db_utils.
metadata, engine = connect(URI)

### Querying the Metadatabase

Query the shots table and show the results.

In [67]:
# Select every row of the shots table whose cpf_zmag_efit value exceeds 0.04,
# load the rows into a DataFrame, and keep the matching shot IDs as plain ints
# (they are used further down to filter the loaded dataset).
shots_table = metadata.tables['shots']
zmag_filter = shots_table.c.cpf_zmag_efit > .04
query = select(shots_table).where(zmag_filter)

result = pd.read_sql(query, con=engine)
shot_ids = list(map(int, result['shot_id']))

result

Unnamed: 0,shot_id,timestamp,reference_shot,scenario,current_range,heating,divertor_config,pellets,plasma_shape,rpm_coil,...,cpf_vol_ipmax,cpf_vol_max,cpf_vol_truby,cpf_wmhd_ipmax,cpf_wmhd_max,cpf_wmhd_truby,cpf_zeff_ipmax,cpf_zeff_max,cpf_zeff_truby,cpf_zmag_efit
0,30110,2023-03-22 11:11:33.235461+00:00,,1,400 kA,101,X Divertor,False,Double Null,,...,8.140033,8.66066,0.0,90911.8,108315.53,0.0,,,,0.044837
1,30111,2023-03-22 11:11:33.302173+00:00,,1,400 kA,101,X Divertor,False,Double Null,,...,8.456449,9.741916,0.0,43684.14,55665.31,0.0,,,,0.042443
2,30113,2023-03-22 11:11:33.354532+00:00,,1,400 kA,101,X Divertor,False,Double Null,,...,8.391672,8.426641,0.0,43127.703,60260.926,0.0,,,,0.04222
3,30116,2023-03-22 11:11:33.448358+00:00,,1,400 kA,101,X Divertor,False,Double Null,,...,7.845821,8.73802,0.0,60943.785,83505.234,0.0,,,,0.045105
4,30117,2023-03-22 11:11:33.501495+00:00,,1,400 kA,101,X Divertor,False,Double Null,,...,8.683957,8.846392,0.0,58406.06,84545.93,0.0,,,,0.052367
5,30122,2023-03-22 11:11:33.716983+00:00,,1,400 kA,101,X Divertor,False,Double Null,,...,7.604147,7.919124,0.0,66065.16,91565.36,0.0,,,,0.05738
6,30123,2023-03-22 11:11:33.851215+00:00,,1,400 kA,101,X Divertor,False,Double Null,,...,7.806886,8.057045,0.0,79191.72,88456.24,0.0,,,,0.069823


A more advanced query. Here we: 
 - Find shots with a given CPF value
 - Find corresponding signals
 - Filter signals by name

In [68]:
# Find the signals linked to the selected shots whose name contains 'acd'.
# Three queries are run in sequence; each is built with the ORM session but
# executed by pandas on the session's bind, and each result is a DataFrame.

from sqlalchemy.orm import sessionmaker
Session = sessionmaker(bind = engine)
session = Session()

shots = metadata.tables['shots']
signals = metadata.tables['signals']
shot_signal_link = metadata.tables['shot_signal_link']

# Query all shots with cpf_zmag_efit > 0.04.
# The Query object gets its own name so that `qshots` is only ever a DataFrame.
shots_query = (
    session.query(shots)
          .filter(shots.c.cpf_zmag_efit > .04)
)
qshots = pd.read_sql(shots_query.statement, con=session.bind)

# Query for corresponding signal IDs.
# in_() is given a plain Python list (via .tolist()) instead of the pandas
# Series itself, so the IN clause does not depend on how SQLAlchemy coerces a
# pandas object.
shot_signal_query = (
    session.query(shot_signal_link.c.signal_id)
    .filter(shot_signal_link.c.shot_id.in_(qshots['shot_id'].tolist()))
    .distinct()
)
qshot_signal = pd.read_sql(shot_signal_query.statement, con=session.bind)

# Query for signal data, filter only names containing 'acd'
qsignal = (
    session.query(signals)
    .filter(signals.c.signal_id.in_(qshot_signal['signal_id'].tolist()))
    .filter(signals.c.name.contains('acd'))
)

result = pd.read_sql(qsignal.statement, con=session.bind)
result

Unnamed: 0,signal_id,name,units,dim_1_label,dim_2_label,dim_3_label,uri,description,signal_type,quality,doi,camera_metadata,camera
0,22,acd_ACD_PASSNUMBER,,,,,/home/lhs18285/git/fair-mast/data/mast/zarr/ac...,,Raw,Validated,,,
1,23,acd_ACD_SS_OTRES_C6EMIS,,,,,/home/lhs18285/git/fair-mast/data/mast/zarr/ac...,,Raw,Validated,,,
2,24,acd_ACD_SS_SNPFIT_C6D,,,,,/home/lhs18285/git/fair-mast/data/mast/zarr/ac...,,Raw,Validated,,,
3,25,acd_ACD_SS_SNPFIT_C6EMIS,,,,,/home/lhs18285/git/fair-mast/data/mast/zarr/ac...,,Raw,Validated,,,
4,26,acd_ACD_SS_SNPFIT_C6ONLY_ZF,,,,,/home/lhs18285/git/fair-mast/data/mast/zarr/ac...,,Raw,Validated,,,
5,27,acd_ACD_SS_SNPRAW_C6D,,,,,/home/lhs18285/git/fair-mast/data/mast/zarr/ac...,,Raw,Validated,,,
6,28,acd_ACD_SS_SNPRAW_C6EMIS,,,,,/home/lhs18285/git/fair-mast/data/mast/zarr/ac...,,Raw,Validated,,,
7,29,acd_ACD_SS_SNPRAW_C6ONLY_ZF,,,,,/home/lhs18285/git/fair-mast/data/mast/zarr/ac...,,Raw,Validated,,,
8,30,acd_ACD_SS_TRESFT_C6DNS,,,,,/home/lhs18285/git/fair-mast/data/mast/zarr/ac...,,Raw,Validated,,,
9,31,acd_ACD_SS_TRESFT_C6EMIS,,,,,/home/lhs18285/git/fair-mast/data/mast/zarr/ac...,,Raw,Validated,,,


### Loading data

Here is an example of loading the data found in the database into a dataset.

In [69]:
# Open the Zarr store behind every signal returned by the query above and key
# the opened datasets by signal name.
data = {
    name: xr.open_zarr(uri, consolidated=False)
    for name, uri in zip(result['name'], result['uri'])
}

# NOTE(review): this key is not among the signal names shown in the query
# output above -- confirm it is present in `result`.
dataset = data['acd_ACD_SW_TRESFIT_C6D']

# Keep only the entries along `index` whose shot_id is one of the shot IDs
# selected earlier.
shot_mask = dataset['shot_id'].isin(shot_ids)
dataset = dataset.isel(index=shot_mask)
dataset

Unnamed: 0,Array,Chunk
Bytes,17.00 kiB,17.00 kiB
Shape,"(68, 64)","(68, 64)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 17.00 kiB 17.00 kiB Shape (68, 64) (68, 64) Count 3 Tasks 1 Chunks Type float32 numpy.ndarray",64  68,

Unnamed: 0,Array,Chunk
Bytes,17.00 kiB,17.00 kiB
Shape,"(68, 64)","(68, 64)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,17.00 kiB,17.00 kiB
Shape,"(68, 64)","(68, 64)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 17.00 kiB 17.00 kiB Shape (68, 64) (68, 64) Count 3 Tasks 1 Chunks Type float32 numpy.ndarray",64  68,

Unnamed: 0,Array,Chunk
Bytes,17.00 kiB,17.00 kiB
Shape,"(68, 64)","(68, 64)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,272 B,272 B
Shape,"(68,)","(68,)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 272 B 272 B Shape (68,) (68,) Count 3 Tasks 1 Chunks Type float32 numpy.ndarray",68  1,

Unnamed: 0,Array,Chunk
Bytes,272 B,272 B
Shape,"(68,)","(68,)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,272 B,272 B
Shape,"(68,)","(68,)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray
"Array Chunk Bytes 272 B 272 B Shape (68,) (68,) Count 3 Tasks 1 Chunks Type float32 numpy.ndarray",68  1,

Unnamed: 0,Array,Chunk
Bytes,272 B,272 B
Shape,"(68,)","(68,)"
Count,3 Tasks,1 Chunks
Type,float32,numpy.ndarray
