### Example stuff from ali_offline_demod.py

#### packages

In [1]:
import numpy as np
import matplotlib as mpl
mpl.rcParams['axes.formatter.useoffset'] = False
import matplotlib.pyplot as plt
import scipy

from scipy.signal import sawtooth, square, savgol_filter
import pandas as pd
import glob as gl
import os
import cmath

from scipy.signal import sawtooth, square,find_peaks
from scipy import spatial
# import lambdafit as lf
from scipy.interpolate import CubicSpline,interp1d
import h5py

from tqdm import tqdm as tqdm_terminal
from tqdm.notebook import trange, tqdm_notebook
from scipy.signal.windows import hann

from scipy.fft import fft, ifft, fftfreq
from copy import deepcopy
from scipy.interpolate import CubicSpline, interp1d
from scipy.optimize import curve_fit

### matt's current read_data function

In [2]:
#Functions for reading, processing, and demodulating real data
def read_data(filename,channels='all',start_channel=0,stop_channel=1000):
    if channels == 'all':
        file = h5py.File(filename, 'r')
        adc_i = np.array(file['time_ordered_data']['adc_i'])
        adc_i = np.delete(adc_i, slice(0,22), 0)
        adc_q = file['time_ordered_data']['adc_q']
        adc_q = np.delete(adc_q, slice(0,22), 0)
        t = np.array(file['time_ordered_data']['timestamp'])  
    elif channels == 'some':
        start_channel += 23 #eliminate the first 23 empty channels in hdf5 -> makes channel numbering match resonator numbering
        stop_channel += 23 + 1 #eliminate the first 23 empty channels in hdf5 -> makes channel numbering match resonator numbering; +1 forces python to include the stop_channel
        file = h5py.File(filename, 'r')
        adc_i = np.array(file['time_ordered_data']['adc_i'][start_channel:stop_channel]) 
        adc_q = np.array(file['time_ordered_data']['adc_q'][start_channel:stop_channel]) 
        t = np.array(file['time_ordered_data']['timestamp'])  
    
    return t, adc_i, adc_q


'/Users/leayamashiro/AliCPT/ali_LY_git'

In [6]:
# create file object
file = h5py.File('../alicpt_data/data_files/ts_toneinit_fcenter_4250.0_20240506174818_t_20240506191017.hd5', 'r')

In [7]:
# h5py.File acts like Python dictionary -- this means we can check keys: 
print('HDF5 dictionary: ', list(file.keys()))


HDF5 dictionary:  ['dimension', 'global_data', 'time_ordered_data']


In [8]:
# based on that, there are three datasets in the file
# we can examine each key in the set - create different objects: 

dimension = file['dimension']
global_data = file['global_data']
time_data = file['time_ordered_data']

# figured out that these are 'Group' objects, not individual data sets
# inspect them with same list function: 

print('dimension keys: ', list(dimension))
print('global data keys: ', list(global_data))
print('time_data keys: ', list(time_data))

dimension keys:  ['n_attenuators', 'n_fftbins', 'n_sample', 'n_tones']
global data keys:  ['attenuator_settings', 'baseband_freqs', 'chan_number', 'chanmask', 'detector_beam_ampl', 'detector_delta_x', 'detector_delta_y', 'detector_dx_dy_elevation_angle', 'detector_pol', 'dfoverf_per_mK', 'ifslice_number', 'lo_freq', 'rfsoc_number', 'sample_rate', 'tile_number', 'tone_powers']
time_data keys:  ['adc_i', 'adc_q', 'timestamp']


In [9]:
len(time_data)

3

okay, now seeing that there are many pieces of information in this set – based on Matt's 'read_data' function, the relevant data to work on is the 'time_ordered_data'


i am going to assume thus that i do not need to worry about the 'dimension' and 'global' objects 

In [10]:
# organize time_data into its constituent parts for personal inspection: 

i_hd5 = time_data['adc_i']
q_hd5 = time_data['adc_q']
t_hd5 = time_data['timestamp']

# inspect: 

print('time data properties --')
print('i:', i_hd5.shape, i_hd5.dtype)
print('q:', q_hd5.shape, q_hd5.dtype)
print('t:', t_hd5.shape, t_hd5.dtype)



time data properties --
i: (1024, 293288) int32
q: (1024, 293288) int32
t: (293288,) float64


In [None]:
# time investigation: 



In [12]:
# so 1024 channels, 

t_hd5[0]

1715047819.7287214

In [None]:
# # first look: 

# # Assuming time_data contains 'adc_i', 'adc_q', and 'timestamp'
# adc_i = time_data['adc_i']  # Shape: (1024, 293288)
# adc_q = time_data['adc_q']  # Shape: (1024, 293288)
# timestamp = time_data['timestamp']  # Shape: (293288,)

# # Flatten the 'adc_i' and 'adc_q' arrays to make them column vectors
# # If we want to treat each sample in 'adc_i' and 'adc_q' as a separate entry, we can concatenate them
# adc_i_flat = adc_i.flatten()  # Flattened shape: (1024 * 293288,)
# adc_q_flat = adc_q.flatten()  # Flattened shape: (1024 * 293288,)

# # Combine the flattened 'adc_i', 'adc_q', and 'timestamp' into a structured array or DataFrame
# # We need to repeat the timestamp values to match the flattened data shape
# timestamp_repeated = np.tile(timestamp, adc_i.shape[0])  # Repeat the timestamp for each of the 1024 samples

# # Create the DataFrame
# df = pd.DataFrame({
#     'timestamp': timestamp_repeated,
#     'adc_i': adc_i_flat,
#     'adc_q': adc_q_flat
# })

# # View the first few rows of the DataFrame
# print(df.head())

In [45]:
# so we need to have arrays with time stamp events corresponding to the i and q data, go back and use the file object

i = np.array(t_hd5, i_hd5)
q = np.array(t_hd5, q_hd5)

# based on Matt's function -- i = np.array(file['time_ordered_data']['adc_i'])
i[20000]

data = np.array([t_hd5, [i_hd5, q_hd5]])


ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.

In [41]:
# wondering about 'dimension' object for chunking: 

n_atten = dimension['n_attenuators']
n_fft = dimension['n_fftbins']
n_samp = dimension['n_sample']
n_tone = dimension['n_tones']

print(n_atten.shape, n_fft.shape, n_samp.shape, n_tone.shape)

# okay so not going to be impacted in the chunking because just characteristic data 
# if it's single-value data then why not call it 'global data'? 

(1,) (1,) (1,) (1,)


In [16]:
# just looking 

t[0:20000]

array([1.71504782e+09, 1.71504782e+09, 1.71504782e+09, ...,
       1.71504786e+09, 1.71504786e+09, 1.71504786e+09])

In [17]:
t[50000]

1715047922.1249328

so we have a set of 293288 events

AttributeError: 'Dataset' object has no attribute 'index'

In [1]:
293288/1024

286.4140625

In [6]:
h5py.File?

[0;31mInit signature:[0m
[0mh5py[0m[0;34m.[0m[0mFile[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mname[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmode[0m[0;34m=[0m[0;34m'r'[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdriver[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mlibver[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0muserblock_size[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mswmr[0m[0;34m=[0m[0;32mFalse[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mrdcc_nslots[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mrdcc_nbytes[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mrdcc_w0[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtrack_order[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mfs_strategy[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mfs_persist[0m[0;

In [None]:
# FROM PAST: (can utilize for non-pandas numpy setup)

# make length object, empty arrays
len_testdata = len(testdata) 
datalist = [] 
testTime = []
dataCount = []

# iterate through all 'testdata' object and pull out correlation
# info, time info, and count info 
for n in range(len_testdata):
    test_dataFiles = np.load(testdata[n])
    datalist.append(test_dataFiles["data"]) # Append data
    testTime.append(test_dataFiles["time"]) # Append times
    dataCount.append(test_dataFiles["count"]) # Append counts 
# concatenate data with vertical stack
test_data = np.concatenate(datalist, axis=0)  
data_times = np.concatenate(testTime, axis=0)
data_counts = np.concatenate(dataCount, axis=0)



# make a combined data object
data = [{"data": test_data,
        "time": data_times,
        "count": data_counts}]
# OR make an ASCII table with it??
from astropy.table import Table
from astropy.io import ascii
# Okay too hard because data objects have multiple dimensions lol 