# Convert SpikeGadgets exported date into binary

## Recording directory

In [1]:
# Path of recording directory
# rec_dir = 'C:/Users/Tatsumi/Documents/Data/KQTY_NP/32623/20230907_1min'
# rec_dir = 'C:/Users/Tatsumi/Documents/Data/KQTY_NP/32623/20230907_161730.rec'
rec_dir = 'E:/Dataset/KQTY/GridBat/32622/flight_room/231006/ephys/20231006_152611.rec'

## Functions

In [2]:
import sys
import os
import string
import glob
from pathlib import Path

import numpy as np

The original trodes function `readTrodesExtractedDataFile3` cannot be imported because of the error caused by the testing function in the script. This is because the new version of NumPy does not support some syntax any more.  
To load the data exported by Trodes, import and run the function copied and fixed for the part causing the error.

In [3]:
# Trodes function for data interface
trodes_dir = 'C:/Users/Tatsumi/Documents/GitHub/Neuropixels_spikegadgets_analysis/python/fromTrodes/' # path of Trodes python functions
sys.path.append(trodes_dir)
from readTrodesExtractedDataFile3 import readTrodesExtractedDataFile as readTrodes

First of all, specify the directories for each data band.

In [4]:
lfp_dir = glob.glob('{}/*.LFP'.format(rec_dir))
ap_dir = glob.glob('{}/*.spikeband'.format(rec_dir))
if len(lfp_dir) == 1:
    lfp_dir = Path(lfp_dir[0])
if len(ap_dir) == 1:
    ap_dir = Path(ap_dir[0])

# Make directory for binary data
file_header = lfp_dir.parts[-1].split('.')[0]
binary_dir = rec_dir + '/' + file_header + '.binary'
if not os.path.exists(binary_dir):
    os.mkdir(binary_dir)
print(binary_dir)

E:/Dataset/KQTY/GridBat/32622/flight_room/231006/ephys/20231006_152611.rec/20231006_152611_merged.binary


Load the signals and timestamps for individual probe. Then, concatenate all channels into a binary data.

In [5]:
streams = ['LFP', 'spikeband']

### Memmap to load long recording

In [10]:
# fname = 'C:/Users/Tatsumi/Documents/Data/KQTY_NP/32623/20230907_1min/20230907_161730_merged_split1.binary/20230907_161730_merged_split1.LFP_mmap_probe2.dat'
fname = 'E:/Dataset/KQTY/GridBat/32622/flight_room/231006/ephys/20231006_152611.rec/20231006_152611_merged.LFP/20231006_152611_merged.LFP_nt1161ch1.dat'
f = open(fname,mode='rb')
for i in range(10):
    data = f.read(2)
    print(int.from_bytes(data, "little"))
    
f.close()

21308
24948
29810
29472
29797
26996
26478
15987
2573
25924


In [7]:
stream = 'spikeband'

In [None]:
from tqdm import tqdm


if stream == 'LFP':
    stream_dir = lfp_dir
if stream == 'spikeband':
    stream_dir = ap_dir
print('Processing the stream {}...'.format(stream))

# gain = 50
# scale_to_uv = (600/32767)*(1000/gain)

num_chans = np.zeros(3) # Number of channels
for probe in range(3):
    # num_chans[probe] = int(len(glob.glob(stream_dir+'/*nt'+str(probe+1)+'*')))
    num_chans[probe] = int(len(glob.glob('{}/*nt{}*'.format(stream_dir,probe+1))))
    print('Probe {} has {} channels'.format(probe+1,int(num_chans[probe])))
num_probe = sum(i > 0 for i in num_chans) # number of probes

# Make a list of all dat files
stream_file = {}
for probe in range(num_probe):
    stream_file[probe] = {}
    stream_file[probe]['file_path'] = glob.glob('{}/*nt{}*'.format(stream_dir,probe+1))

# Load timestamps
ts_file = glob.glob('{}/*timestamps*'.format(stream_dir))
ts_data = readTrodes(ts_file[0])
ts_lfp = ts_data['data']
print('Timestamps has {} samples'.format(ts_lfp.size))

# concatenate data and write into a binary file
for probe in range(num_probe):
    binary_mmap_filename = os.path.join(binary_dir,'{}.{}_mmap_probe{}.dat'.format(file_header,stream,probe+1))
    f = np.memmap(binary_mmap_filename, dtype='int16', mode='w+', shape=(int(num_chans[probe]),ts_lfp.size))
    # f = open(binary_path,'wb')
    # concatenated_binary = np.zeros([int(num_chans[probe]),ts_lfp.size],np.int16)
    # for ch in range(int(num_chans[probe]))
    for ch in tqdm(range(int(num_chans[probe])), desc="Loading data..."):
        # concatenated_binary[ch,:] = readTrodes(stream_file[probe]['file_path'][ch])['data']
        f[ch,:] = readTrodes(stream_file[probe]['file_path'][ch])['data']
    # concatenated_binary = concatenated_binary.reshape((concatenated_binary.size,1))

    print(f)
    f = f.transpose()
    # f = f.reshape((1,f.size))
    f.shape = (1,f.size)
    print(f.shape)
    print(f)
    # f.astype('int16').tofile(binary_mmap_filename)
    # f.write(concatenated_binary)
    # f.close()
    # f.flush()
    del f
    
    print('-----------------------')
    print('The stream {} for probe {} was converted into a binary file.'.format(stream,probe+1))
    print('The path of the binary file is {}'.format(binary_mmap_filename))

Processing the stream spikeband...
Probe 1 has 192 channels
Probe 2 has 192 channels
Probe 3 has 0 channels


  return np.dtype(typearr)


Timestamps has 195711745 samples


Loading data...: 100%|███████████████████████████████████████████████████████████████| 192/192 [42:27<00:00, 13.27s/it]


[[ -384   320  -128 ...   512   640  1024]
 [ -192   128 -1024 ...  -512   192  -832]
 [ -320 -1088  -960 ...   128  -320 -1152]
 ...
 [-1408   192  -192 ...  2304  1984  2304]
 [  256 -1024 -1920 ...  2624  1920  2816]
 [-1088 -1472 -1984 ...  4736  3904  3840]]


### Concatenate without memmap

In [45]:
# from tqdm import tqdm

# for stream in streams:
#     if stream == 'LFP':
#         stream_dir = lfp_dir
#     if stream == 'spikeband':
#         stream_dir = ap_dir
#     print('Processing the stream {}...'.format(stream))
    
#     # gain = 50
#     # scale_to_uv = (600/32767)*(1000/gain)
    
#     num_chans = np.zeros(3) # Number of channels
#     for probe in range(3):
#         # num_chans[probe] = int(len(glob.glob(stream_dir+'/*nt'+str(probe+1)+'*')))
#         num_chans[probe] = int(len(glob.glob('{}/*nt{}*'.format(stream_dir,probe+1))))
#         print('Probe {} has {} channels'.format(probe+1,int(num_chans[probe])))
#     num_probe = sum(i > 0 for i in num_chans) # number of probes
    
#     # Make a list of all dat files
#     stream_file = {}
#     for probe in range(num_probe):
#         stream_file[probe] = {}
#         stream_file[probe]['file_path'] = glob.glob('{}/*nt{}*'.format(stream_dir,probe+1))
    
#     # Load timestamps
#     ts_file = glob.glob('{}/*timestamps*'.format(stream_dir))
#     ts_data = readTrodes(ts_file[0])
#     ts_lfp = ts_data['data']
#     print('Timestamps has {} samples'.format(ts_lfp.size))
    
#     # concatenate data and write into a binary file
#     for probe in range(num_probe):
#         binary_path = os.path.join(binary_dir,'{}.{}_probe{}.dat'.format(file_header,stream,probe+1))
#         f = open(binary_path,'wb')
#         concatenated_binary = np.zeros([int(num_chans[probe]),ts_lfp.size],np.int16)
#         # for ch in range(int(num_chans[probe])):
#         for ch in tqdm(range(int(num_chans[probe])), desc="Loading data..."):
#             concatenated_binary[ch,:] = readTrodes(stream_file[probe]['file_path'][ch])['data']
#         concatenated_binary = concatenated_binary.reshape((concatenated_binary.size,1))
#         f.write(concatenated_binary)
#         f.close()
#         print('-----------------------')
#         print('The stream {} for probe {} was converted into a binary file.'.format(stream,probe+1))
#         print('The path of the binary file is {}'.format(binary_path))
#     print('\n')

Processing the stream LFP...
Probe 1 has 200 channels
Probe 2 has 200 channels
Probe 3 has 0 channels
Timestamps has 6436518 samples


Loading data...: 100%|███████████████████████████████████████████████████████████████| 200/200 [00:05<00:00, 34.67it/s]


-----------------------
The stream LFP for probe 1 was converted into a binary file.
The path of the binary file is C:/Users/Tatsumi/Documents/Data/KQTY_NP/32623/20230907_161730.rec/20230907_161730_merged.binary\20230907_161730_merged.LFP_probe1.dat


Loading data...: 100%|███████████████████████████████████████████████████████████████| 200/200 [00:05<00:00, 33.88it/s]


-----------------------
The stream LFP for probe 2 was converted into a binary file.
The path of the binary file is C:/Users/Tatsumi/Documents/Data/KQTY_NP/32623/20230907_161730.rec/20230907_161730_merged.binary\20230907_161730_merged.LFP_probe2.dat


Processing the stream spikeband...
Probe 1 has 200 channels
Probe 2 has 200 channels
Probe 3 has 0 channels
Timestamps has 128730794 samples


MemoryError: Unable to allocate 48.0 GiB for an array with shape (200, 128730794) and data type int16

## Debugging

In [5]:
from tqdm import tqdm

stream = 'LFP'
stream_dir = 'E:/Dataset/KQTY/GridBat/32622/flight_room/231006/ephys/20231006_152611.rec/20231006_152611_merged.LFP'

num_chans = np.zeros(3) # Number of channels
for probe in range(3):
    # num_chans[probe] = int(len(glob.glob(stream_dir+'/*nt'+str(probe+1)+'*')))
    num_chans[probe] = int(len(glob.glob('{}/*nt{}*'.format(stream_dir,probe+1))))
    print('Probe {} has {} channels'.format(probe+1,int(num_chans[probe])))
num_probe = sum(i > 0 for i in num_chans) # number of probes
    
# Make a list of all dat files
stream_file = {}
for probe in range(num_probe):
    stream_file[probe] = {}
    stream_file[probe]['file_path'] = glob.glob('{}/*nt{}*'.format(stream_dir,probe+1))

# Load timestamps
ts_file = glob.glob('{}/*timestamps*'.format(stream_dir))
ts_data = readTrodes(ts_file[0])
ts_lfp = ts_data['data']
print('Timestamps has {} samples'.format(ts_lfp.size))


Probe 1 has 192 channels
Probe 2 has 192 channels
Probe 3 has 0 channels
Timestamps has 9785570 samples


  return np.dtype(typearr)


In [6]:
# concatenate data and write into a binary file
probe = 0
binary_mmap_filename = os.path.join(binary_dir,'{}.{}_mmap_probe{}.dat'.format(file_header,stream,probe+1))
f = np.memmap(binary_mmap_filename, dtype='int16', mode='w+', shape=(int(num_chans[probe]),ts_lfp.size))
# f = open(binary_path,'wb')
# concatenated_binary = np.zeros([int(num_chans[probe]),ts_lfp.size],np.int16)
# for ch in range(int(num_chans[probe]))
for ch in tqdm(range(int(num_chans[probe])), desc="Loading data..."):
    # concatenated_binary[ch,:] = readTrodes(stream_file[probe]['file_path'][ch])['data']
    f[ch,:] = readTrodes(stream_file[probe]['file_path'][ch])['data']
# concatenated_binary = concatenated_binary.reshape((concatenated_binary.size,1))

print(f)
f = f.transpose()
f = f.reshape((1,f.size))        
print(f.shape)
print(f)
f.tofile(binary_mmap_filename)
# f.write(concatenated_binary)
# f.close()
# f.flush()
del f

Loading data...: 100%|███████████████████████████████████████████████████████████████| 192/192 [00:03<00:00, 51.39it/s]


[[ -320  -192  -256 ... -1216 -1152 -1216]
 [    0    64  -128 ...     0   128   320]
 [  960  1024  1024 ... -1408 -1024 -1088]
 ...
 [ 1536  1408  1088 ...  3264  3584  3264]
 [  448   512   576 ... -1216 -1472 -1536]
 [  192   128   192 ... -1344 -1600 -1792]]
(1, 1878829440)
[[ -320     0   960 ...  3264 -1536 -1792]]


In [18]:
del f

NameError: name 'f' is not defined

In [7]:
# fname = 'C:/Users/Tatsumi/Documents/Data/KQTY_NP/32623/20230907_1min/20230907_161730_merged_split1.binary/20230907_161730_merged_split1.LFP_mmap_probe2.dat'
fname = 'E:/Dataset/KQTY/GridBat/32622/flight_room/231006/ephys/20231006_152611.rec/20231006_152611_merged.binary/20231006_152611_merged.LFP_mmap_probe1.dat'
f = np.memmap(fname,dtype='int16',mode='r')

In [8]:
f

memmap([ -320,     0,   960, ...,  3264, -1536, -1792], dtype=int16)