## Convert an .eeg file to a pandas DataFrame

In [None]:
import os
import numpy as np
import struct

def _read_exact(stream, size, what):
    """Read exactly `size` bytes from `stream`, or raise ValueError naming `what`."""
    try:
        chunk = stream.read(size)
    except OSError as e:
        raise ValueError(f"Error reading {what}: {e}") from e
    if len(chunk) != size:
        # A short read means the file ended before the structure was complete.
        raise ValueError(f"Error reading {what}. File may be corrupted or not in the expected format.")
    return chunk


def eeg_to_ascii(file_name, chanlist='all', triallist='all', typerange='all', accepttype='all', rtrange='all', responsetype='all', data_format='auto'):
    """Read a binary epoched EEG file (.eeg), scale the samples, and return them as arrays.

    The file is parsed as: a 900-byte general header, one 75-byte electrode
    record per channel, then for every sweep a 13-byte sweep header followed by
    `pnts * chan` little-endian 32-bit integer samples stored point by point
    (all channels of point 0, then all channels of point 1, ...).

    Parameters:

    - **file_name (str or path-like)**: Path of the binary EEG file to read. Must end in `.eeg` (case-insensitive).
    - **chanlist, triallist, typerange, accepttype, rtrange, responsetype**: Accepted for interface
      compatibility but currently ignored: every channel and every sweep is always read.
    - **data_format (str)**: Accepted for interface compatibility but currently ignored: samples are
      always decoded as 32-bit integers.

    Returns:

    - **data (numpy.ndarray)**: Float array of shape `(nsweeps, channels, points)` holding the scaled
      samples in volts (raw value * calib * sensitivity / 204.8, then * 1e-6).
    - **chan_names (list of str)**: Channel labels, in file order.
    - **rate (int)**: The sample rate (in Hz) read from the header.
    - **xmin (float)**: The minimum time (in seconds) read from the header.
    - **sweep_headers (list of tuple)**: One `(accept, ttype, correct, rt, response)` tuple per sweep;
      `accept` is a length-1 `bytes` object, `rt` a float, the others ints.

    Raises:

    - **ValueError**: If the name does not end in `.eeg`, the file does not exist, or the file is
      shorter than its header announces / cannot be decoded.
    """
    path = os.fspath(file_name)

    # Check if file ends with .eeg (upper-case extensions are accepted too)
    if not path.lower().endswith('.eeg'):
        raise ValueError("File must be a binary EEG file (.eeg).")

    if not os.path.isfile(path):
        raise ValueError(f"File {file_name} not found.")

    setup_size = 900        # general header
    electrode_size = 75     # per-channel electrode record
    # accept (char), type, correct (short), rt (float), response, reserved (short): 13 bytes
    sweep_header = struct.Struct('<chhfhh')
    sample_dtype = np.dtype('<i4')

    with open(path, 'rb') as f:
        # General header: only a handful of fields are needed, at fixed offsets.
        setup = _read_exact(f, setup_size, "binary file")
        nsweeps, = struct.unpack_from('<H', setup, 362)     # number of sweeps
        pnts, chan = struct.unpack_from('<HH', setup, 368)  # points per waveform, channels
        rate, = struct.unpack_from('<H', setup, 376)        # sample rate (Hz)
        xmin, = struct.unpack_from('<f', setup, 505)        # in s

        # Electrode configuration: label plus the two values that define the scale factor.
        chan_names = []
        factors = np.empty(chan, dtype=float)
        for elec in range(chan):
            record = _read_exact(f, electrode_size, "binary file")
            try:
                # Labels are NUL-terminated C strings; drop the terminator and anything after it.
                label = record[:10].split(b'\x00', 1)[0].decode('ascii')
            except UnicodeDecodeError as e:
                raise ValueError(f"Error reading file: {e}") from e
            chan_names.append(label)
            sensitivity, = struct.unpack_from('<f', record, 59)
            calib, = struct.unpack_from('<f', record, 71)
            factors[elec] = calib * sensitivity / 204.8

        # The stream is now positioned at 900 + chan * 75, i.e. at the first sweep.
        data = np.empty((nsweeps, chan, pnts), dtype=float)
        sweep_headers = []
        sweep_bytes = pnts * chan * sample_dtype.itemsize

        for sweep in range(nsweeps):
            # The sweep header is 13 bytes in total; read it once and unpack every field from it
            # (skipping 13 bytes and then reading the fields would misalign all following data).
            head = _read_exact(f, sweep_header.size, "sweep header")
            accept, ttype, correct, rt, response, _reserved = sweep_header.unpack(head)
            sweep_headers.append((accept, ttype, correct, rt, response))

            if sweep_bytes:
                block = _read_exact(f, sweep_bytes, "data points")
                # Samples are stored point-major; transpose to (channel, point) and
                # scale each channel to microvolts.
                samples = np.frombuffer(block, dtype=sample_dtype).reshape(pnts, chan)
                data[sweep] = samples.T * factors[:, np.newaxis]

    # Convert data from microvolts to volts
    data *= 1e-6
    return data, chan_names, rate, xmin, sweep_headers

## Example Usage

In [None]:
# Path of the recording to load; adjust to your own file.
file_name = "/home/woess/current assignment/read_neuro_scratch/16017_rest_ec.eeg"


# eeg_to_ascii returns five values. The per-sweep headers must be unpacked as
# well, otherwise the assignment fails with "too many values to unpack".
eeg_data, channels, sample_rate, xmix, sweep_headers = eeg_to_ascii(file_name)
print("Channels:", channels)
print("EEG data:", eeg_data)
print("Sample rate:", sample_rate)
print("Xmin:", xmix)

In [None]:
#Place eeg_data into a pandas dataframe
import pandas as pd
import matplotlib.pyplot as plt

# First trial only: rows are time points, columns are channels.
df = pd.DataFrame(data=eeg_data[0].T, columns=channels)
# Show floats with 50 decimal places (change the 50 in the format spec to adjust the precision).
pd.set_option('display.float_format', '{:.50f}'.format)
df

# #plot the data
# plt.plot(df.index.to_list(),df["O2"].to_list())
#plt.show()