In [1]:
# -*- coding: utf-8 -*-
import pandas as pd
import os
import sys
sys.path.append('/p-antipsychotics-sleep')
import numpy as np
import pickle
import argparse
import copy

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
from matplotlib.figure import Figure
import textwrap

import faster2lib.eeg_tools as et
import faster2lib.summary_psd as sp
import faster2lib.summary_common as sc
import stage

from datetime import datetime
import logging
from logging import getLogger, StreamHandler, FileHandler, Formatter

import warnings
import seaborn as sns
import math

In [2]:
def psd_freq_bins(sample_freq):
    """ assures frequency bins compatibe among different sampling frequencies

    Args:
        sample_freq (int): The sampling frequency

    Returns:
        np.array: An array of frequency bins
    """
    n_fft = int(256 * sample_freq/100)
    # same frequency bins given by signal.welch()
    freq_bins = 1/(n_fft/sample_freq)*np.arange(0, 129)

    return freq_bins

def get_start_indices_of_sleep_stages(stages):
    # Convert the list into a pandas Series
    stages_series = pd.Series(stages)
    # Detect the stages where the stage changes
    change_points = stages_series.ne(stages_series.shift())
    # Return a dictionary where the keys are the start indices and the values are the stages
    return {index: stage for index, stage in stages_series[change_points].items()}

def get_end_indices_of_sleep_stages(stages):
    # Convert the list into a pandas Series
    stages_series = pd.Series(stages)
    # Detect the stages where the stage changes
    change_points = stages_series.ne(stages_series.shift())
    # As we want the ending indices, we need to shift the change_points series by -1
    change_points = change_points.shift(-1).fillna(False)
    # Return a dictionary where the keys are the end indices and the values are the stages
    return {index: stage for index, stage in stages_series[change_points].items()}

def get_indices_of_stage(stage_indices, stage_name):
    # Use a dictionary comprehension to extract the indices corresponding to the specified stage
    return [index for index, stage in stage_indices.items() if stage == stage_name]

def get_nrem_spectrum(data, nrem_start_index):
    """
    Returns the frequency spectrum for the NREM start epoch.
    Parameters:
    data (2D array-like): Time-series data of the format [epoch][frequency].
    nrem_start_index (int): The index of the epoch where NREM starts.
    
    Returns:
    1D array-like: The frequency spectrum at the NREM start epoch.
    """
    return data[nrem_start_index]

def calculate_elapsed_time_df(epoch_len_sec, indices):
    # Convert the epoch length from seconds to hours
    epoch_len_hour = epoch_len_sec / 3600

    # Calculate the elapsed time for each NREM index and truncate it to an integer
    elapsed_times = [index * epoch_len_hour for index in indices]
    elapsed_times_floor = [math.floor(time) for time in elapsed_times]

    # Create a dataframe
    df = pd.DataFrame({
        'index': indices,
        'Elapsed_time': elapsed_times,
        'time_in_hour': elapsed_times_floor
    })

    return df

def calculate_avg_spectrum_per_elapsed_time(spectrum_df, elapsed_time_df):
    # Filter the spectrum dataframe to include only the rows corresponding to NREM start epochs
    stage_spectrum_df = spectrum_df.loc[elapsed_time_df['index']]

    # Add the elapsed_time_floor column to the dataframe
    stage_spectrum_df['time_in_hour'] = elapsed_time_df['time_in_hour'].values

    # Group by the elapsed_time_floor column and calculate the mean for each group
    avg_spectrum_df = stage_spectrum_df.groupby('time_in_hour').mean()

    return avg_spectrum_df

def extract_psd_each(psd_info,epoch_len_sec,sample_freq):
    freq_bin=psd_freq_bins(sample_freq)
    
    stage_call=psd_info["stage_call"]
    norm_psd=psd_info["norm"]
    start_idx=get_start_indices_of_sleep_stages(stage_call)
    nrem_start_idx= get_indices_of_stage(start_idx, 'NREM')
    rem_start_idx= get_indices_of_stage(start_idx, 'REM')
    wake_start_idx= get_indices_of_stage(start_idx, 'WAKE')
    end_idx=get_end_indices_of_sleep_stages(stage_call)
    nrem_end_idx= get_indices_of_stage(end_idx, 'NREM')
    rem_end_idx= get_indices_of_stage(end_idx, 'REM')
    wake_end_idx= get_indices_of_stage(end_idx, 'WAKE')
    
    nrem_start_elapsed_time_df = calculate_elapsed_time_df(epoch_len_sec, nrem_start_idx)
    nrem_start_psd_df=calculate_avg_spectrum_per_elapsed_time(pd.DataFrame(norm_psd),
                                                                 nrem_start_elapsed_time_df)   
    nrem_start_psd_df["type"]="nrem_start"
    nrem_end_elapsed_time_df = calculate_elapsed_time_df(epoch_len_sec, nrem_end_idx)
    nrem_end_psd_df=calculate_avg_spectrum_per_elapsed_time(pd.DataFrame(norm_psd),
                                                                 nrem_end_elapsed_time_df)
    nrem_end_psd_df["type"]="nrem_end"
    rem_start_elapsed_time_df = calculate_elapsed_time_df(epoch_len_sec, rem_start_idx)
    rem_start_psd_df=calculate_avg_spectrum_per_elapsed_time(pd.DataFrame(norm_psd),
                                                                 rem_start_elapsed_time_df)   
    rem_start_psd_df["type"]="rem_start"
    rem_end_elapsed_time_df = calculate_elapsed_time_df(epoch_len_sec, rem_end_idx)
    rem_end_psd_df=calculate_avg_spectrum_per_elapsed_time(pd.DataFrame(norm_psd),
                                                                 rem_end_elapsed_time_df)
    rem_end_psd_df["type"]="rem_end"
    
    wake_start_elapsed_time_df = calculate_elapsed_time_df(epoch_len_sec, wake_start_idx)
    wake_start_psd_df=calculate_avg_spectrum_per_elapsed_time(pd.DataFrame(norm_psd),
                                                                 wake_start_elapsed_time_df) 
    wake_start_psd_df["type"]="wake_start"
    wake_end_elapsed_time_df = calculate_elapsed_time_df(epoch_len_sec, wake_end_idx)
    wake_end_psd_df=calculate_avg_spectrum_per_elapsed_time(pd.DataFrame(norm_psd),
                                                                 wake_end_elapsed_time_df)
    wake_end_psd_df["type"]="wake_end"
    
    # デルタ波とシータ波の範囲のカラムを取得
    frequency_columns = [f"f@{i}" for i in freq_bin]
    delta_range=(0,4)
    theta_range=(4,12)
    delta_columns = [col for col in frequency_columns if delta_range[0] <= float(col[2:]) <= delta_range[1]]
    theta_columns = [col for col in frequency_columns if theta_range[0] <= float(col[2:]) <= theta_range[1]]
    df=pd.concat([nrem_start_psd_df,nrem_end_psd_df,rem_start_psd_df,rem_end_psd_df,wake_start_psd_df,wake_end_psd_df])
    df.columns = list(frequency_columns) + list(df.columns[129:])
    #for df in [nrem_start_psd_df,nrem_end_psd_df,rem_start_psd_df,rem_end_psd_df,wake_start_psd_df,wake_end_psd_df]:
        # 各行についてデルタ波とシータ波の平均パワーを計算
    df['delta_power'] = df[delta_columns].apply(np.mean, axis=1)
    df['theta_power'] = df[theta_columns].apply(np.mean, axis=1)
        
    return df

def extract_psd_from_psdinfo(psd_info_path,epoch_len_sec,sample_freq):
    print(psd_info_path)
    with open(psd_info_path, 'rb') as file:
        # pickle.load()関数でデータを読み込みます。
        psd_info_list = pickle.load(file)
    psd_start_n_end_df_list = []  # 修正: 各データフレームを格納するリストを用意
    
    for psd_info in psd_info_list:
        df_append = extract_psd_each(psd_info, epoch_len_sec, sample_freq).reset_index()
        df_append["exp_label"] = psd_info["exp_label"]
        df_append["mouse_group"] = psd_info["mouse_group"]
        df_append["mouse_ID"] = psd_info["mouse_id"]
        psd_start_n_end_df_list.append(df_append)  # 修正: append()の代わりにリストに追加
    
    # 修正: pd.concat()を使用してリスト内のデータフレームを結合
    psd_start_n_end_df = pd.concat(psd_start_n_end_df_list, ignore_index=True)
    psd_start_n_end_df = psd_start_n_end_df.set_index(["exp_label", "mouse_group", "mouse_ID", "type", "time_in_hour"])
    psd_start_n_end_df = psd_start_n_end_df * 100
    return psd_start_n_end_df

In [3]:
import os

# 現在のワーキングディレクトリを取得
current_directory = os.getcwd()
print("Current working directory:", current_directory)

Current working directory: /p-antipsychotics-sleep


In [4]:
def make_df_from_summary_dic(stats_fname):
    print(stats_fname)
    stats = np.load(stats_fname, allow_pickle=True)[()]
    df_exp_info = stats["stagetime"]
    data_array = stats["stagetime_profile"]
    transition_array = stats["swtrans_profile"]  # [hourly_psw, hourly_pws]
    bout_array = stats["bout_profile"]
    
    # リストを用意
    stage_merge_list = []
    sw_transition_merge_list = []
    stage_bout_merge_list = []
    
    type_list = ["REM", "NREM", "Wake"]
    
    for i in range(df_exp_info.shape[0]):
        # sleep wake transition
        df_swtansition_append = pd.DataFrame({
            "exp_label": df_exp_info['Experiment label'][i],
            "mouse_group": df_exp_info['Mouse group'][i],
            "mouse_ID": df_exp_info['Mouse ID'][i],
            "hourly_psw": transition_array[i][0],
            "hourly_pws": transition_array[i][1],
            "time_in_hour": np.arange(len(transition_array[i][0]))
        })
        sw_transition_merge_list.append(df_swtansition_append)
        
        # bout count and length
        for j, stage in enumerate(type_list):
            if stage == "Wake":
                stage_temp = "WAKE"
            else:
                stage_temp = stage
            bout_array_temp = bout_array[i]
            for hour in range(len(data_array[i][j])):
                filtered_bouts = bout_array_temp[(bout_array_temp.stage == stage_temp) & (bout_array_temp.hour == hour)]
                if filtered_bouts.empty:
                    bout_count = 0
                    mean_duration_sec = 0
                else:
                    bout_count = filtered_bouts.bout_count.iloc[0]
                    mean_duration_sec = filtered_bouts.mean_duration_sec.iloc[0]
                
                stage_bout_append = pd.DataFrame({
                    "exp_label": df_exp_info['Experiment label'][i],
                    "mouse_group": df_exp_info['Mouse group'][i],
                    "mouse_ID": df_exp_info['Mouse ID'][i],
                    "stage": stage,
                    "bout_count": [bout_count],
                    "mean_duration_sec": [mean_duration_sec],
                    "time_in_hour": [hour]
                })
                stage_bout_merge_list.append(stage_bout_append)
        
        # hourly stage
        for j, stage in enumerate(type_list):
            df_append = pd.DataFrame({
                "exp_label": df_exp_info['Experiment label'][i],
                "mouse_group": df_exp_info['Mouse group'][i],
                "mouse_ID": df_exp_info['Mouse ID'][i],
                "stage": stage,
                "min_per_hour": data_array[i][j],
                "time_in_hour": np.arange(len(data_array[i][j]))
            })
            stage_merge_list.append(df_append)
    
    # 修正: pd.concat()でリスト内のデータフレームを結合
    stage_merge_df = pd.concat(stage_merge_list, ignore_index=True)
    sw_transition_merge_df = pd.concat(sw_transition_merge_list, ignore_index=True)
    stage_bout_merge_df = pd.concat(stage_bout_merge_list, ignore_index=True)
    
    # インデックスを設定
    stage_merge_df = stage_merge_df.set_index(["exp_label", "mouse_group", "mouse_ID", "stage", "time_in_hour"])
    sw_transition_merge_df = sw_transition_merge_df.set_index(["exp_label", "mouse_group", "mouse_ID", "time_in_hour"])
    stage_bout_merge_df = stage_bout_merge_df.set_index(["exp_label", "mouse_group", "mouse_ID", "stage", "time_in_hour"])
    
    return stage_merge_df, sw_transition_merge_df, stage_bout_merge_df

def rename_group_name(merge_df,before_str,after_str):
    index_name_list=list(merge_df.index.names)
    merge_df=merge_df.reset_index()
    merge_df["mouse_group"]=merge_df["mouse_group"].str.replace(before_str,after_str)
    merge_df=merge_df.set_index(index_name_list)
    return merge_df

def rename_group_name_bulk(merge_df, rename_dict):
    #example rename dictionary
    #rename_dict = {
    #"PFC-ex-enlarge-HP-hM4Di": "PFC-ex-enlarge",
    #"uniPFC-ex-enlarge": "PFC-ex-enlarge",
    #"biPFC-ex-enlarge": "PFC-ex-enlarge"
    #}
    index_name_list = list(merge_df.index.names)
    merge_df = merge_df.reset_index()
    for before_str, after_str in rename_dict.items():
        merge_df["mouse_group"] = merge_df["mouse_group"].str.replace(before_str, after_str)
    merge_df = merge_df.set_index(index_name_list)
    return merge_df


def rename_group_of_specified_mouse(merge_df,mouse_id_list,group_str):
    index_name_list=list(merge_df.index.names)
    merge_df=merge_df.reset_index()
    merge_df.loc[merge_df.mouse_ID.isin(mouse_id_list),["mouse_group"]]=group_str
    merge_df=merge_df.set_index(index_name_list)
    return merge_df

def add_index(merge_df,index_name,index_val):
    index_name_list=list(merge_df.index.names)
    #index_name_list.append(index_name)
    #print(index_name_list)
    merge_df=merge_df.reset_index()
    merge_df[index_name]=index_val
    index_name_list.append(index_name)
    merge_df=merge_df.set_index(index_name_list)
    return merge_df

def read_psd_ts_csv(csvpath, stage_type):
    df = pd.read_csv(csvpath, header=[0, 1, 2, 3], index_col=0).reset_index(drop=True)
    merge_list = []  # 修正: 各データフレームを格納するリストを用意
    epoch_len_sec = 8
    for i in range(df.shape[1]):
        array = np.array(df.iloc[:, i])
        bin_num = int(3600 / epoch_len_sec)
        answer = divmod(len(array), bin_num)
        if answer[1] != 0:
            array = array[:-answer[1]]
        array_reshape = array.reshape(-1, bin_num)  # 60 min (3600 sec) bin
        binned_data = np.nanmean(array_reshape, axis=1)
        df_append = pd.DataFrame({
            "exp_label": df.columns[i][0],
            "mouse_group": df.columns[i][1],
            "mouse_ID": df.columns[i][2],
            "stage": stage_type,
            "norm_delta_percentage": binned_data,
            "time_in_hour": np.arange(len(binned_data))
        })
        merge_list.append(df_append)  # 修正: リストに追加

    # 修正: pd.concat()でリスト内のデータフレームを結合
    merge_df = pd.concat(merge_list, ignore_index=True)
    merge_df = merge_df.set_index(["exp_label", "mouse_group", "mouse_ID", "stage", "time_in_hour"])
    return merge_df


def read_hourly_psd_ts_csv(csvpath,stage_type):
    return

def merge_hourly_psd_ts_csv(dir):
    #csv_fname="PSD_norm_hourly_allday_percentage-profile.csv"
    csv_fname="PSD_raw_hourly_allday_percentage-profile.csv"
    freq_bins=sp.psd_freq_bins(sample_freq=128)
    frequency_columns = [f"f@{i}" for i in freq_bins]
    delta_range=(1, 4)
    theta_range=(4, 12)
    # デルタ波とシータ波の範囲のカラムを取得
    delta_columns = [col for col in frequency_columns if delta_range[0] <= float(col[2:]) <= delta_range[1]]
    theta_columns = [col for col in frequency_columns if theta_range[0] <= float(col[2:]) <= theta_range[1]]
    #csv読み込み、カラム名そろえる
    df=pd.read_csv(os.path.join(dir,csv_fname)).rename(columns={"Experiment label":"exp_label","Mouse group":"mouse_group",
                                                                "Mouse ID":"mouse_ID","Stage":"stage","hour":"time_in_hour"})
    #nanを前後から補完
    for column in frequency_columns:
        #df[column] = df.groupby(['mouse_ID', 'stage'])[column].apply(lambda group: group.ffill().bfill().fillna(group.mean()))
        df[column] = df.groupby(['mouse_ID', 'stage'])[column].transform(lambda group: group.ffill().bfill().fillna(group.mean()))
    # 各行についてデルタ波とシータ波の平均パワーを計算
    df['delta_power'] = df[delta_columns].apply(np.mean, axis=1)
    df['theta_power'] = df[theta_columns].apply(np.mean, axis=1)
    df=df.set_index(["exp_label","mouse_group","mouse_ID","stage","time_in_hour"])
    return df

def merge_psd_ts_csv(dir):
    psd_norm_csv_list = [
        "power-timeseries_norm_delta_percentage_NREM.csv",
        "power-timeseries_norm_delta_percentage_Wake.csv",
        "power-timeseries_norm_delta_percentage.csv"
    ]
    stage_type_list = ["NREM", "Wake", "Total"]
    merge_list = []  # 修正: 各データフレームを格納するリストを用意
    
    for i, csv in enumerate(psd_norm_csv_list):
        df_append = read_psd_ts_csv(os.path.join(dir, csv), stage_type_list[i])
        merge_list.append(df_append)  # 修正: リストに追加
    
    # 修正: pd.concat()でリスト内のデータフレームを結合
    merge_df = pd.concat(merge_list, ignore_index=False)  # インデックスを保持する場合は ignore_index=False
    return merge_df

def meta_merge_psd_csv(analyzed_dir_list, subdir_vehicle, subdir_rapalog):
    psd_ts_list = []  # PSD timeseries データフレームを格納するリスト
    psd_profile_list = []  # PSD profile データフレームを格納するリスト

    for dir in analyzed_dir_list:
        # Vehicle データの処理
        df_append_vehicle = merge_hourly_psd_ts_csv(os.path.join(dir, subdir_vehicle, "PSD_raw"))
        df_append_vehicle = add_index(df_append_vehicle, "drug", "vehicle")
        psd_ts_list.append(df_append_vehicle)  # リストに追加

        # Rapalog データの処理
        df_append_rapalog = merge_hourly_psd_ts_csv(os.path.join(dir, subdir_rapalog, "PSD_raw"))
        df_append_rapalog = add_index(df_append_rapalog, "drug", "rapalog")
        psd_ts_list.append(df_append_rapalog)  # リストに追加

        # Profile データの処理
        csv_fname = "PSD_norm_allday_percentage-profile.csv"
        df_profile_append_vehicle = read_psd_profile_csv(os.path.join(dir, subdir_vehicle, "PSD_norm", csv_fname))
        df_profile_append_vehicle = add_index(df_profile_append_vehicle, "drug", "vehicle")
        psd_profile_list.append(df_profile_append_vehicle)  # リストに追加

        df_profile_append_rapalog = read_psd_profile_csv(os.path.join(dir, subdir_rapalog, "PSD_norm", csv_fname))
        df_profile_append_rapalog = add_index(df_profile_append_rapalog, "drug", "rapalog")
        psd_profile_list.append(df_profile_append_rapalog)  # リストに追加

    # リスト内のデータフレームを結合
    merge_psd_ts_df = pd.concat(psd_ts_list, ignore_index=False)  # 元のインデックスを保持
    merge_psd_profile_df = pd.concat(psd_profile_list, ignore_index=False)  # 元のインデックスを保持

    return merge_psd_ts_df, merge_psd_profile_df

def read_psd_profile_csv(csvpath):
    df = pd.read_csv(csvpath)
    exp_label_list = df.iloc[:, 0].unique()
    group_list = df.iloc[:, 1].unique()
    mouse_list = df.iloc[:, 2].unique()
    stage_list = df.iloc[:, 4].unique()
    freq_list = [float(s[2:]) for s in df.columns if s.startswith("f")]
    
    merge_list = []  # 修正: 各データフレームを格納するリストを用意
    
    for l in exp_label_list:
        for g in group_list:
            for m in mouse_list:
                for s in stage_list:
                    try:
                        df_append = pd.DataFrame({
                            "exp_label": l,
                            "mouse_group": g,
                            "mouse_ID": m,
                            "stage": s,
                            "normalized_power": df[
                                (df["Experiment label"] == l) &
                                (df["Mouse group"] == g) &
                                (df["Mouse ID"] == m) &
                                (df["Stage"] == s)
                            ].iloc[0, 6:].values,
                            "frequency": freq_list
                        })
                        merge_list.append(df_append)  # 修正: リストに追加
                    except Exception:
                        pass
    
    # 修正: pd.concat()でリスト内のデータフレームを結合
    merge_df = pd.concat(merge_list, ignore_index=False)
    merge_df = merge_df.set_index(["exp_label", "mouse_group", "mouse_ID", "stage", "frequency"])
    return merge_df


def process_stats_path_list(analyzed_dir_list,vehicle_path,rapalog_path):
    stats_list_vehicle=[]
    stats_list_rapalog=[]
    #vehicle_path="vehicle_60h/stagetime_stats.npy"
    #rapalog_path="rapalog_60h/stagetime_stats.npy"
    #vehicle_path="vehicle_84h_before_24h_after_60h/stagetime_stats.npy"
    #rapalog_path="rapalog_84h_before_24h_after_60h/stagetime_stats.npy"
    for dir in analyzed_dir_list:
        stats_list_vehicle.append(os.path.join(dir,vehicle_path))
        stats_list_rapalog.append(os.path.join(dir,rapalog_path))
    return stats_list_vehicle,stats_list_rapalog

def process_psd_info_path_list(analyzed_dir_list):
    psd_info_list_vehicle=[]
    psd_info_list_rapalog=[]
    vehicle_path="vehicle_24h_before6h/psd_info_list.pkl"
    rapalog_path="rapalog_24h_before6h/psd_info_list.pkl"
    #vehicle_path="vehicle_84h_before_24h_after_60h/stagetime_stats.npy"
    #rapalog_path="rapalog_84h_before_24h_after_60h/stagetime_stats.npy"
    for dir in analyzed_dir_list:
        psd_info_list_vehicle.append(os.path.join(dir,vehicle_path))
        psd_info_list_rapalog.append(os.path.join(dir,rapalog_path))
    return psd_info_list_vehicle,psd_info_list_rapalog

def merge_individual_df(analyzed_dir_list, vehicle_path, rapalog_path, epoch_len_sec, ample_freq):
    stats_list_vehicle, stats_list_rapalog = process_stats_path_list(analyzed_dir_list, vehicle_path, rapalog_path)
    psd_info_list_vehicle, psd_info_list_rapalog = process_psd_info_path_list(analyzed_dir_list)
    
    meta_merge_list = []  # meta_merge_df用リスト
    meta_merge_list2 = []  # meta_merge_df2用リスト
    meta_merge_list3 = []  # meta_merge_df3用リスト
    psd_start_n_end_list = []  # psd_start_n_end_df用リスト
    
    # Vehicleデータの処理
    for stats in stats_list_vehicle:
        df, df2, df3 = make_df_from_summary_dic(stats)
        df = add_index(df, "drug", "vehicle")
        meta_merge_list.append(df)
        df2 = add_index(df2, "drug", "vehicle")
        meta_merge_list2.append(df2)
        df3 = add_index(df3, "drug", "vehicle")
        meta_merge_list3.append(df3)
    
    for psd_info_list in psd_info_list_vehicle:
        df4 = extract_psd_from_psdinfo(psd_info_list, epoch_len_sec, ample_freq)
        df4 = add_index(df4, "drug", "vehicle")
        psd_start_n_end_list.append(df4)
    
    # Rapalogデータの処理
    for stats in stats_list_rapalog:
        df, df2, df3 = make_df_from_summary_dic(stats)
        df = add_index(df, "drug", "rapalog")
        meta_merge_list.append(df)
        df2 = add_index(df2, "drug", "rapalog")
        meta_merge_list2.append(df2)
        df3 = add_index(df3, "drug", "rapalog")
        meta_merge_list3.append(df3)
    
    for psd_info_list in psd_info_list_rapalog:
        df4 = extract_psd_from_psdinfo(psd_info_list, epoch_len_sec, ample_freq)
        df4 = add_index(df4, "drug", "rapalog")
        psd_start_n_end_list.append(df4)
    
    # pd.concatでリスト内のデータフレームを結合
    meta_merge_df = pd.concat(meta_merge_list, ignore_index=False)
    meta_merge_df2 = pd.concat(meta_merge_list2, ignore_index=False)
    meta_merge_df3 = pd.concat(meta_merge_list3, ignore_index=False)
    psd_start_n_end_df = pd.concat(psd_start_n_end_list, ignore_index=False)
    
    return meta_merge_df, meta_merge_df2, meta_merge_df3, psd_start_n_end_df


def exclude_mouse(meta_merge_df,exclude_mouse_list):
    index_name_list=list(meta_merge_df.index.names)
    meta_merge_df=meta_merge_df.reset_index()
    meta_merge_df=meta_merge_df[~meta_merge_df.mouse_ID.isin(exclude_mouse_list)]
    meta_merge_df=meta_merge_df.set_index(index_name_list)
    return meta_merge_df

def plot_timeseries(ax,x_val,y_val,y_err,plot_color,label):
    ax.plot(x_val,y_val,color=plot_color,label=label)
    ax.fill_between(x_val, y_val-y_err, y_val+y_err, facecolor=plot_color, alpha=0.2)

def calculate_delta(meta_merge_df):
    delta_df=meta_merge_df.loc[pd.IndexSlice[:,:,:,:,:,"rapalog"],:].copy()
    index_name_list=list(delta_df.index.names)
    delta_df=delta_df.reset_index()

    vehicle_df=meta_merge_df.loc[pd.IndexSlice[:,:,:,:,:,"vehicle"],:].copy()
    vehicle_df=vehicle_df.reset_index()
    index_name_list=[s for s in index_name_list if s != 'drug']
    delta_df["rapa-vehicle-delta_min_per_hour"]=delta_df["min_per_hour"]-vehicle_df["min_per_hour"]
    delta_df=delta_df.set_index(index_name_list)
    delta_df.drop(columns=["drug","min_per_hour"],inplace=True)
    return(delta_df)

In [5]:
def merge_sleep_stage_df(analyzed_dir_list,epoch_len_sec,sample_freq):
    vehicle_path="vehicle_24h_before6h/stagetime_stats.npy"
    rapalog_path="rapalog_24h_before6h/stagetime_stats.npy"
    meta_stage_df,meta_merge_df_sw,meta_stage_bout_df,meta_psd_start_end_df=merge_individual_df(analyzed_dir_list,
                                                                          vehicle_path,rapalog_path,epoch_len_sec,sample_freq)
    return meta_stage_df,meta_merge_df_sw,meta_stage_bout_df,meta_psd_start_end_df

def merge_psd_df(analyzed_dir_list):
    subdir_vehicle="vehicle_24h_before6h"
    subdir_rapalog="rapalog_24h_before6h"
    merge_psd_ts_df,merge_psd_profile_df=meta_merge_psd_csv(analyzed_dir_list,subdir_vehicle,subdir_rapalog)
    return merge_psd_ts_df,merge_psd_profile_df

def group_analysis_each_df(df):
    if "time_in_hour" in df.index.names:
        if "stage" in df.index.names:
            keys = ["mouse_group","drug","stage","time_in_hour"]
        else:
            keys = ["mouse_group","drug","type","time_in_hour"]
    else:
        if "stage" in df.index.names:
            keys = ["mouse_group","drug","stage"]
        else:
            keys = ["mouse_group","drug","type"]
    df_grouped=df.groupby(keys)
    mean=df_grouped.mean()
    sem=df_grouped.sem()
    count=df_grouped.count()
    return mean,sem,count

def extract_mean_n_err(mean,sem,g_name,drug,sleep_stage,val_name):
    y=np.array(mean.loc[pd.IndexSlice[g_name,drug,sleep_stage,:],val_name]).flatten()
    err=np.array(sem.loc[pd.IndexSlice[g_name,drug,sleep_stage,:],val_name]).flatten()
    return y,err

def extract_mean_n_err_for_PSD(mean,sem,g_name,drug,sleep_stage):
    freq_bins=sp.psd_freq_bins(sample_freq=128)
    frequency_columns = [f"f@{i}" for i in freq_bins]
    y=np.array(mean.loc[pd.IndexSlice[g_name,drug,sleep_stage,:],frequency_columns]).flatten()
    err=np.array(sem.loc[pd.IndexSlice[g_name,drug,sleep_stage,:],frequency_columns]).flatten()
    return y,err

def plot_ts_1group(mean,sem,count,g_name,sleep_stage,ax1,val_name,y_label):
    x_val=np.arange(0,24)
    dark_period=[[0,12],[24,36],[48,60]]
    light_period=[[12,24],[36,48]]
    
    y,err=extract_mean_n_err(mean,sem,g_name,"vehicle",sleep_stage,val_name)
    sample_n=count.loc[pd.IndexSlice[g_name,"vehicle",sleep_stage,0]][0]
    #label_str="vehicle (n=%d)"%sample_n
    label_str="vehicle"
    plot_timeseries(ax1,x_val,y,err,"k",label_str)

    y,err=extract_mean_n_err(mean,sem,g_name,"rapalog",sleep_stage,val_name)
    sample_n=count.loc[pd.IndexSlice[g_name,"rapalog",sleep_stage,0]][0]
    #label_str="rapalog (n=%d)"%sample_n
    label_str="rapalog"
    plot_timeseries(ax1,x_val,y,err,"r",label_str)
    
    for ax in [ax1]:
        ax.plot([0,60],[0.1,0.1],linewidth=5,color="yellow")
        ax.plot([6.5,17.5],[0.1,0.1],linewidth=5,color="k")
        #ax.plot([37,47],[0.1,0.1],linewidth=10,color="yellow")
        if val_name=="min_per_hour":
            if sleep_stage=="REM":
                ax.set_ylim([0,20])
                ax.set_yticks([0,10,20])
            else:
                ax.set_ylim([0,60])
                ax.set_yticks([0,20,40,60])
        elif val_name=="bout_count":
            if sleep_stage=="REM":
                ax.set_ylim([0,10])
                ax.set_yticks([0,5,10])
            else:
                ax.set_ylim([0,40])
                ax.set_yticks([0,20,40])
        elif val_name=="mean_duration_sec":
            if sleep_stage=="Wake":
                ax.set_ylim([0,2000])
                ax.set_yticks([0,1000,2000])
            elif sleep_stage=="NREM":
                ax.set_ylim([0,600])
                ax.set_yticks([0,300,600])
            elif sleep_stage=="REM":
                ax.set_ylim([0,100])
                ax.set_yticks([0,50,100])
        elif val_name=="norm_delta_percentage":
            ax.set_ylim([0,10])
            ax.set_yticks([0,5,10])
        elif val_name=="delta_power":
            ax.set_ylim([0,20])
            ax.set_yticks([0,10,20])
        elif val_name=="theta_power":
            ax.set_ylim([0,10])
            ax.set_yticks([0,5,10])
        elif val_name=="norm_delta_power":
            ax.set_ylim([0.8,1.2])
            ax.set_yticks([0.8,1,1.2])
        elif val_name=="norm_theta_power":
            ax.set_ylim([0.8,1.2])
            ax.set_yticks([0.8,1,1.2])
        else:
            ax.set_ylim([0,60])
            ax.set_yticks([0,20,40,60])
        #ax.set_ylabel("NREM sleep duration (min/h)")
        ax.set_ylabel(y_label)
        ax.set_xticks([0,6,12,18,24])
        ax.set_xticklabels([-6,0,6,12,18])
        ax.plot([6,6],[0,ax.get_ylim()[1]],"--",color="gray")
        ax.set_xlabel("Time after ip (h)")
        ax.set_xlim([0,24])
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        ax.legend(fontsize=10,frameon=False)
    plt.subplots_adjust(wspace=0.4, hspace=0.6)

def plot_PSD_1group(mean,sem,count,g_name,sleep_stage,ax1,y_label):
    freq_bins=sp.psd_freq_bins(sample_freq=128)
    frequency_columns = [f"f@{i}" for i in freq_bins]
    x_val=freq_bins
    
    y,err=extract_mean_n_err_for_PSD(mean,sem,g_name,"vehicle",sleep_stage)
    sample_n=count.loc[pd.IndexSlice[g_name,"vehicle",sleep_stage]].max()
    #label_str="vehicle (n=%d)"%sample_n
    label_str="vehicle"
    plot_timeseries(ax1,x_val,y,err,"k",label_str)

    y,err=extract_mean_n_err_for_PSD(mean,sem,g_name,"rapalog",sleep_stage)
    sample_n=count.loc[pd.IndexSlice[g_name,"rapalog",sleep_stage]].max()
    #label_str="rapalog (n=%d)"%sample_n
    label_str="rapalog"
    plot_timeseries(ax1,x_val,y,err,"r",label_str)
    
    for ax in [ax1]:
        #ax.set_ylabel("NREM sleep duration (min/h)")
        ax.set_ylabel(y_label)
        ax.set_xticks([0,6,12,18,24,30])
        ax.set_xticklabels([0,6,12,18,24,30])
        ax.set_xlim([0,30])
        ax.set_xlabel("EEG Frequency (Hz)")
        #ax.plot([6,6],[0,60],"--",color="gray")
        if y_label=="Norm power change":
            ax.set_yticks([0.5,1,1.5])
            ax.set_yticklabels([0.5,1,1.5])
            ax.set_ylim([0.5,1.5])
        else:
            ax.set_yticks([0,5,10])
            ax.set_yticklabels([0,5,10])
            ax.set_ylim([0,10])
        
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        ax.legend(fontsize=10,frameon=False)
    plt.subplots_adjust(wspace=0.4, hspace=0.6)
    
def plot_PSD_1group_zoom(mean,sem,count,g_name,sleep_stage,ax1,y_label):
    freq_bins=sp.psd_freq_bins(sample_freq=128)
    frequency_columns = [f"f@{i}" for i in freq_bins]
    x_val=freq_bins
    
    y,err=extract_mean_n_err_for_PSD(mean,sem,g_name,"vehicle",sleep_stage)
    sample_n=count.loc[pd.IndexSlice[g_name,"vehicle",sleep_stage]].max()
    #label_str="vehicle (n=%d)"%sample_n
    label_str="vehicle"
    plot_timeseries(ax1,x_val,y,err,"k",label_str)

    y,err=extract_mean_n_err_for_PSD(mean,sem,g_name,"rapalog",sleep_stage)
    sample_n=count.loc[pd.IndexSlice[g_name,"rapalog",sleep_stage]].max()
    #label_str="rapalog (n=%d)"%sample_n
    label_str="rapalog"
    plot_timeseries(ax1,x_val,y,err,"r",label_str)
    
    for ax in [ax1]:
        #ax.set_ylabel("NREM sleep duration (min/h)")
        ax.set_ylabel(y_label)
        ax.set_xticks([0,4,8,12])
        ax.set_xticklabels([0,4,8,12])
        ax.set_xlim([0,12])
        ax.set_xlabel("EEG Frequency (Hz)")
        #ax.plot([6,6],[0,60],"--",color="gray")
        if y_label=="Norm power change":
            #ax.set_yticks([0.6,1,1.4])
            #ax.set_yticklabels([0.6,1,1.4])
            #ax.set_ylim([0.6,1.4])
            ax.set_yticks([0.5,1,1.5])
            ax.set_yticklabels([0.5,1,1.5])
            ax.set_ylim([0.5,1.5])
        else:
            ax.set_yticks([0,5,10])
            ax.set_yticklabels([0,5,10])
            ax.set_ylim([0,10])
        
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        ax.legend(fontsize=10,frameon=False)
    plt.subplots_adjust(wspace=0.4, hspace=0.6)


def plot_bargraph(df,target_group,sleep_stage,y_value,y_label,ax,is_norm=False):
    mouse_list=df.loc[pd.IndexSlice[target_group,:,sleep_stage,"vehicle"]].index.get_level_values(1)
    sns.barplot(data=df.loc[pd.IndexSlice[target_group,:,sleep_stage],:].reset_index(), 
    x="drug", y=y_value,ax=ax,palette=["r","gray"])
    for m in mouse_list:
        data1=df.loc[pd.IndexSlice[target_group,m,sleep_stage,"rapalog"],y_value]
        data2=df.loc[pd.IndexSlice[target_group,m,sleep_stage,"vehicle"],y_value]
        ax.plot([0,1],[data1,data2],color="k")

    for ax in [ax]:
        if y_value=="min_per_hour":
            if sleep_stage=="REM":
                ax.set_ylim([0,10])
                ax.set_yticks([0,5,10])
            else:
                ax.set_ylim([0,60])
                ax.set_yticks([0,30,60])
        elif y_value=="bout_count":
            if sleep_stage=="REM":
                ax.set_ylim([0,10])
                ax.set_yticks([0,5,10])
            else:
                ax.set_ylim([0,40])
                ax.set_yticks([0,20,40])
        elif y_value=="mean_duration_sec":
            if sleep_stage=="Wake":
                ax.set_ylim([0,3000])
                ax.set_yticks([0,1500,3000])
            elif sleep_stage=="NREM":
                ax.set_ylim([0,600])
                ax.set_yticks([0,300,600])
            elif sleep_stage=="REM":
                ax.set_ylim([0,100])
                ax.set_yticks([0,50,100])
        elif y_value=="delta_power":
            if is_norm:
                ax.set_ylim([0.8,1.4])
                ax.set_yticks([0.8,1,1.2,1.4])
            else:
                ax.set_ylim([0,20])
                ax.set_yticks([0,10,20])
        elif y_value=="theta_power":
            if is_norm:
                ax.set_ylim([0.8,1.4])
                ax.set_yticks([0.8,1,1.2,1.4])
            else:
                ax.set_ylim([0,20])
                ax.set_yticks([0,10,20])
        #ax.set_ylabel("NREM sleep duration (min/h)")
        ax.set_ylabel(y_label)
        ax.set_xticks([0,1])
        ax.set_xticklabels(["rapalog","vehicle"],rotation=90)
        ax.set_xlim([-0.5,1.5])
        ax.set_xlabel("")
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)

def process_group_analysis(meta_stage_df,meta_merge_df_sw,meta_stage_bout_df,merge_psd_ts_df,merge_psd_profile_df):
    meta_stage_mean,meta_stage_sem,meta_stage_count=group_analysis_each_df(meta_stage_df)
    meta_sw_mean,meta_sw_sem,meta_sw_count=group_analysis_each_df(meta_merge_df_sw)
    meta_stage_bout_mean,meta_stage_bout_sem,meta_stage_bout_count=group_analysis_each_df(meta_stage_bout_df)
    meta_psd_ts_mean,meta_psd_ts_sem,meta_psd_ts_count=group_analysis_each_df(merge_psd_ts_df)
    meta_psd_profile_mean,meta_psd_profile_sem,meta_psd_profile_count=group_analysis_each_df(merge_psd_profile_df)

In [6]:
def fill_na(df):
    df = df.sort_index()
    # 'mouse_ID', 'stage'に基づいてグループ化し、各グループにおける欠損値を前後のtime_in_hourの値から補完
    df_filled = df.groupby(level=['mouse_ID', 'stage']).apply(lambda group: group.sort_index(level='time_in_hour').bfill().ffill())
    return df_filled

def calculate_mean_power(df, x, y):
    index_name_list=list(df.index.names)
    index_name_list.remove('exp_label')
    index_name_list.remove('time_in_hour')
    df=df.reset_index()
    # Frequency column names
    freq_cols = [f'{i}' for i in df.columns if 'f@' in i]+ ['delta_power', 'theta_power']
    # Filtering rows based on time_in_hour
    df_filtered = df[(df['time_in_hour'] >= x) & (df['time_in_hour'] <= y)]
    # Grouping by mouse_group and mouse_ID, and calculating mean for each frequency
    if "stage" in index_name_list:
        df_grouped = df_filtered.groupby(['mouse_group', 'mouse_ID','stage',"drug"])[freq_cols].mean().reset_index()
    if "type" in index_name_list:
        df_grouped = df_filtered.groupby(['mouse_group', 'mouse_ID','type',"drug"])[freq_cols].mean().reset_index()
    return df_grouped.set_index(index_name_list)

def calculate_normalized_psd_ts(df, x, y):
    return 
def calculate_mean_values(meta_stage_df, meta_stage_bout_df, x, y):
    index_name_list=list(meta_stage_df.index.names)
    index_name_list.remove('exp_label')
    index_name_list.remove('time_in_hour')
    meta_stage_df=meta_stage_df.reset_index()
    meta_stage_bout_df=meta_stage_bout_df.reset_index()
    # Filtering rows based on time_in_hour
    meta_stage_df_filtered = meta_stage_df[(meta_stage_df['time_in_hour'] >= x) & (meta_stage_df['time_in_hour'] <= y)]
    meta_stage_bout_df_filtered = meta_stage_bout_df[(meta_stage_bout_df['time_in_hour'] >= x) & (meta_stage_bout_df['time_in_hour'] <= y)]
    # Grouping by mouse_group, mouse_ID, drug and stage, and calculating mean for min_per_hour, bout_count, mean_duration_sec
    meta_stage_df_grouped = meta_stage_df_filtered.groupby(['mouse_group', 'mouse_ID', 'drug', 'stage'])['min_per_hour'].mean().reset_index()
    meta_stage_bout_df_grouped = meta_stage_bout_df_filtered.groupby(['mouse_group', 'mouse_ID', 'drug', 'stage'])['bout_count', 'mean_duration_sec'].mean().reset_index()
    # Merging the two dataframes
    df_merged = pd.merge(meta_stage_df_grouped, meta_stage_bout_df_grouped, on=['mouse_group', 'mouse_ID', 'drug', 'stage'])
    return df_merged.set_index(index_name_list)

def calculate_ratio_with_groupby(df, baseline_start, baseline_end):
    """
    Calculate baseline values for a specified `time_in_hour` range and compute ratios.

    Parameters:
    - df (pd.DataFrame): MultiIndex DataFrame with `time_in_hour` in the index.
    - baseline_start (int or float): Start of the baseline range for `time_in_hour`.
    - baseline_end (int or float): End of the baseline range for `time_in_hour`.

    Returns:
    - pd.DataFrame: DataFrame with normalized values.
    """

    # デバッグ: 初期状態のデータフレームを確認
    print("Initial DataFrame Index Names:", df.index.names)
    print("Initial DataFrame Columns:", df.columns)

    # MultiIndex をリセットして操作しやすくする
    df_reset = df.reset_index()
    print("After reset_index, Columns:", df_reset.columns)

    # 数値カラムを自動選択
    numeric_columns = df_reset.select_dtypes(include=['number']).columns.tolist()
    print("Detected numeric columns:", numeric_columns)

    # time_in_hour を退避（後でインデックスとして再設定するため）
    original_time_in_hour = df_reset['time_in_hour']

    # グループ化キー（reset_index の結果を反映）
    group_keys = [
        'mouse_ID_0', 'stage_1', 'exp_label', 
        'mouse_group', 'drug'
    ]

    # ベースラインデータの抽出
    baseline_df = df_reset[
        (df_reset['time_in_hour'] >= baseline_start) & (df_reset['time_in_hour'] <= baseline_end)
    ]
    if baseline_df.empty:
        raise ValueError(f"No data found in baseline range: {baseline_start} to {baseline_end}")

    # デバッグ: ベースラインデータを確認
    print("Baseline DataFrame:")
    print(baseline_df.head())

    # ベースライン値の計算
    baseline_means = (
        baseline_df.groupby(group_keys)[numeric_columns]
        .mean()
        .add_suffix('_baseline')
        .reset_index()
    )
    print("Baseline Means Calculated:")
    print(baseline_means.head())

    # 元データにベースライン値を結合
    merged_df = pd.merge(
        df_reset,
        baseline_means,
        on=group_keys,
        how='left'
    )

    # 正規化（ベースラインに対する比率を計算）
    for col in numeric_columns:
        baseline_col = f"{col}_baseline"
        merged_df[col] = (merged_df[col] / merged_df[baseline_col]) * 100

    # ベースライン列を削除
    merged_df.drop(columns=[f"{col}_baseline" for col in numeric_columns], inplace=True)

    # time_in_hour をインデックスとして再設定
    merged_df['time_in_hour'] = original_time_in_hour
    index_cols = ['mouse_ID_0', 'stage_1', 'exp_label', 'mouse_group', 'time_in_hour', 'drug']
    result_df = merged_df.set_index(index_cols)

    # デバッグ: 結果の確認
    print("Final DataFrame after normalization:")
    print(result_df.head())

    return result_df

def merge_n_plot(analyzed_dir_list,epoch_len_sec,sample_freq,exclude_mouse_list,target_group,output_dir,group_rename_dic=None):
    #merge analyzed data
    meta_stage_df,meta_sw_trans_df,meta_stage_bout_df,meta_psd_start_end_df=merge_sleep_stage_df(analyzed_dir_list,epoch_len_sec,sample_freq)
    merge_psd_ts_df,merge_psd_profile_df=merge_psd_df(analyzed_dir_list)
    
    #rename group if needed
    meta_stage_df=rename_group_name_bulk(meta_stage_df,group_rename_dic)
    meta_sw_trans_df=rename_group_name_bulk(meta_sw_trans_df,group_rename_dic)
    meta_stage_bout_df=rename_group_name_bulk(meta_stage_bout_df,group_rename_dic)
    meta_psd_start_end_df=rename_group_name_bulk(meta_psd_start_end_df,group_rename_dic)
    merge_psd_ts_df=rename_group_name_bulk(merge_psd_ts_df,group_rename_dic)
    merge_psd_profile_df=rename_group_name_bulk(merge_psd_profile_df,group_rename_dic)

    #exclude mouse if needed
    meta_stage_df=exclude_mouse(meta_stage_df,exclude_mouse_list)
    meta_sw_trans_df=exclude_mouse(meta_sw_trans_df,exclude_mouse_list)
    meta_stage_bout_df=exclude_mouse(meta_stage_bout_df,exclude_mouse_list)
    meta_psd_start_end_df=exclude_mouse(meta_psd_start_end_df,exclude_mouse_list)
    merge_psd_ts_df=exclude_mouse(merge_psd_ts_df,exclude_mouse_list)
    merge_psd_profile_df=exclude_mouse(merge_psd_profile_df,exclude_mouse_list)

    #fill nan
    merge_psd_ts_df=fill_na(merge_psd_ts_df)
      
    #make_normalized_psd_timeseries
    index_name_list=list(merge_psd_ts_df.index.names)

    # reset_indexを実行
    merge_norm_psd_ts_df=calculate_ratio_with_groupby(merge_psd_ts_df, 0, 5)
    merge_norm_psd_ts_df=merge_norm_psd_ts_df.set_index(index_name_list)
    #merge_norm_psd_ts_df.rename(columns={"delta_power":"norm_delta_power",
    #                                     "theta_power":"norm_theta_power"},inplace=True)

    #quantify timeseries data by time window
    merge_psd_ts_df_before=calculate_mean_power(merge_psd_ts_df, 6, 6)
    merge_psd_ts_df_after=calculate_mean_power(merge_psd_ts_df, 7, 7)
    #meta_psd_start_end_df_before=calculate_mean_power(meta_psd_start_end_df, 4, 6)
    #meta_psd_start_end_df_after=calculate_mean_power(meta_psd_start_end_df, 7, 9)
    #merge_norm_psd_ts_df_after=calculate_mean_power(merge_norm_psd_ts_df, 7, 9)
    meta_psd_start_end_df_before=calculate_mean_power(meta_psd_start_end_df, 6, 6)
    meta_psd_start_end_df_after=calculate_mean_power(meta_psd_start_end_df, 7, 7)
    merge_norm_psd_ts_df_after=calculate_mean_power(merge_norm_psd_ts_df, 7, 7)
    meta_stage_n_bout_df_before=calculate_mean_values(meta_stage_df, meta_stage_bout_df, 4, 6)
    meta_stage_n_bout_df_after=calculate_mean_values(meta_stage_df, meta_stage_bout_df, 10, 21)
    
    #rename_column_name
    merge_norm_psd_ts_df.rename(columns={"delta_power":"norm_delta_power",
                                         "theta_power":"norm_theta_power"},inplace=True)
    print(merge_norm_psd_ts_df)
    merge_norm_psd_ts_df.to_csv(os.path.join(output_dir,"merge_norm_psd_ts_df.csv"))

    #group analysis of timeseries data
    meta_stage_mean,meta_stage_sem,meta_stage_count=group_analysis_each_df(meta_stage_df)
    #meta_sw_trans_mean,meta_sw_trans_sem,meta_sw_trans_count=group_analysis_each_df(meta_sw_trans_df)
    meta_stage_bout_mean,meta_stage_bout_sem,meta_stage_bout_count=group_analysis_each_df(meta_stage_bout_df)
    meta_psd_ts_mean,meta_psd_ts_sem,meta_psd_ts_count=group_analysis_each_df(merge_psd_ts_df)
    meta_norm_psd_ts_mean,meta_norm_psd_ts_sem,meta_norm_psd_ts_count=group_analysis_each_df(merge_norm_psd_ts_df)
    meta_psd_ts_after_mean,meta_psd_ts_after_sem,meta_psd_ts_after_count=group_analysis_each_df(merge_psd_ts_df_after)
    meta_psd_ts_before_mean,meta_psd_ts_before_sem,meta_psd_ts_before_count=group_analysis_each_df(merge_psd_ts_df_before)
    meta_norm_psd_ts_after_mean,meta_norm_psd_ts_after_sem,meta_norm_psd_ts_after_count=group_analysis_each_df(merge_norm_psd_ts_df_after)
    meta_psd_start_end_df_mean,meta_psd_start_end_df_sem,meta_psd_start_end_df_count=group_analysis_each_df(meta_psd_start_end_df)
    meta_psd_start_end_df_before_mean,meta_psd_start_end_df_before_sem,meta_psd_start_end_df_before_count=group_analysis_each_df(meta_psd_start_end_df_before)
    meta_psd_start_end_df_after_mean,meta_psd_start_end_df_after_sem,meta_psd_start_end_df_after_count=group_analysis_each_df(meta_psd_start_end_df_after)
        
    meta_stage_df.to_csv(os.path.join(output_dir,"meta_stage_df.csv"))
    meta_sw_trans_df.to_csv(os.path.join(output_dir,"meta_sw_trans_df.csv"))
    meta_stage_bout_df.to_csv(os.path.join(output_dir,"meta_stage_bout_df.csv"))
    merge_psd_ts_df.to_csv(os.path.join(output_dir,"merge_psd_ts_df.csv"))
    merge_psd_profile_df.to_csv(os.path.join(output_dir,"merge_psd_profile_df.csv"))
    meta_psd_start_end_df.to_csv(os.path.join(output_dir,"meta_psd_start_end_df.csv"))
    meta_psd_start_end_df_before.to_csv(os.path.join(output_dir,"meta_psd_start_end_df_before.csv"))
    meta_psd_start_end_df_after.to_csv(os.path.join(output_dir,"meta_psd_start_end_df_after.csv"))
    meta_stage_n_bout_df_before.to_csv(os.path.join(output_dir,"meta_stage_n_bout_df_before.csv"))
    meta_stage_n_bout_df_after.to_csv(os.path.join(output_dir,"meta_stage_n_bout_df_after.csv"))
    merge_psd_ts_df_before.to_csv(os.path.join(output_dir,"merge_psd_ts_df_before.csv"))
    merge_psd_ts_df_after.to_csv(os.path.join(output_dir,"merge_psd_ts_df_after.csv"))
    merge_norm_psd_ts_df_after.to_csv(os.path.join(output_dir,"merge_norm_psd_ts_df_after.csv"))
    meta_norm_psd_ts_mean.to_csv(os.path.join(output_dir,"meta_norm_psd_ts_mean_df.csv"))
    # フォント設定
    plt.rcParams["font.size"] = 16
    plt.rcParams['pdf.fonttype'] = 42

    ##timeserisのプロット
    # gridspecを作成
    row_num=13
    col_num=3
    
    gs = gridspec.GridSpec(row_num,col_num)

    # Figureを作成
    fig = plt.figure(figsize=((col_num*3+1),row_num*4))

    # 各axesを作成
    axes = []
    for row in range(row_num):
        for col in range(col_num):
            ax = fig.add_subplot(gs[row, col])
            axes.append(ax)

    # 1行目: 各ステージの割合の時系列変化
    plot_ts_1group(meta_stage_mean,meta_stage_sem,meta_stage_count,
                target_group,sleep_stage="Wake",ax1=axes[0],val_name="min_per_hour",
                y_label="Wake duration (min/h)")
    plot_ts_1group(meta_stage_mean,meta_stage_sem,meta_stage_count,
                target_group,sleep_stage="NREM",ax1=axes[1],val_name="min_per_hour",
                y_label="NREM sleep duration (min/h)")
    plot_ts_1group(meta_stage_mean,meta_stage_sem,meta_stage_count,
                target_group,sleep_stage="REM",ax1=axes[2],val_name="min_per_hour",
                y_label="REM sleep duration (min/h)")

    # 2行目: 各ステージのブートの回数の時系列変化
    plot_ts_1group(meta_stage_bout_mean,meta_stage_bout_sem,meta_stage_bout_count,
                target_group,sleep_stage="Wake",ax1=axes[3],val_name="bout_count",
                y_label="Wake bout (/h)")
    plot_ts_1group(meta_stage_bout_mean,meta_stage_bout_sem,meta_stage_bout_count,
                target_group,sleep_stage="NREM",ax1=axes[4],val_name="bout_count",
                y_label="NREM bout (/h)")
    plot_ts_1group(meta_stage_bout_mean,meta_stage_bout_sem,meta_stage_bout_count,
                target_group,sleep_stage="REM",ax1=axes[5],val_name="bout_count",
                y_label="REM bout (/h)")

    # 3行目: 各ステージのブートの平均長さの時系列変化
    plot_ts_1group(meta_stage_bout_mean,meta_stage_bout_sem,meta_stage_bout_count,
                target_group,sleep_stage="Wake",ax1=axes[6],val_name="mean_duration_sec",
                y_label="mean Wake bout length (s)")
    plot_ts_1group(meta_stage_bout_mean,meta_stage_bout_sem,meta_stage_bout_count,
                target_group,sleep_stage="NREM",ax1=axes[7],val_name="mean_duration_sec",
                y_label="mean NREM bout length (s)")
    plot_ts_1group(meta_stage_bout_mean,meta_stage_bout_sem,meta_stage_bout_count,
                target_group,sleep_stage="REM",ax1=axes[8],val_name="mean_duration_sec",
                y_label="mean REM bout length (s)")
    
    # 4行目: 各ステージのデルタパワーの時系列変化
    plot_ts_1group(meta_psd_ts_mean,meta_psd_ts_sem,meta_psd_ts_count,
                target_group,sleep_stage="Wake",ax1=axes[9],val_name="delta_power",
                y_label="delta power (%)")
    plot_ts_1group(meta_psd_ts_mean,meta_psd_ts_sem,meta_psd_ts_count,
                target_group,sleep_stage="NREM",ax1=axes[10],val_name="delta_power",
                y_label="delta power (%)")
    plot_ts_1group(meta_psd_ts_mean,meta_psd_ts_sem,meta_psd_ts_count,
                target_group,sleep_stage="REM",ax1=axes[11],val_name="delta_power",
                y_label="delta power (%)")
    
    # 5行目: 各ステージのデルタパワーの時系列変化
    plot_ts_1group(meta_norm_psd_ts_mean,meta_norm_psd_ts_sem,meta_norm_psd_ts_count,
                target_group,sleep_stage="Wake",ax1=axes[12],val_name="norm_delta_power",
                y_label="norm. delta power")
    plot_ts_1group(meta_norm_psd_ts_mean,meta_norm_psd_ts_sem,meta_norm_psd_ts_count,
                target_group,sleep_stage="NREM",ax1=axes[13],val_name="norm_delta_power",
                y_label="norm. delta power")
    plot_ts_1group(meta_norm_psd_ts_mean,meta_norm_psd_ts_sem,meta_norm_psd_ts_count,
                target_group,sleep_stage="REM",ax1=axes[14],val_name="norm_delta_power",
                y_label="norm. delta power")
    
    # 6行目: 各ステージのシータパワーの時系列変化
    plot_ts_1group(meta_psd_ts_mean,meta_psd_ts_sem,meta_psd_ts_count,
                target_group,sleep_stage="Wake",ax1=axes[15],val_name="theta_power",
                y_label="theta power (%)")
    plot_ts_1group(meta_psd_ts_mean,meta_psd_ts_sem,meta_psd_ts_count,
                target_group,sleep_stage="NREM",ax1=axes[16],val_name="theta_power",
                y_label="theta power (%)")
    plot_ts_1group(meta_psd_ts_mean,meta_psd_ts_sem,meta_psd_ts_count,
                target_group,sleep_stage="REM",ax1=axes[17],val_name="theta_power",
                y_label="theta power (%)")
    
    # 7行目: 各ステージのデルタパワーの時系列変化
    plot_ts_1group(meta_norm_psd_ts_mean,meta_norm_psd_ts_sem,meta_norm_psd_ts_count,
                target_group,sleep_stage="Wake",ax1=axes[18],val_name="norm_theta_power",
                y_label="norm. theta power")
    plot_ts_1group(meta_norm_psd_ts_mean,meta_norm_psd_ts_sem,meta_norm_psd_ts_count,
                target_group,sleep_stage="NREM",ax1=axes[19],val_name="norm_theta_power",
                y_label="norm. theta power")
    plot_ts_1group(meta_norm_psd_ts_mean,meta_norm_psd_ts_sem,meta_norm_psd_ts_count,
                target_group,sleep_stage="REM",ax1=axes[20],val_name="norm_theta_power",
                y_label="norm. theta power")

    # 8行目: 薬剤投与前のパワースペクトラム密度
    plot_PSD_1group(meta_psd_ts_before_mean,meta_psd_ts_before_sem,meta_psd_ts_before_count,
                target_group,sleep_stage="Wake",ax1=axes[21],y_label="Normalized power (%)")
    plot_PSD_1group(meta_psd_ts_before_mean,meta_psd_ts_before_sem,meta_psd_ts_before_count,
                target_group,sleep_stage="NREM",ax1=axes[22],y_label="Normalized power (%)")
    plot_PSD_1group(meta_psd_ts_before_mean,meta_psd_ts_before_sem,meta_psd_ts_before_count,
                target_group,sleep_stage="REM",ax1=axes[23],y_label="Normalized power (%)")

    # 9行目: 薬剤投与後のパワースペクトラム密度
    plot_PSD_1group(meta_psd_ts_after_mean,meta_psd_ts_after_sem,meta_psd_ts_after_count,
                target_group,sleep_stage="Wake",ax1=axes[24],y_label="Normalized power (%)")
    plot_PSD_1group(meta_psd_ts_after_mean,meta_psd_ts_after_sem,meta_psd_ts_after_count,
                target_group,sleep_stage="NREM",ax1=axes[25],y_label="Normalized power (%)")
    plot_PSD_1group(meta_psd_ts_after_mean,meta_psd_ts_after_sem,meta_psd_ts_after_count,
                target_group,sleep_stage="REM",ax1=axes[26],y_label="Normalized power (%)")
    
    # 10行目: ブートの最初のエポックのデルタパワーの変化
    """
    plot_ts_1group(meta_psd_start_end_df_mean,meta_psd_start_end_df_sem,meta_psd_start_end_df_count,
                target_group,sleep_stage="wake_start",ax1=axes[27],val_name="delta_power",y_label="delta_power")
    plot_ts_1group(meta_psd_start_end_df_mean,meta_psd_start_end_df_sem,meta_psd_start_end_df_count,
                target_group,sleep_stage="nrem_start",ax1=axes[28],val_name="delta_power",y_label="delta_power")
    plot_ts_1group(meta_psd_start_end_df_mean,meta_psd_start_end_df_sem,meta_psd_start_end_df_count,
                target_group,sleep_stage="rem_start",ax1=axes[29],val_name="delta_power",y_label="delta_power")
    
    # 11行目: ブートの最後のエポックのデルタパワーの変化
    plot_ts_1group(meta_psd_start_end_df_mean,meta_psd_start_end_df_sem,meta_psd_start_end_df_count,
                target_group,sleep_stage="wake_end",ax1=axes[30],val_name="delta_power",y_label="delta_power")
    plot_ts_1group(meta_psd_start_end_df_mean,meta_psd_start_end_df_sem,meta_psd_start_end_df_count,
                target_group,sleep_stage="nrem_end",ax1=axes[31],val_name="delta_power",y_label="delta_power")
    plot_ts_1group(meta_psd_start_end_df_mean,meta_psd_start_end_df_sem,meta_psd_start_end_df_count,
                target_group,sleep_stage="rem_end",ax1=axes[32],val_name="delta_power",y_label="delta_power")
    """
    # 12行目: 薬剤投与後のパワースペクトラム密度
    plot_PSD_1group(meta_norm_psd_ts_after_mean,meta_norm_psd_ts_after_sem,meta_norm_psd_ts_after_count,
                target_group,sleep_stage="Wake",ax1=axes[33],y_label="Norm power change")
    plot_PSD_1group(meta_norm_psd_ts_after_mean,meta_norm_psd_ts_after_sem,meta_norm_psd_ts_after_count,
                target_group,sleep_stage="NREM",ax1=axes[34],y_label="Norm power change")
    plot_PSD_1group(meta_norm_psd_ts_after_mean,meta_norm_psd_ts_after_sem,meta_norm_psd_ts_after_count,
                target_group,sleep_stage="REM",ax1=axes[35],y_label="Norm power change")
    
    # 13行目: 薬剤投与後のパワースペクトラム密度
    plot_PSD_1group_zoom(meta_norm_psd_ts_after_mean,meta_norm_psd_ts_after_sem,meta_norm_psd_ts_after_count,
                target_group,sleep_stage="Wake",ax1=axes[36],y_label="Norm power change")
    plot_PSD_1group_zoom(meta_norm_psd_ts_after_mean,meta_norm_psd_ts_after_sem,meta_norm_psd_ts_after_count,
                target_group,sleep_stage="NREM",ax1=axes[37],y_label="Norm power change")
    plot_PSD_1group_zoom(meta_norm_psd_ts_after_mean,meta_norm_psd_ts_after_sem,meta_norm_psd_ts_after_count,
                target_group,sleep_stage="REM",ax1=axes[38],y_label="Norm power change")

    # プロットを表示
    plt.tight_layout()
    plt.show()

    # 図を保存
    fig.savefig(os.path.join(output_dir,"timeseries_and_PSD_plot.pdf"))
            
    ##bargraphのプロット
    
    # gridspecを作成
    # gridspecを作成
    row_num=5
    col_num=3
    
    gs = gridspec.GridSpec(row_num,col_num)

    # Figureを作成
    fig2 = plt.figure(figsize=((col_num*3+1),row_num*4))


    # 各axesを作成
    axes = []
    for row in range(row_num):
        for col in range(col_num):
            ax = fig2.add_subplot(gs[row, col])
            axes.append(ax)
    df=meta_stage_n_bout_df_after

    # 1行目: 各ステージの割合の時系列変化の平均値
    plot_bargraph(df,target_group,sleep_stage="Wake",y_value="min_per_hour",
                y_label="mean Wake duration (min/h)\n during 1-3h after ip",ax=axes[0])
    plot_bargraph(df,target_group,sleep_stage="NREM",y_value="min_per_hour",
                y_label="mean NREM duration (min/h)\n during 1-3h after ip",ax=axes[1])
    plot_bargraph(df,target_group,sleep_stage="REM",y_value="min_per_hour",
                y_label="mean REM duration (min/h)\n during 1-3h after ip",ax=axes[2])

    # 2行目: 各ステージのブート数の時系列変化の平均値
    plot_bargraph(df,target_group,sleep_stage="Wake",y_value="bout_count",
                y_label="mean Wake bout count (/h)\n during 1-3h after ip",ax=axes[3])
    plot_bargraph(df,target_group,sleep_stage="NREM",y_value="bout_count",
                y_label="mean NREM bout count (/h)\n during 1-3h after ip",ax=axes[4])
    plot_bargraph(df,target_group,sleep_stage="REM",y_value="bout_count",
                y_label="mean REM bout count (/h)\n during 1-3h after ip",ax=axes[5])

    # 3行目: 各ステージのブートの長さの時系列変化の平均値
    plot_bargraph(df,target_group,sleep_stage="Wake",y_value="mean_duration_sec",
                y_label="mean Wake bout length (s)\n during 1-3h after ip",ax=axes[6])
    plot_bargraph(df,target_group,sleep_stage="NREM",y_value="mean_duration_sec",
                y_label="mean NREM bout length (s)\n during 1-3h after ip",ax=axes[7])
    plot_bargraph(df,target_group,sleep_stage="REM",y_value="mean_duration_sec",
                y_label="mean REM bout length (s)\n during 1-3h after ip",ax=axes[8])
    
    # 4行目: 各ステージの薬剤投与後のデルタの変化
    df=merge_norm_psd_ts_df_after
    plot_bargraph(df,target_group,sleep_stage="Wake",y_value="delta_power",
                y_label="relative delta power change\n during 1h after ip",ax=axes[9],is_norm=True)
    plot_bargraph(df,target_group,sleep_stage="NREM",y_value="delta_power",
                y_label="relative delta power change\n during 1h after ip",ax=axes[10],is_norm=True)
    plot_bargraph(df,target_group,sleep_stage="REM",y_value="delta_power",
                y_label="relative delta power change\n during 1-3h after ip",ax=axes[11],is_norm=True)
    
    # 5行目: 各ステージの薬剤投与後のシータの変化
    df=merge_norm_psd_ts_df_after
    plot_bargraph(df,target_group,sleep_stage="Wake",y_value="theta_power",
                y_label="relative theta power change\n during 1h after ip",ax=axes[12],is_norm=True)
    plot_bargraph(df,target_group,sleep_stage="NREM",y_value="theta_power",
                y_label="relative theta power change\n during 1h after ip",ax=axes[13],is_norm=True)
    plot_bargraph(df,target_group,sleep_stage="REM",y_value="theta_power",
                y_label="relative theta power change\n during 1-3h after ip",ax=axes[14],is_norm=True)


    plt.tight_layout()
    plt.show()
    
    fig2.savefig(os.path.join(output_dir,"bargraph.pdf"))
    
    
def wilcoxon_n_paried_t(stage_df,psd_df,bout_df,target_group,stage):
    print(stage)
    print(target_group)
    print("stage duration")
    data1=stage_df[(stage_df.mouse_group==target_group)&(stage_df.stage==stage)&(stage_df.drug=="vehicle")].min_per_hour
    data2=stage_df[(stage_df.mouse_group==target_group)&(stage_df.stage==stage)&(stage_df.drug=="rapalog")].min_per_hour
    from scipy.stats import wilcoxon
    # ウィルコクソンの符号順位検定
    statistic, p_value = wilcoxon(data1, data2)
    print("wilcoxon")
    print('Statistic:', statistic)
    print('p-value:', p_value)

    #from scipy.stats import ttest_rel
    #statistic, p_value = ttest_rel(data1, data2)
    #print("paired t test")
    #print('Statistic:', statistic)
    #print('p-value:', p_value)
    
    print("stage bout count")
    data1=bout_df[(bout_df.mouse_group==target_group)&(bout_df.stage==stage)&(bout_df.drug=="vehicle")].bout_count
    data2=bout_df[(bout_df.mouse_group==target_group)&(bout_df.stage==stage)&(bout_df.drug=="rapalog")].bout_count
    
    # ウィルコクソンの符号順位検定
    statistic, p_value = wilcoxon(data1, data2)
    print("wilcoxon")
    print('Statistic:', statistic)
    print('p-value:', p_value)
    
    print("stage bout length")
    data1=bout_df[(bout_df.mouse_group==target_group)&(bout_df.stage==stage)&(bout_df.drug=="vehicle")].mean_duration_sec
    data2=bout_df[(bout_df.mouse_group==target_group)&(bout_df.stage==stage)&(bout_df.drug=="rapalog")].mean_duration_sec

    # ウィルコクソンの符号順位検定
    statistic, p_value = wilcoxon(data1, data2)
    print("wilcoxon")
    print('Statistic:', statistic)
    print('p-value:', p_value)

    print("norm delta power")
    data1=psd_df[(psd_df.mouse_group==target_group)&(psd_df.stage==stage)&(psd_df.drug=="vehicle")].delta_power
    data2=psd_df[(psd_df.mouse_group==target_group)&(psd_df.stage==stage)&(psd_df.drug=="rapalog")].delta_power

    # ウィルコクソンの符号順位検定
    statistic, p_value = wilcoxon(data1, data2)
    print("wilcoxon")
    print('Statistic:', statistic)
    print('p-value:', p_value)

    return

# Analysis for sleep stage and PSD

In [7]:
analyzed_dir_list=["/p-antipsychotics-sleep/analyzed/prjCZP-9/20241202"]

#"/temp/analyzed/prj11/20231230_2" is omitted by abnormal PSD shape

rename_dict = {
    "PFC-ex-enlarge-HP-hM4Di": "PFC-ex-enlarge",
    "uniPFC-ex-enlarge": "PFC-ex-enlarge",
    "biPFC-ex-enlarge": "PFC-ex-enlarge",
    "noAAV": "PFC-ex-GFPctrl"
    }

exclude_mouse_list=[]
target_group="WT"
output_dir="/p-antipsychotics-sleep/figure/prjCZP-9/effect-of-CZP"
os.makedirs(output_dir,exist_ok=True)
epoch_len_sec=8
sample_freq=128
#merge_n_plot(analyzed_dir_list,epoch_len_sec,sample_freq,exclude_mouse_list,target_group,output_dir,group_rename_dic=rename_dict)
test=merge_n_plot(analyzed_dir_list,epoch_len_sec,sample_freq,exclude_mouse_list,target_group,output_dir,group_rename_dic=rename_dict)

/p-antipsychotics-sleep/analyzed/prjCZP-9/20241202/vehicle_24h_before6h/stagetime_stats.npy
/p-antipsychotics-sleep/analyzed/prjCZP-9/20241202/vehicle_24h_before6h/psd_info_list.pkl
/p-antipsychotics-sleep/analyzed/prjCZP-9/20241202/rapalog_24h_before6h/stagetime_stats.npy


  change_points = change_points.shift(-1).fillna(False)
  change_points = change_points.shift(-1).fillna(False)
  change_points = change_points.shift(-1).fillna(False)
  change_points = change_points.shift(-1).fillna(False)


/p-antipsychotics-sleep/analyzed/prjCZP-9/20241202/rapalog_24h_before6h/psd_info_list.pkl


  change_points = change_points.shift(-1).fillna(False)
  change_points = change_points.shift(-1).fillna(False)
  change_points = change_points.shift(-1).fillna(False)
  change_points = change_points.shift(-1).fillna(False)
  df['delta_power'] = df[delta_columns].apply(np.mean, axis=1)
  df['theta_power'] = df[theta_columns].apply(np.mean, axis=1)
  df['delta_power'] = df[delta_columns].apply(np.mean, axis=1)
  df['theta_power'] = df[theta_columns].apply(np.mean, axis=1)


Initial DataFrame Index Names: ['mouse_ID', 'stage', 'exp_label', 'mouse_group', 'mouse_ID', 'stage', 'time_in_hour', 'drug']
Initial DataFrame Columns: Index(['Device label', 'epoch #', 'f@0.0', 'f@0.39143730886850153',
       'f@0.7828746177370031', 'f@1.1743119266055047', 'f@1.5657492354740061',
       'f@1.9571865443425076', 'f@2.3486238532110093', 'f@2.7400611620795106',
       ...
       'f@47.36391437308868', 'f@47.75535168195719', 'f@48.14678899082569',
       'f@48.53822629969419', 'f@48.92966360856269', 'f@49.321100917431195',
       'f@49.71253822629969', 'f@50.103975535168196', 'delta_power',
       'theta_power'],
      dtype='object', length=133)


ValueError: cannot insert stage, already exists

In [14]:
test

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Device label,epoch #,f@0.0,f@0.39143730886850153,f@0.7828746177370031,f@1.1743119266055047,f@1.5657492354740061,f@1.9571865443425076,f@2.3486238532110093,f@2.7400611620795106,...,f@47.36391437308868,f@47.75535168195719,f@48.14678899082569,f@48.53822629969419,f@48.92966360856269,f@49.321100917431195,f@49.71253822629969,f@50.103975535168196,delta_power,theta_power
mouse_ID,stage,exp_label,mouse_group,mouse_ID,stage,time_in_hour,drug,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
YI033,NREM,EEG_p-iino-1-1,WT,YI033,NREM,0,rapalog,Ch0,208,0.228605,0.789053,1.868880,3.349512,4.294789,4.739086,5.040432,5.468253,...,0.000199,0.000179,0.000159,0.000148,0.000140,0.000131,0.000136,0.000134,5.029675,2.482488
YI033,NREM,EEG_p-iino-1-1,WT,YI033,NREM,0,vehicle,Ch0,321,0.205784,0.724794,1.708488,2.965944,3.896916,4.502828,5.003271,5.561362,...,0.000210,0.000200,0.000186,0.000172,0.000159,0.000147,0.000152,0.000152,4.969058,2.531525
YI033,NREM,EEG_p-iino-1-1,WT,YI033,NREM,1,rapalog,Ch0,338,0.232433,0.832696,1.954994,3.424819,4.495342,4.873630,5.011729,5.244805,...,0.000202,0.000199,0.000184,0.000166,0.000154,0.000146,0.000146,0.000142,4.946909,2.504302
YI033,NREM,EEG_p-iino-1-1,WT,YI033,NREM,1,vehicle,Ch0,371,0.207495,0.767009,1.752045,2.986193,3.998139,4.608984,5.029374,5.411054,...,0.000222,0.000205,0.000196,0.000186,0.000170,0.000151,0.000149,0.000146,4.876493,2.555400
YI033,NREM,EEG_p-iino-1-1,WT,YI033,NREM,2,rapalog,Ch0,204,0.195075,0.711189,1.678705,2.864633,3.917926,4.731031,5.317797,5.652919,...,0.000225,0.000204,0.000183,0.000170,0.000160,0.000153,0.000152,0.000146,5.047196,2.495795
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YI036,Wake,EEG_p-iino-1-1,WT,YI036,Wake,21,vehicle,Ch3,234,0.264819,1.088688,2.016940,2.836186,3.273414,3.643433,4.049651,4.341839,...,0.000733,0.000654,0.000619,0.000559,0.000509,0.000465,0.000427,0.000401,4.006635,2.879676
YI036,Wake,EEG_p-iino-1-1,WT,YI036,Wake,22,rapalog,Ch3,24,0.229768,0.733444,1.733834,3.346141,4.422489,5.152306,6.334002,7.164731,...,0.000557,0.000523,0.000515,0.000468,0.000433,0.000400,0.000347,0.000318,6.018002,2.166850
YI036,Wake,EEG_p-iino-1-1,WT,YI036,Wake,22,vehicle,Ch3,109,0.233852,1.003472,1.810760,2.462402,3.140010,3.801144,4.442034,4.922645,...,0.000461,0.000448,0.000418,0.000385,0.000357,0.000325,0.000287,0.000255,4.230430,2.811774
YI036,Wake,EEG_p-iino-1-1,WT,YI036,Wake,23,rapalog,Ch3,82,0.216976,0.753515,1.587263,2.598009,3.500212,4.616593,6.453883,7.660143,...,0.000495,0.000459,0.000414,0.000355,0.000315,0.000297,0.000278,0.000252,5.876463,2.227716


In [17]:
test.index

MultiIndex([('YI033', 'NREM', 'EEG_p-iino-1-1', 'WT', 'YI033', 'NREM', ...),
            ('YI033', 'NREM', 'EEG_p-iino-1-1', 'WT', 'YI033', 'NREM', ...),
            ('YI033', 'NREM', 'EEG_p-iino-1-1', 'WT', 'YI033', 'NREM', ...),
            ('YI033', 'NREM', 'EEG_p-iino-1-1', 'WT', 'YI033', 'NREM', ...),
            ('YI033', 'NREM', 'EEG_p-iino-1-1', 'WT', 'YI033', 'NREM', ...),
            ('YI033', 'NREM', 'EEG_p-iino-1-1', 'WT', 'YI033', 'NREM', ...),
            ('YI033', 'NREM', 'EEG_p-iino-1-1', 'WT', 'YI033', 'NREM', ...),
            ('YI033', 'NREM', 'EEG_p-iino-1-1', 'WT', 'YI033', 'NREM', ...),
            ('YI033', 'NREM', 'EEG_p-iino-1-1', 'WT', 'YI033', 'NREM', ...),
            ('YI033', 'NREM', 'EEG_p-iino-1-1', 'WT', 'YI033', 'NREM', ...),
            ...
            ('YI036', 'Wake', 'EEG_p-iino-1-1', 'WT', 'YI036', 'Wake', ...),
            ('YI036', 'Wake', 'EEG_p-iino-1-1', 'WT', 'YI036', 'Wake', ...),
            ('YI036', 'Wake', 'EEG_p-iino-1-1', 'WT', 'YI036

In [46]:
import pandas as pd

def calculate_ratio_with_groupby(df, baseline_start, baseline_end):
    """
    Calculate baseline values for a specified `time_in_hour` range and compute ratios.

    Parameters:
    - df (pd.DataFrame): MultiIndex DataFrame with `time_in_hour` in the index.
    - baseline_start (int or float): Start of the baseline range for `time_in_hour`.
    - baseline_end (int or float): End of the baseline range for `time_in_hour`.

    Returns:
    - pd.DataFrame: DataFrame with normalized values.
    """

    # デバッグ: 初期状態のデータフレームを確認
    print("Initial DataFrame Index Names:", df.index.names)
    print("Initial DataFrame Columns:", df.columns)

    # MultiIndex をリセットして操作しやすくする
    df_reset = df.reset_index()
    print("After reset_index, Columns:", df_reset.columns)

    # 数値カラムを自動選択
    numeric_columns = df_reset.select_dtypes(include=['number']).columns.tolist()
    print("Detected numeric columns:", numeric_columns)

    # time_in_hour を退避（後でインデックスとして再設定するため）
    original_time_in_hour = df_reset['time_in_hour']

    # グループ化キー（reset_index の結果を反映）
    group_keys = [
        'mouse_ID_0', 'stage_1', 'exp_label', 
        'mouse_group', 'drug'
    ]

    # ベースラインデータの抽出
    baseline_df = df_reset[
        (df_reset['time_in_hour'] >= baseline_start) & (df_reset['time_in_hour'] <= baseline_end)
    ]
    if baseline_df.empty:
        raise ValueError(f"No data found in baseline range: {baseline_start} to {baseline_end}")

    # デバッグ: ベースラインデータを確認
    print("Baseline DataFrame:")
    print(baseline_df.head())

    # ベースライン値の計算
    baseline_means = (
        baseline_df.groupby(group_keys)[numeric_columns]
        .mean()
        .add_suffix('_baseline')
        .reset_index()
    )
    print("Baseline Means Calculated:")
    print(baseline_means.head())

    # 元データにベースライン値を結合
    merged_df = pd.merge(
        df_reset,
        baseline_means,
        on=group_keys,
        how='left'
    )

    # 正規化（ベースラインに対する比率を計算）
    for col in numeric_columns:
        baseline_col = f"{col}_baseline"
        merged_df[col] = (merged_df[col] / merged_df[baseline_col]) * 100

    # ベースライン列を削除
    merged_df.drop(columns=[f"{col}_baseline" for col in numeric_columns], inplace=True)

    # time_in_hour をインデックスとして再設定
    merged_df['time_in_hour'] = original_time_in_hour
    index_cols = ['mouse_ID_0', 'stage_1', 'exp_label', 'mouse_group', 'time_in_hour', 'drug']
    result_df = merged_df.set_index(index_cols)

    # デバッグ: 結果の確認
    print("Final DataFrame after normalization:")
    print(result_df.head())

    return result_df




In [47]:
test2=calculate_ratio_with_groupby(test, 0, 5)

Initial DataFrame Index Names: ['mouse_ID_0', 'stage_1', 'exp_label', 'mouse_group', 'mouse_ID_4', 'stage_5', 'time_in_hour', 'drug']
Initial DataFrame Columns: Index(['Device label', 'epoch #', 'f@0.0', 'f@0.39143730886850153',
       'f@0.7828746177370031', 'f@1.1743119266055047', 'f@1.5657492354740061',
       'f@1.9571865443425076', 'f@2.3486238532110093', 'f@2.7400611620795106',
       ...
       'f@47.36391437308868', 'f@47.75535168195719', 'f@48.14678899082569',
       'f@48.53822629969419', 'f@48.92966360856269', 'f@49.321100917431195',
       'f@49.71253822629969', 'f@50.103975535168196', 'delta_power',
       'theta_power'],
      dtype='object', length=133)
After reset_index, Columns: Index(['mouse_ID_0', 'stage_1', 'exp_label', 'mouse_group', 'mouse_ID_4',
       'stage_5', 'time_in_hour', 'drug', 'Device label', 'epoch #',
       ...
       'f@47.36391437308868', 'f@47.75535168195719', 'f@48.14678899082569',
       'f@48.53822629969419', 'f@48.92966360856269', 'f@49.321100

In [48]:
test2

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,mouse_ID_4,stage_5,Device label,epoch #,f@0.0,f@0.39143730886850153,f@0.7828746177370031,f@1.1743119266055047,f@1.5657492354740061,f@1.9571865443425076,...,f@47.36391437308868,f@47.75535168195719,f@48.14678899082569,f@48.53822629969419,f@48.92966360856269,f@49.321100917431195,f@49.71253822629969,f@50.103975535168196,delta_power,theta_power
mouse_ID_0,stage_1,exp_label,mouse_group,time_in_hour,drug,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1
YI033,NREM,EEG_p-iino-1-1,WT,0,rapalog,YI033,NREM,Ch0,84.267387,112.661962,110.055681,108.838492,109.081422,104.060594,99.506728,...,95.474380,92.686557,89.332053,89.719180,91.193340,90.209995,92.623377,93.749239,101.148125,99.060326
YI033,NREM,EEG_p-iino-1-1,WT,0,vehicle,YI033,NREM,Ch0,112.565751,107.972699,106.790662,104.838718,103.073799,102.014726,101.266468,...,93.984815,96.774374,97.082452,95.156301,93.579728,92.624052,96.738368,99.575475,103.498034,98.061136
YI033,NREM,EEG_p-iino-1-1,WT,1,rapalog,YI033,NREM,Ch0,136.934504,114.548110,116.142901,113.853563,111.533896,108.919896,102.331752,...,96.876449,102.668789,103.217051,100.453951,100.337372,100.620212,99.480631,98.833930,99.483683,99.930775
YI033,NREM,EEG_p-iino-1-1,WT,1,vehicle,YI033,NREM,Ch0,130.099357,108.870583,113.010544,107.511554,103.777514,104.664598,103.653846,...,99.085524,99.467672,102.173221,102.942651,99.877455,95.109787,95.239256,96.026719,101.570026,98.985941
YI033,NREM,EEG_p-iino-1-1,WT,2,rapalog,YI033,NREM,Ch0,82.646860,96.137190,99.195329,97.763211,93.290662,94.929413,99.337585,...,107.683574,105.282726,102.782739,102.606198,104.106147,105.325436,103.563357,101.908506,101.500480,99.591296
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YI036,Wake,EEG_p-iino-1-1,WT,21,vehicle,YI036,Wake,Ch3,132.828761,108.172081,116.855759,116.232384,115.507045,108.716815,104.501005,...,103.651400,98.041167,101.253672,100.243664,97.375822,96.543052,95.019094,96.190654,99.677274,99.561975
YI036,Wake,EEG_p-iino-1-1,WT,22,rapalog,YI036,Wake,Ch3,15.015641,107.931859,90.978314,112.244444,147.687969,155.555289,152.356813,...,89.182874,91.942974,97.906218,96.608111,97.096361,97.738965,90.834014,88.665296,152.272865,73.997198
YI036,Wake,EEG_p-iino-1-1,WT,22,vehicle,YI036,Wake,Ch3,61.873226,95.522757,107.708974,104.350626,100.284255,104.286193,109.024467,...,65.234635,67.134809,68.279585,68.971458,68.275235,67.368921,63.867104,61.198323,105.244859,97.214322
YI036,Wake,EEG_p-iino-1-1,WT,23,rapalog,YI036,Wake,Ch3,51.303441,101.923011,93.467938,102.755793,114.667808,123.115387,136.515471,...,79.317314,80.641987,78.604719,73.254984,70.670376,72.503785,72.772313,70.336647,148.691517,76.075735


In [39]:
test

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Device label,epoch #,f@0.0,f@0.39143730886850153,f@0.7828746177370031,f@1.1743119266055047,f@1.5657492354740061,f@1.9571865443425076,f@2.3486238532110093,f@2.7400611620795106,...,f@47.36391437308868,f@47.75535168195719,f@48.14678899082569,f@48.53822629969419,f@48.92966360856269,f@49.321100917431195,f@49.71253822629969,f@50.103975535168196,delta_power,theta_power
mouse_ID_0,stage_1,exp_label,mouse_group,mouse_ID_4,stage_5,time_in_hour,drug,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1
YI033,NREM,EEG_p-iino-1-1,WT,YI033,NREM,0,rapalog,Ch0,208,0.228605,0.789053,1.868880,3.349512,4.294789,4.739086,5.040432,5.468253,...,0.000199,0.000179,0.000159,0.000148,0.000140,0.000131,0.000136,0.000134,5.029675,2.482488
YI033,NREM,EEG_p-iino-1-1,WT,YI033,NREM,0,vehicle,Ch0,321,0.205784,0.724794,1.708488,2.965944,3.896916,4.502828,5.003271,5.561362,...,0.000210,0.000200,0.000186,0.000172,0.000159,0.000147,0.000152,0.000152,4.969058,2.531525
YI033,NREM,EEG_p-iino-1-1,WT,YI033,NREM,1,rapalog,Ch0,338,0.232433,0.832696,1.954994,3.424819,4.495342,4.873630,5.011729,5.244805,...,0.000202,0.000199,0.000184,0.000166,0.000154,0.000146,0.000146,0.000142,4.946909,2.504302
YI033,NREM,EEG_p-iino-1-1,WT,YI033,NREM,1,vehicle,Ch0,371,0.207495,0.767009,1.752045,2.986193,3.998139,4.608984,5.029374,5.411054,...,0.000222,0.000205,0.000196,0.000186,0.000170,0.000151,0.000149,0.000146,4.876493,2.555400
YI033,NREM,EEG_p-iino-1-1,WT,YI033,NREM,2,rapalog,Ch0,204,0.195075,0.711189,1.678705,2.864633,3.917926,4.731031,5.317797,5.652919,...,0.000225,0.000204,0.000183,0.000170,0.000160,0.000153,0.000152,0.000146,5.047196,2.495795
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
YI036,Wake,EEG_p-iino-1-1,WT,YI036,Wake,21,vehicle,Ch3,234,0.264819,1.088688,2.016940,2.836186,3.273414,3.643433,4.049651,4.341839,...,0.000733,0.000654,0.000619,0.000559,0.000509,0.000465,0.000427,0.000401,4.006635,2.879676
YI036,Wake,EEG_p-iino-1-1,WT,YI036,Wake,22,rapalog,Ch3,24,0.229768,0.733444,1.733834,3.346141,4.422489,5.152306,6.334002,7.164731,...,0.000557,0.000523,0.000515,0.000468,0.000433,0.000400,0.000347,0.000318,6.018002,2.166850
YI036,Wake,EEG_p-iino-1-1,WT,YI036,Wake,22,vehicle,Ch3,109,0.233852,1.003472,1.810760,2.462402,3.140010,3.801144,4.442034,4.922645,...,0.000461,0.000448,0.000418,0.000385,0.000357,0.000325,0.000287,0.000255,4.230430,2.811774
YI036,Wake,EEG_p-iino-1-1,WT,YI036,Wake,23,rapalog,Ch3,82,0.216976,0.753515,1.587263,2.598009,3.500212,4.616593,6.453883,7.660143,...,0.000495,0.000459,0.000414,0.000355,0.000315,0.000297,0.000278,0.000252,5.876463,2.227716
