## 1. Preprocess ##

In [None]:
import os
import sys
from pathlib import Path
import pandas as pd
import numpy as np
import math
# from tqdm.notebook import tqdm
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Resolve the project root (the parent of the current working directory)
# and append it to sys.path so the `src` package below can be imported.
project_root = Path(os.getcwd()).parent
sys.path.append(str(project_root))

# Import the preprocessing module and reload it, so the classes imported
# below come from a freshly re-executed module.
import importlib
import src.preprocess as preprocess
importlib.reload(preprocess)
from src.preprocess import Preprocessor_A, Preprocessor_B

# Instantiate the two preprocessors used by the functions in this section.
preprocessor_a = Preprocessor_A()
preprocessor_b = Preprocessor_B()

In [None]:
from config import body_length, features_range, canvas_settings

In [None]:
# Task1a, Task1b, Task3c preprocess
def preprocess_main_a(project_root, taskID, subIDs, features_range, canvas_settings, body_length, preprocessor):
    """Convert each subject's raw mouse trajectory into a feature trajectory.

    For every subject in ``subIDs`` the raw files are read from
    ``<project_root>/data/raw/<taskID>/`` and the result of
    ``preprocessor.process`` is written to
    ``<project_root>/data/processed/<taskID>/<taskID>_<subID>_feature.csv``.

    The initial feature values handed to the preprocessor depend on the task:

    * ``'Task1a'``: a single row with every feature set to 0.5.
    * ``'Task1b'``: the rows of the ``_sti.csv`` file whose ``type`` is 2.
    * any other task: the whole ``_sti.csv`` file.

    Args:
        project_root: Root directory of the project (str or Path).
        taskID: Task name; also the sub-directory and file-name prefix.
        subIDs: Iterable of subject identifiers.
        features_range: Forwarded unchanged to ``preprocessor.process``.
        canvas_settings: Forwarded unchanged to ``preprocessor.process``.
        body_length: Forwarded unchanged to ``preprocessor.process``.
        preprocessor: Object exposing ``process(taskID, feature_init,
            mouse_trajectory, features_range, canvas_settings, body_length)``
            whose return value provides ``to_csv``.
    """
    raw_dir = Path(project_root) / 'data' / 'raw' / taskID
    out_dir = Path(project_root) / 'data' / 'processed' / taskID
    os.makedirs(out_dir, exist_ok=True)

    feature_names = ['neck_length', 'head_length', 'leg_length', 'tail_length',
                     'neck_angle', 'head_angle', 'leg_angle', 'tail_angle']

    for subID in subIDs:
        prefix = f'{taskID}_{subID}'

        if taskID == 'Task1a':
            # No stimulus file is read for Task1a: every feature starts at 0.5.
            # A fresh frame is built per subject, as the preprocessor receives it.
            feature_init = pd.DataFrame({name: [0.5] for name in feature_names})
        else:
            feature_init = pd.read_csv(raw_dir / f'{prefix}_sti.csv')
            if taskID == 'Task1b':
                # Only rows with type == 2 are used as initial features.
                feature_init = feature_init[feature_init['type'] == 2]

        mouse_trajectory = pd.read_csv(raw_dir / f'{prefix}_mouse.csv')

        feature_trajectory = preprocessor.process(
            taskID, feature_init, mouse_trajectory,
            features_range, canvas_settings, body_length,
        )
        feature_trajectory.to_csv(os.path.join(out_dir, f'{prefix}_feature.csv'), index=False)

# Task2, Task3a, Task3b preprocess
def preprocess_main_b(project_root, taskID, subIDs, preprocessor):
    """Combine stimulus and behavior files of all subjects into one CSV.

    For every subject in ``subIDs`` the stimulus and behavior files are read
    from ``<project_root>/data/raw/<taskID>/``, passed to
    ``preprocessor.process`` and tagged with an ``iSub`` column. The
    per-subject results are concatenated and written to
    ``<project_root>/data/processed/<taskID>_processed.csv``.

    Stimulus input per task:

    * ``'Task2'`` / ``'Task3a'``: ``<taskID>_<subID>_sti.csv``.
    * ``'Task3b'``: ``_left.csv`` and ``_right.csv`` merged on ``pairID``.

    Args:
        project_root: Root directory of the project (str or Path).
        taskID: One of ``'Task2'``, ``'Task3a'``, ``'Task3b'``.
        subIDs: Iterable of subject identifiers.
        preprocessor: Object exposing ``process(taskID, stimulus_data,
            behavior_data)`` that returns a DataFrame.

    Raises:
        ValueError: If ``taskID`` is not supported or ``subIDs`` is empty.
    """
    supported_tasks = ('Task2', 'Task3a', 'Task3b')
    if taskID not in supported_tasks:
        # Previously an unknown task left `stimulus_data` unbound (or stale
        # from the previous subject); fail early with a clear message instead.
        raise ValueError(
            f'Unsupported taskID {taskID!r}; expected one of {supported_tasks}'
        )

    input_path = Path(project_root) / 'data' / 'raw' / taskID
    output_path = Path(project_root) / 'data' / 'processed'
    os.makedirs(output_path, exist_ok=True)

    all_data = []
    for subID in subIDs:
        if taskID == 'Task3b':
            left_stimulus_data = pd.read_csv(input_path / f'{taskID}_{subID}_left.csv')
            right_stimulus_data = pd.read_csv(input_path / f'{taskID}_{subID}_right.csv')
            stimulus_data = pd.merge(left_stimulus_data, right_stimulus_data, on=['pairID'])
        else:
            stimulus_data = pd.read_csv(input_path / f'{taskID}_{subID}_sti.csv')

        behavior_data = pd.read_csv(input_path / f'{taskID}_{subID}_bhv.csv')

        combined_data = preprocessor.process(taskID, stimulus_data, behavior_data)
        combined_data.insert(0, 'iSub', subID)
        all_data.append(combined_data)

    if not all_data:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError('subIDs is empty; nothing to preprocess')

    processed_data = pd.concat(all_data, ignore_index=True)
    processed_data.to_csv(output_path / f'{taskID}_processed.csv', index=False)

In [136]:
# Task1b, Task3c reconstruct
def preprocess_construct(project_root, taskID, subIDs):
    """Build the combined stimulus / adjusted-result table for a task.

    For every subject the raw stimulus file
    ``<project_root>/data/raw/<taskID>/<taskID>_<subID>_sti.csv`` and the
    feature trajectory
    ``<project_root>/data/processed/<taskID>/<taskID>_<subID>_feature.csv``
    are read. One ``'adjust_after'`` row is appended per ``'adjust_init'``
    stimulus row, carrying that trial's ``iTrial``/``body_ori`` and the last
    feature values of the trajectory for the same ``iTrial``. All subjects are
    concatenated (with an ``iSub`` column) and written to
    ``<project_root>/data/processed/<taskID>_processed.csv``.

    Task-specific stimulus handling:

    * ``'Task1b'``: columns ``version``, ``display_height`` and ``PairID`` are
      dropped and ``type`` values 1 / 2 become ``'target'`` / ``'adjust_init'``.
    * ``'Task3c'``: a ``type`` column equal to ``'adjust_init'`` is inserted.

    Args:
        project_root: Root directory of the project (str or Path).
        taskID: ``'Task1b'`` or ``'Task3c'``.
        subIDs: Iterable of subject identifiers.

    Raises:
        ValueError: If ``taskID`` is not supported or ``subIDs`` is empty.
    """
    supported_tasks = ('Task1b', 'Task3c')
    if taskID not in supported_tasks:
        # Previously an unknown task left `stimulus_data` unbound (or stale
        # from the previous subject); fail early with a clear message instead.
        raise ValueError(
            f'Unsupported taskID {taskID!r}; expected one of {supported_tasks}'
        )

    raw_path = Path(project_root) / 'data' / 'raw' / taskID
    processed_path = Path(project_root) / 'data' / 'processed' / taskID
    output_path = Path(project_root) / 'data' / 'processed'

    # Loop-invariant: the feature columns copied into the adjust_after rows.
    feature_columns = ['neck_length', 'head_length', 'leg_length', 'tail_length',
                       'neck_angle', 'head_angle', 'leg_angle', 'tail_angle']

    all_data = []
    for subID in subIDs:
        stimulus_data = pd.read_csv(raw_path / f'{taskID}_{subID}_sti.csv')
        if taskID == 'Task1b':
            stimulus_data = stimulus_data.drop(columns=['version', 'display_height', 'PairID'])
            stimulus_data['type'] = stimulus_data['type'].replace({1: 'target', 2: 'adjust_init'})
        else:  # 'Task3c'
            stimulus_data.insert(0, 'type', 'adjust_init')

        feature_trajectory = pd.read_csv(processed_path / f'{taskID}_{subID}_feature.csv')
        # NOTE(review): GroupBy.last() takes the last *non-null* value of each
        # column, so a trailing NaN is replaced by an earlier value -- confirm
        # the trajectories contain no NaNs, or that this is intended.
        adjust_after = feature_trajectory.groupby('iTrial').last().reset_index()

        # One adjust_after row per adjust_init row, keyed by iTrial.
        new_rows = stimulus_data[stimulus_data['type'] == 'adjust_init'][['iTrial', 'body_ori']].copy()
        new_rows.insert(0, 'type', 'adjust_after')
        new_rows = new_rows.merge(adjust_after[['iTrial'] + feature_columns], on='iTrial', how='left')

        combined_data = pd.concat([stimulus_data, new_rows], ignore_index=True)
        combined_data.insert(0, 'iSub', subID)
        all_data.append(combined_data)

    if not all_data:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError('subIDs is empty; nothing to reconstruct')

    processed_data = pd.concat(all_data, ignore_index=True)
    processed_data.to_csv(output_path / f'{taskID}_processed.csv', index=False)


In [139]:
# Subject IDs to process in this run.
subIDs = [1,2,3,7,8,9,13,14,15,19,20,21]
# Step 1: write the per-subject feature trajectories for Task1b and Task3c
# (the Task1a call is currently disabled).
# preprocess_main_a(project_root, 'Task1a', subIDs, features_range, canvas_settings, body_length, preprocessor_a)
preprocess_main_a(project_root, 'Task1b', subIDs, features_range, canvas_settings, body_length, preprocessor_a)
preprocess_main_a(project_root, 'Task3c', subIDs, features_range, canvas_settings, body_length, preprocessor_a)

# Step 2: build the combined <taskID>_processed.csv files. This must run after
# step 1 because it reads the *_feature.csv files written there.
preprocess_construct(project_root, 'Task1b', subIDs)
preprocess_construct(project_root, 'Task3c', subIDs)

# Stimulus/behavior preprocessing for Task2 and Task3a (currently disabled).
# preprocess_main_b(project_root, 'Task2', subIDs, preprocessor_b)
# preprocess_main_b(project_root, 'Task3a', subIDs, preprocessor_b)

## 2. Perceptual Error Analysis

In [141]:
# Resolve the project root (the parent of the current working directory)
# and append it to sys.path so the `src` package below can be imported.
project_root = Path(os.getcwd()).parent
sys.path.append(str(project_root))

# Import the error-evaluation module and reload it, so the class imported
# below comes from a freshly re-executed module.
import importlib
import src.error_evaluation as error_evaluation
importlib.reload(error_evaluation)
from src.error_evaluation import Processor

# Instantiate the error-evaluation processor.
processor = Processor()

In [None]:
def error_calculation(processed_data):
    """Compute per-trial adjustment errors (adjusted value minus target value).

    Within each subject (``iSub``) the rows with ``type == 'target'`` are
    paired with the rows with ``type == 'adjust_after'`` that share the same
    ``iTrial``, and for every feature column ``<feature>_diff`` is computed as
    ``adjust_after - target``. Pairing is done on ``iTrial`` rather than on row
    position, so the result does not depend on the row order of the input and
    a target trial without an ``adjust_after`` row yields NaN differences.

    Args:
        processed_data: DataFrame with columns ``iSub``, ``iTrial``, ``type``
            and the eight feature columns listed in ``columns`` below.

    Returns:
        DataFrame with one row per target row, containing ``iSub``,
        ``iTrial``, the target feature values and the ``<feature>_diff``
        columns. Empty (with those columns) if there are no subjects.

    Raises:
        pandas.errors.MergeError: If a subject has more than one
            ``adjust_after`` row for the same ``iTrial``.
    """
    columns = ['neck_length', 'head_length', 'leg_length', 'tail_length',
               'neck_angle', 'head_angle', 'leg_angle', 'tail_angle']
    diff_columns = [f'{col}_diff' for col in columns]

    results = []
    for _, group in processed_data.groupby('iSub'):
        target = group[group['type'] == 'target'].reset_index(drop=True)
        adjust_after = group[group['type'] == 'adjust_after']

        result = target[['iSub', 'iTrial'] + columns].copy()

        # A left merge keeps the target row order and yields a fresh
        # 0..n-1 index, so `aligned` lines up row-for-row with `target`.
        aligned = result[['iTrial']].merge(
            adjust_after[['iTrial'] + columns],
            on='iTrial', how='left', validate='many_to_one',
        )
        for col, diff_col in zip(columns, diff_columns):
            result[diff_col] = aligned[col] - target[col]
        results.append(result)

    if not results:
        # pd.concat([]) raises; return an empty table with the expected columns.
        return pd.DataFrame(columns=['iSub', 'iTrial'] + columns + diff_columns)

    return pd.concat(results, ignore_index=True)

In [142]:
def error_summary(error):
    """Summarize length errors per subject and length level.

    Builds one row for every combination of subject (``iSub``) and unique
    ``neck_length`` value found in ``error``. The same level is used as
    ``<part>_length`` for each of neck, head, leg and tail, and for each part
    the mean and standard deviation of ``<part>_length_diff`` at that level
    are attached as ``<part>_length_error_mean`` / ``<part>_length_error_sd``
    (NaN where the subject has no data at that level for the part).

    Args:
        error: DataFrame with ``iSub`` plus, for every part, the columns
            ``<part>_length`` and ``<part>_length_diff``.

    Returns:
        DataFrame sorted by ``iSub`` and ``neck_length``.
    """
    body_parts = ['neck', 'head', 'leg', 'tail']

    # Subject x level grid; levels are taken from the neck_length column only.
    levels = error['neck_length'].unique()
    subjects = error['iSub'].unique()
    grid = pd.DataFrame([(sub, level) for sub in subjects for level in levels],
                        columns=['iSub', 'length'])

    # Every part gets the same level column so it can serve as a merge key.
    summary = grid[['iSub']].copy()
    for part in body_parts:
        summary[f'{part}_length'] = grid['length']

    # Attach mean / sd of the length error, one part at a time.
    for part in body_parts:
        key = f'{part}_length'
        part_stats = (
            error.groupby(['iSub', key])[f'{key}_diff']
            .agg(['mean', 'std'])
            .reset_index()
            .rename(columns={'mean': f'{key}_error_mean', 'std': f'{key}_error_sd'})
        )
        summary = summary.merge(part_stats, on=['iSub', key], how='left')

    return summary.sort_values(['iSub', 'neck_length'])

In [144]:

# Load the combined Task1b table from <project_root>/data/processed.
processed_path = Path(project_root) / 'data' / 'processed'
processed_data = pd.read_csv(processed_path / f'Task1b_processed.csv')

# Per-trial errors, then the per-subject / per-length summary.
error = error_calculation(processed_data)
summary = error_summary(error)

In [None]:
def evaluate_main(project_root, processor):
    """Load the processed Task1b data and compute its error table and summary.

    Args:
        project_root: Root directory of the project (str or Path).
        processor: Object exposing ``error_calculation(processed_data)`` and
            ``error_summary(error)``.

    NOTE(review): as visible here, ``error`` and ``summary`` are computed but
    neither returned nor saved -- confirm whether a return is intended.
    """
    processed_path = Path(project_root) / 'data' / 'processed'
    processed_data = pd.read_csv(processed_path / f'Task1b_processed.csv')

    # Per-trial errors from the combined table.
    error = processor.error_calculation(processed_data)

    # Aggregate the per-trial errors.
    summary = processor.error_summary(error)

    