In [None]:
%load_ext autoreload
%autoreload 2
import os
import sys
from pathlib import Path
import pandas as pd
import joblib

# Treat the parent of the notebook's working directory as the project root and
# put it on sys.path so project-level packages can be imported
# (presumably `src` and `config` live there - confirm against the repo layout).
project_root = Path(os.getcwd()).parent
# Guard the append: re-running this cell must not stack duplicate entries.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))


## Oral Analysis

In [None]:
from glob import glob
import os
import pandas as pd
from src.preprocess_b import Recording_Processor

# Run every raw Task2 recording file through Recording_Processor and store the
# converted table under the processed Task2 directory.
processor = Recording_Processor()

raw_pattern = '../data/raw/Task2/*_rec.csv'
converted_dir = '../data/processed/Task2/'
# to_csv does not create missing directories, so make sure the target exists
# (mirrors what the preprocess_main_* helpers do for their own outputs).
os.makedirs(converted_dir, exist_ok=True)

files = glob(raw_pattern)
# Sort numerically on the token right before "_rec.csv" so that e.g. 10 comes
# after 9 (presumably the subject number - confirm against the file naming).
files.sort(key=lambda x: int(x.split('_')[-2]))
for file in files:
    # "<name>_rec.csv" -> "<name>": drop the extension, then the 4-char "_rec" suffix.
    file_name = os.path.basename(file).split('.')[0][:-4]
    new_file_path = os.path.join(converted_dir, file_name + '_convert_rec.csv')
    df = pd.read_csv(file)
    result_df = processor.process(df)
    result_df.to_csv(new_file_path, index=False)

## Generate processed files

In [4]:
from src.preprocess_a import Preprocessor_A
from src.preprocess_b import Preprocessor_B
# Shared preprocessor instances; the driver cell passes preprocessor_a to
# preprocess_main_a and preprocessor_b to preprocess_main_b.
preprocessor_a = Preprocessor_A()
preprocessor_b = Preprocessor_B()

from config import body_length, features_range, canvas_settings

In [5]:
# Task1a, Task1b, Task3c preprocess
def preprocess_main_a(project_root, taskID, subIDs, features_range, canvas_settings, body_length, preprocessor):
    """Convert each subject's raw mouse trajectory into a feature-trajectory CSV.

    For every subject in ``subIDs`` the function reads
    ``<taskID>_<subID>_mouse.csv`` from ``<project_root>/data/raw/<taskID>``,
    selects the initial feature values for the task, passes both to
    ``preprocessor.process`` and writes the returned frame to
    ``<project_root>/data/processed/<taskID>/<taskID>_<subID>_feature.csv``.

    Initial features by task:
      * ``'Task1a'``: one row with all eight features set to 0.5.
      * ``'Task1b'``: rows of the subject's ``_sti.csv`` whose ``type`` equals 2.
      * any other value: the subject's ``_sti.csv`` unfiltered.

    Args:
        project_root: Root directory that contains the ``data`` folder.
        taskID: Task identifier; also the name of the raw/processed sub-folder.
        subIDs: Iterable of subject identifiers used in the file names.
        features_range: Forwarded unchanged to ``preprocessor.process``.
        canvas_settings: Forwarded unchanged to ``preprocessor.process``.
        body_length: Forwarded unchanged to ``preprocessor.process``.
        preprocessor: Object exposing ``process(taskID, feature_init,
            mouse_trajectory, features_range, canvas_settings, body_length)``
            whose return value has a ``to_csv`` method.

    Returns:
        None. Results are written to disk.
    """
    feature_names = (
        'neck_length', 'head_length', 'leg_length', 'tail_length',
        'neck_angle', 'head_angle', 'leg_angle', 'tail_angle',
    )
    source_dir = Path(project_root) / 'data' / 'raw' / taskID
    target_dir = Path(project_root) / 'data' / 'processed' / taskID
    os.makedirs(target_dir, exist_ok=True)

    for subID in subIDs:
        prefix = f'{taskID}_{subID}'

        if taskID == 'Task1a':
            # No stimulus file is read for Task1a: every feature starts at 0.5.
            start_features = pd.DataFrame({name: [0.5] for name in feature_names})
        else:
            start_features = pd.read_csv(source_dir / f'{prefix}_sti.csv')
            if taskID == 'Task1b':
                # Keep only the rows flagged with type == 2.
                start_features = start_features[start_features['type'] == 2]

        mouse_samples = pd.read_csv(source_dir / f'{prefix}_mouse.csv')

        features_over_time = preprocessor.process(
            taskID, start_features, mouse_samples,
            features_range, canvas_settings, body_length,
        )
        features_over_time.to_csv(target_dir / f'{prefix}_feature.csv', index=False)

# Task2, Task3a, Task3b preprocess
def preprocess_main_b(project_root, taskID, subIDs, preprocessor):
    """Combine stimulus and behavior files of all subjects into one CSV.

    Files are read from ``<project_root>/data/raw/<taskID>``; the concatenated
    result is written to ``<project_root>/data/processed/<taskID>_processed.csv``.

    Per-subject inputs by task:
      * ``'Task2'``: ``_sti.csv``, ``_rec.csv`` and ``_bhv.csv``; the recording
        frame is passed to ``preprocessor.process`` as a fourth argument.
      * ``'Task3a'``: ``_sti.csv`` and ``_bhv.csv``.
      * ``'Task3b'``: ``_left.csv`` and ``_right.csv`` merged on ``pairID``,
        plus ``_bhv.csv``.

    Args:
        project_root: Root directory that contains the ``data`` folder.
        taskID: One of ``'Task2'``, ``'Task3a'``, ``'Task3b'``.
        subIDs: Iterable of subject identifiers used in the file names.
        preprocessor: Object exposing ``process(taskID, stimulus, behavior
            [, recording])`` that returns a DataFrame.

    Returns:
        None. The combined table is written to disk.

    Raises:
        ValueError: If ``taskID`` is not supported, or if ``subIDs`` yields no
            subjects (nothing to concatenate).
    """
    supported_tasks = ('Task2', 'Task3a', 'Task3b')
    # Fail fast: without this check an unknown task fell through the branches
    # below and only surfaced later as an unrelated missing-file/unbound-name error.
    if taskID not in supported_tasks:
        raise ValueError(f"Unsupported taskID {taskID!r}; expected one of {supported_tasks}")

    input_dir = Path(project_root) / 'data' / 'raw' / taskID
    output_dir = Path(project_root) / 'data' / 'processed'
    os.makedirs(output_dir, exist_ok=True)

    all_data = []
    for subID in subIDs:
        prefix = f'{taskID}_{subID}'

        if taskID == 'Task3b':
            left_stimulus_data = pd.read_csv(input_dir / f'{prefix}_left.csv')
            right_stimulus_data = pd.read_csv(input_dir / f'{prefix}_right.csv')
            # Pair the two stimulus tables row by row via their shared pair id.
            stimulus_data = pd.merge(left_stimulus_data, right_stimulus_data, on=['pairID'])
        else:
            stimulus_data = pd.read_csv(input_dir / f'{prefix}_sti.csv')

        behavior_args = []
        if taskID == 'Task2':
            # Only Task2 has an additional recording file.
            behavior_args.append(pd.read_csv(input_dir / f'{prefix}_rec.csv'))

        behavior_data = pd.read_csv(input_dir / f'{prefix}_bhv.csv')

        combined_data = preprocessor.process(taskID, stimulus_data, behavior_data, *behavior_args)

        # Tag every row with its subject before stacking.
        combined_data.insert(0, 'iSub', subID)
        all_data.append(combined_data)

    if not all_data:
        raise ValueError("subIDs is empty; there is nothing to process")

    processed_data = pd.concat(all_data, ignore_index=True)
    processed_data.to_csv(os.path.join(output_dir, f'{taskID}_processed.csv'), index=False)

In [16]:
# Task1b, Task3c reconstruct
def preprocess_construct(project_root, taskID, subIDs):
    """Append each trial's final adjusted features to the stimulus table.

    For every subject the stimulus file
    ``<project_root>/data/raw/<taskID>/<taskID>_<subID>_sti.csv`` is loaded and
    labelled via a ``type`` column:
      * ``'Task1b'``: columns ``version``, ``display_height`` and ``PairID`` are
        dropped and the numeric ``type`` codes 1/2 become
        ``'target'``/``'adjust_init'``.
      * ``'Task3c'``: a ``type`` column filled with ``'adjust_init'`` is added.

    One ``'adjust_after'`` row is then created per ``'adjust_init'`` row, carrying
    that row's ``iTrial``/``body_ori`` plus the eight feature values taken from
    the end of the trial in
    ``<project_root>/data/processed/<taskID>/<taskID>_<subID>_feature.csv``.
    All subjects are stacked (with an ``iSub`` column in front) and written to
    ``<project_root>/data/processed/<taskID>_processed.csv``.

    Args:
        project_root: Root directory that contains the ``data`` folder.
        taskID: ``'Task1b'`` or ``'Task3c'``.
        subIDs: Iterable of subject identifiers used in the file names.

    Returns:
        None. The combined table is written to disk.

    Raises:
        ValueError: If ``taskID`` is neither ``'Task1b'`` nor ``'Task3c'``.
    """
    supported_tasks = ('Task1b', 'Task3c')
    # Fail fast: an unknown task used to leave `stimulus_data` unassigned and
    # crash later with an unrelated error.
    if taskID not in supported_tasks:
        raise ValueError(f"Unsupported taskID {taskID!r}; expected one of {supported_tasks}")

    raw_dir = Path(project_root) / 'data' / 'raw' / taskID
    processed_dir = Path(project_root) / 'data' / 'processed' / taskID
    output_dir = Path(project_root) / 'data' / 'processed'

    # Loop-invariant: the feature columns copied from the trajectory files.
    feature_columns = ['neck_length', 'head_length', 'leg_length', 'tail_length',
                       'neck_angle', 'head_angle', 'leg_angle', 'tail_angle']

    all_data = []
    for subID in subIDs:
        stimulus_data = pd.read_csv(raw_dir / f'{taskID}_{subID}_sti.csv')
        if taskID == 'Task1b':
            stimulus_data = stimulus_data.drop(columns=['version', 'display_height', 'PairID'])
            stimulus_data['type'] = stimulus_data['type'].replace({1: 'target', 2: 'adjust_init'})
        else:  # 'Task3c'
            stimulus_data.insert(0, 'type', 'adjust_init')

        feature_trajactory = pd.read_csv(processed_dir / f'{taskID}_{subID}_feature.csv')
        # NOTE(review): GroupBy.last() returns the last non-null value of each
        # column, which differs from the literal last row when the final sample
        # contains NaNs - confirm this is the intended behavior.
        adjust_after = feature_trajactory.groupby('iTrial').last().reset_index()

        # Skeleton rows: one per adjustable stimulus, relabelled as the end state.
        new_rows = stimulus_data[stimulus_data['type'] == 'adjust_init'][['iTrial', 'body_ori']].copy()
        new_rows.insert(0, 'type', 'adjust_after')

        # Left join keeps trials that have no trajectory (features become NaN).
        new_rows = new_rows.merge(adjust_after[['iTrial'] + feature_columns], on='iTrial', how='left')

        combined_data = pd.concat([stimulus_data, new_rows], ignore_index=True)
        combined_data.insert(0, 'iSub', subID)
        all_data.append(combined_data)

    processed_data = pd.concat(all_data, ignore_index=True)
    processed_data.to_csv(os.path.join(output_dir, f'{taskID}_processed.csv'), index=False)

In [4]:
# Subject IDs 1 through 24.
subIDs = list(range(1, 25))

# Feature-trajectory extraction (currently disabled; uncomment to regenerate):
# preprocess_main_a(project_root, 'Task1a', subIDs, features_range, canvas_settings, body_length, preprocessor_a)
# preprocess_main_a(project_root, 'Task1b', subIDs, features_range, canvas_settings, body_length, preprocessor_a)
# preprocess_main_a(project_root, 'Task3c', subIDs, features_range, canvas_settings, body_length, preprocessor_a)

# Reconstruction from the feature trajectories (currently disabled):
# preprocess_construct(project_root, 'Task1b', subIDs)
# preprocess_construct(project_root, 'Task3c', subIDs)

# Only the Task2 stimulus/behavior merge is active in this run.
preprocess_main_b(project_root, 'Task2', subIDs, preprocessor_b)
# preprocess_main_b(project_root, 'Task3a', subIDs, preprocessor_b)

## Perceptual Error Analysis

In [3]:
# Locate the project root (parent of the working directory) and make it importable.
project_root = Path(os.getcwd()).parent
# Add the path only once: an unconditional append would duplicate the entry
# every time this cell (or an earlier setup cell) has already run.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# Import the error-evaluation module and force a reload so that edits made to
# it since the first import are picked up before Processor is bound.
import importlib
import src.error_evaluation as error_evaluation
importlib.reload(error_evaluation)
from src.error_evaluation import Processor

# Create the perceptual-error processor.
error_processor = Processor()

In [4]:
# Compute the perceptual error from the reconstructed Task1b table.
processed_dir = Path(project_root).joinpath('data', 'processed')
processed_data = pd.read_csv(processed_dir.joinpath('Task1b_processed.csv'))

error = error_processor.error_calculation(processed_data)
summary = error_processor.error_summary(error)

# Continuous interpolation of the error.
continuous_predictions = error_processor.analyze_length_error_relationship(error)

In [None]:
# Folder that receives all figures of this section.
result_path = Path(project_root).joinpath('results', 'Preprocess')
result_path.mkdir(parents=True, exist_ok=True)

# Error plots, one per error kind.
for error_kind in ("length", "angle"):
    error_processor.plot_error(error, error_kind, result_path)

# Error plots broken down by feature.
error_processor.plot_error_by_feature(error, result_path)

# Plot of the continuous error interpolation.
error_processor.plot_error_interpolate(error, continuous_predictions, result_path)