In [2]:
import os
import sys
from pathlib import Path
import pandas as pd
import joblib

## 1. Preprocess ##

In [22]:
# Project root = parent of the current working directory
# (assumes the notebook is launched from a sub-folder of the project — TODO confirm).
project_root = Path(os.getcwd()).parent
# Make the project root importable so the `src.*` modules below can be found.
sys.path.append(str(project_root))

# Import the preprocessors; each module is reloaded before its class is imported
# so that edits to the source files are picked up without restarting the kernel.
import importlib
import src.preprocess_a as preprocess_a
importlib.reload(preprocess_a)
from src.preprocess_a import Preprocessor_A

import src.preprocess_b as preprocess_b
importlib.reload(preprocess_b)
from src.preprocess_b import Preprocessor_B

# Instantiate the preprocessors
preprocessor_a = Preprocessor_A()
preprocessor_b = Preprocessor_B()

# Import the configuration values
from config import body_length, features_range, canvas_settings

In [12]:
# Task1a, Task1b, Task3c preprocess
def preprocess_main_a(project_root, taskID, subIDs, features_range, canvas_settings, body_length, preprocessor):
    """Turn each subject's raw mouse trajectory into a feature-trajectory CSV.

    For every subject in ``subIDs`` this reads ``<taskID>_<subID>_mouse.csv``
    from ``data/raw/<taskID>``, selects the initial feature values for the task,
    passes both to ``preprocessor.process`` and writes the returned frame to
    ``data/processed/<taskID>/<taskID>_<subID>_feature.csv`` (without index).

    Args:
        project_root: Project root directory (str or Path).
        taskID: Task name. 'Task1a' and 'Task1b' are special-cased; any other
            value uses the whole stimulus file as the initial features.
        subIDs: Iterable of subject identifiers used in the file names.
        features_range: Passed through unchanged to ``preprocessor.process``.
        canvas_settings: Passed through unchanged to ``preprocessor.process``.
        body_length: Passed through unchanged to ``preprocessor.process``.
        preprocessor: Object exposing ``process(taskID, feature_init,
            mouse_trajectory, features_range, canvas_settings, body_length)``
            whose return value provides ``to_csv``.
    """
    task_raw_dir = Path(project_root) / 'data' / 'raw' / taskID
    task_out_dir = Path(project_root) / 'data' / 'processed' / taskID
    os.makedirs(task_out_dir, exist_ok=True)

    feature_names = (
        'neck_length', 'head_length', 'leg_length', 'tail_length',
        'neck_angle', 'head_angle', 'leg_angle', 'tail_angle',
    )

    for subID in subIDs:
        file_stem = f'{taskID}_{subID}'

        if taskID == 'Task1a':
            # Task1a has no stimulus file: every feature starts at 0.5.
            # A fresh frame is built per subject so the preprocessor never
            # sees an object it may have modified for a previous subject.
            initial_features = pd.DataFrame({name: [0.5] for name in feature_names})
        elif taskID == 'Task1b':
            # Only rows with type == 2 are used as starting points
            # (type 2 is labelled 'adjust_init' in preprocess_construct).
            stimuli = pd.read_csv(task_raw_dir / f'{file_stem}_sti.csv')
            initial_features = stimuli[stimuli['type'] == 2]
        else:
            initial_features = pd.read_csv(task_raw_dir / f'{file_stem}_sti.csv')

        mouse_path = task_raw_dir / f'{file_stem}_mouse.csv'
        mouse_data = pd.read_csv(mouse_path)

        features = preprocessor.process(
            taskID, initial_features, mouse_data,
            features_range, canvas_settings, body_length,
        )
        target_file = os.path.join(task_out_dir, f'{file_stem}_feature.csv')
        features.to_csv(target_file, index=False)

# Task2, Task3a, Task3b preprocess
def preprocess_main_b(project_root, taskID, subIDs, preprocessor):
    """Merge every subject's stimulus and behavior files into one processed CSV.

    Files are read from ``data/raw/<taskID>``; the per-subject frames returned
    by ``preprocessor.process`` get an ``iSub`` column prepended, are
    concatenated, and are written to ``data/processed/<taskID>_processed.csv``.

    Per-task inputs:
        * 'Task2'  - ``_sti``, ``_rec`` and ``_bhv`` files; the recording frame
          is passed to the preprocessor as a fourth argument.
        * 'Task3a' - ``_sti`` and ``_bhv`` files.
        * 'Task3b' - ``_left`` and ``_right`` files merged on ``pairID``, plus
          the ``_bhv`` file.

    Args:
        project_root: Project root directory (str or Path).
        taskID: One of 'Task2', 'Task3a', 'Task3b'.
        subIDs: Iterable of subject identifiers used in the file names.
        preprocessor: Object exposing ``process(taskID, stimulus_data,
            behavior_data[, recording_data])`` returning a DataFrame.

    Raises:
        ValueError: If ``taskID`` is not supported or ``subIDs`` is empty.
    """
    supported_tasks = ('Task2', 'Task3a', 'Task3b')
    # Previously an unknown task left `stimulus_data` unbound (NameError) or,
    # worse, reused the previous iteration's frame; fail early and clearly.
    if taskID not in supported_tasks:
        raise ValueError(f"Unsupported taskID {taskID!r}; expected one of {supported_tasks}")

    subIDs = list(subIDs)
    if not subIDs:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError("subIDs is empty; nothing to preprocess")

    input_dir = Path(project_root) / 'data' / 'raw' / taskID
    output_dir = Path(project_root) / 'data' / 'processed'
    os.makedirs(output_dir, exist_ok=True)

    def read_raw(subID, suffix):
        """Read ``<taskID>_<subID>_<suffix>.csv`` from the raw task folder."""
        return pd.read_csv(input_dir / f'{taskID}_{subID}_{suffix}.csv')

    all_data = []
    for subID in subIDs:
        recording_data = None
        if taskID == 'Task2':
            stimulus_data = read_raw(subID, 'sti')
            recording_data = read_raw(subID, 'rec')
        elif taskID == 'Task3a':
            stimulus_data = read_raw(subID, 'sti')
        else:  # 'Task3b': left/right stimuli are stored separately
            left_stimulus_data = read_raw(subID, 'left')
            right_stimulus_data = read_raw(subID, 'right')
            stimulus_data = pd.merge(left_stimulus_data, right_stimulus_data, on=['pairID'])

        behavior_data = read_raw(subID, 'bhv')

        if taskID == 'Task2':
            combined_data = preprocessor.process(taskID, stimulus_data, behavior_data, recording_data)
        else:
            combined_data = preprocessor.process(taskID, stimulus_data, behavior_data)

        # Tag every row with its subject before stacking subjects together.
        combined_data.insert(0, 'iSub', subID)
        all_data.append(combined_data)

    processed_data = pd.concat(all_data, ignore_index=True)
    processed_data.to_csv(os.path.join(output_dir, f'{taskID}_processed.csv'), index=False)

In [16]:
# Task1b, Task3c reconstruct
def preprocess_construct(project_root, taskID, subIDs):
    """Append each trial's final adjusted features to the stimulus table.

    For every subject the raw stimulus file (``data/raw/<taskID>``) is loaded
    and combined with the per-trial end state of the feature trajectory
    produced earlier (``data/processed/<taskID>/<taskID>_<subID>_feature.csv``).
    The end state is added as extra rows of type ``'adjust_after'``. All
    subjects are stacked and written to
    ``data/processed/<taskID>_processed.csv``.

    Args:
        project_root: Project root directory (str or Path).
        taskID: 'Task1b' or 'Task3c'.
        subIDs: Iterable of subject identifiers used in the file names.

    Raises:
        ValueError: If ``taskID`` is not supported or ``subIDs`` is empty.
    """
    supported_tasks = ('Task1b', 'Task3c')
    # Previously an unknown task left `stimulus_data` unbound (NameError) or
    # silently reused the previous subject's frame; reject it up front.
    if taskID not in supported_tasks:
        raise ValueError(f"Unsupported taskID {taskID!r}; expected one of {supported_tasks}")

    subIDs = list(subIDs)
    if not subIDs:
        # pd.concat([]) would raise an opaque "No objects to concatenate".
        raise ValueError("subIDs is empty; nothing to reconstruct")

    raw_dir = Path(project_root) / 'data' / 'raw' / taskID
    processed_dir = Path(project_root) / 'data' / 'processed' / taskID
    output_dir = Path(project_root) / 'data' / 'processed'

    feature_columns = ['neck_length', 'head_length', 'leg_length', 'tail_length',
                       'neck_angle', 'head_angle', 'leg_angle', 'tail_angle']

    all_data = []
    for subID in subIDs:
        stimulus_data = pd.read_csv(raw_dir / f'{taskID}_{subID}_sti.csv')
        if taskID == 'Task1b':
            stimulus_data = stimulus_data.drop(columns=['version', 'display_height', 'PairID'])
            # Numeric row types become readable labels.
            stimulus_data['type'] = stimulus_data['type'].replace({1: 'target', 2: 'adjust_init'})
        else:  # 'Task3c': the file has no type column; every row is a start state
            stimulus_data.insert(0, 'type', 'adjust_init')

        feature_trajactory = pd.read_csv(processed_dir / f'{taskID}_{subID}_feature.csv')
        # NOTE: GroupBy.last() returns the last non-null value of each column
        # within a trial, which equals the final row only when it has no NaNs.
        adjust_after = feature_trajactory.groupby('iTrial').last().reset_index()

        # One 'adjust_after' row per 'adjust_init' row, carrying the same trial
        # id and body orientation plus the final feature values.
        is_init = stimulus_data['type'] == 'adjust_init'
        new_rows = stimulus_data[is_init][['iTrial', 'body_ori']].copy()
        new_rows.insert(0, 'type', 'adjust_after')
        new_rows = new_rows.merge(adjust_after[['iTrial'] + feature_columns], on='iTrial', how='left')

        combined_data = pd.concat([stimulus_data, new_rows], ignore_index=True)
        combined_data.insert(0, 'iSub', subID)
        all_data.append(combined_data)

    processed_data = pd.concat(all_data, ignore_index=True)
    processed_data.to_csv(os.path.join(output_dir, f'{taskID}_processed.csv'), index=False)

In [23]:
# Subjects 1 through 24
subIDs = list(range(1, 25))
# preprocess_main_a(project_root, 'Task1a', subIDs, features_range, canvas_settings, body_length, preprocessor_a)
# preprocess_main_a(project_root, 'Task1b', subIDs, features_range, canvas_settings, body_length, preprocessor_a)
# preprocess_main_a(project_root, 'Task3c', subIDs, features_range, canvas_settings, body_length, preprocessor_a)

# preprocess_construct(project_root, 'Task1b', subIDs)
# preprocess_construct(project_root, 'Task3c', subIDs)

preprocess_main_b(project_root, 'Task2', subIDs, preprocessor_b)
# preprocess_main_b(project_root, 'Task3a', subIDs, preprocessor_b)

## 2. Accuracy Analysis

## 3. Perceptual Error Analysis

In [58]:
# Project root = parent of the current working directory
# (assumes the notebook is launched from a sub-folder of the project — TODO confirm).
project_root = Path(os.getcwd()).parent
# Make the project root importable so `src.error_evaluation` can be found.
sys.path.append(str(project_root))

# Import the processor; the module is reloaded first so that edits to the
# source file are picked up without restarting the kernel.
import importlib
import src.error_evaluation as error_evaluation
importlib.reload(error_evaluation)
from src.error_evaluation import Processor

# Instantiate the perceptual-error processor
error_processor = Processor()

In [None]:
# Compute the perceptual error.
# Task1b_processed.csv is the file preprocess_construct writes for 'Task1b'.
processed_dir = Path(project_root) / 'data' / 'processed'
processed_data = pd.read_csv(processed_dir / f'Task1b_processed.csv')

error = error_processor.error_calculation(processed_data)
summary = error_processor.error_summary(error)

# Continuous interpolation; the result is reused by later cells
# (plot_error_interpolate and error_processor.process).
continuous_predictions = error_processor.analyze_length_error_relationship(error)

In [None]:
# Folder handed to the plotting calls below; created if missing.
result_path = Path(project_root) / 'results' / 'Raw'
os.makedirs(result_path, exist_ok=True)

# Plot the error figures
error_processor.plot_error(error, "length", result_path)
error_processor.plot_error(error, "angle", result_path)

# Plot the error figures separately for each feature
error_processor.plot_error_by_feature(error, result_path)

# Plot the continuous interpolation of the error
error_processor.plot_error_interpolate(error, continuous_predictions, result_path)

In [59]:
# Get the perceptual-error predictions for all Task2 stimuli and store one
# `<subID>_sti_perc.csv` file per subject.
subIDs = [1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24]
raw_dir = Path(project_root) / 'data' / 'raw' / 'Task2'
processed_dir = Path(project_root) / 'data' / 'processed' / 'Task2'
# The Task2 preprocessing above writes into data/processed, not into this
# sub-folder, so create it here; otherwise to_csv fails on a fresh checkout.
os.makedirs(processed_dir, exist_ok=True)
for subID in subIDs:
    stimulus_data = pd.read_csv(raw_dir / f'Task2_{subID}_sti.csv')

    stimulus_data_perc = error_processor.process(stimulus_data, continuous_predictions)
    stimulus_data_perc.to_csv(os.path.join(processed_dir, f'Task2_{subID}_sti_perc.csv'), index=False)

In [60]:
# Rebuild the combined Task2 table from the perceived stimulus files
# (`*_sti_perc.csv`) instead of the raw stimulus files.
# NOTE(review): this cell relies on `subIDs`, `processed_dir` and `raw_dir` as
# left behind by the previous cell, and on `preprocessor_b` from the
# preprocessing section; it fails on a fresh kernel unless those cells ran.
all_data = []
output_dir = Path(project_root) / 'data' / 'processed'
for subID in subIDs:
    stimulus_data = pd.read_csv(processed_dir / f'Task2_{subID}_sti_perc.csv')
    behavior_data = pd.read_csv(raw_dir / f'Task2_{subID}_bhv.csv')

    # NOTE(review): preprocess_main_b passes a fourth `recording_data` argument
    # for 'Task2'; here only three are passed — confirm Preprocessor_B.process
    # accepts this call form.
    combined_data = preprocessor_b.process('Task2', stimulus_data, behavior_data)
    combined_data.insert(0, 'iSub', subID)
    all_data.append(combined_data)

processed_data = pd.concat(all_data, ignore_index=True)
processed_data.to_csv(os.path.join(output_dir, f'Task2_processed_perceived.csv'), index=False)

## 3. Recording Analysis

In [12]:
# Project root = parent of the current working directory
# (assumes the notebook is launched from a sub-folder of the project — TODO confirm).
project_root = Path(os.getcwd()).parent
# Make the project root importable so `src.audio_coding` can be found.
sys.path.append(str(project_root))

# Import the processor; the module is reloaded first so that edits to the
# source file are picked up without restarting the kernel.
# NOTE(review): this rebinds the name `Processor`, which other cells import
# from different modules (src.error_evaluation, src.animation).
import importlib
import src.audio_coding as audio_coding
importlib.reload(audio_coding)
from src.audio_coding import Processor

# Instantiate the audio processor
audio_processor = Processor()

In [15]:
def preprocess_recording(project_root, subIDs, audio_processor):
    """Code every subject's Task2 recording and save the stacked result.

    For each subject the raw recording file (``Task2_<subID>_rec.csv``) and the
    behavior file (``Task2_<subID>_bhv.csv``) are read from ``data/raw/Task2``.
    The values of ``structure1`` and ``structure2`` at index label 0 of the
    behavior file are passed, together with the recording frame, to
    ``audio_processor.process``. The returned frames are concatenated and
    written to ``data/processed/Task2_recording_processed.csv``.

    Args:
        project_root: Project root directory (str or Path).
        subIDs: Iterable of subject identifiers used in the file names.
        audio_processor: Object exposing ``process(recording, structures)``
            that returns a DataFrame.
    """
    task_raw_dir = Path(project_root) / 'data' / 'raw' / 'Task2'
    target_dir = Path(project_root) / 'data' / 'processed'
    os.makedirs(target_dir, exist_ok=True)

    coded_frames = []
    for subID in subIDs:
        rec_frame = pd.read_csv(task_raw_dir / f'Task2_{subID}_rec.csv')
        bhv_frame = pd.read_csv(task_raw_dir / f'Task2_{subID}_bhv.csv')

        # Only the entry at label 0 of each structure column is used.
        structures = [bhv_frame[column][0] for column in ('structure1', 'structure2')]

        coded_frames.append(audio_processor.process(rec_frame, structures))

    stacked = pd.concat(coded_frames, ignore_index=True)
    target_file = os.path.join(target_dir, f'Task2_recording_processed.csv')
    stacked.to_csv(target_file, index=False)

In [16]:
# Subjects 1 through 24
subIDs = list(range(1, 25))
preprocess_recording(project_root, subIDs, audio_processor)

## 4. Animation

In [217]:
# Project root = parent of the current working directory
# (assumes the notebook is launched from a sub-folder of the project — TODO confirm).
project_root = Path(os.getcwd()).parent
# Make the project root importable so `src.animation` can be found.
sys.path.append(str(project_root))

# Import the processor; the module is reloaded first so that edits to the
# source file are picked up without restarting the kernel.
import importlib
import src.animation as animation
importlib.reload(animation)
from src.animation import Processor

# Instantiate the animation processor (the Processor imported from src.animation)
animation_compare = Processor()

In [172]:
# Load the model-fitting results of one subject
result_path = Path(project_root) / 'results' / 'Bayesian'
# NOTE(review): joblib.load unpickles the file; only load result files that
# come from a trusted source.
fitting_results = joblib.load(result_path / 'M_Base_fitting_results.joblib')
# Entry 9 is hard-coded; the following cells read the Task2_9_* files, so this
# is presumably subject 9 — confirm how fitting_results is keyed.
fit_result = fitting_results[9]
step_results = fit_result['step_results']

import src.Bayesian.utils.partition as partition
importlib.reload(partition)
from src.Bayesian.utils.partition import Partition

# NOTE(review): from here on `partition` refers to a Partition instance, no
# longer to the module imported above under the same name.
partition = Partition()
all_centers = partition.get_centers(4, 4)

# For every fitting step, pair its `k` with element [1] of the (k - 1)-th
# entry of all_centers (k appears to be 1-based — TODO confirm).
input_modelfitting = [[step['k'], all_centers[step['k'] - 1][1]] for step in step_results]

In [None]:
# Paths to this subject's verbal-report data (subject 9 is hard-coded in the
# file names); nothing is read here, the paths are consumed by process_and_plot.
raw_dir = Path(project_root) / 'data' / 'raw' / 'Task2' 
processed_dir = Path(project_root) / 'data' / 'processed' / 'Task2' 
input_bhv_csv = os.path.join(raw_dir, 'Task2_9_bhv.csv')
# NOTE(review): the recording file is looked up under data/processed/Task2,
# whereas the other cells read `*_rec.csv` from data/raw/Task2 — confirm that
# this file exists at this location.
input_rec_csv = os.path.join(processed_dir, 'Task2_9_rec.csv')
output_csv = os.path.join(processed_dir, 'Task2_9_processed.csv')
plots_dir = Path(project_root) / 'results' / 'Plots'

In [230]:
# Draw the single-frame images, using the paths and the model-fitting input
# prepared in the preceding cells.
animation_compare.process_and_plot(input_rec_csv, input_bhv_csv, input_modelfitting, output_csv, plots_dir, plot_side='both')

Choice 1 在第 0 行之前没有数据，跳过生成图像。
Choice 2 在第 0 行之前没有数据，跳过生成图像。
Choice 3 在第 0 行之前没有数据，跳过生成图像。
Choice 1 在第 1 行之前没有数据，跳过生成图像。
Choice 2 在第 1 行之前没有数据，跳过生成图像。
Choice 1 在第 2 行之前没有数据，跳过生成图像。
Choice 1 在第 3 行之前没有数据，跳过生成图像。
处理完成，图表已分别保存到 '/home/yangjiong/CategoryLearning/results/Plots/choice1', '/home/yangjiong/CategoryLearning/results/Plots/choice2', '/home/yangjiong/CategoryLearning/results/Plots/choice3', 和 '/home/yangjiong/CategoryLearning/results/Plots/choice4' 文件夹中。


In [231]:
# Assemble one animated GIF per choice folder
plots_parent_dir = Path(project_root) / 'results' / 'Plots'

# Sub-folder name -> file-name pattern of the frames inside that folder
# (choice1 .. choice4, frames ending in _c1.png .. _c4.png)
choices = {
    f'choice{idx}': rf'^\d+_(\d+)_(\d+)_c{idx}\.png$'
    for idx in range(1, 5)
}

# Walk through the sub-folders and build a GIF for each one that exists
for choice, pattern in choices.items():
    sub_dir = plots_parent_dir / choice
    if sub_dir.exists() and sub_dir.is_dir():
        # The GIF is saved in the Plots parent folder, next to the sub-folders
        output_gif = plots_parent_dir / f'{choice}_animation.gif'
        animation_compare.create_sorted_gif(sub_dir, output_gif, pattern, duration=0.5)
    else:
        print(f"子文件夹 '{sub_dir}' 不存在或不是一个文件夹，已跳过。")

  images.append(imageio.imread(filepath))


GIF已成功创建并保存为 '/home/yangjiong/CategoryLearning/results/Plots/choice1_animation.gif'。
GIF已成功创建并保存为 '/home/yangjiong/CategoryLearning/results/Plots/choice2_animation.gif'。
GIF已成功创建并保存为 '/home/yangjiong/CategoryLearning/results/Plots/choice3_animation.gif'。
GIF已成功创建并保存为 '/home/yangjiong/CategoryLearning/results/Plots/choice4_animation.gif'。
