In [7]:
import matplotlib.pyplot as plt
from matplotlib_inline import backend_inline
backend_inline.set_matplotlib_formats('png')
plt.rcParams['figure.dpi'] = 300 
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['figure.figsize'] = (10, 5)
import pandas as pd
import numpy as np
from tqdm import tqdm, trange
import os

In [8]:
import matplotlib.dates as mdates
time_format = mdates.DateFormatter('%H:%M')

In [9]:
def filter_data(df, column_name, z_threshold= 2, filter_step= 100, same_threshold=20):
    value_counts = df[column_name].value_counts()
    values_to_filter = value_counts[value_counts > same_threshold].index

    # 初始化过滤后的索引
    filtered_indices = []

    # 分段处理
    for start in range(0, len(df), filter_step):
        end = start + filter_step
        subset = df.iloc[start:end]

        # 计算Z分数
        mean = subset[column_name].mean()
        std = subset[column_name].std()

        if std == 0:  # 防止标准差为0时出错
            z_scores = np.zeros_like(subset[column_name], dtype=float)
        else:
            z_scores = np.abs((subset[column_name] - mean) / std)

        # 筛除异常值
        subset_filtered = subset[z_scores < z_threshold]

        # 筛除出现次数超过阈值的值
        subset_filtered = subset_filtered[~subset_filtered[column_name].isin(values_to_filter)]

        filtered_indices.extend(subset_filtered.index)

    return df.loc[filtered_indices]

In [10]:
def process_csv(file_path):
    file_name = os.path.splitext(os.path.basename(file_path))[0]
    os.makedirs('fig', exist_ok = True)

    df = pd.read_csv(file_path)
    df = filter_data(df, 'y', filter_step= 100)
    df = filter_data(df, 'y', filter_step= 1000)
    df = filter_data(df, 'y', filter_step= 200)
    if df.shape[0] < 200:
        pass
    else:
        df['video_record_time'] = pd.to_datetime(df['video_record_time'])
        df['y'] = df['y'].astype(float)
        df = df.set_index('video_record_time')

        plt.plot(df.index, df['y'], 'b-')
        y_mean = df['y'].mean()
        plt.text(df.index[-1], y_mean, f'{y_mean:.2f}', color='r', 
                 verticalalignment='bottom', horizontalalignment='right')
        plt.axhline(y=y_mean, color='r', linestyle='--', label='Mean')
        
        plt.gca().xaxis.set_major_formatter(time_format)

        plt.xlabel('Time')
        plt.ylabel('displacement')
        plt.title(f'{file_name}')
        plt.savefig(f'fig/{file_name}.png')
        plt.close()


In [11]:
def process_folder(folder_path):
    for root, dirs, files in tqdm(os.walk(folder_path)):
        for file in tqdm(files):
            if file.endswith('.csv'):
                file_path = os.path.join(root, file)
                process_csv(file_path)

In [12]:
folder_path = './data_hn'
process_folder(folder_path)

100%|██████████| 1/1 [00:00<00:00, 21290.88it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
100%|██████████| 9/9 [00:03<00:00,  2.33it/s]
100%|██████████| 9/9 [00:05<00:00,  1.79it/s]
100%|██████████| 10/10 [00:10<00:00,  1.10s/it]
0it [00:00, ?it/s]/it]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
0it [00:00, ?it/s]
100%|██████████| 10/10 [00:13<00:00,  1.36s/it]
0it [00:00, ?it/s]s/it]
0it [00:00, ?it/s]
100%|██████████| 10/10 [00:09<00:00,  1.10it/s]
100%|██████████| 9/9 [00:03<00:00,  2.78it/s]
100%|██████████| 9/9 [00:03<00:00,  2.68it/s]
100%|██████████| 13/13 [00:04<00:00,  2.79it/s]
100%|██████████| 13/13 [00:05<00:00,  2.43it/s]
100%|██████████| 13/13 [00:04<00:00,  2.75it/s]
100%|██████████| 13/13 [00:03<00:00,  3.29it/s]
100%|██████████| 13/13 [00:07<00:00,  1.77it/s]
100%|██████████| 13/13 [00:04<00:00,  2.68it/s]
100%|██████████| 13/13 [00:04<00:00,  2.73it/s]
100%|██████████| 13/13 [00:05<00:00,  2.54it/s]
100%|██████████| 13/13 [00:04<00:00,  2.