# 疲劳数据整理
  整理所有疲劳数据：为每一个目标值建立一个包含全部特征值的 csv 文件，并舍弃该目标值为空的行。
  由于热处理数据过于紊乱，这里采用字符串全连接的方式保存；如需要其中特定的值，可以通过 split 得到。
## 库引入

In [None]:
import pandas as pd
import json
import os
from typing import Tuple, List

## 查询字典加载

In [None]:
# Lookup tables loaded from JSON files in the current working directory.
# The order matters: connect_features indexes this list positionally
# (0 = chemical composition, 1 = microstructure, 2 = heat-treatment process).
features = []
for feature_name in ['化学成分', '金相组织', '热处理工艺']:
    # JSON text is UTF-8 by specification; say so explicitly so that loading
    # does not depend on the platform's default text encoding.
    with open(f"./{feature_name}.json", "r", encoding="utf-8") as f:
        features.append(json.load(f))

## 整理函数

In [None]:
def connect_features(row: pd.Series) -> pd.Series:
    """Append the looked-up feature values to one data row.

    The module-level ``features`` list is consulted positionally. Each entry
    is used as a dict that maps a dash-joined key to a list of values and
    also holds a ``"column_names"`` entry naming those values:

    * ``features[0]`` is keyed by ``"<钢号>-<炉号>"``
    * ``features[1]`` is keyed by ``"<钢号>-<炉号>-<热处理号>"``
    * ``features[2]`` is keyed by ``"<钢号>-<热处理号>"``

    A table is only queried when every column its key needs is present in
    ``row``; a missing or empty entry is skipped silently.

    Args:
        row: One record of the source table.

    Returns:
        ``row`` followed by the values of every table that matched, in the
        order of the tables above. When nothing matches, the result holds
        the same entries as ``row``.
    """
    has_steel = '钢号' in row.index
    has_furnace = '炉号' in row.index
    has_heat = '热处理号' in row.index

    # (position in ``features``, lookup key) for every table that can be
    # queried with the identifier columns this row actually has.
    lookups = []
    if has_steel and has_furnace:
        lookups.append((0, f"{row['钢号']}-{row['炉号']}"))
    if has_steel and has_furnace and has_heat:
        lookups.append((1, f"{row['钢号']}-{row['炉号']}-{row['热处理号']}"))
    if has_steel and has_heat:
        lookups.append((2, f"{row['钢号']}-{row['热处理号']}"))

    features_rows = [row]
    for table_index, lookup_key in lookups:
        table = features[table_index]
        values = table.get(lookup_key)
        if values:
            features_rows.append(pd.Series(data=values, index=table["column_names"]))
    return pd.concat(features_rows, axis=0)

In [None]:
def write_to_csv(save_root_path: str, file_name: str, results: List[pd.DataFrame],
              target_names: Tuple[str, ...]) -> None:
    """Write the tidied tables as GBK-encoded CSV files.

    All files go into ``<save_root_path>/<file_name>/``, which is created
    (including missing parent directories) when it does not exist yet.

    Args:
        save_root_path: Directory under which the output folder is placed.
        file_name: Name of the output folder; also the CSV base name when
            there are no target names.
        results: Tables to write.
        target_names: Names paired positionally with ``results``; each pair
            is written to ``<target_name>.csv``. Pairing uses ``zip``, so
            surplus entries on either side are ignored. When empty, only
            ``results[0]`` is written, to ``<file_name>.csv``.

    Raises:
        IndexError: If ``target_names`` is empty and ``results`` is empty.
    """
    save_father_path = os.path.join(save_root_path, file_name)
    # makedirs with exist_ok avoids both the check-then-create race and the
    # failure of os.mkdir when save_root_path itself is missing.
    os.makedirs(save_father_path, exist_ok=True)

    if not target_names:
        save_path = os.path.join(save_father_path, f"{file_name}.csv")
        results[0].to_csv(save_path, encoding="gbk")
        return

    for target_name, result in zip(target_names, results):
        save_path = os.path.join(save_father_path, f"{target_name}.csv")
        result.to_csv(save_path, encoding="gbk")

In [None]:
def tidy_data(data__root_path: str, file_name: str, target_names: List[str]) -> List[pd.DataFrame]:
    """Build one feature table per target from a GBK-encoded CSV file.

    Every row of ``<data__root_path>/<file_name>.csv`` is extended with
    ``connect_features``. For each target, rows whose value for that target
    is missing (NaN) are left out, and the columns of all *other* targets
    are dropped, so each returned table carries exactly one target column.

    Args:
        data__root_path: Directory containing the CSV file. The parameter
            keeps its existing double-underscore spelling so that callers
            passing it by keyword continue to work.
        file_name: CSV file name without the ``.csv`` extension.
        target_names: Names of the target columns. When empty, a single
            table with every row (all columns kept) is returned.

    Returns:
        One DataFrame per entry of ``target_names``, in the same order, or a
        one-element list when ``target_names`` is empty.

    Raises:
        KeyError: If a name in ``target_names`` is not a column of the file.
    """
    data_path = os.path.join(data__root_path, f'{file_name}.csv')
    original_data = pd.read_csv(data_path, encoding='gbk')

    if not target_names:
        all_rows = [connect_features(row) for _, row in original_data.iterrows()]
        return [pd.DataFrame(all_rows).reset_index(drop=True)]

    # The columns to drop for each target never change, so compute them once.
    other_targets = [
        [name for name in target_names if name != target_name]
        for target_name in target_names
    ]

    # Rows are gathered in plain lists and turned into DataFrames once at the
    # end; DataFrame.append was removed in pandas 2.0 and copied the whole
    # table on every call.
    collected_rows = [[] for _ in target_names]
    for _, row in original_data.iterrows():
        # Empty CSV cells are read as NaN, which is truthy, so missing
        # targets have to be detected with pd.isna rather than ``not value``.
        filled = [
            result_index
            for result_index, target_name in enumerate(target_names)
            if not pd.isna(row[target_name])
        ]
        if not filled:
            continue
        # Connect all features once per row, then derive each target's view.
        full_row = connect_features(row)
        for result_index in filled:
            collected_rows[result_index].append(full_row.drop(other_targets[result_index]))

    return [pd.DataFrame(rows).reset_index(drop=True) for rows in collected_rows]

## 主函数

In [None]:
# Input location and the target columns to split the table by.
data_root_path = '/home/yuyouyu/WorkSpace/106Lab_Data/fatigue_original'
process_file_name = '抗拉强度'
data_path = os.path.join(data_root_path, f'{process_file_name}.csv')
target_names = ['断面收缩率', '伸长率10', '伸长率5', '抗拉强度', '屈服点']
original_data = pd.read_csv(data_path, encoding='gbk')

# Rows are gathered per target in plain lists and converted to DataFrames once
# at the end; DataFrame.append was removed in pandas 2.0 and copied the whole
# table on every call.
collected_rows = [[] for _ in target_names]
for index, row in original_data.iterrows():
    full_row = None
    for result_index, target_name in enumerate(target_names):
        # Empty CSV cells are read as NaN, which is truthy, so a missing
        # target has to be detected with pd.isna rather than ``not value``.
        if pd.isna(row[target_name]):
            continue
        # Connect all features only once per row, on first use.
        if full_row is None:
            full_row = connect_features(row)
        # Keep only the current target column; drop the other targets.
        drop_target = [name for name in target_names if name != target_name]
        collected_rows[result_index].append(full_row.drop(drop_target))

# One table per target, in the order of target_names.
results = [pd.DataFrame(rows).reset_index(drop=True) for rows in collected_rows]