# Process raw CSV data


In [1]:
import pandas as pd
import numpy as np
import warnings
from glob import glob
import os
from tqdm import tqdm
warnings.filterwarnings('ignore')

For Training


In [4]:
def process_csv_raw_data(raw_data_path, processed_data_path, file_name="walking"):
    """Clean every raw CSV in a directory and save the labelled result.

    Each ``*.csv`` file directly inside ``raw_data_path`` is read, its columns
    are renamed, the nine sensor columns are kept, and an ``action`` label
    column is added. All cleaned files are then stacked row-wise and written
    to a single file ``<processed_data_path>/<file_name>``.

    Args:
        raw_data_path: Directory that contains the raw ``*.csv`` files.
        processed_data_path: Directory that receives the processed file; it is
            created if it does not exist.
        file_name: Name of the output file. The part before the first ``.``
            is used as the value of the ``action`` column. The default
            ``"walking"`` reproduces the previously hard-coded name (which has
            no ``.csv`` extension); pass e.g. ``"walking.csv"`` to get one.

    Returns:
        None. Nothing is written when the directory holds no CSV files.
    """
    # NOTE(review): every sensor name is assigned to the header one position
    # to the left of its own label (e.g. "Acceleration X(g)" -> "ay").
    # Presumably the header row of the export is shifted by one column
    # relative to the data - confirm against a raw file before changing it.
    column_map = {"Time": "time", "Device name设备名称": "ax", "Acceleration X(g)": "ay",
                  "Acceleration Y(g)": "az", "Acceleration Z(g)": "wx",
                  "Angular velocity X(°/s)": "wy", "Angular velocity Y(°/s)": "wz",
                  "Angular velocity Z(°/s)": "angleX", "Angle X(°)": "angleY",
                  "Angle Y(°)": "angleZ", "Angle Z(°)": "temperature",
                  "Temperature(℃)": "Unnamed"}
    feature_columns = ['ax', 'ay', 'az', 'wx',
                       'wy', 'wz', 'angleX', 'angleY', 'angleZ']

    # Label stored in the 'action' column: the file name without extension
    action = file_name.split(".")[0]

    # Sorted so the row order of the merged output is reproducible
    csv_files = sorted(glob(os.path.join(raw_data_path, "*.csv")))

    cleaned_frames = []
    for csv_file in tqdm(csv_files):
        # Read once, skipping malformed rows. A second, strict read used to
        # run first and raised on exactly the rows this option should skip.
        raw_df = pd.read_csv(csv_file, on_bad_lines='skip')

        # .copy() so that adding the label never writes into a view
        clean_df = raw_df.rename(columns=column_map)[feature_columns].copy()
        clean_df['action'] = action
        cleaned_frames.append(clean_df)

    if not cleaned_frames:
        # No input files: nothing to write
        return

    # All inputs share one output path, so they are merged into one file;
    # writing inside the loop kept only the last file that was processed.
    combined_df = pd.concat(cleaned_frames, ignore_index=True)

    os.makedirs(processed_data_path, exist_ok=True)
    combined_df.to_csv(os.path.join(
        processed_data_path, file_name), index=False)

In [None]:
# Input directory with the raw CSV files and output directory for the result
raw_data_path = "raw_data/csv_data/"
processed_data_path = "processed_data"

# Process every raw CSV file found in the input directory
process_csv_raw_data(
    raw_data_path=raw_data_path,
    processed_data_path=processed_data_path,
)

For Predictions


In [65]:
def process_raw_data(raw_data):
    """Rename and select the sensor columns of raw data for prediction.

    Args:
        raw_data: A single DataFrame, or a list of DataFrames that are
            concatenated row-wise (their indexes are discarded) first.

    Returns:
        A new DataFrame that holds only the columns ``ax, ay, az, wx, wy,
        wz, angleX, angleY, angleZ`` and has a fresh ``0..n-1`` index.

    Raises:
        ValueError: If ``raw_data`` is neither a list nor a DataFrame.
    """
    if isinstance(raw_data, list):
        # Stack the list of DataFrames into a single DataFrame
        processed_df = pd.concat(raw_data, ignore_index=True)
    elif isinstance(raw_data, pd.DataFrame):
        # Use the single DataFrame as is
        processed_df = raw_data
    else:
        raise ValueError(
            "Input raw_data should be a list of DataFrames or a single DataFrame.")

    # NOTE(review): every sensor name is assigned to the header one position
    # to the left of its own label; presumably the header row of the export
    # is shifted relative to the data - confirm against a raw file.
    # The targets must be spelled exactly like the names selected below:
    # "AngleY"/"AngleZ" (capital A) made the selection raise KeyError.
    processed_df = processed_df.rename(columns={"Time": "time", "Device name设备名称": "ax", "Acceleration X(g)": "ay",
                                                "Acceleration Y(g)": "az", "Acceleration Z(g)": "wx", "Angular velocity X(°/s)": "wy", "Angular velocity Y(°/s)": "wz", "Angular velocity Z(°/s)": "angleX", "Angle X(°)": "angleY", "Angle Y(°)": "angleZ", "Angle Z(°)": "temperature", "Temperature(℃)": "Unnamed"})

    processed_df = processed_df[['ax', 'ay', 'az', 'wx',
                                 'wy', 'wz', 'angleX', 'angleY', 'angleZ']]

    # Return a re-indexed frame instead of mutating the selection in place
    return processed_df.reset_index(drop=True)

In [None]:
import pickle

# Dump the process_raw_data function for later use.
# NOTE(review): pickle stores a plain function by reference (its module and
# qualified name), not its code, so whoever loads this file must already have
# an importable process_raw_data under the same name - confirm the loader does.
dump_file_path = "utils/"

# Create the target directory up front so open() cannot fail when it is missing
os.makedirs(dump_file_path, exist_ok=True)

# The context manager guarantees the file is flushed and closed
with open(dump_file_path + "process_raw_data.pkl", "wb") as dump_file:
    pickle.dump(process_raw_data, dump_file)

In [12]:
# Merge two processed CSV files into a single training file

import pandas as pd
import os

# Build the paths with os.path.join instead of backslash literals: "\D" and
# "\w" are invalid escape sequences in a normal string, and a hard-coded
# backslash only acts as a path separator on Windows.
file1_path = os.path.join("processed_data", "Data_09.10", "walking.csv")
file2_path = os.path.join("processed_data", "Data_31.10", "walking.csv")

df1 = pd.read_csv(file1_path)
df2 = pd.read_csv(file2_path)

# ignore_index gives the merged frame one continuous 0..n-1 index
merged_df = pd.concat([df1, df2], ignore_index=True)

output_directory = "processed_data/train"
os.makedirs(output_directory, exist_ok=True)

output_path = os.path.join(output_directory, "walking.csv")
merged_df.to_csv(output_path, index=False)