In [1]:
import pandas as pd
import numpy as np
import os
import glob
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler, RobustScaler

In [2]:
# Identifier of this feature-engineering run; it selects the output folder.
fe = "002"
# Ensure the run folder and its "save" subfolder both exist. makedirs creates
# missing parents, and exist_ok=True keeps the cell safe to re-run and still
# creates "save" when the run folder is already present.
os.makedirs(f"../out/fe/fe{fe}/save", exist_ok=True)

In [3]:
# Metadata table for the training recordings (read from train_info.csv).
df = pd.read_csv(filepath_or_buffer='../data/train/train_info.csv')

In [4]:
# Accumulators filled while looping over the recordings.
feature_arr, target_arr = [], []      # per-recording window / label arrays
mask_arr, target_mask_arr = [], []    # only referenced by commented-out code
id_list, player_list = [], []         # one entry per generated window

In [5]:
# Windowing configuration, all in samples:
#   seq_len - number of rows in every window
#   shift   - distance between the starts of consecutive windows
#   offset  - only referenced by the commented-out target-mask code
seq_len, shift, offset = 1000, 500, 100

In [6]:
def extract_valid_swing(data, sample_rate=85, energy_window_sec=0.5, energy_percentile=40):
    """Trim a recording to the span where its smoothed signal energy is high.

    The first and last 10 rows are always discarded. The squared magnitude of
    the first three columns is then smoothed with a moving average, and the
    recording is cut to the range between the first and the last sample whose
    smoothed energy exceeds the ``energy_percentile``-th percentile.

    Args:
        data: 2-D array with one row per sample. Columns 0-2 are combined into
            a vector magnitude (presumably the accelerometer axes -- confirm
            against the data format); all columns are kept in the output.
        sample_rate: Samples per second, used to convert ``energy_window_sec``
            into a window length in samples.
        energy_window_sec: Length of the moving-average window in seconds.
        energy_percentile: Percentile of the smoothed energy used as the
            activity threshold.

    Returns:
        A row slice of ``data``. It has zero rows when the input has 20 rows
        or fewer, because nothing is left after the edge trim.
    """
    edge = 10  # rows dropped at BOTH ends to remove anomalous values
    data = data[edge:len(data) - edge]
    N_total = len(data)
    if N_total == 0:
        # Nothing left to analyse; np.convolve would raise on an empty array.
        return data

    ax, ay, az = data[:, 0], data[:, 1], data[:, 2]
    acc_mag = np.sqrt(ax**2 + ay**2 + az**2)

    # Clamp the window to [1, N_total]. With mode='same' np.convolve returns
    # max(len(a), len(v)) samples, so a window longer than the signal would
    # produce an energy curve that no longer lines up with the rows of `data`.
    energy_window_size = max(1, min(int(energy_window_sec * sample_rate), N_total))
    kernel = np.ones(energy_window_size) / energy_window_size
    energy = np.convolve(acc_mag**2, kernel, mode='same')

    dynamic_energy_threshold = np.percentile(energy, energy_percentile)
    active = energy > dynamic_energy_threshold

    if np.any(active):
        # First active sample, and one past the last active sample.
        start_idx = np.argmax(active)
        end_idx = len(active) - np.argmax(active[::-1])
    else:
        # No sample is above the threshold (e.g. constant energy): keep all.
        start_idx, end_idx = 0, N_total

    return data[start_idx:end_idx]

In [7]:
# Turn every recording listed in `df` into fixed-length, half-overlapping
# windows of scaled features plus one label vector per window.
for index, row in df.iterrows():
    unique_id = row['unique_id']
    player_id = row['player_id']
    # Label columns are shifted so they can be used directly as one-hot indices.
    mode = row['mode'] - 1  # read but not used further in this cell
    gender = row['gender'] - 1
    hand = row['hold racket handed'] - 1
    year = row['play years']
    level = row['level'] - 2

    filepath = f'../data/train/train_data/{unique_id}.txt'
    imu_data = np.loadtxt(filepath)
    imu_data = extract_valid_swing(imu_data)

    # One window starts every `shift` samples; the trailing remainder is folded
    # into the last window instead of getting a window of its own.
    batch = len(imu_data) // shift
    if batch == 0:
        # Too short for a single window: it would contribute no rows, so skip
        # it before the scaler is fit (fitting fails on an empty recording).
        continue

    # Per-sample features, concatenated column-wise: raw signal, backward
    # difference (first row 0), forward difference (last row 0), running sum.
    diff_prev = np.diff(imu_data, axis=0, prepend=imu_data[:1])
    diff_next = np.diff(imu_data, axis=0, append=imu_data[-1:])
    cumsum = np.cumsum(imu_data, axis=0)
    all_feature = np.concatenate((imu_data, diff_prev, diff_next, cumsum), axis=1)

    # The scaler is fit independently on every recording.
    sc = RobustScaler()
    all_feature = sc.fit_transform(all_feature)

    # Label vector: gender (2) + handedness (2) + play years (3) + level (4).
    gender_one_hot = np.eye(2)[gender]
    hand_one_hot = np.eye(2)[hand]
    year_one_hot = np.eye(3)[year]
    level_one_hot = np.eye(4)[level]
    target = np.concatenate((gender_one_hot, hand_one_hot, year_one_hot, level_one_hot))

    # Widths are taken from the data instead of being hard-coded.
    feature_arr_ = np.zeros((batch, seq_len, all_feature.shape[1]))
    target_arr_ = np.zeros((batch, target.size), dtype=int)

    for i in range(batch):
        start = i * shift
        # Slicing clips at the end of the recording, so the last window can be
        # shorter than seq_len; its remaining rows stay zero (padding).
        window = all_feature[start:start + seq_len]
        feature_arr_[i, :len(window), :] = window
        # Every window of a recording carries the same full label row.
        target_arr_[i, :] = target

    feature_arr.append(feature_arr_)
    target_arr.append(target_arr_)
    id_list.extend([unique_id for _ in range(batch)])
    player_list.extend([player_id for _ in range(batch)])

In [8]:
# Join the per-recording arrays along the window axis (axis 0, the default).
all_feature_arr = np.concatenate(feature_arr)
all_target_arr = np.concatenate(target_arr)

In [9]:
# Sanity check: (number of windows, seq_len, features per sample).
np.shape(all_feature_arr)

(8402, 1000, 24)

In [10]:
# id_list = np.array(id_list, dtype=int)
# player_list = np.array(player_list, dtype=int)

In [11]:
# Persist the windowed features and labels for this run.
# `fe` comes from the configuration cell at the top of the notebook; it is not
# redefined here so the save path cannot drift from the directory created there.
out_dir = f"../out/fe/fe{fe}"
np.save(f"{out_dir}/feature_arr.npy", all_feature_arr)
np.save(f"{out_dir}/target_arr.npy", all_target_arr)

In [12]:
# Convert the per-window id lists to integer arrays, then store them in the
# same run folder as the feature and target arrays.
id_list, player_list = np.array(id_list, dtype=int), np.array(player_list, dtype=int)
np.save(f"../out/fe/fe{fe}/id_list.npy", id_list)
np.save(f"../out/fe/fe{fe}/player_list.npy", player_list)