In [39]:
import glob
import os

import joblib
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler, RobustScaler

In [40]:
# Identifier of this feature-engineering run; every artifact is written under
# ../out/fe/fe<fe>/.
fe = "004"

# makedirs creates the missing parents too, and exist_ok=True makes the cell
# safe to re-run. It also repairs a run directory that exists without its
# save/ subdirectory, which the previous exists()-guard silently skipped.
os.makedirs(f"../out/fe/fe{fe}/save", exist_ok=True)

In [41]:
# Per-recording metadata table. Later cells read its 'unique_id', 'player_id',
# 'mode', 'gender', 'hold racket handed', 'play years' and 'level' columns.
df = pd.read_csv('../data/train/train_info.csv')

In [42]:
# Accumulators for the windowing loop: one array per recording is appended to
# each list, and the lists are concatenated once all recordings are processed.
train_feature, train_target = [], []
val_feature, val_target = [], []

In [43]:
# Windowing parameters:
#   seq_len - number of time steps in every (zero-padded) window
#   shift   - stride between the starts of consecutive windows
#   offset  - only referenced by the commented-out mask code
seq_len, shift, offset = 500, 250, 100

In [44]:
def extract_valid_swing(data, sample_rate=85, energy_window_sec=0.5, energy_percentile=40):
    """Trim a recording to the span where the smoothed motion energy is high.

    The first and last 10 rows are always discarded (anomalous values at the
    edges). The squared magnitude of columns 0-2 (treated as the x/y/z
    acceleration axes) is smoothed with a moving average, and the recording is
    cut so that it runs from the first to the last sample whose smoothed
    energy is strictly above the ``energy_percentile``-th percentile of that
    smoothed signal.

    Parameters
    ----------
    data : np.ndarray
        2-D array with one row per sample and at least three columns.
    sample_rate : float
        Samples per second; only used to convert ``energy_window_sec`` into a
        number of samples.
    energy_window_sec : float
        Length of the moving-average window in seconds.
    energy_percentile : float
        Percentile (0-100) of the smoothed energy used as activity threshold.

    Returns
    -------
    np.ndarray
        A contiguous row slice of ``data``. It has zero rows when the input
        has 20 rows or fewer, and it is the whole edge-trimmed recording when
        no sample exceeds the threshold.
    """
    n_rows = len(data)
    data = data[10:n_rows - 10]  # remove anomalous values at both ends
    n_samples = len(data)

    # Nothing survives the edge trim: return the empty slice instead of
    # letting np.convolve raise on an empty array.
    if n_samples == 0:
        return data

    ax, ay, az = data[:, 0], data[:, 1], data[:, 2]
    acc_mag = np.sqrt(ax**2 + ay**2 + az**2)

    # Clamp the window to [1, n_samples]. With mode='same' np.convolve returns
    # max(len(signal), len(kernel)) samples, so a kernel longer than the
    # signal would yield an activity mask that no longer lines up with the
    # rows of `data`; a zero-length kernel would raise.
    window = max(1, min(int(energy_window_sec * sample_rate), n_samples))
    energy = np.convolve(acc_mag**2, np.ones(window) / window, mode='same')

    threshold = np.percentile(energy, energy_percentile)
    active_idx = np.flatnonzero(energy > threshold)

    # No sample above the threshold (e.g. constant energy): keep everything.
    if active_idx.size == 0:
        return data

    # Keep everything between the first and the last active sample, inclusive.
    return data[active_idx[0]:active_idx[-1] + 1]

In [45]:
# Build a one-row-per-player table: keep the player-level columns, retain the
# first row seen for each player_id, then order the players by id.
_player_columns = ['player_id', 'gender', 'hold racket handed', 'play years', 'level']
selected_columns_df = df.loc[:, _player_columns]
unique_players_info = selected_columns_df.drop_duplicates(subset='player_id')
sorted_unique_players_info = unique_players_info.sort_values(by='player_id')

In [46]:
# Split at player level (80% train / 20% validation, stratified by 'level') so
# that all recordings of one player end up on the same side of the split.
# Despite their names, train_indices / val_indices hold player_id values, not
# row positions. train_test_split is imported in the notebook's import cell.
train_indices, val_indices = train_test_split(
    sorted_unique_players_info['player_id'].to_numpy(),
    test_size=0.2,
    random_state=42,
    stratify=sorted_unique_players_info['level'].to_numpy()
)

In [47]:
# Fit the feature scaler on recordings of training players only; recordings of
# validation players are excluded from the fit.
train_unique_ids = df.loc[~df['player_id'].isin(val_indices), 'unique_id']

train_feature_chunks = []
for unique_id in train_unique_ids:
    filepath = f'../data/train/train_data/{unique_id}.txt'
    imu_data = extract_valid_swing(np.loadtxt(filepath))

    # Per-step features; this must stay identical to the feature construction
    # in the windowing cell, because the fitted scaler is applied there:
    #   raw signal | x[t] - x[t-1] (first row 0) | x[t+1] - x[t] (last row 0)
    #   | running sum over the recording
    diff_prev = np.diff(imu_data, axis=0, prepend=imu_data[:1])
    diff_next = np.diff(imu_data, axis=0, append=imu_data[-1:])
    cumsum = np.cumsum(imu_data, axis=0)
    train_feature_chunks.append(
        np.concatenate((imu_data, diff_prev, diff_next, cumsum), axis=1)
    )

# One row per time step across all training recordings.
all_features = np.concatenate(train_feature_chunks)
print(all_features.shape)
sc = RobustScaler()
sc.fit(all_features)

(3743869, 24)


In [48]:
# Start from empty accumulators so that re-running this cell cannot append
# every recording a second time.
train_feature, train_target = [], []
val_feature, val_target = [], []

# Labels are shifted to zero-based class ids before one-hot encoding.
# NOTE(review): this assumes gender / hand are in {1, 2}, 'play years' in
# {0, 1, 2} and 'level' in {2, ..., 5}; a negative id would silently wrap
# around in np.eye(n)[id] - confirm against the data description.
records = zip(
    df['unique_id'],
    df['gender'] - 1,
    df['hold racket handed'] - 1,
    df['play years'],
    df['level'] - 2,
    df['player_id'].isin(train_indices),
)

for unique_id, gender, hand, year, level, is_train in records:
    filepath = f'../data/train/train_data/{unique_id}.txt'
    imu_data = extract_valid_swing(np.loadtxt(filepath))

    # Window i starts at i * shift. Recordings shorter than one shift yield no
    # window at all and contribute nothing to the output arrays.
    n_windows = len(imu_data) // shift
    if n_windows == 0:
        continue

    # Same per-step features the scaler was fitted on:
    #   raw signal | x[t] - x[t-1] | x[t+1] - x[t] | running sum
    diff_prev = np.diff(imu_data, axis=0, prepend=imu_data[:1])
    diff_next = np.diff(imu_data, axis=0, append=imu_data[-1:])
    cumsum = np.cumsum(imu_data, axis=0)
    all_feature = sc.transform(
        np.concatenate((imu_data, diff_prev, diff_next, cumsum), axis=1)
    )

    # 11-wide label vector: gender(2) + hand(2) + play years(3) + level(4).
    target = np.concatenate((
        np.eye(2)[gender],
        np.eye(2)[hand],
        np.eye(3)[year],
        np.eye(4)[level],
    ))

    # Windows that run past the end of the recording (the last one) stay
    # zero-padded after the available samples.
    feature_arr_ = np.zeros((n_windows, seq_len, all_feature.shape[1]))
    for i in range(n_windows):
        window = all_feature[i * shift:i * shift + seq_len]
        feature_arr_[i, :len(window)] = window

    # Every window of a recording carries the same label row. The previous
    # code sliced the label axis by the window's time length for the last
    # window, which only worked while that length was at least 11.
    target_arr_ = np.tile(target.astype(int), (n_windows, 1))

    if is_train:
        train_feature.append(feature_arr_)
        train_target.append(target_arr_)
    else:
        val_feature.append(feature_arr_)
        val_target.append(target_arr_)

In [49]:
# Stack the per-recording arrays along the window axis, one array per split
# and per kind (features / targets).
all_train_feature, all_train_target, all_val_feature, all_val_target = (
    np.concatenate(chunks, axis=0)
    for chunks in (train_feature, train_target, val_feature, val_target)
)
# mask_arr = np.concatenate(mask_arr, axis=0)
# target_mask_arr = np.concatenate(target_mask_arr, axis=0)

In [50]:
# Sanity check: (total training windows, seq_len, features per time step).
all_train_feature.shape

(14188, 500, 24)

In [51]:
# id_list = np.array(id_list, dtype=int)
# player_list = np.array(player_list, dtype=int)

In [52]:
# fe = "004"
# Persist the fitted scaler so the same transform can be re-applied later.
joblib.dump(sc, f"../out/fe/fe{fe}/robust_scaler.joblib")

# Persist the windowed arrays and the player ids of each split, in the same
# order as before: features/targets first, then the split membership.
_artifacts = [
    (f"../out/fe/fe{fe}/train_feature.npy", all_train_feature),
    (f"../out/fe/fe{fe}/train_target.npy", all_train_target),
    (f"../out/fe/fe{fe}/val_feature.npy", all_val_feature),
    (f"../out/fe/fe{fe}/val_target.npy", all_val_target),
    (f"../out/fe/fe{fe}/train_indices.npy", train_indices),
    (f"../out/fe/fe{fe}/val_indices.npy", val_indices),
]
for _path, _array in _artifacts:
    np.save(_path, _array)
# np.save(f"../out/fe/fe{fe}/mask_arr.npy", mask_arr)
# np.save(f"../out/fe/fe{fe}/target_mask_arr.npy", target_mask_arr)