# Feature extraction

- 15 user information attributes

| No | Attribute |
|---:|:---|
|  1 | PARTICIPANT_ID |
|  2 | USER_ID        |
|  3 | DEVICE         |
|  4 | SCREEN_HEIGHT  |
|  5 | SCREEN_WIDTH   |
|  6 | YMD            |
|  7 | N_TH_TIME      |
|  8 | SESSION_ID     |
|  9 | VER_HOR        |
| 10 | GAME_NUM       |
| 11 | STROKE_NUM     |
| 12 | DATA_POINTS    |
| 13 | START_TIME     |
| 14 | STOP_TIME      |
| 15 | STROKE_IDX     |


- 30 touchalytics features

| No | Feature name | Description |
|---:|:---|:---|
|  1 | INTER_STROKE_TIME     | inter-stroke time [$\text{ms}$]                                          |
|  2 | STROKE_DURATION       | stroke duration [$\text{ms}$]                                            |
|  3 | START_X               | x coordinate at the start point [$\text{px}$]                            |
|  4 | START_Y               | y coordinate at the start point [$\text{px}$]                            |
|  5 | STOP_X                | x coordinate at the stop point [$\text{px}$]                             |
|  6 | STOP_Y                | y coordinate at the stop point [$\text{px}$]                             |
|  7 | LENGTH_2EE            | direct end-to-end distance [$\text{px}$]                                 |
|  8 | MEAN_RESULTANT_LENGTH | mean resultant length                                                    |
|  9 | DIRECTION_FLAG        | up/down/left/right flag                                                  |
| 10 | DIRECTION_EE          | direction of end-to-end line [$\text{rad}$]                              |
| 11 | VEL_20                | velocity at the 20% percentile point [$\text{px}/\text{ms}$]             |
| 12 | VEL_50                | velocity at the 50% percentile point [$\text{px}/\text{ms}$]             |
| 13 | VEL_80                | velocity at the 80% percentile point [$\text{px}/\text{ms}$]             |
| 14 | ACC_20                | acceleration at the 20% percentile point [$\text{px}/\text{ms}^2$]       |
| 15 | ACC_50                | acceleration at the 50% percentile point [$\text{px}/\text{ms}^2$]       |
| 16 | ACC_80                | acceleration at the 80% percentile point [$\text{px}/\text{ms}^2$]       |
| 17 | MED_VEL_L3            | median velocity at the last 3 points [$\text{px}/\text{ms}$]             |
| 18 | LARGEST_DEVIATION_EE  | largest deviation from end-to-end line [$\text{px}$]                     |
| 19 | DEV_20                | deviation from end-to-end line at the 20% percentile point [$\text{px}$] |
| 20 | DEV_50                | deviation from end-to-end line at the 50% percentile point [$\text{px}$] |
| 21 | DEV_80                | deviation from end-to-end line at the 80% percentile point [$\text{px}$] |
| 22 | AVERAGE_DIRECTION     | average direction [$\text{rad}$]                                         |
| 23 | LENGTH_TRJ            | length of trajectory [$\text{px}$]                                       |
| 24 | RATIO_TRJ_2EE         | ratio of trajectory length to end-to-end distance                        |
| 25 | AVERAGE_VELOCITY      | average velocity [$\text{px}/\text{ms}$]                                 |
| 26 | MED_ACC_F5            | median acceleration at the first 5 points [$\text{px}/\text{ms}^2$]      |
| 27 | MID_PRESSURE          | mid-stroke pressure                                                      |
| 28 | MID_AREA              | mid-stroke area covered [$\text{px}^2$]                                  |
| 29 | MID_FINGER_ORIENT     | mid-stroke finger orientation                                            |
| 30 | PHONE_ORIENT          | phone orientation (0:portrait, 1:landscape)                              |


In [1]:
# import
import math
import numpy as np
import pandas as pd
from tqdm import tqdm


In [2]:
# file path
# Locations of the two input CSV files and of the feature table written at
# the end, keyed by a short role name.
PATHs = dict(
    rawdata="../data/rawdata.csv",
    user_info="../data/user_info.csv",
    features="../features/features.csv",
)

In [3]:
# Accumulators filled by the following cells: COLUMNs collects the names of
# the columns added so far, df_features receives the columns themselves.
COLUMNs = list()
df_features = pd.DataFrame()

### Load rawdata.csv, user_info.csv

In [4]:
# Read rawdata.csv into df_load. The empty frame is only a placeholder that
# is replaced as soon as the file has been parsed.
filepath = PATHs["rawdata"]
df_load = pd.DataFrame()
with open(filepath, mode="r") as csv_file:
    df_load = pd.read_csv(csv_file)

# df_load.tail(3)

In [5]:
# Read user_info.csv into df_user. The empty frame is only a placeholder that
# is replaced as soon as the file has been parsed.
filepath = PATHs["user_info"]
df_user = pd.DataFrame()
with open(filepath, mode="r") as csv_file:
    df_user = pd.read_csv(csv_file)

# df_user.tail(3)

### Preparation for feature extraction

In [6]:
# Seed df_features with one row per stroke: take the first df_load row of
# every STROKE_IDX and keep only the stroke-level identity columns.
new_cols = ["STROKE_IDX", "USER_ID", "SESSION_ID", "VER_HOR", "GAME_NUM", "STROKE_NUM", "DATA_POINTS"]
COLUMNs.extend(new_cols)

first_rows = df_load.drop_duplicates(subset="STROKE_IDX", keep="first")
df_features[new_cols] = first_rows[new_cols].reset_index(drop=True)

df_features = df_features.sort_values(by="STROKE_IDX")

# df_features.tail(3)

In [7]:
# df_user holds one row per participant with a 1ST_/2ND_/3RD_ prefixed copy
# of the same five fields. Stack them into one row per (participant, visit),
# numbered by N_TH_TIME, and attach the result to every stroke via USER_ID.
new_cols = ["PARTICIPANT_ID", "YMD", "DEVICE", "SCREEN_HEIGHT", "SCREEN_WIDTH", "N_TH_TIME"]
COLUMNs.extend(new_cols)

per_visit_fields = ["USER_ID", "YMD", "DEVICE", "SCREEN_HEIGHT", "SCREEN_WIDTH"]
visits = []
for n_th_time, prefix in enumerate(("1ST_", "2ND_", "3RD_"), start=1):
    wide_cols = ["PARTICIPANT_ID"] + [prefix + field for field in per_visit_fields]
    df_visit = df_user.loc[:, wide_cols].copy()
    # Strip the ordinal prefix so all visits share the same column names.
    df_visit = df_visit.rename(columns=lambda s: s.replace(prefix, ""))
    df_visit["N_TH_TIME"] = n_th_time
    visits.append(df_visit)

df_merge = pd.concat(visits, axis="index").reset_index(drop=True)
# Rows without a USER_ID are visits that have no data; drop them.
df_merge = df_merge[df_merge["USER_ID"].notnull()].reset_index(drop=True)
df_merge["YMD"] = df_merge["YMD"].astype(int).astype(str)

df_features = df_features.merge(df_merge, on="USER_ID", how="left")

# df_features.tail(3)

In [8]:
# START_TIME / STOP_TIME are the smallest and largest TIMESTAMP recorded for
# each stroke in df_load.
new_cols = ["START_TIME", "STOP_TIME"]
COLUMNs.extend(new_cols)

stroke_times = df_load.groupby("STROKE_IDX")["TIMESTAMP"].agg(["min", "max"])
df_merge = stroke_times.rename(columns={"min": "START_TIME", "max": "STOP_TIME"}).reset_index()

df_features = df_features.merge(df_merge, on="STROKE_IDX", how="left")

# df_features.tail(3)

### Extract features

In [9]:
#############
# Functions #
#############
def calc_length(p1, p2):
    """Return the Euclidean distance between two 2-D points.

    Both points are indexed as ``[0]`` (x) and ``[1]`` (y).
    """
    dx = math.fabs(p2[0] - p1[0])
    dy = math.fabs(p2[1] - p1[1])
    return math.sqrt(math.pow(dx, 2) + math.pow(dy, 2))

def calc_updw(p1, p2):
    """Classify the vertical direction of the move from ``p1`` to ``p2``.

    Returns "up" when the y value (index 1) does not increase, otherwise
    "down"; a move with no vertical change therefore counts as "up".
    """
    delta_y = p2[1] - p1[1]
    if delta_y <= 0:
        return "up"
    return "down"

def calc_lfrt(p1, p2):
    """Classify the horizontal direction of the move from ``p1`` to ``p2``.

    Returns "left" when the x value (index 0) does not increase, otherwise
    "right"; a move with no horizontal change therefore counts as "left".
    """
    delta_x = p2[0] - p1[0]
    if delta_x <= 0:
        return "left"
    return "right"

# The velocity between two points
def calc_velocity(p1, p2, t1, t2):
    """Return the speed of the straight move from ``p1`` to ``p2``.

    Args:
        p1: Start point, indexed as ``[0]`` (x) and ``[1]`` (y).
        p2: End point, same layout as ``p1``.
        t1: Time at ``p1``.
        t2: Time at ``p2``.

    Returns:
        Euclidean distance divided by ``t2 - t1``, or NaN when the two
        times are equal.
    """
    length = math.sqrt( math.pow( math.fabs(p2[0]-p1[0]), 2 ) + math.pow( math.fabs(p2[1]-p1[1]), 2 ) )
    duration = t2 - t1
    # Test for zero explicitly: dividing by a NumPy scalar zero does not raise
    # ZeroDivisionError, it yields inf plus a RuntimeWarning, so an except
    # clause would only cover plain Python numbers.
    if duration == 0:
        return np.nan
    return length / duration

# The acceleration between two points
def calc_accelerarion(v1, v2, t1, t2):
    """Return the change in velocity per unit time between two samples.

    Args:
        v1: Velocity at the first sample.
        v2: Velocity at the second sample.
        t1: Time of the first sample.
        t2: Time of the second sample.

    Returns:
        ``(v2 - v1) / (t2 - t1)``, or NaN when the two times are equal.
    """
    diff_vel = v2 - v1
    duration = t2 - t1
    # Test for zero explicitly: dividing by a NumPy scalar zero does not raise
    # ZeroDivisionError, it yields inf/nan plus a RuntimeWarning, so an except
    # clause would only cover plain Python numbers.
    if duration == 0:
        return np.nan
    return diff_vel / duration

# The area of the contact ellipse
def calc_area(rad_x, rad_y):
    """Return the area of an ellipse with semi-axes ``rad_x`` and ``rad_y``."""
    axes_product = rad_x * rad_y
    return axes_product * math.pi

# Quadrant numbering of the 2-dimensional space used for the angle
# (NOTE(review): the layout presumably assumes y grows downward, as in
# screen coordinates - confirm):
#  3 | 4
# ------
#  2 | 1
def calc_angle(p1, p2, sys="rad"):
    """Return the direction of the vector from ``p1`` to ``p2``.

    The angle comes from ``math.atan2`` applied to the y and x differences.
    It is returned in radians unless ``sys == "deg"``, in which case it is
    converted to degrees; any other ``sys`` value leaves it in radians.
    """
    delta_x = p2[0] - p1[0]
    delta_y = p2[1] - p1[1]
    angle = math.atan2(delta_y, delta_x)
    if sys != "deg":
        return angle
    return angle * ( 180.0 / math.pi )

# Calculate percentiles -> Return two points and a ratio.
def calc_percentile(count, perc=0.5, origin=0):
    """Locate a fractional position inside a sequence of ``count`` samples.

    The position is ``(count - 1) * perc + origin``. The result names the two
    integer indices that bracket it and the weight of each for linear
    interpolation, rounded to two decimals.

    Returns:
        A dict with keys ``p1``/``w1`` (lower index and its weight) and
        ``p2``/``w2`` (upper index and its weight). When the position is a
        whole number both indices are equal and each weight is 0.5.
    """
    position = ((count - 1) * perc) + origin
    lower = math.floor(position)
    upper = math.ceil(position)

    if lower == upper:
        weight_lower = round(0.5, 2)
        weight_upper = round(0.5, 2)
    else:
        # The closer the position is to an index, the larger that index's weight.
        weight_lower = round(1 - (position - lower), 2)
        weight_upper = round(1 - (upper - position), 2)

    return {"p1": lower, "w1": weight_lower, "p2": upper, "w2": weight_upper}

#### 1. Extract time features
- INTER_STROKE_TIME
- STROKE_DURATION

In [10]:
#--- INTER_STROKE_TIME ---#
# Gap between the previous stroke's STOP_TIME and this stroke's START_TIME,
# walking df_features in its current row order. The gap is 0 for the first
# row and whenever the USER_ID differs from the previous row's.
new_cols = ["INTER_STROKE_TIME"]
COLUMNs.extend(new_cols)

df_tmp = df_features[["STROKE_IDX", "USER_ID", "START_TIME", "STOP_TIME"]]
list_merge = []  # rows of [STROKE_IDX, INTER_STROKE_TIME]
previous = dict.fromkeys(df_tmp.columns)  # all-None sentinel before the first row
for current in df_tmp.to_dict(orient="records"):
    same_user_as_previous = not (current["USER_ID"] != previous["USER_ID"])
    if same_user_as_previous:
        inter_stroke_time = current["START_TIME"] - previous["STOP_TIME"]
    else:
        inter_stroke_time = 0

    list_merge.append([current["STROKE_IDX"], inter_stroke_time])
    previous = current

df_merge = pd.DataFrame(data=list_merge, columns=["STROKE_IDX", "INTER_STROKE_TIME"])
df_features = df_features.merge(df_merge, on="STROKE_IDX", how="left")

# df_features.tail(3)

In [11]:
#--- STROKE_DURATION ---#
# Elapsed time between a stroke's START_TIME and STOP_TIME.
new_cols = ["STROKE_DURATION"]
COLUMNs.extend(new_cols)

df_features["STROKE_DURATION"] = df_features["STOP_TIME"].sub(df_features["START_TIME"])

# df_features.tail(3)

#### 2. Extract features related to the start and stop points
- START_X
- START_Y
- STOP_X
- STOP_Y
- LENGTH_2EE
- DIRECTION_FLAG
- DIRECTION_EE

In [None]:
# Features that depend only on a stroke's first and last sample: the two end
# coordinates, the straight-line distance between them, a coarse direction
# flag and the direction angle of the end-to-end line.
new_cols = [
    "START_X", "START_Y", "STOP_X", "STOP_Y",
    "LENGTH_2EE", "DIRECTION_FLAG", "DIRECTION_EE",
]
COLUMNs.extend(new_cols)

list_merge = []  # rows of [STROKE_IDX] + new_cols

for user_id in df_features["USER_ID"].unique():
    # Work on one user's strokes and samples at a time.
    user_strokes = df_features.loc[(df_features["USER_ID"] == user_id), ["STROKE_IDX", "VER_HOR"]].copy()
    user_points = df_load.loc[(df_load["USER_ID"] == user_id), ["STROKE_IDX", "X_COOR", "Y_COOR"]].copy()

    user_rows = []
    for stroke in tqdm(user_strokes.to_dict(orient="records")):
        stroke_idx = stroke["STROKE_IDX"]
        in_stroke = user_points["STROKE_IDX"] == stroke_idx
        xy = user_points.loc[in_stroke, ["X_COOR", "Y_COOR"]].to_numpy()
        first_xy, last_xy = xy[0], xy[-1]

        # Strokes marked "v" get an up/down flag, all others left/right.
        if stroke["VER_HOR"] == "v":
            direction_flag = calc_updw(first_xy, last_xy)
        else:
            direction_flag = calc_lfrt(first_xy, last_xy)

        user_rows.append([
            stroke_idx,
            first_xy[0],
            first_xy[1],
            last_xy[0],
            last_xy[1],
            calc_length(first_xy, last_xy),
            direction_flag,
            calc_angle(first_xy, last_xy),
        ])

    list_merge.extend(user_rows)

    # Release the per-user copies before the next user is processed.
    del user_rows, user_strokes, user_points

df_merge = pd.DataFrame(data=list_merge, columns=(["STROKE_IDX"] + new_cols))
df_features = df_features.merge(df_merge, on="STROKE_IDX", how="left")

# df_features.tail(3)

#### 3. Extract the other features
 - MEAN_RESULTANT_LENGTH
 - VEL_20
 - VEL_50
 - VEL_80
 - ACC_20
 - ACC_50
 - ACC_80
 - MED_VEL_L3
 - LARGEST_DEVIATION_EE
 - DEV_20
 - DEV_50
 - DEV_80
 - AVERAGE_DIRECTION
 - LENGTH_TRJ
 - RATIO_TRJ_2EE
 - AVERAGE_VELOCITY
 - MED_ACC_F5
 - MID_PRESSURE
 - MID_AREA
 - MID_FINGER_ORIENT
 - PHONE_ORIENT

In [None]:
# Per-stroke features that need every sample of the stroke: velocity,
# acceleration and deviation profiles, direction statistics, trajectory
# length and the mid-stroke pressure/area.
new_cols = [
    "MEAN_RESULTANT_LENGTH",
    "VEL_20", "VEL_50", "VEL_80",
    "ACC_20", "ACC_50", "ACC_80",
    "MED_VEL_L3", "LARGEST_DEVIATION_EE",
    "DEV_20", "DEV_50", "DEV_80",
    "AVERAGE_DIRECTION", "LENGTH_TRJ", "RATIO_TRJ_2EE",
    "AVERAGE_VELOCITY", "MED_ACC_F5", "MID_PRESSURE",
    "MID_AREA", "MID_FINGER_ORIENT", "PHONE_ORIENT"
    ]
COLUMNs += new_cols


def _value_at(values, pct):
    """Interpolate ``values`` at a position described by ``calc_percentile``."""
    return values[pct["p1"]] * pct["w1"] + values[pct["p2"]] * pct["w2"]


def _ratio_or_nan(numerator, denominator):
    """Return ``numerator / denominator``, or NaN when the denominator is zero.

    An explicit test is used because a zero NumPy scalar yields inf/nan with
    a warning rather than raising ZeroDivisionError.
    """
    if denominator == 0:
        return np.nan
    return numerator / denominator


list_merge = []  # rows of [STROKE_IDX] + new_cols

arr_user = df_features["USER_ID"].unique()
for user_id in arr_user:
    list_tmp = []
    df_tmp1 = df_features.loc[(df_features["USER_ID"]==user_id), ["STROKE_IDX", "VER_HOR"]].copy()
    df_tmp2 = df_load.loc[(df_load["USER_ID"]==user_id), ["STROKE_IDX", "DATA_POINTS", "POINTS_IDX", "TIMESTAMP", "X_COOR", "Y_COOR", "FORCE", "X_RADIUS", "Y_RADIUS"]].copy()

    for dict_stroke in tqdm(df_tmp1.to_dict(orient='records')):
        list_onestroke = df_tmp2.loc[(df_tmp2["STROKE_IDX"]==dict_stroke["STROKE_IDX"]), :].to_dict(orient='records')

        # Number of samples as recorded in DATA_POINTS, and the number of
        # segments between consecutive samples.
        strkcount = list_onestroke[0]["DATA_POINTS"]
        n_segments = strkcount - 1

        # Interpolation positions of the 20%, 50% and 80% points
        strk_20 = calc_percentile( strkcount, perc=0.2 )
        strk_50 = calc_percentile( strkcount, perc=0.5 )
        strk_80 = calc_percentile( strkcount, perc=0.8 )

        # Coordinates and times at the start and stop points
        start_coor = np.array([list_onestroke[0]["X_COOR"], list_onestroke[0]["Y_COOR"]])
        start_time = list_onestroke[0]["TIMESTAMP"]
        stop_coor  = np.array([list_onestroke[-1]["X_COOR"], list_onestroke[-1]["Y_COOR"]])
        stop_time  = list_onestroke[-1]["TIMESTAMP"]

        # End-to-end line. It is the same for every sample of the stroke, so
        # compute it once. When start and stop coincide the line has no
        # direction and every deviation feature is reported as NaN.
        ee_vec = stop_coor - start_coor
        ee_norm = np.linalg.norm(ee_vec)
        has_ee_line = (ee_norm != 0)

        #--- Per-sample series; index 0 is the start point ---#
        forces = [list_onestroke[0]["FORCE"]]
        x_rads = [list_onestroke[0]["X_RADIUS"]]
        y_rads = [list_onestroke[0]["Y_RADIUS"]]
        vels = [0]   # no velocity is defined at the first sample
        devs = [0]   # the start point lies on the end-to-end line
        cos_sum = 0
        sin_sum = 0
        direction_sum = 0
        length_trj_sum = 0

        #--- Analyze the stroke segment by segment ---#
        for seg in range(n_segments):
            sample1 = list_onestroke[seg]
            sample2 = list_onestroke[seg + 1]
            p1 = np.array([sample1["X_COOR"], sample1["Y_COOR"]])
            t1 = sample1["TIMESTAMP"]
            p2 = np.array([sample2["X_COOR"], sample2["Y_COOR"]])
            t2 = sample2["TIMESTAMP"]

            # Force and contact radii at the segment's end sample
            forces.append(sample2["FORCE"])
            x_rads.append(sample2["X_RADIUS"])
            y_rads.append(sample2["Y_RADIUS"])

            # Velocity over the segment
            vels.append(calc_velocity(p1, p2, t1, t2))

            # Perpendicular distance of p2 from the end-to-end line. The
            # z-component of the 2-D cross product is written out by hand
            # because np.cross on 2-element vectors is deprecated in NumPy 2.
            if has_ee_line:
                vec2 = p2 - start_coor
                cross_z = ee_vec[0] * vec2[1] - ee_vec[1] * vec2[0]
                devs.append(abs(cross_z / ee_norm))

            # Segment direction: feeds MEAN_RESULTANT_LENGTH (cos/sin sums)
            # and AVERAGE_DIRECTION (plain sum)
            rad = calc_angle(p1, p2, sys="rad")
            cos_sum += math.cos(rad)
            sin_sum += math.sin(rad)
            direction_sum += rad

            # LENGTH_TRJ: length of the trajectory
            length_trj_sum += calc_length(p1, p2)

        #--- Accelerations between consecutive velocities ---#
        accs = [0]
        for seg in range(n_segments):
            accs.append(calc_accelerarion(
                vels[seg],
                vels[seg + 1],
                list_onestroke[seg]["TIMESTAMP"],
                list_onestroke[seg + 1]["TIMESTAMP"],
            ))

        # MEAN_RESULTANT_LENGTH (NaN for a stroke without segments)
        resultant = math.sqrt( math.pow( cos_sum, 2 ) + math.pow( sin_sum, 2 ) )
        mean_resultant_length = _ratio_or_nan(resultant, n_segments)
        # VEL_20 / VEL_50 / VEL_80
        vel_20 = _value_at(vels, strk_20)
        vel_50 = _value_at(vels, strk_50)
        vel_80 = _value_at(vels, strk_80)
        # ACC_20 / ACC_50 / ACC_80
        acc_20 = _value_at(accs, strk_20)
        acc_50 = _value_at(accs, strk_50)
        acc_80 = _value_at(accs, strk_80)
        # MED_VEL_L3
        med_vel_l3 = np.median(vels[-3:])
        # LARGEST_DEVIATION_EE, DEV_20 / DEV_50 / DEV_80
        if has_ee_line:
            largest_deviation_ee = np.nanmax(devs)
            dev_20 = _value_at(devs, strk_20)
            dev_50 = _value_at(devs, strk_50)
            dev_80 = _value_at(devs, strk_80)
        else:
            largest_deviation_ee = np.nan
            dev_20 = np.nan
            dev_50 = np.nan
            dev_80 = np.nan
        # AVERAGE_DIRECTION
        # NOTE(review): this is the arithmetic mean of atan2 angles, which
        # wraps at +/-pi (segments pointing just above and just below the
        # negative x axis average to about 0). Confirm whether a circular
        # mean, atan2(sin_sum, cos_sum), is what is intended.
        average_direction = _ratio_or_nan(direction_sum, n_segments)
        # LENGTH_TRJ
        length_trj = length_trj_sum
        # RATIO_TRJ_2EE
        ratio_trj_2ee = _ratio_or_nan(length_trj_sum, calc_length(start_coor, stop_coor))
        # AVERAGE_VELOCITY
        average_velocity = _ratio_or_nan(length_trj_sum, stop_time - start_time)
        # MED_ACC_F5
        med_acc_f5 = np.median(accs[:5])
        # MID_PRESSURE
        mid_pressure = _value_at(forces, strk_50)
        # MID_AREA: ellipse area from the interpolated mid-stroke radii
        mid_area = calc_area( _value_at(x_rads, strk_50), _value_at(y_rads, strk_50) )
        # MID_FINGER_ORIENT: not computed in this notebook, always NaN
        mid_finger_orient = np.nan
        # PHONE_ORIENT: hard-coded to 0 for every stroke
        phone_orient = 0

        # Row order must match ["STROKE_IDX"] + new_cols
        list_tmp.append([
            dict_stroke["STROKE_IDX"],
            mean_resultant_length,
            vel_20,
            vel_50,
            vel_80,
            acc_20,
            acc_50,
            acc_80,
            med_vel_l3,
            largest_deviation_ee,
            dev_20,
            dev_50,
            dev_80,
            average_direction,
            length_trj,
            ratio_trj_2ee,
            average_velocity,
            med_acc_f5,
            mid_pressure,
            mid_area,
            mid_finger_orient,
            phone_orient
        ])

    # Append this user's rows to the overall list
    list_merge += list_tmp

    # Release the per-user copies before the next user is processed
    del list_tmp, df_tmp1, df_tmp2

df_merge = pd.DataFrame(data=list_merge, columns=(["STROKE_IDX"]+new_cols))
df_features = df_features.merge(df_merge, on="STROKE_IDX", how="left")

# df_features.tail(3)

### Output features.csv

In [None]:
# Reindex: put the 15 user-information attributes first, followed by the 30
# touchalytics features, in the order of the tables at the top of the notebook.
user_info_cols = [
    "PARTICIPANT_ID", "USER_ID", "DEVICE", "SCREEN_HEIGHT", "SCREEN_WIDTH",
    "YMD", "N_TH_TIME", "SESSION_ID", "VER_HOR", "GAME_NUM",
    "STROKE_NUM", "DATA_POINTS", "START_TIME", "STOP_TIME", "STROKE_IDX",
]
touch_feature_cols = [
    "INTER_STROKE_TIME", "STROKE_DURATION",
    "START_X", "START_Y", "STOP_X", "STOP_Y",
    "LENGTH_2EE", "MEAN_RESULTANT_LENGTH", "DIRECTION_FLAG", "DIRECTION_EE",
    "VEL_20", "VEL_50", "VEL_80",
    "ACC_20", "ACC_50", "ACC_80",
    "MED_VEL_L3", "LARGEST_DEVIATION_EE",
    "DEV_20", "DEV_50", "DEV_80",
    "AVERAGE_DIRECTION", "LENGTH_TRJ", "RATIO_TRJ_2EE", "AVERAGE_VELOCITY",
    "MED_ACC_F5", "MID_PRESSURE", "MID_AREA", "MID_FINGER_ORIENT", "PHONE_ORIENT",
]
col_order = user_info_cols + touch_feature_cols

df_features = df_features.reindex(columns=col_order)

# df_features.tail(3)

In [None]:
# Write the finished feature table to features.csv. The DataFrame index is
# written too, as the first column, because to_csv is left at its defaults.
features_path = PATHs["features"]
df_features.to_csv(features_path)