In [None]:
import pandas as pd
import os

In [None]:
def map(key: int) -> str:
    """Translate a numeric key code into the label of the key to press.

    Letter codes are reported as the lowercase letter regardless of case,
    digit codes are reported as the digit itself, a few control and
    punctuation codes have fixed labels, and any other code yields '#'.

    Note: defining this function shadows the builtin ``map`` for every cell
    that runs afterwards.
    """
    # codes that translate to a fixed label
    fixed_labels = {
        8: 'Backspace',
        13: 'Enter',
        32: 'Space',
        44: ',',
        45: '-',
        46: '.',
    }

    # uppercase letters (65-90) are shifted by 32 onto their lowercase form
    if 65 <= key <= 90:
        return chr(key + 32)

    # lowercase letters (97-122) and digits (48-57) are their own character
    if 97 <= key <= 122 or 48 <= key <= 57:
        return chr(key)

    # everything else: fixed label if known, '#' as the catch-all
    return fixed_labels.get(key, '#')

In [None]:
# Locate the directory of every user (user1 ... user31) below the dataset root.
user_dir_path = './../../KEYSTROKE-SAMPLES-31-USERS/USERS/'

# Built with a comprehension so that no loop variable leaks into the kernel
# namespace (the previous loop variable shadowed the builtin `dir`).
user_dirs = [
    os.path.join(user_dir_path, f'user{user_i}')
    for user_i in range(1, 32)
]

# Fail early, and name the directory that is missing.
for user_dir in user_dirs:
    assert os.path.isdir(user_dir), f'missing user directory: {user_dir}'


In [None]:
# read all files in
user_files = []

user_dataframes = []

# Value at which the raw timestamps are treated as wrapping back to 0.
# NOTE(review): 100_000 is carried over unchanged from the original
# wrap-around arithmetic -- confirm it against the dataset description.
TIMESTAMP_WRAP = 100_000


def _read_key_events(file_path):
    """Parse one raw sample file into parallel (timestamps, keys) lists.

    The file is read as one integer per line, alternating between a timestamp
    and the key code that belongs to it. A trailing timestamp without a key
    code is dropped.

    Raises:
        ValueError: if a line cannot be parsed as an integer.
    """
    with open(file_path, 'r') as open_file:
        values = [int(line) for line in open_file.read().splitlines()]

    # even positions hold timestamps, odd positions the matching key codes
    keys = values[1::2]
    timestamps = values[0::2][:len(keys)]
    return timestamps, keys


def _distances_and_absolutes(timestamps, wrap=TIMESTAMP_WRAP):
    """Turn raw timestamps into per-key distances and running totals.

    Returns two lists of the same length as ``timestamps``:
    the distance of every event to its predecessor (0 for the first event)
    and the cumulative sum of those distances. When a timestamp is smaller
    than its predecessor the counter is treated as having wrapped at ``wrap``.
    """
    distances = []
    absolutes = []
    elapsed = 0
    last = None
    for current in timestamps:
        if last is None:
            # first event of the file has no predecessor
            step = 0
        elif current >= last:
            # monotonic increment (or no delay to the next key)
            step = current - last
        else:
            # counter was reset: remainder up to the wrap point + new value
            step = (wrap - last) + current
        elapsed += step
        distances.append(step)
        absolutes.append(elapsed)
        last = current
    return distances, absolutes


# read all user data
for (user_i, user_dir) in enumerate(user_dirs):
    user_id = user_i + 1  # directories and file names are numbered from 1
    file_dataframes = []

    # read all files for user
    for fi in range(1, 16):
        file_name = f'user{user_id}-{fi}'
        file_path = os.path.join(user_dir, file_name)

        timestamps, keys = _read_key_events(file_path)
        if not keys:
            # previously surfaced as pandas' "No objects to concatenate"
            raise ValueError(f'no key events found in {file_path}')

        distances, absolutes = _distances_and_absolutes(timestamps)
        assert len(distances) == len(absolutes) == len(keys)

        # Build the frame in one go instead of concatenating one single-row
        # frame per key event. The row index is now 0..n-1 rather than all
        # zeros; the exported CSV is written without the index.
        user_file_df = pd.DataFrame({
            'user': [user_id] * len(keys),
            'set': [fi] * len(keys),
            'timestamp': absolutes,
            'distance': distances,
            'key': keys,
            # `map` is the key-code helper defined in this notebook,
            # not the builtin
            'to_press': [map(key) for key in keys],
        })

        file_dataframes.append(user_file_df)

    # create 1 dataframe containing all data for user
    user_dataframe = pd.concat(file_dataframes)

    user_dataframes.append(user_dataframe)
    # report the 1-based user number, matching the 'user' column
    print(f"User {user_id} finished...")

In [None]:
# Stack the per-user frames into one table and write it out as CSV,
# leaving the row index out of the file.
df = pd.concat(user_dataframes)
df.to_csv(path_or_buf='./FreeText-Dataset-31-USERS.csv', index=False)