In [1]:
import glob
import os

import pandas as pd

In [2]:
def read_user_data(path='./EMG_data_for_gestures-master/', n_users=36):
    """
    A function that reads all the user data and prepares them for future use.

    Every user is expected to own a sub-directory ``<path>/<user number>/``
    (user numbers start at 1) holding tab-separated ``.txt`` files that
    contain a ``time`` column. All files of a user are concatenated into a
    single DataFrame whose rows are then sorted by ``time``.

    :param path: root directory of the dataset
    :param n_users: number of user sub-directories (1..n_users) to read
    :return: a list of all the user data, one DataFrame per user
    :raises FileNotFoundError: if a user directory contains no ``.txt`` file
    """

    # a list holding the data for all users
    user_data = []

    for user_id in range(1, n_users + 1):
        pattern = os.path.join(path, str(user_id), "*.txt")

        # glob returns matches in arbitrary order; sorting keeps the merge
        # order identical across platforms and runs
        filenames = sorted(glob.glob(pattern))
        if not filenames:
            raise FileNotFoundError(
                "no .txt files found for user {} ({})".format(user_id, pattern)
            )

        # read however many files the user has instead of assuming exactly
        # two, so extra files no longer overflow a fixed-size list and a
        # single file is not concatenated with an empty placeholder frame
        frames = [pd.read_csv(filename, sep="\t") for filename in filenames]

        merged_df = pd.concat(frames, axis=0, ignore_index=True)
        user_data.append(merged_df.sort_values(by=['time']))

    return user_data

In [3]:
def get_summary_stats(user_data, out_path="summary_stats.txt"):
    """
    A function that computes the summary statistics of the list of user_data
    and writes them to a text file.

    For every DataFrame in ``user_data`` the per-column mean, standard
    deviation, minimum and maximum are collected into a table with the
    columns ``mean``, ``SD``, ``min`` and ``max``. The tables are written to
    ``out_path`` one after another, each under a ``---- user N ----`` header
    (N starts at 1).

    :param user_data: an iterable of DataFrames, one per user
    :param out_path: file the report is written to (overwritten if present)
    :return: None
    """

    columns = ["mean", "SD", "min", "max"]

    summary_stats = []
    for user in user_data:
        per_column = [
            user.mean(axis=0).to_frame(),
            user.std(axis=0).to_frame(),
            user.min(axis=0).to_frame(),
            user.max(axis=0).to_frame(),
        ]
        result = pd.concat(per_column, axis=1, ignore_index=True)
        result.columns = columns
        summary_stats.append(result)

    # the context manager closes the file even if formatting or writing fails
    with open(out_path, "w") as f:
        f.write("Summary stats for all users\n\n")

        for i, summary in enumerate(summary_stats):
            f.write("---- user {} ----\n".format(i+1))
            f.write(str(summary))
            f.write("\n\n")
    

In [4]:
def get_class_distribution(user_data, out_path="class_distribution.txt"):
    """
    A function that counts how often each value of the ``class`` column
    occurs for every user and writes the counts to a text file.

    The counts of each user are written to ``out_path`` under a
    ``---- user N ----`` header (N starts at 1).

    :param user_data: an iterable of DataFrames, each with a ``class`` column
    :param out_path: file the report is written to (overwritten if present)
    :return: a list with one DataFrame of value counts per user
    """
    counts = []

    # the context manager closes the file even if a frame lacks the 'class'
    # column or writing fails part-way through
    with open(out_path, "w") as f:
        f.write("Class distributions stats for all users\n\n")

        for i, user in enumerate(user_data):
            f.write("---- user {} ----\n".format(i+1))
            count = pd.DataFrame(user['class'].value_counts())
            f.write(str(count))
            f.write("\n\n")
            counts.append(count)

    return counts

In [5]:
# get all the user data
# load every user's recordings and discard the rows that contain missing values
user_data = [frame.dropna() for frame in read_user_data()]

In [6]:
# sanity check: print the number of missing values left in each column, per user
for user in user_data:
    missing_per_column = user.isna().sum()
    print(missing_per_column)

time        0
channel1    0
channel2    0
channel3    0
channel4    0
channel5    0
channel6    0
channel7    0
channel8    0
class       0
dtype: int64
time        0
channel1    0
channel2    0
channel3    0
channel4    0
channel5    0
channel6    0
channel7    0
channel8    0
class       0
dtype: int64
time        0
channel1    0
channel2    0
channel3    0
channel4    0
channel5    0
channel6    0
channel7    0
channel8    0
class       0
dtype: int64
time        0
channel1    0
channel2    0
channel3    0
channel4    0
channel5    0
channel6    0
channel7    0
channel8    0
class       0
dtype: int64
time        0
channel1    0
channel2    0
channel3    0
channel4    0
channel5    0
channel6    0
channel7    0
channel8    0
class       0
dtype: int64
time        0
channel1    0
channel2    0
channel3    0
channel4    0
channel5    0
channel6    0
channel7    0
channel8    0
class       0
dtype: int64
time        0
channel1    0
channel2    0
channel3    0
channel4    0
channel5    

In [7]:
# write the per-user summary statistics report to disk
get_summary_stats(user_data=user_data)

In [8]:
# write the class distribution report and keep the per-user counts for later use
class_dis = get_class_distribution(user_data=user_data)