In [18]:
import os
from random import randint
import numpy as np
import pandas as pd
from utils import list_files_in_directory, create_folder
from collections import namedtuple

In [10]:
def group_overlap_seconds(group1, group2):
    overlap_sec = 0
    for interval1 in group1:
        for interval2 in group2:
            overlap_sec += overlap_seconds(interval1, interval2)

    return overlap_sec


def overlap_seconds(interval1, interval2):
    dt_s1 = interval1[0]
    dt_e1 = interval1[1]
    dt_s2 = interval2[0]
    dt_e2 = interval2[1]

    Range = namedtuple('Range', ['start', 'end'])
    r1 = Range(start=dt_s1, end=dt_e1)
    r2 = Range(start=dt_s2, end=dt_e2)
    latest_start = max(r1.start, r2.start)
    earliest_end = min(r1.end, r2.end)
    overlap_sec = (earliest_end - latest_start).total_seconds()
    overlap_sec = max(0, overlap_sec)

    return overlap_sec

In [8]:
settings = {}
settings["TIMEZONE"] = 'Etc/GMT+6'

SUBJS = ['202','205','209','211','212','215','218']
#  205 Eating, empty
# 
ANNO_DIR = '/Volumes/Seagate/SHIBO/MD2K/ANNOTATION/'
DATA_DIR = '/Volumes/Seagate/SHIBO/MD2K/RESAMPLE/'
OUT_DIR = '/Volumes/Seagate/SHIBO/MD2K/DATA_LABEL/'
PLOT_DIR = '/Volumes/Seagate/SHIBO/MD2K/PLOT/'

DEVICE = 'RIGHT_WRIST'
SENSOR = 'ACC_GYR'



In [None]:
for SUBJ in SUBJS:
    
    if SUBJ == '212':
        ACTIVITIES = ['Smoking_Eating','False']
        fullname = {'Smoking_Eating':'inlab_eating_smoking', \
                    'False':'inlab_false_alarm'
                   }
        activity_dict = {'Smoking_Eating': ['sync','fd','fn','ft','dd','dn','dt','fod','fon','sd','sn','st','ld','ln','cn','cd','ct'],\
                         'False': ['sync','cn','cd','ct']
                        }
    else:  
        ACTIVITIES = ['False','Eating','Smoking']
        fullname = {'Smoking':'inlab_smoking', \
                    'False':'inlab_false_alarm', \
                    'Eating':'inlab_eating'
                   }
        activity_dict = {'Smoking': ['sd','sync','sn','st','ld','ln','cn','cd','ct'],\
                         'False': ['cd','cn','sync','ct'], \
                         'Eating':['fd','sync','fn','ft','dd','dn','dt','fod','fon','cn','cd','ct']
                        }

    LABEL_FOLDER = os.path.join(ANNO_DIR, SUBJ)
    DATA_FOLDER = os.path.join(DATA_DIR, SUBJ, DEVICE, SENSOR, 'DATA')
    OUT_FOLDER = os.path.join(OUT_DIR, SUBJ, DEVICE, SENSOR)
    PLOT_FOLDER = os.path.join(PLOT_DIR, SUBJ, DEVICE, SENSOR)    
    SAMPLE_FOLDER = os.path.join(OUT_DIR, SUBJ, DEVICE, SENSOR, 'SAMPLE', 'RAW')



    for ACTIVITY in ACTIVITIES:
        
        print(SUBJ)
        print(ACTIVITY)

        label_file = os.path.join(LABEL_FOLDER, fullname[ACTIVITY]+'.csv')

        # read label file
        annot_df = pd.read_csv(label_file)
        annot_df['start'] = pd.to_datetime(annot_df['start'], utc=True)\
                            .dt.tz_convert(settings["TIMEZONE"])
        annot_df['end'] = pd.to_datetime(annot_df['end'], utc=True)\
                            .dt.tz_convert(settings["TIMEZONE"])
        
        # read acc file
        data_file = [file for file in list_files_in_directory(DATA_FOLDER) if (('Day0' in file) and ('.csv' in file))][0]
        data_path = os.path.join(DATA_FOLDER, data_file)
        data_df = pd.read_csv(data_path)

        data_df['Unixtime'] = data_df['time']
        data_df['time'] = pd.to_datetime(data_df['time'],unit='ms',utc=False)
        data_df = data_df.set_index(['time'])
        data_df.index = data_df.index.tz_localize('UTC').tz_convert(settings["TIMEZONE"])
        
        
        act_data_df = data_df[(data_df.index >= annot_df.start.iloc[0]) & (data_df.index < annot_df.end.iloc[-1])]
        
        
        

        act_dur = []
        act_st_list = list(annot_df.start.tolist())
        act_end_list = list(annot_df.end.tolist())
        for n in range(len(act_st_list)):
            act_dur.append([act_st_list[n],act_end_list[n]])
        
        count = 0
        
        rand_dur = []
        
        create_folder(os.path.join(SAMPLE_FOLDER, ACTIVITY, 'null'))
        
        while count < len(act_data_df)/200: #20Hz, '/200' means 1 null segment every 10seconds
            randstart_idx = randint(0, len(act_data_df)-60)
            randlen = randint(30, 60)
            randstart = act_data_df.index[randstart_idx]
            randend = act_data_df.index[randstart_idx+randlen]


            print(randstart)
            print(randend)



            n = group_overlap_seconds(act_dur,[[randstart,randend]])
            if n > 0:
                continue
            
            save_df = act_data_df[(act_data_df.index >= randstart) & (act_data_df.index < randend)]
            savepath = os.path.join(SAMPLE_FOLDER, ACTIVITY, 'null', 'null_raw_'+str(count)+'.csv')
            print(save_df)
            save_df[['accx','accy','accz','rotx','roty','rotz']].to_csv(savepath, index=None)
            count += 1
        


202
False
2017-06-26 10:50:41.344000-06:00
2017-06-26 10:50:43.969000-06:00
                                      accx      accy      accz      rotx  \
time                                                                       
2017-06-26 10:50:41.344000-06:00  0.466479  0.340922  0.868746  0.254313   
2017-06-26 10:50:41.406000-06:00  0.462175  0.346873  0.874238 -0.000831   
2017-06-26 10:50:41.469000-06:00  0.462477  0.345967  0.873709  0.141937   
2017-06-26 10:50:41.531000-06:00  0.462779  0.345060  0.873180  0.115196   
2017-06-26 10:50:41.594000-06:00  0.463082  0.344154  0.872652 -0.132268   
2017-06-26 10:50:41.656000-06:00  0.463696  0.344192  0.872196 -0.163253   
2017-06-26 10:50:41.719000-06:00  0.464377  0.344432  0.871755 -0.148154   
2017-06-26 10:50:41.781000-06:00  0.465057  0.344673  0.871315 -0.133295   
2017-06-26 10:50:41.844000-06:00  0.465738  0.344913  0.870874 -0.135482   
2017-06-26 10:50:41.906000-06:00  0.466419  0.345153  0.870433 -0.142951   
2017-06-26 1