# Fetch the data

In [None]:
import os
import glob
import pandas as pd

raw_data_path = "/content/drive/My Drive/ba/ba_data"

# Names of the immediate sub-directories of the data root (one per recording).
raw_data_folders = next(os.walk(raw_data_path))[1]

# Folder key (text before the first "@") -> paths of every file in that folder.
raw_data_files_by_folder = {}
for raw_data_folder in raw_data_folders:
    folder_key = raw_data_folder.split("@")[0]
    raw_data_files_by_folder[folder_key] = glob.glob(f"{raw_data_path}/{raw_data_folder}/*")

# Folder key -> {file name up to ".csv" -> DataFrame parsed from that file}.
df_by_file_by_folder = {}
for folder, file_paths in raw_data_files_by_folder.items():
    frames_by_file = {}
    for file_path in file_paths:
        file_key = file_path.split("/")[-1].split(".csv")[0]
        frames_by_file[file_key] = pd.read_csv(file_path)
    df_by_file_by_folder[folder] = frames_by_file

  


# Clean the data & check for integrity

In [None]:
def normalize_columns(df):
    """Standardise every column except "dt" and return the same DataFrame.

    Each non-"dt" column is replaced, in place, by
    ``(column - column.mean()) / column.std()``. The "dt" column is left as is.

    Args:
        df: DataFrame that contains a "dt" column.

    Returns:
        The ``df`` object that was passed in (mutated).
    """
    value_columns = df.drop(["dt"], axis=1).columns.tolist()
    values = df[value_columns]
    df[value_columns] = (values - values.mean()) / values.std()
    return df

def shift_dt_to_zero(folder):
    """Shift the "dt" column of every DataFrame in ``folder`` so the earliest value is 0.

    The offset is the smallest "dt" found across all files of the folder, so
    the files stay aligned with each other after the shift.

    The entries of ``df_by_file_by_folder[folder]`` are replaced by new
    DataFrames (built with ``DataFrame.assign``) instead of writing into the
    existing ones; this avoids pandas' SettingWithCopyWarning when the stored
    frames were produced by ``dropna``.

    Args:
        folder: Key into the module-level ``df_by_file_by_folder`` mapping.
    """
    dfs_by_file = df_by_file_by_folder[folder]
    if not dfs_by_file:
        # Nothing to shift; min() over an empty sequence would raise.
        return

    # The previous seed value `["bno"]["dt"][100]` was a label lookup that
    # raised KeyError whenever row 100 was missing; it could never be smaller
    # than the "bno" minimum, so it is not needed.
    min_dt = min(df["dt"].min() for df in dfs_by_file.values())

    # list(...) so the dict values can be rebound while looping.
    for filename, df in list(dfs_by_file.items()):
        dfs_by_file[filename] = df.assign(dt=df["dt"] - min_dt)



for folder, df_by_files in df_by_file_by_folder.items():
    for filename, df in df_by_files.items():
        # 1. Coerce every cell to a number (unparseable cells become NaN).
        # 2. Drop every row that contains at least one NaN.
        # The steps are chained on purpose: previously dropna() was called on
        # the original `df`, which silently discarded the coercion result.
        # .copy() gives an independent frame so later column writes do not
        # raise SettingWithCopyWarning.
        df_by_files[filename] = (
            df.apply(pd.to_numeric, errors="coerce")
            .dropna(axis=0, how="any")
            .copy()
        )

        # Normalize (currently disabled)
        # df_by_files[filename] = normalize_columns(df_by_files[filename])

    # Shift dt so that the earliest sample of the folder is at 0
    shift_dt_to_zero(folder)


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy


# For every folder plot its dfs in a single figure

In [None]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def plot_folder(folder_name):
    """Plot every signal of one recording folder into a single HTML figure.

    One trace is added per (file, column) pair, with "dt" on the x axis and
    the trace named ``"<file>:<column>"``. The figure is written to
    ``plots/<folder_name>-plot.html`` and opened automatically.

    NOTE(review): this expects each DataFrame to still have a "dt" column, so
    it has to run before the resampling cell, which moves "dt" into the index.

    Args:
        folder_name: Key into the module-level ``df_by_file_by_folder`` mapping.
    """
    fig = make_subplots()

    for filename, df in df_by_file_by_folder[folder_name].items():
        for column in df.columns.drop("dt"):
            fig.add_trace(
                go.Scatter(
                    x=df["dt"],
                    y=df[column],
                    name=f"{filename}:{column}",
                )
            )

    fig.update_layout(title_text=folder_name)

    # write_html does not create missing directories.
    os.makedirs("plots", exist_ok=True)
    fig.write_html(f"plots/{folder_name}-plot.html", auto_open=True)

In [None]:
# for folder, _ in df_by_file_by_folder.items():
#     plot_folder(folder)

# plot_folder("amelbenn")

# Interpolate the data

In [None]:
# labels & meta
#
# Per-recording annotations, keyed by the same folder keys as df_by_file_by_folder.
#   "timeranges":   [start, end] pairs. is_bruxism() divides both values by 1_000
#                   before converting them to a timestamp, i.e. they are treated as
#                   milliseconds; a sample inside any pair is labelled 1.
#   "device_click": passed to resample_df() as `cut_from` (also divided by 1_000 there),
#                   so resampled data before this instant is discarded.
#   "audio_click":  not referenced by the code visible in this notebook.
# NOTE(review): a few ranges below look inconsistent and carry a NOTE(review) comment;
# please check them against the original annotations.

labels = {
    "nasekoch": {
        "timeranges": [
            [119231, 121265],
            [123114, 125234],
            [127166, 129195],

            [131416, 135005],
            [137112, 140208],
            [141967, 145829],

            [213176, 215265],
            [217916, 220249],
            [222218, 224873],

            [226591, 230245],
            [232566, 235830],
            [237661, 240948],

            [254020, 256262],
            [258132, 260878],
            [262545, 264712],

            [266796, 270863],
            [273278, 277297],
            [278471, 282116],

            [286196, 288841],
            [290504, 293305],
            [295206, 297897],

            [299069, 303018],
            [304074, 308569],
            [309612, 314484],


            [316669, 319023],
            [320487, 322442],
            [324795, 327586],

            [329284, 333153],
            [334747, 338843],
            [340063, 344857],

            [346152, 349589],
            [350828, 354071],
            [355596, 358868],

            [360244, 363559],
            [364294, 367172],
            [368247, 371181],

            [372564, 377512],
            [378799, 382830],
            [383872, 387873],

            [409311, 421889],
            [423702, 430391],
            [432341, 438502],
            [439865, 446016],
            [447409, 452958],
            [455100, 461470],
            [462625, 468570],
            [469797, 476224],
            [477219, 482523],

            [488079, 493301],
            [495953, 500832],
            [502757, 507027],
            [508999, 514103],
            [515823, 521853],
            [530799, 536112],
            [537185, 543296],
            [544395, 549405],
            [551660, 556469],
            [558195, 563827],
        ],
        "device_click": 100343,
        "audio_click": 69103
    },

    "luormiau": {
        "timeranges": [
            [196579, 199536],
            [200812, 205850],
            [209850, 212457],

            [213857, 216205],
            [217876, 220164],

            [223528, 226715],
            [228685, 231941],
            [233586, 236738],

            [240941, 242987],
            [244759, 247407],
            [248819, 250573],

            [254170, 257298],
            [259544, 263058],
            [265666, 268381],

            [276129, 278100],
            [279762, 282116],
            [283817, 286157],

            [287826, 289879],
            [292133, 295187],
            [296909, 300406],
            [302109, 306252],

            [310601, 312638],
            [314621, 316777],
            [318390, 320371],

            [323878, 326618],
            [328468, 331954],
            [333249, 336584],

            [347300, 349949],
            [352186, 354417],
            [355726, 358012],

            [360465, 363767],
            [364941, 367900],
            [369325, 372565],

            [376743, 378867],
            [380871, 382932],
            [384763, 386805],

            [389744, 392896],
            [394752, 398133],
            [399712, 402599],

            [436746, 438720],
            [441068, 443564],
            [445372, 447326],

            [449504, 452504],
            [454541, 457952],
            [459532, 462553],

            [493863, 497936],
            [499855, 504414],
            [505920, 510301],

            [511866, 517356],
            [519535, 524903],
            [526196, 530708],

            [531953, 536973],
            [539118, 544965],
            [546397, 552252],
            [553308, 558253],

            [568278, 573629],
            [575747, 581207],
            [582965, 588258],

            [589835, 593957],
            [595475, 599705],
            [601162, 606757],

            [607976, 613243],
            [614104, 619561],
            [620908, 626441],
            [628571, 634000]
        ],
        "device_click": 174048,
        "audio_click": 144660
    },

    "liiukoim": {
        "timeranges": [
            [107030, 110370],
            [112986, 115653],
            [117908, 119636],
            [123068, 127507],
            [129619, 133762],
            [134844, 139136],
            [141210, 143789],
            [145047, 148261],
            [149759, 152840],
            [154672, 157160],
            [176183, 179421],
            [181197, 184010],
            [185766, 188575],
            [190280, 194438],
            [196331, 200365],
            [202591, 206026],
            [208950, 211322],
            [212787, 215822],
            [217269, 219841],
            [221277, 224771],
            [233741, 238032],
            [369310, 402679],
            [406539, 410754],
            [412248, 419420],
            [420954, 428739],
            [430332, 437296],
            [439008, 445947],
            [447332, 454463],
            [457544, 462726],
            [464022, 470321],
            [471440, 477583],
            [486186, 490836],
            [493653, 498959],
            [505525, 511856],
            [513829, 520347],
            [522222, 527527],
            [530032, 535876],
            [537978, 543282],
            [546228, 551457],
            [552791, 557988],
        ],
        "device_click": 99098,
        "audio_click": 91138
    },

    "siinkoau": {
        "timeranges": [
            [100040, 102311],
            [103857, 106188],
            [107402, 109650],
            [112489, 115713],
            [117066, 121164],
            [122216, 126113],
            [129211, 131156],
            [132674, 135184],
            [136763, 139563],
            [142110, 145382],
            [147176, 150602],
            [152673, 156809],
            [160276, 162683],
            [164239, 162072],  # NOTE(review): end < start, so is_bruxism() never matches this range - likely a typo, verify
            [168148, 171213],
            [173560, 177328],
            [178917, 183054],
            [184769, 189563],
            [192254, 196411],
            [196398, 201121],  # NOTE(review): starts 13 ms before the previous range ends (196411) - verify
            [202747, 205314],
            [207814, 212512],
            [214066, 218549],
            [220463, 225480],
            [232510, 235078],
            [237088, 239995],
            [241798, 245049],
            [246759, 250412],
            [252176, 255659],
            [257206, 261395],
            [296341, 298627],
            [300474, 302540],
            [304709, 306730],
            [309691, 313231],
            [315049, 318720],
            [320201, 325087],
            [328654, 331225],
            [332666, 335748],
            [336748, 339787],
            [341876, 345534],
            [347253, 351618],
            [353010, 357956],
            [373136, 378146],
            [379977, 386230],
            [387658, 393549],
            [395895, 402509],
            [404211, 410578],
            [412685, 417536],
            [419911, 425730],
            [427371, 433726],
            [435692, 440328],
            [443752, 450110],
            [458457, 463931],
            [468005, 473094],
            [484006, 488541],
            [492114, 497952],
            [500128, 504655],
            [506614, 511980],
            [513543, 519866],
            [522097, 527612],
            [529265, 535202]
        ],
        "device_click": 90716,
        "audio_click": 95190
    },

    "kausenel": {
        "timeranges": [
            [152242, 154294],
            [158026, 160043],
            [162660, 165570],
            [168044, 171930],
            [175510, 179270],
            [183036, 186589],
            [191690, 193919],
            [198890, 201076],
            [202056, 205296],
            [208746, 211583],
            [214551, 217643],
            [220671, 223882],
            [230767, 234067],
            [236084, 238777],
            [239939, 242015],
            [243285, 245865],
            [247036, 249737],
            [251356, 254046],
            [258807, 261351],
            [263072, 265479],
            [267373, 269884],
            [272086, 274515],
            [276240, 279205],
            [281128, 284025],
            [286428, 289306],
            [321364, 323787],
            [326524, 329010],
            [331003, 333355],
            [335858, 339234],
            [340617, 344386],
            [346795, 349944],
            [353253, 356772],
            [359031, 361787],
            [363412, 365070],
            [368181, 371775],
            [373216, 376226],
            [378655, 381901],
            [403073, 405320],
            [407275, 409149],
            [411528, 413216],
            [416534, 419820],
            [420869, 424204],
            [426113, 428538],
            [437976, 441576],
            [443242, 445592],
            [447826, 449086],
            [452568, 455342],
            [458091, 461328],
            [463867, 466866],
            [522705, 527062],
            [530715, 535165],
            [537592, 542315],
            [544443, 549801],
            [552870, 557765],
            [561415, 566938],
            [569426, 574491],
            [576778, 581273],
            [582814, 589335],
            [592217, 596369],
            [597918, 603781],
            [609501, 613970],
            [617760, 623674],
            [627596, 632251],
            [636631, 642228],
            [645557, 651327],
            [654652, 650580],  # NOTE(review): end < start, so is_bruxism() never matches this range - likely a typo, verify
            [663168, 672061],
            [673634, 679300],
            [681339, 689152],
            [692005, 697998],
            [701638, 707461],
        ],
        "device_click": 132852,
        "audio_click": 174196
    },

    "aneimiau": {
        "timeranges": [
            [121553, 126360],
            [125220, 126375],  # NOTE(review): lies almost entirely inside the previous range [121553, 126360] - verify
            [129456, 131212],
            [135957, 137919],
            [139508, 142223],
            [143712, 146034],
            [148722, 152042],
            [153479, 156577],
            [158202, 161022],
            [165380, 168306],
            [170164, 172449],
            [174197, 176618],
            [178983, 181601],
            [183535, 185493],
            [187428, 190214],
            [194604, 196375],
            [198381, 201281],
            [203646, 205796],
            [208608, 211733],
            [212999, 216569],
            [218885, 221975],
            [233062, 235404],
            [237611, 239581],
            [241436, 243568],
            [246659, 249502],
            [251097, 253736],
            [255787, 258415],
            [261058, 263271],
            [264482, 266770],
            [268434, 270701],
            [272966, 275990],
            [277858, 281264],
            [283073, 286012],
            [288739, 291932],
            [293526, 295956],
            [297627, 299604],
            [300864, 303720],
            [305590, 309513],
            [310982, 314205],
            [318237, 321451],
            [323103, 325570],
            [327293, 329968],
            [331981, 335307],
            [336925, 340265],
            [341809, 345985],
            [365038, 368457],
            [370376, 373764],
            [375401, 379242],
            [380244, 383960],
            [385373, 388915],
            [390520, 393027],
            [394229, 397356],
            [398433, 401443],
            [402852, 405772],
            [407152, 409793],
            [417975, 421027],
            [422409, 427257],
            [428557, 432961],
            [434044, 438263],
            [439542, 443942],
            [445374, 449534],
            [450962, 454957],
            [457014, 461677],
            [463217, 467873],
            [469612, 473864],
        ],
        "device_click": 95202,
        "audio_click": 54602
    },

    "amelbenn": {
        "timeranges": [
            [227573, 229734],
            [232280, 234099],
            [236183, 237782],
            [242128, 244714],
            [246479, 249272],
            [250635, 252984],
            [254463, 256704],
            [259970, 261992],
            [263465, 265135],
            [266843, 268885],
            [270387, 271761],
            [274874, 276997],
            [278478, 280880],
            [282599, 284513],
            [289852, 292826],
            [294419, 297167],
            [298621, 301160],
            [334491, 337104],
            [338640, 340535],
            [342356, 344126],
            [346966, 349567],
            [351092, 353242],
            [355013, 357530],
            [378085, 380845],
            [382294, 384758],
            [386432, 388382],
            [390898, 393728],
            [395678, 397977],
            [399325, 402547],
            [404647, 407461],
            [408587, 411498],
            [412763, 415195],
            [418004, 421052],
            [422402, 425521],
            [426750, 430468],
            [470678, 472783],
            [474215, 476761],
            [478265, 480162],
            [482970, 485885],
            [487890, 490407],
            [492222, 494749],
            [579385, 586382],
            [591092, 598277],
            [601489, 610189],
            [615230, 621606],
            [625429, 631061],
            [635210, 639871],
            [642582, 648378],
            [651986, 657121],
            [661136, 667424],
            [669410, 677151],
            [725629, 731642],
            [735982, 741336],
            [744994, 750997],
            [753722, 760211],
            [762634, 768153],
            [771092, 776683],
            [780636, 785603],
            [789238, 793861],
        ],
        "device_click": 204838,
        "audio_click": 229397
    },

    "sviokoau": {
        "timeranges": [
            [240660, 242349],
            [245731, 247997],
            [249832, 252129],
            [257280, 260121],
            [262027, 266135],
            [267639, 271348],
            [272160, 274436],
            [276816, 278860],
            [280950, 282991],
            [286141, 288915],
            [292805, 295889],
            [297969, 301392],
            [305812, 308973],
            [310478, 313440],
            [315105, 317075],
            [320157, 324142],
            [325798, 328284],
            [331276, 334436],
            [337983, 340349],
            [342726, 345182],
            [347959, 350299],
            [352099, 354796],
            [357490, 361932],
            [363351, 365734],
            [366615, 368353],
            [369860, 371880],
            [372739, 375233],
            [388232, 389834],
            [393566, 397534],
            [399373, 402106],
            [405673, 407052],
            [421503, 423422],
            [425345, 427126],
            [428074, 430370],
            [567025, 573348],
            [576251, 581971],
            [583779, 587595],
            [591696, 595977],
            [599232, 604305],
            [606517, 611254],
            [614416, 618639],
            [622089, 626780],
            [628422, 633520],
            [645295, 649328],
            [651044, 654662],
            [658547, 662993],
            [668345, 672336],
            [674924, 680455],
            [682758, 689225],
            [691973, 697412],
            [699125, 703535],
            [715443, 720325]
        ],
        "device_click": 170614,
        "audio_click": None  # the only recording without an audio_click value
    },

    "maipmiau": {
        "timeranges": [
            [120817, 122759],
            [125506, 127381],
            [130070, 131041],
            [134652, 136983],
            [139611, 142932],
            [144852, 146820],
            [151115, 153623],
            [155567, 157731],
            [159568, 161589],
            [163328, 166755],
            [169062, 172493],
            [175331, 179061],
            [182697, 184715],
            [186810, 189155],
            [190952, 193708],
            [195262, 199193],
            [200736, 204445],
            [206083, 209393],
            [213243, 216134],
            [218549, 221726],
            [223838, 226736],
            [228776, 233427],
            [235799, 238914],
            [242545, 246239],
            [262671, 265477],
            [267255, 269585],
            [272374, 274856],
            [277888, 281638],
            [283713, 287541],
            [290660, 294850],
            [298448, 301559],
            [303637, 307191],
            [309330, 312663],
            [314615, 318479],
            [320871, 326197],
            [328400, 332905],
            [335370, 338256],
            [340916, 343715],
            [345845, 349286],
            [351613, 356180],
            [358195, 363039],
            [364551, 369253],
            [373861, 376183],
            [378049, 381799],
            [384114, 386806],
            [388932, 392458],
            [394841, 398409],
            [401098, 405602],
            [431712, 436470],
            [438521, 443112],
            [448849, 454022],
            [455639, 459792],
            [463039, 469057],
            [470799, 475913],
            [478780, 482652],
            [486691, 491493],
            [494833, 499694],
            [517823, 522427],
            [524300, 529069],
            [538765, 543051],
            [546199, 550994],
            [555910, 560598],
            [562542, 567120],
            [568687, 571997],
            [574186, 582872],
            [586496, 590397],
            [591762, 596932],
        ],
        "device_click": 109393,
        "audio_click": 42167
    },

    "crhagach": {
        "timeranges": [
            [160784, 171798],
            [174415, 178904],
            [181343, 184106],
            [185996, 188855],
            [192392, 194517],
            [196791, 199849],
            [201438, 203751],
            [207047, 209896],
            [211756, 214567],
            [216838, 220030],
            [222921, 225076],
            [226593, 528862],  # NOTE(review): spans ~300 s and covers every following range up to 525109, so all of that is labelled 1; possibly meant 228862 - verify
            [230270, 232704],
            [235105, 237548],
            [239285, 241587],
            [243090, 245555],
            [252290, 254641],
            [256002, 258918],
            [260359, 262578],
            [264537, 267380],
            [268855, 271599],
            [272957, 275941],
            [313245, 316397],
            [318069, 320595],
            [322342, 326016],
            [327976, 331450],
            [332542, 336060],
            [338558, 341866],
            [343123, 345369],
            [346744, 348602],
            [350822, 354045],
            [355672, 359119],
            [361199, 364625],
            [370146, 372019],
            [373068, 375762],
            [377071, 379309],
            [381348, 384990],
            [386507, 390045],
            [391243, 395114],
            [418823, 424373],
            [430310, 438323],
            [445128, 451495],
            [459615, 466233],
            [472768, 481371],
            [488208, 495283],
            [502608, 509754],
            [518445, 525109],
            [532836, 541816],
            [559980, 566472],
            [573353, 579792],
            [586689, 594294],
            [601513, 608326],
            [615584, 623138],
            [531438, 640013],  # NOTE(review): start precedes the previous range (615584-623138) and the range spans ~109 s; possibly meant 631438 - verify
            [646306, 654944],
            [661756, 669343],
            [676974, 685005],
        ],
        "device_click": 135015,
        "audio_click": 132063
    },
}


In [None]:
from datetime import datetime

def resample_df(df, freq, cut_from, cut_to):
    """Resample ``df`` onto a regular time grid and crop it to ``[cut_from, cut_to]``.

    The "dt" column becomes a DatetimeIndex (integer values are interpreted by
    ``pd.to_datetime`` as nanoseconds since the epoch). A regular grid starting
    at the epoch with spacing ``freq`` is generated, the values at the grid
    points are interpolated from the original samples, and only the grid points
    are kept.

    Args:
        df: DataFrame with a numeric "dt" column; it is not modified.
        freq: Grid spacing as a pandas frequency string, e.g. ``"10ms"``.
        cut_from: Lower bound of the returned window, in milliseconds.
        cut_to: Upper bound of the returned window, in the same unit as "dt"
            (nanoseconds).

    Returns:
        A new DataFrame indexed by the grid timestamps inside the window
        (both bounds inclusive).

    Raises:
        Whatever ``pd.to_datetime`` raises for an unconvertible index; the
        offending data is printed first.
    """
    df = df.set_index("dt")
    df.index = df.index.map(int)
    try:
        df.index = pd.to_datetime(df.index)
    except Exception:
        print(df.index, df, cut_from, cut_to)
        raise  # bare raise keeps the original traceback

    old_index = df.index
    new_index = pd.date_range(pd.Timestamp(0), old_index.max(), freq=freq)

    # method="time" weights by the actual timestamps. method="linear" ignores
    # the index and treats the rows as equally spaced, which is wrong on the
    # union of an irregular index and the regular grid.
    res = (
        df.reindex(old_index.union(new_index))
        .interpolate(method="time")
        .reindex(new_index)
    )

    # cut_from is in ms, cut_to in ns. The previous `cut_to // 1e9` floored the
    # upper bound to a whole second and dropped up to 1 s of data.
    window_start = pd.Timestamp(cut_from, unit="ms")
    window_end = pd.Timestamp(cut_to, unit="ns")

    return res[(res.index >= window_start) & (res.index <= window_end)]

In [None]:
# dfs = df_by_file_by_folder["amelbenn"]
# df = dfs["bno"]

# df = df.set_index("dt")
# df.index = pd.to_datetime(df.index)

# # 

# old_index = df.index

# new_index = pd.date_range(old_index.min(), old_index.max(), freq="4ms")
# res = df.reindex(old_index.union(new_index)).interpolate(method="linear").reindex(new_index)

FREQ = "10ms"

# Per folder: the smallest "last dt value" over all of its files, i.e. the
# instant up to which every file of the folder still has data.
dfs_global_timestamp_minmax = {
    folder: min([df["dt"].iat[-1] for df in df_by_files.values()])
    for folder, df_by_files in df_by_file_by_folder.items()
}

# Replace every raw DataFrame by its resampled version, cropped to the span
# between the folder's device click and the common end computed above.
# NOTE: this rebinds the entries of df_by_file_by_folder, so the cell cannot be
# run twice (the resampled frames no longer have a "dt" column).
for folder, df_by_files in df_by_file_by_folder.items():
    for filename in list(df_by_files):
        df_by_files[filename] = resample_df(
            df_by_files[filename],
            FREQ,
            labels[folder]["device_click"],
            dfs_global_timestamp_minmax[folder],
        )


# Add all datasets to one big table & Label the data

In [None]:
# Columns that are dropped from a folder's merged table when the big tables
# are built. Folders that are not listed keep all of their columns.
data_to_remove_by_folder = {
    "amelbenn": ["temporalis_left"],
    "aneimiau": ["temporalis_left"],
    "kausenel": ["temporalis_right"],
    "maipmiau": ["temporalis_left"],
}

In [None]:
def is_bruxism(folder, row):
    """Return 1 if the row's timestamp lies inside a labelled range of ``folder``, else 0.

    Args:
        folder: Key into the module-level ``labels`` mapping.
        row: A row of a DataFrame with a DatetimeIndex; only ``row.name``
            (the row's index timestamp) is used.

    Returns:
        1 when ``row.name`` is within any ``[start, end]`` pair of
        ``labels[folder]["timeranges"]`` (bounds inclusive, values in
        milliseconds), otherwise 0. A pair whose end is before its start can
        never match.
    """
    timestamp = row.name
    for start_ms, end_ms in labels[folder]["timeranges"]:
        # pd.Timestamp(..., unit="ms") replaces datetime.utcfromtimestamp(ms / 1000),
        # which is deprecated since Python 3.12, and yields the same naive UTC instant.
        if pd.Timestamp(start_ms, unit="ms") <= timestamp <= pd.Timestamp(end_ms, unit="ms"):
            return 1
    return 0

# Folder key -> one wide table: the "is_bruxism" label plus the columns of
# every resampled file of that folder, all on the same FREQ grid.
bigdfs_by_folder = {}

for folder, df_by_files in df_by_file_by_folder.items():
    # The "bno" file defines the time span of the merged table.
    bno_index = df_by_files["bno"].index
    start_ts, end_ts = bno_index[0], bno_index[-1]

    new_index = pd.date_range(start_ts, end_ts, freq=FREQ)
    big_df = pd.DataFrame(index=new_index, columns=["is_bruxism"]).fillna(0)

    # Label each grid timestamp from the annotated time ranges.
    big_df["is_bruxism"] = big_df.apply(lambda row: is_bruxism(folder, row), axis=1)

    # Left-join each file on the timestamp; a column name that already exists
    # in the table gets the "_2" suffix.
    for df in df_by_files.values():
        big_df = big_df.merge(
            right=df,
            left_index=True,
            right_index=True,
            how="left",
            suffixes=("", "_2"),
        )

    # Remove the columns listed for this folder, if any.
    for col in data_to_remove_by_folder.get(folder) or []:
        big_df = big_df.drop(col, axis=1)

    # Grid points without a value for some column are filled with 0.
    big_df = big_df.fillna(0)
    bigdfs_by_folder[folder] = big_df

In [None]:
# Display the column names of one merged table as a quick check of the merge
# result (including which columns received the "_2" suffix).
bigdfs_by_folder["nasekoch"].columns
# bigdfs_by_folder["siinkoau"].columns

Index(['is_bruxism', 'acceleration_x', 'acceleration_y', 'acceleration_z',
       'gyro_x', 'gyro_y', 'gyro_z', 'gsr', 'acceleration_x_2',
       'acceleration_y_2', 'acceleration_z_2', 'gyro_x_2', 'gyro_y_2',
       'gyro_z_2', 'x28_gyro_x', 'x28_gyro_y', 'x28_gyro_z',
       'x28_acceleration_x', 'x28_acceleration_y', 'x28_acceleration_z',
       'x28_quaternion_w', 'x28_quaternion_x', 'x28_quaternion_y',
       'x28_quaternion_z', 'x29_gyro_x', 'x29_gyro_y', 'x29_gyro_z',
       'x29_acceleration_x', 'x29_acceleration_y', 'x29_acceleration_z',
       'x29_quaternion_w', 'x29_quaternion_x', 'x29_quaternion_y',
       'x29_quaternion_z', 'masseter_left', 'masseter_right',
       'temporalis_left', 'temporalis_right'],
      dtype='object')

In [None]:
# fig = make_subplots()
# df = bigdfs_by_folder["nasekoch"]

# for column in df.columns.tolist():
#     trace = go.Scatter(
#     x=df.index,
#     y=df[column],
#     name=f"{column}",
#     )
    
#     fig.add_trace(trace)

# fig.update_layout(title_text="nasekoch")
# fig.write_html(f"debug-plot.html", auto_open=True)

In [None]:
from itertools import chain, combinations

def all_subsets(iterable):
    """Lazily yield every non-empty combination of the items of ``iterable``.

    Combinations are produced as tuples, ordered by size (1, 2, ..., n) and,
    within a size, in the order ``itertools.combinations`` emits them.

    Args:
        iterable: Any iterable; it is consumed once.

    Returns:
        An ``itertools.chain`` iterator over the tuples.
    """
    pool = list(iterable)
    per_size = (combinations(pool, size) for size in range(1, len(pool) + 1))
    return chain.from_iterable(per_size)


df = bigdfs_by_folder["nasekoch"]
columns = df.columns.drop("is_bruxism").tolist()

# columns

# Feature group name -> columns of the merged table that belong to it.
# Comment a group in or out to include or exclude it from the experiments.
raw_features_types = {
    # "bno_gyro": ['x28_gyro_x', 'x28_gyro_y', 'x28_gyro_z', "x29_gyro_x", "x29_gyro_y", "x29_gyro_z"],
    # "bno_acc": ["x28_acceleration_x", "x28_acceleration_y", "x28_acceleration_z", "x29_acceleration_x", "x29_acceleration_y", "x29_acceleration_z"],
    "bno_quat": [
        "x28_quaternion_w", "x28_quaternion_x", "x28_quaternion_y", "x28_quaternion_z",
        "x29_quaternion_w", "x29_quaternion_x", "x29_quaternion_y", "x29_quaternion_z",
    ],

    # "ble_gyro": ["gyro_x", "gyro_y", "gyro_z", "gyro_x_2", "gyro_y_2", "gyro_z_2"],
    # "ble_acc": ["acceleration_x", "acceleration_y", "acceleration_z", "acceleration_x_2", "acceleration_y_2", "acceleration_z_2"],

    # "gsr": ["gsr"],

    # "masseter": ["masseter_left", "masseter_right"],

    # "temporalis": ["temporalis_left", "temporalis_right"]
}

# Every non-empty combination of the enabled groups, smallest combinations first.
feature_types_subsets = sorted(all_subsets(raw_features_types.keys()), key=len)

feature_types_subsets

[('bno_quat',)]

# Split the big DataFrames into sliding windows

In [None]:
# print(list(bigdfs_by_folder["nasekoch"][0:5][["gyro_x", "gyro_y"]].to_numpy().flatten()))

In [None]:
# print(bigdfs_by_folder["nasekoch"][0:5][["gyro_x", "gyro_y"]])
# print(1 if (bigdfs_by_folder["nasekoch"][0:5][["is_bruxism"]].sum()) > (5/2) else 0)

# print(1 if bigdfs_by_folder["nasekoch"][0:100][["is_bruxism"]].sum()["is_bruxism"] > 100/2 else 0)

In [None]:
# bigdfs_by_folder["nasekoch"][:8]

In [None]:
# [(i - 1_600, i) for i in range(1_600, 10_000, 800)]

In [None]:
# for folder, df in bigdfs_by_folder.items():
#     print(folder, df["x28_gyro_x"].isnull())

In [None]:
# import time
# import numpy as np

# WINDOW_SIZE_MS = 1_600
# WINDOW_OVERLAP_PER = 0.5
# WINDOW_OVERLAP_MS = int(WINDOW_SIZE_MS * WINDOW_OVERLAP_PER)

# freq = int(FREQ.split("ms")[0])
# WINDOW_SIZE_INDEX = WINDOW_SIZE_MS // freq
# WINDOW_OVERLAP_INDEX = WINDOW_OVERLAP_MS // freq

# windows_with_labels__for_feature_set = {",".join(feature_set): {"data": [], "labels": []} for feature_set in feature_types_subsets}

# used_features_by_feature_set = {feature_set: [] for feature_set in windows_with_labels__for_feature_set.keys()}
# for feature_set in used_features_by_feature_set.keys():
#     for feature_type in feature_set.split(","):
#         used_features_by_feature_set[feature_set] += raw_features_types[feature_type]


# for folder, bigdf in bigdfs_by_folder.items():
#     start = time.time()

#     windows_ranges = [(i - WINDOW_SIZE_INDEX, i) for i in range(WINDOW_SIZE_INDEX, len(bigdf.index), WINDOW_OVERLAP_INDEX)]
#     labels = [1 if bigdf[index_range[0]:index_range[1]][["is_bruxism"]].sum()["is_bruxism"] >= WINDOW_SIZE_INDEX // 2 else 0 for index_range in windows_ranges]

#     for feature_set in windows_with_labels__for_feature_set.keys():
#         used_features = used_features_by_feature_set[feature_set]
#         try:
#             bigdf[used_features]
#         except:
#             print(f"{folder} does not have all of the {used_features} columns")
#             continue

#         windows_with_labels__for_feature_set[feature_set]["data"] += [bigdf[index_range[0]:index_range[1]][used_features].to_numpy().flatten() for index_range in windows_ranges]
#         windows_with_labels__for_feature_set[feature_set]["labels"] += labels

#     windows_with_labels__for_feature_set[feature_set]["data"] = np.asarray(windows_with_labels__for_feature_set[feature_set]["data"])
#     windows_with_labels__for_feature_set[feature_set]["labels"] = np.asarray(windows_with_labels__for_feature_set[feature_set]["labels"])
#     print(f"{folder} done in {time.time() - start}s")

In [None]:
# len(windows_with_labels__for_feature_set["bno_gyro"]["data"])
# len(windows_with_labels__for_feature_set["ble_gyro"]["data"])

In [None]:
import time
import numpy as np

WINDOW_SIZE_MS = 1_600
WINDOW_OVERLAP_PER = 0.5
WINDOW_OVERLAP_MS = int(WINDOW_SIZE_MS * WINDOW_OVERLAP_PER)

# FREQ is a string such as "10ms"; keep the integer number of ms per sample.
freq = int(FREQ.split("ms")[0])
WINDOW_SIZE_INDEX = WINDOW_SIZE_MS // freq
WINDOW_OVERLAP_INDEX = WINDOW_OVERLAP_MS // freq
# Rows between the starts of two consecutive windows. The overlap length used
# to be the stride, which is only the same thing at exactly 50 % overlap.
WINDOW_STEP_INDEX = max(1, WINDOW_SIZE_INDEX - WINDOW_OVERLAP_INDEX)

# "group_a,group_b" -> {"data": one flattened window per row, "labels": 0/1 per window}
windows_with_labels__for_feature_set = {",".join(feature_set): {"data": [], "labels": []} for feature_set in feature_types_subsets}

# "group_a,group_b" -> concatenated column names of all groups in the set
used_features_by_feature_set = {
    feature_set: [
        feature
        for feature_type in feature_set.split(",")
        for feature in raw_features_types[feature_type]
    ]
    for feature_set in windows_with_labels__for_feature_set.keys()
}

for feature_set in windows_with_labels__for_feature_set.keys():
    used_features = used_features_by_feature_set[feature_set]
    window_data = []
    window_labels = []

    for folder, bigdf in bigdfs_by_folder.items():
        # Explicit column check instead of a bare `except:`, which also hid
        # unrelated errors (including KeyboardInterrupt).
        if any(feature not in bigdf.columns for feature in used_features):
            print(f"{folder} does not have all of the {used_features} columns")
            continue

        # (start, end) row positions of each window, end exclusive. The bound
        # is len + 1 so the last full window, ending on the final row, is kept.
        windows_ranges = [
            (i - WINDOW_SIZE_INDEX, i)
            for i in range(WINDOW_SIZE_INDEX, len(bigdf.index) + 1, WINDOW_STEP_INDEX)
        ]

        # Convert once per folder and slice the arrays positionally; slicing
        # the DataFrame for every window is far slower.
        feature_values = bigdf[used_features].to_numpy()
        bruxism_flags = bigdf["is_bruxism"].to_numpy()

        window_data += [feature_values[start:end].flatten() for start, end in windows_ranges]
        # A window is positive when at least half of its samples are labelled 1.
        window_labels += [
            1 if bruxism_flags[start:end].sum() >= WINDOW_SIZE_INDEX // 2 else 0
            for start, end in windows_ranges
        ]

    windows_with_labels__for_feature_set[feature_set]["data"] = np.asarray(window_data)
    windows_with_labels__for_feature_set[feature_set]["labels"] = np.asarray(window_labels)
    # print(f"{feature_set} done")

# Train and evaluate the models

In [None]:
# list(windows_with_labels__for_feature_set.keys())[:10]

In [None]:
# print(windows_with_labels__for_feature_set["ble_gyro"]["data"])

In [None]:

# with open(f"windows_with_labels__for_feature_set.txt", 'w') as f:
#     print(windows_with_labels__for_feature_set, file=f)

In [None]:
# windows_with_labels__for_feature_set.keys()

In [None]:
# Sanity check: print feature sets that ended up without any window, and
# feature sets whose number of windows differs from their number of labels.
for feature_set, bundle in windows_with_labels__for_feature_set.items():
    n_windows = len(bundle["data"])
    n_labels = len(bundle["labels"])

    if n_windows == 0:
        print(feature_set)
    if n_windows != n_labels:
        print(feature_set, n_windows, n_labels)

In [None]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn import tree
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

# One result table per classifier: feature-set key -> test accuracy.
rf_score__by_feature_set = dict()   # random forest
knn_score__by_feature_set = dict()  # k-nearest neighbours
svm_score__by_feature_set = dict()  # support vector machine
dt_score__by_feature_set = dict()   # decision tree
lr_score__by_feature_set = dict()   # logistic regression

In [None]:
# Quick baseline on the single "bno_quat" feature set before sweeping all sets.
bno_quat_windows = windows_with_labels__for_feature_set["bno_quat"]
X_train, X_test, y_train, y_test = train_test_split(
    bno_quat_windows["data"],
    bno_quat_windows["labels"],
    test_size=0.2,
    random_state=42,  # fixed seed so the reported accuracy is reproducible
)
# NOTE(review): this is a random split over individual windows. If consecutive
# windows overlap, near-identical samples can end up in both train and test
# and inflate the score -- confirm against the windowing parameters.

# Alternative tried earlier: RandomForestClassifier(n_estimators=10)
model = KNeighborsClassifier(n_neighbors=1)
model.fit(X_train, y_train)
model.score(X_test, y_test)



0.8818747010999521

In [None]:
# Random-forest test accuracy for every feature set.
for feature_set, data_and_labels in windows_with_labels__for_feature_set.items():
    # Fixed seed: reruns give the same split, so scores are reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        data_and_labels["data"],
        data_and_labels["labels"],
        test_size=0.2,
        random_state=42,
    )

    # The forest itself is randomised too, so it gets a seed as well.
    modelRF = RandomForestClassifier(n_estimators=20, random_state=42)
    modelRF.fit(X_train, y_train)
    rf_score__by_feature_set[feature_set] = modelRF.score(X_test, y_test)

rf_score__by_feature_set

{'ble_acc': 0.8455284552845529,
 'ble_acc,gsr': 0.8402678144428503,
 'ble_acc,gsr,masseter': 0.8426590148254424,
 'ble_acc,gsr,masseter,temporalis': 0.8464566929133859,
 'ble_acc,gsr,temporalis': 0.8519685039370078,
 'ble_acc,masseter': 0.8565279770444764,
 'ble_acc,masseter,temporalis': 0.8354330708661417,
 'ble_acc,temporalis': 0.8354330708661417,
 'ble_gyro': 0.7307508369201339,
 'ble_gyro,ble_acc': 0.8455284552845529,
 'ble_gyro,ble_acc,gsr': 0.8498326159732186,
 'ble_gyro,ble_acc,gsr,masseter': 0.8637015781922525,
 'ble_gyro,ble_acc,gsr,masseter,temporalis': 0.8732283464566929,
 'ble_gyro,ble_acc,gsr,temporalis': 0.8708661417322835,
 'ble_gyro,ble_acc,masseter': 0.860832137733142,
 'ble_gyro,ble_acc,masseter,temporalis': 0.8488188976377953,
 'ble_gyro,ble_acc,temporalis': 0.8496062992125984,
 'ble_gyro,gsr': 0.803921568627451,
 'ble_gyro,gsr,masseter': 0.803921568627451,
 'ble_gyro,gsr,masseter,temporalis': 0.841732283464567,
 'ble_gyro,gsr,temporalis': 0.8291338582677166,
 'ble_g

In [None]:
# k-nearest-neighbours (k=5) test accuracy for every feature set.
for feature_set, data_and_labels in windows_with_labels__for_feature_set.items():
    # Fixed seed: reruns give the same split, so scores are reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        data_and_labels["data"],
        data_and_labels["labels"],
        test_size=0.2,
        random_state=42,
    )
    modelKNN = KNeighborsClassifier(n_neighbors=5)
    modelKNN.fit(X_train, y_train)
    knn_score__by_feature_set[feature_set] = modelKNN.score(X_test, y_test)

knn_score__by_feature_set

{'ble_acc': 0.820181731229077,
 'ble_acc,gsr': 0.7350549976087997,
 'ble_acc,gsr,masseter': 0.6872309899569584,
 'ble_acc,gsr,masseter,temporalis': 0.6968503937007874,
 'ble_acc,gsr,temporalis': 0.7023622047244095,
 'ble_acc,masseter': 0.5671927307508369,
 'ble_acc,masseter,temporalis': 0.6173228346456693,
 'ble_acc,temporalis': 0.6204724409448819,
 'ble_gyro': 0.7302725968436155,
 'ble_gyro,ble_acc': 0.7293161166905787,
 'ble_gyro,ble_acc,gsr': 0.7642276422764228,
 'ble_gyro,ble_acc,gsr,masseter': 0.7092300334768054,
 'ble_gyro,ble_acc,gsr,masseter,temporalis': 0.7078740157480315,
 'ble_gyro,ble_acc,gsr,temporalis': 0.7039370078740157,
 'ble_gyro,ble_acc,masseter': 0.5662362505978001,
 'ble_gyro,ble_acc,masseter,temporalis': 0.6354330708661418,
 'ble_gyro,ble_acc,temporalis': 0.6275590551181103,
 'ble_gyro,gsr': 0.759923481587757,
 'ble_gyro,gsr,masseter': 0.7154471544715447,
 'ble_gyro,gsr,masseter,temporalis': 0.6850393700787402,
 'ble_gyro,gsr,temporalis': 0.7,
 'ble_gyro,masseter'

In [None]:
# Support-vector-machine (default SVC settings) test accuracy for every feature set.
for feature_set, data_and_labels in windows_with_labels__for_feature_set.items():
    # Fixed seed: reruns give the same split, so scores are reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        data_and_labels["data"],
        data_and_labels["labels"],
        test_size=0.2,
        random_state=42,
    )
    modelSVM = SVC()
    modelSVM.fit(X_train, y_train)
    svm_score__by_feature_set[feature_set] = modelSVM.score(X_test, y_test)

svm_score__by_feature_set

{'ble_acc': 0.7144906743185079,
 'ble_acc,gsr': 0.697274031563845,
 'ble_acc,gsr,masseter': 0.7058823529411765,
 'ble_acc,gsr,masseter,temporalis': 0.6874015748031496,
 'ble_acc,gsr,temporalis': 0.6834645669291338,
 'ble_acc,masseter': 0.6905786704925873,
 'ble_acc,masseter,temporalis': 0.6700787401574804,
 'ble_acc,temporalis': 0.7007874015748031,
 'ble_gyro': 0.7403156384505022,
 'ble_gyro,ble_acc': 0.7541846006695361,
 'ble_gyro,ble_acc,gsr': 0.6924916307986609,
 'ble_gyro,ble_acc,gsr,masseter': 0.7010999521759923,
 'ble_gyro,ble_acc,gsr,masseter,temporalis': 0.6771653543307087,
 'ble_gyro,ble_acc,gsr,temporalis': 0.6874015748031496,
 'ble_gyro,ble_acc,masseter': 0.7140124342419895,
 'ble_gyro,ble_acc,masseter,temporalis': 0.6992125984251969,
 'ble_gyro,ble_acc,temporalis': 0.662992125984252,
 'ble_gyro,gsr': 0.6948828311812529,
 'ble_gyro,gsr,masseter': 0.7087517934002869,
 'ble_gyro,gsr,masseter,temporalis': 0.6929133858267716,
 'ble_gyro,gsr,temporalis': 0.6960629921259842,
 'ble

In [None]:
# Decision-tree test accuracy for every feature set.
for feature_set, data_and_labels in windows_with_labels__for_feature_set.items():
    # Fixed seed: reruns give the same split, so scores are reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        data_and_labels["data"],
        data_and_labels["labels"],
        test_size=0.2,
        random_state=42,
    )
    # Seed the tree as well so the fitted model does not vary between runs.
    modelDT = tree.DecisionTreeClassifier(random_state=42)
    modelDT.fit(X_train, y_train)
    dt_score__by_feature_set[feature_set] = modelDT.score(X_test, y_test)

dt_score__by_feature_set

{'ble_acc': 0.7847919655667145,
 'ble_acc,gsr': 0.7881396461023433,
 'ble_acc,gsr,masseter': 0.8053562888570062,
 'ble_acc,gsr,masseter,temporalis': 0.8244094488188977,
 'ble_acc,gsr,temporalis': 0.8007874015748031,
 'ble_acc,masseter': 0.7824007651841224,
 'ble_acc,masseter,temporalis': 0.7850393700787401,
 'ble_acc,temporalis': 0.7637795275590551,
 'ble_gyro': 0.6465805834528934,
 'ble_gyro,ble_acc': 0.7728359636537542,
 'ble_gyro,ble_acc,gsr': 0.795791487326638,
 'ble_gyro,ble_acc,gsr,masseter': 0.7948350071736011,
 'ble_gyro,ble_acc,gsr,masseter,temporalis': 0.8377952755905512,
 'ble_gyro,ble_acc,gsr,temporalis': 0.8031496062992126,
 'ble_gyro,ble_acc,masseter': 0.8015303682448589,
 'ble_gyro,ble_acc,masseter,temporalis': 0.7881889763779527,
 'ble_gyro,ble_acc,temporalis': 0.7960629921259843,
 'ble_gyro,gsr': 0.719273075083692,
 'ble_gyro,gsr,masseter': 0.7398373983739838,
 'ble_gyro,gsr,masseter,temporalis': 0.7543307086614173,
 'ble_gyro,gsr,temporalis': 0.7464566929133858,
 'ble

In [None]:
# Logistic-regression test accuracy for every feature set.
for feature_set, data_and_labels in windows_with_labels__for_feature_set.items():
    # Fixed seed: reruns give the same split, so scores are reproducible.
    X_train, X_test, y_train, y_test = train_test_split(
        data_and_labels["data"],
        data_and_labels["labels"],
        test_size=0.2,
        random_state=42,
    )
    modelLR = LogisticRegression()
    modelLR.fit(X_train, y_train)
    # Score the logistic-regression model fitted just above. Scoring with the
    # decision tree left over from another cell evaluates a model that was
    # trained on a different feature set.
    lr_score__by_feature_set[feature_set] = modelLR.score(X_test, y_test)

lr_score__by_feature_set

ValueError: ignored

In [None]:
# x = knn_score__by_feature_set
# x = {k: v for k, v in sorted(x.items(), key=lambda item: item[1] + (8 - len(item[0].split(","))), reverse=True)}

# x

In [None]:
# Pull out the windows/labels entry of the "bno_quat" feature set for inspection.
bno_quat_key = "bno_quat"
feature_set_bno_quat = windows_with_labels__for_feature_set[bno_quat_key]

# Bare last expression so the notebook renders the entry as the cell output.
feature_set_bno_quat

NameError: ignored