# Bonn data

In [1]:
import os
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:

os.chdir('/content/drive/My Drive/0-Project')
folders = ['F', 'O', 'S']
# O: healthy people with eyes closed; F: patients seizure free; S: seizure
dataframes = {}

for folder_path in folders:
  data_dict = {}

  for file_name in os.listdir(folder_path):
    file_path = os.path.join(folder_path, file_name)
    # Skip anything that is not a regular file (e.g. a checkpoint
    # sub-directory), because open() would fail on it
    if not os.path.isfile(file_path):
      continue

    # Characters 1-3 of the file name are used as the column label
    column_name = file_name[1:4]
    with open(file_path, 'r') as file:
      # One sample per line; iterate the file directly instead of readlines()
      data_dict[column_name] = [line.strip() for line in file]

  df = pd.DataFrame(data_dict)
  # Order the columns by label and parse the text samples as numbers once,
  # so the frame stored under the folder name is already numeric
  dataframes[folder_path] = df[sorted(df.columns)].apply(pd.to_numeric)

# Accessing the DataFrames for F, O, and S
df_F = dataframes['F']
df_O = dataframes['O']
df_S = dataframes['S']

# Representation

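Each 12-bit sample is split into 5 nibbles and compared, nibble by nibble, with the previous sample. The 3-bit code records how many leading nibbles are identical; the nibbles from the first differing one onward are sent after the code.

1. All nibbles are the same: 000
2. Nibbles 1 to 4 are the same, nibble 5 is different: 001
3. Nibbles 1 to 3 are the same, nibble 4 is different (nibbles 4 to 5 are sent): 010
4. Nibbles 1 to 2 are the same, nibble 3 is different (nibbles 3 to 5 are sent): 011
5. Nibble 1 is the same, nibble 2 is different (nibbles 2 to 5 are sent): 100
6. Nibble 1 is different (all nibbles are sent): 101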

In [2]:
def compare(str1, str2, split):
  '''
  Params:
    str1  : input string 1
    str2  : input string 2
    split : a list of 5 integers that sum up to 12 (widths of the 5 nibbles)

  Cut the two 12-bit binary strings into 5 nibbles of the given widths and
  return a 3-character code telling how many leading nibbles are equal:
    5 equal (identical strings)      -> 000
    4 equal, nibble 5 differs        -> 001
    3 equal, nibble 4 differs        -> 010
    2 equal, nibble 3 differs        -> 011
    1 equal, nibble 2 differs        -> 100
    0 equal, nibble 1 differs        -> 101

  Raises ValueError when either string is not 12 characters long.
  '''
  # Ensure both strings are 12-bit binary representations
  if len(str1) != 12 or len(str2) != 12:
    raise ValueError("Both strings must be 12-bit binary representations.")

  # Start/end positions of the nibbles; the last nibble runs to the end
  end1 = split[0]
  end2 = end1 + split[1]
  end3 = end2 + split[2]
  end4 = end3 + split[3]
  cuts = [0, end1, end2, end3, end4, None]

  # Count how many nibbles match before the first difference
  matched = 0
  for lo, hi in zip(cuts, cuts[1:]):
    if str1[lo:hi] != str2[lo:hi]:
      break
    matched += 1

  # Index = number of matching leading nibbles
  codes = ('101', '100', '011', '010', '001', '000')
  return codes[matched]

In [3]:
def l2sb(input, split):
  '''
  Params:
    input : a list of 12-bit binary string
    split : a list of 5 integers that sum up to 12 (widths of the 5 nibbles)

  Use L2SB algorithm and return a list of compressed result.

  The first sample is emitted unchanged. Every later sample is compared with
  the sample before it using compare() and emitted as the 3-character code
  followed by the bits from the first differing nibble to the end; a sample
  identical to its predecessor is emitted as the bare code '000'.

  The result has one entry per input sample; an empty input gives [].
  '''
  # Nothing to encode: avoid indexing input[0] on an empty sequence
  if len(input) == 0:
    return []

  # For each code, the number of leading nibbles that are NOT re-sent
  skipped_nibbles = {'101': 0, '100': 1, '011': 2, '010': 3, '001': 4}

  res = [input[0]]
  for i in range(1, len(input)):
    code = compare(input[i], input[i-1], split)
    if code in skipped_nibbles:
      # Bit position where the first differing nibble starts
      start = sum(split[:skipped_nibbles[code]])
      res.append(code + input[i][start:])
    else:
      res.append('000')
  return res


In [4]:
def total_length(strings):
  '''
  Params:
    strings : a list of string
  Add up the lengths of all strings in the list and return the sum
  (0 for an empty list).
  '''
  total = 0
  for item in strings:
    total += len(item)
  return total

In [5]:
def calRatio_all(input, split):
  '''
  Params:
    input : a data frame of integers, each column represents a txt file
    split : a list of 5 integers that sum up to 12; the widths of the 5
            nibbles, passed unchanged to l2sb

  Use new l2sb algorithm to compress the input data. Every value is written
  as a 12-bit binary string, each column is compressed on its own, and the
  original and compressed lengths (in bits) are summed over all columns.

  Return the overall compress ratio original_length / compressed_length.
  '''
  original_length = 0
  compressed_length = 0
  for i in range(input.shape[1]):
    col = input.iloc[:,i]
    # Fixed-width 12-character bit string for every sample of this column
    binarycol = [np.binary_repr(val, width=12) for val in col]
    col_compressed = l2sb(binarycol, split)
    original_length += total_length(binarycol)
    compressed_length += total_length(col_compressed)
  return original_length / compressed_length

# Test case

In [None]:
split = [3,3,2,2,2]
# Named test_samples rather than input so the builtin input() is not shadowed
test_samples = ['000111001100', '000110101010', '000110111111', '000110110000', '000110110011', '000110110011', '001010001101']
#                 1-2-3-4-5         1 same         1,2 same       1,2,3 same     1,2,3,4 same     all same        all different
#                   none             100             011             010             001             000           101
res = l2sb(test_samples, split)
print(res)

# Pin the expected encoding so a regression fails loudly instead of being eyeballed
expected = ['000111001100', '100110101010', '011111111', '0100000', '00111', '000', '101001010001101']
assert res == expected, res

['000111001100', '100110101010', '011111111', '0100000', '00111', '000', '101001010001101']


# Transfer data

In [None]:
# Shift every data set by its own global minimum so its smallest sample is 0;
# the original frames df_F / df_O / df_S are left untouched
df_F_new = df_F - df_F.min().min()
df_O_new = df_O - df_O.min().min()
df_S_new = df_S - df_S.min().min()

In [6]:
def find_combinations(target_sum, num_parts, current_combination=None, current_sum=0):
  '''
  Params:
    target_sum          : the total every combination must add up to
    num_parts           : how many positive integers are still to be chosen
    current_combination : parts chosen so far (used by the recursion)
    current_sum         : sum of the parts chosen so far (used by the recursion)

  Generator that yields, as lists, every ordered way of writing target_sum as
  num_parts positive integers. Call it with the first two arguments only.
  '''
  # None instead of a mutable [] default, so no list is shared between calls
  if current_combination is None:
    current_combination = []

  remaining = target_sum - current_sum
  if num_parts == 1:
    # The last number must be exactly what is needed to reach the target sum
    if 1 <= remaining <= target_sum:
      yield current_combination + [remaining]
    return

  # Leave at least 1 for each of the (num_parts - 1) parts still to come
  for i in range(1, remaining - (num_parts - 1) + 1):
    yield from find_combinations(target_sum, num_parts - 1, current_combination + [i], current_sum + i)

# Get all combinations of 5 positive integers that sum to 12
split_list = list(find_combinations(12, 5))


In [None]:
# Compression ratio of every 5-nibble split, one dictionary per Bonn data set.
# o_ratio is computed here as well because the export and top-5 cells below
# read it; leaving it commented out makes them fail on a fresh kernel.
f_ratio = {}
o_ratio = {}
s_ratio = {}
for split in split_list:
  ratio_f = calRatio_all(df_F_new, split)
  ratio_o = calRatio_all(df_O_new, split)
  ratio_s = calRatio_all(df_S_new, split)
  # Lists are not hashable, so the split is stored as a tuple key
  f_ratio[tuple(split)] = ratio_f
  o_ratio[tuple(split)] = ratio_o
  s_ratio[tuple(split)] = ratio_s

In [None]:
import openpyxl
from openpyxl import Workbook

# Single-sheet workbook with one (split, ratio) row per entry of o_ratio
wb = Workbook()
ws = wb.active

# Header row first, then the splits in dictionary order
rows = [['Key', 'Value']]
rows.extend([str(key), value] for key, value in o_ratio.items())
for row in rows:
  ws.append(row)

# Write the workbook to disk
wb.save('O-output-5.xlsx')

In [None]:
import openpyxl
from openpyxl import Workbook

# Two-sheet workbook: compression ratios of the F set and of the S set
wb = Workbook()
ws_f = wb.active
# Name the first sheet too, so it is not left with the default title while
# the S sheet is labelled
ws_f.title = 'Ratio List F'
ws_s = wb.create_sheet(title='Ratio List S')

ws_f.append(['Key', 'Value'])
ws_s.append(['Key', 'Value'])

# One row per split: the split as text, then its ratio
for key, value in f_ratio.items():
  ws_f.append([str(key), value])

for key, value in s_ratio.items():
  ws_s.append([str(key), value])

wb.save('5 nibbles.xlsx')


In [None]:
# Five splits with the highest compression ratio on the F set, best first
ranked_f = sorted(f_ratio.items(), key=lambda pair: pair[1], reverse=True)
top_5_f_ratio = ranked_f[:5]
print(top_5_f_ratio)

[((4, 2, 2, 1, 3), 1.542978949905596), ((4, 2, 1, 2, 3), 1.5391179968550202), ((4, 2, 2, 2, 2), 1.5386435375656984), ((4, 3, 1, 1, 3), 1.5314628329632702), ((4, 3, 1, 2, 2), 1.527191804778889)]


In [None]:
# Five splits with the highest compression ratio on the O set, best first
ranked_o = sorted(o_ratio.items(), key=lambda pair: pair[1], reverse=True)
top_5_o_ratio = ranked_o[:5]
print(top_5_o_ratio)

[((4, 2, 1, 1, 4), 1.3326917994348177), ((4, 2, 1, 2, 3), 1.3318426722240477), ((4, 1, 1, 2, 4), 1.3251694871396782), ((4, 1, 2, 1, 4), 1.322417018090032), ((4, 1, 2, 2, 3), 1.3215809294936414)]


In [None]:
# Five splits with the highest compression ratio on the S set, best first
ranked_s = sorted(s_ratio.items(), key=lambda pair: pair[1], reverse=True)
top_5_s_ratio = ranked_s[:5]
print(top_5_s_ratio)

[((3, 2, 1, 2, 4), 1.1458606703904886), ((2, 2, 2, 2, 4), 1.1447411619248529), ((3, 1, 2, 2, 4), 1.144651077458355), ((2, 2, 1, 2, 5), 1.1432714669774673), ((3, 1, 1, 2, 5), 1.14318161366628)]


# Import MIT data

In [7]:
os.chdir('/content/drive/My Drive/0-Project/MIT')
# 48 csv files; sorted so the column order of df_mix does not depend on the
# order in which os.listdir happens to return them
csv_files = sorted(f for f in os.listdir() if f.endswith('.csv'))

# Collect one two-column frame per file and concatenate once at the end,
# instead of re-copying a growing DataFrame on every iteration
signal_frames = []

for file in csv_files:
  file_path = os.path.join(os.getcwd(), file)

  df = pd.read_csv(file_path, skiprows=1)

  # Keep only the last two columns; copy so relabelling does not touch df
  df_last_two_cols = df.iloc[:, -2:].copy()

  # Label the pair with the first three characters of the file name
  file_prefix = file[:3]
  df_last_two_cols.columns = [f"{file_prefix}-1", f"{file_prefix}-2"]

  signal_frames.append(df_last_two_cols)

# pd.concat rejects an empty list, so fall back to an empty frame
df_mix = pd.concat(signal_frames, axis=1) if signal_frames else pd.DataFrame()


In [8]:
# scale the data
# NOTE(review): the factor 600 is presumably chosen so the shifted values fit
# into 12 bits - confirm against the data range
df_mix_scaled = df_mix.mul(600)
# round the scaled result into integers
df_mix_scaled = df_mix_scaled.round().astype(int)
scaled_min = df_mix_scaled.min().min()
# shift all data so the smallest value becomes 0; subtracting the minimum does
# this whatever its sign, whereas adding abs(min) only works for a negative one
df_mix_scaled = df_mix_scaled.sub(scaled_min)

# choose 1,3,5.. columns in df_mixed_scaled as mixed_signal_1
mixed_signal_1 = df_mix_scaled.iloc[:,1::2]
# choose 0,2,4.. columns in df_mixed_scaled as mixed_signal_2
mixed_signal_2 = df_mix_scaled.iloc[:,::2]

In [9]:
# Compression ratio of every 5-nibble split for the two MIT signal groups,
# keyed by the split as a tuple
mit_ratio_list1 = {}
mit_ratio_list2 = {}
for split in split_list:
  split_key = tuple(split)
  ratios = (calRatio_all(mixed_signal_1, split), calRatio_all(mixed_signal_2, split))
  mit_ratio_list1[split_key], mit_ratio_list2[split_key] = ratios

In [None]:
# Five best splits for each MIT signal group, highest compression ratio first
ranked_1 = sorted(mit_ratio_list1.items(), key=lambda pair: pair[1], reverse=True)
ranked_2 = sorted(mit_ratio_list2.items(), key=lambda pair: pair[1], reverse=True)
top_5_mix_ratio_1 = ranked_1[:5]
top_5_mix_ratio_2 = ranked_2[:5]
print(top_5_mix_ratio_1)
print(top_5_mix_ratio_2)

[((4, 2, 2, 1, 3), 1.5454144357925614), ((5, 2, 1, 1, 3), 1.5449158812077701), ((4, 2, 1, 1, 4), 1.5429225793058188), ((4, 2, 1, 2, 3), 1.5412379256317024), ((3, 2, 2, 1, 4), 1.5373551500952691)]
[((3, 2, 2, 1, 4), 1.5301062573789848), ((4, 2, 2, 1, 3), 1.529643481532299), ((3, 2, 2, 2, 3), 1.5291347513128097), ((4, 2, 1, 1, 4), 1.5265950437195441), ((3, 3, 2, 1, 3), 1.526105186152303)]


In [10]:
import openpyxl
from openpyxl import Workbook

# Two-sheet workbook: one sheet per MIT signal group
wb = Workbook()
ws_1 = wb.active
ws_1.title = 'Ratio List 1'
ws_2 = wb.create_sheet(title='Ratio List 2')

# Fill both sheets the same way: a header row, then one row per split
for sheet, ratios in ((ws_1, mit_ratio_list1), (ws_2, mit_ratio_list2)):
  sheet.append(['Key', 'Value'])
  for key, value in ratios.items():
    sheet.append([str(key), value])

wb.save('5 nibbles MIT.xlsx')