# Bonn data

In [1]:
import os
import pandas as pd
import numpy as np
from google.colab import drive
# Mount Google Drive at /content/drive; the data paths used below live under it.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:

# Work from the project folder so the subset folders can be addressed by name.
os.chdir('/content/drive/My Drive/0-Project')

# Bonn subsets -- O: healthy people with eyes closed; F: patients, seizure free;
# S: seizure.
folders = ['F', 'O', 'S']
dataframes = {}

for subset in folders:
  columns = {}
  for recording in os.listdir(subset):
    # file name characters [1:4] label the column; every stripped line of the
    # file is one (still textual) sample of that column
    with open(os.path.join(subset, recording), 'r') as handle:
      columns[recording[1:4]] = [row.strip() for row in handle]

  frame = pd.DataFrame(columns)
  # Keep the columns in sorted label order and store the frame under its
  # folder name
  dataframes[subset] = frame[sorted(frame.columns)]

# One DataFrame per subset, with the text samples converted to numbers
df_F = dataframes['F'].apply(pd.to_numeric)
df_O = dataframes['O'].apply(pd.to_numeric)
df_S = dataframes['S'].apply(pd.to_numeric)

# Representation

1. All nibbles are the same, 0000
2. Nibble 1 to 10 are the same, nibble 11 is different, 0001
3. Nibble 1 to 9 are the same, nibble 10 to 11 are different, 0010
4. Nibble 1 to 8 are the same, nibble 9 to 11 are different, 0011
5. Nibble 1 to 7 are the same, nibble 8 to 11 are different, 0100
6. Nibble 1 to 6 are the same, nibble 7 to 11 are different, 0101
7. Nibble 1 to 5 are the same, nibble 6 to 11 are different, 0110
8. Nibble 1 to 4 are the same, nibble 5 to 11 are different, 0111
9. Nibble 1 to 3 are the same, nibble 4 to 11 are different, 1000
10. Nibble 1 to 2 are the same, nibble 3 to 11 are different, 1001
11. Nibble 1 is the same, nibble 2 to 11 are different, 1010
12. All nibbles are different, 1011

In [2]:
def compare(str1, str2, split):
  '''
  Encode how many leading nibbles of two 12-bit strings agree.

  Params:
    str1  : input string 1, a 12-bit binary representation
    str2  : input string 2, a 12-bit binary representation
    split : a list of positive nibble widths that sum up to 12
            (11 widths in this notebook)

  Both strings are cut into len(split) consecutive nibbles and compared nibble
  by nibble from the left. The returned code is the number of nibbles from the
  first mismatch through the last nibble, written as 4 binary digits (this
  needs at most 15 nibbles). For 11 nibbles that gives:
  All nibbles are the same, 0000
  Nibble 1 to 10 are the same, nibble 11 is different, 0001
  Nibble 1 to 9 are the same, nibble 10 to 11 are different, 0010
  Nibble 1 to 8 are the same, nibble 9 to 11 are different, 0011
  Nibble 1 to 7 are the same, nibble 8 to 11 are different, 0100
  Nibble 1 to 6 are the same, nibble 7 to 11 are different, 0101
  Nibble 1 to 5 are the same, nibble 6 to 11 are different, 0110
  Nibble 1 to 4 are the same, nibble 5 to 11 are different, 0111
  Nibble 1 to 3 are the same, nibble 4 to 11 are different, 1000
  Nibble 1 to 2 are the same, nibble 3 to 11 are different, 1001
  Nibble 1 is the same, nibble 2 to 11 are different, 1010
  All nibbles are different, 1011

  Returns:
    the code as a binary string
  Raises:
    ValueError : if either string is not 12 characters long, or if the widths
                 in split do not sum up to 12
  '''
  # Ensure both strings are 12-bit binary representations
  if len(str1) != 12 or len(str2) != 12:
    raise ValueError("Both strings must be 12-bit binary representations.")
  # A split that does not cover exactly 12 bits would leave trailing bits
  # uncompared, so two different samples could be reported as identical
  if sum(split) != 12:
    raise ValueError("split must sum up to 12.")

  # Count the identical leading nibbles, stopping at the first mismatch
  identical_nibbles = 0
  start = 0
  for width in split:
    end = start + width
    if str1[start:end] != str2[start:end]:
      break
    identical_nibbles += 1
    start = end

  # Every nibble from the first mismatch onwards counts as different
  return format(len(split) - identical_nibbles, '04b')

In [3]:
def l2sb(input, split):
  '''
  Params:
    input : a list of 12-bit binary string
    split : a list of nibble widths that sum up to 12 (11 widths in this
            notebook)
  Use L2SB algorithm and return a list of compressed result.

  The first string is kept as it is. Every later string is replaced by the
  4-bit code that compare() returns for it and the previous string, followed
  by the bits of the string from its first differing nibble onwards. Code
  '0000' (no nibble differs) is emitted on its own.
  An empty input gives an empty list.
  '''
  if not input:
    return []

  # offsets[k] is the bit position at which nibble k starts (0-based), so the
  # widths are summed once here instead of once per sample
  offsets = [0]
  for width in split:
    offsets.append(offsets[-1] + width)

  res = [input[0]]
  for i in range(1, len(input)):
    code = compare(input[i], input[i-1], split)
    # The code is the binary count of nibbles from the first mismatch through
    # the last nibble ('0000' = identical, '1011' = all 11 nibbles differ)
    differing = int(code, 2)
    if differing == 0:
      res.append(code)
    else:
      res.append(code + input[i][offsets[len(split) - differing]:])
  return res


In [4]:
def total_length(strings):
  '''
  Params:
    strings : a list of string
  Add up the lengths of all strings in the list and return that total.
  '''
  count = 0
  for item in strings:
    count += len(item)
  return count

In [5]:
def calRatio_all(input, split):
  '''
  Params:
    input : a data frame, each column represents a txt file
    split : the list of nibble widths that is handed to l2sb for every column

  Each column is written as 12-bit binary strings and compressed with l2sb.
  The lengths of the original and of the compressed strings are accumulated
  over all columns, and the overall compression ratio
  (total original length / total compressed length) is returned.
  '''
  bits_before = 0
  bits_after = 0
  for idx in range(input.shape[1]):
    as_binary = [np.binary_repr(sample, width=12) for sample in input.iloc[:, idx]]
    bits_before += total_length(as_binary)
    bits_after += total_length(l2sb(as_binary, split))
  return bits_before / bits_after

# Test case

In [None]:
# Hand-built sequence that walks through every code once: each sample agrees
# with its predecessor on one more leading nibble than the previous one did.
split = [2,1,1,1,1,1,1,1,1,1,1]
input = ['000100110011', # first sample, kept as it is
         '001010101100', # 1 same 1010
         '001101010011', # 1-2 same 1001
         '001110101100', # 1-3 same 1000
         '001111010011', # 1-4 same 0111
         '001111101111', # 1-5 same, 0110
         '001111110001', # 1-6 same, 0101
         '001111111110', # 1-7 same, 0100
         '001111111001', # 1-8 same, 0011
         '001111111010', # 1-9 same, 0010
         '001111111011', # 1-10 same, 0001
         '001111111011', # all same, 0000
         '010000000100'  # all different 1011
         ]
res = l2sb(input, split)

# Each entry after the first is the 4-bit code followed by the bits from the
# first differing nibble onwards
expected = ['000100110011', '10101010101100', '1001101010011', '100010101100',
            '01111010011', '0110101111', '010110001', '01001110', '0011001',
            '001010', '00011', '0000', '1011010000000100']
assert res == expected, res
print(res)

['000100110011', '10101010101100', '1001101010011', '100010101100', '01111010011', '0110101111', '010110001', '01001110', '0011001', '001010', '00011', '0000', '1011010000000100']


# Transfer data

In [None]:
# Shift every Bonn subset by its own overall minimum, so the smallest sample
# of each shifted frame is 0. The source frames are left untouched.
df_F_new = df_F - df_F.min().min()
df_O_new = df_O - df_O.min().min()
df_S_new = df_S - df_S.min().min()

In [6]:
def find_combinations(target_sum, num_parts, current_combination=None, current_sum=0):
  '''
  Yield every ordered list of num_parts positive integers that sums to
  target_sum, in ascending lexicographic order.

  Params:
    target_sum          : the total the parts have to add up to
    num_parts           : how many parts still have to be chosen
    current_combination : the parts chosen so far; used by the recursion,
                          callers normally leave it out
    current_sum         : the sum of current_combination; used by the recursion

  Nothing is yielded when num_parts is smaller than 1 or when target_sum is
  too small to give every part at least 1.
  '''
  # None instead of a shared mutable [] default
  if current_combination is None:
    current_combination = []

  # Without this guard a num_parts below 1 never reaches the base case and
  # recurses until RecursionError
  if num_parts < 1:
    return

  remaining = target_sum - current_sum
  if num_parts == 1:
    # The last number must be exactly what is needed to reach the target sum
    if 1 <= remaining <= target_sum:
      yield current_combination + [remaining]
    return

  # Leave at least 1 for each of the other num_parts - 1 parts
  for i in range(1, remaining - (num_parts - 1) + 1):
    yield from find_combinations(target_sum, num_parts - 1, current_combination + [i], current_sum + i)

# Get all ordered combinations of 11 positive integers that sum to 12
split_list = list(find_combinations(12, 11))


In [None]:
# Overall compression ratio of every Bonn subset for every candidate split,
# keyed by the split as a tuple.
# o_ratio is computed as well because a later cell ranks it; leaving it out
# makes that cell fail with NameError on a fresh kernel.
f_ratio = {}
o_ratio = {}
s_ratio = {}
for split in split_list: # slow: the original note was 12min for F and S only
  key = tuple(split)
  f_ratio[key] = calRatio_all(df_F_new, split)
  o_ratio[key] = calRatio_all(df_O_new, split)
  s_ratio[key] = calRatio_all(df_S_new, split)

In [None]:
import openpyxl
from openpyxl import Workbook

# One workbook with a sheet per Bonn subset
wb = Workbook()
ws_f = wb.active
ws_f.title = 'Ratio List F'
ws_o = wb.create_sheet(title='Ratio List O')
ws_s = wb.create_sheet(title='Ratio List S')

# Every sheet starts with the same header row
for sheet in (ws_f, ws_o, ws_s):
  sheet.append(['Key', 'Value'])

# One row per split: the split written as text, then its ratio.
# Only F and S are exported; the O sheet keeps just its header row.
for sheet, ratios in ((ws_f, f_ratio), (ws_s, s_ratio)):
  for key, value in ratios.items():
    sheet.append([str(key), value])

wb.save('11 nibbles.xlsx')


In [None]:
# Rank the splits of subset F from highest to lowest ratio, keep the first five
ranked_f = sorted(f_ratio.items(), key=lambda entry: entry[1], reverse=True)
top_5_f_ratio = ranked_f[:5]
print(top_5_f_ratio)

[((1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1), 1.443606161312506), ((1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1), 1.4425188625684195), ((2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 1.4420728961923983), ((1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2), 1.4358430992996307), ((1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1), 1.4336796319160812)]


In [None]:
# Rank the splits of subset O from highest to lowest ratio, keep the first five
ranked_o = sorted(o_ratio.items(), key=lambda entry: entry[1], reverse=True)
top_5_o_ratio = ranked_o[:5]
print(top_5_o_ratio)

[((2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 1.257366542909513), ((1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1), 1.2558418973624357), ((1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2), 1.2549997434556297), ((1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1), 1.2526676899181195), ((1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1), 1.2521565852831105)]


In [None]:
# Rank the splits of subset S from highest to lowest ratio, keep the first five
ranked_s = sorted(s_ratio.items(), key=lambda entry: entry[1], reverse=True)
top_5_s_ratio = ranked_s[:5]
print(top_5_s_ratio)

[((1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2), 1.0944249861204545), ((2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 1.0941409911630156), ((1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1), 1.09332368487709), ((1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1), 1.0913830156348543), ((1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1), 1.0896655756860334)]


# Import MIT data

In [7]:
os.chdir('/content/drive/My Drive/0-Project/MIT')
# 48 csv files expected; sorted so that the column order of df_mix does not
# depend on the order in which the directory happens to be listed
csv_files = sorted(f for f in os.listdir() if f.endswith('.csv'))

# Collect one two-column frame per file and concatenate them once at the end;
# concatenating inside the loop would copy the growing frame for every file
signal_frames = []

for file in csv_files:
  file_path = os.path.join(os.getcwd(), file)

  # the first line of each file is skipped
  df = pd.read_csv(file_path, skiprows=1)

  # only the last two columns are kept
  df_last_two_cols = df.iloc[:, -2:]

  # label them "<first three characters of the file name>-1" and "...-2"
  file_prefix = file[:3]
  df_last_two_cols.columns = [f"{file_prefix}-1", f"{file_prefix}-2"]

  signal_frames.append(df_last_two_cols)

# pd.concat refuses an empty list, so fall back to an empty frame as before
df_mix = pd.concat(signal_frames, axis=1) if signal_frames else pd.DataFrame()


In [8]:
# Scale every sample by 600 and round the result to an integer
df_mix_scaled = (df_mix * 600).round().astype(int)

# Offset all samples by the absolute value of the overall minimum; with a
# negative minimum this moves the smallest sample to 0
scaled_min = df_mix_scaled.min().min()
df_mix_scaled = df_mix_scaled + abs(scaled_min)

# mixed_signal_1: the columns at positions 1, 3, 5, ...
mixed_signal_1 = df_mix_scaled.iloc[:, 1::2]
# mixed_signal_2: the columns at positions 0, 2, 4, ...
mixed_signal_2 = df_mix_scaled.iloc[:, 0::2]

In [9]:
# Overall compression ratio of both MIT signal groups for every candidate
# split, keyed by the split as a tuple
mit_ratio_list1 = {}
mit_ratio_list2 = {}
for split in split_list:
  key = tuple(split)
  mit_ratio_list1[key] = calRatio_all(mixed_signal_1, split)
  mit_ratio_list2[key] = calRatio_all(mixed_signal_2, split)

In [None]:
# Rank the splits of each MIT signal group from highest to lowest ratio and
# keep the first five of each
ranked_mix_1 = sorted(mit_ratio_list1.items(), key=lambda entry: entry[1], reverse=True)
ranked_mix_2 = sorted(mit_ratio_list2.items(), key=lambda entry: entry[1], reverse=True)
top_5_mix_ratio_1 = ranked_mix_1[:5]
top_5_mix_ratio_2 = ranked_mix_2[:5]
print(top_5_mix_ratio_1)
print(top_5_mix_ratio_2)

[((1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2), 1.4329229527500291), ((2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 1.4326764366319114), ((1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1), 1.4314421627920302), ((1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1), 1.4297918817111994), ((1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1), 1.4269984584893196)]
[((1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2), 1.428510411399425), ((2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 1.4281847466067181), ((1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1), 1.426437943139223), ((1, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1), 1.422576402766944), ((1, 1, 1, 2, 1, 1, 1, 1, 1, 1, 1), 1.4204880187425502)]


In [10]:
import openpyxl
from openpyxl import Workbook

# One workbook with a sheet per MIT signal group
wb = Workbook()
ws_1 = wb.active
ws_1.title = 'Ratio List 1'
ws_2 = wb.create_sheet(title='Ratio List 2')

# Both sheets start with the same header row
for sheet in (ws_1, ws_2):
  sheet.append(['Key', 'Value'])

# One row per split: the split written as text, then its ratio
for sheet, ratios in ((ws_1, mit_ratio_list1), (ws_2, mit_ratio_list2)):
  for key, value in ratios.items():
    sheet.append([str(key), value])

wb.save('11 nibbles MIT.xlsx')