# Format and write bin data

## Imports

In [1]:
import os
from pathlib import Path



## Install dependencies

In [2]:
!pip install tabulate pandas



## Create Files

In [9]:
def write_list_to_file(data_list, filename: os.PathLike) -> int:
    """Write the items of ``data_list`` to ``filename``, one item per line.

    Every item is converted with ``str`` and the items are joined with
    newlines; no trailing newline is written after the last item.

    arguments:
    -----------
        data_list: iterable of values to write.
        filename: destination file, either a ``pathlib.Path`` or a plain
            string path.

    returns:
    -----------
        the number of characters written (the value returned by
        ``Path.write_text``).
    """
    # Wrap in Path so that plain string paths work as well as Path objects.
    return Path(filename).write_text("\n".join(str(val) for val in data_list))


def write_dict_to_filenames(data: dict, path: os.PathLike = None, filenames_list: list = None) -> None:
    """Write every entry of ``data`` to its own ``<stem>_<key>.txt`` file.

    arguments:
    -----------
        data: dictionary containing the chunk number as key and the chunk data as value.
        path: path to the output directory. If not provided, the current directory is used.
        filenames_list: optional list of file names. Only the FIRST entry is
            used: its stem becomes the common prefix of every output file.
            When the list is missing or empty the prefix is "input".
    """
    out_dir = Path(path) if path is not None else Path()
    # A None/empty list falls back to the default name; None is used as the
    # default instead of a shared mutable ``[]``.
    first_name = filenames_list[0] if filenames_list else 'input.txt'
    filename_stem = Path(first_name).stem if first_name is not None else "input"

    for key, values in data.items():
        write_list_to_file(values, filename=out_dir / f"{filename_stem}_{key}.txt")


def create_files(data: dict, path: os.PathLike = None, filename: str = None) -> None:
    """Write every chunk of ``data`` to ``output_<key>.txt`` inside a sub-directory.

    The sub-directory is named after the stem of ``filename`` and is created
    (including missing parents) when it does not exist yet.

    arguments:
    -----------
        data: dictionary containing the chunk number as key and the chunk data as value.
        path: path to the parent output directory. If not provided, the current
            working directory is used.
        filename: name of the input file; only its stem is used, as the name of
            the sub-directory. Defaults to "input" when not provided.
    """
    subdir_name = "input" if filename is None else Path(filename).stem
    base_dir = Path(os.getcwd()) if path is None else Path(path)

    target_dir = base_dir / subdir_name
    target_dir.mkdir(parents=True, exist_ok=True)

    for key, values in data.items():
        write_list_to_file(values, filename=target_dir / f"output_{key}.txt")
        

## Process binary data

In [10]:

import pandas as pd


def get_xor_of_list(data_list):
    """Return the XOR of every element with its predecessor.

    The first element has no predecessor and is XORed with 0. An empty
    input yields an empty list.

    arguments:
    -----------
        data_list: iterable of integers.

    returns:
    -----------
        list with the same length as the input, where item ``i`` is
        ``data_list[i] ^ data_list[i-1]`` (and ``data_list[0] ^ 0`` for ``i == 0``).
    """
    values = list(data_list)
    # Pair each value with the one before it; 0 stands in for the missing
    # predecessor of the first value. zip() stops at the shorter input, so an
    # empty list produces no pairs instead of raising IndexError.
    return [current ^ previous for previous, current in zip([0] + values[:-1], values)]

def get_transformed_data(filename: os.PathLike, n: int) -> dict:
    """Read one integer per line from a text file and split them into n interleaved chunks.

    Chunk ``i`` holds the values at positions ``i, i + n, i + 2n, ...`` of the
    input. As a side effect, the XOR of every input value with its predecessor
    is written to ``<parent>/<stem>/<stem>_c.txt`` (the directory is created
    when missing).

    arguments:
    -----------
        filename: path to the input file
        n: number of chunks to be created; a value below 1 produces an empty
            dictionary.

    returns:
    -----------
        dictionary containing the chunk number as key and the chunk data as value.
    """
    filename = Path(filename)
    output_dir = filename.parent / filename.stem
    output_dir.mkdir(exist_ok=True, parents=True)

    # Blank lines (e.g. trailing empty lines) are skipped instead of crashing
    # int(); int() itself tolerates surrounding whitespace.
    data = [int(line) for line in filename.read_text().splitlines() if line.strip()]

    write_list_to_file(get_xor_of_list(data), output_dir / f"{filename.stem}_c.txt")

    return {i: data[i::n] for i in range(n)}


def get_sum_of_elements_of_dict(data_dict, idx):
    """Return the sum of the ``idx``-th element of every list stored in ``data_dict``.

    arguments:
    -----------
        data_dict: dictionary whose values are indexable sequences of numbers.
        idx: position to read from every sequence.

    returns:
    -----------
        sum of ``data_dict[k][idx]`` over all keys (0 for an empty dictionary).
    """
    total = 0
    for values in data_dict.values():
        total += values[idx]
    return total



def process_binary_data(filename: os.PathLike, n: int, output_filenames_list: list = None) -> None:
    """Split the input file into n interleaved chunks, XOR them and write the results.

    All output goes to the directory ``<parent>/<stem>/`` next to the input file:
      * one file per chunk with the XOR of each value and its predecessor
        (written by ``write_dict_to_filenames``),
      * ``<stem>_a.txt``: for every row, the sum of the XOR values across chunks,
      * ``<stem>_b.txt``: all XOR chunks concatenated into a single column,
      * ``<stem>_c.txt``: written by ``get_transformed_data``.

    arguments:
    -----------
        filename: path to the input file (``pathlib.Path`` or string path)
        n: number of chunks to be created
        output_filenames_list: optional list of output file names; at most the
            first n entries are forwarded to ``write_dict_to_filenames``.

    returns:
    -----------
        None. The results are written to disk.
    """
    # Normalise up front: the code below relies on .parent / .stem, which a
    # plain string path does not provide.
    filename = Path(filename)
    # None replaces the former mutable [] default.
    output_filenames = output_filenames_list[:n] if output_filenames_list else []

    # transform data from single column to m x n
    transformed_data_dict = get_transformed_data(filename, n)
    # xor of dict values and shifted by 1 values.
    data_xor_dict = {f'xor_{k}': get_xor_of_list(v)
                     for k, v in transformed_data_dict.items()}

    # sum of the xor values of all chunks for every row index.
    # NOTE(review): the row count is taken from the first chunk; when the
    # number of input values is not a multiple of n the later chunks are
    # shorter and the per-row sum raises IndexError.
    row_count = len(list(transformed_data_dict.values())[0])
    data_xor_sum_list = [get_sum_of_elements_of_dict(data_xor_dict, idx)
                         for idx in range(row_count)]

    # transform xor data to single column
    transformed_data_xor_list = [
        val for vlist in data_xor_dict.values() for val in vlist]

    # create files
    output_data_path = filename.parent / filename.stem
    output_data_path.mkdir(exist_ok=True, parents=True)

    write_dict_to_filenames(
        data_xor_dict, path=output_data_path, filenames_list=output_filenames)

    write_list_to_file(data_xor_sum_list,
                       filename=output_data_path / f"{filename.stem}_a.txt")
    write_list_to_file(transformed_data_xor_list,
                       filename=output_data_path / f"{filename.stem}_b.txt")

# Demo / smoke test: generates random input and runs the full pipeline on it
if __name__ == "__main__":
    import random
    import tabulate  # only needed for the optional table print below
    import pandas as pd
    import shutil  # only needed for the optional cleanup below

    # Number of interleaved chunks (columns); used for every call below so the
    # demo stays consistent when the value is changed.
    n = 8

    test_data_path = Path(os.getcwd()) / "test_data"
    # shutil.rmtree(test_data_path, ignore_errors=True)
    test_data_path.mkdir(exist_ok=True, parents=True)

    # 1024 random bits, one per line, as input for the pipeline.
    filename = test_data_path / 'result.txt'
    data = '\n'.join(str(random.randint(0, 1)) for _ in range(1024))
    # data = '\n'.join([str(i) for i in range(1024)])
    filename.write_text(data)

    data_dict = get_transformed_data(filename, n)

    # print(tabulate.tabulate(data_dict.values(), headers=list(data_dict.keys()), tablefmt='outer-rounded'))
    df = pd.DataFrame(data_dict)

    print(df.columns)
    process_binary_data(filename, n)

df.head()

Index([0, 1, 2, 3, 4, 5, 6, 7], dtype='int64')


Unnamed: 0,0,1,2,3,4,5,6,7
0,0,1,1,1,1,1,0,1
1,1,0,1,0,0,1,1,1
2,0,0,1,0,1,0,1,1
3,1,0,0,0,1,1,1,0
4,0,1,1,0,0,0,0,0


### Get XOR

In [11]:
# XOR every column value with the value before it (see get_xor_of_list).
data_xor_dict = dict(
    (f'xor_{key}', get_xor_of_list(column)) for key, column in data_dict.items()
)

# Put the original columns next to their XOR columns for a visual check.
data_xor_dict_df = pd.DataFrame(data_xor_dict)
final_df = pd.concat([df, data_xor_dict_df], axis=1)

final_df

Unnamed: 0,0,1,2,3,4,5,6,7,xor_0,xor_1,xor_2,xor_3,xor_4,xor_5,xor_6,xor_7
0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1
1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,1,0
2,0,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0
3,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1
4,0,1,1,0,0,0,0,0,1,1,1,0,1,1,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1
124,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0
125,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1
126,1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,0


### Sum

In [12]:
def get_sum_of_elements_of_dict(data_dict, idx):
    """Add up the element at position ``idx`` of every list in ``data_dict``.

    arguments:
    -----------
        data_dict: dictionary whose values are indexable sequences of numbers.
        idx: position to read from every sequence.

    returns:
    -----------
        sum of ``data_dict[k][idx]`` over all keys.
    """
    return sum(column[idx] for column in data_dict.values())

# One sum per row: add the XOR values of all columns at the same row index.
# The number of rows is taken from the first column of the original data.
row_count = len(list(data_dict.values())[0])
xor_sum_list = [
    get_sum_of_elements_of_dict(data_xor_dict, row) for row in range(row_count)
]

# Attach the sums as an extra column for a visual check.
final_df['sum'] = xor_sum_list
final_df

Unnamed: 0,0,1,2,3,4,5,6,7,xor_0,xor_1,xor_2,xor_3,xor_4,xor_5,xor_6,xor_7,sum
0,0,1,1,1,1,1,0,1,0,1,1,1,1,1,0,1,6
1,1,0,1,0,0,1,1,1,1,1,0,1,1,0,1,0,5
2,0,0,1,0,1,0,1,1,1,0,0,0,1,1,0,0,3
3,1,0,0,0,1,1,1,0,1,0,1,0,0,1,0,1,4
4,0,1,1,0,0,0,0,0,1,1,1,0,1,1,1,0,6
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
123,1,1,0,1,1,0,1,0,1,1,0,0,1,0,0,1,4
124,1,0,1,0,1,0,1,0,0,1,1,1,0,0,0,0,3
125,1,0,0,1,1,0,1,1,0,0,1,1,0,0,0,1,3
126,1,0,0,1,0,0,1,1,0,0,0,0,1,0,0,0,1


### Transform xor data to mn x 1

In [13]:
# Concatenate the XOR columns, in dictionary order, into one flat list.
transformed_data_xor_list = [
    value for name in data_xor_dict for value in data_xor_dict[name]
]

### Find XOR(original data, original data shifted by one)

## Create files

In [14]:
# All result files live in <parent>/<stem>/ next to the input file.
output_dir = filename.parent / filename.stem

# One output_<k>.txt per original (un-XORed) chunk.
create_files(data_dict, path=test_data_path, filename=filename)

# _a: per-row sum of the XOR columns, _b: all XOR columns as a single column.
write_list_to_file(xor_sum_list,
                   filename=output_dir / f"{filename.stem}_a.txt")
write_list_to_file(transformed_data_xor_list,
                   filename=output_dir / f"{filename.stem}_b.txt")

# _c: XOR of the original, un-chunked sequence with itself shifted by one.
# The cell previously wrote the same list as _b here, overwriting the _c
# file that get_transformed_data produces from the original sequence.
original_values = [int(line) for line in filename.read_text().splitlines()]
write_list_to_file(get_xor_of_list(original_values),
                   filename=output_dir / f"{filename.stem}_c.txt")

2047