In [4]:
import os
from pathlib import Path


def read_file(filename: os.PathLike):
    """
    Read a file containing data in binary formatand return its contents as a list of strings.
    Each string represents a line from the file.
    arguments:
    -----------
        filename: path to the input file
    returns:
    -----------
        list of strings containing the contents of the input file.

    usecases:
    -----------
    #example 1
    >>> read_file("input.txt")
    ["1", "0", "1", "0", "1", "0", "1", "0", "1", "0"]
    #example 2
    >>> read_file(Path(os.getcwd()) / "input.txt"))
    ["0", "1", "0", "1", "0", "1", "0", "1", "0", "1"]

    """
    with open(filename, "r") as f:
        yield from f.readlines()
        
# test function
if __name__ == "__main__":
    import random
    test_data_path = Path(os.getcwd()) / "test_data"
    test_data_path.mkdir(exist_ok=True, parents=True)
    data = '\n'.join([str(random.randint(0,1)) for i in range(256)])
    (test_data_path / 'input.txt').write_text(data)
    assert list(read_file(test_data_path / 'input.txt')) == data.split('\n')

AssertionError: 

In [24]:
!pip install tabulate pandas

Collecting pandas
  Using cached pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (19 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2024.1-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2024.1-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (13.0 MB)
Using cached pytz-2024.1-py2.py3-none-any.whl (505 kB)
Using cached tzdata-2024.1-py2.py3-none-any.whl (345 kB)
Installing collected packages: pytz, tzdata, pandas
Successfully installed pandas-2.2.2 pytz-2024.1 tzdata-2024.1


In [27]:

def process_binary(filename: os.PathLike, n: int) -> dict:
    """Create a dictionary containing the first n lines of a binary file.
    Read data and create n chunks of data.
    arguments:
    -----------
        filename: path to the input file
        n: number of chunks to create

    returns:
    -----------
        dictionary containing the chunk number as key and the chunk data as value.
    """
    data = [int(i.strip()) for i in Path(filename).read_text().splitlines()]
    data_dict = {i: data[i:len(data):n] for i in range(0, n)}
    return data_dict


# test function
if __name__ == "__main__":
    import random
    import tabulate
    import pandas as pd
    test_data_path = Path(os.getcwd()) / "test_data"
    test_data_path.mkdir(exist_ok=True, parents=True)
    # data = '\n'.join([str(random.randint(0, 1)) for i in range(1024)])
    data = '\n'.join([str(i) for i in range(1024)])
    (test_data_path / 'input.txt').write_text(data)
    # assert list(read_file(test_data_path / 'input.txt')) == data.split('\n')
    
data_dict = process_binary(test_data_path / 'input.txt', 8)

# print(tabulate.tabulate(data_dict.values(), headers=list(data_dict.keys()), tablefmt='outer-rounded'))
df = pd.DataFrame(data_dict)

df

Unnamed: 0,0,1,2,3,4,5,6,7
0,0,1,2,3,4,5,6,7
1,8,9,10,11,12,13,14,15
2,16,17,18,19,20,21,22,23
3,24,25,26,27,28,29,30,31
4,32,33,34,35,36,37,38,39
...,...,...,...,...,...,...,...,...
123,984,985,986,987,988,989,990,991
124,992,993,994,995,996,997,998,999
125,1000,1001,1002,1003,1004,1005,1006,1007
126,1008,1009,1010,1011,1012,1013,1014,1015


In [29]:
data_dict[1]
# data_list = list(range(128))
# data_list[0:128:8]

[1,
 9,
 17,
 25,
 33,
 41,
 49,
 57,
 65,
 73,
 81,
 89,
 97,
 105,
 113,
 121,
 129,
 137,
 145,
 153,
 161,
 169,
 177,
 185,
 193,
 201,
 209,
 217,
 225,
 233,
 241,
 249,
 257,
 265,
 273,
 281,
 289,
 297,
 305,
 313,
 321,
 329,
 337,
 345,
 353,
 361,
 369,
 377,
 385,
 393,
 401,
 409,
 417,
 425,
 433,
 441,
 449,
 457,
 465,
 473,
 481,
 489,
 497,
 505,
 513,
 521,
 529,
 537,
 545,
 553,
 561,
 569,
 577,
 585,
 593,
 601,
 609,
 617,
 625,
 633,
 641,
 649,
 657,
 665,
 673,
 681,
 689,
 697,
 705,
 713,
 721,
 729,
 737,
 745,
 753,
 761,
 769,
 777,
 785,
 793,
 801,
 809,
 817,
 825,
 833,
 841,
 849,
 857,
 865,
 873,
 881,
 889,
 897,
 905,
 913,
 921,
 929,
 937,
 945,
 953,
 961,
 969,
 977,
 985,
 993,
 1001,
 1009,
 1017]