#
<h1 style="color: #FFC0CB; font-size: 24px;">
CSV_DIVIDER CONCEPT
</h1>

I want to divide by 100mb my csv file with table structured data.

## Full code

In [90]:
import pandas as pd
import os

def split_csv(file_path, output_folder, max_file_size_mb, delimiter=';'):
    # Create the output directory if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Read the CSV file into a DataFrame
    df = pd.read_csv(file_path, delimiter=delimiter)

    # Calculating chunk size
    max_file_size = max_file_size_mb * 1000 * 1000

    # Initialize variables
    current_file_size = 0
    current_chunk = 1
    output_file = os.path.join(output_folder, f"chunk_{current_chunk}.csv")

    # Write the header row to each output file
    pd.DataFrame(df.columns).to_csv(output_file, index=False)

    # iterate over rows and split into chunks
    with open(output_file, 'a') as f:
        for _, row in df.iterrows():
            row_str = delimiter.join([str(val) for val in row.values]) + '\n'
            row_size = len(row_str)

            # Check if adding the row exceeds the maximum file size
            if current_file_size + row_size > max_file_size:
                f.close()
                print(f"Saved {current_chunk} / x")

                # Increment chunk number
                current_chunk += 1
                output_file = os.path.join(output_folder, f"chunk_{current_chunk}.csv")

                # Write the header row to the new output file
                pd.DataFrame(df.columns).to_csv(output_file, index=False)

                # Open new file for writing
                f = open(output_file, 'a')

                # Reset the current file size
                current_file_size = 0
            
            # Write the row to the chunk
            f.write(row_str)

            # Update the current file size
            current_file_size += row_size        
    print(f"Saved {current_chunk} chunks.")

## Production

## Example of usage

In [91]:
file_path = os.path.join(os.getcwd(), "input", "amazon.csv")
output_path = os.path.join(os.getcwd(),"output")
max_file_size_mb = 75

split_csv(file_path, output_path, max_file_size_mb, delimiter=',')

Saved 1 / x
Saved 2 chunks.


In [67]:
my_ch = 75 * 1000 * 1000
ch = 75 * 1024 * 1024

print(my_ch / 1024 / 1024)

71.52557373046875


## Test

In [84]:
test_path = os.path.join(os.getcwd(), "output/chunk_1.csv")

In [87]:
test = pd.read_csv(test_path, delimiter='\n')

ValueError: Specified \n as separator or delimiter. This forces the python engine which does not accept a line terminator. Hence it is not allowed to use the line terminator as separator.