#
<h1 style="color: #FFC0CB; font-size: 24px;">
CSV_DIVIDER CONCEPT
</h1>

The goal is to split a CSV file containing tabular data into chunks of at most 100 MB each, repeating the header row in every chunk.

## Full code

In [132]:
import csv
import io
import math
import os

import pandas as pd

def _encode_csv_row(row, delimiter, encoding):
    """Return `row` serialized as a single CSV record, encoded to bytes."""
    # newline='' keeps the csv module's own line terminator untouched.
    buffer = io.StringIO(newline='')
    csv.writer(buffer, delimiter=delimiter).writerow(row)
    return buffer.getvalue().encode(encoding)


def split_csv(file_path, output_folder, max_file_size_mb, delimiter=';', output_file_name='chunk'):
    """Split a CSV file into size-limited chunk files that each repeat the header.

    The input is streamed row by row, so the whole file is never held in
    memory. Every chunk is written to ``<output_file_name>_<n>.csv`` inside
    ``output_folder`` (numbering starts at 1) and begins with the header row
    of the input. Existing chunk files with the same names are overwritten.

    Args:
        file_path: Path of the CSV file to split. Its first row is treated as
            the header.
        output_folder: Directory that receives the chunk files; created if it
            does not exist.
        max_file_size_mb: Maximum size of one chunk in megabytes, where
            1 MB = 1,000,000 bytes.
        delimiter: Field delimiter used for both reading and writing.
        output_file_name: Prefix of the generated chunk file names.

    Raises:
        ValueError: If ``max_file_size_mb`` is not a positive number.

    Notes:
        A chunk always holds at least one data row, so a single row that is
        larger than the limit yields a chunk exceeding it rather than being
        dropped. An empty input file produces no chunk files.
    """
    max_file_size = max_file_size_mb * 1000 * 1000
    if max_file_size <= 0:
        raise ValueError("max_file_size_mb must be a positive number")

    # Create the output directory if it doesn't exist
    os.makedirs(output_folder, exist_ok=True)

    # Approximate number of chunks: repeated headers are not accounted for.
    total_n_chunks = math.ceil(os.path.getsize(file_path) / max_file_size)

    def chunk_path(index):
        return os.path.join(output_folder, f"{output_file_name}_{index}.csv")

    # newline='' lets the csv module handle line breaks inside quoted fields.
    with open(file_path, 'r', newline='') as f_in:
        reader = csv.reader(f_in, delimiter=delimiter)

        header_row = next(reader, None)
        if header_row is None:
            # Empty input: there is no header to replicate, nothing to write.
            return

        # Chunks are written as bytes in the encoding the input was read with,
        # so the size bookkeeping matches the real size on disk.
        encoding = f_in.encoding
        header_bytes = _encode_csv_row(header_row, delimiter, encoding)

        current_chunk = 1
        rows_in_chunk = 0
        current_file_size = len(header_bytes)

        # 'wb' truncates, so re-running never appends to a stale chunk file.
        f_out = open(chunk_path(current_chunk), 'wb')
        try:
            f_out.write(header_bytes)

            for row in reader:
                row_bytes = _encode_csv_row(row, delimiter, encoding)

                # Roll over only when the chunk already holds data; otherwise
                # an oversized row would leave a header-only chunk behind.
                if rows_in_chunk and current_file_size + len(row_bytes) > max_file_size:
                    f_out.close()
                    print(f"Saved {current_chunk} / {max(total_n_chunks, current_chunk)}")

                    current_chunk += 1
                    f_out = open(chunk_path(current_chunk), 'wb')
                    f_out.write(header_bytes)
                    current_file_size = len(header_bytes)
                    rows_in_chunk = 0

                f_out.write(row_bytes)
                current_file_size += len(row_bytes)
                rows_in_chunk += 1
        finally:
            f_out.close()

    print(f"Saved {current_chunk} / {max(total_n_chunks, current_chunk)}")

## Production

In [128]:
# Resolve the input path in this cell so it does not depend on a later cell
# having been executed first (otherwise a fresh kernel raises NameError here).
file_path = os.path.join(os.getcwd(), "input", "amazon.csv")
file_size = os.path.getsize(file_path)

In [129]:
# Report the input file size in bytes, as returned by os.path.getsize.
print(file_size)

131879567


## Example of usage

In [133]:
# Input file and destination folder, both relative to the current working directory.
file_path = os.path.join(os.getcwd(), "input", "amazon.csv")
output_path = os.path.join(os.getcwd(), "output")

# Largest allowed chunk, in megabytes.
max_file_size_mb = 75

split_csv(
    file_path=file_path,
    output_folder=output_path,
    max_file_size_mb=max_file_size_mb,
    delimiter=',',
)

Saved 1 / 2
Saved 2 / 2


## Test

In [134]:
# Load the original, unsplit input file so it can be compared with a generated chunk.
df = pd.read_csv(file_path, delimiter=',')

In [135]:
# Inspect the record with index label 0 of the original data.
df.loc[0][:]

Product Name    "CLEAR CLEAN ESN" Sprint EPIC 4G Galaxy SPH-D7...
Brand Name                                                Samsung
Price                                                      199.99
Rating                                                          5
Reviews         I feel so LUCKY to have found this used (phone...
Review Votes                                                  1.0
Name: 0, dtype: object

In [136]:
# Preview the first rows of the original file.
df.head()

Unnamed: 0,Product Name,Brand Name,Price,Rating,Reviews,Review Votes
0,"""CLEAR CLEAN ESN"" Sprint EPIC 4G Galaxy SPH-D7...",Samsung,199.99,5,I feel so LUCKY to have found this used (phone...,1.0
1,"""CLEAR CLEAN ESN"" Sprint EPIC 4G Galaxy SPH-D7...",Samsung,199.99,4,"nice phone, nice up grade from my pantach revu...",0.0
2,"""CLEAR CLEAN ESN"" Sprint EPIC 4G Galaxy SPH-D7...",Samsung,199.99,5,Very pleased,0.0
3,"""CLEAR CLEAN ESN"" Sprint EPIC 4G Galaxy SPH-D7...",Samsung,199.99,4,It works good but it goes slow sometimes but i...,0.0
4,"""CLEAR CLEAN ESN"" Sprint EPIC 4G Galaxy SPH-D7...",Samsung,199.99,4,Great phone to replace my lost phone. The only...,0.0


In [137]:
# Path of the first generated chunk; components are passed separately so
# os.path.join picks the right separator on every platform.
test_path = os.path.join(os.getcwd(), "output", "chunk_1.csv")

In [138]:
# Load the first generated chunk with the same delimiter used for the original file.
test = pd.read_csv(test_path, delimiter=',')

In [139]:
# Preview the first rows of the chunk for comparison with the original file's preview.
test.head()

Unnamed: 0,Product Name,Brand Name,Price,Rating,Reviews,Review Votes
0,"""CLEAR CLEAN ESN"" Sprint EPIC 4G Galaxy SPH-D7...",Samsung,199.99,5,I feel so LUCKY to have found this used (phone...,1.0
1,"""CLEAR CLEAN ESN"" Sprint EPIC 4G Galaxy SPH-D7...",Samsung,199.99,4,"nice phone, nice up grade from my pantach revu...",0.0
2,"""CLEAR CLEAN ESN"" Sprint EPIC 4G Galaxy SPH-D7...",Samsung,199.99,5,Very pleased,0.0
3,"""CLEAR CLEAN ESN"" Sprint EPIC 4G Galaxy SPH-D7...",Samsung,199.99,4,It works good but it goes slow sometimes but i...,0.0
4,"""CLEAR CLEAN ESN"" Sprint EPIC 4G Galaxy SPH-D7...",Samsung,199.99,4,Great phone to replace my lost phone. The only...,0.0
