### Multi-Process One-Thread

In [2]:
import multiprocessing
from multiprocessing import Process

In [3]:
# Report the CPU count seen by multiprocessing.
# Note: cpu_count() is the number of CPUs in the system; the current process
# may be restricted to fewer (e.g. by CPU affinity or container limits).
n_cores = multiprocessing.cpu_count()
print(f"Number of available CPU cores: {n_cores}")

Number of available CPU cores: 24


In [5]:
import pandas as pd
from time import time, sleep

# Example of a processing function
def process_dataframe(chunk_id, chunk_data: pd.DataFrame, delay: float = 5):
    """Simulate a slow job on one chunk, logging when it starts and ends.

    Args:
        chunk_id: Label for the chunk; it is only interpolated into the
            printed messages.
        chunk_data: The chunk to "process". Only ``len(chunk_data)`` is read,
            so any sized container works, not just a DataFrame.
        delay: Seconds to sleep in place of real work. Defaults to 5, the
            value that used to be hard-coded.

    Returns:
        None. The observable effects are the two printed lines and the pause.
    """
    # Each message carries its own newline and is printed with end="" so the
    # text and the line break go to stdout in a single write. With the default
    # end, print() writes the newline separately, which lets lines from
    # processes running at the same time run together. flush=True pushes the
    # message out immediately instead of leaving it in a buffer.
    print(f"Processing chunk {chunk_id} with {len(chunk_data)} rows\n", end="", flush=True)
    sleep(delay)
    print(f"The chunk {chunk_id} has been processed\n", end="", flush=True)

In [11]:
# Sample records: one [name, age] pair per person
data = [
    ['tom',  10], ['nick',  15],
    ['juli',  14], ['peter',  20],
    ['jason',  27], ['anna',  11],
]

# Create a DataFrame
df = pd.DataFrame(data, columns=['Name', 'Age'])

# Divide the DataFrame into consecutive chunks of at most chunk_size rows.
# Each chunk is kept as a DataFrame (positional slice via .iloc) rather than
# converted to a NumPy array, because process_dataframe declares its
# chunk_data parameter as pd.DataFrame.
chunk_size = 2
chunks = [df.iloc[i:i + chunk_size] for i in range(0, df.shape[0], chunk_size)]
print(f"Number of chunks: {len(chunks)}")

# Handles of every launched process, kept so they can be waited on below
procs = []

# Start the clock right before the first process is launched
start_time = time()

for i, chunk in enumerate(chunks):
    # One dedicated process per chunk, started immediately
    proc = Process(target=process_dataframe, args=(i, chunk))
    proc.start()

    # Remember the handle for the join pass
    procs.append(proc)

    # Show the process id (available now that the process has been started)
    print(f"Process ID: {proc.pid}")

# Block until every process has finished. join() only waits for a process
# to end; it does not stop or terminate it.
for proc in procs:
    proc.join()

# Report total elapsed time
elapsed = time() - start_time
print(f"Total elapsed time: {elapsed:.2f} seconds")

Number of chunks: 3
Process ID: 30163
Processing chunk 0 with 2 rows
Process ID: 30166
Processing chunk 1 with 2 rows
Processing chunk 2 with 2 rows
Process ID: 30171
The chunk 0 has been processed
The chunk 1 has been processedThe chunk 2 has been processed

Total elapsed time: 5.03 seconds
