Первая создаёт бинарный файл (минимум 2 ГБ), состоящий из случайных 32-разрядных беззнаковых целых чисел (big endian).

In [2]:
import time 
import ipykernel_launcher
import struct
import mmap
import threading
import queue

In [4]:
import random
import struct

# Fill the file with 2 GiB of uniformly random bytes. Every aligned 4-byte group
# is then a uniformly random big-endian unsigned 32-bit integer, which matches
# drawing each number with random.randint(0, 2**32 - 1).
target_size = 2 * 1024**3
block_size = 1024 * 1024  # multiple of 4, so the file holds only whole words

with open("random_numbers.bin", "wb") as f:
    written = 0
    while written < target_size:
        # One large write per block instead of a tell()/write() pair per number.
        size = min(block_size, target_size - written)
        f.write(random.getrandbits(8 * size).to_bytes(size, "big"))
        written += size

Вторая считает сумму этих чисел (с применением длинной арифметики), находит минимальное и максимальное число.
1. Простое последовательное чтение 

In [4]:
%%time

# Baseline: read the file strictly sequentially, one 4-byte big-endian word per
# read() call, keeping a running sum (Python ints are arbitrary precision), the
# minimum and the maximum.
sum_num, min_num, max_num = 0, 2**32 - 1, 0

start_time = time.time()
with open("random_numbers.bin", "rb") as f:
    # iter() with a sentinel stops on the empty bytes object returned at EOF.
    for word in iter(lambda: f.read(4), b""):
        value = int.from_bytes(word, "big")
        sum_num += value
        if value < min_num:
            min_num = value
        if value > max_num:
            max_num = value
end_time = time.time()
print(f' sum: {sum_num} \n min: {min_num} \n max:{max_num}\n time:{end_time-start_time}')

 sum: 1152968791111996711 
 min: 0 
 max:4294967292
 time:572.8589899539948
CPU times: total: 9min 10s
Wall time: 9min 32s


2. Многопоточная + memory-mapped files. Сравните время работы. 

In [5]:
def worker(q, results):
    """Consume byte chunks from ``q`` and append per-chunk statistics to ``results``.

    Each chunk is decoded as consecutive big-endian unsigned 32-bit integers and
    a ``(sum, max, min)`` tuple for it is appended to ``results``. The loop ends
    when a ``None`` sentinel is received.

    Args:
        q: Queue yielding bytes-like chunks, terminated by ``None``.
        results: List that receives one ``(sum, max, min)`` tuple for every
            chunk containing at least one complete 32-bit word. Bytes after
            the last complete word of a chunk are ignored.
    """
    while True:
        chunk = q.get()
        try:
            if chunk is None:
                break
            count = len(chunk) // 4
            if count:
                # unpack_from tolerates trailing bytes beyond the last full word.
                nums = struct.unpack_from(f'>{count}I', chunk)
                results.append((sum(nums), max(nums), min(nums)))
        finally:
            # Mark every retrieved item (the sentinel included) as done, even if
            # decoding raised, so a q.join() in the producer cannot hang.
            q.task_done()

def process_file(filename, chunk_size=1024 * 1024, threads_num=8):
    """Compute the sum, maximum and minimum of the big-endian uint32 values in a file.

    The file is memory-mapped and handed to ``threads_num`` ``worker`` threads
    in slices of ``chunk_size`` bytes; the per-chunk partial results are then
    combined.

    Args:
        filename: Path of the binary file to read.
        chunk_size: Size in bytes of each slice put on the queue. Must be a
            positive multiple of 4 so that no 32-bit word straddles two chunks.
        threads_num: Number of worker threads to start (at least 1).

    Returns:
        A ``(total_sum, max_num, min_num)`` tuple.

    Raises:
        ValueError: If ``chunk_size`` or ``threads_num`` is invalid, or if no
            numbers were read from the file.
        OSError: If the file cannot be opened.
    """
    if chunk_size <= 0 or chunk_size % 4:
        raise ValueError("chunk_size must be a positive multiple of 4")
    if threads_num < 1:
        raise ValueError("threads_num must be at least 1")

    # Bounded queue: the producer blocks when the workers fall behind, so only a
    # handful of chunk copies exist at any time instead of the whole file.
    q = queue.Queue(maxsize=2 * threads_num)
    results = []

    # Open and map the file before starting any thread, so a missing or
    # unmappable file fails without leaving workers blocked on q.get().
    with open(filename, 'rb') as f:
        with mmap.mmap(f.fileno(), 0, access=mmap.ACCESS_READ) as mmapped_file:
            threads = []
            try:
                for _ in range(threads_num):
                    t = threading.Thread(target=worker, args=(q, results))
                    t.start()
                    threads.append(t)

                for offset in range(0, len(mmapped_file), chunk_size):
                    q.put(mmapped_file[offset:offset + chunk_size])

                q.join()
            finally:
                # Always release the workers that were started, even on error.
                for _ in threads:
                    q.put(None)
                for t in threads:
                    t.join()

    if not results:
        raise ValueError(f"no 32-bit numbers were read from {filename!r}")

    sums, maxima, minima = zip(*results)
    return sum(sums), max(maxima), min(minima)

In [6]:
%%time
total_sum, max_num, min_num = process_file('random_numbers.bin')
print(f' sum: {total_sum} \n min: {min_num} \n max:{max_num}')

 sum: 1152968791111996711 
 min: 0 
 max:4294967292
CPU times: total: 1min
Wall time: 1min 2s
