In [1]:
from random import randint
import numpy as np
np.set_printoptions(suppress=True)
import time
import os    

In [2]:
def timing(fun, *args):
    """Call ``fun(*args)``, print the elapsed time in seconds and return the result.

    The duration is taken from ``time.perf_counter()``, a monotonic clock meant
    for measuring intervals, so a system clock adjustment during the call
    cannot produce a negative or skewed reading.

    Args:
        fun: Callable to run.
        *args: Positional arguments forwarded to ``fun``.

    Returns:
        Whatever ``fun(*args)`` returns.
    """
    start = time.perf_counter()
    result = fun(*args)
    elapsed = time.perf_counter() - start
    print('{:f}'.format(elapsed))
    return result

In [3]:
# Sortedness check
def revision(a):
    """Print whether ``a`` is in non-decreasing order.

    Prints 'wrong_sorted' as soon as some element is greater than its
    successor, otherwise prints 'Success'. Always returns None.

    Args:
        a: Indexable sequence with a length (list, NumPy array, ...).
    """
    out_of_order = any(a[k] > a[k + 1] for k in range(len(a) - 1))
    print('wrong_sorted' if out_of_order else 'Success')

In [4]:
# quick sort and merge
def partition(arr, left, right):
    """Partition ``arr[left..right]`` (both inclusive) in place around ``arr[right]``.

    Elements less than or equal to the pivot are moved to the front of the
    range. The pivot itself is compared last, so it lands right after them.

    Args:
        arr: Mutable indexable sequence.
        left: First index of the range.
        right: Last index of the range; its element is the pivot.

    Returns:
        The index at which the pivot ends up.
    """
    pivot = arr[right]
    boundary = left - 1  # last index of the "<= pivot" region so far
    for idx in range(left, right + 1):
        if arr[idx] <= pivot:
            boundary += 1
            arr[boundary], arr[idx] = arr[idx], arr[boundary]
    return boundary

def quick_sort(arr):
    """Sort ``arr`` in place in ascending order with quicksort.

    Sub-ranges are tracked by index on an explicit stack instead of recursing
    on slices. Slicing a Python list produces a copy, so a slice-based
    recursion never sorts the caller's list; index ranges work for lists and
    for NumPy arrays/views alike. Using a stack also avoids hitting the
    interpreter recursion limit on already-sorted input.

    Args:
        arr: Mutable indexable sequence to sort in place.

    Returns:
        None.
    """
    pending = [(0, len(arr) - 1)]
    while pending:
        left, right = pending.pop()
        if left >= right:
            continue

        # After partitioning, the pivot at `center` is in its final position.
        center = partition(arr, left, right)
        lower = (left, center - 1)
        upper = (center + 1, right)
        # Push the larger side first so the smaller one is handled next;
        # this keeps the pending stack short.
        if (center - left) < (right - center):
            pending.append(upper)
            pending.append(lower)
        else:
            pending.append(lower)
            pending.append(upper)
    
def merge(A, B):
    """Merge two ascending sequences into one ascending NumPy integer array.

    Args:
        A: First sorted sequence (list or 1-D array).
        B: Second sorted sequence (list or 1-D array).

    Returns:
        A new 1-D array of length ``len(A) + len(B)`` with NumPy's default
        integer dtype, holding the elements of both inputs in ascending order.
        On equal elements the one from ``B`` is taken first.
    """
    len_a, len_b = len(A), len(B)
    # np.zeros allocates the buffer directly (no throwaway Python list) and
    # keeps the integer dtype even when both inputs are empty.
    S = np.zeros(len_a + len_b, dtype=int)
    a = 0  # read position in A
    b = 0  # read position in B
    s = 0  # write position in S
    while a < len_a and b < len_b:
        if A[a] < B[b]:
            S[s] = A[a]
            a += 1
        else:
            S[s] = B[b]
            b += 1
        s += 1

    # At most one input still has elements left; copy that tail in one go.
    if a < len_a:
        S[s:] = A[a:]
    elif b < len_b:
        S[s:] = B[b:]
    return S

In [8]:
# Generate the test file: `count` random uint16 values written to 'test.bin'
count = int(1e8)
# np.fromiter consumes the generator directly, so the values go straight into
# a uint16 buffer instead of first being collected in a huge Python list.
np.fromiter(
    (randint(0, 65535) for _ in range(count)), dtype='uint16', count=count
).tofile('test.bin')
# Read the file back and show its shape as a sanity check.
b = np.fromfile('test.bin', dtype='uint16')
b.shape


(100000000,)

In [9]:
# Merge sort for sorting a file:
# small chunks are sorted with quick sort and then merged.
def part_sort(b, count_step, len_step):
    """Sort ``b`` chunk by chunk and write each merged pair of chunks to a file.

    Consecutive chunks of ``len_step`` elements are sorted in place with
    ``quick_sort`` and merged two at a time; every merged run is written as
    uint16 to ``test_<i>.bin`` in the current directory. Whatever follows the
    last pair (an odd chunk and/or a partial tail) is folded into the last run.

    Args:
        b: 1-D NumPy array to sort. Chunks are sorted in place through slices
            of ``b``.
        count_step: Number of full ``len_step``-sized chunks in ``b``.
        len_step: Chunk length in elements.

    Returns:
        List of the run file names that were written, in order.
    """
    if count_step < 2:
        # Fewer than two full chunks: the pairing loop below would not run at
        # all and no file would be written, so treat the whole input as one run.
        quick_sort(b)
        run_name = 'test_1.bin'
        np.array(b, dtype='uint16').tofile(run_name)
        return [run_name]

    files_list = []
    for i in range(1, count_step, 2):
        # Chunks i-1 and i form the current pair.
        first = b[(i - 1) * len_step: i * len_step]
        second = b[i * len_step: (i + 1) * len_step]
        quick_sort(first)
        quick_sort(second)
        a = merge(first, second)

        if (count_step - i) <= 2:
            # Last pair: also take in everything that remains after it.
            tail = b[(i + 1) * len_step:]
            quick_sort(tail)
            a = merge(a, tail)

        run_name = f'test_{i}.bin'
        np.array(a, dtype='uint16').tofile(run_name)
        files_list.append(run_name)
    return files_list

def part_merge(files):
    """Merge sorted uint16 run files two at a time into fewer, longer runs.

    Files are taken in pairs; when the count is odd, the last three files are
    merged into one run. Each input file is deleted once it has been read, and
    every merged run is written to ``test_<i>.bin`` in the current directory,
    where ``i`` is the index of the first file of the group.

    Args:
        files: List of paths to files holding sorted uint16 data.

    Returns:
        List of the merged run file names. A list with fewer than two entries
        is returned as a copy without touching any file.
    """
    len_list = len(files)
    if len_list < 2:
        # Nothing to merge. Without this guard a lone file would be deleted
        # before its missing partner is noticed.
        return list(files)

    files_list = []
    for i in range(0, len_list, 2):
        # With an odd count the final group takes the last three files.
        group_end = len_list if (len_list - 3) == i else i + 2

        a = None
        for name in files[i:group_end]:
            chunk = np.fromfile(name, dtype='uint16')
            # Delete right after reading: the output written below may reuse
            # the name of one of the inputs.
            os.remove(name)
            a = chunk if a is None else merge(a, chunk)

        out_name = f'test_{i}.bin'
        np.array(a, dtype='uint16').tofile(out_name)
        files_list.append(out_name)

        if group_end == len_list:
            break
    return files_list

def file_sort(file, len_step):
    """Sort a binary file of uint16 values and return the name of the sorted file.

    The file is loaded as a uint16 array and split into sorted run files by
    ``part_sort``; the runs are then merged with ``part_merge`` until a single
    file remains.

    Args:
        file: Path of the uint16 binary file to sort.
        len_step: Chunk length, in elements, handed to ``part_sort``.

    Returns:
        Name of the one remaining run file. If ``part_sort`` returns no files,
        the final indexing raises IndexError.
    """
    data = np.fromfile(file, dtype='uint16')
    full_chunks = data.shape[0] // len_step

    runs = part_sort(data, full_chunks, len_step)
    while len(runs) > 1:
        runs = part_merge(runs)
    return runs[0]
    

    

In [10]:
# Combined sort: time the run, then check the sorted output file.
# 1024 is the chunk length (len_step) passed to file_sort.
result = timing(file_sort, 'test.bin', 1024)
revision(np.fromfile(result, dtype='uint16'))

1755.868806
Success


In [11]:
# Counting sort, numbers only.
# The upper border is exclusive.
def count_sort_num(arr, min_border, max_border):
    """Sort integers from ``[min_border, max_border)`` with counting sort.

    Args:
        arr: Iterable of integers.
        min_border: Smallest allowed value (inclusive).
        max_border: Upper border (exclusive).

    Returns:
        A new list with the values of ``arr`` in ascending order.

    Raises:
        IndexError: If an element lies outside ``[min_border, max_border)``.
            The explicit check matters for values below ``min_border``: their
            negative offset would otherwise index the count table from the end
            and be counted as a different value.
    """
    counts = [0] * (max_border - min_border)
    for item in arr:
        if not min_border <= item < max_border:
            raise IndexError(
                'value {} is outside [{}, {})'.format(item, min_border, max_border)
            )
        counts[item - min_border] += 1

    result = []
    for offset, count in enumerate(counts):
        result.extend([offset + min_border] * count)
    return result
        

In [12]:
# Counting sort, variant for objects
def count_sort_obj(arr, min_border, max_border):
    """Counting sort that places the original elements into an object array.

    Each element is used as its own key (``item - min_border``). Running totals
    of the counts give the end position of every key, and the input is walked
    backwards so that equal keys keep their relative order.

    Args:
        arr: Sequence of integers (must support ``len`` and ``reversed``).
        min_border: Smallest allowed value (inclusive).
        max_border: Upper border (exclusive).

    Returns:
        NumPy array of dtype ``object`` with the elements in ascending order.

    Raises:
        IndexError: If an element lies outside ``[min_border, max_border)``.
            The explicit check matters for values below ``min_border``: their
            negative offset would otherwise index the count table from the end
            and be treated as a different key.
    """
    counts = [0] * (max_border - min_border)
    for item in arr:
        if not min_border <= item < max_border:
            raise IndexError(
                'value {} is outside [{}, {})'.format(item, min_border, max_border)
            )
        counts[item - min_border] += 1

    # Turn the counts into running totals: counts[k] becomes the number of
    # elements whose key is <= k, i.e. one past the last slot for key k.
    running = 0
    for key, amount in enumerate(counts):
        running += amount
        counts[key] = running

    result = np.zeros(len(arr), dtype='object')
    for item in reversed(arr):
        key = item - min_border
        counts[key] -= 1
        result[counts[key]] = item
    return result

In [14]:
# Sort, time and verify the simplified (numbers-only) counting sort
min_border = 0
max_border = 65536  # exclusive upper border, hence `max_border - 1` for randint below
count = int(1e8)
arr = [randint(min_border, max_border - 1) for _ in range(count)]

result = timing(count_sort_num, arr, min_border, max_border)
revision(result)

40.347905
Success


In [16]:
# Sort, time and verify the object variant of counting sort
# (reuses arr, min_border and max_border defined in an earlier cell)
result = timing(count_sort_obj, arr, min_border, max_border)
revision(result)

113.984351
Success


In [17]:
# Radix sort
def radix_sort(arr, rang):
    """Sort by decimal digits, least significant digit first.

    Runs ``rang`` passes; each pass is a counting sort on one decimal digit
    that keeps the relative order of elements sharing that digit.

    Args:
        arr: Sequence of integers; it is copied into an object array first.
        rang: Number of decimal digit positions to process.

    Returns:
        NumPy array of dtype ``object`` ordered by the lowest ``rang`` digits.
    """
    ordered = np.array(arr, dtype='object')
    divisor = 1  # 10 ** (index of the current pass)
    for _ in range(rang):
        # Count how many elements have each digit at the current position.
        ends = np.array([0] * 10, dtype='object')
        for value in ordered:
            ends[value // divisor % 10] += 1

        # Running totals: ends[d] is one past the last slot for digit d.
        running = 0
        for digit in range(10):
            running += ends[digit]
            ends[digit] = running

        # Place elements from the back so equal digits keep their order.
        snapshot = ordered.copy()
        for value in reversed(snapshot):
            digit = value // divisor % 10
            ends[digit] -= 1
            ordered[ends[digit]] = value

        divisor *= 10
    return ordered

In [19]:
# Input for the radix sort run: `count` random integers.
# Note: max_border is inclusive here, because randint(min_border, max_border)
# includes both endpoints.
min_border = 0
max_border = 65535
count = int(1e8)
arr = [randint(min_border, max_border) for _ in range(count)]

In [20]:
# Sort, time and verify the radix sort.
# 5 passes cover the 5 decimal digits of the largest value, 65535.
result = timing(radix_sort, arr, 5)
revision(result)

269.747134
Success
