In [37]:
import numpy as np
import time

In [5]:
import sys; print('Python %s on %s' % (sys.version, sys.platform))
sys.path.extend(['C:\\Projects\\Repo\\Work\\SWIFT', 'C:\\Projects\\Repo\\Work\\SWIFT\\scripts'])

Python 3.6.5 |Anaconda, Inc.| (default, Mar 29 2018, 13:32:41) [MSC v.1900 64 bit (AMD64)] on win32


In [15]:
from services.data_service import normal_rounding

In [30]:
def bucket_rounding(mat):
    """
    bucket rounding and add the difference to the diagonal elements
    :param mat: an numpy 2d-array
    :return: an numpy 2d-array of dtype np.uint16
    """

    # todo: implement as pure list iteration by rows
    if len(mat.shape) != 2:
        raise ValueError("Input must be a 2-dimensional numpy array")
    
    threshold = 0.05
    rounded = np.zeros_like(mat, dtype=mat.dtype)
    for i in range(mat.shape[0]):
        residual = 0
        if mat[i].sum() <= threshold:
            continue
        for j in range(mat.shape[1]):
            if mat[i, j] != 0:
                val = np.round(mat[i, j] + residual)
                residual += mat[i, j] - val
                rounded[i, j] = val

    total_diff = int(round(rounded.sum() - mat.sum()))
    diff = np.where(total_diff > 0, -1, 1)
    indices = np.argsort(np.diagonal(rounded))[::-1].astype(np.int16)[:np.abs(total_diff)]
    rounded[indices, indices] += diff
    return rounded.clip(min=0).astype(np.int16)

In [34]:
def bucket_rounding_list(mat):
    """
    bucket rounding and add the difference to the diagonal elements
    :param mat: an numpy 2d-array
    :return: an numpy 2d-array of dtype np.uint16
    """

    if len(mat.shape) != 2:
        raise ValueError("Input must be a 2-dimensional numpy array")
    
    threshold = 0.05
    rows, columns = mat.shape
    mat_list = mat.tolist()
    for i in range(rows):
        residual = 0
        if mat[i].sum() <= threshold:
            continue
        for j in range(columns):
            if mat_list[i][j] != 0:
                val = np.round(mat_list[i][j] + residual)
                residual += mat_list[i][j] - val
                mat_list[i][j] = val
    
    rounded = np.array(mat_list)
    total_diff = int(round(rounded.sum() - mat.sum()))
    diff = np.where(total_diff > 0, -1, 1)
    indices = np.argsort(np.diagonal(rounded))[::-1].astype(np.int16)[:np.abs(total_diff)]
    rounded[indices, indices] += diff
    return rounded.clip(min=0).astype(np.int16)

In [38]:
def test_rounding(matrix, func):
    start_time = time.time()
    matrix_rounded = func(matrix)
    end_time = time.time()
    print("Total after rounding by {0:s} = {1:.2f} in {2:d} seconds".format(func.__name__, matrix_rounded.sum(), round(end_time-start_time)))

#### Simulated Data

In [18]:
zones = 5263
seed = 42
np.random.seed(seed)

In [24]:
matrix = np.random.random((zones, zones))
num_rows_to_small_values = 2000
rows_to_small_values = np.random.randint(low=0, high=zones-1, size=num_rows_to_small_values, dtype=np.uint16)
small_values = np.random.random((num_rows_to_small_values, zones)) / 50
matrix_with_small_values = matrix.copy()
matrix_with_small_values[rows_to_small_values] = small_values
print("Total number of trips in original matrix    = {0:.2f}".format(matrix.sum()))
print("Total number of trips with fractional trips = {0:.2f}".format(matrix_with_small_values.sum()))

Total number of trips in original matrix    = 13849489.51
Total number of trips with fractional trips = 9525040.91


In [27]:
test_rounding(matrix_with_small_values, normal_rounding)
test_rounding(matrix_with_small_values, bucket_rounding)
test_rounding(matrix_with_small_values, bucket_rounding_list)

Total after rounding by normal_rounding = 9440908.00
Total after rounding by bucket_rounding = 9525041.00
Total after rounding by bucket_rounding_list = 9525041.00


In [28]:
%timeit normal_rounding(matrix_with_small_values)

262 ms ± 8.57 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [32]:
%timeit bucket_rounding(matrix_with_small_values)

1min 12s ± 3.46 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [35]:
%timeit bucket_rounding_list(matrix_with_small_values)

58.3 s ± 2.11 s per loop (mean ± std. dev. of 7 runs, 1 loop each)


#### Actual Data

In [39]:
import h5py

  from ._conv import register_converters as _register_converters


In [40]:
trip_table_file = r"C:\Projects\Repo\Work\SWIFT\data\Dynus_T\OD\2017\OD AM3HR HBW Vehicles.omx"

In [41]:
h5 = h5py.File(trip_table_file, 'r')
od = h5['/matrices/' + 'amhbwi1da'][:]

In [42]:
print("Total number of trips = {0:.2f}".format(od.sum()))

Total number of trips = 49474.39


In [43]:
test_rounding(od, normal_rounding)
test_rounding(od, bucket_rounding)
test_rounding(od, bucket_rounding_list)

Total after rounding by normal_rounding = 12548.00 in 0 seconds
Total after rounding by bucket_rounding = 49474.00 in 22 seconds
Total after rounding by bucket_rounding_list = 49470.00 in 17 seconds
