In [None]:
import pandas as pd
import numpy as np
import os
import sys


In [None]:
# Locate the notebook's working directory.
# `__file__` is not defined inside a notebook kernel, and the previous
# `os.path.abspath('__file__')` passed a quoted string literal, so it only
# ever resolved relative to the current working directory. `os.getcwd()`
# yields the same directory and states that intent directly.
current_dir = os.getcwd()
# Parent of the working directory
# (presumably where project packages live -- TODO confirm the layout).
parent_dir = os.path.dirname(current_dir)
# Make the parent directory importable. The membership check keeps repeated
# executions of this cell from appending duplicate entries to sys.path.
if parent_dir not in sys.path:
    sys.path.append(parent_dir)

In [None]:
# Value interpolated into the input file name
# (presumably the instance size -- TODO confirm).
n = 50

# The file is read as whitespace-delimited text with no header row.
data_path = f"data/paper_data/GKD_data_{n}.txt"
data = pd.read_csv(data_path, sep=r"\s+", header=None)

In [39]:
# Wrap the loaded table in the project's ProblemData container.
# NOTE(review): this import is repeated in a later cell; consider moving it
# to the top-level import cell.
from src.data_class import ProblemData
problem_data = ProblemData(data=data)

In [40]:
# Bare last expression: the notebook displays the distance matrix exposed by
# problem_data (numpy abbreviates the middle of large arrays with "...").
problem_data.distance_matrix

array([[ 0.     , 65.73852, 23.84321, ..., 24.41362, 44.00256, 47.26974],
       [65.73852,  0.     , 55.95335, ..., 47.17637, 21.93272, 39.90072],
       [23.84321, 55.95335,  0.     , ..., 33.78458, 37.72945, 53.99211],
       ...,
       [24.41362, 47.17637, 33.78458, ...,  0.     , 25.87052, 22.97829],
       [44.00256, 21.93272, 37.72945, ..., 25.87052,  0.     , 26.76906],
       [47.26974, 39.90072, 53.99211, ..., 22.97829, 26.76906,  0.     ]])

In [None]:
from src.data_class import ProblemData

# Build the problem container and keep a short handle on its distance matrix.
problem_data = ProblemData(data=data)
dist_matrix = problem_data.distance_matrix
print(f"Number of facilities: {problem_data.number_of_facilities}")
print(f"Distance matrix type: {type(dist_matrix)}")
print(f"Distance matrix shape: {dist_matrix.shape}")
print(f"Distance matrix dtype: {dist_matrix.dtype}")

# Peek at the top-left corner only.
print("\nFirst 5x5 of distance matrix:")
print(dist_matrix[:5, :5])

# Summary statistics. The strictly-positive mask is computed once and reused;
# it drops every zero entry of the matrix.
positive_distances = dist_matrix[dist_matrix > 0]
print("\nMatrix operations:")
print(f"Min distance (excluding zeros): {positive_distances.min():.2f}")
print(f"Max distance: {dist_matrix.max():.2f}")
print(f"Mean distance: {positive_distances.mean():.2f}")

# Compare the matrix with its transpose (tolerance-based equality).
is_symmetric = np.allclose(dist_matrix, dist_matrix.T)
print(f"Is symmetric: {is_symmetric}")

Number of facilities: 50
Distance matrix type: <class 'numpy.ndarray'>
Distance matrix shape: (50, 50)
Distance matrix dtype: float64

First 5x5 of distance matrix:
[[ 0.      65.73852 23.84321 61.61796 18.27225]
 [65.73852  0.      55.95335 42.00821 83.95527]
 [23.84321 55.95335  0.      67.68217 38.11752]
 [61.61796 42.00821 67.68217  0.      75.9398 ]
 [18.27225 83.95527 38.11752 75.9398   0.     ]]

Matrix operations:
Min distance (excluding zeros): 0.49
Max distance: 125.86
Mean distance: 52.37
Is symmetric: True


In [42]:
# Why a numpy array is a convenient container for the distance matrix.
dist_matrix = problem_data.distance_matrix
# Strictly positive entries only, reused by the statistics further down.
positive_distances = dist_matrix[dist_matrix > 0]

# 1. Memory efficiency - a compact buffer instead of nested Python lists
print("Memory and performance benefits:")
print(f"Matrix shape: {dist_matrix.shape}")
print(f"Memory usage: {dist_matrix.nbytes} bytes")

# 2. Vectorised arithmetic - nearest neighbour of facility 0
print("\nMathematical operations:")
distances_from_0 = dist_matrix[0, :]
# Drop zero entries before searching for the minimum.
distances_from_0_nonzero = distances_from_0[distances_from_0 > 0]
# Position of the minimum *within the filtered array*, not a facility index.
closest_to_0 = np.argmin(distances_from_0_nonzero)
closest_distance = distances_from_0_nonzero[closest_to_0]
# Map the minimum value back to its first position in the unfiltered row.
closest_facility = np.flatnonzero(distances_from_0 == closest_distance)[0]
print(f"Closest facility to facility 0: {closest_facility}")
print(f"Distance: {closest_distance:.2f}")

# 3. Slicing and indexing
print("\nDistances from first 3 facilities to first 3 facilities:")
print(dist_matrix[:3, :3])

# 4. Built-in statistics over the positive entries
print("\nStatistics:")
print(f"Standard deviation: {np.std(positive_distances):.2f}")
print(f"Median distance: {np.median(positive_distances):.2f}")

# 5. Interoperability with optimization libraries (scipy, numpy, etc.)
print("\nReady for optimization algorithms that expect numpy arrays!")

Memory and performance benefits:
Matrix shape: (50, 50)
Memory usage: 20000 bytes

Mathematical operations:
Closest facility to facility 0: 28
Distance: 10.36

Distances from first 3 facilities to first 3 facilities:
[[ 0.      65.73852 23.84321]
 [65.73852  0.      55.95335]
 [23.84321 55.95335  0.     ]]

Statistics:
Standard deviation: 24.34
Median distance: 51.51

Ready for optimization algorithms that expect numpy arrays!


In [45]:
# Run the bisection solver (ratio=0.3) on the loaded problem with p_median=15.
# The bare last expression makes the notebook display the value returned by optimise().
# NOTE(review): the recorded output's first element is an empty set() --
# confirm whether the selected facilities are expected to be returned there.
from src.maxmin_diversity.bisection import BisectionMethod

solver = BisectionMethod(ratio=0.3)
solver.optimise(problem=problem_data, p_median=15)

(set(),
                                     algorithm  p_median  optimal_distance  \
 0  Adaptive Bisection Method for P-Dispersion        15          22.66334   
 
    runtime_seconds  iterations  ratio   status  
 0         0.191487           8    0.3  optimal  )

In [46]:
# Run the binary-search solver with default settings on the same problem, p_median=15.
# The bare last expression makes the notebook display the value returned by optimise().
# NOTE(review): the recorded output's first element is an empty set() --
# confirm whether the selected facilities are expected to be returned there.
from src.maxmin_diversity.binary_search import BinarySearch

solver = BinarySearch()
solver.optimise(problem=problem_data, p_median=15)

(set(),
                                algorithm  p_median  optimal_distance  \
 0  Binary Search Method for P-Dispersion        15          22.66334   
 
    runtime_seconds  iterations   status  
 0         3.418634          10  optimal  )

In [47]:
# Run the direct binary-search solver with default settings, p_median=15.
# The bare last expression makes the notebook display the value returned by optimise().
# NOTE(review): the recorded output's first element is an empty set() --
# confirm whether the selected facilities are expected to be returned there.
from src.maxmin_diversity.direct_search import DirectBinarySearch

solver = DirectBinarySearch()
solver.optimise(problem=problem_data, p_median=15)

(set(),
                                       algorithm  p_median  optimal_distance  \
 0  Direct Binary Search Method for P-Dispersion        15          22.66334   
 
    runtime_seconds  iterations  bisection_ratio   status  
 0         0.422243          10              0.5  optimal  )

In [48]:
# Run the NF (new compact formulation) solver with default settings, p_median=15.
# The bare last expression makes the notebook display the value returned by optimise().
# NOTE(review): the recorded run took about 40 s, the slowest in this notebook;
# its first output element is an empty set() -- confirm that is expected.
from src.maxmin_diversity.nf import NewCompactFormulation

solver = NewCompactFormulation()
solver.optimise(problem=problem_data, p_median=15)

(set(),
                     algorithm  p_median  optimal_distance  runtime_seconds  \
 0  NF Method for P-Dispersion        15          22.66334        40.438886   
 
    time_limit   status  
 0      3600.0  optimal  )

In [49]:
# Run the NF* solver (NFwithBounds) with default settings, p_median=15.
# The bare last expression makes the notebook display the value returned by optimise().
# NOTE(review): the recorded output's first element is an empty set() --
# confirm whether the selected facilities are expected to be returned there.
from src.maxmin_diversity.nf_star import NFwithBounds

solver = NFwithBounds()
solver.optimise(problem=problem_data, p_median=15)

(set(),
                      algorithm  p_median  optimal_distance  runtime_seconds  \
 0  NF* Method for P-Dispersion        15          22.66334         4.364619   
 
    time_limit   status  
 0      3600.0  optimal  )

In [50]:
# Run the NFC* solver (NFwithBoundsCuts) with default settings, p_median=15.
# The bare last expression makes the notebook display the value returned by optimise().
# NOTE(review): the recorded output's first element is an empty set() --
# confirm whether the selected facilities are expected to be returned there.
from src.maxmin_diversity.nfc_star import NFwithBoundsCuts

solver = NFwithBoundsCuts()
solver.optimise(problem=problem_data, p_median=15)

(set(),
                       algorithm  p_median  optimal_distance  runtime_seconds  \
 0  NFC* Method for P-Dispersion        15          22.66334        19.827343   
 
    time_limit   status  
 0      3600.0  optimal  )