## Imports

In [1]:
import numpy as np
import linecache

# Aggregating postcodes by alphabetical prefix

In [2]:
# The ID vectors sit on fixed lines of the data file. Each line is split on
# single spaces, the first two tokens and the last token are discarded, and the
# surrounding double quotes are removed from every remaining token.
customer_id_og = [
    token.strip('"')
    for token in linecache.getline(r"CaseStudyData.txt", 17).split(" ")[2:-1]
]

candidate_id_og = [
    token.strip('"')
    for token in linecache.getline(r"CaseStudyData.txt", 27).split(" ")[2:-1]
]

In [3]:
def aggregate_postcodes(list):
    '''
    Reduce each postcode to its leading alphabetical prefix.

    list: iterable of postcode strings (the parameter name is kept for
          backward compatibility even though it shadows the builtin)

    The prefix is the first two characters when the postcode has at least two
    characters and the second one is a letter; otherwise it is the first
    character only. An empty postcode string raises IndexError.

    Returns a tuple (unique_prefixes, aggregated_values):
      unique_prefixes: sorted numpy array of the distinct prefixes
      aggregated_values: numpy array holding the prefix of every input
                         postcode, in input order
    '''

    # Compute the prefixes exactly once. Besides removing the duplicated rule,
    # this keeps both outputs consistent when the input can only be iterated
    # a single time (e.g. a generator).
    prefixes = []
    for postcode in list:
        if len(postcode) >= 2 and postcode[1].isalpha():
            prefixes.append(postcode[:2])
        else:
            prefixes.append(postcode[0])

    unique_prefixes = np.unique(prefixes)
    aggregated_values = np.array(prefixes)

    return unique_prefixes, aggregated_values


# Display the aggregated values
# for i, prefix in enumerate(unique_prefixes):
#     count = np.count_nonzero(aggregated_values == prefix)
#     print(f"Aggregate for prefix {prefix}: {count}")

# Each name is bound to the (unique_prefixes, aggregated_values) pair that
# aggregate_postcodes returns for the corresponding raw ID vector.
customer_id, candidate_id = map(aggregate_postcodes, (customer_id_og, candidate_id_og))

### I think the CustomerID and CandidateID vectors are all the same.

# Aggregating the 1D vectors

In [4]:
def process_aggregated_values(postcode_list, value_list, operation='average'):
    
    '''
    Group a vector by postcode prefix and collapse every group to one number.

    postcode_list: postcodes from which the prefix groups are derived
                   (through aggregate_postcodes)
    value_list: the vector to aggregate; its entries are paired positionally
                with the entries of postcode_list
    operation: 'average', 'sum' or 'maximum' - how each group is reduced

    Returns a numpy array with one aggregated value per unique prefix, in the
    same order as the unique prefixes returned by aggregate_postcodes.
    Raises ValueError when operation is not one of the three names above.
    '''
    
    prefixes, prefix_per_value = aggregate_postcodes(postcode_list)

    # One empty bucket per prefix, then file every value under its prefix.
    # zip stops at the shorter of the two sequences.
    buckets = dict((prefix, []) for prefix in prefixes)
    for key, item in zip(prefix_per_value, value_list):
        buckets[key].append(item)

    # Pick the numpy reduction that matches the requested operation.
    if operation == 'average':
        reducer = np.mean
    elif operation == 'sum':
        reducer = np.sum
    elif operation == 'maximum':
        reducer = np.max
    else:
        raise ValueError("Invalid operation. Please choose 'average', 'sum', or 'maximum'.")

    return np.array([reducer(buckets[prefix]) for prefix in prefixes])

## Aggregating coordinates

In [5]:
# Coordinates are read from fixed lines of the data file, tokenised the same
# way as the ID vectors (split on spaces, first two and last token dropped),
# converted to numbers, and then averaged per postcode prefix.
# NOTE(review): eval() executes whatever text the data file contains; only run
# this against a trusted file.
customer_easting = list(map(eval, linecache.getline(r"CaseStudyData.txt", 20).split(" ")[2:-1]))
customer_easting_avg = process_aggregated_values(customer_id_og, customer_easting)

customer_northing = list(map(eval, linecache.getline(r"CaseStudyData.txt", 21).split(" ")[2:-1]))
customer_northing_avg = process_aggregated_values(customer_id_og, customer_northing)

candidate_easting = list(map(eval, linecache.getline(r"CaseStudyData.txt", 30).split(" ")[2:-1]))
candidate_easting_avg = process_aggregated_values(candidate_id_og, candidate_easting)

candidate_northing = list(map(eval, linecache.getline(r"CaseStudyData.txt", 31).split(" ")[2:-1]))
candidate_northing_avg = process_aggregated_values(candidate_id_og, candidate_northing)

## Aggregating warehouse costs and capacities

In [6]:
def process_costs_capacities(line_start, line_stop):
    
    '''
    Read a 1D cost or capacity vector from CaseStudyData.txt.

    line_start: 1-based number of the first line to read
    line_stop: 1-based number of the last line to read (inclusive)

    The selected lines are stripped, split on single spaces and flattened into
    one token list; square brackets are removed from the ends of every token.
    The first two tokens are discarded and the rest are evaluated as Python
    literals.

    Returns a list with the evaluated values.
    '''
    
    # Use a context manager so the file handle is always closed, even if
    # reading fails.
    with open(r"CaseStudyData.txt", "r") as file:
        raw_lines = file.readlines()[line_start-1:line_stop]

    tokens = [
        token.strip('[]')
        for raw_line in raw_lines
        for token in raw_line.strip().split(" ")
    ]

    # Drop the two leading tokens that precede the numeric data.
    tokens = tokens[2:]

    # NOTE(review): eval() executes whatever text the data file contains; only
    # run this against a trusted file.
    return [eval(cost) for cost in tokens]

In [7]:
# Read the three warehouse vectors from their line ranges in the data file.
setup_costs, operating_costs, wh_capacities = (
    process_costs_capacities(first_line, last_line)
    for first_line, last_line in ((62, 105), (108, 146), (149, 193))
)

# Collapse each vector to one value per postcode prefix, keeping the maximum.
# NOTE(review): the grouping uses customer_id_og - confirm the warehouse
# vectors are aligned with that ID vector.
setup_costs_agg, operating_costs_agg, wh_capacities_agg = (
    process_aggregated_values(customer_id_og, vector, operation="maximum")
    for vector in (setup_costs, operating_costs, wh_capacities)
)

# Aggregating the 2D arrays

We start off with 4 $\times$ 440 = 1760 values, so we expect to be left with 4 vectors in the end - one for each product type.

In [8]:
def process_nD(line_start, line_stop, no_to_delete = None):
    
    '''
    Read a multi-dimensional block from CaseStudyData.txt as a flat token list.

    line_start: 1-based number of the first line to read
    line_stop: 1-based number of the last line to read (inclusive)
    no_to_delete: optional number of leading tokens to discard; None keeps
                  every token

    The selected lines are stripped, split on single spaces and flattened into
    one list; square brackets are removed from the ends of every token. Unlike
    process_costs_capacities, nothing is dropped automatically and the tokens
    stay strings - reshaping, column removal and type conversion are left to
    the caller.

    Returns the list of string tokens.
    '''
    
    # Use a context manager so the file handle is always closed, even if
    # reading fails.
    with open(r"CaseStudyData.txt", "r") as file:
        raw_lines = file.readlines()[line_start-1:line_stop]

    processed_list = [
        token.strip('[]')
        for raw_line in raw_lines
        for token in raw_line.strip().split(" ")
    ]

    if no_to_delete is not None:
        processed_list = processed_list[no_to_delete:]
    return processed_list

In [16]:
# Lay the tokens out as 440 rows of 6, discard the first two columns, and
# convert the remaining four columns to integers.
demand_per_product = (
    np.asarray(process_nD(197, 343))
    .reshape(440, 6)[:, 2:]
    .astype(int)
)

# Average every remaining column per postcode prefix.
dpp_agg = [
    process_aggregated_values(customer_id_og, demand_per_product[:, column], "average")
    for column in range(4)
]

# Aggregating the 3D array

In [58]:
# Drop the first token, lay the rest out as 440 x 4 x (inferred), discard the
# first three entries along the last axis, and convert to integers.
dpp_per_year = (
    np.asarray(process_nD(347, 1693, 1))
    .reshape(440, 4, -1)[:, :, 3:]
    .astype(int)
)

# Average per postcode prefix for every (second-axis, third-axis) index pair.
dpp_py_agg = [
    [
        process_aggregated_values(customer_id_og, dpp_per_year[:, i, j], "average")
        for j in range(10)
    ]
    for i in range(4)
]

np.asarray(dpp_py_agg).shape

(4, 10, 15)

# Aggregating the 4D array

In [71]:
# Drop the first token, lay the rest out as 440 x 4 x 10 x (inferred), discard
# the first four entries along the last axis, and convert to integers.
dppy_scenarios = (
    np.asarray(process_nD(1695, 115666, 1))
    .reshape(440, 4, 10, -1)[:, :, :, 4:]
    .astype(int)
)

# Average per postcode prefix for every index triple over the last three axes.
dppys_agg = [
    [
        [
            process_aggregated_values(customer_id_og, dppy_scenarios[:, i, j, k], "average")
            for k in range(100)
        ]
        for j in range(10)
    ]
    for i in range(4)
]

np.asarray(dppys_agg).shape

(4, 10, 100, 15)