## Imports

In [88]:
import numpy as np
import linecache as lc
import pandas as pd

# Aggregating postcodes by alphabetical prefix

In [2]:
# Line 17 of the data file is read as the customer-id vector. The line is split
# on single spaces, the first two tokens and the final token are dropped, and
# the surrounding double quotes are stripped from every remaining token.
customer_id_og = [
    token.strip('"')
    for token in lc.getline(r"CaseStudyData.txt", 17).split(" ")[2:-1]
]

# Same parsing for line 27, which is read as the candidate-id vector.
candidate_id_og = [
    token.strip('"')
    for token in lc.getline(r"CaseStudyData.txt", 27).split(" ")[2:-1]
]

In [3]:
def extract_first_occurrence(postcodes_list):
    """Locate the first postcode seen for every distinct prefix.

    The prefix of a postcode is its first two characters when the second
    character is alphabetic; otherwise it is the first character when that
    character is alphabetic; otherwise it is the empty string (this also
    covers empty postcodes).

    Returns a tuple of three parallel lists, ordered by first appearance of
    each prefix in ``postcodes_list``:
    ``(unique_prefixes, first_postcodes, first_indices)``.
    """
    seen = {}

    for position, code in enumerate(postcodes_list):
        if len(code) >= 2 and code[1].isalpha():
            key = code[:2]
        elif code and code[0].isalpha():
            key = code[0]
        else:
            key = ''
        # Only the first postcode carrying a given prefix is recorded.
        seen.setdefault(key, (code, position))

    unique_prefixes = list(seen)
    first_postcodes = [seen[key][0] for key in unique_prefixes]
    first_indices = [seen[key][1] for key in unique_prefixes]

    return unique_prefixes, first_postcodes, first_indices

# Each result is the 3-tuple returned by extract_first_occurrence:
# (unique prefixes, first postcode per prefix, index of that first postcode).
customer_id_selective = extract_first_occurrence(customer_id_og)
candidate_id_selective = extract_first_occurrence(candidate_id_og)

# Aggregating the 1D vectors

In [4]:
def aggregate_postcodes(list):
    """Map every postcode to its prefix and collect the distinct prefixes.

    The prefix is the first two characters when the second character is
    alphabetic, otherwise the first character. An empty postcode maps to the
    empty string instead of raising ``IndexError``.

    list: iterable of postcode strings (the parameter name shadows the builtin
        but is kept so existing keyword callers keep working).

    Returns ``(unique_prefixes, aggregated_values)`` where
    ``aggregated_values`` is an array with one prefix per input postcode and
    ``unique_prefixes`` is ``np.unique`` of it, i.e. the distinct prefixes in
    sorted order.

    NOTE(review): the sorted order differs from the first-appearance order
    used by extract_first_occurrence unless prefixes first appear in sorted
    order in the input - confirm before combining results of the two.
    """

    def _prefix(postcode):
        if len(postcode) >= 2 and postcode[1].isalpha():
            return postcode[:2]
        # Slicing (rather than indexing) yields '' for an empty postcode.
        return postcode[:1]

    # Build the per-postcode prefixes once and derive the unique set from them.
    aggregated_values = np.array([_prefix(postcode) for postcode in list])
    unique_prefixes = np.unique(aggregated_values)

    return unique_prefixes, aggregated_values

In [5]:
def process_aggregated_values(postcode_list, value_list, operation='average'):
    """Aggregate a vector of values by postcode prefix.

    postcode_list: the reference data to obtain the unique postcode prefixes from
    value_list: the vector we wish to aggregate/group; it is paired with
        postcode_list element by element
    operation: 'average', 'sum' or 'maximum' - how the values sharing a
        prefix are combined

    Returns an array with one aggregated value per unique prefix, in the
    order of the unique prefixes returned by aggregate_postcodes.

    Raises ValueError when operation is not one of the three supported names.
    """
    unique_prefixes, aggregated_values = aggregate_postcodes(postcode_list)

    # Bucket every value under the prefix of the postcode at the same position.
    grouped = {prefix: [] for prefix in unique_prefixes}
    for prefix, value in zip(aggregated_values, value_list):
        grouped[prefix].append(value)

    reducers = (('average', np.mean), ('sum', np.sum), ('maximum', np.max))
    reducer = next((fn for name, fn in reducers if operation == name), None)
    if reducer is None:
        raise ValueError("Invalid operation. Please choose 'average', 'sum', or 'maximum'.")

    return np.array([reducer(grouped[prefix]) for prefix in unique_prefixes])

In [6]:
def extract_selected_values(postcode_list, value_list):
    """Pick from value_list the entries at the first occurrence of each prefix.

    The positions come from the third element returned by
    extract_first_occurrence(postcode_list); the result is a list in that
    same order.
    """
    _, _, first_indices = extract_first_occurrence(postcode_list)
    selected = []
    for index in first_indices:
        selected.append(value_list[index])
    return selected

## Aggregating coordinates

In [7]:
# Each coordinate vector is read from one line of the data file (first two
# tokens and last token dropped), converted token by token, and then reduced
# to the entry at the first occurrence of every postcode prefix.
# NOTE(review): eval() runs arbitrary expressions taken from CaseStudyData.txt;
# acceptable only for a trusted file - consider int()/float() or
# ast.literal_eval once the token format is confirmed.
customer_easting = lc.getline(r"CaseStudyData.txt", 20).split(" ")[2:-1]
customer_easting = [eval(coord) for coord in customer_easting]
customer_easting_selective = extract_selected_values(customer_id_og, customer_easting)

customer_northing = lc.getline(r"CaseStudyData.txt", 21).split(" ")[2:-1]
customer_northing = [eval(coord) for coord in customer_northing]
customer_northing_selective = extract_selected_values(customer_id_og, customer_northing)

# Candidate coordinates are selected using the candidate ids instead.
candidate_easting = lc.getline(r"CaseStudyData.txt", 30).split(" ")[2:-1]
candidate_easting = [eval(coord) for coord in candidate_easting]
candidate_easting_selective = extract_selected_values(candidate_id_og, candidate_easting)

candidate_northing = lc.getline(r"CaseStudyData.txt", 31).split(" ")[2:-1]
candidate_northing = [eval(coord) for coord in candidate_northing]
candidate_northing_selective = extract_selected_values(candidate_id_og, candidate_northing)

## Aggregating  warehouse costs and capacities

In [8]:
def process_costs_capacities(line_start, line_stop, path=r"CaseStudyData.txt"):
    """Read a 1D cost/capacity vector that spans several lines of the data file.

    line_start: first line to read (1-indexed, inclusive)
    line_stop: last line to read (1-indexed, inclusive)
    path: file to read from; defaults to the case-study data file

    Every selected line is stripped and split on single spaces, square
    brackets are stripped from each token, the first two tokens (the leading
    label tokens) are discarded and the remaining tokens are evaluated.

    Returns the evaluated values as a list.
    """
    # The context manager closes the file even if reading fails.
    with open(path, "r") as file:
        selected_lines = file.readlines()[line_start - 1:line_stop]

    tokens = [
        token.strip('[]')
        for line in selected_lines
        for token in line.strip().split(" ")
    ]

    # NOTE(review): eval() executes arbitrary expressions read from the file;
    # only safe for trusted data - consider int()/float()/ast.literal_eval.
    return [eval(token) for token in tokens[2:]]

In [9]:
# Read the multi-line cost and capacity vectors; the line ranges are
# hard-coded for the layout of CaseStudyData.txt.
setup_costs = process_costs_capacities(62, 105)
operating_costs = process_costs_capacities(108, 146)
wh_capacities = process_costs_capacities(149, 193)

# Keep only the entry at the first occurrence of each postcode prefix.
# NOTE(review): the selection is driven by customer_id_og although these are
# written out as warehouse quantities - presumably customers and candidates
# share the same ordering; confirm, or select with candidate_id_og.
setup_costs_selective = extract_selected_values(customer_id_og, setup_costs)
operating_costs_selective = extract_selected_values(customer_id_og, operating_costs)
wh_capacities_selective = extract_selected_values(customer_id_og, wh_capacities)

In [10]:
# Populations are read from line 24 and reduced to the entry at the first
# occurrence of each customer postcode prefix.
# NOTE(review): eval() runs arbitrary expressions taken from the data file;
# acceptable only for a trusted file.
customer_populations = lc.getline(r"CaseStudyData.txt", 24).split(" ")[2:-1]
customer_populations = [eval(coord) for coord in customer_populations]
customer_populations_selective = extract_selected_values(customer_id_og, customer_populations)

# Aggregating the 2D arrays

We start off with 4 $\times$ 440 = 1760 values, so after aggregation we expect to be left with 4 vectors - one for each product type, each holding one value per postcode prefix.

In [11]:
def process_nD(line_start, line_stop, no_to_delete=None, path=r"CaseStudyData.txt"):
    """Read a multi-line block of the data file as a flat list of string tokens.

    Very similar to the function for processing costs and capacities, but this
    one does not automatically drop the first two tokens and does not convert
    the tokens: the formatting of the multi-dimensional data differs, so the
    caller reshapes the result and deletes the index columns afterwards.

    line_start: first line to read (1-indexed, inclusive)
    line_stop: last line to read (1-indexed, inclusive)
    no_to_delete: number of leading tokens to discard, or None to keep all
    path: file to read from; defaults to the case-study data file

    Returns the list of tokens (strings) with square brackets stripped.
    """
    # The context manager closes the file even if reading fails.
    with open(path, "r") as file:
        selected_lines = file.readlines()[line_start - 1:line_stop]

    processed_list = [
        token.strip('[]')
        for line in selected_lines
        for token in line.strip().split(" ")
    ]

    if no_to_delete is not None:
        processed_list = processed_list[no_to_delete:]
    return processed_list

In [12]:
# Tokens are laid out as 440 rows of 6; the first two columns of every row
# are discarded and the remaining four are converted to integers.
demand_per_product = (
    np.asarray(process_nD(197, 343)).reshape(440, 6)[:, 2:].astype(int)
)

# Sum each of the four remaining columns per postcode prefix.
dpp_selective = np.asarray([
    process_aggregated_values(customer_id_og, demand_per_product[:, column], "sum")
    for column in range(4)
])

# Aggregating the 3D array

In [13]:
# One leading token is dropped, the rest is viewed as 440 x 4 x (row length);
# the first three entries of every row are discarded before converting to int.
dpp_per_year = (
    np.asarray(process_nD(347, 1693, 1)).reshape(440, 4, -1)[:, :, 3:].astype(int)
)

# For each of the 4 x 10 (second axis, third axis) slices, sum the 440 values
# per postcode prefix.
dpp_py_selective = np.asarray([
    [
        process_aggregated_values(customer_id_og, dpp_per_year[:, outer, inner], "sum")
        for inner in range(10)
    ]
    for outer in range(4)
])
dpp_py_selective[0].shape

(10, 15)

# Aggregating the 4D array

In [14]:
# One leading token is dropped, the rest is viewed as 440 x 4 x 10 x (row
# length); the first four entries of every row are discarded before
# converting to int.
dppy_scenarios = (
    np.asarray(process_nD(1695, 115666, 1))
    .reshape(440, 4, 10, -1)[:, :, :, 4:]
    .astype(int)
)

# For each of the 4 x 10 x 100 slices along the last three axes, sum the 440
# values per postcode prefix.
dppys_selective = np.asarray([
    [
        [
            process_aggregated_values(customer_id_og, dppy_scenarios[:, a, b, c], "sum")
            for c in range(100)
        ]
        for b in range(10)
    ]
    for a in range(4)
])

In [89]:
# 440 rows; the first two tokens of every row are discarded and the remaining
# entries are converted to floats.
can_supp_distances = (
    np.asarray(process_nD(115670, 116109)).reshape(440, -1)[:, 2:].astype(float)
)

# Row positions of the first postcode seen for every customer prefix.
indices = extract_first_occurrence(customer_id_og)[2]

# Keep only the rows at those positions.
agg_dist_can_supp = np.asarray([can_supp_distances[row, :] for row in indices])
pd.DataFrame(agg_dist_can_supp)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,43,44,45,46,47,48,49,50,51,52
0,192.61,175.97,179.64,18.06,141.07,190.36,50.89,179.24,93.43,101.05,...,121.47,65.56,4.55,18.26,67.08,102.44,116.63,86.38,134.37,144.26
1,180.02,112.58,147.99,56.74,77.68,110.06,18.03,115.41,30.04,37.23,...,53.65,3.07,63.82,79.39,128.22,125.29,53.24,23.0,70.55,80.44
2,265.51,131.58,218.67,203.08,105.62,25.49,165.12,95.71,125.63,100.62,...,91.59,146.53,210.16,225.73,251.09,234.72,96.99,126.45,73.15,65.86
3,209.66,101.83,177.63,111.65,65.06,52.95,72.84,54.0,49.65,24.97,...,26.93,61.04,118.74,134.31,171.31,154.93,39.42,44.0,10.32,38.37
4,211.69,73.99,147.48,116.74,44.53,71.06,78.95,81.73,47.88,31.08,...,3.49,60.19,123.82,139.39,173.34,156.96,18.88,48.69,36.87,21.6
5,189.6,55.68,142.76,135.83,29.71,57.91,97.87,99.02,58.39,51.35,...,24.35,79.29,142.92,158.49,183.85,167.47,29.75,59.2,54.15,19.64
6,106.26,269.55,220.07,288.81,277.58,350.65,303.85,375.09,275.42,302.76,...,308.35,284.1,299.77,281.33,231.83,196.74,300.84,271.27,330.23,311.33
7,87.6,147.24,97.76,96.37,166.23,215.52,132.69,216.05,116.38,143.72,...,149.31,125.05,106.18,87.75,38.24,4.79,141.8,112.23,171.19,177.32
8,205.3,71.38,158.47,159.86,45.42,61.03,121.9,119.41,82.42,77.33,...,48.38,103.32,166.95,182.52,207.88,191.5,53.78,83.23,74.55,34.6
9,170.09,247.49,198.01,198.62,268.48,317.78,234.94,318.3,218.63,245.98,...,251.56,227.31,208.43,190.0,140.49,107.04,244.05,214.48,273.44,279.57


In [90]:
# 440 rows; the first two tokens of every row are discarded and the remaining
# entries are converted to floats.
can_cust_distances = (
    np.asarray(process_nD(116114, 116553)).reshape(440, -1)[:, 2:].astype(float)
)

# Positions of the first postcode seen for every customer prefix.
indices = extract_first_occurrence(customer_id_og)[2]

# First reduce the columns to the selected positions (the selected columns are
# stacked as rows, hence the transpose) ...
agg_dist_customer_only = np.asarray(
    [can_cust_distances[:, column] for column in indices]
).T

# ... then reduce the rows to the same positions.
agg_dist_both = np.asarray([agg_dist_customer_only[row, :] for row in indices])

pd.DataFrame(agg_dist_both)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14
0,0.0,64.86,211.2,119.78,124.86,143.95,298.79,105.21,167.98,207.46,92.74,141.36,153.11,83.21,156.2
1,64.9,0.0,131.64,55.98,62.09,79.96,286.12,127.08,103.99,229.33,28.95,77.57,89.12,26.71,92.4
2,211.16,147.77,0.0,75.38,91.85,78.7,371.44,236.32,60.33,338.57,105.61,67.94,84.9,126.39,60.79
3,119.85,56.03,75.19,0.0,28.45,45.74,315.7,156.66,66.13,258.91,29.67,35.35,54.74,46.73,32.53
4,125.0,62.05,91.59,26.58,0.0,24.34,317.34,158.3,48.38,260.55,35.69,19.34,33.5,48.37,59.26
5,143.96,80.57,78.95,46.24,24.65,0.0,295.42,169.12,23.74,271.37,55.82,15.55,8.89,59.19,76.64
6,298.66,286.07,371.55,315.71,317.74,295.64,0.0,193.65,311.35,276.13,298.43,308.6,288.55,269.97,352.13
7,105.1,127.09,236.52,156.73,158.76,169.27,193.79,0.0,193.3,103.85,139.45,173.08,178.43,110.99,193.16
8,167.85,104.46,60.44,67.16,48.54,23.97,313.54,193.01,0.0,295.26,82.86,32.36,22.08,83.08,97.56
9,207.35,229.34,338.77,258.99,261.02,271.52,276.34,103.99,295.55,0.0,241.7,275.34,280.68,213.25,295.41


In [82]:
# Sanity check: the i-th diagonal entry of the reduced matrix must equal the
# diagonal entry of the full matrix at the i-th selected position.
for i, j in enumerate(indices):
    print(agg_dist_both[i, i] == can_cust_distances[j, j])

True
True
True
True
True
True
True
True
True
True
True
True
True
True
True


# Writing to a txt file

In [21]:
# Write all aggregated quantities to a text file, each preceded by a
# "!"-prefixed description line and a "Name: [" label.
# NOTE(review): csd_selective and ccd_selective (used in the two distance
# sections below) are not defined anywhere in the visible code; the distance
# cells define agg_dist_can_supp and agg_dist_both instead - TODO confirm which
# arrays, and which axis order, are intended.
# NOTE(review): the loop bounds 4, 10, 100 and 53 are hard-coded and must match
# the shapes of the arrays being written.
# File path
output_file = f"CaseStudyData_Aggregated.txt"

# Writing to the file
with open(output_file, 'w') as file:
    # Write number of customers, locations
    file.write("! Number of customers = postcode districts\n")
    file.write(f"nbCustomers: {len(customer_id_selective[0])}\n")
    file.write("! Number of candidate locations\n")
    file.write(f"nbCandidates: {len(candidate_id_selective[0])}\n \n")
    
    # Write Customer IDs (the unique prefixes, each wrapped in double quotes)
    file.write("! Vector of customer ids\n")
    customer_id_output = "CustomerId: [ " + " ".join('"' + s + '"' for s in customer_id_selective[0]) + " ]\n"
    file.write(customer_id_output)

    # Write Customer eastings and northings
    file.write("\n! Vector of customer coordinates\n")
    file.write("CustomerEasting: [" )
    np.savetxt(file, customer_easting_selective, fmt='%d', delimiter=' ', newline=" ")
    file.write("]\n")
    file.write("CustomerNorthing: [" )
    np.savetxt(file, customer_northing_selective, fmt='%d', delimiter=' ', newline=" ")
    file.write("]\n \n")

    # Write Customer populations
    file.write("! Vector of customer populations\nCustomerPopulation: [ ")
    np.savetxt(file, customer_populations_selective, fmt='%d', delimiter=' ', newline=" ")
    file.write("]\n \n")

    # Write Candidate IDs (the unique prefixes, each wrapped in double quotes)
    file.write("! Vector of candidate location ids\n")
    candidate_id_output = "CandidateId: [ " + " ".join('"' + s + '"' for s in candidate_id_selective[0]) + " ]\n"
    file.write(candidate_id_output)

    # Write Candidate eastings and northings
    file.write("\n! Vector of candidate location coordinates\n")
    file.write("CandidateEasting: [" )
    np.savetxt(file, candidate_easting_selective, fmt='%d', delimiter=' ', newline=" ")
    file.write("]\n")
    file.write("CandidateNorthing: [" )
    np.savetxt(file, candidate_northing_selective, fmt='%d', delimiter=' ', newline=" ")
    file.write("]\n \n")

    # Write set up and operating costs, then capacities
    file.write("! Setup cost for warehouses\nSetup: [(1) ")
    np.savetxt(file, setup_costs_selective, fmt='%d', delimiter=' ', newline=" ")
    file.write("]\n \n")
    file.write("! Operating cost for warehouses\nOperating: [(1) ")
    np.savetxt(file, operating_costs_selective, fmt='%d', delimiter=' ', newline=" ")
    file.write("]\n \n")
    file.write("! The warehouse capacity\nCapacity: [(1) ")
    np.savetxt(file, wh_capacities_selective, fmt='%d', delimiter=' ', newline=" ")
    file.write("]\n \n")

    # Write annual district demand in kg per product group:
    # one "(customer 1)" row with the four product values
    file.write("! The annual district demand in kilograms per product group\nCustomerDemand: [\n")
    for i in range(len(customer_id_selective[0])):
        file.write(f"({i+1} 1) {dpp_selective[0,i]} {dpp_selective[1,i]} {dpp_selective[2,i]} {dpp_selective[3,i]} \n")
    file.write("]\n \n")

    # Write customer demand in kilograms per product group over nbPeriod years:
    # one "(customer product 1)" row with the ten period values
    file.write("! The customer demand in kilograms per product group over nbPeriods years\nCustomerDemandPeriods: [")
    for cust in range(len(customer_id_selective[0])):
        for pro in range(4):
            file.write(f"\n({cust+1} {pro+1} 1) ")
            for per in range(10):
                file.write(f"{dpp_py_selective[pro, per, cust]} ")
    file.write("]\n \n")

    # Write customer demand per period for each scenario:
    # one "(customer product period 1)" row with the hundred scenario values
    file.write("CustomerDemandPeriodScenarios: [")
    for cust in range(len(customer_id_selective[0])):
        for pro in range(4):
            for per in range(10):
                file.write(f"\n({cust+1} {pro+1} {per+1} 1) ")
                for scen in range(100):
                    file.write(f"{dppys_selective[pro, per, scen, cust]} ")
    file.write("] \n \n")

    # Write distance matrix between candidate and supplier
    # (csd_selective is indexed [supplier, candidate] here)
    file.write("! Distance matrix between candidate locations and suppliers\nDistanceCandidateSupplier: [")
    for can in range(len(candidate_id_selective[0])):
        file.write(f"\n({can+1} 1) ")
        for supp in range(53):
            file.write(f"{csd_selective[supp, can]} ")
    file.write("]\n \n")

    # Write distance matrix between customer and candidate
    # (ccd_selective is indexed [customer, candidate] here)
    file.write("! Distance matrix between candidate locations and customers\nDistanceCandidateCustomer: [")
    for can in range(len(candidate_id_selective[0])):
        file.write(f"\n({can+1} 1) ")
        for cust in range(len(customer_id_selective[0])):
            file.write(f"{ccd_selective[cust, can]} ")
    file.write("]")

In [24]:
# Sanity check: total of the selected setup costs (shown as the cell output).
np.array(setup_costs_selective).sum()

54007000