## Imports

In [1]:
import numpy as np
import linecache as lc

# Aggregating postcodes by alphabetical prefix

In [2]:
# Customer identifiers live on line 17 of the data file. The line is split on
# single spaces, the first two tokens and the last token are discarded
# (presumably a variable name, an assignment marker and a closing delimiter --
# TODO confirm against the file layout), and the surrounding double quotes are
# stripped from every remaining token.
# linecache.getline returns '' instead of raising when the file or line is
# missing, in which case the resulting list is simply empty.
customer_id_og = [
    token.strip('"')
    for token in lc.getline(r"CaseStudyData.txt", 17).split(" ")[2:-1]
]

# Candidate identifiers get the same treatment, read from line 27.
candidate_id_og = [
    token.strip('"')
    for token in lc.getline(r"CaseStudyData.txt", 27).split(" ")[2:-1]
]

In [40]:
def extract_first_occurrence(postcodes_list):
    """Group postcodes by alphabetical prefix and keep the first hit per group.

    The prefix of a postcode is:
      * its first two characters when it has at least two characters and the
        second one is alphabetic (the first character is not inspected in
        this case);
      * otherwise its first character when that character is alphabetic;
      * otherwise the empty string (this also covers empty postcodes).

    Returns a 3-tuple of parallel lists, ordered by first appearance:
    ``(unique_prefixes, first_postcodes, first_indices)`` where
    ``first_indices[k]`` is the position in ``postcodes_list`` of the first
    postcode carrying ``unique_prefixes[k]``.
    """
    seen = {}

    for position, code in enumerate(postcodes_list):
        if len(code) >= 2 and code[1].isalpha():
            key = code[:2]
        elif code and code[0].isalpha():
            key = code[0]
        else:
            key = ''
        # setdefault only stores the entry the first time a prefix shows up.
        seen.setdefault(key, (code, position))

    # Dict iteration follows insertion order, i.e. order of first appearance.
    unique_prefixes = list(seen)
    first_postcodes = [seen[key][0] for key in unique_prefixes]
    first_indices = [seen[key][1] for key in unique_prefixes]

    return unique_prefixes, first_postcodes, first_indices

# Each result is the 3-tuple returned by extract_first_occurrence:
# (unique prefixes, first postcode seen per prefix, index of that postcode in
# the original list).
customer_id_selective = extract_first_occurrence(customer_id_og)
candidate_id_selective = extract_first_occurrence(candidate_id_og)

# Aggregating the 1D vectors

In [41]:
def extract_selected_values(postcode_list, value_list):
    """Pick the entries of ``value_list`` that line up with first-prefix postcodes.

    ``postcode_list`` is passed to ``extract_first_occurrence``; the indices it
    reports (one per distinct prefix, in order of first appearance) are then
    used to index ``value_list``. Returns the selected values as a list.
    """
    _, _, keep = extract_first_occurrence(postcode_list)
    selected = []
    for position in keep:
        selected.append(value_list[position])
    return selected

## Aggregating coordinates

In [43]:
def _read_numeric_line(line_number):
    """Parse one line of CaseStudyData.txt into a list of evaluated values.

    The line is split on single spaces and the first two tokens and the last
    token are dropped before evaluation.
    """
    tokens = lc.getline(r"CaseStudyData.txt", line_number).split(" ")[2:-1]
    # NOTE(review): eval() executes whatever expression the token contains.
    # Acceptable only while the data file is trusted; consider a literal
    # parser if the file can come from elsewhere.
    return [eval(token) for token in tokens]


# Customer coordinates (file lines 20 and 21), reduced to one entry per
# customer postcode prefix.
customer_easting = _read_numeric_line(20)
customer_easting_selective = extract_selected_values(customer_id_og, customer_easting)

customer_northing = _read_numeric_line(21)
customer_northing_selective = extract_selected_values(customer_id_og, customer_northing)

# Candidate coordinates (file lines 30 and 31), reduced to one entry per
# candidate postcode prefix.
candidate_easting = _read_numeric_line(30)
candidate_easting_selective = extract_selected_values(candidate_id_og, candidate_easting)

candidate_northing = _read_numeric_line(31)
candidate_northing_selective = extract_selected_values(candidate_id_og, candidate_northing)

## Aggregating warehouse costs and capacities

In [56]:
def process_costs_capacities(line_start, line_stop):
    """Read a multi-line 1D vector from CaseStudyData.txt as a flat list.

    Parameters
    ----------
    line_start : int
        1-indexed number of the first line to read.
    line_stop : int
        1-indexed number of the last line to read (inclusive).

    Returns
    -------
    list
        The evaluated tokens found on those lines, in file order, with the
        first two tokens of the flattened sequence removed.

    Each line is stripped of surrounding whitespace and split on single
    spaces; square brackets are stripped from both ends of every token.
    The two leading tokens that get discarded are presumably the variable
    name and the assignment marker -- confirm against the file layout.

    Raises
    ------
    OSError
        If the data file cannot be opened.
    """
    # The context manager guarantees the handle is closed; the previous
    # version opened the file and never closed it.
    with open(r"CaseStudyData.txt", "r") as file:
        raw_lines = file.readlines()[line_start - 1:line_stop]

    tokens = []
    for raw_line in raw_lines:
        tokens.extend(piece.strip('[]') for piece in raw_line.strip().split(" "))

    # NOTE(review): eval() executes whatever expression the token contains.
    # Acceptable only while the data file is trusted; consider a literal
    # parser if the file can come from elsewhere.
    return [eval(token) for token in tokens[2:]]

In [63]:
# Each quantity is read from its own range of file lines and then reduced to
# one value per postcode prefix.
# NOTE(review): the section heading describes these as warehouse quantities,
# yet the selection indices come from customer_id_og rather than
# candidate_id_og -- confirm this is intended.
setup_costs = process_costs_capacities(62, 105)
setup_costs_selective = extract_selected_values(customer_id_og, setup_costs)

operating_costs = process_costs_capacities(108, 146)
operating_costs_selective = extract_selected_values(customer_id_og, operating_costs)

wh_capacities = process_costs_capacities(149, 193)
wh_capacities_selective = extract_selected_values(customer_id_og, wh_capacities)

# Aggregating the 2D arrays

We start with 4 $\times$ 440 = 1760 values, so after aggregation we expect to be left with 4 vectors, one for each product type.

In [64]:
def process_nD(line_start, line_stop, no_to_delete=None):
    """Read a multi-line block of CaseStudyData.txt as a flat list of tokens.

    Unlike ``process_costs_capacities`` this function does not drop any
    leading tokens by default and does not convert the tokens: the caller
    is expected to reshape the result and delete unwanted columns.

    Parameters
    ----------
    line_start : int
        1-indexed number of the first line to read.
    line_stop : int
        1-indexed number of the last line to read (inclusive).
    no_to_delete : int, optional
        When given, that many tokens are removed from the front of the
        flattened list. ``None`` (the default) removes nothing.

    Returns
    -------
    list of str
        Tokens in file order. Each line is stripped of surrounding
        whitespace and split on single spaces; square brackets are stripped
        from both ends of every token.

    Raises
    ------
    OSError
        If the data file cannot be opened.
    """
    # The context manager guarantees the handle is closed; the previous
    # version opened the file and never closed it.
    with open(r"CaseStudyData.txt", "r") as file:
        selected_lines = file.readlines()[line_start - 1:line_stop]

    processed_list = []
    for raw_line in selected_lines:
        processed_list.extend(
            piece.strip('[]') for piece in raw_line.strip().split(" ")
        )

    if no_to_delete is not None:
        processed_list = processed_list[no_to_delete:]
    return processed_list

In [129]:
# Tokens from file lines 197-343 are laid out as 440 rows of 6 tokens; the
# first two columns are dropped, leaving 4 integer columns per row.
demand_per_product = (
    np.asarray(process_nD(197, 343))
    .reshape(440, 6)[:, 2:]
    .astype(int)
)

# One row per remaining column, each reduced to one value per customer
# postcode prefix.
dpp_selective = np.asarray([
    extract_selected_values(customer_id_og, demand_per_product[:, column])
    for column in range(4)
])

# Aggregating the 3D array

In [130]:
# Tokens from file lines 347-1693 (minus the first token) are arranged as
# 440 x 4 x N; the first three entries along the last axis are dropped.
dpp_per_year = (
    np.asarray(process_nD(347, 1693, 1))
    .reshape(440, 4, -1)[:, :, 3:]
    .astype(int)
)

# Result is indexed [second axis, third axis, selected row]; only the first
# 10 positions of the third axis are used (presumably one per year, going by
# the variable name -- confirm).
dpp_py_selective = np.asarray([
    [
        extract_selected_values(customer_id_og, dpp_per_year[:, outer, inner])
        for inner in range(10)
    ]
    for outer in range(4)
])

# Aggregating the 4D array

In [131]:
# Tokens from file lines 1695-115666 (minus the first token) are arranged as
# 440 x 4 x 10 x N; the first four entries along the last axis are dropped.
dppy_scenarios = (
    np.asarray(process_nD(1695, 115666, 1))
    .reshape(440, 4, 10, -1)[:, :, :, 4:]
    .astype(int)
)

# Result is indexed [axis 1, axis 2, axis 3, selected row]; only the first
# 100 positions of the last axis are used (presumably one per scenario, going
# by the variable name -- confirm).
dppys_selective = []
for outer in range(4):
    per_outer = [
        [
            extract_selected_values(
                customer_id_og, dppy_scenarios[:, outer, middle, inner]
            )
            for inner in range(100)
        ]
        for middle in range(10)
    ]
    dppys_selective.append(per_outer)

dppys_selective = np.asarray(dppys_selective)

In [126]:
# Tokens from file lines 115670-116109 form 440 rows; the first two columns
# of each row are dropped and the rest converted to float.
can_supp_distances = (
    np.asarray(process_nD(115670, 116109))
    .reshape(440, -1)[:, 2:]
    .astype(float)
)

# The first 53 columns are each reduced to the rows picked by the customer
# postcode prefixes, so the result is indexed [column, selected row].
# NOTE(review): judging by the variable name the rows may be candidates, yet
# they are selected with customer_id_og -- confirm this is intended.
csd_selective = np.asarray([
    extract_selected_values(customer_id_og, can_supp_distances[:, column])
    for column in range(53)
])

In [127]:
# Tokens from file lines 116114-116553 form 440 rows; the first two columns
# of each row are dropped and the rest converted to float.
can_cust_distances = (
    np.asarray(process_nD(116114, 116553))
    .reshape(440, -1)[:, 2:]
    .astype(float)
)

# The first 53 columns are each reduced to the rows picked by the customer
# postcode prefixes, so the result is indexed [column, selected row].
# NOTE(review): the column count of 53 is the same as in the
# can_supp_distances cell; verify that it matches the number of columns this
# matrix actually has and that the columns do not also need aggregating.
ccd_selective = np.asarray([
    extract_selected_values(customer_id_og, can_cust_distances[:, column])
    for column in range(53)
])