# Imports and functions

In [5]:
import numpy as np
import linecache as lc

In [6]:
def extract_first_occurrence(postcodes_list):
    '''
    Pick one representative postcode per postcode area: the first one encountered.

    The area prefix of a postcode is its first two characters when the second 
    character is a letter (e.g. "AB10" -> "AB"), otherwise its first character 
    when that is a letter (e.g. "G1" -> "G"), otherwise the empty string.

    Returns a tuple of three parallel lists, ordered by first appearance in
    postcodes_list:
        1. the distinct area prefixes,
        2. the first postcode seen for each prefix,
        3. the index of that postcode within postcodes_list.
    '''

    seen_areas = set()
    unique_prefixes = []
    first_postcodes = []
    first_indices = []

    for position, code in enumerate(postcodes_list):
        # Work out which area this postcode belongs to.
        if len(code) >= 2 and code[1].isalpha():
            area = code[:2]
        elif code and code[0].isalpha():
            area = code[0]
        else:
            area = ''

        # Only the first postcode of each area is recorded.
        if area in seen_areas:
            continue
        seen_areas.add(area)
        unique_prefixes.append(area)
        first_postcodes.append(code)
        first_indices.append(position)

    return unique_prefixes, first_postcodes, first_indices


def extract_selected_values(postcode_list, value_list):

    '''
    Return the entries of value_list that sit at the positions of the first postcode 
    of each postcode area, as determined by extract_first_occurrence(postcode_list).

    value_list is indexed by position, so it can hold anything that is stored per 
    postcode (e.g. coordinates, populations or demands).
    '''

    _, _, keep_positions = extract_first_occurrence(postcode_list)

    picked = []
    for position in keep_positions:
        picked.append(value_list[position])
    return picked

def extract_every_four(postcode_list, value_list):
    '''
    Return every fourth entry of value_list (positions 0, 4, 8, ...).

    The range of positions is taken from the length of postcode_list, so 
    value_list must be at least that long.
    '''
    picked = []
    for position in range(0, len(postcode_list), 4):
        picked.append(value_list[position])
    return picked


#--------------------------------------------------------------------------------

def process_aggregated_values(postcode_list, value_list, operation='average'):
    
    '''
    Aggregate per-postcode values up to postcode-area level.

    Each postcode is mapped to its area prefix: the first two characters when 
    the second character is a letter (e.g. "AB10" -> "AB"), otherwise the first 
    character (e.g. "G1" -> "G"). An empty postcode maps to the empty prefix ''.
    All values whose postcodes share a prefix are then combined with the chosen 
    operation. For example, with "sum" every value within the AB area is summed.

    Parameters:
        postcode_list: the unaggregated postcodes (strings).
        value_list:    one value per postcode, paired with postcode_list by position.
        operation:     'average' (default), 'sum' or 'maximum'.

    Returns:
        A numpy array with one aggregated value per distinct prefix. The entries 
        are ordered by SORTED prefix (the order produced by np.unique), not by 
        the order in which prefixes first appear in postcode_list.

    Raises:
        ValueError: if operation is not one of the three supported names.
    '''
    
    reducers = {'average': np.mean, 'sum': np.sum, 'maximum': np.max}
    if operation not in reducers:
        raise ValueError("Invalid operation. Please choose 'average', 'sum', or 'maximum'.")
    reduce_values = reducers[operation]

    # Compute each postcode's area prefix once. Slicing with [:1] (rather than
    # indexing [0]) keeps an empty postcode from raising IndexError.
    prefixes = [
        postcode[:2] if len(postcode) >= 2 and postcode[1].isalpha() else postcode[:1]
        for postcode in postcode_list
    ]

    # np.unique returns the distinct prefixes in sorted order; convert to plain
    # str so they can be used interchangeably with the entries of `prefixes`.
    unique_prefixes = [str(prefix) for prefix in np.unique(prefixes)]

    # Every prefix gets a bucket up front, so a prefix with no paired value
    # still appears in the result.
    values_by_prefix = {prefix: [] for prefix in unique_prefixes}
    for prefix, value in zip(prefixes, value_list):
        values_by_prefix[prefix].append(value)

    return np.array([reduce_values(values_by_prefix[prefix]) for prefix in unique_prefixes])



#--------------------------------------------------------------------------------

def process_1D(line_start, line_stop, file_path=r"CaseStudyData.txt"):
    
    '''
    Read a 1D vector (e.g. costs or capacities) that is spread over several 
    lines of the data file and return it as a flat list of Python values.

    Parameters:
        line_start: 1-based number of the first line to read.
        line_stop:  1-based number of the last line to read (inclusive).
        file_path:  data file to read; defaults to "CaseStudyData.txt" in the 
                    current working directory.

    Returns:
        A list with one evaluated value per token, after the first two tokens 
        of the selected lines have been discarded.
    '''
    
    # The context manager guarantees the file handle is closed again.
    with open(file_path, "r") as file:
        selected_lines = file.readlines()[line_start-1:line_stop]

    tokens = []
    for line in selected_lines:
        # Split on single spaces and peel off the surrounding list brackets.
        tokens.extend(token.strip('[]') for token in line.strip().split(" "))

    # The first two tokens are not data (presumably the vector's label and its
    # index marker, as in "Setup: [(1) ..."), so they are dropped.
    tokens = tokens[2:]

    # NOTE(review): eval executes whatever text is in the data file. That is
    # acceptable for a trusted local file only; do not point this at untrusted input.
    return [eval(token) for token in tokens]


def process_nD(line_start, line_stop, no_to_delete = None, file_path=r"CaseStudyData.txt"):
    
    '''
    Read several lines of the data file and flatten them into one long list of 
    string tokens, to be converted and reshaped outside of the function.

    The function's only purpose is to clean the txt data into a form suitable 
    for python: lines are stripped, split on single spaces, and any leading or 
    trailing square brackets are removed from each token.

    Parameters:
        line_start:   1-based number of the first line to read.
        line_stop:    1-based number of the last line to read (inclusive).
        no_to_delete: if given, the number of tokens to drop from the front of 
                      the flattened list.
        file_path:    data file to read; defaults to "CaseStudyData.txt" in the 
                      current working directory.

    Returns:
        A flat list of string tokens.
    '''
    
    # The context manager guarantees the file handle is closed again.
    with open(file_path, "r") as file:
        selected_lines = file.readlines()[line_start-1:line_stop]

    processed_list = []
    for line in selected_lines:
        processed_list.extend(token.strip('[]') for token in line.strip().split(" "))
    
    if no_to_delete is not None:
        processed_list = processed_list[no_to_delete:]
    return processed_list

# Function to write txt files

The function below writes a txt file in the same format as the original data file we were given. Keeping this logic in a single function avoids repeating the same large chunk of writing code for every dataset we produce.

In [7]:
def write_txt(file_path, customer_ids, candidate_ids, 
              supplier_info1, supplier_info2, 
              customer_easting, customer_northing, customer_populations,
              candidate_easting, candidate_northing, setup_costs, operating_costs, wh_capacities,
              demand_prod, demand_prod_year, demand_prod_year_scen,
              can_supp_distances, can_cust_distances):
    '''
    Write a complete case-study dataset to a text file (file_path is opened in 
    'w' mode, so an existing file is overwritten).

    Parameters:
        file_path:              path of the file to write.
        customer_ids:           customer id strings; their count is written as nbCustomers.
        candidate_ids:          candidate id strings; their count is written as nbCandidates.
        supplier_info1/2:       iterables of text chunks that are written verbatim.
        customer_easting, customer_northing, customer_populations:
                                1D vectors, written as integers.
        candidate_easting, candidate_northing, setup_costs, operating_costs, wh_capacities:
                                1D vectors, written as integers.
        demand_prod:            indexed as [product, customer].
        demand_prod_year:       indexed as [product, period, customer].
        demand_prod_year_scen:  indexed as [product, period, scenario, customer].
        can_supp_distances:     indexed as [candidate, supplier].
        can_cust_distances:     indexed as [candidate, customer].

    The multi-dimensional inputs are indexed with tuple subscripts (e.g. [0, i]), 
    so they must support that kind of indexing (numpy arrays do).

    NOTE: several dimensions are hard-coded below: 4 products, 10 periods, 
    100 scenarios and 53 suppliers. The inputs must be at least that large 
    along the corresponding axes.
    '''
    
    output_file = file_path

    # Writing to the file
    with open(output_file, 'w') as file:
            
        # Write number of customers, locations
        file.write("! Number of customers = postcode districts\n")
        file.write(f"nbCustomers: {len(customer_ids)}\n")
        file.write("! Number of candidate locations\n")
        file.write(f"nbCandidates: {len(candidate_ids)}\n \n")
        
        # Supplier/vehicle sections are copied through unchanged
        for i in supplier_info1:
            file.write(f"{i}")
        
        for i in supplier_info2:
            file.write(f"{i}")
        
        # Write Customer IDs (each id wrapped in double quotes)
        file.write("! Vector of customer ids\n")
        customer_id_output = "CustomerId: [ " + " ".join('"' + s + '"' for s in customer_ids) + " ]\n"
        file.write(customer_id_output)

        # Write Customer eastings and northings
        # (newline=" " makes np.savetxt emit the whole vector on one line)
        file.write("\n! Vector of customer coordinates\n")
        file.write("CustomerEasting: [" )
        np.savetxt(file, customer_easting, fmt='%d', delimiter=' ', newline=" ")
        file.write("]\n")
        file.write("CustomerNorthing: [" )
        np.savetxt(file, customer_northing, fmt='%d', delimiter=' ', newline=" ")
        file.write("]\n \n")

        # Write Customer populations
        file.write("! Vector of customer populations\nCustomerPopulation: [ ")
        np.savetxt(file, customer_populations, fmt='%d', delimiter=' ', newline=" ")
        file.write("]\n \n")

        # Write Candidate IDs (each id wrapped in double quotes)
        file.write("! Vector of candidate location ids\n")
        candidate_id_output = "CandidateId: [ " + " ".join('"' + s + '"' for s in candidate_ids) + " ]\n"
        file.write(candidate_id_output)

        # Write Candidate eastings and northings
        file.write("\n! Vector of candidate location coordinates\n")
        file.write("CandidateEasting: [" )
        np.savetxt(file, candidate_easting, fmt='%d', delimiter=' ', newline=" ")
        file.write("]\n")
        file.write("CandidateNorthing: [" )
        np.savetxt(file, candidate_northing, fmt='%d', delimiter=' ', newline=" ")
        file.write("]\n \n")

        # Write set up and operating costs, and capacities
        file.write("! Setup cost for warehouses\nSetup: [(1) ")
        np.savetxt(file, setup_costs, fmt='%d', delimiter=' ', newline=" ")
        file.write("]\n \n")
        file.write("! Operating cost for warehouses\nOperating: [(1) ")
        np.savetxt(file, operating_costs, fmt='%d', delimiter=' ', newline=" ")
        file.write("]\n \n")
        file.write("! The warehouse capacity\nCapacity: [(1) ")
        np.savetxt(file, wh_capacities, fmt='%d', delimiter=' ', newline=" ")
        file.write("]\n \n")

        # Write annual district demand in kg per product group
        # (one row per customer; the four product groups are hard-coded)
        file.write("! The annual district demand in kilograms per product group\nCustomerDemand: [\n")
        for i in range(len(customer_ids)):
            file.write(f"({i+1} 1) {demand_prod[0,i]} {demand_prod[1,i]} {demand_prod[2,i]} {demand_prod[3,i]} \n")
        file.write("]\n \n")

        # Write customer demand in kilograms per product group over nbPeriod years
        # (one row per customer/product pair; 4 products and 10 periods are hard-coded)
        file.write("! The customer demand in kilograms per product group over nbPeriods years\nCustomerDemandPeriods: [")
        for cust in range(len(customer_ids)):
            for pro in range(4):
                file.write(f"\n({cust+1} {pro+1} 1) ")
                for per in range(10):
                    file.write(f"{demand_prod_year[pro, per, cust]} ")
        file.write("]\n \n")

        # Write customer demand per period for each scenario
        # (one row per customer/product/period triple; 100 scenarios are hard-coded)
        file.write("CustomerDemandPeriodScenarios: [")
        for cust in range(len(customer_ids)):
            for pro in range(4):
                for per in range(10):
                    file.write(f"\n({cust+1} {pro+1} {per+1} 1) ")
                    for scen in range(100):
                        file.write(f"{demand_prod_year_scen[pro, per, scen, cust]} ")
        file.write("] \n \n")

        # Write distance matrix between candidate and supplier
        # (one row per candidate; 53 suppliers are hard-coded)
        file.write("! Distance matrix between candidate locations and suppliers\nDistanceCandidateSupplier: [")
        for can in range(len(candidate_ids)):
            file.write(f"\n({can+1} 1) ")
            for supp in range(53):
                file.write(f"{can_supp_distances[can, supp]} ")
        file.write("]\n \n")

        # Write distance matrix between customer and candidate
        # (one row per candidate, one column per customer)
        file.write("! Distance matrix between candidate locations and customers\nDistanceCandidateCustomer: [")
        for can in range(len(candidate_ids)):
            file.write(f"\n({can+1} 1) ")
            for cust in range(len(customer_ids)):
                file.write(f"{can_cust_distances[can, cust]} ")
        file.write("]")

# Supplier & vehicle information to write txt files

These lines are read from the txt file and stored verbatim. They are not processed in any way; we only keep them so that the txt files we construct later contain all of the relevant supplier and vehicle data.

In [33]:
# Read the data file once, inside a context manager so the handle is closed,
# then slice out the two blocks of lines that are copied verbatim into every
# output file (0-based slices, i.e. file lines 5-15 and 34-61).
with open(r"CaseStudyData.txt", "r") as _case_study_file:
    _case_study_lines = _case_study_file.readlines()

supplier_info1 = _case_study_lines[4:15]
supplier_info2 = _case_study_lines[33:61]

# Extracting postcode lists

Simply extracting the lists of postcodes for both candidates and customers, and transforming to a form usable in python.

In [9]:
# Customer ids are on line 17 of the data file and candidate ids on line 27.
# After splitting on spaces, the first two tokens and the last token are
# dropped (presumably the label, the opening bracket and the closing bracket),
# and the surrounding double quotes are removed from each remaining id.
_customer_id_line = lc.getline(r"CaseStudyData.txt", 17)
customer_id_og = [token.strip('"') for token in _customer_id_line.split(" ")[2:-1]]

_candidate_id_line = lc.getline(r"CaseStudyData.txt", 27)
candidate_id_og = [token.strip('"') for token in _candidate_id_line.split(" ")[2:-1]]

# Extracting data

### Extracting coordinates, customer populations, and warehouse costs and capacities

In [10]:
def _read_numeric_line(line_number):
    '''
    Read one single-line vector from the data file and return its values.

    The line is split on spaces; the first two tokens and the last token are 
    discarded and every remaining token is evaluated into a Python value.
    '''
    tokens = lc.getline(r"CaseStudyData.txt", line_number).split(" ")[2:-1]
    # NOTE(review): eval executes the file's text; only safe for a trusted data file.
    return [eval(token) for token in tokens]


# Single-line vectors: customer data on lines 20, 21 and 24,
# candidate coordinates on lines 30 and 31.
customer_easting = _read_numeric_line(20)
customer_northing = _read_numeric_line(21)
customer_populations = _read_numeric_line(24)

candidate_easting = _read_numeric_line(30)
candidate_northing = _read_numeric_line(31)

# Warehouse vectors are spread over several lines each.
setup_costs = process_1D(62, 105)
operating_costs = process_1D(108, 146)
wh_capacities = process_1D(149, 193)

### Extracting multidimensional arrays

#### Demand

In [11]:
# Each demand table is read as a flat list of string tokens, reshaped so that
# the first axis has 440 rows, stripped of its leading columns (2, 3 and 4
# respectively), and converted to integers.

# Annual demand: 6 tokens per row, of which the last 4 are kept.
_demand_tokens = np.asarray(process_nD(197, 343))
demand_product = _demand_tokens.reshape(440, 6)[:, 2:].astype(int)
del _demand_tokens

# Demand per period: 440 x 4 rows, the first 3 tokens of each row are dropped.
_demand_year_tokens = np.asarray(process_nD(347, 1693, 1))
demand_product_year = _demand_year_tokens.reshape(440, 4, -1)[:, :, 3:].astype(int)
del _demand_year_tokens

# Demand per period and scenario: 440 x 4 x 10 rows, the first 4 tokens of each row are dropped.
_demand_scenario_tokens = np.asarray(process_nD(1695, 115666, 1))
demand_product_year_scenarios = _demand_scenario_tokens.reshape(440, 4, 10, -1)[:, :, :, 4:].astype(int)
del _demand_scenario_tokens

#### Distance

In [12]:
# Both distance tables have 440 rows; the first two tokens of every row are
# dropped and the remaining entries are converted to floats.
_supplier_distance_tokens = np.asarray(process_nD(115670, 116109))
can_supp_distances = _supplier_distance_tokens.reshape(440, -1)[:, 2:].astype(float)
del _supplier_distance_tokens

_customer_distance_tokens = np.asarray(process_nD(116114, 116553))
can_cust_distances = _customer_distance_tokens.reshape(440, -1)[:, 2:].astype(float)
del _customer_distance_tokens

# Aggregating over customers only

In [36]:
# Area-level view of the customer and candidate IDs. Each result is a tuple of
# (area prefixes, first postcode per area, index of that first postcode).
customer_id_selective = extract_first_occurrence(customer_id_og)
candidate_id_selective = extract_first_occurrence(candidate_id_og)

# positions of the first customer postcode of each postcode area
aggregation_indices = customer_id_selective[2]


def _sum_by_area(values):
    '''Sum per-customer values within each customer postcode area.'''
    return process_aggregated_values(customer_id_og, values, "sum")


# NOTE(review): process_aggregated_values returns its areas in sorted-prefix
# order, whereas customer_id_selective lists them in order of first appearance.
# The two only line up if customer_id_og is already grouped in sorted prefix
# order -- TODO confirm against the data file.

# COORDINATES AND POPULATION
# Coordinates are taken from the representative postcode; populations are summed.
customer_easting_agg = extract_selected_values(customer_id_og, customer_easting)
customer_northing_agg = extract_selected_values(customer_id_og, customer_northing)
customer_populations_agg = _sum_by_area(customer_populations)


# DEMAND MATRICES
# Indexed as [product, area]
demand_prod_sel = np.asarray([
    _sum_by_area(demand_product[:, product])
    for product in range(4)
])

# Indexed as [product, period, area]
demand_prod_year_sel = np.asarray([
    [_sum_by_area(demand_product_year[:, product, period])
     for period in range(10)]
    for product in range(4)
])

# Indexed as [product, period, scenario, area]
demand_prod_year_scen_sel = np.asarray([
    [[_sum_by_area(demand_product_year_scenarios[:, product, period, scenario])
      for scenario in range(100)]
     for period in range(10)]
    for product in range(4)
])


# DISTANCE MATRIX
# Keep only the columns of the representative customers (rows remain all candidates).
agg_dist_customer_only = np.asarray(
    [can_cust_distances[:, column] for column in aggregation_indices]
).T

#### Writing data to a .txt file

In [15]:
# Dataset 1: customers aggregated by postcode area, all candidates kept.
_write_args = {
    "file_path": "Datasets/CaseStudyData_Aggregated_CustomerOnly.txt",
    # identifiers
    "customer_ids": customer_id_selective[0],
    "candidate_ids": candidate_id_og,
    # verbatim supplier / vehicle sections
    "supplier_info1": supplier_info1,
    "supplier_info2": supplier_info2,
    # customer side (aggregated)
    "customer_easting": customer_easting_agg,
    "customer_northing": customer_northing_agg,
    "customer_populations": customer_populations_agg,
    # candidate side (unchanged)
    "candidate_easting": candidate_easting,
    "candidate_northing": candidate_northing,
    "setup_costs": setup_costs,
    "operating_costs": operating_costs,
    "wh_capacities": wh_capacities,
    # demand (aggregated)
    "demand_prod": demand_prod_sel,
    "demand_prod_year": demand_prod_year_sel,
    "demand_prod_year_scen": demand_prod_year_scen_sel,
    # distances
    "can_supp_distances": can_supp_distances,
    "can_cust_distances": agg_dist_customer_only,
}
write_txt(**_write_args)

# Aggregating over both customers and candidates

#### All the customer aggregations can be obtained from the code chunk above.

In [17]:
# Candidate data must be selected with the CANDIDATE first-occurrence indices,
# so that every selected row belongs to the candidate area listed in
# candidate_id_selective[0]. (Using the customer indices only gives the same
# result when the customer and candidate postcode lists are identical.)
candidate_aggregation_indices = candidate_id_selective[2]

# AGGREGATING COORDINATES
candidate_easting_agg = np.asarray([candidate_easting[i] for i in candidate_aggregation_indices])
candidate_northing_agg = np.asarray([candidate_northing[i] for i in candidate_aggregation_indices])


# AGGREGATING COSTS AND CAPACITIES

setup_costs_agg = np.asarray([setup_costs[i] for i in candidate_aggregation_indices])
operating_costs_agg = np.asarray([operating_costs[i] for i in candidate_aggregation_indices])
wh_capacities_agg = np.asarray([wh_capacities[i] for i in candidate_aggregation_indices])


# AGGREGATING DISTANCE MATRIX
# Rows are candidates; the customer columns were already aggregated above.
agg_dist_can_cust = np.asarray([agg_dist_customer_only[i,:] for i in candidate_aggregation_indices])
agg_dist_can_supp = np.asarray([can_supp_distances[i,:] for i in candidate_aggregation_indices])

#### Writing data to .txt file

In [18]:
# Dataset 2: both customers and candidates aggregated by postcode area.
_write_args = {
    "file_path": "Datasets/CaseStudyData_Aggregated_Both.txt",
    # identifiers
    "customer_ids": customer_id_selective[0],
    "candidate_ids": candidate_id_selective[0],
    # verbatim supplier / vehicle sections
    "supplier_info1": supplier_info1,
    "supplier_info2": supplier_info2,
    # customer side (aggregated)
    "customer_easting": customer_easting_agg,
    "customer_northing": customer_northing_agg,
    "customer_populations": customer_populations_agg,
    # candidate side (aggregated)
    "candidate_easting": candidate_easting_agg,
    "candidate_northing": candidate_northing_agg,
    "setup_costs": setup_costs_agg,
    "operating_costs": operating_costs_agg,
    "wh_capacities": wh_capacities_agg,
    # demand (aggregated)
    "demand_prod": demand_prod_sel,
    "demand_prod_year": demand_prod_year_sel,
    "demand_prod_year_scen": demand_prod_year_scen_sel,
    # distances
    "can_supp_distances": agg_dist_can_supp,
    "can_cust_distances": agg_dist_can_cust,
}
write_txt(**_write_args)

# Fixing optimal candidates

## No aggregation on customers

In [20]:
fixed_candidates_ind = [41, 47, 53, 182, 314, 400]  # the candidates we wish to fix


def _pick_fixed(values):
    '''Return the entries (or rows) of `values` at the fixed candidate positions, as an array.'''
    return np.asarray([values[position] for position in fixed_candidates_ind])


# IDs of the fixed candidates
fixed_candidates = [candidate_id_og[position] for position in fixed_candidates_ind]

# Coordinates
candidate_easting_fixed = _pick_fixed(candidate_easting)
candidate_northing_fixed = _pick_fixed(candidate_northing)

# Costs and capacities
setup_costs_fixed = _pick_fixed(setup_costs)
operating_costs_fixed = _pick_fixed(operating_costs)
wh_capacities_fixed = _pick_fixed(wh_capacities)

# Distance matrices: keep only the rows of the fixed candidates
fixed_can_supp_dist = _pick_fixed(can_supp_distances)
fixed_can_cust_dist = _pick_fixed(can_cust_distances)

#### Write data to .txt file

In [21]:
# Dataset 3: fixed candidates, all customers kept unaggregated.
# The raw demand arrays have the customer axis first, so it is moved to the
# last position to match the index order write_txt uses.
_write_args = {
    "file_path": "Datasets/CaseStudy_FixedCandidates_NoAggregation.txt",
    # identifiers
    "customer_ids": customer_id_og,
    "candidate_ids": fixed_candidates,
    # verbatim supplier / vehicle sections
    "supplier_info1": supplier_info1,
    "supplier_info2": supplier_info2,
    # customer side (unchanged)
    "customer_easting": customer_easting,
    "customer_northing": customer_northing,
    "customer_populations": customer_populations,
    # candidate side (fixed subset)
    "candidate_easting": candidate_easting_fixed,
    "candidate_northing": candidate_northing_fixed,
    "setup_costs": setup_costs_fixed,
    "operating_costs": operating_costs_fixed,
    "wh_capacities": wh_capacities_fixed,
    # demand (unaggregated, customer axis moved last)
    "demand_prod": demand_product.T,
    "demand_prod_year": np.transpose(demand_product_year, (1, 2, 0)),
    "demand_prod_year_scen": np.transpose(demand_product_year_scenarios, (1, 2, 3, 0)),
    # distances
    "can_supp_distances": fixed_can_supp_dist,
    "can_cust_distances": fixed_can_cust_dist,
}
write_txt(**_write_args)

## Aggregating customers

In [23]:
# Rows of the customer-aggregated distance matrix that belong to the fixed candidates.
_fixed_rows = [agg_dist_customer_only[row] for row in fixed_candidates_ind]
fixed_ccd_agg = np.asarray(_fixed_rows)

#### Writing data to .txt file

In [24]:
# Dataset 4: fixed candidates, customers aggregated by postcode area.
_write_args = {
    "file_path": "Datasets/CaseStudy_FixedCandidates_WithAggregation.txt",
    # identifiers
    "customer_ids": customer_id_selective[0],
    "candidate_ids": fixed_candidates,
    # verbatim supplier / vehicle sections
    "supplier_info1": supplier_info1,
    "supplier_info2": supplier_info2,
    # customer side (aggregated)
    "customer_easting": customer_easting_agg,
    "customer_northing": customer_northing_agg,
    "customer_populations": customer_populations_agg,
    # candidate side (fixed subset)
    "candidate_easting": candidate_easting_fixed,
    "candidate_northing": candidate_northing_fixed,
    "setup_costs": setup_costs_fixed,
    "operating_costs": operating_costs_fixed,
    "wh_capacities": wh_capacities_fixed,
    # demand (aggregated)
    "demand_prod": demand_prod_sel,
    "demand_prod_year": demand_prod_year_sel,
    "demand_prod_year_scen": demand_prod_year_scen_sel,
    # distances
    "can_supp_distances": fixed_can_supp_dist,
    "can_cust_distances": fixed_ccd_agg,
}
write_txt(**_write_args)

# Part 2 (uncertain demands)

## Aggregating customers normally and candidates for every 4th value

In [26]:
# Keep every fourth candidate, starting from position 3 (i.e. 3, 7, 11, ...).
every_fourth_ind = list(range(3, len(candidate_id_og), 4))


def _pick_every_fourth(values):
    '''Return the entries (or rows) of `values` at the every-fourth candidate positions, as an array.'''
    return np.asarray([values[position] for position in every_fourth_ind])


# IDs of the retained candidates
agg4_candidates = [candidate_id_og[position] for position in every_fourth_ind]

# COORDINATES
candidate_easting_agg4 = _pick_every_fourth(candidate_easting)
candidate_northing_agg4 = _pick_every_fourth(candidate_northing)

# COSTS AND CAPACITIES
setup_costs_agg4 = _pick_every_fourth(setup_costs)
operating_costs_agg4 = _pick_every_fourth(operating_costs)
wh_capacities_agg4 = _pick_every_fourth(wh_capacities)

# DISTANCE MATRICES (candidate rows only; customer columns are already aggregated)
agg_dist_can_cust4 = _pick_every_fourth(agg_dist_customer_only)
agg_dist_can_supp4 = _pick_every_fourth(can_supp_distances)

#### Writing data to .txt file

In [27]:
# Dataset 5: every 4th candidate, customers aggregated by postcode area.
_write_args = {
    "file_path": "Datasets/CaseStudy_AggData_110_candidates.txt",
    # identifiers
    "customer_ids": customer_id_selective[0],
    "candidate_ids": agg4_candidates,
    # verbatim supplier / vehicle sections
    "supplier_info1": supplier_info1,
    "supplier_info2": supplier_info2,
    # customer side (aggregated)
    "customer_easting": customer_easting_agg,
    "customer_northing": customer_northing_agg,
    "customer_populations": customer_populations_agg,
    # candidate side (every 4th)
    "candidate_easting": candidate_easting_agg4,
    "candidate_northing": candidate_northing_agg4,
    "setup_costs": setup_costs_agg4,
    "operating_costs": operating_costs_agg4,
    "wh_capacities": wh_capacities_agg4,
    # demand (aggregated)
    "demand_prod": demand_prod_sel,
    "demand_prod_year": demand_prod_year_sel,
    "demand_prod_year_scen": demand_prod_year_scen_sel,
    # distances
    "can_supp_distances": agg_dist_can_supp4,
    "can_cust_distances": agg_dist_can_cust4,
}
write_txt(**_write_args)

## Aggregating customers normally and candidates for every 2nd value

In [29]:
# Keep every second candidate, starting from position 1 (i.e. 1, 3, 5, ...).
every_second_ind = list(range(1, len(candidate_id_og), 2))


def _pick_every_second(values):
    '''Return the entries (or rows) of `values` at the every-second candidate positions, as an array.'''
    return np.asarray([values[position] for position in every_second_ind])


# IDs of the retained candidates
agg2_candidates = [candidate_id_og[position] for position in every_second_ind]

# COORDINATES
candidate_easting_agg2 = _pick_every_second(candidate_easting)
candidate_northing_agg2 = _pick_every_second(candidate_northing)

# COSTS AND CAPACITIES
setup_costs_agg2 = _pick_every_second(setup_costs)
operating_costs_agg2 = _pick_every_second(operating_costs)
wh_capacities_agg2 = _pick_every_second(wh_capacities)

# DISTANCE MATRICES (candidate rows only; customer columns are already aggregated)
agg_dist_can_cust2 = _pick_every_second(agg_dist_customer_only)
agg_dist_can_supp2 = _pick_every_second(can_supp_distances)

#### Writing data to a .txt file

In [30]:
# Dataset 6: every other candidate, customers aggregated by postcode area.
_write_args = {
    "file_path": "Datasets/CaseStudy_AggData_220_candidates.txt",
    # identifiers
    "customer_ids": customer_id_selective[0],
    "candidate_ids": agg2_candidates,
    # verbatim supplier / vehicle sections
    "supplier_info1": supplier_info1,
    "supplier_info2": supplier_info2,
    # customer side (aggregated)
    "customer_easting": customer_easting_agg,
    "customer_northing": customer_northing_agg,
    "customer_populations": customer_populations_agg,
    # candidate side (every 2nd)
    "candidate_easting": candidate_easting_agg2,
    "candidate_northing": candidate_northing_agg2,
    "setup_costs": setup_costs_agg2,
    "operating_costs": operating_costs_agg2,
    "wh_capacities": wh_capacities_agg2,
    # demand (aggregated)
    "demand_prod": demand_prod_sel,
    "demand_prod_year": demand_prod_year_sel,
    "demand_prod_year_scen": demand_prod_year_scen_sel,
    # distances
    "can_supp_distances": agg_dist_can_supp2,
    "can_cust_distances": agg_dist_can_cust2,
}
write_txt(**_write_args)