# Prize-Collecting Steiner Tree (PCSTP)

## Libs Importing

In [1]:
import sys
import os
import time
import networkx as nx

sys.path.insert(1, os.path.realpath(os.path.pardir))

In [2]:
import multiprocessing

NUM_PROCESSES = multiprocessing.cpu_count()
print("Number of cpu : ", NUM_PROCESSES)

Number of cpu :  12


In [3]:
from pcstp.instances.generator import generate_random_steiner
from pcstp.instances.reader import SteinlibReader, DatReader

from pcstp.steinertree import SteinerTreeProblem
from pcstp.solver.base import computes_steiner_cost
from pcstp.solver.aco import AntColony
from pcstp.solver.greedy_h1 import GreedyH1

from pcstp.utils.graph import preprocessing
from pcstp.utils.draw import draw_steiner_graph

## Experiments

In [4]:
SEED = 100

In [5]:
import glob

INSTANCES_PATH_PREFIX = '../data/instances/benchmark/PCSPG-CRR'
NUM_EXPERIMENTS_PER_INSTANCE = 10

all_files = glob.glob(os.path.join(INSTANCES_PATH_PREFIX, '*'))

files = all_files

networkx_history = []

for filename in files:
    if filename.endswith('.xlsx'): continue
    if filename.endswith('.stp'):
        stp_reader = SteinlibReader()
    else:
        stp_reader = DatReader()

    print(f"Reading: {filename}")
    stp = stp_reader.parser(filename=filename)
    G, terminals = preprocessing(stp.graph, stp.terminals)
    stp_preprocessed = SteinerTreeProblem(graph=G, terminals=terminals)
    
    def run_experiment(experiment: int):
        start_time = time.time()
        nx_steiner_tree = nx.algorithms.approximation.steiner_tree(
            stp_preprocessed.graph,
            stp_preprocessed.terminals,
            weight='cost'
        )

        networkx_duration = time.time() - start_time
        networkx_cost = computes_steiner_cost(stp.graph, nx_steiner_tree, stp.terminals)

        history = {
            "filename": filename,
            "experiment": experiment,
            "num_nodes": stp.num_nodes,
            "num_edges": stp.num_edges,
            "num_nodes_after_preprocessing": len(stp_preprocessed.graph.nodes),
            "num_edges_after_preprocessing": len(stp_preprocessed.graph.edges),
            "terminals": stp.num_terminals,
            "steiner_cost": networkx_cost,
            "duration": networkx_duration
        }
        return history

    experiments = range(1, NUM_EXPERIMENTS_PER_INSTANCE+1)

    with multiprocessing.Pool(processes=NUM_PROCESSES) as p:
        experiments_results = p.map(run_experiment, experiments)
    
    networkx_history.extend(experiments_results)

Reading: ../data/instances/benchmark/PCSPG-CRR/C01-A.stp
Reading: ../data/instances/benchmark/PCSPG-CRR/C05-A.stp
Reading: ../data/instances/benchmark/PCSPG-CRR/C04-A.stp
Reading: ../data/instances/benchmark/PCSPG-CRR/C02-A.stp
Reading: ../data/instances/benchmark/PCSPG-CRR/C03-A.stp


In [6]:
import pandas as pd

df_score_networkx = pd.DataFrame.from_dict(networkx_history)

In [7]:
df_score_networkx.groupby('filename')[['duration', 'steiner_cost']].describe()

Unnamed: 0_level_0,duration,duration,duration,duration,duration,duration,duration,duration,steiner_cost,steiner_cost,steiner_cost,steiner_cost,steiner_cost,steiner_cost,steiner_cost,steiner_cost
Unnamed: 0_level_1,count,mean,std,min,25%,50%,75%,max,count,mean,std,min,25%,50%,75%,max
filename,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2
../data/instances/benchmark/PCSPG-CRR/C01-A.stp,10.0,0.820369,0.018187,0.790827,0.811247,0.820128,0.834868,0.844906,10.0,88.0,0.0,88.0,88.0,88.0,88.0,88.0
../data/instances/benchmark/PCSPG-CRR/C02-A.stp,10.0,0.576933,0.020645,0.53386,0.569547,0.579054,0.594743,0.596087,10.0,144.0,0.0,144.0,144.0,144.0,144.0,144.0
../data/instances/benchmark/PCSPG-CRR/C03-A.stp,10.0,0.723959,0.021714,0.670352,0.71841,0.733882,0.737988,0.739814,10.0,779.0,0.0,779.0,779.0,779.0,779.0,779.0
../data/instances/benchmark/PCSPG-CRR/C04-A.stp,10.0,0.976669,0.0153,0.945936,0.969171,0.980186,0.986479,0.993411,10.0,1110.0,0.0,1110.0,1110.0,1110.0,1110.0,1110.0
../data/instances/benchmark/PCSPG-CRR/C05-A.stp,10.0,1.48988,0.02196,1.439659,1.48521,1.496182,1.506348,1.507065,10.0,1604.0,0.0,1604.0,1604.0,1604.0,1604.0,1604.0



# Greedy

In [8]:
import glob
import random
import numpy as np

INSTANCES_PATH_PREFIX = '../data/instances/benchmark/PCSPG-CRR'
NUM_EXPERIMENTS_PER_INSTANCE = 5

all_files = glob.glob(os.path.join(INSTANCES_PATH_PREFIX, '*'))

files = all_files

greedy_history = []

for filename in files:
    if filename.endswith('.xlsx'): continue
    if filename.endswith('.stp'):
        stp_reader = SteinlibReader()
    else:
        stp_reader = DatReader()

    print(f"Reading: {filename}")
    stp = stp_reader.parser(filename=filename)
    G, terminals = preprocessing(stp.graph, stp.terminals)
    stp_preprocessed = SteinerTreeProblem(graph=G, terminals=terminals)
    print("Nodes: ", len(stp_preprocessed.graph.nodes))
    print("Edges: ", len(stp_preprocessed.graph.edges))
    # print("Terminals: ", stp_preprocessed.terminals)
    def run_experiment(experiment: int):
        if SEED:
            np.random.seed(SEED*experiment)
            random.seed(SEED*experiment)
        solver = GreedyH1(stp_preprocessed.graph, list(stp_preprocessed.terminals), log_level='info')
        steiner_tree, greedy_cost = solver.solve()
        print(f'Cost: {greedy_cost} ')

        history = {
            "filename": filename,
            "experiment": experiment,
            "num_nodes": stp.num_nodes,
            "num_edges": stp.num_edges,
            "num_nodes_after_preprocessing": len(stp_preprocessed.graph.nodes),
            "num_edges_after_preprocessing": len(stp_preprocessed.graph.edges),
            "terminals": stp.num_terminals,
            "steiner_cost": greedy_cost,
            "duration": solver._duration
        }
        return history

    experiments = range(1, NUM_EXPERIMENTS_PER_INSTANCE+1)

    with multiprocessing.Pool(processes=NUM_PROCESSES) as p:
        experiments_results = p.map(run_experiment, experiments)

    greedy_history.extend(experiments_results)


Reading: ../data/instances/benchmark/PCSPG-CRR/C01-A.stp
Nodes:  311
Edges:  436
Cost: 108.0 Cost: 108.0 Cost: 108.0 


Cost: 108.0 Cost: 108.0 

Reading: ../data/instances/benchmark/PCSPG-CRR/C05-A.stp
Nodes:  412
Edges:  537
Cost: 2215.0 
Cost: 2215.0 
Cost: 2215.0 
Cost: 2215.0 
Cost: 2215.0 
Reading: ../data/instances/benchmark/PCSPG-CRR/C04-A.stp
Nodes:  372
Edges:  497


Process ForkPoolWorker-91:
Process ForkPoolWorker-95:
Process ForkPoolWorker-85:
Process ForkPoolWorker-94:
Process ForkPoolWorker-88:
Process ForkPoolWorker-87:
Process ForkPoolWorker-93:
Process ForkPoolWorker-92:
Process ForkPoolWorker-90:
Traceback (most recent call last):
Process ForkPoolWorker-96:
Traceback (most recent call last):
Traceback (most recent call last):


KeyboardInterrupt: 

In [9]:
import pandas as pd

df_score_greedy = pd.DataFrame.from_dict(greedy_history)

## Solution obtained with Ant Colony Optimization

In [10]:
import glob

INSTANCES_PATH_PREFIX = '../data/instances/benchmark/PCSPG-CRR'
NUM_EXPERIMENTS_PER_INSTANCE = 5

all_files = glob.glob(os.path.join(INSTANCES_PATH_PREFIX, '*'))

files = all_files

aco_history = []

for filename in files:
    if filename.endswith('.xlsx'): continue
    if filename.endswith('.stp'):
        stp_reader = SteinlibReader()
    else:
        stp_reader = DatReader()

    print(f"Reading: {filename}")
    stp = stp_reader.parser(filename=filename)
    G, terminals = preprocessing(stp.graph, stp.terminals)
    stp_preprocessed = SteinerTreeProblem(graph=G, terminals=terminals)

    def run_experiment(experiment: int):
        aco_params = dict(
            iterations=1,
            num_ants=len(terminals),
            evaporation_rate=0.5,
            alpha=1.0,
            beta=3.0,
            # beta_evaporation_rate=0.2,
            initial_pheromone=0.1,
            pheromone_amount=2.0,
            pheromone_deposit_strategy='traditional',
            pheromone_initialization_strategy='same_value',
            choose_best=0.2,
            log_level='info',
            early_stopping=30,
            normalize_distance_prize=False,
            allow_edge_perturbation=False,
            ant_max_moves=1,
            seed=SEED * experiment
        )
        solver = AntColony(
            graph=stp_preprocessed.graph,
            terminals=stp_preprocessed.terminals,
            **aco_params
        )
        steiner_tree, steiner_cost = solver.solve()

        history = {
            "filename": filename,
            "experiment": experiment,
            "num_nodes": stp.num_nodes,
            "num_edges": stp.num_edges,
            "num_nodes_after_preprocessing": len(stp_preprocessed.graph.nodes),
            "num_edges_after_preprocessing": len(stp_preprocessed.graph.edges),
            "terminals": stp.num_terminals,
            "steiner_cost": steiner_cost,
            "duration": solver._duration
        }
        history.update(aco_params)
        return history

    experiments = range(1, NUM_EXPERIMENTS_PER_INSTANCE+1)

    with multiprocessing.Pool(processes=NUM_PROCESSES) as p:
        experiments_results = p.map(run_experiment, experiments)
    
    aco_history.extend(experiments_results)

Reading: ../data/instances/benchmark/PCSPG-CRR/C01-A.stp


2022-02-02 09:56:07,626 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 18
2022-02-02 09:56:07,627 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 18
2022-02-02 09:56:07,628 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 18
2022-02-02 09:56:07,629 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 18
2022-02-02 09:56:07,629 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 18


Reading: ../data/instances/benchmark/PCSPG-CRR/C05-A.stp


2022-02-02 09:56:08,604 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 1239
2022-02-02 09:56:08,727 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 1239
2022-02-02 09:56:08,679 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 1239
2022-02-02 09:56:08,693 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 1239
2022-02-02 09:56:08,821 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 1239


Reading: ../data/instances/benchmark/PCSPG-CRR/C04-A.stp


2022-02-02 09:56:09,228 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 639
2022-02-02 09:56:09,265 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 639
2022-02-02 09:56:09,291 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 639
2022-02-02 09:56:09,294 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 639
2022-02-02 09:56:09,336 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 639


Reading: ../data/instances/benchmark/PCSPG-CRR/C02-A.stp


2022-02-02 09:56:09,426 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 50
2022-02-02 09:56:09,427 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 50
2022-02-02 09:56:09,428 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 50
2022-02-02 09:56:09,430 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 50
2022-02-02 09:56:09,431 - [aco.py:302] - MainThread - INFO - Best Iteration: 0 - Best Cost: 50


Reading: ../data/instances/benchmark/PCSPG-CRR/PCSPG-CRR - NetworkX.xlsx


UnicodeDecodeError: 'utf-8' codec can't decode byte 0xde in position 16: invalid continuation byte

In [None]:
import pandas as pd

df_score_aco = pd.DataFrame.from_dict(networkx_history)
# df_score_aco.to_csv(os.path.join(INSTANCES_PATH_PREFIX, 'ACO.csv'))