In [1]:
import os
import pandas as pd
if os.name == 'nt':
    os.environ["OMP_NUM_THREADS"] = '2'
import warnings
warnings.filterwarnings('ignore')
import sys
sys.path.append('../')
from utiliT.io import read_pickle, dump_pickle
from scripts.instance import *
from scripts.benders import *
# Imported after the star imports so these names cannot be shadowed by them.
import gc
import time
from tqdm.notebook import tqdm

In [None]:
# Smoke test: load one pickled instance and solve it with single_cut.
# Plain string literal: the path has no placeholders, so no f-prefix is needed.
file_name = "../data/multipeak_100_100_400.pkl"
instance = read_pickle(file_name)
# Kept in `results` so the next cell can display it.
results = single_cut(instance)

In [None]:
# Show the value returned by single_cut(instance) above as this cell's output.
results

In [None]:
def runner(function, files = None):
    """Apply ``function`` to every instance file and pickle each result.

    Parameters
    ----------
    function : callable
        Called with the object loaded from each file. Its return value must
        support item assignment and provide a ``'method'`` key, which is used
        to build the output file name.
    files : iterable of str, optional
        File names inside ``../data``. When ``None``, every entry of
        ``../data`` is processed.

    Returns
    -------
    list of str
        File names whose loading, solving or saving raised an exception.
        Each failure is also printed (file name, then the exception) and the
        batch continues with the next file.
    """
    bad_instances = []
    if files is None:
        files = os.listdir("../data")
    for file in tqdm(files):
        # Strip the extension whatever its length (identical to file[:-4] for "*.pkl").
        instance_name = os.path.splitext(file)[0]
        instance = result = None
        try:
            # Loading is inside the try so one unreadable file cannot abort the batch.
            instance = read_pickle(f"../data/{file}")
            result = function(instance)
            result['instance_name'] = instance_name
            file_name = f"../results/run_results/{result['instance_name']}_{result['method']}.pkl"
            dump_pickle(result, file_name)
        except Exception as e:
            bad_instances.append(file)
            print(file)
            print(e)
        finally:
            # Drop references and collect after every file, including failed ones.
            instance = result = None
            gc.collect()
    return bad_instances

In [None]:
def multi_runner(functions, files = None):
    """Run each callable in ``functions`` through ``runner``, one after another.

    Parameters
    ----------
    functions : sequence of callables
        Each one is handed to ``runner`` in order; ``len(functions)`` is
        printed first, so the argument must be sized.
    files : iterable of str, optional
        Forwarded unchanged to ``runner`` for every callable.
    """
    print(f"Running {len(functions)} functions")
    # 1-based position, printed before the corresponding run starts.
    for position, solver in enumerate(functions, start=1):
        print(f"Running function: {position}")
        runner(solver, files=files)
        gc.collect()

In [None]:
def clean_results(terms, results_dir = "../results/run_results"):
    """Delete every file in ``results_dir`` whose name contains one of ``terms``.

    Parameters
    ----------
    terms : str or iterable of str
        Substrings to look for in the file names. A single string is treated
        as one term rather than being iterated character by character, which
        would match (and delete) almost every file.
    results_dir : str, optional
        Directory to clean. Defaults to the run-results folder.

    Notes
    -----
    Matching is a plain substring test, so an empty-string term matches
    every file in the directory.
    """
    if isinstance(terms, str):
        terms = [terms]
    for term in terms:
        # Re-list on every pass so files already removed for an earlier term
        # are not removed a second time.
        for name in os.listdir(results_dir):
            if term in name:
                os.remove(os.path.join(results_dir, name))

In [None]:
# Batch 1: dropout_cut once per method name, followed by multi_cut and single_cut.
# `method=method` binds the current name to each lambda at creation time.
functions = [
    lambda x, method=method: dropout_cut(x, method)
    for method in ("kmeans", "affinity", "hierarchical", "spectral", "random")
]
functions += [multi_cut, single_cut]
multi_runner(functions)

In [None]:
# Batch 2: hybrid once per method name.
# `method=method` binds the current name to each lambda at creation time.
functions = [
    lambda x, method=method: hybrid(x, method)
    for method in ("kmeans", "affinity", "hierarchical", "spectral", "random")
]
multi_runner(functions)

In [None]:
# Batch 3: dropout_cut and then hybrid, each once per method name, all with dr = True.
# The outer loop is the solver, so all dropout_cut runs precede all hybrid runs.
functions = [
    lambda x, solver=solver, method=method: solver(x, method, dr = True)
    for solver in (dropout_cut, hybrid)
    for method in ("kmeans", "affinity", "hierarchical", "spectral", "random")
]
multi_runner(functions)

In [3]:
# Time clustering_scenarios for every instance file, method name and dr setting,
# then save one row per timed call to a CSV file.
files = os.listdir("../data")
methods = ['kmeans','affinity','hierarchical','spectral','random']
dicts = []
for file in tqdm(files):
    instance = read_pickle(f"../data/{file}")
    for method in methods:
        # dr = False is timed before dr = True, so rows keep the original order.
        for dr in (False, True):
            # perf_counter is monotonic, so the interval cannot be skewed by
            # system clock adjustments the way time.time() differences can.
            start = time.perf_counter()
            clustering_scenarios(instance, method, dr = dr)
            elapsed = time.perf_counter() - start
            dicts.append({
                "group_method": method,
                "dr": dr,
                "clust_runtime": elapsed,
                "n1": instance.s1_n_var,
                "n2": instance.s2_n_var,
                "k": instance.k,
                "distribution": instance.distribution
            })
# Build the frame once from the list of records instead of growing it row by row.
all_results = pd.DataFrame(dicts)
all_results.to_csv('../results/compiled_results/clustering_info.csv')

  0%|          | 0/192 [00:00<?, ?it/s]