# 1. Data Preparation

## Imports

In [1]:
import os
import shutil
import pandas as pd

## Feasibility filtering


In [2]:
def get_solved_instances(solu_path):
    """Collect the instance names marked as solved in a ``.solu`` file.

    A line counts as solved when it has at least three whitespace-separated
    tokens and its first token is ``=opt=`` or ``=best=``. The second token of
    such a line is taken as the instance name and lowercased.

    Parameters
    ----------
    solu_path : str
        Path of the text file to read.

    Returns
    -------
    set of str
        Lowercased instance names found on the accepted lines.
    """
    accepted_tags = ("=opt=", "=best=")
    with open(solu_path, "r") as handle:
        # split() without arguments already ignores leading/trailing whitespace
        token_rows = (raw.split() for raw in handle)
        return {
            tokens[1].lower()
            for tokens in token_rows
            if len(tokens) >= 3 and tokens[0] in accepted_tags
        }

def copy_solved_mps_files(src_folder, dst_folder, solved_instances):
    """Copy the ``.mps.gz`` files of solved instances into another folder.

    Parameters
    ----------
    src_folder : str
        Folder whose direct entries are scanned for names ending in ``.mps.gz``.
    dst_folder : str
        Destination folder; created (with parents) if it does not exist.
    solved_instances : collection of str
        Lowercased instance names to keep. A file is copied when its name,
        with the trailing ``.mps.gz`` removed and lowercased, is in this
        collection.

    Returns
    -------
    list of str
        Names of the copied files (original case preserved), in sorted order.
    """
    suffix = ".mps.gz"
    os.makedirs(dst_folder, exist_ok=True)
    copied_files = []
    # os.listdir returns entries in arbitrary order; sorting keeps the returned
    # list (and anything written from it) reproducible across runs and machines.
    for file in sorted(os.listdir(src_folder)):
        if not file.endswith(suffix):
            continue
        # Strip only the trailing suffix; str.replace would also remove any
        # occurrence of ".mps.gz" inside the name itself.
        instance = file[: -len(suffix)].lower()
        if instance in solved_instances:
            src_path = os.path.join(src_folder, file)
            dst_path = os.path.join(dst_folder, file)
            shutil.copy2(src_path, dst_path)
            copied_files.append(file)
    print(f"have copied {len(copied_files)} feasible files into {dst_folder}")
    return copied_files

In [3]:
# Input and output locations (relative paths)
solu_file = "../data/raw/miplib2017-v35.solu.txt"
benchmark_folder = "../data/raw/benchmark/"
solved_folder = "../data/raw/benchmark_solved/"

# Parse the .solu file, then copy every benchmark archive whose instance
# name appears in the parsed set into the solved folder.
solved_instances = get_solved_instances(solu_file)
copied_files = copy_solved_mps_files(
    src_folder=benchmark_folder,
    dst_folder=solved_folder,
    solved_instances=solved_instances,
)

have copied 233 feasible files into ../data/raw/benchmark_solved/


In [4]:
# Show the first rows of a one-column table holding the copied file names
df_copied = pd.DataFrame(data=copied_files, columns=["mps_file"])
df_copied.head(5)

Unnamed: 0,mps_file
0,30n20b8.mps.gz
1,50v-10.mps.gz
2,academictimetablesmall.mps.gz
3,air05.mps.gz
4,app1-1.mps.gz


In [7]:
# Persist the names of the copied files, one per line, to
# instances/model_instances.txt (the parent folder is created if needed).
model_list_path = "../data/instances/model_instances.txt"
os.makedirs(os.path.dirname(model_list_path), exist_ok=True)

with open(model_list_path, "w") as out_file:
    out_file.writelines(name + "\n" for name in copied_files)

print(f"Wrote {len(copied_files)} instances into {model_list_path}")

Wrote 233 instances into ../data/instances/model_instances.txt


## Prepare the starter set

In [8]:
# Read "The Benchmark Set.csv" and add lowercased copies of the instance-name
# and status columns under the shorter names "instance" and "status".
benchmark_csv = "../data/raw/The Benchmark Set.csv"
df_benchmark = pd.read_csv(benchmark_csv).assign(
    instance=lambda frame: frame["InstanceInst."].str.lower(),
    status=lambda frame: frame["StatusStat."].str.lower(),
)

In [None]:
# Select instances that are "easy" AND were copied into the solved folder.
# Filtering on status alone is not enough: later cells copy starter files out
# of the solved folder, so an easy instance that was never copied there would
# make that copy fail.
mps_suffix = ".mps.gz"
# copied_files holds file names ending in ".mps.gz"; reduce them to the
# lowercased instance names used in df_benchmark["instance"].
copied_instance_names = {fname[: -len(mps_suffix)].lower() for fname in copied_files}
is_easy = df_benchmark["status"] == "easy"
is_solved = df_benchmark["instance"].isin(copied_instance_names)
df_easy = df_benchmark[is_easy & is_solved]
print("df_easy shape:", df_easy.shape)

df_easy shape: (220, 14)


In [13]:
# Destination folder for the starter subset
starter_folder = "../data/raw/starter"

# Draw five rows from df_easy with a fixed seed (42) so that the same
# instances are picked on every run over the same input frame.
starter_df = df_easy.sample(n=5, random_state=42)
starter_instances = starter_df["instance"].to_list()

print(f"Select starter instances: {starter_instances}")

Select starter instances: ['neos-933966', 'ns1952667', 'neos-1456979', 's250r10', 'bppc4-08']


In [14]:
# Copy each starter instance's .mps.gz file from the solved folder into the
# starter folder. The destination is created first because shutil.copy2 does
# not create missing directories.
os.makedirs(starter_folder, exist_ok=True)

# starter_instances holds lowercased names, while files on disk keep their
# original case. Map lowercased file names to the real ones so the lookup
# also works on case-sensitive filesystems.
solved_by_lower = {fname.lower(): fname for fname in os.listdir(solved_folder)}

starter_files = []
for inst in starter_instances:
    lookup_name = inst + ".mps.gz"
    # Fall back to the lowercased name when there is no match, so a missing
    # file still surfaces as FileNotFoundError from shutil.copy2.
    fname = solved_by_lower.get(lookup_name, lookup_name)
    src = os.path.join(solved_folder, fname)
    dst = os.path.join(starter_folder, fname)
    shutil.copy2(src, dst)
    starter_files.append(fname)

# Write the starter file names (as they exist on disk) into
# instances/starter_instances.txt, one per line.
starter_list_path = "../data/instances/starter_instances.txt"
os.makedirs(os.path.dirname(starter_list_path), exist_ok=True)

with open(starter_list_path, "w") as f:
    for fname in starter_files:
        f.write(fname + "\n")

print(f"copied to: {starter_folder}")
print(f"wrote into: {starter_list_path}")

copied to: ../data/raw/starter
wrote into: ../data/instances/starter_instances.txt
