In [5]:
### Import packages ###
import os
import numpy as np
import pandas as pd

In [6]:
### Directory ###
# Resolve the project root as the folder three levels above the current
# working directory, then derive the folder the .sbatch files are written to.
cwd = os.getcwd()
ParentDirectory = os.path.abspath(os.path.join(cwd, os.pardir, os.pardir, os.pardir))
TargetDirectory = os.path.join(ParentDirectory, "Code", "Cluster", "Simulations")

### Open Parameter Vector ###
# Load the table of simulation parameters from the project's raw-data folder.
ParameterVectorPath = os.path.join(ParentDirectory, "Data", "raw", "ParameterVectorSimulations.csv")
ParameterVector = pd.read_csv(ParameterVectorPath)

In [7]:
# Command-line options forwarded to Code/RunSimulation.py, in the order they
# are written to the script. Each name is also the column read from the row.
_SIMULATION_OPTIONS = [
    "JobName",
    "Seed",
    "Data",
    "TestProportion",
    "CandidateProportion",
    "SelectorType",
    "ModelType",
    "DataArgs",
    "SelectorArgs",
    "ModelArgs",
    "Output",
]


def _build_sbatch_lines(row):
    """Return the lines of the Slurm batch script for one parameter row.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series)
        Must provide a value for every name in ``_SIMULATION_OPTIONS``.

    Returns
    -------
    list of str
        Script lines without trailing newlines: the ``#SBATCH`` header,
        the environment setup, and the ``python Code/RunSimulation.py``
        command with one ``--Option value`` continuation line per option.

    Raises
    ------
    KeyError
        If ``row`` lacks one of the expected columns.
    """
    job_name = row["JobName"]

    header = [
        "#!/bin/bash",
        f"#SBATCH --job-name={job_name}",
        "#SBATCH --partition=short",
        "#SBATCH --ntasks=1",
        "#SBATCH --time=11:59:00",
        "#SBATCH --mem-per-cpu=30000",
        f"#SBATCH -o ClusterMessages/out/myscript_{job_name}_%j.out",
        f"#SBATCH -e ClusterMessages/error/myscript_{job_name}_%j.err",
        "#SBATCH --mail-type=ALL",
        "#SBATCH --mail-user=simondn@uw.edu",
        "",
        "cd ~/RashomonActiveLearning",
        "module load Python",
        "python Code/RunSimulation.py \\",
    ]

    # Every value is interpolated through an f-string, so a JobName that
    # pandas parsed as a number no longer raises TypeError (the original
    # concatenated it with `+`, which only works for str).
    # NOTE(review): values are written to the shell command unquoted; a value
    # containing spaces or shell metacharacters will be word-split by bash
    # unless the CSV already carries the quoting -- confirm against the CSV.
    arguments = [f"    --{name} {row[name]}" for name in _SIMULATION_OPTIONS]

    # All argument lines but the last end with a shell line continuation.
    continued = [f"{argument} \\" for argument in arguments[:-1]]
    return header + continued + [arguments[-1]]


# Write one <JobName>.sbatch file per row of the parameter table.
for _, row in ParameterVector.iterrows():
    sbatch_file_path = os.path.join(TargetDirectory, f"{row['JobName']}.sbatch")

    # Ensure the destination directory exists before writing.
    os.makedirs(os.path.dirname(sbatch_file_path), exist_ok=True)
    with open(sbatch_file_path, "w") as sbatch_file:
        sbatch_file.write("\n".join(_build_sbatch_lines(row)))

print("Sbatch files generated successfully.")


Sbatch files generated successfully.


In [4]:
import pickle

# Location of the pickled simulation result to inspect.
# NOTE(review): this is an absolute, machine-specific path; the cell only
# runs where this exact file exists.
results_path = "/Users/simondn/Documents/RashomonActiveLearning/Results/Seed14_DataSimulate_TP0.2_CP0.8_STiGS_MTRandomForestRegressor.pkl"

# Deserialize the stored object from the binary file.
# NOTE(review): pickle.load can execute arbitrary code; only open trusted files.
with open(results_path, "rb") as file:
    df = pickle.load(file)


In [11]:
import numpy as np
import pandas as pd

In [15]:
# Show the entry stored under the "SimulationParameters" key of the object
# loaded from the pickle file.
df["SimulationParameters"]

{'DataFileInput': 'Simulate',
 'Seed': '14',
 'TestProportion': '0.2',
 'CandidateProportion': '0.8',
 'SelectorType': '<function iGSFunction at 0x7f244bfee020>',
 'ModelType': '<function LinearRegressionFunction at 0x7f244bfee5c0>',
 'DataArgs': "{'N': 1000, 'K': 5}",
 'SelectorArgs': "{'df_Candidate':             Y        X1        X2        X3        X4        X5\n190 -2.672417  0.315955  0.536081  0.700329  0.507978  0.697347\n692 -0.108694  0.671742  0.920083  0.004231  0.532617  0.369186\n659  0.690482  0.979350  0.450078  0.974579  0.630690  0.612384\n564  1.046877  0.051091  0.426775  0.961120  0.352643  0.133231\n856 -0.249669  0.655063  0.963146  0.110788  0.266243  0.003216\n..        ...       ...       ...       ...       ...       ...\n960 -3.488292  0.088270  0.268384  0.731328  0.814591  0.442803\n587  1.028138  0.072046  0.631174  0.458126  0.855897  0.706635\n925 -0.370125  0.520122  0.541692  0.411592  0.836630  0.837720\n180 -2.621933  0.186245  0.452627  0.603598  