In [1]:
import sys
sys.path.append('../')
import torch
import pandas as pd
from torch.utils.data import Dataset, DataLoader
from utils.system import read_ir_from_file


In [2]:
class DevMapDataset(Dataset):
    """Map-style dataset over one platform's device-mapping runtime table.

    Every item pairs the text returned by ``read_ir_from_file`` for a kernel's
    ``.ll`` file with the oracle label and the CPU/GPU runtimes stored for that
    kernel in ``cgo17-<platform>.csv``.
    """

    def __init__(self, data_path, platform):
        """Load the file listing and the runtime table for ``platform``.

        Args:
            data_path: Directory containing ``all.txt``,
                ``cgo17-<platform>.csv`` and the ``kernels_ir/`` folder.
            platform: Key of ``device_dict`` (``"amd"`` or ``"nvidia"``).

        Raises:
            KeyError: If ``platform`` is not a key of ``device_dict``.
        """
        super().__init__()

        # Raw string: "\s" inside a plain literal is an invalid escape
        # sequence and triggers a warning on current Python versions.
        llfiles = pd.read_csv(data_path + "/all.txt", sep=r"\s+")
        self.fileNum = llfiles["FileNum"]
        self.filesname = llfiles["ProgramName"]

        self.device_dict = {"amd": "AMD Tahiti 7970", "nvidia": "NVIDIA GTX 970"}
        self.platform_name = self.device_dict[platform]

        # Load runtime data
        self.df = pd.read_csv(data_path + "/cgo17-{}.csv".format(platform), index_col=0)

        # Kernel file stem: "<benchmark>" when the dataset column is
        # "default", "<benchmark>_<dataset>" for every other row.
        is_default = self.df["dataset"] == "default"
        self.df["bench_data"] = self.df["benchmark"].where(
            is_default, self.df["benchmark"] + "_" + self.df["dataset"]
        )
        self.df["bench_data_path"] = data_path + "/kernels_ir/" + self.df["bench_data"] + ".ll"

    def __getitem__(self, index):
        """Return ``(ir, label, runtime_cpu, runtime_gpu)`` for row ``index``.

        ``index`` is positional (``DataFrame.iloc``). ``label`` is the raw
        value of the ``oracle`` column.
        """
        # One positional lookup instead of one per field.
        row = self.df.iloc[index]
        ir = read_ir_from_file(row["bench_data_path"])

        return ir, row["oracle"], row["runtime_cpu"], row["runtime_gpu"]

    def __len__(self):
        """Number of rows in the runtime table."""
        return len(self.df)
    
    
# Location of the device-mapping data, relative to the notebook's working directory.
data_path = '../../data/opencl_device_mapping'
# Build the dataset from the NVIDIA runtime table (cgo17-nvidia.csv).
dataset = DevMapDataset(data_path, 'nvidia')
# Number of rows in the runtime table.
print(len(dataset))
# Spot-check a single sample; as the last expression of the cell, the
# (ir, label, runtime_cpu, runtime_gpu) tuple is shown as the cell output.
dataset[200]


669


('; ModuleID = \'npb-CG-makea_6.cl\'\nsource_filename = "npb-CG-makea_6.cl"\ntarget datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"\ntarget triple = "x86_64-apple-macosx10.13.0"\n\n; Function Attrs: nounwind ssp uwtable\ndefine spir_kernel void @makea_6(double* nocapture, double* nocapture readonly, i32* nocapture readonly, i32* nocapture, i32* nocapture readonly, i32, i32) local_unnamed_addr #0 !kernel_arg_addr_space !4 !kernel_arg_access_qual !5 !kernel_arg_type !6 !kernel_arg_base_type !6 !kernel_arg_type_qual !7 {\n  %8 = tail call i64 @_Z13get_global_idj(i32 0) #2\n  %9 = trunc i64 %8 to i32\n  %10 = icmp slt i32 %9, %6\n  br i1 %10, label %11, label %112\n\n; <label>:11:                                     ; preds = %7\n  %12 = sext i32 %5 to i64\n  %13 = getelementptr inbounds i32, i32* %4, i64 %12\n  %14 = icmp sgt i32 %9, 0\n  %15 = shl i64 %8, 32\n  %16 = ashr exact i64 %15, 32\n  br i1 %14, label %17, label %25\n\n; <label>:17:                                     ; pred

In [None]:
import os
import pickle
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import StratifiedKFold
from torch.utils.data import DataLoader, TensorDataset

# Assume `model` is a PyTorch model class
class MyModel(nn.Module):
    """Two-layer feed-forward binary classifier.

    A linear layer of width ``dense_layer_size`` followed by ReLU, then a
    single-unit linear layer followed by a sigmoid.
    """

    def __init__(self, embedding_dim, dense_layer_size):
        """Create the hidden and output layers.

        Args:
            embedding_dim: Size of the last dimension of the input.
            dense_layer_size: Number of hidden units.
        """
        super().__init__()
        self.dense_layer = nn.Linear(in_features=embedding_dim, out_features=dense_layer_size)
        # Binary classification: a single output value.
        self.output_layer = nn.Linear(in_features=dense_layer_size, out_features=1)

    def forward(self, x):
        """Return sigmoid-activated outputs; the last dimension has size 1."""
        hidden = self.dense_layer(x).relu()
        return self.output_layer(hidden).sigmoid()

def _train_network(net, features, targets, num_epochs, batch_size):
    """Fit ``net`` in place on one fold using Adam and binary cross-entropy."""
    optimizer = optim.Adam(net.parameters())
    # BCELoss expects probabilities; MyModel.forward ends in a sigmoid.
    criterion = nn.BCELoss()
    loader = DataLoader(TensorDataset(features, targets), batch_size=batch_size, shuffle=True)

    net.train()
    for _ in range(num_epochs):
        for xb, yb in loader:
            optimizer.zero_grad()
            loss = criterion(net(xb).view(-1), yb)
            loss.backward()
            optimizer.step()


def _predict_labels(net, features, batch_size):
    """Return the rounded network outputs (0.0 / 1.0) for ``features`` as a list."""
    net.eval()
    predictions = []
    with torch.no_grad():
        for (xb,) in DataLoader(TensorDataset(features), batch_size=batch_size):
            predictions.extend(net(xb).view(-1).round().numpy())
    return predictions


def evaluate(model, device, data_folder, out_folder, embeddings,
             dense_layer_size, print_summary, num_epochs, batch_size) -> pd.DataFrame:
    """Run 10-fold stratified cross-validation of the device-mapping classifier.

    For every platform and fold a fresh ``MyModel`` is trained (or its saved
    weights are loaded), the test split is predicted, and one result row per
    test sample is collected. Trained weights and predictions are cached under
    ``out_folder`` and reused on later runs.

    The helpers ``platform2str``, ``encode_srcs``, ``auxiliary_inputs``,
    ``escape_benchmark_name`` and ``escape_suite_name`` are not defined in this
    cell and must already exist in the notebook namespace.

    Args:
        model: Only its class name is used, to name the cache files and fill
            the "Model" column. The network trained in each fold is always a
            new ``MyModel``.
        device: ``'all'`` to evaluate both ``"amd"`` and ``"nvidia"``,
            otherwise a single platform key.
        data_folder: Folder containing ``cgo17-<platform>.csv``; also handed
            to ``encode_srcs``.
        out_folder: Root folder for the ``models/`` and ``predictions/``
            caches (created if missing).
        embeddings: 2-D embedding matrix; rows are L2-normalised and then
            indexed with the values returned by ``encode_srcs``.
        dense_layer_size: Hidden width passed to ``MyModel``.
        print_summary: Accepted for interface compatibility; not used.
        num_epochs: Number of training epochs per fold.
        batch_size: Mini-batch size for training and prediction.

    Returns:
        DataFrame with columns "Model", "Platform", "Benchmark",
        "Benchmark Suite", "Oracle Mapping", "Predicted Mapping", "Correct?"
        and "Speedup".

    Raises:
        ValueError: If cached predictions for a fold do not match the size of
            that fold's test split.
    """
    # Create device list
    device_list = ["amd", "nvidia"] if device == 'all' else [device]

    # Resolve the name once, from the argument. The original rebound `model`
    # inside the fold loop, so later folds could be cached under another name.
    model_basename = model.__class__.__name__

    # Loop-invariant, so computed once. Cast to float32 to match the default
    # dtype of the nn.Linear weights the rows are fed into.
    embedding_matrix_normalized = torch.nn.functional.normalize(
        torch.as_tensor(embeddings, dtype=torch.float32), p=2, dim=1
    )

    n_splits = 10
    data = []
    for platform in device_list:
        platform_name = platform2str(platform)

        # Load runtime data
        data_file = os.path.join(data_folder, f"cgo17-{platform}.csv")
        print('\n--- Read data from', data_file)
        df = pd.read_csv(data_file)

        # Encode input source codes
        sequences, _ = encode_srcs(data_folder, df)
        sequences_tensor = torch.as_tensor(sequences, dtype=torch.int64)

        embedding_input = embedding_matrix_normalized[sequences_tensor]
        if embedding_input.dim() == 3:
            # One row of token ids per sample yields (samples, tokens, dim).
            # The network emits one value per input row while there is a
            # single label per sample, so without a reduction the loss sees
            # samples*tokens outputs against samples targets and raises.
            # NOTE(review): mean pooling also averages any padding positions;
            # confirm this reduction matches the intended model.
            embedding_input = embedding_input.mean(dim=1)

        # NOTE(review): kept from the original, but the result is not consumed
        # by the network trained below.
        aux_in = auxiliary_inputs(df)

        # Optimal mappings: 1 = GPU, 0 = anything else
        y = np.array([1 if x == "GPU" else 0 for x in df["oracle"].values])
        y_tensor = torch.tensor(y, dtype=torch.float32)

        # 10-fold cross-validation
        kf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)
        for j, (train_index, test_index) in enumerate(kf.split(sequences, y)):
            print('--- Cross validation step [', j, '/ ', n_splits, ']')

            model_path = os.path.join(out_folder, f"models/{model_basename}-{platform}-{j}.model")
            predictions_path = os.path.join(out_folder, f"predictions/{model_basename}-{platform}-{j}.result")
            # Saving fails if the cache folders do not exist yet.
            os.makedirs(os.path.dirname(model_path), exist_ok=True)
            os.makedirs(os.path.dirname(predictions_path), exist_ok=True)

            if os.path.exists(predictions_path):
                # load result from cache
                print("\tFound predictions in", predictions_path, ", skipping...")
                # SECURITY: pickle executes code on load; only point
                # out_folder at caches written by this function.
                with open(predictions_path, 'rb') as infile:
                    p = pickle.load(infile)
            else:
                # Fresh network per fold, under its own name so the `model`
                # argument is never overwritten.
                net = MyModel(embedding_dim=embedding_matrix_normalized.shape[1],
                              dense_layer_size=dense_layer_size)

                if not os.path.exists(model_path):
                    # Train model
                    print('\n--- Training model... ')
                    _train_network(net, embedding_input[train_index], y_tensor[train_index],
                                   num_epochs, batch_size)
                    torch.save(net.state_dict(), model_path)
                    print('\tsaved model to', model_path)
                else:
                    # Load the saved model
                    net.load_state_dict(torch.load(model_path))
                    print("\n\tFound trained model in", model_path, ", skipping...")

                # Test model
                print('\n--- Testing model... ')
                p = _predict_labels(net, embedding_input[test_index], batch_size)

                # Cache results
                with open(predictions_path, 'wb') as outfile:
                    pickle.dump(p, outfile)
                print('\tsaved predictions to', predictions_path)

            # Explicit check instead of an assert, so it also runs under -O
            # and names the likely cause.
            if len(p) != len(test_index):
                raise ValueError(
                    f"{len(p)} predictions for {len(test_index)} test samples "
                    f"(stale cache in {predictions_path}?)"
                )

            # Benchmark names and true values
            benchmarks = df['benchmark'].values[test_index]
            o = y[test_index]
            correct = (np.array(p) == o)

            # Static baseline device: CPU on "amd", GPU on every other platform.
            zero_r_dev = "runtime_cpu" if platform == "amd" else "runtime_gpu"
            zero_r_runtimes = df[zero_r_dev].values[test_index]
            # Column 0 = CPU runtime, column 1 = GPU runtime, so the predicted
            # label (0/1) doubles as the column index of the chosen device.
            runtimes = df[['runtime_cpu', 'runtime_gpu']].values[test_index]
            chosen = np.array(p, dtype=int)
            p_runtimes = runtimes[np.arange(len(chosen)), chosen]
            p_speedup = zero_r_runtimes / p_runtimes

            for benchmark_, o_, p_, correct_, p_speedup_ in zip(benchmarks, o, p, correct, p_speedup):
                data.append({
                    "Model": model_basename,
                    "Platform": platform_name,
                    'Benchmark': escape_benchmark_name(benchmark_),
                    'Benchmark Suite': escape_suite_name(benchmark_),
                    "Oracle Mapping": o_,
                    "Predicted Mapping": p_,
                    "Correct?": correct_,
                    "Speedup": p_speedup_,
                })

    # Convert to DataFrame
    return pd.DataFrame(data)
