In [17]:
import mlphep as hep
hep.style.use("CMS")


class PlotRegression:
    def __init__(self, model, test_loader, batch_size):
        self.model = model
        self.test_loader = test_loader
        self.batch_size = batch_size
        self.pt_pred_arr = []
        self.pt_truth_arr = []

    def evaluate(self):
        with torch.no_grad():
            for data in self.test_loader:
                out = self.model(data)
                for item in range(0, out.size(0)):
                    vector_pred = out[item]
                    vector_real = data[item].y
                    self.pt_pred_arr.append(vector_pred.item())
                    self.pt_truth_arr.append(vector_real.item())

    def plot_regression(self, output_dir):
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        plt.clf()
        print(f"Plotting regression in {output_dir}")
        plt.hist(self.pt_truth_arr, bins=100, color='skyblue', alpha=0.5, label="truth")
        plt.hist(self.pt_pred_arr, bins=100, color='g', alpha=0.5, label="prediction")
        plt.legend()
        hep.cms.label("Preliminary")
        plt.savefig(os.path.join(output_dir, "pt_regression.png"))

        plt.clf()

        print(f"Plotting scatter in {output_dir}")
        plt.plot(self.pt_truth_arr, self.pt_pred_arr, 'o')
        plt.xlabel("Truth")
        plt.ylabel("Prediction")
        hep.cms.label("Preliminary")
        plt.savefig(os.path.join(output_dir, "pt_regression_scatter.png"))
        plt.clf()

        print(f"Plotting difference in {output_dir}")
        # plot difference between truth and prediction
        diff = [x - y for x, y in zip(self.pt_truth_arr, self.pt_pred_arr)]
        plt.hist(diff, bins=100, color='r', alpha=0.5, label="difference")
        plt.legend()
        hep.cms.label("Preliminary")
        plt.savefig(os.path.join(output_dir, "pt_regression_diff.png"))
        plt.clf()

In [21]:
import torch

## check if EOS folder exists otherwise use local folder
if os.path.exists("/eos/cms/store/user/folguera/L1TMuon/INTREPID/Graphs_v240725_241015/"):
    GraphDIR = "/eos/cms/store/user/folguera/L1TMuon/INTREPID/Graphs_v240725_241015/"
else:
    GraphDIR = "../graph_folder/"
using_only = 20  ## number of files used
print(GraphDIR)


Allgraphs = []
all_files = os.listdir(GraphDIR)

# Filter for .pkl files
pkl_files = [f for f in all_files if f.endswith('.pkl') and '_3_' in f]
print(f"Using files: {pkl_files}")
if not pkl_files:
    print("No .pkl files found in the directory.")
    sys.exit()


using_only = 5
count_files = 0
for pkl_file in pkl_files:
    if count_files >= using_only: break
    file_path = os.path.join(GraphDIR, pkl_file)
    print(f"Loading file: {pkl_file}")
    with open(file_path, 'rb') as file:
        graphfile = torch.load(file)
        Allgraphs.append(graphfile)
    count_files+=1


../graph_folder/
Using files: ['vix_graph_3_15Oct_onlypt_009.pkl', 'vix_graph_3_15Oct_onlypt_008.pkl', 'vix_graph_3_15Oct_onlypt_005.pkl', 'vix_graph_3_15Oct_onlypt_004.pkl', 'vix_graph_3_15Oct_onlypt_006.pkl', 'vix_graph_3_15Oct_onlypt_007.pkl', 'vix_graph_3_15Oct_onlypt_003.pkl', 'vix_graph_3_15Oct_onlypt_002.pkl', 'vix_graph_3_15Oct_onlypt_001.pkl']
Loading file: vix_graph_3_15Oct_onlypt_009.pkl


  graphfile = torch.load(file)


Loading file: vix_graph_3_15Oct_onlypt_008.pkl


  graphfile = torch.load(file)


Loading file: vix_graph_3_15Oct_onlypt_005.pkl
Loading file: vix_graph_3_15Oct_onlypt_004.pkl
Loading file: vix_graph_3_15Oct_onlypt_006.pkl


In [23]:
BatchSize=64

Graphs_for_training = sum(Allgraphs, [])
Graphs_for_training_reduced = Graphs_for_training
Graphs_for_training_filtered = [g for g in Graphs_for_training_reduced if g.edge_index.size(1) > 0]  # remove empty graphs

# remove extra dimenson in y
print(f"Total Graphs: {len(Graphs_for_training)}")
for i in range(0, len(Graphs_for_training_filtered)):
    Graphs_for_training_filtered[i].y = Graphs_for_training_filtered[i].y.mean(dim=0)

print(f"Total Graphs after filtering: {len(Graphs_for_training_filtered)}")

# Train and test split:
events = len(Graphs_for_training_filtered)
ntrain = int((events * 0.7) / BatchSize) * BatchSize  # to have full batches
print(f"Training events: {ntrain}")
train_dataset = Graphs_for_training_filtered[:ntrain]
test_dataset = Graphs_for_training_filtered[ntrain:ntrain * 2]

print("====================================")
print("Example of data (after normalization):")
print(train_dataset[0].x)
print(train_dataset[0].edge_index)
print(train_dataset[0].edge_attr)
print(train_dataset[0].deltaPhi)
print(train_dataset[0].deltaEta)
print(train_dataset[0].y)
print("====================================")



Total Graphs: 279461
Total Graphs after filtering: 279332
Training events: 195520
Example of data (after normalization):
tensor([[1.0005e+00, 1.7940e+03, 4.3113e+02, 0.0000e+00, 3.0000e+00],
        [9.4612e-01, 1.7750e+03, 4.1368e+02, 1.0000e+01, 5.0000e+00],
        [8.9175e-01, 1.7780e+03, 4.4868e+02, 1.1000e+01, 5.0000e+00],
        [9.7875e-01, 1.7890e+03, 1.0766e+03, 6.0000e+00, 9.0000e+00],
        [1.0005e+00, 1.7810e+03, 1.0914e+03, 1.5000e+01, 5.0000e+00]],
       dtype=torch.float64)
tensor([[0, 0, 1, 1, 2, 2, 3, 4],
        [1, 2, 0, 2, 0, 1, 4, 3]])
None
tensor([-19, -16, -19,   3, -16,   3,  -8,  -8])
tensor([-0.0544, -0.1088, -0.0544, -0.0544, -0.1088, -0.0544,  0.0217,  0.0217])
tensor(-64.5507)


In [32]:
from torch_geometric.loader import DataLoader

train_loader = DataLoader(train_dataset, batch_size=BatchSize, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=1, shuffle=False)

model = torch.load("../tools/training/Bsize64_lr5e-4_NOnormNodes/model_120.pth")
            
evaluator = PlotRegression(model, test_loader, batch_size=BatchSize)
evaluator.evaluate()
evaluator.plot_regression(output_dir="../model_folder/")


  model = torch.load("../tools/training/Bsize64_lr5e-4_NOnormNodes/model_120.pth")


AttributeError: Can't get attribute 'GATRegressor' on <module 'models' (<_frozen_importlib_external.NamespaceLoader object at 0x300c9f310>)>