# Monte Carlo Files Merger

Combine multiple Monte Carlo simulation ROOT files into a single merged file for streamlined analysis and centralized fit result storage.

- **Method**: Tree-based merging with event copying and branch preservation across multiple input files
- **Output**: Single merged ROOT file containing a combined TTree with all MC events from the input files
- **Customization**: Configurable tree names, event limits per file, and output naming conventions

## **Important Requirement: Manual File List Required**

**The following code cell requires manual input of the file list for each merge operation:**
- Update `files_to_merge` list with specific MC file names for your analysis
- Modify `output_filename` to reflect the decay mode and data period
- Adjust `tree_name` parameter if using different TTree structures
- **This step has not been automated yet** - file lists must be manually specified for each decay mode/block combination

## Usage Notes

- **File Organization**: Input files should be organized by decay mode (B2CC, B2OC) and data period
- **Tree Compatibility**: All input files must contain the same TTree structure for successful merging
- **Event Limits**: Optional `max_events_per_file` parameter for testing or data size management
- **Error Handling**: Automatic skipping of missing files with detailed progress reporting

In [None]:
import ROOT as r
import os
from pathlib import Path

# Directory holding the Monte Carlo ROOT files (relative path).
MC_DATA = Path("data") / "monte_carlo"

def merge_mc_files(input_files, output_filename, tree_name="ST-b2cc", max_events_per_file=None):  # Change TTree name if needed
    """
    Merge multiple ROOT files containing MC simulation data into a single file.

    The output tree is created as an empty clone of the first readable input
    tree; entries from every input tree are then copied into it one by one.
    Input files that do not exist, cannot be opened, or do not contain
    ``tree_name`` are skipped with a printed message.

    Parameters:
    -----------
    input_files : list
        List of input ROOT file paths (``str`` or ``os.PathLike``)
    output_filename : str
        Name of the output merged file (``str`` or ``os.PathLike``);
        the file is opened in "RECREATE" mode
    tree_name : str
        Name of the tree to merge (default: "ST-b2cc")
    max_events_per_file : int or None
        Maximum number of events to take from each file (None = all events)

    Returns:
    --------
    tuple: (success: bool, total_events: int, merged_filename: str)
        ``(False, 0, "")`` if no input file exists or the output file cannot
        be created. Otherwise ``success`` is True when at least one input
        tree was processed, and ``merged_filename`` is ``output_filename``
        as passed in. Note that the output file is created even when no
        tree could be processed.
    """

    print(f" Starting merge of {len(input_files)} files...")
    print(f" Output file: {output_filename}")
    print(f" Tree name: {tree_name}")

    # Check if input files exist. ROOT's C++ API expects plain strings, so
    # path-like objects (e.g. pathlib.Path) are normalised here.
    existing_files = []
    for candidate in input_files:
        fname = os.fspath(candidate)
        if os.path.exists(fname):
            existing_files.append(fname)
        else:
            print(f"  Warning: File {fname} not found, skipping...")

    if not existing_files:
        print(" No valid input files found!")
        return False, 0, ""

    print(f" Found {len(existing_files)} valid files to merge")

    # Create output file
    output_file = r.TFile(os.fspath(output_filename), "RECREATE")
    if not output_file or output_file.IsZombie():
        print(f" Failed to create output file: {output_filename}")
        return False, 0, ""

    # Initialize variables for merging
    output_tree = None
    total_events = 0
    successful_files = 0

    try:
        # Process each input file
        for i, fname in enumerate(existing_files):
            print(f"\n Processing file {i+1}/{len(existing_files)}: {fname}")

            # Open input file
            input_file = r.TFile.Open(fname, "READ")
            if not input_file or input_file.IsZombie():
                print(f" Failed to open {fname}, skipping...")
                continue

            try:
                # Get the tree
                input_tree = input_file.Get(tree_name)
                if not input_tree:
                    print(f" Tree '{tree_name}' not found in {fname}, skipping...")
                    continue

                n_entries = input_tree.GetEntries()
                if max_events_per_file is None:
                    events_to_copy = n_entries
                else:
                    # Clamp at zero so a negative limit means "copy nothing".
                    events_to_copy = max(0, min(n_entries, max_events_per_file))

                print(f"    Events in file: {n_entries}")
                print(f"    Events to copy: {events_to_copy}")

                # Enable all branches *before* cloning: CloneTree only copies
                # branches that are active at the time of the call.
                input_tree.SetBranchStatus("*", 1)

                if output_tree is None:
                    # First usable file: clone the structure (no events) into
                    # the output file. The clone shares the input tree's
                    # branch buffers, so Fill() stores what GetEntry() read.
                    output_file.cd()
                    output_tree = input_tree.CloneTree(0)
                    print(f"Created output tree structure")
                else:
                    # Later files: point the output branches at THIS input
                    # tree's buffers; otherwise Fill() would keep reading the
                    # buffers of an earlier, already closed, input tree.
                    input_tree.CopyAddresses(output_tree)

                # Copy events
                copied_events = 0
                for entry in range(events_to_copy):
                    if input_tree.GetEntry(entry) > 0:
                        output_tree.Fill()
                        copied_events += 1

                    # Progress indicator
                    if (entry + 1) % 10000 == 0 or entry == events_to_copy - 1:
                        print(f"Copied {entry + 1}/{events_to_copy} events", end='\r')

                print(f"\n    Successfully copied {copied_events} events")
                total_events += copied_events
                successful_files += 1

                # Detach the output tree from this input tree's buffers before
                # the input file (and with it the buffers) goes away.
                input_tree.CopyAddresses(output_tree, True)
            finally:
                input_file.Close()

        # Write output tree
        if output_tree is not None:
            output_file.cd()
            output_tree.Write()
            print(f"\n Merge completed successfully!")
            print(f"    Total events merged: {total_events}")
            print(f"    Files processed: {successful_files}/{len(existing_files)}")
            print(f"    Output saved to: {output_filename}")
        else:
            print(f"\n No trees were successfully processed!")
    finally:
        # Always release the output file handle, even if copying failed.
        output_file.Close()

    return successful_files > 0, total_events, output_filename

# Example run: list the MC files to combine, one per line.
# This list must be filled in by hand for every merge.
files_to_merge = [
    MC_DATA / "jpsipi_00307042_00000002_1.highstats-Friend-B2CC-W4042-UP.root",
    MC_DATA / "jpsipi_00307042_00000001_1.highstats-Friend-B2CC-W4042-UP.root",
]

# Combine every listed file into a single output file.
success, total_events, merged_file = merge_mc_files(
    input_files=files_to_merge,
    output_filename="2024_MC_B2CC_JPSIPI_B8.root",
    tree_name="ST-b2cc",
    max_events_per_file=None,
)

# Report the outcome of the merge.
if not success:
    print(f"\n FAILED to merge files!")
else:
    print(f"\n SUCCESS! Merged {total_events} events into '{merged_file}'")