# In [None]:
import logging
import math
import os
import subprocess

# Configure the root logger's format, level and timestamp style, then obtain
# the logger that the functions in this module write to.
logging.basicConfig(
    level=logging.DEBUG,
    format="=== %(levelname)s === %(asctime)s === %(message)s",
    datefmt='%Y-%m-%d %H:%M:%S',
)
logger = logging.getLogger(__name__)

def check_and_index_tabix(file_path, preset='vcf', tabix_path="tabix"):
    """
    Ensure that the tabix index exists for the given file, and create it if it does not.

    An index counts as present when either ``<file_path>.tbi`` or
    ``<file_path>.csi`` exists. Otherwise the command
    ``<tabix_path> -p <preset> <file_path>`` is run to build one.

    Args:
        file_path: Path of the file to index (a string; the index suffix is
            appended to it).
        preset: Value passed to tabix's ``-p`` option.
        tabix_path: Name or path of the tabix executable.

    Returns:
        None.

    Raises:
        RuntimeError: If the tabix command exits with a non-zero status. The
            original ``CalledProcessError`` is attached as the cause.
    """
    index_files = (file_path + ".tbi", file_path + ".csi")
    if any(os.path.exists(candidate) for candidate in index_files):
        return

    logger.info(f"Index file not found for {file_path}. Creating index...")
    command = [tabix_path, '-p', preset, file_path]
    try:
        # text=True decodes the captured streams, so the error log below shows
        # tabix's message as readable text instead of a bytes repr (b'...').
        subprocess.run(
            command,
            check=True,
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            text=True,
        )
    except subprocess.CalledProcessError as e:
        logger.error(f"Failed to create index: {e.stderr.strip()}")
        raise RuntimeError("Failed to create Tabix index") from e
    logger.info("Index created successfully.")

def execute_command(command):
    """
    Run a system command through subprocess and hand back its standard output.

    The output is decoded as text and stripped of surrounding whitespace.
    When the command exits with a non-zero status, the failure is logged
    together with the captured stderr and None is returned instead.
    """
    try:
        completed = subprocess.run(
            command,
            check=True,
            text=True,
            capture_output=True,
        )
    except subprocess.CalledProcessError as e:
        logger.error(f"Command '{' '.join(command)}' failed with error: {e.stderr}")
        return None
    return completed.stdout.strip()

def supremum_details(tabixfiles, chrom, base_path, tabix_path="tabix"):
    """
    Calculate the supremum position and total number of sites using Tabix.

    For every file, ``<tabix_path> <file_path> <chrom>`` is executed and an
    integer is read from the third tab-separated column of each output line.

    Args:
        tabixfiles: Iterable of file names to query.
        chrom: Region/chromosome argument handed to tabix (converted to str).
        base_path: Directory prefix joined to each file name with "/". An
            empty value leaves the file name untouched.
        tabix_path: Name or path of the tabix executable.

    Returns:
        Tuple ``(max_position, total_sites)``: the largest parsed value over
        all files and the number of parsed lines. ``(0, 0)`` when no file
        produced any output (including files whose command failed).
    """
    max_position = 0
    total_sites = 0

    for file in tabixfiles:
        # With an empty base_path, "{base_path}/{file}" would become "/file",
        # i.e. a path at the filesystem root, so only prefix when one is given.
        file_path = f"{base_path}/{file}" if base_path else f"{file}"
        #check_and_index_tabix(file_path, 'auto', tabix_path)  # 'auto' depends on file format
        command = [tabix_path, file_path, f"{chrom}"]
        output = execute_command(command)
        if not output:
            # None (command failed) or empty string (no records): nothing to add.
            continue

        # NOTE(review): index 2 is the third column. In VCF the position is the
        # second column, so this presumably targets a format whose third
        # column is numeric (e.g. a BED-like end coordinate) — confirm.
        positions = [int(line.split('\t')[2]) for line in output.split('\n') if line]
        max_position = max(max_position, max(positions, default=0))
        total_sites += len(positions)

    return max_position, total_sites

def get_regions(tabixfiles, chrom, exp_numsites=1e3, base_path="", tabix_path="tabix"):
    """
    Calculate regions based on the supremum details of chromosome positions and sites.

    The span from 0 to the largest observed position is cut into equal-width
    windows. The window width is the average spacing between sites
    (``sup_position / sup_numsites``) multiplied by ``exp_numsites``, rounded
    up and capped at ``sup_position``.

    Args:
        tabixfiles: Iterable of file names, forwarded to ``supremum_details``.
        chrom: Chromosome/region name; also the first element of each region.
        exp_numsites: Expected number of sites per region; must be positive.
        base_path: Directory prefix for the files, forwarded unchanged.
        tabix_path: Name or path of the tabix executable, forwarded unchanged.

    Returns:
        ``False`` when no position or no site was found for ``chrom``.
        Otherwise a tuple ``(regions, progress)`` where ``regions`` is a list
        of ``(chrom, start, end)`` tuples and ``progress`` is a list with, for
        each region, its end as a percentage of the largest position (rounded
        to one decimal). Consecutive regions share their boundary coordinate
        (the end of one equals the start of the next).

    Raises:
        ValueError: If ``exp_numsites`` is not positive.
    """
    logger.debug("Getting regions for tabix-indexed files...")
    sup_position, sup_numsites = supremum_details(tabixfiles, chrom, base_path, tabix_path)

    if sup_position == 0 or sup_numsites == 0:
        logger.info("No data available for the specified chromosome.")
        return False

    # A non-positive value would yield a zero or negative step, which makes
    # range() fail or produce no windows; report the real cause instead.
    if exp_numsites <= 0:
        raise ValueError(f"exp_numsites must be positive, got {exp_numsites!r}")

    step = math.ceil(sup_position / sup_numsites * exp_numsites)
    stepsize = min(step, sup_position)
    pos_start = list(range(0, sup_position, stepsize))
    pos_end = list(range(stepsize, sup_position + stepsize, stepsize))
    pos_end[-1] = min(pos_end[-1], sup_position)  # Adjust the last end position

    progress = [round(100 * end / sup_position, 1) for end in pos_end]
    regions = list(zip([chrom] * len(pos_start), pos_start, pos_end))

    logger.debug("Regions successfully retrieved with progress: %s", progress)
    return regions, progress
