In [2]:
def merge(a, l, m, r):
    """
    Merge function for merging two subarrays within an array 'a'.

    Args:
        a (list of tuples): The array to be sorted.
        l (int): Left index of the subarray.
        m (int): Middle index of the subarray.
        r (int): Right index of the subarray.
    """
    i = 0
    j = 0
    k = l  # Initialize k with the starting index of the left subarray.
    L = a[l:m]
    length_left = len(L)
    R = a[m:r]
    length_right = len(R)

    while i < length_left and j < length_right:
        # Access the SAM record from the tuple.
        record_l = L[i][1].split('\t')
        record_r = R[j][1].split('\t')
        rname_l = record_l[0]
        rname_r = record_r[0]

        # Compare based on RNAME.
        if rname_l < rname_r:
            a[k] = L[i]
            i, k = i + 1, k + 1
        else:
            a[k] = R[j]
            j, k = j + 1, k + 1

    while i < length_left:
        a[k] = L[i]
        i, k = i + 1, k + 1
    while j < length_right:
        a[k] = R[j]
        j, k = j + 1, k + 1

def merge_sort(a, l, r):
    """
    Merge sort function for sorting an array using the merge sort algorithm.

    Args:
        a (list of tuples): The array to be sorted.
        l (int): Left index of the subarray.
        r (int): Right index of the subarray.
    """
    if l >= r - 1:
        return
    m = l + (r - l) // 2
    merge_sort(a, l, m)
    merge_sort(a, m, r)
    merge(a, l, m, r)

# Open the input SAM file and create an output file for the sorted SAM records. All in the same directory.
input_sam_file = input("Enter SAM File Name: ")
output_sam_file = "output_sorted_by_rname.sam"

with open(input_sam_file, 'r') as infile, open(output_sam_file, 'w') as outfile:
    sam_records = infile.readlines()
    
    # Separate header and records (Correction)
    header_lines = [record for record in sam_records if record.startswith('@')]
    data_records = [record for record in sam_records if not record.startswith('@')]

    # Keep track of line numbers for sorting.
    indexed_records = [(i, record) for i, record in enumerate(data_records)]

    # Sort the indexed records based on RNAME.
    merge_sort(indexed_records, 0, len(indexed_records))

    # Write the sorted SAM records to the output file.
    for i, record in indexed_records:
        outfile.write(record)

# Example usage:
with open(output_sam_file, 'r') as sorted_file:
    for line in sorted_file:
        print(line, end="")


Enter SAM File Name: toy.sam
r001	83	ref	37	30	9M	=	7	-39	CAGCGCCAT	*
r001	163	ref	7	30	8M4I4M1D3M	=	37	39	TTAGATAAAGAGGATACTG	*	XX:B:S,12561,2,20,112
r002	0	ref	9	30	1S2I6M1P1I1P1I4M2I	*	0	0	AAAAGATAAGGGATAAA	*
r003	16	ref	29	30	6H5M	*	0	0	TAGGC	*
r003	0	ref	9	30	5H6M	*	0	0	AGCTAA	*
r004	0	ref	16	30	6M14N1I5M	*	0	0	ATAGCTCTCAGC	*
x1	0	ref2	1	30	20M	*	0	0	aggttttataaaacaaataa	????????????????????
x2	0	ref2	2	30	21M	*	0	0	ggttttataaaacaaataatt	?????????????????????
x3	0	ref2	6	30	9M4I13M	*	0	0	ttataaaacAAATaattaagtctaca	??????????????????????????
x4	0	ref2	10	30	25M	*	0	0	CaaaTaattaagtctacagagcaac	?????????????????????????
x5	0	ref2	12	30	24M	*	0	0	aaTaattaagtctacagagcaact	????????????????????????
x6	0	ref2	14	30	23M	*	0	0	Taattaagtctacagagcaacta	???????????????????????