In [10]:
import pandas as pd
import random
from itertools import chain

def replace_padlock_strings(csv_file, txt_file, output_name=""):
    """Substitute the ``'N' * 10`` placeholder in each padlock with a per-acronym barcode.

    One barcode is drawn at random from ``txt_file`` for every distinct value
    of the ``acronym`` column, so all padlocks sharing an acronym receive the
    same barcode. Each drawn barcode is removed from the pool before the next
    draw. Every occurrence of ten consecutive ``N`` characters in a padlock is
    replaced by the barcode followed by a literal ``'C'``.

    Rows are processed in file order and results are written back to the row
    they came from, so the CSV does not need to be sorted or grouped by
    acronym.

    Args:
        csv_file: Path to a CSV with at least ``acronym`` and ``padlock`` columns.
        txt_file: Path to a text file with one candidate barcode per line.
        output_name: If non-empty, the resulting frame is written to
            ``f'{output_name}.csv'`` (without the index).

    Returns:
        A tuple ``(df, padlock_dict, padlock_strings)``:
        ``df`` is the frame with the ``padlock`` column substituted, a new
        ``barcode`` column, and whatever ``add_reverse_complement`` adds;
        ``padlock_dict`` maps each acronym to the list of its substituted
        padlocks (in row order); ``padlock_strings`` is the list of barcodes
        that were not used.

    Raises:
        IndexError: If there are more distinct acronyms than barcodes.
    """
    # Read the candidate barcodes, one per line.
    with open(txt_file, 'r') as txt:
        padlock_strings = txt.read().splitlines()

    df = pd.read_csv(csv_file)

    padlock_dict = {}
    barcode_by_acronym = {}
    substituted_padlocks = []
    barcodes = []

    # Walk the rows in file order so every substituted padlock and barcode is
    # stored at the position of the row it was computed from.
    for acronym, padlock in zip(df['acronym'], df['padlock']):
        if acronym not in barcode_by_acronym:
            if not padlock_strings:
                # Same exception type random.choice raises on an empty pool,
                # but with a message that names the actual problem.
                raise IndexError(
                    f"not enough barcodes in {txt_file!r}: ran out before "
                    f"acronym {acronym!r} could be assigned one"
                )
            # Draw, then remove, so no later acronym can draw this entry again.
            substituted_string = random.choice(padlock_strings)
            padlock_strings.remove(substituted_string)
            barcode_by_acronym[acronym] = substituted_string
            padlock_dict[acronym] = []

        barcode = barcode_by_acronym[acronym]
        new_padlock = padlock.replace('N' * 10, barcode + 'C')
        padlock_dict[acronym].append(new_padlock)
        substituted_padlocks.append(new_padlock)
        barcodes.append(barcode)

    df['padlock'] = substituted_padlocks
    df['barcode'] = barcodes
    add_reverse_complement(df)

    # Write the updated DataFrame to a new .csv file
    if output_name:
        df.to_csv(f'{output_name}.csv', index=False)

    return df, padlock_dict, padlock_strings

def reverse_complement(dna_sequence):
    """Return the reverse complement of ``dna_sequence``.

    The sequence is read back to front and each of the upper-case bases
    A, T, C, G is swapped for its partner (A<->T, C<->G). Any other
    character is copied through unchanged.
    """
    pairing = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    return ''.join(
        pairing.get(nucleotide, nucleotide)
        for nucleotide in reversed(dna_sequence)
    )

def add_reverse_complement(df, primer_length=20):
    """Add an 'RT primer' column derived from the start of each padlock.

    For every row, the first ``primer_length`` characters of the ``padlock``
    column are taken and passed through ``reverse_complement``; the result is
    stored in the ``'RT primer'`` column.

    Args:
        df: DataFrame with a string ``padlock`` column. Modified in place.
        primer_length: Number of leading padlock characters to use. Defaults
            to 20, the length that was previously hard-coded.

    Returns:
        The same DataFrame object that was passed in.
    """
    leading_bases = df['padlock'].str[:primer_length]
    df['RT primer'] = leading_bases.apply(reverse_complement)
    return df

In [None]:
# replace_padlock_strings writes its result to f'{output_name}.csv'. Use a
# basename different from the input: output_name='example' would overwrite
# example.csv, and re-running this cell would then read the already-substituted
# padlocks and attach newly drawn barcodes that no longer match them.
padlock_df, padlock_dict, barcode_strings = replace_padlock_strings('example.csv', 'example.txt', output_name='example_barcoded')