In [11]:
import csv

def parse_fasta(filename, residue_positions=(28, 42, 44, 63, 66, 67, 71, 106, 110)):
    """Parse a FASTA file laid out as header/sequence line pairs.

    Every record is one comma-separated header line followed by one sequence
    line. Header fields are read by position, and each is expected to look
    like ``key=value``: field 1 supplies the id and fields 4, 5 and 6 supply
    the overall confidence, ligand confidence and ``seq_rec`` value.
    Blank lines (including trailing ones) are ignored, so they cannot shift
    the header/sequence pairing.

    Args:
        filename: Path of the FASTA file to read.
        residue_positions: Indices into each sequence string whose characters
            are collected, in the given order, under the ``'residues'`` key.
            Defaults to the nine positions used throughout this notebook.
            NOTE(review): the indices are applied as 0-based string offsets;
            if they are meant to be 1-based residue numbers they are off by
            one -- confirm against the structure numbering.

    Returns:
        A list of dicts, one per record, with keys ``'id'`` (int),
        ``'overall_confidence'``, ``'ligand_confidence'``, ``'seq_rec'``
        (floats), ``'sequence'`` (str) and ``'residues'`` (list of
        single-character strings).

    Raises:
        ValueError: If a header has no matching sequence line, or a header
            value cannot be converted to a number.
        IndexError: If a header has fewer than seven fields, a field has no
            ``=``, or a sequence is shorter than a requested position.
    """
    with open(filename, 'r') as file:
        # Keep only non-blank lines; a stray empty line would otherwise be
        # mistaken for a header and break the fixed stride-2 pairing.
        lines = [stripped for stripped in (line.strip() for line in file) if stripped]

    if len(lines) % 2 != 0:
        raise ValueError(
            f"{filename}: expected header/sequence line pairs, "
            f"found {len(lines)} non-blank lines"
        )

    entries = []
    for header_line, sequence in zip(lines[0::2], lines[1::2]):
        header = header_line.split(',')
        entries.append({
            'id': int(header[1].split('=')[1]),  # Extracting ID from the fasta file
            'overall_confidence': float(header[4].split('=')[1]),
            'ligand_confidence': float(header[5].split('=')[1]),
            'seq_rec': float(header[6].split('=')[1]),
            'sequence': sequence,
            'residues': [sequence[idx] for idx in residue_positions],
        })
    return entries

def save_to_csv(entries, output_filename, residue_positions=(28, 42, 44, 63, 66, 67, 71, 106, 110)):
    """Write parsed entries to a CSV file, one row per entry.

    The columns are ``id``, ``overall_confidence``, ``ligand_confidence`` and
    ``seq_rec``, followed by one ``residue_<pos>`` column per requested
    position. Any existing file at ``output_filename`` is overwritten.

    Args:
        entries: Iterable of dicts holding the four scalar keys above plus a
            ``'residues'`` sequence.
        output_filename: Path of the CSV file to create.
        residue_positions: Position labels used to name the residue columns.
            The n-th label is paired with the n-th item of
            ``entry['residues']`` (matching is by order, not by value), so
            this must be the same list the entries were built with.
            Defaults to the nine positions used throughout this notebook.

    Raises:
        KeyError: If an entry lacks one of the required keys.
        IndexError: If an entry's ``'residues'`` has fewer items than
            ``residue_positions``.
    """
    scalar_fields = ['id', 'overall_confidence', 'ligand_confidence', 'seq_rec']
    residue_fields = [f"residue_{pos}" for pos in residue_positions]

    # newline='' lets the csv module control line endings itself.
    with open(output_filename, mode='w', newline='') as file:
        writer = csv.DictWriter(file, fieldnames=scalar_fields + residue_fields)
        writer.writeheader()
        for entry in entries:
            row = {field: entry[field] for field in scalar_fields}
            for idx, column in enumerate(residue_fields):
                row[column] = entry['residues'][idx]
            writer.writerow(row)

# Input FASTA file and the CSV it is summarised into.
filename = "Mb.fa"
output_filename = "output.csv"

# Parse every record, then write one CSV row per record.
entries = parse_fasta(filename)
save_to_csv(entries, output_filename)

print("Output saved to:", output_filename)




Output saved to: output.csv


In [12]:
def count_residue_occurrences(entries, residue_positions=(28, 42, 44, 63, 66, 67, 71, 106, 110)):
    """Tally how often each residue letter appears at each tracked position.

    Args:
        entries: Iterable of dicts whose ``'residues'`` value is a sequence
            with one item per tracked position.
        residue_positions: Position labels used as keys of the result. The
            n-th label is paired with the n-th item of ``entry['residues']``
            (matching is by order, not by value), so this must be the same
            list the entries were built with. Defaults to the nine positions
            used throughout this notebook.

    Returns:
        A dict mapping each position label to a plain dict of
        ``{residue: count}``. Positions are kept in the order given; a
        position with no entries maps to an empty dict.

    Raises:
        IndexError: If an entry's ``'residues'`` has fewer items than
            ``residue_positions``.
    """
    # Materialise once so a one-shot iterable can be walked for every entry.
    residue_positions = list(residue_positions)
    residue_counts = {pos: {} for pos in residue_positions}

    for entry in entries:
        for idx, pos in enumerate(residue_positions):
            residue = entry['residues'][idx]
            per_position = residue_counts[pos]
            per_position[residue] = per_position.get(residue, 0) + 1

    return residue_counts

# Parse Mb.fa in this cell (rebinds `filename` and `entries`), then tally residues.
filename = "Mb.fa"
entries = parse_fasta(filename)
residue_counts = count_residue_occurrences(entries)

# Print one line per tracked position with its {residue: count} table.
for pos in residue_counts:
    counts = residue_counts[pos]
    print(f"Residue at position {pos}: {counts}")


Residue at position 28: {'I': 471, 'L': 27, 'A': 2}
Residue at position 42: {'F': 500}
Residue at position 44: {'A': 500}
Residue at position 63: {'A': 500}
Residue at position 66: {'A': 112, 'T': 388}
Residue at position 67: {'A': 500}
Residue at position 71: {'L': 500}
Residue at position 106: {'S': 500}
Residue at position 110: {'I': 500}
