In [1]:
import os
import re

# Function to fix the question sequence in a text file
def fix_question_sequence_in_file(input_file, output_file):
    """Renumber the "Question N:" headers of a text file so they run 1, 2, 3, ...

    A header is a line that *begins* with ``Question``, one or more whitespace
    characters, a run of digits and a colon. Each header receives the next
    sequential number; every other line is copied through unchanged.

    Args:
        input_file: Path of the UTF-8 text file to read.
        output_file: Path of the UTF-8 text file to write (overwritten if it
            already exists).

    Returns:
        None. A confirmation line is printed once the output has been written.
    """
    # Only whitespace *between* "Question" and the number is flexible; the
    # header must start at column 0 because it is tested with .match().
    header_pattern = re.compile(r'Question\s+\d+:')

    with open(input_file, 'r', encoding='UTF-8') as file:
        content = file.readlines()

    corrected_content = []
    question_number = 1

    for line in content:
        if header_pattern.match(line):
            # count=1 restricts the rewrite to the header itself, so a later
            # "Question 7:" cross-reference on the same line keeps its number.
            corrected_content.append(
                header_pattern.sub(f'Question {question_number}:', line, count=1)
            )
            question_number += 1
        else:
            corrected_content.append(line)

    # Write the corrected content to the output file
    with open(output_file, 'w', encoding='UTF-8') as file:
        file.writelines(corrected_content)

    print(f"Question sequence corrected and saved to {output_file}")

# Function to process all files in the given folder
def fix_question_sequence_in_folder(input_folder, output_folder):
    """Renumber the question headers of every ``.txt`` file in a folder.

    Each regular file in ``input_folder`` whose name ends with ``.txt`` is
    passed to ``fix_question_sequence_in_file`` and written under the same
    name in ``output_folder``.

    Args:
        input_folder: Folder containing the source ``.txt`` files.
        output_folder: Folder that receives the renumbered files; created
            (including parents) when it does not exist yet.
    """
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(output_folder, exist_ok=True)

    # Sorted so runs are reproducible (os.listdir order is arbitrary), and
    # limited to regular files so a directory named "*.txt" is not opened.
    files = sorted(
        f for f in os.listdir(input_folder)
        if f.endswith('.txt') and os.path.isfile(os.path.join(input_folder, f))
    )

    # Process each file one by one
    for file_name in files:
        input_file_path = os.path.join(input_folder, file_name)
        output_file_path = os.path.join(output_folder, file_name)

        # Fix the question sequence for each file
        fix_question_sequence_in_file(input_file_path, output_file_path)

# Define folder paths.
# Both are relative paths, so they are resolved against the kernel's current
# working directory when the cell runs.
input_folder = '13_Answer Key'  # Folder with the extracted answer files
output_folder = '14_Reorder Answer Key'  # Folder to save renumbered answer files

# Renumber every .txt answer file found in input_folder and write the result,
# under the same file name, into output_folder (one status line is printed per file).
fix_question_sequence_in_folder(input_folder, output_folder)


Question sequence corrected and saved to 14_Reorder Answer Key\CH-1.txt
Question sequence corrected and saved to 14_Reorder Answer Key\CH-10.txt
Question sequence corrected and saved to 14_Reorder Answer Key\CH-11.txt
Question sequence corrected and saved to 14_Reorder Answer Key\CH-12.txt
Question sequence corrected and saved to 14_Reorder Answer Key\CH-13.txt
Question sequence corrected and saved to 14_Reorder Answer Key\CH-14.txt
Question sequence corrected and saved to 14_Reorder Answer Key\CH-15.txt
Question sequence corrected and saved to 14_Reorder Answer Key\CH-16.txt
Question sequence corrected and saved to 14_Reorder Answer Key\CH-17.txt
Question sequence corrected and saved to 14_Reorder Answer Key\CH-18.txt
Question sequence corrected and saved to 14_Reorder Answer Key\CH-19.txt
Question sequence corrected and saved to 14_Reorder Answer Key\CH-2.txt
Question sequence corrected and saved to 14_Reorder Answer Key\CH-20.txt
Question sequence corrected and saved to 14_Reorder A

In [2]:
import os
import re
from docx import Document

# Function to parse the answers from the text file
def parse_answer_file(answer_file):
    """Read an answer-key text file and extract the answers and question total.

    Answers are every occurrence of ``<digits>:`` followed by optional
    whitespace and one letter ``A``-``D``. The letter is not anchored, so it is
    simply the first character after the colon and whitespace. When a question
    number occurs more than once, the last occurrence wins.

    The question total is the first integer found on the last *non-blank* line
    of the file, so trailing empty lines no longer turn the total into 0.

    Args:
        answer_file: Path of the UTF-8 answer-key file.

    Returns:
        A tuple ``(answer_data, total_questions)`` where ``answer_data`` maps
        the question number (as a string of digits) to its answer letter and
        ``total_questions`` is an int (0 when the file is empty or its last
        non-blank line contains no digits).
    """
    with open(answer_file, 'r', encoding='UTF-8') as file:
        content = file.readlines()

    # Extract answers in the format `number: answer`
    answers = re.findall(r'(\d+):\s*([A-D])', ''.join(content))
    answer_data = {q_num: answer for q_num, answer in answers}

    # Walk backwards to the last line that actually has content; a file that
    # ends with blank lines would otherwise report zero questions.
    total_questions = 0
    for line in reversed(content):
        if line.strip():
            question_count_match = re.search(r'(\d+)', line)
            if question_count_match:
                total_questions = int(question_count_match.group(1))
            break

    return answer_data, total_questions

# Function to clean text by removing invalid XML characters
def clean_text(text):
    """Return ``text`` with every character outside the allowed set removed.

    Kept characters: tab, line feed, carriage return, U+0020-U+007E, U+0085,
    U+00A0-U+D7FF and U+E000-U+FFFD. Everything else (other control
    characters, U+FFFE/U+FFFF, and any code point above U+FFFD) is dropped.
    """
    disallowed_chars = re.compile(r'[^\x09\x0A\x0D\x20-\x7E\x85\xA0-\uD7FF\uE000-\uFFFD]')
    cleaned = disallowed_chars.sub('', text)
    return cleaned

# Function to update the existing docx file with answer data
def update_existing_docx_with_answers(docx_file, answers_data, output_file_name,
                                      answer_row_offset=5):
    """Write the correct answers into the tables of a .docx file and save a copy.

    Every table row whose first cell contains ``Question <digits>`` is treated
    as a question header. The row ``answer_row_offset`` rows below it, in the
    same table, gets the text of its second cell replaced with the answer
    looked up for that question number.

    Args:
        docx_file: Path of the .docx file to read.
        answers_data: Mapping of question number (string of digits) to answer
            text. A question missing from the mapping gets an empty string
            written into its answer cell.
        output_file_name: Path the updated document is saved to.
        answer_row_offset: Distance, in rows, from the question header row to
            the row holding the correct answer. Defaults to 5, the value that
            was previously hard-coded.

    Returns:
        The number of question header rows whose answer cell was written.

    Raises:
        IndexError: If a question header has no row ``answer_row_offset``
            rows away inside its table, or that row has fewer than two cells.
    """
    doc = Document(docx_file)
    question_count = 0  # Number of answer cells written so far
    question_pattern = re.compile(r'Question (\d+)')

    for table in doc.tables:
        # Materialise the rows once: positions come from enumerate() instead of
        # the private row._index attribute, and the length is known for the
        # bounds check below.
        rows = list(table.rows)

        for row_index, row in enumerate(rows):
            cells = row.cells
            if not cells:
                # A row without cells cannot be a question header.
                continue

            question_match = question_pattern.search(cells[0].text.strip())
            if not question_match:
                continue

            question_number = question_match.group(1)
            answer_index = row_index + answer_row_offset
            if not 0 <= answer_index < len(rows):
                # Same exception type as the bare lookup raised, but it now
                # says which question and table layout caused it.
                raise IndexError(
                    f"Question {question_number} in {docx_file}: expected the answer "
                    f"row at index {answer_index}, but the table has {len(rows)} rows"
                )

            correct_answer = answers_data.get(question_number, "")

            # Update correct answer
            rows[answer_index].cells[1].text = clean_text(correct_answer)
            question_count += 1

    # Save the updated docx file
    doc.save(output_file_name)

    return question_count  # Return the total question count

# Function to process all files in the input folders and generate final docx files with updated answers
def process_files(input_folder_ans, final_doc_folder, output_folder):
    """Fill every .docx in a folder with the answers from its matching key file.

    A document ``<name>.docx`` in ``final_doc_folder`` is paired with the
    answer key ``<name>.txt`` in ``input_folder_ans``. Pairing is done by base
    file name rather than by list position, so directory listing order and
    missing or extra files can no longer attach a key to the wrong document.
    The updated document is saved to ``output_folder`` as
    ``<name>_<total_questions>.docx``.

    Args:
        input_folder_ans: Folder with the ``.txt`` answer-key files.
        final_doc_folder: Folder with the ``.docx`` files to update.
        output_folder: Folder for the updated documents; created (including
            parents) when it does not exist yet.

    Returns:
        None. One status line is printed per document, plus one line for each
        document or answer key that has no counterpart.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Index the answer keys by base name so each document finds its own key.
    ans_by_stem = {
        os.path.splitext(f)[0]: f
        for f in os.listdir(input_folder_ans)
        if f.endswith('.txt')
    }
    doc_files = sorted(f for f in os.listdir(final_doc_folder) if f.endswith('.docx'))

    matched_stems = set()
    for doc_file in doc_files:
        base_filename = os.path.splitext(doc_file)[0]
        ans_file = ans_by_stem.get(base_filename)
        if ans_file is None:
            print(f"Skipped {doc_file}: no answer key named {base_filename}.txt in {input_folder_ans}")
            continue
        matched_stems.add(base_filename)

        ans_file_path = os.path.join(input_folder_ans, ans_file)
        doc_file_path = os.path.join(final_doc_folder, doc_file)

        # Parse the answer data and get the total question count
        answers_data, total_questions = parse_answer_file(ans_file_path)

        # Modify the output file name to include the total question count
        output_file_name = os.path.join(output_folder, f"{base_filename}_{total_questions}.docx")

        # Update the docx file with answer data
        update_existing_docx_with_answers(doc_file_path, answers_data, output_file_name)

        # Print the total questions based on the last line of the answer file
        print(f"Processed Total questions for {doc_file} = {total_questions}")

    # Surface answer keys that were never used instead of dropping them silently.
    for stem in sorted(set(ans_by_stem) - matched_stems):
        print(f"Unused answer key {ans_by_stem[stem]}: no matching .docx in {final_doc_folder}")

# Define folder paths.
# All three are relative paths, so they are resolved against the kernel's
# current working directory when the cell runs.
# NOTE: `output_folder` rebinds the module-level name that the first cell also
# assigns, so its value depends on which cell ran last.
input_folder_ans = '14_Reorder Answer Key'  # Folder with the renumbered answer key files
final_doc_folder = '12_Final doc'  # Folder with existing .docx files to update
output_folder = 'Xoom File'  # Output folder for updated .docx files

# Update each .docx with its answer key and save the result into output_folder;
# one status line is printed per processed document.
process_files(input_folder_ans, final_doc_folder, output_folder)


Processed Total questions for CH-1.docx = 74
Processed Total questions for CH-10.docx = 60
Processed Total questions for CH-11.docx = 112
Processed Total questions for CH-12.docx = 100
Processed Total questions for CH-13.docx = 130
Processed Total questions for CH-14.docx = 50
Processed Total questions for CH-15.docx = 84
Processed Total questions for CH-16.docx = 90
Processed Total questions for CH-17.docx = 108
Processed Total questions for CH-18.docx = 128
Processed Total questions for CH-19.docx = 85
Processed Total questions for CH-2.docx = 40
Processed Total questions for CH-20.docx = 60
Processed Total questions for CH-21.docx = 92
Processed Total questions for CH-22.docx = 39
Processed Total questions for CH-23.docx = 100
Processed Total questions for CH-24.docx = 100
Processed Total questions for CH-25.docx = 73
Processed Total questions for CH-26.docx = 73
Processed Total questions for CH-27.docx = 81
Processed Total questions for CH-28.docx = 57
Processed Total questions for