## Extract Sections from Text File to Excel

In [None]:
import re
import pandas as pd


### 1. Read the content of the text file

In [None]:
# Load the whole note into memory as a single string; the section regexes
# below need to search across line boundaries.
# The encoding is pinned so the result does not depend on the platform's
# locale default. NOTE(review): assumes the notes are UTF-8 -- change the
# encoding here if the source system exports something else.
with open("path_to_text_file.txt", "r", encoding="utf-8") as file:
    content = file.read()


### 2. Extract the required sections

In [None]:
# Extracting sections based on the provided conditions.
# Each section is located by its "- Header -" line followed by a blank line
# and captured non-greedily up to the marker that ends it. When a header is
# missing the corresponding variable is set to "" rather than None.

# Subjective: text between the "- Subjective -" header and the
# "- Past Medical History -" header.
subjective_pattern = re.compile(r'- Subjective -\n\n(.*?)- Past Medical History -', re.DOTALL)
subjective_match = subjective_pattern.search(content)
subjective = subjective_match.group(1).strip() if subjective_match else ""

# Assessment: text after the "- Assessment -" header up to the first blank line.
assessment_pattern = re.compile(r'- Assessment -\n\n(.*?)(?:\n){2}', re.DOTALL)
assessment_match = assessment_pattern.search(content)
assessment = assessment_match.group(1).strip() if assessment_match else ""

# Plan: text after the "- Plan -" header up to the first occurrence of "CPT".
plan_pattern = re.compile(r'- Plan -\n\n(.*?)CPT', re.DOTALL)
plan_match = plan_pattern.search(content)
plan = plan_match.group(1).strip() if plan_match else ""

# ICD10 Codes from Assessment
# This step searches `assessment`, so it has to come after the Assessment
# extraction above.
# The pattern matches a whole code in one go: a capital letter, a digit, a
# third character that is a digit or A/B, then an optional "." followed by
# 1-4 alphanumerics (e.g. "I10", "E11.9", "E11.65"). Matching whole codes
# avoids stitching fragments back together, which breaks for codes with a
# one-character suffix or with no decimal part at all.
# NOTE(review): assumes codes are written in upper case -- confirm against
# real notes.
icd10_code_pattern = re.compile(r'\b(?P<code>[A-Z]\d[0-9AB](?:\.[0-9A-Z]{1,4})?)\b')
icd10_codes_from_assessment = icd10_code_pattern.findall(assessment)
# Kept as a separate list because the Excel-writing step joins this name.
combined_icd10_codes = list(icd10_codes_from_assessment)


### 3. Write the extracted sections to an Excel file

In [None]:
# One spreadsheet row per line of the "Assessment" section; the ICD10 codes,
# Subjective and Plan values are repeated unchanged on every row.
assessment_sentences = assessment.split('\n')

column_order = ["ICD10 code", "Subjective", "Assessment", "Plan"]
joined_icd10_codes = ', '.join(combined_icd10_codes)

# Build the table row by row instead of column by column.
row_records = [
    {
        "ICD10 code": joined_icd10_codes,
        "Subjective": subjective,
        "Assessment": sentence,
        "Plan": plan,
    }
    for sentence in assessment_sentences
]
df_expanded = pd.DataFrame(row_records, columns=column_order)

# Save the table as an Excel workbook, leaving out the DataFrame index column.
output_file_path = "output_excel_file.xlsx"
df_expanded.to_excel(output_file_path, index=False)
