In [1]:
import random

def generate_diabetic_retinopathy_dataset(num_samples=200):
    """Generates a synthetic dataset for diabetic retinopathy-related questions and answers."""

    stages = ["mild nonproliferative retinopathy", "moderate nonproliferative retinopathy", "severe nonproliferative retinopathy", "proliferative retinopathy"]
    symptoms = ["blurred vision", "floaters", "dark areas in vision", "vision loss", "difficulty seeing at night", "impaired color vision"]
    treatments = ["laser photocoagulation", "anti-VEGF injections", "vitrectomy", "regular eye exams", "blood sugar control", "blood pressure management"]
    risk_factors = ["diabetes duration", "poor blood sugar control", "high blood pressure", "high cholesterol", "pregnancy", "tobacco use"]
    tests = ["dilated eye exam", "optical coherence tomography (OCT)", "fluorescein angiography (FA)", "visual acuity test"]
    complications = ["vitreous hemorrhage", "retinal detachment", "neovascular glaucoma", "macular edema"]
    questions = [
        "What are the symptoms of diabetic retinopathy?",
        "What are the stages of diabetic retinopathy?",
        "What are the risk factors for diabetic retinopathy?",
        "How is diabetic retinopathy diagnosed?",
        "What are the treatment options for diabetic retinopathy?",
        "What are the complications of diabetic retinopathy?",
        "How can I prevent diabetic retinopathy?",
        "What is the [test]?",
        "How often should I get a [test] if I have diabetes?",
        "What is [stage]?",
        "How does [treatment] work?",
        "What is macular edema?",
    ]

    data = []
    for _ in range(num_samples):
        stage = random.choice(stages)
        treatment = random.choice(treatments)
        test = random.choice(tests)
        question_template = random.choice(questions)
        question = question_template.replace("[stage]", stage).replace("[treatment]", treatment).replace("[test]", test)

        if "symptoms" in question:
            context = f"Symptoms of diabetic retinopathy: " + ", ".join(random.sample(symptoms, random.randint(3, 5)))
            answer = ", ".join(random.sample(symptoms, random.randint(2, 4))) + ". Consult an ophthalmologist immediately if you experience these."
        elif "stages" in question:
            context = f"Stages of diabetic retinopathy: " + ", ".join(stages)
            answer = ", ".join(stages) + ". Progression can lead to vision loss."
        elif "risk factors" in question:
            context = f"Risk factors for diabetic retinopathy: " + ", ".join(random.sample(risk_factors, random.randint(3, 5)))
            answer = ", ".join(random.sample(risk_factors, random.randint(2, 4))) + ". Regular eye exams are crucial."
        elif "diagnosed" in question:
            context = f"Diagnosis of diabetic retinopathy: " + ", ".join(random.sample(tests, random.randint(2, 3)))
            answer = "Diagnosis involves " + ", ".join(random.sample(tests, random.randint(1, 2))) + ". Early detection is key."
        elif "treatment" in question:
            context = f"Treatment options for diabetic retinopathy: " + ", ".join(random.sample(treatments, random.randint(2, 4)))
            answer = ", ".join(random.sample(treatments, random.randint(1, 3))) + ". Treatment depends on the stage and severity."
        elif "complications" in question:
            context = f"Complications of diabetic retinopathy: " + ", ".join(random.sample(complications, random.randint(2, 4)))
            answer = ", ".join(random.sample(complications, random.randint(1, 3))) + ". Prompt treatment can prevent vision loss."
        elif "prevent" in question:
            context = "Prevention: strict blood sugar control, regular eye exams, healthy lifestyle."
            answer = "Maintain good blood sugar control, have regular eye exams, and lead a healthy lifestyle."
        elif "What is macular edema?" in question:
            context = "Macular edema is swelling in the macula, the part of the retina responsible for sharp central vision."
            answer = "Macular edema is swelling in the macula that can cause vision loss."
        elif "How does [treatment] work?" in question:
            if treatment == "laser photocoagulation":
                context = "Laser photocoagulation uses a laser to seal leaking blood vessels in the retina."
                answer = "Laser photocoagulation seals leaking blood vessels."
            elif treatment == "anti-VEGF injections":
                context = "Anti-VEGF injections block the growth of new blood vessels in the retina."
                answer = "Anti-VEGF injections block new blood vessel growth."
            elif treatment == "vitrectomy":
                context = "Vitrectomy removes the vitreous gel to clear blood and debris from the eye."
                answer = "Vitrectomy clears blood and debris from the eye."
            else:
                context = "General Information regarding treatment."
                answer = "Treatment varies depending on the severity."
        elif "What is [test]?" in question:
            if test == "dilated eye exam":
                context = "A dilated eye exam uses eye drops to widen the pupil, allowing the doctor to see the retina."
                answer = "A dilated eye exam allows the doctor to see the retina."
            elif test == "optical coherence tomography (OCT)":
                context = "OCT uses light waves to create detailed images of the retina."
                answer = "OCT creates detailed images of the retina."
            elif test == "fluorescein angiography (FA)":
                context = "FA uses a dye and camera to visualize blood flow in the retina."
                answer = "FA visualizes blood flow in the retina."
            elif test == "visual acuity test":
                context = "Measures the sharpness of your vision."
                answer = "Measures how clearly you see."
            else:
                context = "General information regarding tests."
                answer = "Tests help diagnose and monitor the condition."
        elif "How often should I get a [test] if I have diabetes?" in question:
            context = "Frequency of exams depends on diabetes type and retinopathy stage. Annual exams are common."
            answer = "Frequency depends on individual risk. Consult your ophthalmologist."
        else:
            context = "General information about diabetic retinopathy."
            answer = "Diabetic retinopathy is a complication of diabetes that affects the eyes."

        data.append({"question": question, "context": context, "answer": answer})
    return data

diabetic_retinopathy_data = generate_diabetic_retinopathy_dataset()

# Example output
print(diabetic_retinopathy_data[0])

{'question': 'What is the optical coherence tomography (OCT)?', 'context': 'General information about diabetic retinopathy.', 'answer': 'Diabetic retinopathy is a complication of diabetes that affects the eyes.'}


In [2]:
import json
def save_to_text_file(data, filename="diabetic_retinopathy_dataset.json"):
    """Saves the dataset to a text file, with one JSON object per line."""
    with open(filename, "w") as f:
        for item in data:
            f.write(json.dumps(item) + "\n")

save_to_text_file(diabetic_retinopathy_data)