In [None]:
import random
import pandas as pd

# Categorical values each synthetic record is sampled from.

# Kind of pain reported.
pain_types = [
    "Sharp",
    "Dull",
    "Burning",
    "Throbbing",
]

# Body area where the pain is felt.
pain_locations = [
    "Head",
    "Back",
    "Stomach",
    "Chest",
    "Arm",
    "Leg",
    "Neck",
    "Everywhere",
]

# Whether the pain radiates.
radiate = [
    "Yes",
    "No",
]

# When the pain started.
durations = [
    "Today",
    "Yesterday",
    "Several days ago",
    "Last week",
    "More than a week ago",
]

def severity_label(score):
    """Map a numeric severity score to its class label.

    Scores up to and including 2 are "Mild", scores above 2 up to and
    including 4 are "Moderate", and anything higher is "Severe".
    """
    # Inclusive upper bounds, checked in ascending order.
    for upper_bound, label in ((2, "Mild"), (4, "Moderate")):
        if score <= upper_bound:
            return label
    return "Severe"

def choose_severity(pain_type, pain_location, radiates, duration, self_score):
    """Randomly draw a severity score from 1 to 5, biased by the symptoms.

    Every score starts with a weight of 1; the arguments then add weight to
    particular scores, and one score is sampled in proportion to the final
    weights.

    Args:
        pain_type: "Sharp"/"Burning" favour scores 3-5; "Dull"/"Throbbing"
            favour scores 1-3. Other values add no weight.
        pain_location: "Everywhere" favours scores 4-5. Other values add
            no weight.
        radiates: "Yes" favours scores 4-5. Other values add no weight.
        duration: "Last week" / "More than a week ago" favour scores 4-5
            and, to a lesser degree, scores 1-2. Other values add no weight.
        self_score: self-reported score, 1 to 5; the strongest influence.

    Returns:
        The sampled severity score, an int from 1 to 5.

    Raises:
        ValueError: if ``self_score`` is outside 1..5.
    """
    # Without this check, 0 or a negative value would silently wrap around
    # through negative indexing and boost the wrong score (0 -> score 5).
    if not 1 <= self_score <= 5:
        raise ValueError(
            f"self_score must be between 1 and 5, got {self_score!r}"
        )

    # weights[i] is the relative weight of score i + 1.
    weights = [1, 1, 1, 1, 1]
    weights[self_score - 1] += 6  # strong influence from self_score

    # High self-reports pull towards 4-5, low ones towards 1-2.
    if self_score >= 4:
        weights[3] += 2
        weights[4] += 3
    elif self_score <= 2:
        weights[0] += 3
        weights[1] += 2

    if pain_location == "Everywhere":
        weights[3] += 2
        weights[4] += 3

    if radiates == "Yes":
        weights[3] += 2
        weights[4] += 2

    if pain_type in ["Sharp", "Burning"]:
        weights[2] += 1
        weights[3] += 2
        weights[4] += 2
    elif pain_type in ["Dull", "Throbbing"]:
        weights[0] += 2
        weights[1] += 2
        weights[2] += 1

    if duration in ["Last week", "More than a week ago"]:
        weights[3] += 2
        weights[4] += 3
        weights[0] += 1
        weights[1] += 1

    # random.choices accepts relative weights, so normalizing is not strictly
    # required; it is kept so seeded runs draw the same values as before.
    total = sum(weights)
    probs = [w / total for w in weights]
    severity_score = random.choices(range(1, 6), probs)[0]  # scores 1 to 5
    return severity_score

# Build 2000 synthetic records: sample the categorical fields and a
# self-reported score, then derive the severity score and its class label.
data = []
for _ in range(2000):
    # Dict values are evaluated top to bottom, so the random draws happen in
    # the order: type, location, radiates, duration, self_score.
    record = {
        "pain_type": random.choice(pain_types),
        "pain_location": random.choice(pain_locations),
        "radiates": random.choice(radiate),
        "duration": random.choice(durations),
        "self_score": random.randint(1, 5),
    }
    record["severity_score"] = choose_severity(
        record["pain_type"],
        record["pain_location"],
        record["radiates"],
        record["duration"],
        record["self_score"],
    )
    record["severity_class"] = severity_label(record["severity_score"])
    data.append(record)

# One row per record; written without the DataFrame index column.
df = pd.DataFrame(data)
df.to_csv("../data/synthetic_users_dataset.csv", index=False)




Unnamed: 0,pain_type,pain_location,radiates,duration,self_score,severity_score,severity_class
0,Throbbing,Back,Yes,Last week,2,5,Severe
1,Throbbing,Everywhere,No,More than a week ago,5,5,Severe
2,Burning,Head,Yes,Today,5,4,Moderate
3,Throbbing,Stomach,Yes,Several days ago,1,1,Mild
4,Throbbing,Back,Yes,More than a week ago,4,5,Severe


In [4]:
# Preview the first 15 rows of the generated dataset.
df.head(n=15)

Unnamed: 0,pain_type,pain_location,radiates,duration,self_score,severity_score,severity_class
0,Throbbing,Back,Yes,Last week,2,5,Severe
1,Throbbing,Everywhere,No,More than a week ago,5,5,Severe
2,Burning,Head,Yes,Today,5,4,Moderate
3,Throbbing,Stomach,Yes,Several days ago,1,1,Mild
4,Throbbing,Back,Yes,More than a week ago,4,5,Severe
5,Throbbing,Stomach,Yes,Today,3,3,Moderate
6,Sharp,Everywhere,No,Several days ago,4,4,Moderate
7,Throbbing,Back,No,Today,4,5,Severe
8,Dull,Neck,Yes,Several days ago,3,3,Moderate
9,Throbbing,Neck,Yes,More than a week ago,5,1,Mild
