### Imports

In [32]:
import pandas as pd
from pathlib import Path

### Writes patient data to CSV

In [33]:
# Relative path of the CSV file that the patient records are written to
csv_path = Path("patients.csv")

In [34]:
# Patient records: one dict per patient, with symptoms kept as a single
# comma-separated string.
# NOTE(review): "sesak napas" (id 1) and "sesak nafas" (id 10) are spelled
# differently — confirm whether they should be treated as the same symptom.
patient_fields = ("id", "name", "age", "symptoms")
patient_rows = [
    (1, "Andi", 45, "demam, batuk, sesak napas"),
    (2, "Budi", 29, "mual, sakit perut"),
    (3, "Citra", 62, "pusing, kehilangan keseimbangan"),
    (4, "Dita", 30, "susah tidur"),
    (5, "Eka", 18, "gusi berdarah"),
    (6, "Fitra", 49, "pusing, sakit perut"),
    (7, "Gio", 49, "menggigil, batuk, sakit kepala"),
    (8, "Harianto", 33, "memar di tangan"),
    (9, "Idul", 88, "susah tidur"),
    (10, "Jaka", 54, "sesak nafas"),
]
data = [dict(zip(patient_fields, row)) for row in patient_rows]

In [35]:
# Turn the list of patient dicts into a DataFrame (one row per patient)
df_original = pd.DataFrame(data=data)

# Persist the frame to disk; index=False keeps the row index out of the file
df_original.to_csv(path_or_buf=csv_path, index=False)

In [36]:
# Render the freshly built frame to verify its contents
display(df_original)

Unnamed: 0,id,name,age,symptoms
0,1,Andi,45,"demam, batuk, sesak napas"
1,2,Budi,29,"mual, sakit perut"
2,3,Citra,62,"pusing, kehilangan keseimbangan"
3,4,Dita,30,susah tidur
4,5,Eka,18,gusi berdarah
5,6,Fitra,49,"pusing, sakit perut"
6,7,Gio,49,"menggigil, batuk, sakit kepala"
7,8,Harianto,33,memar di tangan
8,9,Idul,88,susah tidur
9,10,Jaka,54,sesak nafas


### Loads the CSV

In [37]:
# Read the CSV back through csv_path (the same path it was written to) rather
# than a second hard-coded file name, so the write and read cannot drift apart
df = pd.read_csv(csv_path)

In [38]:
# Render the frame loaded from the CSV
display(df)

Unnamed: 0,id,name,age,symptoms
0,1,Andi,45,"demam, batuk, sesak napas"
1,2,Budi,29,"mual, sakit perut"
2,3,Citra,62,"pusing, kehilangan keseimbangan"
3,4,Dita,30,susah tidur
4,5,Eka,18,gusi berdarah
5,6,Fitra,49,"pusing, sakit perut"
6,7,Gio,49,"menggigil, batuk, sakit kepala"
7,8,Harianto,33,memar di tangan
8,9,Idul,88,susah tidur
9,10,Jaka,54,sesak nafas


### Tokenizes the symptoms into lists

In [39]:
def tokenize_symptoms(symptom_text: str) -> list:
    """Split a comma-separated symptom string into a list of symptom names.

    Parameters
    ----------
    symptom_text : str
        Comma-separated symptoms, e.g. "demam, batuk". Missing values
        (None / NaN) are accepted.

    Returns
    -------
    list
        The symptoms with surrounding whitespace stripped. Missing or blank
        input yields an empty list, and empty tokens produced by stray or
        trailing commas are dropped.
    """
    # If field is missing or NaN, return empty list
    if pd.isna(symptom_text):
        return []
    # Split on comma and strip whitespace
    tokens = (part.strip() for part in symptom_text.split(","))
    # Drop empty tokens so "" or "demam,, batuk," do not inflate the symptom count
    return [token for token in tokens if token]

In [40]:
# Tokenize each patient's symptom string and store the result as a new
# list-valued column
df["symptom_list"] = df["symptoms"].map(tokenize_symptoms)

display(df)

Unnamed: 0,id,name,age,symptoms,symptom_list
0,1,Andi,45,"demam, batuk, sesak napas","[demam, batuk, sesak napas]"
1,2,Budi,29,"mual, sakit perut","[mual, sakit perut]"
2,3,Citra,62,"pusing, kehilangan keseimbangan","[pusing, kehilangan keseimbangan]"
3,4,Dita,30,susah tidur,[susah tidur]
4,5,Eka,18,gusi berdarah,[gusi berdarah]
5,6,Fitra,49,"pusing, sakit perut","[pusing, sakit perut]"
6,7,Gio,49,"menggigil, batuk, sakit kepala","[menggigil, batuk, sakit kepala]"
7,8,Harianto,33,memar di tangan,[memar di tangan]
8,9,Idul,88,susah tidur,[susah tidur]
9,10,Jaka,54,sesak nafas,[sesak nafas]


### Filters patients older than 40 with more than 2 symptoms

In [41]:
# Count the tokens in each patient's symptom list and store it as a new column
df["symptom_count"] = df["symptom_list"].map(len)

display(df)

Unnamed: 0,id,name,age,symptoms,symptom_list,symptom_count
0,1,Andi,45,"demam, batuk, sesak napas","[demam, batuk, sesak napas]",3
1,2,Budi,29,"mual, sakit perut","[mual, sakit perut]",2
2,3,Citra,62,"pusing, kehilangan keseimbangan","[pusing, kehilangan keseimbangan]",2
3,4,Dita,30,susah tidur,[susah tidur],1
4,5,Eka,18,gusi berdarah,[gusi berdarah],1
5,6,Fitra,49,"pusing, sakit perut","[pusing, sakit perut]",2
6,7,Gio,49,"menggigil, batuk, sakit kepala","[menggigil, batuk, sakit kepala]",3
7,8,Harianto,33,memar di tangan,[memar di tangan],1
8,9,Idul,88,susah tidur,[susah tidur],1
9,10,Jaka,54,sesak nafas,[sesak nafas],1


In [44]:
# Keep only patients who are older than 40 and report more than 2 symptoms
older_than_40 = df["age"] > 40
more_than_2_symptoms = df["symptom_count"] > 2
filtered_df = df.loc[older_than_40 & more_than_2_symptoms]

# Show only the original columns, not the derived helper columns
display(filtered_df.loc[:, ["id", "name", "age", "symptoms"]])

Unnamed: 0,id,name,age,symptoms
0,1,Andi,45,"demam, batuk, sesak napas"
6,7,Gio,49,"menggigil, batuk, sakit kepala"


### Writes filtered patient data to CSV

In [45]:
# Export the filtered patients with only the original columns; index=False
# keeps the row index out of the file
export_columns = ["id", "name", "age", "symptoms"]
filtered_df[export_columns].to_csv("filtered_patients.csv", index=False)