<a href="https://colab.research.google.com/github/vspin25/acute_inflammation/blob/main/Preprocess.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Acute Inflammations**
Dataset https://archive.ics.uci.edu/dataset/184/acute+inflammations

Libraries

In [None]:
import io

import pandas as pd
from google.colab import files
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

Dataset

In [None]:
# Ask the user for the dataset workbook through Colab's upload widget;
# whatever files.upload() returns is kept in `uploaded`.
uploaded = files.upload()

Saving Acute_Inflammations.xlsx to Acute_Inflammations (1).xlsx


In [None]:
# Load the workbook from the bytes returned by files.upload() instead of a
# fixed path on disk: when a file with the same name already exists, Colab
# saves the new upload under a different name (e.g. "Acute_Inflammations (1).xlsx"),
# so reading the hard-coded path would silently load the older copy.
DATA_FILE = 'Acute_Inflammations.xlsx'

if uploaded:
    # Prefer the expected file name; otherwise use the first uploaded file.
    source_name = DATA_FILE if DATA_FILE in uploaded else next(iter(uploaded))
    dataset = pd.read_excel(io.BytesIO(uploaded[source_name]))
else:
    # Nothing was uploaded in this session: fall back to a copy already on disk.
    dataset = pd.read_excel(DATA_FILE)

# Preprocess

In [None]:
# Temperatures are expected to use a decimal comma (e.g. "35,5"), so swap it
# for a dot before parsing. Casting to str first keeps this cell working when
# Excel hands back numeric or mixed-type cells, and lets it be re-run after the
# column has already been converted to float.
temperature_text = dataset['temperature'].astype(str).str.replace(',', '.', regex=False)

dataset['temperature'] = pd.to_numeric(temperature_text)

print(dataset['temperature'].head())

0    35.5
1    35.9
2    35.9
3    36.0
4    36.0
Name: temperature, dtype: float64


Change temperature to categorical variable

- hypothermia: below 36.2 (36.2 itself is not included)
- normal: from 36.2 up to, but not including, 37.5
- fever: 37.5 or higher

In [None]:
# Half-open bins [low, high) because right=False:
#   hypothermia < 36.2 <= normal < 37.5 <= fever
# The last bin is open-ended so that a reading of 42 or above is still labelled
# 'fever' instead of silently becoming NaN.
bins = [0, 36.2, 37.5, float('inf')]
labels = ['hypothermia', 'normal', 'fever']
dataset['temperature'] = pd.cut(dataset['temperature'], bins=bins, labels=labels, right=False)
print(dataset['temperature'].unique())

['hypothermia', 'normal', 'fever']
Categories (3, object): ['hypothermia' < 'normal' < 'fever']


Create new feature "Symptom count"

In [None]:
# Number of symptoms answered 'yes' per patient. Comparing the whole block of
# columns at once and summing across each row avoids a Python-level lambda call
# for every row.
SYMPTOM_COLUMNS = ['nausea', 'lumbar-pain', 'urine-pushing', 'micturition-pains', 'burning-urethra']
dataset['symptom_count'] = dataset[SYMPTOM_COLUMNS].eq('yes').sum(axis=1)
print(dataset.head())

   temperature nausea lumbar-pain urine-pushing micturition-pains  \
0  hypothermia     no         yes            no                no   
1  hypothermia     no          no           yes               yes   
2  hypothermia     no         yes            no                no   
3  hypothermia     no          no           yes               yes   
4  hypothermia     no         yes            no                no   

  burning-urethra bladder-inflammation nephritis  symptom_count  
0              no                   no        no              1  
1             yes                  yes        no              3  
2              no                   no        no              1  
3             yes                  yes        no              3  
4              no                   no        no              1  


In [None]:
# How many patients fall into each symptom-count bucket
symptom_counts = dataset['symptom_count']
symptom_counts.value_counts()

Unnamed: 0_level_0,count
symptom_count,Unnamed: 1_level_1
3,51
1,30
2,10
4,10
0,10
5,9


Nephritis and symptom count

In [None]:
# Symptom-count distribution restricted to patients with nephritis
has_nephritis = dataset['nephritis'].eq('yes')
nephritis_data = dataset.loc[has_nephritis]
nephritis_symptom = nephritis_data['symptom_count'].value_counts()

print("Symptom count (nephritis):", nephritis_symptom, sep="\n")

Symptom count (nephritis):
symptom_count
3    31
4    10
5     9
Name: count, dtype: int64


Bladder and symptom count

In [None]:
# Symptom-count distribution restricted to patients with bladder inflammation
has_bladder_inflammation = dataset['bladder-inflammation'].eq('yes')
bladder_data = dataset.loc[has_bladder_inflammation]
bladder_symptom = bladder_data['symptom_count'].value_counts()

print("Symptom count (bladder inflammation):", bladder_symptom, sep="\n")

Symptom count (bladder inflammation):
symptom_count
3    20
2    10
1    10
4    10
5     9
Name: count, dtype: int64


Encode every column as integer labels (0, 1, 2, …)

In [None]:
# A single encoder instance is reused for every column, so after the loop it
# only remembers the last column it was fitted on; label_mappings keeps the
# per-column lookup tables (original value -> integer code).
new_label = LabelEncoder()
data_encoded = dataset.copy()

label_mappings = {}

for col in data_encoded.columns:
    # fit_transform() fits the encoder itself, so a separate fit() call
    # beforehand would only repeat the same work.
    # NOTE: LabelEncoder numbers classes in sorted order, so the ordered
    # 'temperature' categories come out alphabetically
    # (fever=0, hypothermia=1, normal=2) rather than in clinical order.
    data_encoded[col] = new_label.fit_transform(data_encoded[col])
    label_mappings[col] = dict(zip(new_label.classes_, new_label.transform(new_label.classes_)))

# Report, column by column, which integer code each original value received
for feature, mapping in label_mappings.items():
    report_lines = [f"Feature: {feature}", "Original---> Encoded:"]
    report_lines.extend(
        f"  {original} -> {encoded}" for original, encoded in mapping.items()
    )
    print("\n".join(report_lines))
    # Blank separator between features (print adds its own newline as well)
    print("\n")

Feature: temperature
Original---> Encoded:
  fever -> 0
  hypothermia -> 1
  normal -> 2


Feature: nausea
Original---> Encoded:
  no -> 0
  yes -> 1


Feature: lumbar-pain
Original---> Encoded:
  no -> 0
  yes -> 1


Feature: urine-pushing
Original---> Encoded:
  no -> 0
  yes -> 1


Feature: micturition-pains
Original---> Encoded:
  no -> 0
  yes -> 1


Feature: burning-urethra
Original---> Encoded:
  no -> 0
  yes -> 1


Feature: bladder-inflammation
Original---> Encoded:
  no -> 0
  yes -> 1


Feature: nephritis
Original---> Encoded:
  no -> 0
  yes -> 1


Feature: symptom_count
Original---> Encoded:
  0 -> 0
  1 -> 1
  2 -> 2
  3 -> 3
  4 -> 4
  5 -> 5




In [None]:
# Peek at the first rows of the fully encoded frame
encoded_preview = data_encoded.head()
print(encoded_preview)

   temperature  nausea  lumbar-pain  urine-pushing  micturition-pains  \
0            1       0            1              0                  0   
1            1       0            0              1                  1   
2            1       0            1              0                  0   
3            1       0            0              1                  1   
4            1       0            1              0                  0   

   burning-urethra  bladder-inflammation  nephritis  symptom_count  
0                0                     0          0              1  
1                1                     1          0              3  
2                0                     0          0              1  
3                1                     1          0              3  
4                0                     0          0              1  


Define target variables
y1 = bladder inflammation
y2 = nephritis

In [None]:
# Separate the two diagnosis columns (targets) from the remaining feature columns
TARGET_COLUMNS = ['bladder-inflammation', 'nephritis']
x = data_encoded.drop(columns=TARGET_COLUMNS)
y1, y2 = data_encoded['bladder-inflammation'], data_encoded['nephritis']