In [13]:
#pip install tpot scikit-fuzzy scikit-learn pandas numpy

In [14]:
from tpot import TPOTClassifier
import numpy as np
import pandas as pd
import skfuzzy as fuzz
from sklearn.model_selection import train_test_split

In [15]:
selected_data = pd.read_csv('selected_data.csv')

In [16]:
selected_data

Unnamed: 0,sepsis_icd,resp,heart_rate,ph,bp_systolic,bicarbonate
0,0,17.521739,95.043478,7.371304,100.217391,21.809524
1,0,17.000000,71.695652,7.371304,121.652174,25.217391
2,1,27.761905,100.000000,7.371304,104.380952,17.000000
3,0,29.695652,79.652174,7.360000,105.782609,19.000000
4,1,14.875000,94.166667,7.456667,113.208333,27.000000
...,...,...,...,...,...,...
1495,0,15.875000,62.625000,7.371304,125.416667,29.000000
1496,0,21.416667,87.875000,7.371304,113.625000,32.250000
1497,0,23.095238,81.409091,7.371304,132.523810,34.000000
1498,1,27.869565,112.347826,7.479545,126.454545,27.173913


In [17]:
data = selected_data
X = data.drop(columns=["sepsis_icd"])
y = data["sepsis_icd"]

In [18]:
def create_membership_functions(variable_range, percentiles=(33, 50, 67)):
    low = fuzz.trimf(variable_range, [variable_range.min(), variable_range.min(), np.percentile(variable_range, percentiles[0])])
    medium = fuzz.trimf(variable_range, [variable_range.min(), np.percentile(variable_range, percentiles[1]), variable_range.max()])
    high = fuzz.trimf(variable_range, [np.percentile(variable_range, percentiles[2]), variable_range.max(), variable_range.max()])
    return low, medium, high

def calculate_membership_degrees(row, ranges, memberships, columns):
    degrees = []
    for col, var_range, (low, medium, high) in zip(columns, ranges, memberships):
        degrees.extend([
            fuzz.interp_membership(var_range, low, row[col]),
            fuzz.interp_membership(var_range, medium, row[col]),
            fuzz.interp_membership(var_range, high, row[col]),
        ])
    return degrees

def apply_fuzzy_logic(df, columns):

    ranges = [np.arange(df[col].min(), df[col].max(), 0.1 if col != "ph" else 0.01) for col in columns]
    memberships = [create_membership_functions(var_range) for var_range in ranges]
    
    fuzzy_features = df.apply(
        calculate_membership_degrees, axis=1, ranges=ranges, memberships=memberships, columns=columns
    )
    fuzzy_array = np.array(fuzzy_features.tolist())
    
    fuzzy_column_names = [f"{col}_{level}" for col in columns for level in ["low", "medium", "high"]]
    fuzzy_df = pd.DataFrame(fuzzy_array, columns=fuzzy_column_names)
    
    return fuzzy_df

In [19]:
def apply_mamdani_rules(fuzzy_features):
    resp_low, resp_medium, resp_high, hr_low, hr_medium, hr_high, ph_low, ph_medium, ph_high, \
    bp_systolic_low, bp_systolic_medium, bp_systolic_high, bicarbonate_low, bicarbonate_medium, bicarbonate_high = fuzzy_features

    rule_1 = min(resp_high, hr_high)
    rule_2 = min(resp_medium, hr_medium) 
    rule_3 = min(resp_low, hr_low)
    rule_4 = min(ph_low, bp_systolic_low)
    rule_5 = min(bicarbonate_low, bp_systolic_high)
    rule_6 = min(ph_medium, hr_medium)
    rule_7 = min(ph_high, bicarbonate_high)
    
    aggregated_output = max(rule_1, rule_2, rule_3, rule_4, rule_5, rule_6, rule_7)
    
    if aggregated_output == 0:
        return 0
    else:
        output_range = np.arange(0, 1.1, 0.1)
        crisp_output = fuzz.defuzz(output_range, np.full_like(output_range, aggregated_output), 'centroid')
        return crisp_output

In [20]:
columns_to_transform = ["resp", "heart_rate", "ph", "bp_systolic", "bicarbonate"]
fuzzy_X = apply_fuzzy_logic(X, columns_to_transform)
fuzzy_X['mamdani_output'] = fuzzy_X.apply(lambda row: apply_mamdani_rules(row), axis=1)

combined_X = pd.concat([X, fuzzy_X], axis=1)

X_train, X_test, y_train, y_test = train_test_split(combined_X, y, test_size=0.2, random_state=42)

tpot = TPOTClassifier(generations=5, population_size=50, verbosity=2, random_state=42)
tpot.fit(X_train, y_train)

accuracy = tpot.score(X_test, y_test)
print(f"TPOT Accuracy: {accuracy}")

is_classifier
is_regressor
is_classifier
is_regressor
is_classifier
is_regressor
is_classifier
is_classifier
is_classifier
is_classifier
is_regressor
is_classifier




is_regressor
is_classifier
is_regressor
is_classifier
is_regressor
is_classifier
is_regressor
is_classifier
is_regressor
is_classifier
is_regressor
is_classifier
is_classifier
is_classifier
is_classifier
is_classifier
is_classifier
is_classifier
is_classifier
is_regressor
is_classifier
is_regressor
is_classifier
is_regressor
is_classifier
is_regressor
is_classifier
is_regressor
is_classifier
is_regressor
is_classifier
is_regressor
is_classifier
is_classifier
is_classifier
is_regressor
is_classifier
is_regressor
is_classifier


Optimization Progress:   0%|          | 0/300 [00:00<?, ?pipeline/s]


Generation 1 - Current best internal CV score: 0.7275

Generation 2 - Current best internal CV score: 0.7275

Generation 3 - Current best internal CV score: 0.7275

Generation 4 - Current best internal CV score: 0.7275

Generation 5 - Current best internal CV score: 0.73

Best pipeline: ExtraTreesClassifier(MLPClassifier(input_matrix, alpha=0.01, learning_rate_init=0.1), bootstrap=False, criterion=entropy, max_features=0.25, min_samples_leaf=7, min_samples_split=15, n_estimators=100)
TPOT Accuracy: 0.6766666666666666




In [21]:
tpot.export('best_pipeline.py')