In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [6]:
df = pd.read_excel('Medis.xlsx')
df.head()

Unnamed: 0,Age,Gender,Heart rate,Systolic blood pressure,Diastolic blood pressure,Blood sugar,CK-MB,Troponin,Result
0,63,1,66,160,83,160.0,1.8,0.012,negative
1,20,1,94,98,46,296.0,6.75,1.06,positive
2,56,1,64,160,77,270.0,1.99,0.003,negative
3,66,1,70,120,55,270.0,13.87,0.122,positive
4,54,1,64,112,65,300.0,1.08,0.003,negative


In [7]:
df['Result'] = df['Result'].map({'positive': 1, 'negative': 0})

Replace 'positive' and 'negative' with binary values (1 for positive and 0 for negative)

In [8]:
df.head()

Unnamed: 0,Age,Gender,Heart rate,Systolic blood pressure,Diastolic blood pressure,Blood sugar,CK-MB,Troponin,Result
0,63,1,66,160,83,160.0,1.8,0.012,0
1,20,1,94,98,46,296.0,6.75,1.06,1
2,56,1,64,160,77,270.0,1.99,0.003,0
3,66,1,70,120,55,270.0,13.87,0.122,1
4,54,1,64,112,65,300.0,1.08,0.003,0


In [9]:
df = df.dropna()

In [10]:
x = df.drop(columns=['Result'])
y = df['Result']

Separating features and target variable

In [11]:
print(x)

      Age  Gender  Heart rate  Systolic blood pressure  \
0      63       1          66                      160   
1      20       1          94                       98   
2      56       1          64                      160   
3      66       1          70                      120   
4      54       1          64                      112   
...   ...     ...         ...                      ...   
1314   44       1          94                      122   
1315   66       1          84                      125   
1316   45       1          85                      168   
1317   54       1          58                      117   
1318   51       1          94                      157   

      Diastolic blood pressure  Blood sugar  CK-MB  Troponin  
0                           83        160.0   1.80     0.012  
1                           46        296.0   6.75     1.060  
2                           77        270.0   1.99     0.003  
3                           55        270.0  13.87 

In [12]:
print(y)

0       0
1       1
2       0
3       1
4       0
       ..
1314    0
1315    1
1316    1
1317    1
1318    1
Name: Result, Length: 1319, dtype: int64


In [13]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3, random_state=42)

Splitting the dataset into trainin and testing sets (70% training and 30% testing)

In [14]:
print(x_test)

      Age  Gender  Heart rate  Systolic blood pressure  \
677    76       1          73                      114   
1046   30       0          68                       91   
610    50       1          63                       98   
49     38       0          80                      152   
1284   29       1          81                      150   
...   ...     ...         ...                      ...   
141    50       1          80                       98   
1169   53       1          61                      119   
613    69       1          89                      160   
543    60       1          70                      120   
139    43       0          79                       89   

      Diastolic blood pressure  Blood sugar   CK-MB  Troponin  
677                         68        144.0  297.50     0.024  
1046                        61         93.0    3.93     0.003  
610                         57        111.0    2.55     0.006  
49                          78        133.0    

In [15]:
print(x_train)

      Age  Gender  Heart rate  Systolic blood pressure  \
1156   64       0         117                      112   
1023   77       0          94                       91   
224    75       0         116                      144   
590    55       0          64                      121   
274    39       1          94                      105   
...   ...     ...         ...                      ...   
1095   50       0          85                      119   
1130   57       1          78                      101   
1294   50       0          79                       92   
860    54       1          72                      154   
1126  103       0          56                      171   

      Diastolic blood pressure  Blood sugar   CK-MB  Troponin  
1156                        74        146.0   2.880     0.088  
1023                        52        116.0   1.420     0.026  
224                         82         96.0   4.000     0.012  
590                         58        103.0  13

In [16]:
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

Standardize the features values

In [17]:
model = LogisticRegression()

In [18]:
model.fit(x_train_scaled, y_train)

In [19]:
y_pred = model.predict(x_test_scaled)

In [20]:
accuracy = accuracy_score(y_test, y_pred)
conf_matrix = confusion_matrix(y_test, y_pred)
class_report = classification_report(y_test, y_pred)

In [21]:
print(f"Accuracy: {accuracy * 100:.2f}%")
print("Confusion Matrix:")
print(conf_matrix)

Accuracy: 78.03%
Confusion Matrix:
[[110  45]
 [ 42 199]]


In [22]:
print("Classification Report:")
print(class_report)

Classification Report:
              precision    recall  f1-score   support

           0       0.72      0.71      0.72       155
           1       0.82      0.83      0.82       241

    accuracy                           0.78       396
   macro avg       0.77      0.77      0.77       396
weighted avg       0.78      0.78      0.78       396



In [23]:
correlation_matrix = df.corr()
print(correlation_matrix['Result'].sort_values(ascending=False))

Result                      1.000000
Age                         0.238002
Troponin                    0.229376
CK-MB                       0.217720
Gender                      0.094432
Heart rate                  0.006920
Diastolic blood pressure   -0.009659
Systolic blood pressure    -0.020825
Blood sugar                -0.033059
Name: Result, dtype: float64


Finding the correlation that has the most influence on the prediction results