In [None]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from imblearn.over_sampling import SMOTE
import pickle

In [None]:
# Load the raw liver dataset from a hard-coded absolute path, decoding the file as Latin-1.
csv_path = '/content/Liver data1.csv'
liver_dataset = pd.read_csv(csv_path, encoding='latin1')

In [None]:
#print("Original Result Distribution:\n", liver_dataset['Result'].value_counts())

In [None]:
# Drop every row that has a missing value. The explicit copy gives an independent
# frame, so the column assignment below cannot be flagged by pandas as a write to a
# view of the original frame.
liver_dataset = liver_dataset.dropna().copy()
# Recode the target: 1 stays 1, 2 becomes 0. The extra 0 -> 0 entry keeps this cell
# idempotent: without it, re-running the cell would turn the already-recoded 0s
# into NaN, because Series.map yields NaN for keys missing from the mapping.
liver_dataset['Result'] = liver_dataset['Result'].map({1: 1, 2: 0, 0: 0})

In [None]:
# Guard: stop early unless the target column holds exactly two distinct classes.
# nunique() does not count NaN, so only real labels are considered here.
n_classes = liver_dataset['Result'].nunique()
if n_classes != 2:
    raise ValueError("Target variable should have two classes.")

In [None]:
# Features: every column except the target.
X = liver_dataset.drop(columns='Result')
# Target: the recoded 'Result' column.
Y = liver_dataset['Result']

In [None]:
# Standardizer used both for the training features and for new records at prediction time.
scaler = StandardScaler()

In [None]:
# Fit the scaler on the feature frame X.
# NOTE(review): this uses every row, and happens before the train/test split made
# later in the notebook, so the scaling statistics also reflect the held-out rows.
scaler.fit(X)

In [None]:
# Apply the fitted scaler to all feature rows.
standardized_data = scaler.transform(X)

In [None]:
# Sanity check: show the scaled feature matrix.
print(standardized_data)

[[ 1.30703167 -1.67695682 -0.42762973 ...  0.30273524  0.22207793
  -0.13438341]
 [ 1.11919436  0.59631828  1.18314183 ...  0.95000996  0.09586539
  -0.62790814]
 [ 1.11919436  0.59631828  0.61463422 ...  0.48767087  0.22207793
  -0.1652287 ]
 ...
 [ 0.6182949   0.59631828  0.53567483 ... -0.06713603 -0.03034715
  -0.13438341]
 [ 0.2426203  -1.67695682 -0.23812719 ... -2.00896021 -1.92353525
  -1.05974227]
 [-0.8844035   0.59631828 -0.04862466 ...  0.30273524  0.97935317
   1.09942842]]


In [None]:
# From here on X is the standardized feature matrix (it was the raw feature frame
# before this cell); Y is the recoded target column.
X, Y = standardized_data, liver_dataset['Result']

In [None]:
# Balance the two classes by oversampling with SMOTE (fixed seed for repeatability).
# NOTE(review): this resamples the full dataset, before any train/test split, so
# anything split off from X_smote / Y_smote will contain synthetic rows.
smote = SMOTE(random_state=42)
X_smote, Y_smote = smote.fit_resample(X, Y)

# Report how many rows each class has after resampling.
class_counts = pd.Series(Y_smote).value_counts()
print("SMOTE Result Distribution:\n", class_counts)

SMOTE Result Distribution:
 Result
1    19478
0    19478
Name: count, dtype: int64


In [None]:
# Show the (pre-SMOTE) feature matrix and target, one after the other.
print(X, Y, sep="\n")

[[ 1.30703167 -1.67695682 -0.42762973 ...  0.30273524  0.22207793
  -0.13438341]
 [ 1.11919436  0.59631828  1.18314183 ...  0.95000996  0.09586539
  -0.62790814]
 [ 1.11919436  0.59631828  0.61463422 ...  0.48767087  0.22207793
  -0.1652287 ]
 ...
 [ 0.6182949   0.59631828  0.53567483 ... -0.06713603 -0.03034715
  -0.13438341]
 [ 0.2426203  -1.67695682 -0.23812719 ... -2.00896021 -1.92353525
  -1.05974227]
 [-0.8844035   0.59631828 -0.04862466 ...  0.30273524  0.97935317
   1.09942842]]
0        1
1        1
2        1
3        1
4        1
        ..
30686    1
30687    1
30688    1
30689    1
30690    1
Name: Result, Length: 27158, dtype: int64


In [None]:
# Hold out 20% of the real (non-resampled) rows first, stratified by class, so the
# test set contains no synthetic samples and none interpolated from training rows.
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, stratify=Y, random_state=42
)
# Oversample the minority class in the training portion only; the test portion
# keeps the original class balance so the reported accuracy reflects real data.
X_train, Y_train = SMOTE(random_state=42).fit_resample(X_train, Y_train)
# NOTE(review): X was standardized with a scaler fitted on all rows before this
# split, so the scaling statistics still include the test rows.

In [None]:
# Logistic-regression model with a fixed seed so repeated runs match.
classifier = LogisticRegression(random_state=42)

In [None]:
# Train on the training split.
classifier.fit(X_train, Y_train)

In [None]:
# Predicted labels for the rows the model was trained on.
X_train_prediction = classifier.predict(X_train)

In [None]:
# Training accuracy. accuracy_score expects (y_true, y_pred); the order does not
# change accuracy itself, but it matters as soon as the metric is swapped for an
# asymmetric one such as precision or recall.
X_train_accuracy = accuracy_score(Y_train, X_train_prediction)

In [None]:
# Show the accuracy measured on the training split.
print(X_train_accuracy)

0.7052047233987935


In [None]:
# Accuracy on the held-out split.
X_test_prediction = classifier.predict(X_test)
# accuracy_score expects (y_true, y_pred): true labels first, predictions second.
X_test_accuracy = accuracy_score(Y_test, X_test_prediction)
print(X_test_accuracy)

0.7049537987679672


In [None]:
 input_data=(65, 0, 0.7, 0.1, 187, 16, 18, 6.8, 3.3, 0.9)
 input_np=np.asarray(input_data)
 input_np_reshaped=input_np.reshape(1,-1)
 std_data=scaler.transform(input_np_reshaped)
 print(std_data)
 input_prediction=classifier.predict( std_data)
 print(input_prediction)
if(input_prediction==1):
  print('Liver Disease Present')
else:
  print('Liver Disease not present')

[[ 1.30703167 -1.67695682 -0.42762973 -0.4979673  -0.43049194 -0.35953043
  -0.3318029   0.30273524  0.22207793 -0.13438341]]
[0]
Liver Disease not present




In [None]:
#import pickle


In [None]:
# Persist the trained classifier to disk.
# `filename` must be defined here; it was commented out, so the cell failed on a
# fresh kernel. The context manager closes the file even if dump() raises.
# NOTE(review): only the classifier is saved; predicting on new records also needs
# the fitted `scaler`, which is not persisted anywhere in this notebook.
filename = 'Liver_Disease.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(classifier, model_file)

NameError: name 'pickle' is not defined

In [None]:
#model_loaded=pickle.load(open('Liver_Disease.sav','rb'))