In [None]:
!pip install gdown

import pandas as pd
import gdown

url = 'https://drive.google.com/uc?id=1DFS1XlIfz2zzob54jg5Nk21Q2aQT4Exs'
output = 'Hypertension-risk-model-main.csv'
gdown.download(url, output, quiet=False)


data = pd.read_csv(output)



Downloading...
From: https://drive.google.com/uc?id=1DFS1XlIfz2zzob54jg5Nk21Q2aQT4Exs
To: /content/Hypertension-risk-model-main.csv
100%|██████████| 170k/170k [00:00<00:00, 6.11MB/s]


In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.experimental import enable_iterative_imputer
from sklearn.impute import IterativeImputer
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
import joblib

# Load the hypertension dataset and print a quick overview of its contents.
dataset_path = 'Hypertension-risk-model-main.csv'
data = pd.read_csv(dataset_path)

print("Dataset Head:")
print(data.head())
print("\nDataset Info:")
# DataFrame.info() writes its report straight to stdout and returns None, so
# wrapping it in print() would append a stray "None" line to the output.
data.info()
print("\nSummary Statistics:")
print(data.describe())

# Per-column count of missing cells; these are what the imputer fills later.
print("\nMissing Values:")
print(data.isnull().sum())

# Separate the target from the features first so 'Risk' is never shown to the
# imputer (imputing features from the label leaks the answer into the inputs).
# One-hot encoding runs before imputation because the imputer needs numeric
# input; for an all-numeric frame get_dummies is a no-op.
X = pd.get_dummies(data.drop('Risk', axis=1), drop_first=True)
y = data['Risk']

# Split BEFORE fitting any transformer: the imputer and scaler must learn their
# statistics from the training rows only, otherwise test-set information leaks
# into training and the reported metrics are optimistically biased.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit the imputer on the training features, then apply it unchanged to the test
# features. Results are wrapped back into DataFrames so the scaler records the
# column names it was fitted with.
imputer = IterativeImputer(random_state=42)
X_train_imputed = pd.DataFrame(
    imputer.fit_transform(X_train_raw),
    columns=X.columns,
    index=X_train_raw.index,
)
X_test_imputed = pd.DataFrame(
    imputer.transform(X_test_raw),
    columns=X.columns,
    index=X_test_raw.index,
)

# Same rule for scaling: fit on train, transform both splits.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_imputed)
X_test = scaler.transform(X_test_imputed)


def fit_and_report(title, model, X_fit, y_fit, X_eval, y_eval):
    """Fit ``model`` and print its hold-out metrics.

    Parameters
    ----------
    title : str
        Human-readable model name used in the "<title> Results:" heading.
    model : estimator
        Unfitted classifier exposing ``fit`` and ``predict``; fitted in place.
    X_fit, y_fit
        Training features and labels passed to ``model.fit``.
    X_eval, y_eval
        Evaluation features and true labels used for the printed metrics.

    Returns
    -------
    Predictions of the fitted model on ``X_eval``.
    """
    model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    print(f"\n{title} Results:")
    print("Accuracy:", accuracy_score(y_eval, predictions))
    print("Confusion Matrix:")
    print(confusion_matrix(y_eval, predictions))
    print("Classification Report:")
    print(classification_report(y_eval, predictions))
    return predictions


#### Random Forest
rf_model = RandomForestClassifier(random_state=42)
rf_y_pred = fit_and_report("Random Forest", rf_model, X_train, y_train, X_test, y_test)

#### Naive Bayes
nb_model = GaussianNB()
nb_y_pred = fit_and_report("Naive Bayes", nb_model, X_train, y_train, X_test, y_test)

#### Support Vector Machine (SVM)
# probability=True is required so predict_proba is available on this model later.
svm_model = SVC(probability=True, random_state=42)
svm_y_pred = fit_and_report("SVM", svm_model, X_train, y_train, X_test, y_test)

#### Decision Tree
dt_model = DecisionTreeClassifier(random_state=42)
dt_y_pred = fit_and_report("Decision Tree", dt_model, X_train, y_train, X_test, y_test)

# Persist the two tree-based models and the fitted scaler so they can be
# reloaded for prediction without retraining.
rf_model_filename = 'hypertension_rf_model.pkl'
dt_model_filename = 'hypertension_dt_model.pkl'
scaler_filename = 'scaler.pkl'

# Each (object, destination) pair is written with joblib in the same order as
# before: random forest, decision tree, scaler.
for artifact, destination in (
    (rf_model, rf_model_filename),
    (dt_model, dt_model_filename),
    (scaler, scaler_filename),
):
    joblib.dump(artifact, destination)

print("\nRandom Forest and Decision Tree Models saved as 'hypertension_rf_model.pkl' and 'hypertension_dt_model.pkl'.")
print("Scaler saved as 'scaler.pkl'.")

# Interactively collect one value per feature, then scale the resulting row
# with the scaler that was saved to disk.
print("\nProvide new data for prediction:")
input_data = []

feature_names = X.columns
for feature in feature_names:
    # Re-prompt on bad input instead of crashing: a single typo would otherwise
    # raise ValueError and throw away every value already entered.
    while True:
        raw_value = input(f"Enter value for {feature}: ")
        try:
            value = float(raw_value)
        except ValueError:
            print(f"'{raw_value}' is not a number; please try again.")
            continue
        # float() happily parses 'nan' and 'inf', which the scaler would reject.
        if not np.isfinite(value):
            print("Value must be a finite number; please try again.")
            continue
        break
    input_data.append(value)

# Build a one-row DataFrame carrying the training column names. The scaler was
# fitted on a DataFrame, so passing a bare ndarray makes scikit-learn warn
# about missing feature names and skips its column-order check.
new_data = pd.DataFrame([input_data], columns=feature_names)

scaler = joblib.load(scaler_filename)
new_data_scaled = scaler.transform(new_data)

def report_prediction(title, model, features):
    """Predict the class of a single sample and print it with its probability.

    Parameters
    ----------
    title : str
        Human-readable model name used in the "<title> Risk Prediction:" line.
    model : fitted classifier
        Must expose ``predict``, ``predict_proba`` and ``classes_``.
    features
        Feature matrix whose first row is the sample to classify.

    Returns
    -------
    tuple
        ``(prediction, probabilities)`` where ``probabilities`` holds the
        per-class probabilities of the first row, expressed in percent.
    """
    prediction = model.predict(features)[0]
    probabilities = model.predict_proba(features)[0] * 100
    # predict_proba columns follow the order of model.classes_, so look the
    # predicted label up there rather than assuming label == column position.
    class_position = list(model.classes_).index(prediction)
    print(f"\n{title} Risk Prediction: {prediction}")
    print(f"Prediction Probability: {probabilities[class_position]:.2f}%")
    return prediction, probabilities


#### Random Forest Prediction
rf_model = joblib.load(rf_model_filename)
rf_prediction, rf_probabilities = report_prediction("Random Forest", rf_model, new_data_scaled)

#### Decision Tree Prediction
dt_model = joblib.load(dt_model_filename)
dt_prediction, dt_probabilities = report_prediction("Decision Tree", dt_model, new_data_scaled)

#### Naive Bayes Prediction
# Not persisted to disk above; uses the in-memory model from training.
nb_prediction, nb_probabilities = report_prediction("Naive Bayes", nb_model, new_data_scaled)

#### SVM Prediction
# Not persisted to disk above; uses the in-memory model from training.
svm_prediction, svm_probabilities = report_prediction("SVM", svm_model, new_data_scaled)


Dataset Head:
   male  age  currentSmoker  cigsPerDay  BPMeds  diabetes  totChol  sysBP  \
0     1   39              0         0.0     0.0         0    195.0  106.0   
1     0   46              0         0.0     0.0         0    250.0  121.0   
2     1   48              1        20.0     0.0         0    245.0  127.5   
3     0   61              1        30.0     0.0         0    225.0  150.0   
4     0   46              1        23.0     0.0         0    285.0  130.0   

   diaBP    BMI  heartRate  glucose  Risk  
0   70.0  26.97       80.0     77.0     0  
1   81.0  28.73       95.0     76.0     0  
2   80.0  25.34       75.0     70.0     0  
3   95.0  28.58       65.0    103.0     1  
4   84.0  23.10       85.0     85.0     0  

Dataset Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4240 entries, 0 to 4239
Data columns (total 13 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   male           4240 non-null   int64  
 1   