In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Expanded synthetic data with more symptoms.
# Every key except 'disease' is a binary symptom column (1 = present, 0 = absent);
# list position i across all columns describes synthetic patient i, and
# 'disease' holds the label for that position.
# Note: in every column positions 4-7 repeat positions 0-3, so the table contains
# only four distinct rows (one per disease), each appearing twice.
data = {
    'memory_loss': [1, 0, 0, 0, 1, 0, 0, 0],
    'confusion': [1, 0, 0, 0, 1, 0, 0, 0],
    'difficulty_recognizing_people': [1, 0, 0, 0, 1, 0, 0, 0],
    'poor_judgment': [1, 0, 0, 0, 1, 0, 0, 0],
    'tremors': [0, 1, 0, 0, 0, 1, 0, 0],
    'slowness': [0, 1, 0, 0, 0, 1, 0, 0],
    'stooped_posture': [0, 1, 0, 0, 0, 1, 0, 0],
    'small_handwriting': [0, 1, 0, 0, 0, 1, 0, 0],
    'drooling': [0, 1, 0, 0, 0, 1, 0, 0],
    'muscle_weakness': [0, 0, 1, 0, 0, 0, 1, 0],
    'chorea': [0, 0, 0, 1, 0, 0, 0, 1],
    'mood_swings': [0, 0, 0, 1, 0, 0, 0, 1],
    'speech_difficulty': [0, 1, 1, 1, 0, 1, 1, 1],
    'difficulty_swallowing': [0, 0, 1, 1, 0, 0, 1, 1],
    'muscle_cramps': [0, 0, 1, 0, 0, 0, 1, 0],
    'impaired_balance': [0, 1, 0, 0, 0, 1, 0, 0],
    'cognitive_decline': [1, 0, 0, 1, 1, 0, 0, 1],
    'personality_changes': [1, 0, 0, 1, 1, 0, 0, 1],
    'seizures': [0, 0, 0, 1, 0, 0, 0, 1],
    'difficulty_holding_objects': [0, 0, 1, 0, 0, 0, 1, 0],
    'head_drop': [0, 0, 1, 0, 0, 0, 1, 0],
    'emotional_lability': [0, 0, 1, 0, 0, 0, 1, 0],
    'disease': ['Alzheimer’s', 'Parkinson’s', 'ALS', 'Huntington’s',
                'Alzheimer’s', 'Parkinson’s', 'ALS', 'Huntington’s']
}

# Convert to DataFrame
df = pd.DataFrame(data)

# Features (symptoms)
X = df.drop(columns=['disease'])

# Target (disease)
y = df['disease']

# Split the data into train and test sets.
# The table has only 8 rows (2 per disease). An unstratified 20% hold-out is just
# 2 rows and can leave a disease with no training rows at all, in which case the
# model can never predict it. Stratifying keeps every disease on both sides of
# the split; stratification needs at least as many test rows as there are
# classes (4), hence test_size=0.5 instead of 0.2.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.5, random_state=42, stratify=y
)

# Build a simple Logistic Regression model
model = LogisticRegression()
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model's accuracy (fraction of held-out rows labelled correctly)
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy: {accuracy * 100:.2f}%')

# Function to make predictions based on symptoms
def predict_disease(symptoms, estimator=None, feature_names=None):
    """Predict a disease label from the names of the symptoms that are present.

    A single-row DataFrame is built with one column per known symptom, all 0
    except the columns named in ``symptoms`` (set to 1), and the first element
    of ``estimator.predict`` for that row is returned.

    Parameters
    ----------
    symptoms : str or iterable of str
        Symptom names that are present. A bare string is treated as one
        symptom instead of being iterated character by character.
    estimator : object with a ``predict`` method, optional
        Classifier to query. Defaults to the module-level ``model``.
    feature_names : iterable of str, optional
        Feature column names. Defaults to ``X.columns``.

    Returns
    -------
    The first element of the estimator's prediction for the encoded row.

    Warns
    -----
    UserWarning
        When some of the given names are not known features. Those names are
        still ignored, but no longer silently (a typo would otherwise look
        like "symptom absent").
    """
    import warnings

    # Globals are only looked up when the caller did not supply an override.
    clf = model if estimator is None else estimator
    columns = list(X.columns if feature_names is None else feature_names)

    if isinstance(symptoms, str):
        symptoms = [symptoms]

    symptom_dict = dict.fromkeys(columns, 0)
    unknown = []
    for symptom in symptoms:
        if symptom in symptom_dict:
            symptom_dict[symptom] = 1
        else:
            unknown.append(symptom)
    if unknown:
        warnings.warn(f'Ignoring unknown symptoms: {unknown}', stacklevel=2)

    # Pass columns explicitly so the frame keeps the expected column order.
    input_data = pd.DataFrame([symptom_dict], columns=columns)
    prediction = clf.predict(input_data)
    return prediction[0]

# Try the classifier on a hand-picked combination of symptoms
user_symptoms = [
    'memory_loss',
    'cognitive_decline',
    'poor_judgment',
    'confusion',
]
predicted_disease = predict_disease(user_symptoms)
print(f'Predicted Disease: {predicted_disease}')


Model Accuracy: 0.00%
Predicted Disease: Alzheimer’s


In [None]:
# Symptom names to classify; this cell only assigns the list and does not use it
user_symptoms = [
    'memory_loss',
    'confusion',
    'poor_judgment',
    'cognitive_decline',
]


In [None]:
import pandas as pd
import random
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Symptom profile per disease: the listed symptoms are the ones treated as
# present (1) for that disease; any symptom not listed is absent (0).
symptoms_profile = {
    'Alzheimer’s': [
        'memory_loss',
        'confusion',
        'difficulty_recognizing_people',
        'poor_judgment',
        'cognitive_decline',
    ],
    'Parkinson’s': [
        'tremors',
        'slowness',
        'stooped_posture',
        'small_handwriting',
        'drooling',
    ],
    'ALS': [
        'muscle_weakness',
        'speech_difficulty',
        'difficulty_swallowing',
        'muscle_cramps',
        'difficulty_holding_objects',
    ],
    'Huntington’s': [
        'chorea',
        'mood_swings',
        'seizures',
        'cognitive_decline',
        'personality_changes',
    ],
}

# Disease labels, taken from the profile keys (dicts keep insertion order)
diseases = list(symptoms_profile)

# Create a function to generate synthetic patients
def generate_synthetic_data(num_patients=50, seed=None, profiles=None):
    """Generate synthetic patient records from per-disease symptom profiles.

    Each record is a dict with one 0/1 entry per symptom (1 exactly when the
    symptom is listed in the drawn disease's profile) plus a ``'disease'``
    entry holding the drawn label.

    Parameters
    ----------
    num_patients : int
        Number of records to generate.
    seed : optional
        When given, a private ``random.Random(seed)`` is used so the output is
        reproducible. When ``None`` the shared ``random`` module state is used.
    profiles : dict[str, list[str]], optional
        Mapping of disease name to its symptom names. Defaults to the
        module-level ``symptoms_profile`` (with labels drawn from ``diseases``).

    Returns
    -------
    list[dict]
        One dict per patient; symptom keys appear in first-seen profile order.
    """
    if profiles is None:
        profiles = symptoms_profile
        disease_names = diseases
    else:
        disease_names = list(profiles)

    rng = random if seed is None else random.Random(seed)

    # De-duplicate while keeping first-seen order. Iterating a set of strings
    # gives an order that can change from one interpreter run to the next, which
    # would reorder the resulting feature columns. Computed once, not per patient.
    all_symptoms = list(dict.fromkeys(
        symptom for profile in profiles.values() for symptom in profile
    ))

    records = []
    for _ in range(num_patients):
        # Randomly select a disease
        disease = rng.choice(disease_names)
        present = set(profiles[disease])
        record = {symptom: 1 if symptom in present else 0 for symptom in all_symptoms}
        # Append the disease label
        record['disease'] = disease
        records.append(record)
    return records

# Seed Python's random module so the randomly drawn patients, and therefore the
# split and the reported accuracy, are the same on every run of this cell
random.seed(42)

# Generate synthetic data for 50 patients
synthetic_data = generate_synthetic_data(num_patients=50)

# Convert to DataFrame
df = pd.DataFrame(synthetic_data)

# Features (symptoms)
X = df.drop(columns=['disease'])

# Target (disease)
y = df['disease']

# Split the data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Build a simple Logistic Regression model
model = LogisticRegression(max_iter=1000)
model.fit(X_train, y_train)

# Predict on the test set
y_pred = model.predict(X_test)

# Evaluate the model's accuracy (fraction of held-out rows labelled correctly)
accuracy = accuracy_score(y_test, y_pred)
print(f'Model Accuracy: {accuracy * 100:.2f}%')

# Function to make predictions based on symptoms
def predict_disease(symptoms, estimator=None, feature_names=None):
    """Predict a disease label from the names of the symptoms that are present.

    Encodes ``symptoms`` as a single-row 0/1 DataFrame over the known feature
    columns and returns the first element of ``estimator.predict`` for it.

    Parameters
    ----------
    symptoms : str or iterable of str
        Symptom names that are present. A bare string counts as one symptom
        rather than being iterated character by character.
    estimator : object with a ``predict`` method, optional
        Classifier to query. Defaults to the module-level ``model``.
    feature_names : iterable of str, optional
        Feature column names. Defaults to ``X.columns``.

    Returns
    -------
    The first element of the estimator's prediction for the encoded row.

    Warns
    -----
    UserWarning
        When some given names are not known features; they are ignored.
    """
    import warnings

    # Fall back to the notebook globals only when no override was supplied.
    clf = model if estimator is None else estimator
    columns = list(X.columns if feature_names is None else feature_names)

    if isinstance(symptoms, str):
        symptoms = [symptoms]

    symptom_dict = dict.fromkeys(columns, 0)
    unknown = []
    for symptom in symptoms:
        if symptom in symptom_dict:
            symptom_dict[symptom] = 1
        else:
            unknown.append(symptom)
    if unknown:
        # Surface typos instead of quietly treating them as "symptom absent".
        warnings.warn(f'Ignoring unknown symptoms: {unknown}', stacklevel=2)

    input_data = pd.DataFrame([symptom_dict], columns=columns)
    prediction = clf.predict(input_data)
    return prediction[0]

# Try the classifier on a hand-picked combination of symptoms
user_symptoms = [
    'memory_loss',
    'cognitive_decline',
    'poor_judgment',
    'confusion',
]
predicted_disease = predict_disease(user_symptoms)
print(f'Predicted Disease: {predicted_disease}')


Model Accuracy: 100.00%
Predicted Disease: Alzheimer’s


In [None]:
# Second example: a different set of symptoms fed to the same predict_disease helper
user_symptoms = [
    'tremors',
    'slowness',
    'drooling',
]
predicted_disease = predict_disease(user_symptoms)
print(f'Predicted Disease: {predicted_disease}')


Predicted Disease: Parkinson’s


In [None]:
import pandas as pd
import random

# Symptom profile per disease: the symptoms that may be marked present for a
# patient with that disease.
symptoms_profile = {
    'Alzheimer’s': [
        'memory_loss',
        'confusion',
        'difficulty_recognizing_people',
        'poor_judgment',
        'cognitive_decline',
    ],
    'Parkinson’s': [
        'tremors',
        'slowness',
        'stooped_posture',
        'small_handwriting',
        'drooling',
    ],
    'ALS': [
        'muscle_weakness',
        'speech_difficulty',
        'difficulty_swallowing',
        'muscle_cramps',
        'difficulty_holding_objects',
    ],
    'Huntington’s': [
        'chorea',
        'mood_swings',
        'seizures',
        'cognitive_decline',
        'personality_changes',
    ],
}

# Disease labels, taken from the profile keys (dicts keep insertion order)
diseases = list(symptoms_profile)

# Expand the dataset to 200 patients
def generate_synthetic_data(num_patients=200, seed=None, profiles=None, presence_prob=0.7):
    """Generate noisy synthetic patient records from per-disease symptom profiles.

    For each patient a disease is drawn at random. Every symptom in that
    disease's profile is then marked present (1) with probability
    ``presence_prob``; symptoms outside the profile are always 0. The drawn
    label is stored under ``'disease'``.

    Parameters
    ----------
    num_patients : int
        Number of records to generate.
    seed : optional
        When given, a private ``random.Random(seed)`` is used so the output is
        reproducible. When ``None`` the shared ``random`` module state is used.
    profiles : dict[str, list[str]], optional
        Mapping of disease name to its symptom names. Defaults to the
        module-level ``symptoms_profile`` (with labels drawn from ``diseases``).
    presence_prob : float
        Chance that an in-profile symptom is marked present.

    Returns
    -------
    list[dict]
        One dict per patient; symptom keys appear in first-seen profile order.
    """
    if profiles is None:
        profiles = symptoms_profile
        disease_names = diseases
    else:
        disease_names = list(profiles)

    rng = random if seed is None else random.Random(seed)

    # De-duplicate while keeping first-seen order. Iterating a set of strings
    # gives an order that can change from one interpreter run to the next, which
    # would reorder both the feature columns and the sequence of random draws.
    all_symptoms = list(dict.fromkeys(
        symptom for profile in profiles.values() for symptom in profile
    ))

    records = []
    for _ in range(num_patients):
        # Randomly select a disease
        disease = rng.choice(disease_names)
        in_profile = set(profiles[disease])
        # `and` short-circuits, so a random number is drawn only for symptoms
        # that belong to the disease's profile.
        record = {
            symptom: 1 if (symptom in in_profile and rng.random() < presence_prob) else 0
            for symptom in all_symptoms
        }
        # Append the disease label
        record['disease'] = disease
        records.append(record)
    return records

# scikit-learn pieces used by this cell, grouped at the top instead of being
# scattered between the steps
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Seed Python's random module so the randomly drawn patients and symptoms, and
# therefore the split and the reported accuracy, are the same on every run
random.seed(42)

# Generate synthetic data for 200 patients
expanded_synthetic_data = generate_synthetic_data(num_patients=200)

# Convert to DataFrame
df_expanded = pd.DataFrame(expanded_synthetic_data)

# Features (symptoms)
X_expanded = df_expanded.drop(columns=['disease'])

# Target (disease)
y_expanded = df_expanded['disease']

# Split the data into train and test sets
X_train_expanded, X_test_expanded, y_train_expanded, y_test_expanded = train_test_split(
    X_expanded, y_expanded, test_size=0.2, random_state=42
)

# Train the model on the expanded dataset
model_expanded = LogisticRegression(max_iter=1000)
model_expanded.fit(X_train_expanded, y_train_expanded)

# Predict on the test set
y_pred_expanded = model_expanded.predict(X_test_expanded)

# Evaluate the model's accuracy (fraction of held-out rows labelled correctly)
accuracy_expanded = accuracy_score(y_test_expanded, y_pred_expanded)
print(f'Expanded Model Accuracy: {accuracy_expanded * 100:.2f}%')

# Function to make predictions based on symptoms in expanded dataset
def predict_disease_expanded(symptoms, estimator=None, feature_names=None):
    """Predict a disease label with the model trained on the expanded dataset.

    Encodes ``symptoms`` as a single-row 0/1 DataFrame over the known feature
    columns and returns the first element of ``estimator.predict`` for it.

    Parameters
    ----------
    symptoms : str or iterable of str
        Symptom names that are present. A bare string counts as one symptom
        rather than being iterated character by character.
    estimator : object with a ``predict`` method, optional
        Classifier to query. Defaults to the module-level ``model_expanded``.
    feature_names : iterable of str, optional
        Feature column names. Defaults to ``X_expanded.columns``.

    Returns
    -------
    The first element of the estimator's prediction for the encoded row.

    Warns
    -----
    UserWarning
        When some given names are not known features; they are ignored.
    """
    import warnings

    # Fall back to the notebook globals only when no override was supplied.
    clf = model_expanded if estimator is None else estimator
    columns = list(X_expanded.columns if feature_names is None else feature_names)

    if isinstance(symptoms, str):
        symptoms = [symptoms]

    symptom_dict = dict.fromkeys(columns, 0)
    unknown = []
    for symptom in symptoms:
        if symptom in symptom_dict:
            symptom_dict[symptom] = 1
        else:
            unknown.append(symptom)
    if unknown:
        # Surface typos instead of quietly treating them as "symptom absent".
        warnings.warn(f'Ignoring unknown symptoms: {unknown}', stacklevel=2)

    input_data = pd.DataFrame([symptom_dict], columns=columns)
    prediction = clf.predict(input_data)
    return prediction[0]

# Query the expanded model with a small set of symptoms
user_symptoms_expanded = [
    'tremors',
    'slowness',
    'drooling',
]
predicted_disease_expanded = predict_disease_expanded(user_symptoms_expanded)
print(f'Predicted Disease with Expanded Dataset: {predicted_disease_expanded}')


Expanded Model Accuracy: 100.00%
Predicted Disease with Expanded Dataset: Parkinson’s


In [None]:
from sklearn.model_selection import cross_val_score
from sklearn.linear_model import LogisticRegression

# 5-fold cross-validation of a fresh, unfitted logistic-regression classifier
# over the whole expanded dataset
logreg_model = LogisticRegression(max_iter=1000)
cv_scores = cross_val_score(
    estimator=logreg_model,
    X=X_expanded,
    y=y_expanded,
    cv=5,
)

# Report the per-fold scores, then their mean as a percentage
print(f'Cross-Validation Scores: {cv_scores}')
print(f'Mean Cross-Validation Accuracy: {cv_scores.mean() * 100:.2f}%')


Cross-Validation Scores: [1.    1.    1.    0.975 1.   ]
Mean Cross-Validation Accuracy: 99.50%


In [None]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_val_score

# 5-fold cross-validation of a 100-tree random forest (fixed random_state)
# over the whole expanded dataset
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_cv_scores = cross_val_score(
    estimator=rf_model,
    X=X_expanded,
    y=y_expanded,
    cv=5,
)

# Report the per-fold scores, then their mean as a percentage
print(f'Random Forest Cross-Validation Scores: {rf_cv_scores}')
print(f'Mean Random Forest Accuracy: {rf_cv_scores.mean() * 100:.2f}%')


Random Forest Cross-Validation Scores: [1.    1.    1.    0.975 1.   ]
Mean Random Forest Accuracy: 99.50%


In [None]:
from sklearn.svm import SVC

# 5-fold cross-validation of a support-vector classifier with default settings
# over the whole expanded dataset
svm_model = SVC()
svm_cv_scores = cross_val_score(
    estimator=svm_model,
    X=X_expanded,
    y=y_expanded,
    cv=5,
)

# Report the per-fold scores, then their mean as a percentage
print(f'SVM Cross-Validation Scores: {svm_cv_scores}')
print(f'Mean SVM Accuracy: {svm_cv_scores.mean() * 100:.2f}%')


SVM Cross-Validation Scores: [1.    1.    1.    0.975 1.   ]
Mean SVM Accuracy: 99.50%


In [None]:
import joblib

# Persist the fitted classifier bound to `model` to a file in the working directory.
# NOTE(review): both `model` and `model_expanded` are fitted earlier in this
# notebook; confirm that `model` is the one meant to be shipped.
# The call is left as the cell's last expression, so its return value is echoed
# as the cell output.
joblib.dump(value=model, filename='disease_prediction_model.pkl')

# Reading it back later:
# loaded_model = joblib.load('disease_prediction_model.pkl')


['disease_prediction_model.pkl']

In [None]:
pip install flask joblib




In [None]:
from flask import Flask, request, jsonify
import joblib

# Flask application object that the route below is registered on
app = Flask(__name__)

# Restore the classifier that was written to disk with joblib.dump
model = joblib.load('disease_prediction_model.pkl')

@app.route('/predict', methods=['POST'])
def predict():
    """Handle POST /predict: classify the symptoms sent in the JSON body.

    Expects a JSON object with a ``"symptoms"`` key holding a list of symptom
    name strings. Responds with ``{"predicted_disease": <label>}`` on success,
    or with HTTP 400 and an ``"error"`` message when the body is missing, is
    not JSON, or does not have that shape.
    """
    # silent=True yields None for a missing or unparsable body instead of raising,
    # so every malformed request gets the same JSON 400 below.
    payload = request.get_json(silent=True)
    symptoms = payload.get('symptoms') if isinstance(payload, dict) else None

    # Reject anything but a list of strings. A bare string in particular would
    # otherwise be iterated character by character.
    if not isinstance(symptoms, list) or not all(isinstance(s, str) for s in symptoms):
        return jsonify({'error': 'Request body must be JSON like {"symptoms": ["name", ...]}'}), 400

    prediction = predict_disease(symptoms)
    # str() guarantees a plain, JSON-serialisable value whatever label type the
    # model returns.
    return jsonify({'predicted_disease': str(prediction)})

def predict_disease(symptoms, estimator=None, feature_names=None):
    """Predict a disease label from the names of the symptoms that are present.

    Encodes ``symptoms`` as a single-row 0/1 DataFrame over the feature
    columns and returns the first element of ``estimator.predict`` for it.

    Parameters
    ----------
    symptoms : str or iterable of str
        Symptom names that are present. A bare string counts as one symptom
        rather than being iterated character by character.
    estimator : object with a ``predict`` method, optional
        Classifier to query. Defaults to the module-level ``model``.
    feature_names : iterable of str, optional
        Feature column names. When omitted they are read from the estimator's
        ``feature_names_in_`` attribute if it has one, otherwise from the
        module-level ``X.columns``.

    Returns
    -------
    The first element of the estimator's prediction for the encoded row.

    Warns
    -----
    UserWarning
        When some given names are not known features; they are ignored.
    """
    import warnings

    # Imported here because this cell does not import pandas itself.
    import pandas as pd

    clf = model if estimator is None else estimator

    if feature_names is None:
        # Prefer the column names stored on the fitted estimator, so the server
        # does not need the training DataFrame `X` to still be in memory.
        # Compare with None explicitly: the attribute may be an array, whose
        # truth value is ambiguous.
        feature_names = getattr(clf, 'feature_names_in_', None)
        if feature_names is None:
            feature_names = X.columns
    columns = list(feature_names)

    if isinstance(symptoms, str):
        symptoms = [symptoms]

    symptom_dict = dict.fromkeys(columns, 0)
    unknown = []
    for symptom in symptoms:
        if symptom in symptom_dict:
            symptom_dict[symptom] = 1
        else:
            unknown.append(symptom)
    if unknown:
        # Surface typos instead of quietly treating them as "symptom absent".
        warnings.warn(f'Ignoring unknown symptoms: {unknown}', stacklevel=2)

    input_data = pd.DataFrame([symptom_dict], columns=columns)
    prediction = clf.predict(input_data)
    return prediction[0]

if __name__ == '__main__':
    # debug=True also enables Werkzeug's auto-reloader, which restarts the server
    # by re-executing the launching program. That does not fit a notebook kernel,
    # so the reloader is switched off while debug mode stays on.
    # Debug mode exposes an interactive debugger; keep it to local development.
    app.run(debug=True, use_reloader=False)


 * Serving Flask app '__main__'
 * Debug mode: on


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug: * Restarting with stat


In [None]:
# Sample request body for POST /predict: an object whose "symptoms" key holds
# the list of symptom names to classify.
{
  "symptoms": ["memory_loss", "cognitive_decline", "poor_judgment", "confusion"]
}
