In [None]:
import os
import random

import numpy as np
import pandas as pd
import requests
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    f1_score,
    precision_score,
    recall_score,
)
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load dataset containing health risk thresholds
csv_path = "/content/drive/MyDrive/datasets/narl/health_risk_conditions.csv"
thresholds_df = pd.read_csv(csv_path)

# API setup for OpenWeatherMap.
# The key is read from the OPENWEATHERMAP_API_KEY environment variable when set.
# NOTE(review): the literal fallback below is committed to source and should be
# treated as an exposed credential -- rotate it and drop the fallback once the
# environment variable is configured wherever this script runs.
api_key = os.environ.get("OPENWEATHERMAP_API_KEY", "621510a0118692b0acc4bf703dfcec38")
# HTTPS keeps the key (sent as a query parameter) off the wire in clear text.
base_url = "https://api.openweathermap.org/data/2.5/weather?"

# Load and prepare the data for training the Random Forest model
def train_model(noise_std=0.5, flip_fraction=0.05, seed=None):
    """Train a Random Forest on the threshold table and return it with its evaluation data.

    Features are the midpoints of each row's temperature and humidity ranges
    plus ``max_aqi``. Gaussian noise is added to every feature and a fraction
    of the labels is randomly reassigned *before* the 75/25 split, so both the
    training and the test partition contain perturbed data.

    Side effect: the columns ``condition_encoded``, ``avg_temp``,
    ``avg_humidity`` and ``aqi`` are written to the module-level
    ``thresholds_df``.

    Args:
        noise_std: Standard deviation of the Gaussian feature noise.
        flip_fraction: Fraction (0..1) of labels to reassign at random.
        seed: When given, the noise and the label flips are drawn from
            generators seeded with this value so a run can be reproduced.
            When ``None`` the global ``numpy.random`` / ``random`` state is
            used, as before.

    Returns:
        tuple: ``(model, le, X_train, X_test, y_train, y_test,
        y_train_pred, y_test_pred)``.

    Raises:
        ValueError: If ``flip_fraction`` is outside ``[0, 1]``.
    """
    if not 0 <= flip_fraction <= 1:
        raise ValueError("flip_fraction must be between 0 and 1")

    # Label encode the target variable
    le = LabelEncoder()
    thresholds_df['condition_encoded'] = le.fit_transform(thresholds_df['condition'])

    # Prepare features (average of min/max values)
    thresholds_df['avg_temp'] = (thresholds_df['min_temp'] + thresholds_df['max_temp']) / 2
    thresholds_df['avg_humidity'] = (thresholds_df['min_humidity'] + thresholds_df['max_humidity']) / 2
    thresholds_df['aqi'] = thresholds_df['max_aqi']

    feature_columns = ['avg_temp', 'avg_humidity', 'aqi']
    clean_features = thresholds_df[feature_columns]

    # Pick the random sources: global state by default, private seeded
    # generators when the caller asks for a reproducible run.
    if seed is None:
        noise = np.random.normal(0, noise_std, clean_features.shape)
        label_rng = random
    else:
        noise = np.random.default_rng(seed).normal(0, noise_std, clean_features.shape)
        label_rng = random.Random(seed)

    # Build a new frame instead of `+=` on a column selection, so the noisy
    # features are never an in-place modification of a slice of thresholds_df.
    X = clean_features + noise

    # Randomly reassign a fraction of labels to simulate real-world
    # inconsistencies; work on a copy so the encoded column stays intact.
    y = thresholds_df['condition_encoded'].copy()
    flip_indices = label_rng.sample(range(len(y)), int(flip_fraction * len(y)))
    highest_label = len(le.classes_) - 1
    for idx in flip_indices:
        # The replacement may equal the original label, so the effective flip
        # rate can be slightly below `flip_fraction`.
        y.iloc[idx] = label_rng.randint(0, highest_label)

    # Train/test split
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=42)

    # Train the model with restricted depth and trees
    model = RandomForestClassifier(n_estimators=50, max_depth=4, random_state=42)
    model.fit(X_train, y_train)

    # Predictions on both partitions, returned for later metric reporting
    y_train_pred = model.predict(X_train)
    y_test_pred = model.predict(X_test)

    return model, le, X_train, X_test, y_train, y_test, y_train_pred, y_test_pred

# Function to get weather data from OpenWeatherMap API
def get_weather(city):
    """Fetch current weather for ``city`` from the OpenWeatherMap API.

    The request asks for metric units. Any failure (network error, timeout,
    non-JSON body, or an API-level error code) is reported on stdout and
    signalled by returning ``None``.

    Args:
        city: City name as typed by the user.

    Returns:
        tuple | None: ``(temperature, humidity, aqi, wind_speed)`` on success,
        otherwise ``None``. ``aqi`` is a fixed placeholder of 80, not a
        measured value.
    """
    # Let requests build the query string so names containing spaces, '&' or
    # non-ASCII characters are percent-encoded instead of corrupting the URL.
    query = {"q": city, "appid": api_key, "units": "metric"}
    try:
        # Without a timeout a stalled connection would block forever.
        response = requests.get(base_url, params=query, timeout=10)
        data = response.json()
    except (requests.RequestException, ValueError) as exc:
        # Print only the exception type: the full message can embed the
        # request URL, which carries the API key.
        print(f"Error: could not retrieve weather data ({type(exc).__name__})")
        return None

    # Compare as text so both an integer 200 and a string "200" count as success.
    if str(data.get("cod")) != "200":
        print(f"Error: {data.get('message', 'City not found or API issue')}")
        return None

    main_data = data['main']
    wind_data = data.get('wind', {})
    aqi = 80  # Static AQI placeholder. Replace with real AQI API if needed.
    temperature = main_data['temp']
    humidity = main_data['humidity']
    wind_speed = wind_data.get('speed', 0)
    return temperature, humidity, aqi, wind_speed

# Function to collect user data for health condition prediction
def collect_user_data():
    """Prompt for the user's name, age and existing health conditions.

    The selectable conditions are the unique values of
    ``thresholds_df['condition']``, numbered from 1 in order of appearance.

    Returns:
        tuple: ``(name, age, conditions)`` where ``conditions`` is a list of
        condition names, or ``(None, None, None)`` when the input is rejected
        (non-numeric or negative age, age above 100, or an invalid condition
        selection). A message explaining the rejection is printed first.
    """
    rejected = (None, None, None)

    name = input("Enter your name: ")
    try:
        age = int(input("Enter your age: "))
    except ValueError:
        print("Invalid age: please enter a whole number. Exiting.")
        return rejected

    if age < 0:
        print("Age cannot be negative. Exiting.")
        return rejected
    if age > 100:
        print("Age exceeds the analysis limit (100 years). Exiting.")
        return rejected

    # Compute the option list once; it is used for display and for lookup.
    available = thresholds_df['condition'].unique()

    print("\nSelect your health conditions from the list (comma-separated numbers):")
    for i, cond in enumerate(available, start=1):
        print(f"{i}. {cond}")

    selected = input("Enter condition numbers (e.g., 1,3): ")
    # Ignore empty fragments so input such as "1,3," is still accepted.
    tokens = [token for token in selected.split(',') if token.strip()]
    try:
        indices = [int(token) for token in tokens]
    except ValueError:
        print("Invalid selection: condition numbers must be whole numbers. Exiting.")
        return rejected

    if not indices:
        print("Invalid selection: no condition numbers entered. Exiting.")
        return rejected

    # Reject 0 and negatives explicitly: `available[i - 1]` would otherwise
    # wrap around and silently pick a condition from the end of the list.
    out_of_range = [i for i in indices if not 1 <= i <= len(available)]
    if out_of_range:
        print(f"Invalid selection: choose numbers between 1 and {len(available)}. Exiting.")
        return rejected

    conditions = [available[i - 1] for i in indices]

    return name, age, conditions

# Function to generate a personalized alert message based on environmental conditions
def generate_alert_message(predicted_condition, triggered_by, value):
    """Build the alert sentence for a condition and the reading that triggered it.

    Pneumonia, Heart Disease and Asthma each have their own wording; every
    other condition gets a generic alert that names the condition.

    Args:
        predicted_condition: Name of the condition the alert is about.
        triggered_by: Name of the environmental parameter (e.g. "AQI").
        value: The reading of that parameter.

    Returns:
        str: The formatted alert message.
    """
    # Condition-specific wordings, checked in order with `==`.
    specific_templates = (
        ("Pneumonia",
         "🚨 Stay indoor because your condition of Pneumonia is triggered by {triggered_by} levels of {value}."),
        ("Heart Disease",
         "🚨 Be cautious! Your condition of Heart Disease is triggered by {triggered_by} levels of {value}."),
        ("Asthma",
         "🚨 Asthma condition triggered! Stay safe, as {triggered_by} is high with a value of {value}."),
    )

    for condition_name, template in specific_templates:
        if predicted_condition == condition_name:
            return template.format(triggered_by=triggered_by, value=value)

    # Generic fallback for any condition without dedicated wording.
    return (
        f"🚨 Alert! Your condition of {predicted_condition} is at risk "
        f"due to {triggered_by} conditions (value: {value})."
    )

# Function to predict health risk using the Random Forest model
def predict_health_risk(model, le, temperature, humidity, aqi, conditions):
    """Predict the at-risk condition for the current readings and print the result.

    The prediction is always printed. If it is one of the user's own
    ``conditions``, an alert is printed for the first reading that exceeds its
    limit, checked in the order temperature (> 30), humidity (> 80),
    AQI (> 100).

    Args:
        model: Fitted classifier exposing ``predict``.
        le: Fitted label encoder exposing ``inverse_transform``.
        temperature: Current temperature reading.
        humidity: Current humidity reading.
        aqi: Current air-quality index.
        conditions: Condition names the user selected.

    Returns:
        None. All results are written to stdout.
    """
    row = [temperature, humidity, aqi]

    # A model fitted on a DataFrame remembers its column names and warns when
    # later given unnamed data; mirror those names when they are available.
    fitted_names = getattr(model, "feature_names_in_", None)
    if fitted_names is not None and len(fitted_names) == len(row):
        features = pd.DataFrame([row], columns=list(fitted_names))
    else:
        features = [row]

    prediction = model.predict(features)[0]
    predicted_condition = le.inverse_transform([prediction])[0]

    print(f"\n🩺 Based on current weather, you might be at risk for: **{predicted_condition}**")

    # Check if the predicted condition matches user conditions
    if predicted_condition in conditions:
        print(f"⚠️ Health risk detected for {predicted_condition} due to current weather!")
        # Generate personalized alert based on the triggering parameter;
        # only the first exceeded limit is reported.
        if temperature > 30:
            print(generate_alert_message(predicted_condition, "temperature", temperature))
        elif humidity > 80:
            print(generate_alert_message(predicted_condition, "humidity", humidity))
        elif aqi > 100:
            print(generate_alert_message(predicted_condition, "AQI", aqi))
        else:
            print(f"✅ No significant environmental trigger detected for {predicted_condition}.")
    else:
        print(f"✅ No health risk detected for {predicted_condition} under current conditions.")

# Function to calculate and print accuracy metrics
def print_accuracy_metrics(y_train, y_test, y_train_pred, y_test_pred):
    """Print evaluation metrics for the trained model.

    Reports train and test accuracy, weighted F1 / precision / recall on the
    test partition, then the test confusion matrix and classification report.
    Every metric is computed before any of them is printed.

    Args:
        y_train: True labels of the training partition.
        y_test: True labels of the test partition.
        y_train_pred: Predictions for the training partition.
        y_test_pred: Predictions for the test partition.
    """
    print("\n=== Model Evaluation Metrics ===")

    # Scalar scores, paired with their display label in output order.
    scalar_scores = [
        ("Train Accuracy", accuracy_score(y_train, y_train_pred)),
        ("Test Accuracy", accuracy_score(y_test, y_test_pred)),
        ("F1 Score (Weighted)", f1_score(y_test, y_test_pred, average='weighted')),
        ("Precision (Weighted)", precision_score(y_test, y_test_pred, average='weighted')),
        ("Recall (Weighted)", recall_score(y_test, y_test_pred, average='weighted')),
    ]
    # Tabular summaries of the test partition.
    matrix = confusion_matrix(y_test, y_test_pred)
    report = classification_report(y_test, y_test_pred)

    for label, score in scalar_scores:
        print(f"{label}: {score:.4f}")
    print("Confusion Matrix:")
    print(matrix)
    print("\nClassification Report:")
    print(report)

# Main driver function
def main():
    """Run the interactive flow: train, fetch weather, ask the user, predict, report.

    The model is trained first. The flow stops early, without printing
    metrics, when the weather lookup fails or the user's details are rejected.
    """
    # Train up front; the feature partitions are not needed afterwards.
    (model, le, _X_train, _X_test,
     y_train, y_test, y_train_pred, y_test_pred) = train_model()

    city = input("Enter the city name: ")
    weather_data = get_weather(city)
    if not weather_data:
        return

    temperature, humidity, aqi, wind_speed = weather_data
    print(f"\n📍 Weather in {city}:\n🌡 Temperature: {temperature}°C\n💧 Humidity: {humidity}%\n🌬 Wind Speed: {wind_speed} m/s\n🌫 AQI: {aqi}")

    name, age, health_conditions = collect_user_data()
    if name is None:
        # collect_user_data signals a rejected entry with a None name.
        return

    print(f"\n👤 User: {name}\n🎂 Age: {age}\n🩺 Conditions: {', '.join(health_conditions)}\n")

    predict_health_risk(model, le, temperature, humidity, aqi, health_conditions)

    # Metrics come last so they follow the user's personalised output.
    print_accuracy_metrics(y_train, y_test, y_train_pred, y_test_pred)

# Run the app only when executed directly (script or notebook cell), so that
# importing this module does not start the interactive prompts.
if __name__ == "__main__":
    main()

Enter the city name: chittoor

📍 Weather in chittoor:
🌡 Temperature: 35.82°C
💧 Humidity: 37%
🌬 Wind Speed: 2.64 m/s
🌫 AQI: 80
Enter your name: kumar
Enter your age: 21

Select your health conditions from the list (comma-separated numbers):
1. Bronchitis
2. Asthma
3. Sinusitis
4. Heart Disease
5. Cold & Flu
6. Arthritis
7. Pneumonia
8. COPD
Enter condition numbers (e.g., 1,3): 5

👤 User: kumar
🎂 Age: 21
🩺 Conditions: Cold & Flu


🩺 Based on current weather, you might be at risk for: **Heart Disease**
✅ No health risk detected for Heart Disease under current conditions.

=== Model Evaluation Metrics ===
Train Accuracy: 0.9513
Test Accuracy: 0.9440
F1 Score (Weighted): 0.9440
Precision (Weighted): 0.9446
Recall (Weighted): 0.9440
Confusion Matrix:
[[48  1  0  0  0  0  1  2]
 [ 0 55  1  0  1  2  0  3]
 [ 0  0 63  0  0  0  1  0]
 [ 1  0  1 61  0  0  1  0]
 [ 2  1  1  0 64  0  1  1]
 [ 1  0  0  0  0 67  0  0]
 [ 0  1  0  0  1  0 61  0]
 [ 0  3  0  0  0  1  0 53]]

Classification Report:
    

