In [None]:
# --- Import Libraries ---
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, classification_report
import warnings

# --- (a) Load Dataset & Data Pre-processing ---
# NOTE(review): this silences every warning for the rest of the run, including
# library deprecation notices; consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

# Load the dataset
df = pd.read_csv("madfhantr.csv")

print("--- Data Head ---")
print(df.head())

print("\n--- Checking for Missing Values ---")
print(df.isnull().sum())

# Fill missing numeric values with mean
df.fillna(df.mean(numeric_only=True), inplace=True)

# Object-dtype (text) columns; computed once and reused by both loops below.
categorical_cols = df.select_dtypes(include=['object']).columns

# Fill missing categorical values with mode.
# The result is assigned back to the frame: calling fillna(inplace=True) on
# df[col] is chained assignment, which pandas 2.x warns about and which does
# not modify df once Copy-on-Write is active.
for col in categorical_cols:
    df[col] = df[col].fillna(df[col].mode()[0])

# --- Label Encoding for Categorical Columns ---
# One encoder per column, kept in `encoders` so the integer codes can be mapped
# back to the original labels later (encoders[col].inverse_transform / .classes_).
# `le` is left bound to the encoder of the last processed column.
encoders = {}
for col in categorical_cols:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    encoders[col] = le

print("\n--- Encoded Data Head ---")
print(df.head())

# --- (b) Data Preparation (Train-Test Split) ---
# Assuming 'Loan_Status' is the target column
y = df['Loan_Status']

# Every remaining column is a feature except 'Loan_ID': judging by the data
# head it is a per-row identifier, so feeding it to the model only gives the
# tree something to memorise. The membership test makes this a no-op when the
# column is absent.
feature_cols = [c for c in df.columns if c not in ('Loan_Status', 'Loan_ID')]
X = df[feature_cols]

# 80/20 split with a fixed seed so the evaluation is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# --- (c) Apply Decision Tree Classification ---
# fit() returns the estimator itself, so construction and training chain.
dt_clf = DecisionTreeClassifier(random_state=42).fit(X_train, y_train)

# --- (d) Evaluate Model ---
# Score the held-out split first, then print the results in one go.
y_pred = dt_clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print("\n--- Model Evaluation ---")
print(f"Accuracy: {accuracy:.2f}")
print("\nClassification Report:")
print(report)

# --- Predict for new user input (Example) ---
print("\n--- Predict Loan Eligibility for New Customer ---")
try:
    # Only the prompts live inside the try block, so a failure in the
    # prediction itself is never mistaken for "no interactive input".
    # Keys are the training column names each answer belongs to.
    answers = {
        'Gender': int(input("Enter Gender (0=Female, 1=Male): ")),
        'Married': int(input("Enter Married (0=No, 1=Yes): ")),
        'Education': int(input("Enter Education (0=Graduate, 1=Not Graduate): ")),
        'ApplicantIncome': float(input("Enter Applicant Income: ")),
        'LoanAmount': float(input("Enter Loan Amount: ")),
        'Credit_History': float(input("Enter Credit History (1 or 0): ")),
    }
except (EOFError, NotImplementedError):
    # input() raises EOFError when stdin is closed; Jupyter kernels without
    # stdin support are expected to raise a NotImplementedError subclass.
    answers = None
    print("Input skipped for demo (not running interactively).")
except ValueError as e:
    # int()/float() could not parse what the user typed.
    answers = None
    print(f"Invalid input, prediction skipped: {e}")

if answers is not None:
    unknown = [name for name in answers if name not in X_train.columns]
    if unknown:
        raise KeyError(f"Model was not trained on feature(s): {unknown}")

    # The classifier was fit on every column of X_train, so the input row must
    # carry all of them in the same order. Features that are not prompted for
    # start at their training-set median as a neutral placeholder.
    feature_row = X_train.median()
    for name, value in answers.items():
        feature_row[name] = value

    # Single-row frame with the training column names and order.
    user_input = feature_row.to_frame().T
    prediction = dt_clf.predict(user_input)[0]
    print(f"\nPredicted Loan Eligibility (1=Eligible, 0=Not Eligible): {prediction}")


--- Data Head ---
    Loan_ID Gender Married Dependents     Education Self_Employed  \
0  LP001002   Male      No          0      Graduate            No   
1  LP001003   Male     Yes          1      Graduate            No   
2  LP001005   Male     Yes          0      Graduate           Yes   
3  LP001006   Male     Yes          0  Not Graduate            No   
4  LP001008   Male      No          0      Graduate            No   

   ApplicantIncome  CoapplicantIncome  LoanAmount  Loan_Amount_Term  \
0             5849                0.0         NaN             360.0   
1             4583             1508.0       128.0             360.0   
2             3000                0.0        66.0             360.0   
3             2583             2358.0       120.0             360.0   
4             6000                0.0       141.0             360.0   

   Credit_History Property_Area Loan_Status  
0             1.0         Urban           Y  
1             1.0         Rural           N  
2 