In [None]:
# DhartiKrishi-Sahayata: AI-Powered Tenant Farmer Credit Assessment
# Google Colab Notebook with Gradio UI

# ============================================================================
# STEP 1: Install Required Dependencies
# ============================================================================
"""
Run this cell first to install all required packages
"""

!pip install -q gradio pandas numpy scikit-learn xgboost shap matplotlib seaborn plotly

# ============================================================================
# STEP 2: Import Libraries
# ============================================================================

import gradio as gr
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import StandardScaler
import xgboost as xgb
import shap
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.graph_objects as go
from datetime import datetime, timedelta
import json
import warnings
warnings.filterwarnings('ignore')

# ============================================================================
# STEP 3: Create Synthetic Training Data & Train Model
# ============================================================================

class TenantFarmerCreditModel:
    """Credit-scoring model for tenant farmers, trained on synthetic data.

    Constructing an instance immediately generates a synthetic training set,
    fits a ``StandardScaler`` and an XGBoost classifier on it, and then exposes
    helpers that turn a ten-value feature vector into a Tenant Credit Score
    (TCS), a credit category, a loan amount and a ranked list of factors.
    """

    # (offset, span) used to rescale raw feature values onto a 0-1 range for the
    # explainability report. The numbers mirror the ranges sampled in
    # generate_synthetic_data; features not listed are used as-is.
    _FEATURE_SCALING = {
        'age': (25, 40),
        'years_farming': (0, 40),
        'land_area_acres': (0, 10),
        'input_purchase_frequency': (0, 24),
        'mandi_transaction_count': (0, 15),
    }

    def __init__(self):
        """Set up the scaler and feature order, then train the model."""
        self.model = None
        self.scaler = StandardScaler()
        # Order matters: feature vectors passed to the other methods must list
        # their values in exactly this order.
        self.feature_names = [
            'age', 'years_farming', 'land_area_acres', 'ndvi_score',
            'rainfall_adequacy', 'soil_health_index', 'input_purchase_frequency',
            'mandi_transaction_count', 'crop_diversity_score', 'insurance_coverage'
        ]
        self.train_model()

    def generate_synthetic_data(self, n_samples=5000):
        """Generate a synthetic training set for tenant farmers.

        Args:
            n_samples: Number of rows to generate.

        Returns:
            A DataFrame with the ten feature columns plus an integer
            ``creditworthy`` column holding the class label 0, 1 or 2.

        Note:
            Seeds NumPy's global random generator with 42, so repeated calls
            produce the same data (and reset the global random state).
        """
        np.random.seed(42)

        data = {
            'age': np.random.randint(25, 65, n_samples),
            'years_farming': np.random.randint(1, 40, n_samples),
            'land_area_acres': np.random.uniform(0.5, 10, n_samples),
            'ndvi_score': np.random.uniform(0.3, 0.9, n_samples),  # Satellite crop health
            'rainfall_adequacy': np.random.uniform(0.4, 1.0, n_samples),
            'soil_health_index': np.random.uniform(0.3, 0.95, n_samples),
            'input_purchase_frequency': np.random.randint(2, 24, n_samples),  # per year
            'mandi_transaction_count': np.random.randint(0, 15, n_samples),
            'crop_diversity_score': np.random.uniform(0.2, 1.0, n_samples),
            'insurance_coverage': np.random.choice([0, 1], n_samples, p=[0.7, 0.3])
        }

        df = pd.DataFrame(data)

        # Latent creditworthiness: a weighted sum of the drivers (weights sum
        # to 1.0). age, land_area_acres and crop_diversity_score do not
        # contribute to the target.
        credit_score = (
            df['ndvi_score'] * 0.25 +
            df['soil_health_index'] * 0.20 +
            df['rainfall_adequacy'] * 0.15 +
            (df['years_farming'] / 40) * 0.15 +
            (df['input_purchase_frequency'] / 24) * 0.10 +
            (df['mandi_transaction_count'] / 15) * 0.10 +
            df['insurance_coverage'] * 0.05
        )

        # Add Gaussian noise, then keep the score inside [0, 1]
        credit_score += np.random.normal(0, 0.05, n_samples)
        credit_score = np.clip(credit_score, 0, 1)

        # Bucket into class labels: 0 (score <= 0.4), 1 (0.4-0.7), 2 (> 0.7).
        # include_lowest=True keeps a score clipped to exactly 0.0 in class 0;
        # without it that row becomes NaN and astype(int) raises.
        df['creditworthy'] = pd.cut(credit_score,
                                     bins=[0, 0.4, 0.7, 1.0],
                                     labels=[0, 1, 2],
                                     include_lowest=True).astype(int)

        return df

    def train_model(self):
        """Fit the scaler and the XGBoost classifier on the synthetic data.

        The model is fitted on the whole generated set; no hold-out split or
        evaluation is performed here.
        """
        print("üåæ Training Tenant Farmer Credit Model...")

        # Generate training data
        df = self.generate_synthetic_data()

        X = df[self.feature_names]
        y = df['creditworthy']

        # Scale features
        X_scaled = self.scaler.fit_transform(X)

        # Train XGBoost model
        self.model = xgb.XGBClassifier(
            n_estimators=100,
            max_depth=5,
            learning_rate=0.1,
            random_state=42
        )
        self.model.fit(X_scaled, y)

        print("‚úÖ Model trained successfully!")
        print(f"   Training samples: {len(df)}")
        print(f"   Feature count: {len(self.feature_names)}")

    def calculate_tcs_score(self, features):
        """Calculate the Tenant Credit Score (TCS) for one farmer.

        Args:
            features: Feature values in the order given by ``feature_names``.

        Returns:
            A tuple ``(tcs_score, proba)`` where ``tcs_score`` is an int and
            ``proba`` holds the predicted probabilities of classes 0, 1 and 2.
        """
        features_array = np.array([features])
        features_scaled = self.scaler.transform(features_array)

        # Get prediction probabilities
        proba = self.model.predict_proba(features_scaled)[0]

        # Probability-weighted score: class 0 -> 300, class 1 -> 600,
        # class 2 -> 1000. With probabilities summing to 1 the result lies
        # between 300 and 1000.
        tcs_score = (proba[0] * 300 + proba[1] * 600 + proba[2] * 1000)

        return int(tcs_score), proba

    def get_credit_category(self, tcs_score):
        """Map a TCS value to a ``(category_name, indicator_symbol)`` pair.

        Bands: >= 750 Excellent, >= 600 Good, >= 400 Fair, otherwise Poor.
        """
        if tcs_score >= 750:
            return "Excellent", "üü¢"
        elif tcs_score >= 600:
            return "Good", "üü°"
        elif tcs_score >= 400:
            return "Fair", "üü†"
        else:
            return "Poor", "üî¥"

    def calculate_loan_eligibility(self, tcs_score, land_area):
        """Calculate the eligible loan amount.

        Args:
            tcs_score: Tenant Credit Score.
            land_area: Land area in acres.

        Returns:
            The loan amount as an int: base amount per acre times the land
            area times a multiplier chosen by the TCS band.
        """
        base_amount_per_acre = 15000  # Base amount per acre (rupees)

        # Same TCS bands as get_credit_category
        if tcs_score >= 750:
            multiplier = 1.5
        elif tcs_score >= 600:
            multiplier = 1.2
        elif tcs_score >= 400:
            multiplier = 0.8
        else:
            multiplier = 0.5

        loan_amount = int(base_amount_per_acre * land_area * multiplier)
        return loan_amount

    def generate_explainability_report(self, features):
        """Rank features by a simple importance-times-value impact.

        Each feature value is rescaled to 0-1 and multiplied by the model's
        global ``feature_importances_`` entry for that feature. This is a
        heuristic ranking, not a per-prediction attribution.

        Args:
            features: Feature values in the order given by ``feature_names``.

        Returns:
            A list of up to five ``(feature_name, impact)`` tuples, sorted by
            impact in descending order.
        """
        # Get feature importance from the model
        feature_importance = self.model.feature_importances_

        features_array = np.array(features)

        feature_impact = {}
        for i, feature_name in enumerate(self.feature_names):
            offset, span = self._FEATURE_SCALING.get(feature_name, (0, 1))
            normalized = (features_array[i] - offset) / span
            # Callers may pass values outside the ranges used for training
            # (e.g. age below 25 or more than 40 years of farming); clamp so
            # no feature gets a negative or over-sized impact.
            normalized = min(max(normalized, 0.0), 1.0)

            # Impact is the product of importance and normalized value
            feature_impact[feature_name] = feature_importance[i] * normalized

        # Sort by impact, largest first
        sorted_features = sorted(feature_impact.items(), key=lambda x: x[1], reverse=True)

        return sorted_features[:5]  # Top 5 factors

# ============================================================================
# STEP 4: Initialize Model
# ============================================================================

# Shared model instance used by assess_credit. The constructor calls
# train_model(), so executing this line generates the synthetic data and fits
# the model immediately.
credit_model = TenantFarmerCreditModel()

# ============================================================================
# STEP 5: Create Visualization Functions
# ============================================================================

def create_gauge_chart(tcs_score):
    """Build the Plotly gauge figure that displays a TCS value.

    The dial runs up to 1000, is shaded in four bands (0-400, 400-600,
    600-750, 750-1000) and carries a red threshold marker at 600; the delta
    readout is also measured against 600.
    """
    # (band start, band end, fill colour) for the shaded dial segments
    score_bands = [
        (0, 400, '#ffcccc'),
        (400, 600, '#ffffcc'),
        (600, 750, '#ccffcc'),
        (750, 1000, '#ccffff'),
    ]

    gauge_spec = dict(
        axis=dict(range=[None, 1000], tickwidth=1, tickcolor="darkblue"),
        bar=dict(color="darkblue"),
        bgcolor="white",
        borderwidth=2,
        bordercolor="gray",
        steps=[dict(range=[start, end], color=shade) for start, end, shade in score_bands],
        threshold=dict(
            line=dict(color="red", width=4),
            thickness=0.75,
            value=600,
        ),
    )

    indicator = go.Indicator(
        mode="gauge+number+delta",
        value=tcs_score,
        domain=dict(x=[0, 1], y=[0, 1]),
        title=dict(text="Tenant Credit Score (TCS)", font=dict(size=24)),
        delta=dict(reference=600),
        gauge=gauge_spec,
    )

    fig = go.Figure(indicator)
    fig.update_layout(
        height=300,
        margin={'l': 20, 'r': 20, 't': 50, 'b': 20},
        paper_bgcolor="white",
    )
    return fig

def create_feature_importance_chart(top_features):
    """Create a horizontal bar chart of the most influential factors.

    Args:
        top_features: Sequence of ``(feature_name, impact)`` pairs, expected
            in descending order of impact. May be empty.

    Returns:
        A Plotly figure with one bar per factor, the first pair drawn at the
        top. An empty input yields a chart with no bars.
    """
    # zip(*[]) produces nothing to unpack, so handle an empty ranking explicitly
    if top_features:
        features, impacts = zip(*top_features)
    else:
        features, impacts = (), ()

    # Friendly names
    friendly_names = {
        'ndvi_score': 'Crop Health (Satellite)',
        'soil_health_index': 'Soil Health',
        'rainfall_adequacy': 'Rainfall Adequacy',
        'years_farming': 'Farming Experience',
        'input_purchase_frequency': 'Input Purchase Pattern',
        'mandi_transaction_count': 'Market Transactions',
        'crop_diversity_score': 'Crop Diversity',
        'insurance_coverage': 'Insurance Status',
        'land_area_acres': 'Land Area',
        'age': 'Age'
    }

    # Unknown feature keys fall back to the raw key
    names = [friendly_names.get(f, f) for f in features]

    fig = go.Figure(go.Bar(
        x=impacts,
        y=names,
        orientation='h',
        marker=dict(color='#2E7D32')
    ))

    fig.update_layout(
        title="Top Factors Influencing Credit Score",
        xaxis_title="Impact Score",
        yaxis_title="Factor",
        # Horizontal bars are drawn bottom-up by default; reverse the axis so
        # the highest-ranked factor is shown first (at the top).
        yaxis_autorange="reversed",
        height=300,
        margin=dict(l=20, r=20, t=50, b=20)
    )

    return fig

# ============================================================================
# STEP 6: Main Credit Assessment Function
# ============================================================================

def assess_credit(
    farmer_name,
    age,
    years_farming,
    land_area_acres,
    ndvi_score,
    rainfall_adequacy,
    soil_health_index,
    input_purchase_frequency,
    mandi_transaction_count,
    crop_diversity_score,
    insurance_coverage
):
    """Run a full credit assessment for one farmer.

    Scores the inputs with the module-level ``credit_model``, derives the
    category, loan amount and top factors, and renders a Markdown report.

    Returns:
        A tuple ``(report_markdown, gauge_figure, factor_figure)``.
    """
    # Feature vector in the order the model expects
    feature_vector = [
        age, years_farming, land_area_acres, ndvi_score,
        rainfall_adequacy, soil_health_index, input_purchase_frequency,
        mandi_transaction_count, crop_diversity_score, insurance_coverage
    ]

    # Model outputs
    tcs_score, probabilities = credit_model.calculate_tcs_score(feature_vector)
    category, emoji = credit_model.get_credit_category(tcs_score)
    loan_amount = credit_model.calculate_loan_eligibility(tcs_score, land_area_acres)
    top_features = credit_model.generate_explainability_report(feature_vector)

    # Charts
    gauge_chart = create_gauge_chart(tcs_score)
    feature_chart = create_feature_importance_chart(top_features)

    # Everything that depends on the score band is picked in one place:
    # the quoted interest rate and the recommendation text.
    if tcs_score >= 750:
        interest_rate = 8.5
        recommendations = """
‚úÖ **Highly creditworthy** - Approve for full loan amount with premium terms
‚úÖ Consider for higher credit limits and longer tenure options
‚úÖ Fast-track application processing recommended
"""
    elif tcs_score >= 600:
        interest_rate = 9.5
        recommendations = """
‚úÖ **Good credit profile** - Approve with standard terms
‚ö†Ô∏è Monitor crop health regularly during loan cycle
üí° Encourage to maintain input purchase patterns
"""
    elif tcs_score >= 400:
        interest_rate = 11.0
        recommendations = """
‚ö†Ô∏è **Moderate risk** - Approve with conservative loan amount
‚ö†Ô∏è Require crop insurance coverage before disbursement
üí° Provide agricultural advisory support
üìä More frequent monitoring required
"""
    else:
        interest_rate = 13.0
        recommendations = """
üî¥ **High risk profile** - Consider rejection or very small loan with strict monitoring
üìö Recommend financial literacy and farming best practices training
ü§ù Suggest co-lending or group lending model
üí° Revisit after one crop cycle with improved practices
"""

    summary = f"""
# üåæ DhartiKrishi Credit Assessment Report

## Farmer Details
- **Name:** {farmer_name}
- **Age:** {age} years
- **Farming Experience:** {years_farming} years
- **Land Area:** {land_area_acres} acres

---

## Credit Assessment Results

### Tenant Credit Score (TCS)
**Score: {tcs_score} / 1000** {emoji}

**Category:** {category}

### Loan Eligibility
- **Eligible Loan Amount:** ‚Çπ{loan_amount:,}
- **Interest Rate:** {interest_rate}% per annum
- **Tenure:** 6-12 months (crop cycle linked)

---

## Key Insights

### Credit Score Distribution
- Probability of Excellent Credit: {probabilities[2]*100:.1f}%
- Probability of Good Credit: {probabilities[1]*100:.1f}%
- Probability of Fair/Poor Credit: {probabilities[0]*100:.1f}%

### Recommendations
"""

    # Closing section: monitoring plan with a review date 30 days out, plus
    # the generation timestamp.
    monitoring = f"""

---

## Dynamic Monitoring Plan
- Real-time satellite monitoring: Active
- Weather alerts: Enabled
- Market price tracking: Enabled
- Next review: {(datetime.now() + timedelta(days=30)).strftime('%d %B %Y')}

---

*Report generated on: {datetime.now().strftime('%d %B %Y, %I:%M %p')}*
"""

    report = "".join([summary, recommendations, monitoring])
    return report, gauge_chart, feature_chart

# ============================================================================
# STEP 7: Create Gradio Interface
# ============================================================================

def create_gradio_interface():
    """Assemble the Gradio Blocks app and return it (without launching).

    Layout: a left column with the farmer, environmental and transaction
    inputs plus the assess button, and a wider right column with the Markdown
    report and the two plots. Clicking the button calls ``assess_credit``.
    """

    def make_slider(label, low, high, default, step, info=None):
        # Small factory so every slider is declared on a single line
        return gr.Slider(
            label=label, minimum=low, maximum=high,
            value=default, step=step, info=info,
        )

    with gr.Blocks(title="DhartiKrishi-Sahayata", theme=gr.themes.Soft()) as demo:
        gr.Markdown("""
        # üåæ DhartiKrishi-Sahayata
        ## AI-Powered Credit Assessment for Tenant Farmers

        *Breaking the credit barrier for India's landless farmers through advanced AI and satellite data*
        """)

        with gr.Row():
            # ---- Input form ----
            with gr.Column(scale=1):
                gr.Markdown("### üë®‚Äçüåæ Farmer Information")
                farmer_name = gr.Textbox(
                    label="Farmer Name", placeholder="Enter farmer name", value="Ram Kumar"
                )
                age = make_slider("Age (years)", 18, 70, 38, 1)
                years_farming = make_slider("Farming Experience (years)", 1, 50, 15, 1)
                land_area_acres = make_slider("Tenant Land Area (acres)", 0.5, 15, 3.5, 0.5)

                gr.Markdown("### üõ∞Ô∏è Satellite & Environmental Data")
                ndvi_score = make_slider(
                    "NDVI Score (Crop Health from Satellite)", 0.0, 1.0, 0.72, 0.01,
                    info="0=Poor, 1=Excellent",
                )
                rainfall_adequacy = make_slider(
                    "Rainfall Adequacy", 0.0, 1.0, 0.68, 0.01, info="Based on IMD data"
                )
                soil_health_index = make_slider(
                    "Soil Health Index", 0.0, 1.0, 0.75, 0.01, info="From soil health cards"
                )

                gr.Markdown("### üìä Transaction & Activity Data")
                input_purchase_frequency = make_slider(
                    "Input Purchase Frequency (per year)", 0, 24, 12, 1,
                    info="Seeds, fertilizers, pesticides",
                )
                mandi_transaction_count = make_slider(
                    "Mandi Transaction Count (per year)", 0, 20, 6, 1,
                    info="Recorded market sales",
                )
                crop_diversity_score = make_slider(
                    "Crop Diversity Score", 0.0, 1.0, 0.65, 0.01,
                    info="0=Mono-crop, 1=Highly diverse",
                )
                # The radio hands the numeric value (0 or 1) to the callback
                insurance_coverage = gr.Radio(
                    label="Crop Insurance Coverage", choices=[("No", 0), ("Yes", 1)], value=1
                )

                assess_btn = gr.Button("üîç Assess Credit Eligibility", variant="primary", size="lg")

            # ---- Results ----
            with gr.Column(scale=2):
                gr.Markdown("### üìä Assessment Results")
                report_output = gr.Markdown()
                with gr.Row():
                    gauge_output = gr.Plot(label="Credit Score Gauge")
                    feature_output = gr.Plot(label="Key Influencing Factors")

        # Inputs are listed in the same order as assess_credit's parameters
        form_inputs = [
            farmer_name, age, years_farming, land_area_acres,
            ndvi_score, rainfall_adequacy, soil_health_index,
            input_purchase_frequency, mandi_transaction_count,
            crop_diversity_score, insurance_coverage
        ]
        result_outputs = [report_output, gauge_output, feature_output]
        assess_btn.click(fn=assess_credit, inputs=form_inputs, outputs=result_outputs)

        gr.Markdown("""
        ---
        ### üéØ About This System

        **DhartiKrishi-Sahayata** uses multi-modal AI to assess creditworthiness of tenant farmers who lack traditional collateral:

        - üõ∞Ô∏è **Satellite Data:** Real-time crop health monitoring via NDVI
        - üåßÔ∏è **Weather Intelligence:** Rainfall and climate risk assessment
        - üè™ **Transaction Analysis:** Input purchases and market sales patterns
        - üå± **Agricultural Practices:** Crop diversity and soil health tracking
        - ü§ñ **Explainable AI:** Transparent credit decisions with SHAP analysis

        *Powered by XGBoost, SHAP, and Agri Stack APIs*
        """)

    return demo

# ============================================================================
# STEP 8: Launch the Application
# ============================================================================

if __name__ == "__main__":
    demo = create_gradio_interface()
    # launch(debug=True) keeps the cell running until the server is stopped,
    # so anything printed after it is not shown while the app is live.
    # Announce the launch first; Gradio prints the public URL afterwards.
    print("\n‚úÖ Launching DhartiKrishi-Sahayata...")
    print("üì± Access the public URL printed below to share with your team")
    demo.launch(
        share=True,  # Creates a public link
        debug=True
    )

üåæ Training Tenant Farmer Credit Model...
‚úÖ Model trained successfully!
   Training samples: 5000
   Feature count: 10
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://b8932fd89d912dc4b5.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
