# Task 6: FinTech Vendor Scorecard for Micro-Lending
Analyzing vendor performance and creating lending scores for Ethiopian e-commerce channels

In [None]:
# Setup and imports
import sys
sys.path.append('../src')

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from vendor_analytics.vendor_scorer import VendorAnalyticsEngine
from preprocessing.text_preprocessor import AmharicTextPreprocessor

In [None]:
# Load processed data from Task 1.
# If the Task 1 output is missing, fall back to a small synthetic dataset so the
# rest of the notebook can still be run end to end.
try:
    processed_df = pd.read_csv("../data/processed/processed_telegram_data.csv")
    print(f"Loaded {len(processed_df)} processed messages")
    print(f"Channels: {processed_df['channel'].unique()}")
except FileNotFoundError:
    print("Processed data not found. Creating sample data...")
    # numpy is only needed for the synthetic fallback, so it is imported here.
    import numpy as np

    # Fixed seed: without it every Restart & Run All would draw different
    # views/forwards/replies and therefore produce different lending scores.
    rng = np.random.default_rng(42)
    n_messages = 300
    sample_text = 'የሕፃናት ጠርሙስ ዋጋ 150 ብር ቦሌ አካባቢ'

    sample_data = {
        # Three channels repeated round-robin -> n_messages rows in total.
        'channel': ['@ZemenExpress', '@sinayelj', '@Shewabrand'] * (n_messages // 3),
        'text': [sample_text] * n_messages,
        # Generator.integers uses the same half-open [low, high) range as randint.
        'views': rng.integers(50, 1000, n_messages),
        'forwards': rng.integers(0, 50, n_messages),
        'replies': rng.integers(0, 20, n_messages),
        # Lowercase 'h' is the non-deprecated hourly alias (uppercase 'H' warns on pandas >= 2.2).
        'date': pd.date_range('2024-01-01', periods=n_messages, freq='6h'),
        'cleaned_text': [sample_text] * n_messages,
        'token_count': [8] * n_messages,
        # One list object per row; `[[...]] * n` would make every row alias the same list.
        'price_hints': [['150 ብር'] for _ in range(n_messages)],
        'location_hints': [['ቦሌ'] for _ in range(n_messages)],
        'has_amharic': [True] * n_messages
    }
    processed_df = pd.DataFrame(sample_data)
    print(f"Created sample data with {len(processed_df)} messages")

In [None]:
# Build the scoring engine (constructed with no arguments) and show the
# weights it exposes via `scoring_weights`.
analytics = VendorAnalyticsEngine()
print("Vendor analytics engine initialized")
active_weights = analytics.scoring_weights
print(f"Scoring weights: {active_weights}")

In [None]:
# Profile a single vendor as a worked example: the channel of the first row.
sample_vendor = processed_df['channel'].iat[0]
vendor_metrics = analytics.analyze_vendor_activity(processed_df, sample_vendor)

# Assemble the report line by line, then emit it in one go.
activity_lines = [
    f"Sample Vendor Analysis: {sample_vendor}",
    "-" * 40,
    f"Total posts: {vendor_metrics['total_posts']}",
    f"Posts per week: {vendor_metrics['posts_per_week']:.2f}",
    f"Average views per post: {vendor_metrics['avg_views_per_post']:.0f}",
    f"Total views: {vendor_metrics['total_views']:,}",
    f"Posting consistency: {vendor_metrics['posting_consistency']:.2f}",
    f"Top post views: {vendor_metrics['top_post']['views']}",
]
print("\n".join(activity_lines))

In [None]:
# Pull the price statistics the engine extracts for the sample vendor.
price_info = analytics.extract_price_info(processed_df, sample_vendor)

price_lines = [
    f"Price Analysis for {sample_vendor}:",
    "-" * 40,
    f"Average price: {price_info['avg_price']:.0f} ETB",
    f"Price range: {price_info['price_range'][0]:.0f} - {price_info['price_range'][1]:.0f} ETB",
    f"Number of prices found: {price_info['price_count']}",
]
# The standard deviation is only reported when more than one price was found.
if price_info['price_count'] > 1:
    price_lines.append(f"Price standard deviation: {price_info['price_std']:.0f} ETB")

for price_line in price_lines:
    print(price_line)

In [None]:
# Score the sample vendor from its activity metrics and price statistics.
lending_score = analytics.calculate_lending_score(vendor_metrics, price_info)

print(f"Lending Score Calculation for {sample_vendor}:")
print("=" * 50)
print(f"Final Lending Score: {lending_score}/100")

# Score interpretation: tiers are listed from the strictest cutoff down and the
# first cutoff the score reaches wins; anything below every cutoff is low priority.
score_tiers = (
    (70, "🟢", "HIGH PRIORITY - Ready for micro-lending"),
    (40, "🟡", "MEDIUM PRIORITY - Requires additional assessment"),
)
color, risk_level = next(
    (
        (tier_color, tier_label)
        for cutoff, tier_color, tier_label in score_tiers
        if lending_score >= cutoff
    ),
    ("🔴", "LOW PRIORITY - High risk, not recommended"),
)

print(f"Risk Assessment: {color} {risk_level}")

In [None]:
# Score every vendor in the dataset and print a compact scorecard.
vendor_scores = analytics.analyze_all_vendors(processed_df)

print(f"Analyzed {len(vendor_scores)} vendors")
print("\nVendor Scorecard Summary:")

# Only the headline columns are shown; the index is dropped from the text table.
scorecard_columns = [
    'vendor_channel',
    'posts_per_week',
    'avg_views_per_post',
    'avg_price_etb',
    'lending_score',
]
scorecard_text = vendor_scores[scorecard_columns].to_string(index=False)
print(scorecard_text)

In [None]:
# Visualize vendor performance on a 2x2 dashboard.
fig, axes = plt.subplots(2, 2, figsize=(15, 12))
ax1, ax2, ax3, ax4 = axes.ravel()
lending_scores = vendor_scores['lending_score']

# Top-left: distribution of lending scores with the two priority cutoffs marked.
ax1.hist(lending_scores, bins=10, color='skyblue', alpha=0.7)
ax1.set(title='Lending Score Distribution', xlabel='Lending Score', ylabel='Number of Vendors')
threshold_lines = (
    (70, 'green', 'High Priority Threshold'),
    (40, 'orange', 'Medium Priority Threshold'),
)
for cutoff, line_color, line_label in threshold_lines:
    ax1.axvline(cutoff, color=line_color, linestyle='--', label=line_label)
ax1.legend()

# Top-right: average views against lending score, coloured by the score itself.
ax2.scatter(
    vendor_scores['avg_views_per_post'],
    lending_scores,
    c=lending_scores,
    cmap='RdYlGn',
    alpha=0.7,
)
ax2.set(title='Average Views vs Lending Score', xlabel='Average Views per Post', ylabel='Lending Score')

# Bottom row: one bar chart per vendor-level metric (posting rate, then average price).
bar_panels = (
    (ax3, 'posts_per_week', 'lightcoral', 'Posts per Week by Vendor', 'Posts per Week'),
    (ax4, 'avg_price_etb', 'lightgreen', 'Average Price by Vendor', 'Average Price (ETB)'),
)
for panel_ax, metric_column, bar_color, panel_title, y_label in bar_panels:
    vendor_scores.plot(x='vendor_channel', y=metric_column, kind='bar', ax=panel_ax, color=bar_color)
    panel_ax.set(title=panel_title, xlabel='Vendor Channel', ylabel=y_label)
    panel_ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

In [None]:
# Bucket the scored vendors into priority tiers and list each tier.
recommendations = analytics.create_lending_recommendations(vendor_scores)

print("LENDING RECOMMENDATIONS")
print("=" * 50)

# (key in `recommendations`, heading, sub-heading) for each tier, in display order.
# The leading "\n" on the later headings leaves a blank line between sections.
tier_sections = (
    ('high_priority', "🟢 HIGH PRIORITY VENDORS", "   Ready for immediate micro-lending:"),
    ('medium_priority', "\n🟡 MEDIUM PRIORITY VENDORS", "   Require additional assessment:"),
    ('low_priority', "\n🔴 LOW PRIORITY VENDORS", "   High risk, not recommended:"),
)
for tier_key, tier_heading, tier_blurb in tier_sections:
    tier_vendors = recommendations[tier_key]
    print(f"{tier_heading} ({len(tier_vendors)})")
    print(tier_blurb)
    for vendor in tier_vendors:
        print(f"   • {vendor['channel']} (Score: {vendor['score']:.1f}, Views: {vendor['avg_views']:.0f})")

In [None]:
# Detailed breakdown of the three vendors with the highest lending score.
top_vendors = vendor_scores.nlargest(3, 'lending_score')

print("TOP 3 PERFORMING VENDORS")
print("=" * 50)

# (label, column, format spec, suffix) for each detail line, in display order.
# An empty format spec prints the value as-is.
detail_fields = (
    ("Lending Score", 'lending_score', ".1f", "/100"),
    ("Posts per Week", 'posts_per_week', ".1f", ""),
    ("Average Views", 'avg_views_per_post', ".0f", ""),
    ("Average Price", 'avg_price_etb', ".0f", " ETB"),
    ("Total Posts", 'total_posts', "", ""),
    ("Top Post Views", 'top_post_views', "", ""),
    ("Weeks Active", 'weeks_active', ".1f", ""),
)
for rank, (_, vendor) in enumerate(top_vendors.iterrows(), start=1):
    print(f"\n#{rank}. {vendor['vendor_channel']}")
    for field_label, field_column, field_spec, field_suffix in detail_fields:
        print(f"   {field_label}: {format(vendor[field_column], field_spec)}{field_suffix}")

In [None]:
# Generate comprehensive vendor report from the full scorecard.
# NOTE(review): generate_vendor_report is defined in VendorAnalyticsEngine, which is
# not visible here; presumably it writes the report to disk and returns where it
# was saved, since the return value is printed as the report's location - confirm.
report_path = analytics.generate_vendor_report(vendor_scores)
print(f"Comprehensive vendor report saved to: {report_path}")

In [None]:
# Business impact summary: tier counts, their share of all vendors, and score averages.
total_vendors = len(vendor_scores)
high_priority = len(recommendations['high_priority'])
medium_priority = len(recommendations['medium_priority'])
low_priority = len(recommendations['low_priority'])


def _pct_of_vendors(count):
    """Return `count` as a percentage of all analyzed vendors (0.0 when there are none)."""
    # Guard the division: with no scored vendors the original expression
    # raised ZeroDivisionError and aborted the whole summary.
    return count / total_vendors * 100 if total_vendors else 0.0


print("BUSINESS IMPACT SUMMARY")
print("=" * 50)
print(f"Total vendors analyzed: {total_vendors}")
print(f"High priority (ready for lending): {high_priority} ({_pct_of_vendors(high_priority):.1f}%)")
print(f"Medium priority (needs assessment): {medium_priority} ({_pct_of_vendors(medium_priority):.1f}%)")
print(f"Low priority (high risk): {low_priority} ({_pct_of_vendors(low_priority):.1f}%)")

if total_vendors:
    avg_score = vendor_scores['lending_score'].mean()
    print(f"\nAverage lending score: {avg_score:.1f}/100")
else:
    # Nothing to average; keep `avg_score` defined for any later use.
    avg_score = float('nan')
    print("\nAverage lending score: n/a (no vendors analyzed)")

if high_priority > 0:
    # NOTE(review): the 70 cutoff is hard-coded here and assumes it matches the
    # threshold create_lending_recommendations uses for 'high_priority' - confirm.
    avg_high_score = vendor_scores[vendor_scores['lending_score'] >= 70]['lending_score'].mean()
    print(f"Average score of high-priority vendors: {avg_high_score:.1f}/100")

print("\nRECOMMENDATIONS FOR MICRO-LENDING:")
print(f"• Focus on {high_priority} high-priority vendors for immediate lending")
print(f"• Conduct detailed assessment of {medium_priority} medium-priority vendors")
print(f"• Monitor {low_priority} low-priority vendors for improvement")
print("• Use real-time scoring for ongoing risk assessment")