In [18]:
import pandas as pd
import numpy as np

class FoodSecurityCategorizer:
    def __init__(self, df):
        self.df = df.copy()
        self.categories = {}
        
    def preprocess_data(self):
        print("Preprocessing data...")
        
        first_col = self.df.columns[0]
        self.df = self.df.rename(columns={first_col: 'Rank'})
        
        def extract_country_name(rank_str):
            if pd.isna(rank_str):
                return ""
            rank_str = str(rank_str)
            country = rank_str.replace('=', '').strip()
            country = ''.join([c for c in country if not c.isdigit()]).strip()
            country = country.replace('st', '').replace('nd', '').replace('rd', '').replace('th', '').strip()
            return country
        
        self.df['Country'] = self.df['Rank'].apply(extract_country_name)
        
        def extract_rank_number(rank_str):
            if pd.isna(rank_str):
                return ""
            rank_str = str(rank_str)
            import re
            match = re.search(r'(\d+)', rank_str)
            return match.group(1) if match else ""
        
        self.df['Rank_Number'] = self.df['Rank'].apply(extract_rank_number)
        
        score_columns = ['Overall score', 'Affordability', 'Availability', 
                        'Quality and Safety', 'Sustainability and Adaptation']
        
        for col in score_columns:
            if col in self.df.columns:
                self.df[col] = pd.to_numeric(self.df[col], errors='coerce')
        
        self.df = self.df[self.df['Country'] != '']
        
        print(f"Processed {len(self.df)} countries successfully")
        print(f"Sample countries: {self.df['Country'].head().tolist()}")
        return self.df
    
    def define_food_security_rules(self):
        rules = {
            'food_security_level': {
                'Very High Food Security': lambda row: (
                    row['Overall score'] >= 80 and
                    row['Affordability'] >= 85 and
                    row['Availability'] >= 70 and
                    row['Quality and Safety'] >= 80
                ),
                'High Food Security': lambda row: (
                    row['Overall score'] >= 75 and
                    row['Affordability'] >= 75 and
                    row['Availability'] >= 65 and
                    row['Quality and Safety'] >= 70
                ),
                'Moderate Food Security': lambda row: (
                    65 <= row['Overall score'] < 75 and
                    row['Affordability'] >= 60 and
                    row['Availability'] >= 55
                ),
                'Low Food Security': lambda row: (
                    55 <= row['Overall score'] < 65 and
                    (row['Affordability'] < 65 or row['Availability'] < 60)
                ),
                'Very Low Food Security': lambda row: (
                    45 <= row['Overall score'] < 55
                ),
                'Food Crisis': lambda row: row['Overall score'] < 45
            }
        }
        
        return rules
    
    def apply_categorization(self):
        print("Applying categorization rules...")
        self.preprocess_data()
        rules = self.define_food_security_rules()
        
        results = {}
        
        for category_type, category_rules in rules.items():
            category_results = []
            
            for _, row in self.df.iterrows():
                country_category = 'Uncategorized'
                
                if (pd.notna(row['Overall score']) and 
                    pd.notna(row['Affordability']) and 
                    pd.notna(row['Availability']) and 
                    pd.notna(row['Quality and Safety'])):
                    
                    for category_name, rule_func in category_rules.items():
                        try:
                            if rule_func(row):
                                country_category = category_name
                                break
                        except:
                            continue
                
                if country_category == 'Uncategorized':
                    overall_score = row['Overall score']
                    if overall_score >= 75:
                        country_category = 'High Food Security'
                    elif overall_score >= 65:
                        country_category = 'Moderate Food Security'
                    elif overall_score >= 55:
                        country_category = 'Low Food Security'
                    elif overall_score >= 45:
                        country_category = 'Very Low Food Security'
                    else:
                        country_category = 'Food Crisis'
                
                category_results.append({
                    'Country': row['Country'],
                    'Rank': row['Rank_Number'],
                    'Overall_Score': row['Overall score'],
                    'Affordability': row['Affordability'],
                    'Availability': row['Availability'],
                    'Quality_Safety': row['Quality and Safety'],
                    'Sustainability': row['Sustainability and Adaptation'],
                    'Security_Level': country_category
                })
            
            results[category_type] = pd.DataFrame(category_results)
            print(f"{category_type}: Categorized {len(category_results)} countries")
        
        self.categories = results
        return results
    
    def generate_analysis_report(self):
        if not self.categories:
            self.apply_categorization()
        
        security_df = self.categories['food_security_level']
        
        print("\n" + "="*60)
        print("FOOD SECURITY ANALYSIS REPORT")
        print("="*60)
        
        print(f"\nFOOD SECURITY DISTRIBUTION ({len(security_df)} countries):")
        print("-" * 50)
        security_dist = security_df['Security_Level'].value_counts().sort_index()
        
        for category, count in security_dist.items():
            percentage = (count / len(security_df)) * 100
            print(f"  {category}: {count} countries ({percentage:.1f}%)")
        
        print(f"\nAVERAGE SCORES BY SECURITY LEVEL:")
        print("-" * 50)
        
        performance = security_df.groupby('Security_Level').agg({
            'Overall_Score': ['mean', 'min', 'max'],
            'Affordability': 'mean',
            'Availability': 'mean',
            'Quality_Safety': 'mean',
            'Sustainability': 'mean'
        }).round(1)
        
        print(performance)
        
        return security_df
    
    def get_country_analysis(self, country_name):
        if not self.categories:
            self.apply_categorization()
        
        security_df = self.categories['food_security_level']
        country_data = security_df[security_df['Country'].str.lower() == country_name.lower()]
        
        if country_data.empty:
            print(f"Country '{country_name}' not found in dataset")
            print(f"Available countries: {security_df['Country'].head(10).tolist()}")
            return None
        
        country_row = country_data.iloc[0]
        
        print(f"\nANALYSIS FOR: {country_row['Country'].upper()}")
        print("-" * 40)
        print(f"Global Rank: {country_row['Rank']}")
        print(f"Overall Score: {country_row['Overall_Score']}")
        print(f"Food Security Level: {country_row['Security_Level']}")
        
        print(f"\nCOMPONENT SCORES:")
        print(f"  â€¢ Affordability: {country_row['Affordability']}")
        print(f"  â€¢ Availability: {country_row['Availability']}")
        print(f"  â€¢ Quality & Safety: {country_row['Quality_Safety']}")
        print(f"  â€¢ Sustainability: {country_row['Sustainability']}")
        
        recommendations = self._generate_recommendations(country_row)
        print(f"\nRECOMMENDATIONS:")
        for i, rec in enumerate(recommendations, 1):
            print(f"  {i}. {rec}")
        
        return country_row
    
    def _generate_recommendations(self, country_data):
        recommendations = []
        scores = {
            'overall': country_data['Overall_Score'],
            'affordability': country_data['Affordability'],
            'availability': country_data['Availability'],
            'quality': country_data['Quality_Safety'],
            'sustainability': country_data['Sustainability']
        }
        
        if scores['overall'] >= 80:
            recommendations.append("Maintain excellence and focus on sustainable practices")
        elif scores['overall'] >= 70:
            recommendations.append("Continue current policies with focus on minor weaknesses")
        elif scores['overall'] >= 60:
            recommendations.append("Implement targeted improvements in weak areas")
        elif scores['overall'] >= 50:
            recommendations.append("Develop comprehensive food security strategy")
        else:
            recommendations.append("Urgent intervention needed for basic food access")
        
        if scores['affordability'] < 60:
            recommendations.append("Improve food affordability through subsidies and social programs")
        if scores['availability'] < 55:
            recommendations.append("Enhance food distribution networks and reduce waste")
        if scores['quality'] < 60:
            recommendations.append("Strengthen food safety regulations")
        if scores['sustainability'] < 50:
            recommendations.append("Invest in sustainable agriculture practices")
        
        return recommendations
    
    def show_top_bottom_countries(self, n=10):
        if not self.categories:
            self.apply_categorization()
        
        security_df = self.categories['food_security_level']
        
        print(f"\nTOP {n} COUNTRIES:")
        print("-" * 40)
        top_countries = security_df.nlargest(n, 'Overall_Score')[['Country', 'Overall_Score', 'Security_Level']]
        for _, row in top_countries.iterrows():
            print(f"  {row['Country']}: {row['Overall_Score']} ({row['Security_Level']})")
        
        print(f"\nBOTTOM {n} COUNTRIES:")
        print("-" * 40)
        bottom_countries = security_df.nsmallest(n, 'Overall_Score')[['Country', 'Overall_Score', 'Security_Level']]
        for _, row in bottom_countries.iterrows():
            print(f"  {row['Country']}: {row['Overall_Score']} ({row['Security_Level']})")
    
    def save_results(self):
        if not self.categories:
            self.apply_categorization()
        
        try:
            with pd.ExcelWriter('food_security_analysis.xlsx') as writer:
                for category_type, category_df in self.categories.items():
                    category_df.to_excel(writer, sheet_name=category_type[:31], index=False)
            print("Results saved to 'food_security_analysis.xlsx'")
            
            security_df = self.categories['food_security_level']
            security_df.to_csv('food_security_levels.csv', index=False)
            print("Security levels saved to 'food_security_levels.csv'")
            
        except Exception as e:
            print(f"Error saving files: {e}")

def main():
    try:
        print("Starting Food Security Analysis...")
        
        print("Loading dataset...")
        df = pd.read_csv('Rank.csv')
        
        print("Dataset loaded successfully!")
        print(f"   Shape: {df.shape}")
        print(f"   Columns: {df.columns.tolist()}")
        
        print("\nFirst 3 rows of data:")
        print(df.head(3))
        
        categorizer = FoodSecurityCategorizer(df)
        
        print("\nApplying food security categorization...")
        categories = categorizer.apply_categorization()
        
        print("\nGenerating analysis report...")
        security_df = categorizer.generate_analysis_report()
        
        categorizer.show_top_bottom_countries(10)
        
        test_countries = ['Finland', 'China', 'India', 'Nigeria', 'United States']
        
        print("\n" + "="*60)
        print("COUNTRY-SPECIFIC ANALYSIS")
        print("="*60)
        
        for country in test_countries:
            categorizer.get_country_analysis(country)
            print()
        
        print("Saving results...")
        categorizer.save_results()
        
        print("\nANALYSIS COMPLETED SUCCESSFULLY!")
        
    except FileNotFoundError:
        print("Error: 'Rank.csv' file not found. Please make sure the file is in the same directory.")
    except Exception as e:
        print(f"Unexpected error: {e}")
        import traceback
        traceback.print_exc()

if __name__ == "__main__":
    test_basic_functionality()
    print("\n" + "="*60)
    
    main()

ðŸ§ª Testing basic functionality...
Applying categorization rules...
Preprocessing data...
Processed 4 countries successfully
Sample countries: ['Finla', 'Irela', 'Kuwait', 'Syria']
food_security_level: Categorized 4 countries
âœ… Basic functionality test passed!

Starting Food Security Analysis...
Loading dataset...
Dataset loaded successfully!
   Shape: (113, 6)
   Columns: ['Rank\n(113 countries)', 'Overall score', 'Affordability', 'Availability', 'Quality and Safety', 'Sustainability and Adaptation']

First 3 rows of data:
  Rank\n(113 countries)  Overall score  Affordability  Availability  \
0           1st Finland           83.7           91.9          70.5   
1           2nd Ireland           81.7           92.6          70.5   
2            3rd Norway           80.5           87.2          60.4   

   Quality and Safety  Sustainability and Adaptation  
0                88.4                           82.6  
1                86.1                           75.1  
2                