In [1]:
# -----------------------------------------------------------------------------
# USDA Food Data Central Search Tool With Customizable Filtering
# -----------------------------------------------------------------------------

"""
This script enables users to query multiple food items across various data types
from the USDA database, including both generic foods (Survey FNDDS, Foundation,
SR Legacy) and branded food products. Users can select from several filtering
options to narrow results, such as filtering by specific keywords, requiring
that results begin with the search terms, or applying a strict term filter that
matches exact phrases or phrases followed by a comma.

The script implements polite API usage with configurable delays between requests
and handles pagination automatically to retrieve all available results. It
provides three main functionalities:

1. Interactive Food Search - Search multiple food items with customizable
   filters, generate singular/plural variants automatically, and export results
   as Python lists.

2. Serving Size Finder - Find available household serving sizes for specific
   foods, useful for meal planning and portion control.

3. Nutrition Analysis - Get detailed nutrition facts with smart default
   servings and export results to CSV for further analysis, with built-in
   data validation and warnings.

Usage:
    The script is designed for Jupyter Notebook environments with interactive
    widgets. Configure your API key in the api_key variable, set your food
    lists in Cell 10, and use the interactive interface or call functions
    directly.

    Interactive Search:
        - Configure INTERACTIVE_FOOD_LIST_INPUT in Cell 10
        - Select desired filters using checkboxes
        - Click "Search USDA Database" button

    Serving Size Analysis:
        - Configure EXAMPLE_ANALYSIS_FOOD_LIST in Cell 10
        - Run main_serving_finder() function

    Nutrition Analysis:
        - Configure EXAMPLE_ANALYSIS_FOOD_LIST in Cell 10
        - Run main_nutrition_analysis() function

Requirements:
    - USDA FoodData Central API key (free at https://fdc.nal.usda.gov/api-guide.html)
    - Python packages: requests, time, ipywidgets, IPython, re, csv, fractions, typing
    - Jupyter Notebook environment for interactive widgets

Filter Options:
    - Singular/Plural Processing: Automatically generates word variants
    - Keyword Filter: Prioritizes foods with descriptive keywords (Raw, Frozen, Canned, etc.)
    - Leading Words Filter: Results must start with search terms
    - Strict Comma Filter: Most restrictive matching for exact terms or comma-separated phrases

The script includes comprehensive error handling, API rate limiting, data
validation, and export functionality for research and meal planning purposes.
"""

# -----------------------------------------------------------------------------
# Cell 1: Import Required Libraries and Dependencies
# -----------------------------------------------------------------------------

# ------ Import All Required Modules ------
import csv
import os
import re
import time
from fractions import Fraction
from typing import Dict, List, Optional, Any, Union

import ipywidgets as widgets
import requests
from IPython.display import display, clear_output

# Credentials must not be committed with the notebook: read the FoodData
# Central key from the USDA_API_KEY environment variable instead. When the
# variable is unset, fall back to api.data.gov's shared "DEMO_KEY", which works
# for a quick trial but is heavily rate limited.
api_key = os.environ.get("USDA_API_KEY", "DEMO_KEY")

In [2]:
# ----------------------------------------------------------------------------
# Cell 2: Base USDA API Client Class
# ----------------------------------------------------------------------------

class BaseUSDAClient:
    """
    Base class for USDA FoodData Central API interactions.
    Provides common functionality for API requests and food searches.
    """

    def __init__(self, api_key: str):
        """
        Initialize the base API client with credentials and session.

        Args:
            api_key (str): Valid USDA FoodData Central API key
        """
        self.api_key = api_key
        self.base_url = "https://api.nal.usda.gov/fdc/v1"
        self.session = requests.Session()

    def _make_request(self, endpoint: str, params: dict, timeout: int = 20) -> dict:
        """
        Centralized API request handling with error management.

        Args:
            endpoint: API endpoint (without base URL)
            params: Request parameters (api_key will be added automatically)
            timeout: Request timeout in seconds

        Returns:
            Dictionary containing API response, or empty dict on error
        """
        params = params.copy()  # Don't modify the caller's dict
        params['api_key'] = self.api_key
        url = f"{self.base_url}/{endpoint.lstrip('/')}"

        try:
            response = self.session.get(url, params=params, timeout=timeout)
            response.raise_for_status()
            return response.json()
        except (requests.exceptions.RequestException, ValueError) as e:
            # ValueError covers a non-JSON body on requests versions whose JSON
            # decode error is not a RequestException subclass.
            message = str(e)
            # HTTP error messages embed the full URL, query string included, so
            # mask the key before it lands in the notebook output.
            if self.api_key:
                message = message.replace(str(self.api_key), "***")
            print(f"API request error for {endpoint}: {message} ❌")
            return {}

    def _find_exact_match_id(self, query: str, data_type: str = "Survey (FNDDS)") -> Optional[int]:
        """
        Search for an exact match and return its FDC ID.

        The comparison is case-insensitive and ignores surrounding whitespace.
        Only the first five search hits are inspected.

        Args:
            query: Food description to search for
            data_type: Type of food data to search (default: Survey FNDDS)

        Returns:
            FDC ID if exact match found, None otherwise
        """
        params = {
            'query': query,
            'dataType': [data_type],
            'pageSize': 5
        }
        results = self._make_request("foods/search", params)

        wanted = query.strip().lower()
        for food in results.get('foods', []):
            # `or ''` guards against an explicit null description in the payload
            description = food.get('description') or ''
            if description.strip().lower() == wanted:
                return food.get('fdcId')
        return None

    def _get_food_details(self, fdc_id: int) -> Dict:
        """
        Fetch complete details for a given FDC ID.

        Args:
            fdc_id: Food Data Central identification number

        Returns:
            Dictionary containing complete food details, or empty dict on error
        """
        params = {'format': 'full'}
        return self._make_request(f"food/{fdc_id}", params)

    def _get_available_measures(self, food_details: Dict) -> List[str]:
        """
        Extracts all available household measures from the food's portion data.

        Portions without a description or gram weight, portions described as
        'N/A' or containing "Quantity not specified", and portions whose gram
        weight is not numeric are skipped.

        Args:
            food_details: Complete food details from the API

        Returns:
            Sorted, de-duplicated list of measures formatted as
            "<description> (<grams>g)" with one decimal place
        """
        measures = set()  # a set removes duplicate portion entries
        for portion in food_details.get('foodPortions') or []:
            desc = portion.get('portionDescription')
            grams = portion.get('gramWeight')
            if not desc or not grams:
                continue
            if desc == 'N/A' or "Quantity not specified" in desc:
                continue
            try:
                grams_value = float(grams)
            except (TypeError, ValueError):
                continue  # malformed weight: skip the portion instead of crashing
            measures.add(f"{desc} ({grams_value:.1f}g)")

        return sorted(measures)

In [3]:
# ----------------------------------------------------------------------------
# Cell 3: Singular/Plural Processing Functions
# ----------------------------------------------------------------------------

# Imports moved to Cell 1 for consolidation

def _parse_food_list_input(food_list_input: Union[str, List[str]]) -> List[str]:
    """
    Parses a food list from either a comma-separated string or a list.
    
    Args:
        food_list_input: A list of food items or a comma-separated string
        
    Returns:
        A standardized list of food item strings
    """
    if isinstance(food_list_input, str):
        # Parse comma-separated string, handling "and" as well
        items = re.split(r',|\\s+and\\s+', food_list_input)
        return [item.strip() for item in items if item.strip()]
    # If it's already a list, return it as is
    return food_list_input

# Irregular or uncountable food words that the suffix rules in to_singular()
# would mangle (e.g. "olives" -> "olif", "hummus" -> "hummu"). Keys lowercase.
_SINGULAR_SPECIAL_CASES = {
    'knives': 'knife',
    'olives': 'olive',
    'chives': 'chive',
    'cloves': 'clove',
    'cheeses': 'cheese',
    'hummus': 'hummus',
    'couscous': 'couscous',
    'asparagus': 'asparagus',
    'molasses': 'molasses',
}

# Words whose plural is not produced correctly by the rules in to_plural().
_PLURAL_SPECIAL_CASES = {
    'avocado': 'avocados',
    'hummus': 'hummus',
    'couscous': 'couscous',
    'asparagus': 'asparagus',
    'molasses': 'molasses',
}


def _handle_special_case(word: str, word_lower: str, special_cases: dict) -> Optional[str]:
    """
    Checks for and handles special case words, preserving capitalization.

    Args:
        word: The word as typed by the user
        word_lower: Lowercase form of `word`, used as the lookup key
        special_cases: Mapping of lowercase word -> lowercase replacement

    Returns:
        The replacement (upper-cased if `word` is all caps, capitalized if
        `word` starts with a capital), or None when `word` has no entry
    """
    if not word or word_lower not in special_cases:
        return None
    replacement = special_cases[word_lower]
    if word.isupper():
        return replacement.upper()
    if word[0].isupper():
        return replacement.capitalize()
    return replacement


def to_singular(word):
    """
    Convert a word to its singular form using basic English rules.

    Args:
        word: The word to convert to singular form

    Returns:
        The singular form of the input word; blank input yields ''
    """
    word = word.strip()
    word_lower = word.lower()

    if not word:
        return word

    # Handle special cases first
    special_case_result = _handle_special_case(word, word_lower, _SINGULAR_SPECIAL_CASES)
    if special_case_result is not None:
        return special_case_result

    # Handle regular plural rules
    if word_lower.endswith('ies') and len(word) > 3:
        # berries -> berry, cherries -> cherry
        return word[:-3] + 'y'
    if word_lower.endswith('ves'):
        # leaves -> leaf, loaves -> loaf
        return word[:-3] + 'f'
    if word_lower.endswith('ses') and len(word) > 3:
        # glasses -> glass, classes -> class
        return word[:-2]
    if word_lower.endswith('es') and len(word) > 2:
        if word_lower.endswith(('ches', 'shes', 'xes', 'zes', 'oes')):
            # peaches -> peach, boxes -> box, potatoes -> potato
            return word[:-2]
        # vegetables -> vegetable, lychees -> lychee
        return word[:-1]
    if word_lower.endswith('s') and len(word) > 1 and not word_lower.endswith('ss'):
        # Simple plural: cats -> cat, dogs -> dog
        return word[:-1]

    # If no rules apply, return the original word
    return word


def to_plural(word):
    """
    Convert a word to its plural form using basic English rules.

    Args:
        word: The word to convert to plural form

    Returns:
        The plural form of the input word; blank input yields ''
    """
    word = word.strip()
    word_lower = word.lower()

    if not word:
        return word

    # Handle special cases first
    special_case_result = _handle_special_case(word, word_lower, _PLURAL_SPECIAL_CASES)
    if special_case_result is not None:
        return special_case_result

    # Handle regular plural rules
    if word_lower.endswith('y') and len(word) > 1 and word_lower[-2] not in 'aeiou':
        # berry -> berries, cherry -> cherries
        return word[:-1] + 'ies'
    if word_lower.endswith('fe'):
        # knife -> knives
        return word[:-2] + 'ves'
    if word_lower.endswith('f'):
        # leaf -> leaves
        return word[:-1] + 'ves'
    if word_lower.endswith(('ch', 'sh', 'x', 'z', 's')):
        # glass -> glasses, box -> boxes
        return word + 'es'
    if word_lower.endswith('o') and len(word) > 1 and word_lower[-2] not in 'aeiou':
        # potato -> potatoes (exceptions belong in _PLURAL_SPECIAL_CASES)
        return word + 'es'
    # Simple plural: cat -> cats, dog -> dogs
    return word + 's'

def process_food_entry(entry):
    """
    Process a single food entry to generate both singular and plural forms.

    The plural is always derived from the singular form, so an entry that is
    already plural ("apples", "green beans") yields its real singular/plural
    pair rather than a doubly-pluralized term such as "appleses".

    Args:
        entry: A single food item string to process

    Returns:
        A list of terms to search for including singular and plural variants.
        Blank input returns an empty list.
    """
    entry = entry.strip()
    if not entry:
        return []

    words = entry.split()

    # Single word: return the singular/plural pair
    if len(words) == 1:
        singular = to_singular(entry)
        plural = to_plural(singular)
        entry_key = entry.lower()

        if singular.lower() == plural.lower():
            return [entry]  # Word doesn't change form
        if entry_key in (singular.lower(), plural.lower()):
            return [singular, plural]  # Entry was one of the standard forms
        return [entry, singular, plural]  # Entry was neither standard form

    # Multi-word entry: only the last word (usually the main noun) is inflected
    main_word = words[-1]
    prefix_words = ' '.join(words[:-1])

    singular_main = to_singular(main_word)
    plural_main = to_plural(singular_main)

    # Original entry first, then whichever variants differ from it
    terms = [entry]
    for variant in (singular_main, plural_main):
        if variant.lower() != main_word.lower():
            terms.append(f"{prefix_words} {variant}".strip())

    # Remove case-insensitive duplicates while preserving order
    unique_terms = []
    seen = set()
    for term in terms:
        key = term.lower()
        if key not in seen:
            seen.add(key)
            unique_terms.append(term)

    return unique_terms

def expand_food_list(food_list_input: Union[str, List[str]]) -> List[str]:
    """
    Process the entire food list to create singular and plural variants.

    Prints a short report showing how each entry was expanded.

    Args:
        food_list_input: List of food items or comma-separated string

    Returns:
        List of processed food terms with singular and plural variants
    """
    # Use the helper function to handle both list and string inputs
    items = _parse_food_list_input(food_list_input)

    expanded_list = []

    print("=== SINGULAR AND PLURAL PROCESSING ===")
    # "\n" (single backslash) emits a real blank line; a doubled backslash
    # would print the two characters "\n" literally.
    print("Converting input entries to include both singular and plural forms:\n")

    for item in items:
        processed_terms = process_food_entry(item)
        if not processed_terms:
            continue
        # Joining also covers the single-term case, so no separate branch
        print(f"'{item}' → {'; '.join(processed_terms)}")
        expanded_list.extend(processed_terms)

    print(f"\nExpanded from {len(items)} entries to {len(expanded_list)} search terms.")
    print("=" * 50 + "\n")

    return expanded_list

In [4]:
# ----------------------------------------------------------------------------
# Cell 4: Food Search and Filtering Class
# ----------------------------------------------------------------------------

# ------ Define Filter Keywords ------
# Descriptor words used by the keyword filter, in display casing.
KEYWORDS = {
    "Raw",
    "Frozen",
    "Canned",
    "Nfs",
    "Plain",
    "Unsalted",
}
# Lowercase copy for case-insensitive membership tests.
KEYWORDS_LOWER = set(map(str.lower, KEYWORDS))

# ------ Define the USDA Food Search and Filtering Class ------
class USDAFoodSearcher(BaseUSDAClient):
    """
    A class to search the USDA FDC API for food items, applying user-selected
    filters to reorder or restrict results.
    """
    def search_foods(
        self,
        query: str,
        data_type: str,
        enable_keyword_filter: bool,
        enable_leading_words_filter: bool,
        enable_strict_comma_filter: bool,
        page_size: int = 200,
        polite_pause: float = 0.25
    ) -> List[str]:
        """
        Search the USDA FDC API for food items matching the specified query and data type,
        applying user-selected filters to reorder or restrict results. Results are printed
        as a numbered list and their raw descriptions are returned.

        Args:
            query: Search term for food items
            data_type: Type of food data to search
            enable_keyword_filter: If True, keeps only items whose description contains
                one of KEYWORDS as a whole word, or equals the query exactly
            enable_leading_words_filter: If True, requires all query words to appear at the
                start of the food description
            enable_strict_comma_filter: If True, restricts results to those where the leading
                term is followed by a comma or is an exact match
            page_size: Number of results per page
            polite_pause: Delay between API requests in seconds

        Returns:
            A list of strings, where each string is the description of a filtered food item
        """
        page_number = 1
        all_pages_foods = []

        print(f"\n=== Search Term: {query!r}  |  Data Type: {data_type} ===")

        # ------ Retrieve All Results From the API ------
        while True:
            params = {
                "query": query,
                "dataType": [data_type],
                "pageSize": page_size,
                "pageNumber": page_number
            }
            # Use the inherited helper method for the API call
            payload = self._make_request("foods/search", params)

            # _make_request returns an empty dict on error, which stops the loop
            if not payload:
                break

            foods = payload.get("foods", [])

            if page_number == 1:
                total_hits = payload.get('totalHits', 0)
                print(f"Total matches found in the USDA database: {total_hits} \U0001F50D")

            if not foods:
                break

            all_pages_foods.extend(foods)

            # A short page means this was the last one
            if len(foods) < page_size:
                break
            page_number += 1
            time.sleep(polite_pause)

        # ------ Helper Function for Strict Comma or Exact Match Filtering ------
        def is_strict_match(food_item, query_str):
            """
            Determine if a food item matches the strict filtering criteria.

            An item matches when its description equals the query (ignoring
            case), or when its leading words are exactly the query words and
            the last of those words is immediately followed by a comma.

            Args:
                food_item: Dictionary containing food information from API
                query_str: The search query string

            Returns:
                Boolean indicating whether the item meets strict match criteria
            """
            desc = food_item.get("description") or ""
            lower_desc = desc.lower()
            lower_query = query_str.lower()

            if lower_desc == lower_query:
                return True

            query_words = lower_query.split()
            num_query_words = len(query_words)
            leading_desc_words = lower_desc.split()[:num_query_words]
            cleaned_desc_words_set = {re.sub(r'[^a-z0-9]', '', word) for word in leading_desc_words}

            if set(query_words) != cleaned_desc_words_set:
                return False

            desc_words_full = desc.split()
            if len(desc_words_full) < num_query_words:
                return False
            last_word_of_phrase = desc_words_full[num_query_words - 1]
            return last_word_of_phrase.endswith(',')

        # ------ Apply Keyword and Leading Words Filters ------
        # Query-derived values do not change per food, so compute them once.
        query_lower = query.lower()
        query_words_set = set(query_lower.split())
        num_words_to_check = len(query_words_set)

        pre_filtered_results = []
        for food in all_pages_foods:
            # `or` also covers an explicit null description in the payload
            lower_desc = (food.get("description") or "—").lower()

            if enable_keyword_filter:
                # Drop punctuation but KEEP whitespace so the description still
                # splits into words (the class needs a single-backslash `\s`).
                desc_words_set = set(re.sub(r'[^a-z0-9\s]', '', lower_desc).split())
                has_keyword = not KEYWORDS_LOWER.isdisjoint(desc_words_set)
                is_perfect_match = (lower_desc == query_lower)
                if not (has_keyword or is_perfect_match):
                    continue

            if enable_leading_words_filter:
                leading_desc_words = lower_desc.split()[:num_words_to_check]
                cleaned_desc_words_set = {re.sub(r'[^a-z0-9]', '', word) for word in leading_desc_words}
                if query_words_set != cleaned_desc_words_set:
                    continue

            pre_filtered_results.append(food)

        # ------ Apply Strict Comma Filter or Reorder Results ------
        if enable_strict_comma_filter:
            final_results = [food for food in pre_filtered_results if is_strict_match(food, query)]
        elif enable_leading_words_filter:
            # Stable sort: strict matches float to the top, API order otherwise kept
            pre_filtered_results.sort(key=lambda food: is_strict_match(food, query), reverse=True)
            final_results = pre_filtered_results
        else:
            final_results = pre_filtered_results

        # ------ Display the Filtered Results and Prepare Return List ------
        filtered_descriptions = []
        if not final_results:
            print("--> No results found that match the selected filter criteria for this search term.")
        else:
            for i, food in enumerate(final_results, 1):
                desc_raw = food.get("description") or "—"
                desc_title = desc_raw.title()
                fdc_id = food.get("fdcId")

                # Add the raw description to the list that will be returned
                filtered_descriptions.append(desc_raw)

                if data_type == "Branded":
                    brand = food.get("brandName") or food.get("brandOwner") or "N/A"
                    print(f"{i:3d}. {desc_title} (Brand: {brand}) – FDC ID: {fdc_id}")
                else:
                    print(f"{i:3d}. {desc_title} – FDC ID: {fdc_id}")

            print(f"--> Displayed {len(final_results)} filtered results for this search term.")

        # Return the list of filtered descriptions
        return filtered_descriptions

In [5]:
# ----------------------------------------------------------------------------
# Cell 5: USDA Serving Finder Class
# ----------------------------------------------------------------------------

class USDAServingFinder(BaseUSDAClient):
    """
    Looks up a single food by exact description (Survey (FNDDS) data by
    default) and reports the household serving sizes recorded for it.
    """

    def display_servings_for_food(self, query: str):
        """
        Print the household servings available for the food named `query`.

        Each failure path (no exact match, details unavailable, no portions)
        prints a one-line explanation followed by a blank line and returns.

        Args:
            query (str): Food description to look up by exact match
        """
        def _report_and_stop(message: str) -> None:
            # Shared tail for every early exit: the reason, then a spacer line
            print(message)
            print()

        print(f"Searching for Food Item: '{query}'")

        fdc_id = self._find_exact_match_id(query)
        if not fdc_id:
            _report_and_stop("  No exact match found in the database")
            return

        food_details = self._get_food_details(fdc_id)
        if not food_details:
            _report_and_stop("  Could not retrieve food details from the database")
            return

        if not food_details.get('foodPortions', []):
            _report_and_stop("  No household servings listed in the database")
            return

        print(f"  Match Found Successfully (FDC ID: {fdc_id}) ✅")
        print("  Available Household Serving Sizes:")

        measures = self._get_available_measures(food_details)
        if measures:
            for measure in measures:
                print(f"    {measure}")
        else:
            print("    No valid household servings found in the database")

        print()  # Blank line separating this food from the next

In [6]:
# -----------------------------------------------------------------------------
# Cell 6: Food Category and Unit Preferences Configuration
# -----------------------------------------------------------------------------

# ------ Food Category Classification Dictionary ------

# Foods grouped under their macro/micronutrient category label. Group order and
# the order of foods inside each group fix the key order of FOOD_CATEGORIES.
_FOODS_BY_CATEGORY = (
    ('PRIMARY PROTEIN SOURCES', (
        'Eggs', 'Greek Yogurt', 'Protein Powder', 'Milk', 'Cottage Cheese',
        'Mozzarella Cheese', 'Lentils', 'Chickpeas', 'Kidney Beans', 'Hummus',
        'Cheese Tortellinis', 'Spinach Tortellinis',
    )),
    ('PRIMARY FAT SOURCES', (
        'Olive Oil', 'Peanut Butter', 'Almonds', 'Mixed Nuts', 'Avocados',
        'Sunflower Seeds', 'Chia Seeds', 'Tahini', 'Heavy Cream', 'Trail Mix',
    )),
    ('PRIMARY CARBOHYDRATE SOURCES', (
        'Oats', 'Potatoes', 'White Rice', 'Multigrain Bread', 'Pasta',
        'Bananas', 'Couscous', 'Corn', 'Green Peas', 'Pizza',
    )),
    ('PRIMARY MICRONUTRIENT SOURCES', (
        'Mixed Vegetables', 'Spinach', 'Broccoli', 'Berries', 'Carrots',
        'Tomatoes', 'Mushrooms', 'Cauliflower', 'Green Beans', 'Orange Juice',
        'Apple Juice', 'Fruit Juice',
    )),
)

# Flat lookup: display food name -> category label.
FOOD_CATEGORIES = {
    food_name: category_label
    for category_label, food_names in _FOODS_BY_CATEGORY
    for food_name in food_names
}

# Maps a lowercase category keyword to a list of household unit names, ordered
# from most to least preferred.
# Per the original author's note, keywords are matched against the official
# 'wweiaFoodCategoryDescription' field returned by the API.
# NOTE(review): the code that consumes this table is outside this section, so
# how keywords are matched (substring vs. whole word) and whether entry order
# matters when several keywords apply should be confirmed there before
# reordering or merging entries.
CATEGORY_UNITS = {
    # Dairy & Dairy Products
    'yogurt': ['cup', 'container', 'oz'],
    'milk': ['cup', 'oz', 'container'],
    'cheese': ['slice', 'stick', 'cup', 'curd', 'inch'],
    'cream': ['tablespoon', 'container', 'cup'],
    'cottage': ['cup'],
    
    # Protein Foods
    'beans': ['cup'],
    'lentils': ['cup'],
    'chickpeas': ['cup', 'pea'],
    'edamame': ['cup', 'pod'],
    'hummus': ['tablespoon', 'container'],
    'almonds': ['oz', 'cup', 'nut', 'package'],
    'mixed nuts': ['oz', 'cup', 'package'],
    'seeds': ['oz', 'tablespoon', 'cup', 'package'],
    'peanut butter': ['tablespoon', 'serving'],
    'almond butter': ['tablespoon'],
    'tahini': ['tablespoon'],
    'egg': ['egg', 'cup'],
    
    # Grains
    'oats': ['cup'],
    'rice': ['cup'],
    'pasta': ['cup', 'oz'],
    'tortellini': ['cup'],
    'bread': ['slice', 'inch'],
    'bagel': ['bagel', 'large', 'regular', 'small', 'miniature'],
    'quinoa': ['cup'],
    'corn': ['cup', 'ear'],
    'couscous': ['cup', 'oz'],
    
    # Fruits
    'apple': ['medium', 'large', 'small', 'cup', 'slice', 'package'],
    'banana': ['banana', 'cup', 'slice', 'inch'],
    'orange': ['fruit', 'medium', 'large', 'small', 'cup', 'section', 'slice'],
    'berries': ['cup', 'berry'],
    'raisins': ['cup', 'box', 'raisin', 'oz'],
    'date': ['date', 'cup'],
    'avocado': ['fruit', 'medium', 'large', 'small', 'cup', 'slice'],
    
    # Vegetables
    'spinach': ['cup', 'leaf'],
    'broccoli': ['cup', 'floweret', 'piece'],
    'cauliflower': ['cup', 'floweret', 'piece'],
    'carrots': ['cup', 'carrot', 'slice', 'stick'],
    'mushrooms': ['cup', 'whole', 'slice', 'piece'],
    'tomatoes': ['cup', 'whole', 'tomato', 'slice', 'cherry', 'grape', 'plum'],
    'potato': ['potato', 'cup'],
    'sweet potato': ['medium', 'large', 'small', 'cup', 'oz'],
    'brussels sprouts': ['sprout', 'cup'],
    'peas': ['cup'],
    'green beans': ['cup', 'bean', 'piece'],
    
    # Snacks & Mixtures
    'trail mix': ['cup', 'package'],
    
    # Mixed Dishes
     'pizza': ['small', 'personal', 'slice', 'piece'], 
    
    # Fats, Oils & Sweets
    'oil': ['tablespoon', 'cup'],
    
    # Beverages
    'juice': ['cup', 'oz', 'box', 'container', 'pouch'],
    'drink': ['cup', 'bottle', 'can', 'oz'],
    'shake': ['cup', 'bottle', 'can', 'oz'],
}

In [7]:
# -----------------------------------------------------------------------------
# Cell 7: Food Name Cleaning Function
# -----------------------------------------------------------------------------

# USDA descriptors / preparation notes. Each pattern removes the descriptor and
# everything after it. The `\b` anchors make them match whole words only, so
# "raw" no longer truncates "strawberries" and "cooked" ignores "uncooked".
# Patterns are applied one after another, in this order.
_JUNK_DESCRIPTOR_PATTERNS = tuple(
    re.compile(pattern) for pattern in (
        r',?\s*\bns as to\b.*',
        r',?\s*\bnfs\b.*',
        r',?\s*\bfrom canned\b.*',
        r',?\s*\bfrom frozen\b.*',
        r',?\s*\bcooked\b.*',
        r',?\s*\braw\b.*',
        r',?\s*\bplain\b.*',
        r',?\s*\b100%.*',
        r',?\s*\bregular\b.*',
        r',?\s*\bunsweetened\b.*',
        r',?\s*\bready-to-drink\b.*',
        r',?\s*\bno added fat\b.*',
        r',?\s*\bno sauce\b.*',
        r',?\s*\bunsalted\b.*',
        r',?\s*\bwith oil\b.*',
        r',?\s*\bbottled or in a carton\b.*',
        r',?\s*\bthin crust\b.*',
        r',?\s*\bcreamed\b.*',
        r',?\s*\blarge or small curd\b.*',
        r',?\s*\bpart skim\b.*',
        r',?\s*\breduced fat\b.*',
        r',?\s*\(\d+%\).*',
        r',?\s*\bwhole\b.*',
        r',?\s*\bhigh protein\b.*',
    )
)

# (pattern, replacement) pairs tried in order against the de-junked lowercase
# name; the first pattern that matches at the start wins.
_FOOD_SIMPLIFICATIONS = tuple(
    (re.compile(pattern), replacement) for pattern, replacement in (
        # Eggs (`\b` keeps "eggplant" out)
        (r'^egg\b,?\s*', 'eggs'),

        # Protein powders
        (r'^nutritional powder mix.*', 'protein powder'),
        (r'.*protein.*powder.*', 'protein powder'),

        # Dairy products
        (r'^yogurt,?\s*greek,?\s*nonfat milk', 'greek yogurt'),
        (r'^milk,?\s*.*', 'milk'),
        (r'^cheese,?\s*cottage', 'cottage cheese'),
        (r'^cheese,?\s*mozzarella', 'mozzarella cheese'),
        (r'^cream,?\s*heavy', 'heavy cream'),

        # Proteins
        (r'^mixed nuts,?\s*with peanuts', 'mixed nuts'),
        (r'^peanut butter', 'peanut butter'),
        (r'^almond butter', 'almond butter'),
        (r'^tahini', 'tahini'),
        (r'^hummus', 'hummus'),

        # Grains & Starches
        (r'^bread,?\s*multigrain', 'multigrain bread'),
        (r'^pasta', 'pasta'),
        (r'^rice,?\s*white', 'white rice'),
        (r'^oats', 'oats'),
        (r'^couscous', 'couscous'),

        # Vegetables
        (r'^spinach', 'spinach'),
        (r'^broccoli', 'broccoli'),
        (r'^mushrooms', 'mushrooms'),
        (r'^cauliflower', 'cauliflower'),
        (r'^carrots', 'carrots'),
        (r'^tomatoes', 'tomatoes'),
        (r'^green beans', 'green beans'),
        (r'^green peas', 'green peas'),
        # `\b` keeps "corned beef" / "cornbread" out
        (r'^corn\b', 'corn'),
        (r'^potato', 'potatoes'),
        (r'^classic mixed vegetables', 'mixed vegetables'),

        # Fruits
        (r'^banana', 'bananas'),
        (r'^avocado', 'avocados'),
        (r'^berries', 'berries'),

        # Beverages
        (r'^orange juice', 'orange juice'),
        (r'^apple juice', 'apple juice'),
        (r'^fruit juice', 'fruit juice'),

        # Specialty items
        (r'^tortellini,?\s*cheese-filled', 'cheese tortellinis'),
        (r'^tortellini,?\s*spinach-filled', 'spinach tortellinis'),
        (r'^trail mix with nuts and fruit', 'trail mix'),
        (r'^pizza,?\s*cheese with vegetables.*frozen', 'pizza'),

        # Seeds and nuts
        (r'^sunflower seeds', 'sunflower seeds'),
        (r'^almonds', 'almonds'),
        (r'^chia seeds', 'chia seeds'),

        # Oils
        (r'^olive oil', 'olive oil'),

        # Legumes
        (r'^lentils', 'lentils'),
        (r'^chickpeas', 'chickpeas'),
        (r'^kidney beans', 'kidney beans'),
    )
)


def clean_food_name(raw_name: str, is_branded: bool = False) -> str:
    """
    Cleans up food description strings for better readability.

    Branded names are cut at the first comma and title-cased. Generic names
    are lowercased, stripped of USDA descriptors (whole-word matches only),
    mapped to a friendly name when a known simplification applies, and
    otherwise reduced to their first comma-separated part.

    Args:
        raw_name: The original food description from the database
        is_branded: Whether the food is a branded product

    Returns:
        A cleaned, title-cased food name. If cleaning removes everything, the
        first word of `raw_name` is used, or "Unknown Food" for blank input.
    """
    if is_branded:
        # For branded items, take the primary name before a comma
        return raw_name.split(',')[0].strip().title()

    # Step 1: Convert to lowercase for processing
    cleaned_name = raw_name.lower()

    # Step 2: Remove common USDA descriptors and preparation methods
    for junk_pattern in _JUNK_DESCRIPTOR_PATTERNS:
        cleaned_name = junk_pattern.sub('', cleaned_name)

    # Step 3: Apply the first matching food-specific simplification
    for pattern, replacement in _FOOD_SIMPLIFICATIONS:
        if pattern.match(cleaned_name):
            cleaned_name = replacement
            break
    else:
        # Step 4: No simplification applied - keep the first non-empty
        # comma-separated part of the name
        parts = [part.strip() for part in cleaned_name.split(',') if part.strip()]
        if parts:
            cleaned_name = parts[0]

    # Step 5: Final cleanup - remove any remaining commas and extra spaces
    cleaned_name = re.sub(r',.*', '', cleaned_name)  # Remove everything after first comma
    cleaned_name = re.sub(r'\s+', ' ', cleaned_name).strip()  # Normalize whitespace

    # Step 6: Handle edge case where name becomes empty
    if not cleaned_name:
        # Fall back to the first word of the original name, without the
        # punctuation that may trail it (e.g. "Whole," -> "whole")
        tokens = raw_name.split()
        first_word = tokens[0].strip(',;:').lower() if tokens else ''
        cleaned_name = first_word or "unknown food"

    # Step 7: Convert to title case
    return cleaned_name.title()

In [8]:
# -----------------------------------------------------------------------------
# Cell 8: USDA Nutrition API Class
# -----------------------------------------------------------------------------

class USDANutritionAPI(BaseUSDAClient):
    """
    Reports nutrition for single foods from the USDA Survey (FNDDS) database,
    scaled to a smart default serving size.

    The default serving is picked from the food's household measures using, in
    order: description-specific overrides, the food's WWEIA category (through
    the module-level CATEGORY_UNITS table), and finally a generic unit
    preference list.
    """

    # Foods that default to a tablespoon serving whenever a tablespoon measure exists.
    _TABLESPOON_FOODS = ('cream cheese', 'hummus', 'peanut butter', 'almond butter', 'tahini')

    # ------ Serving Size and Measurement Methods ------

    @staticmethod
    def _has_measure(measures: List[str], *keywords: str) -> bool:
        """
        Reports whether a single measure contains every given keyword.

        Args:
            measures: Household measure descriptions to scan
            *keywords: Lowercase substrings that must all occur in the same measure

        Returns:
            True if at least one measure (compared in lowercase) contains all keywords
        """
        return any(
            all(keyword in measure.lower() for keyword in keywords)
            for measure in measures
        )

    def _filter_guideline_amounts(self, available_measures: List[str]) -> List[str]:
        """
        Filters out guideline amounts which are portion control suggestions, not typical servings.

        Args:
            available_measures: List of all available measures

        Returns:
            List of measures with guideline amounts removed
        """
        return [measure for measure in available_measures
                if 'guideline amount' not in measure.lower()]

    def _get_default_measure(self, food_category: Optional[str], available_measures: List[str], food_description: str = "") -> str:
        """
        Determines the most logical default measure based on the food's WWEIA category and description.

        Args:
            food_category: The WWEIA food category description
            available_measures: List of available household measures
            food_description: The food description for additional context

        Returns:
            The best default measure unit as a string. "100g" is returned when
            no household measure is available at all.
        """
        # Guideline amounts are ignored unless they are the only measures on offer.
        filtered_measures = self._filter_guideline_amounts(available_measures)
        if not filtered_measures:
            filtered_measures = available_measures

        has = self._has_measure
        food_desc_lower = food_description.lower()

        # ---- Description-specific overrides (checked in this order) ----

        # Potatoes: prefer the "any size" measure over baby/new potato.
        if 'potato' in food_desc_lower and 'nfs' in food_desc_lower:
            if has(filtered_measures, 'any size'):
                return 'potato'

        # Protein powder: prefer a scoop over a packet. Which scoop (NFS or not)
        # is resolved later by _find_portion_grams.
        if 'nutritional powder mix' in food_desc_lower or 'high protein' in food_desc_lower:
            if has(filtered_measures, 'scoop'):
                return 'scoop'

        # Mozzarella: a shredded cup, or failing that an NFS cup.
        if 'mozzarella' in food_desc_lower:
            if has(filtered_measures, 'cup', 'shredded') or has(filtered_measures, 'cup', 'nfs'):
                return 'cup'

        # Spreads and dips are served by the tablespoon.
        if any(food in food_desc_lower for food in self._TABLESPOON_FOODS):
            if has(filtered_measures, 'tablespoon'):
                return 'tablespoon'

        if 'avocado' in food_desc_lower and has(filtered_measures, 'fruit'):
            return 'fruit'

        if 'banana' in food_desc_lower and has(filtered_measures, 'banana'):
            return 'banana'

        # Tortellini: prefer cup over piece.
        if 'tortellini' in food_desc_lower and has(filtered_measures, 'cup'):
            return 'cup'

        # ---- Category-based selection ----
        if food_category:
            category_lower = food_category.lower()

            # Juices and drinks are always reported per cup. When only an NFS
            # fl oz measure exists, 'cup' is still returned and
            # _find_portion_grams derives the weight from the fl oz portion.
            if 'juice' in category_lower or 'drink' in category_lower:
                if has(filtered_measures, 'cup'):
                    return 'cup'
                if has(filtered_measures, 'fl oz', 'nfs'):
                    return 'cup'

            for category_keyword, units in CATEGORY_UNITS.items():
                if category_keyword not in category_lower:
                    continue
                # Units are tried in priority order; the first measure that
                # satisfies a unit wins.
                for unit in units:
                    for measure in filtered_measures:
                        measure_lower = measure.lower()
                        if unit == 'cup':
                            # Cheese categories only accept a shredded cup.
                            if 'cup' in measure_lower and (
                                'cheese' not in category_lower or 'shredded' in measure_lower
                            ):
                                return 'cup'
                        elif unit == 'tablespoon':
                            if 'tablespoon' in measure_lower or 'tbsp' in measure_lower:
                                return 'tablespoon'
                        elif unit in measure_lower:
                            return unit

        # ---- Generic fallback for items without a matching category ----
        preferred_units = [
            'medium', 'large', 'small', 'cup', 'container', 'piece', 'slice',
            'tablespoon', 'tbsp', 'oz', 'ounce', 'tsp', 'teaspoon'
        ]

        for unit in preferred_units:
            if has(filtered_measures, unit):
                return unit

        if filtered_measures:
            # Use the first measure's text up to any parenthesised detail.
            return filtered_measures[0].split('(')[0].strip()

        return "100g"

    def _find_portion_grams(self, food_details: Dict, unit_to_find: str) -> Optional[float]:
        """
        Finds the gram weight for a given unit from the food's portions.

        Args:
            food_details: Complete food details from the API
            unit_to_find: The unit to find the gram weight for

        Returns:
            The gram weight for the specified unit, or None if not found
        """
        if unit_to_find == "100g":
            return 100.0

        unit_to_find = unit_to_find.lower()

        # (lowercase description, gram weight) pairs in the order the API lists
        # them. Missing or null fields are treated as empty.
        portions = [
            ((portion.get('portionDescription') or '').lower(), portion.get('gramWeight'))
            for portion in (food_details.get('foodPortions') or [])
        ]

        if unit_to_find == 'cup':
            # A direct cup measure wins.
            for desc, grams in portions:
                if 'cup' in desc and 'guideline' not in desc:
                    return grams

            # Otherwise derive it from the food's own "1 fl oz (NFS)" portion:
            # 1 cup = 8 fl oz.
            for desc, grams in portions:
                if '1 fl oz' in desc and 'nfs' in desc:
                    if grams:
                        return grams * 8.0
                    break

        # Potatoes: 'potato' stands for the "any size" portion.
        if unit_to_find == 'potato':
            for desc, grams in portions:
                if 'any size' in desc:
                    return grams

        # Protein powder: prefer "scoop, NFS", then any scoop.
        if unit_to_find == 'scoop':
            for desc, grams in portions:
                if 'scoop' in desc and 'nfs' in desc:
                    return grams
            for desc, grams in portions:
                if 'scoop' in desc:
                    return grams

        # Standard matching. r'\b' is the regex word boundary; writing r'\\b'
        # would instead require a literal backslash followed by 'b' in the
        # description, so the whole-word branch could never match.
        exact_match_pattern = re.compile(r'\b' + re.escape(unit_to_find) + r'\b')

        best_match = None

        for desc, grams in portions:
            # Skip guideline amounts
            if 'guideline amount' in desc:
                continue
            # A whole-word match is returned immediately
            if exact_match_pattern.search(desc):
                return grams
            # Remember the first substring hit in case no whole-word match follows
            if unit_to_find in desc and not best_match:
                best_match = grams

        return best_match

    def _validate_serving_size(self, food_name: str, selected_measure: str, grams: float, calories: float) -> None:
        """
        Validates serving sizes and prints one warning line for unrealistic values.

        Args:
            food_name: Name of the food
            selected_measure: The selected serving measure
            grams: Gram weight of the serving
            calories: Calories in the serving
        """
        warnings = []
        name_lower = food_name.lower()
        # The caller passes a display-formatted unit such as "1 Cup", so every
        # keyword test below is made against the lowercase form.
        measure_lower = selected_measure.lower()

        # Check for very large servings (except pizza which is intentionally large)
        if grams > 500 and 'pizza' not in name_lower:
            warnings.append(f"Very large serving size ({grams:.1f}g)")

        # Check for very high calorie servings
        if calories > 800 and 'pizza' not in name_lower:
            warnings.append(f"High calorie serving ({calories:.0f} kcal)")

        # Check if guideline amount was used (shouldn't happen with fixes)
        if 'guideline amount' in measure_lower:
            warnings.append("Using guideline amount, not typical serving")

        # Check for suspiciously light servings for certain foods
        if 'milk' in name_lower and 'cup' in measure_lower and grams < 200:
            warnings.append(f"Milk serving seems too light ({grams:.1f}g for cup)")

        if warnings:
            print(f"{food_name}: {'; '.join(warnings)} ⚠️")

    # ------ Main Processing Method ------

    def display_nutrition_for_food(self, query: str) -> Optional[Dict[str, Any]]:
        """
        Processes a food item, prints its nutrition, and returns the data as a dictionary.

        Args:
            query: The food name to search for and analyze

        Returns:
            Dictionary with the keys name, category, fdcId, serving_unit,
            serving_grams, calories, protein, fat and carbs (the numeric fields
            are formatted strings, or "N/A" when no portion weight is found),
            or None if the food or its details cannot be retrieved
        """
        fdc_id = self._find_exact_match_id(query)
        if not fdc_id:
            return None

        food_details = self._get_food_details(fdc_id)
        if not food_details:
            return None

        # `or {}` also covers a category that is present but null.
        wweia_category_obj = food_details.get('wweiaFoodCategory') or {}
        food_category = wweia_category_obj.get('wweiaFoodCategoryDescription')

        available_measures = self._get_available_measures(food_details)
        # The query doubles as the food description for unit selection
        default_unit = self._get_default_measure(food_category, available_measures, query)
        grams_per_portion = self._find_portion_grams(food_details, default_unit)

        # Data extraction: amounts are scaled below as per-100 g values
        nutrients_100g = {'Calories': 0.0, 'Protein': 0.0, 'Fat': 0.0, 'Carbohydrates': 0.0}
        nutrient_map = {1008: 'Calories', 1003: 'Protein', 1004: 'Fat', 1005: 'Carbohydrates'}

        for n in food_details.get('foodNutrients') or []:
            nutrient_id = (n.get('nutrient') or {}).get('id')
            if nutrient_id in nutrient_map:
                key = nutrient_map[nutrient_id]
                # A missing or null amount counts as zero
                nutrients_100g[key] = n.get('amount') or 0.0

        description = food_details.get('description', 'N/A')
        cleaned_description = clean_food_name(description, is_branded=False)

        # Prepare results for printing and returning
        result_data = {
            "name": cleaned_description,
            "category": FOOD_CATEGORIES.get(cleaned_description, "UNKNOWN CATEGORY"),
            "fdcId": fdc_id,
            # Capitalise each word of "1 <unit>" but keep 'oz' and 'fl' as written
            "serving_unit": " ".join([
                word if word.lower() in ['oz', 'fl'] else word.capitalize()
                for word in f"1 {default_unit}".split()
            ]),
            "serving_grams": None,
            "calories": None,
            "protein": None,
            "fat": None,
            "carbs": None
        }

        # Console output and final calculations
        print(f"Basic Info: {cleaned_description}")
        print(f"FDC ID: {fdc_id}")

        if grams_per_portion:
            scale = grams_per_portion / 100.0
            result_data['serving_grams'] = f"{grams_per_portion:.1f}"
            result_data['calories'] = f"{nutrients_100g['Calories'] * scale:.0f}"
            result_data['protein'] = f"{nutrients_100g['Protein'] * scale:.1f}"
            result_data['fat'] = f"{nutrients_100g['Fat'] * scale:.1f}"
            result_data['carbs'] = f"{nutrients_100g['Carbohydrates'] * scale:.1f}"

            # Validate serving size
            self._validate_serving_size(
                cleaned_description,
                result_data['serving_unit'],
                grams_per_portion,
                float(result_data['calories'])
            )

            print(f"Serving Size: {result_data['serving_unit']} ({result_data['serving_grams']}g)")
            print("Key Nutrition Facts:")
            print(f"  - Calories: {result_data['calories']} kcal")
            print(f"  - Protein: {result_data['protein']} g")
            print(f"  - Fat: {result_data['fat']} g")
            print(f"  - Carbohydrates: {result_data['carbs']} g")
        else:
            print("No serving size data available ❌")
            result_data['serving_grams'] = "N/A"
            result_data['calories'] = "N/A"
            result_data['protein'] = "N/A"
            result_data['fat'] = "N/A"
            result_data['carbs'] = "N/A"

        if available_measures:
            print(f"Available Measures: {', '.join(available_measures)}")

        print()  # Blank line for readability
        return result_data

In [9]:
# -----------------------------------------------------------------------------
# Cell 9: CSV Export Functionality
# -----------------------------------------------------------------------------

def export_to_csv(nutrition_data: List[Dict[str, Any]], filename: str = "nutrition_results.csv"):
    """
    Writes nutrition result dictionaries to a CSV file, one row per food.

    None entries (foods that could not be processed) are dropped first. Nothing
    is written when the input is empty or contains only None entries. Any error
    raised while writing is reported on the console instead of propagating.

    Args:
        nutrition_data: List of nutrition dictionaries (None entries allowed)
        filename: Output CSV filename
    """
    if not nutrition_data:
        print("No data to export ❌")
        return

    # Keep only the foods that actually produced a result
    rows = list(filter(lambda entry: entry is not None, nutrition_data))
    if len(rows) == 0:
        print("No valid data to export ❌")
        return

    # Column order of the output file
    columns = [
        'name', 'category', 'fdcId', 'serving_unit', 'serving_grams',
        'calories', 'protein', 'fat', 'carbs',
    ]

    try:
        with open(filename, 'w', newline='', encoding='utf-8') as handle:
            table = csv.DictWriter(handle, fieldnames=columns)
            table.writeheader()
            table.writerows(rows)
    except Exception as error:
        print(f"Error exporting to CSV: {error} ❌")
    else:
        print(f"Successfully exported {len(rows)} food items to {filename} ✅")

In [10]:
# ----------------------------------------------------------------------------
# Cell 10: User Interface Widget Setup and Main Execution
# ----------------------------------------------------------------------------

# ------ Configure Food Search Lists ------
# Search terms used by the interactive "Search USDA Database" button.
INTERACTIVE_FOOD_LIST_INPUT = ["banana", "lentils", "spinach"]

# Food descriptions used by main_serving_finder() and main_nutrition_analysis().
EXAMPLE_ANALYSIS_FOOD_LIST = [
    'Banana, raw',
    'Lentils, from canned',
    'Spinach, frozen, cooked, no added fat',
]

# ------ Set Up USDA API Configuration ------
# Dataset passed as data_type for every interactive search.
GENERIC_DATASET = "Survey (FNDDS)"

# ------ Create Interactive UI Widgets ------
def _make_option_checkbox(description, value, layout=None, **extra):
    """
    Builds one option checkbox with the look shared by every checkbox in this interface.

    Args:
        description: Label shown next to the checkbox
        value: Initial checked state
        layout: Optional widgets.Layout; defaults to an auto-width layout
        **extra: Additional keyword arguments forwarded to widgets.Checkbox

    Returns:
        The configured widgets.Checkbox
    """
    if layout is None:
        layout = widgets.Layout(width='auto')
    return widgets.Checkbox(
        value=value,
        description=description,
        indent=False,
        style={'description_width': 'initial'},
        layout=layout,
        **extra
    )

keyword_filter_checkbox = _make_option_checkbox('Filter by Keywords', False)

leading_words_filter_checkbox = _make_option_checkbox(
    'Only show results that start with the search terms', True
)

# Indented with a left margin because it depends on the leading-words filter above it.
strict_comma_filter_checkbox = _make_option_checkbox(
    'Apply strict matching',
    True,
    layout=widgets.Layout(margin='0 0 0 25px', width='auto'),
    tooltip='Requires the second filter. Finds exact terms followed by a comma or exact description matches.',
)

# Toggles singular and plural expansion of the search terms.
singular_plural_checkbox = _make_option_checkbox(
    'Automatically generate singular and plural variants of search terms', False
)

# Controls whether the combined result list is printed after the search.
print_list_checkbox = _make_option_checkbox(
    'Print a combined Python list of all results at the end', True
)

search_button = widgets.Button(
    description='Search USDA Database',
    tooltip='Click to start the search based on the selected filters',
    icon='search',
    button_style='success',
    layout=widgets.Layout(width='300px', margin='15px 0 0 0'),
)

# All search output is rendered inside this area.
output_area = widgets.Output()

# ------ Link Strict Filter Checkbox to Leading Words Filter ------
def on_leading_words_change(change):
    """
    Keeps the strict filter checkbox consistent with the leading words filter.

    The strict checkbox is enabled only while the leading words filter is
    checked. Unchecking the leading words filter also unchecks the strict one.

    Args:
        change: The change event from the checkbox widget; its `new` attribute
            carries the updated checked state
    """
    leading_filter_on = change.new
    strict_comma_filter_checkbox.disabled = not leading_filter_on
    if leading_filter_on:
        return
    strict_comma_filter_checkbox.value = False

# Invoke on_leading_words_change whenever the leading-words checkbox's value changes.
leading_words_filter_checkbox.observe(on_leading_words_change, names='value')

# ------ Define Search Callback for Button Click ------
def on_search_button_clicked(b):
    """
    Runs the full food search when the search button is clicked.

    Reads the current checkbox settings, searches every term from
    INTERACTIVE_FOOD_LIST_INPUT in the GENERIC_DATASET, and prints progress and
    (optionally) the combined result list inside the output area.

    Args:
        b: The button widget that triggered the event
    """
    with output_area:
        clear_output(wait=True)
        print("Beginning the USDA Food Data Central search process 🚀\n")

        # Snapshot the widget state once, before any searching starts
        expand_variants = singular_plural_checkbox.value
        filter_settings = {
            'enable_keyword_filter': keyword_filter_checkbox.value,
            'enable_leading_words_filter': leading_words_filter_checkbox.value,
            'enable_strict_comma_filter': strict_comma_filter_checkbox.value,
        }
        wants_combined_list = print_list_checkbox.value

        if expand_variants:
            # Add singular and plural variants of every term
            search_terms = expand_food_list(INTERACTIVE_FOOD_LIST_INPUT)
        else:
            # Parse the input as-is and echo the terms that will be searched
            search_terms = _parse_food_list_input(INTERACTIVE_FOOD_LIST_INPUT)

            print("Using original search terms without singular and plural processing:")
            for position, search_term in enumerate(search_terms, start=1):
                print(f"{position}. {search_term}")
            print("=" * 50 + "\n")

        food_searcher = USDAFoodSearcher(api_key)
        combined_results = []  # Results of every term, in search order

        for search_term in search_terms:
            combined_results += food_searcher.search_foods(
                query=search_term,
                data_type=GENERIC_DATASET,
                **filter_settings
            )
            time.sleep(0.5)  # Pause between terms

        print("\nThe search process has finished successfully ✅")

        # Print the results as a copy-pasteable Python list when requested
        if wants_combined_list and combined_results:
            divider = "=" * 50
            print("\n" + divider)
            print("--- Combined List of Filtered Foods ---")
            print(divider + "\n")
            print("food_list = [")
            for entry in combined_results:
                # repr() keeps quotes inside the text valid Python
                print(f'    {repr(entry)},')
            print("]")
            print(f"\nSaved comprehensive list containing {len(combined_results)} food items 📋")

        print("\nThank you for using the USDA Food Search Tool! Keep exploring nutritious choices and have a delicious day! 🍎")

# Run on_search_button_clicked each time the search button is clicked.
search_button.on_click(on_search_button_clicked)

In [11]:
# -----------------------------------------------------------------------------
# Cell 11: Main Functions for Serving and Nutrition Analysis
# -----------------------------------------------------------------------------

def main_serving_finder():
    """
    Shows the available serving sizes for every food in the global
    EXAMPLE_ANALYSIS_FOOD_LIST, pausing one second after each lookup.
    """
    serving_finder = USDAServingFinder(api_key)

    print("Starting USDA Food Database Serving Size Search 🔍")
    print()

    # One lookup per food, with a pause after each so the API is not hammered
    for description in EXAMPLE_ANALYSIS_FOOD_LIST:
        serving_finder.display_servings_for_food(description)
        time.sleep(1)

    closing_lines = (
        "USDA food serving search completed successfully 🎉",
        "Thanks for exploring the world of food data with us!",
        "Remember, good nutrition starts with knowing your portions! 🥗",
    )
    for line in closing_lines:
        print(line)

def main_nutrition_analysis():
    """
    Analyzes every food in the global EXAMPLE_ANALYSIS_FOOD_LIST, exports the
    collected results to nutrition_results.csv and prints a summary.
    """
    # Uses the notebook-wide api_key; obtain your own from: https://fdc.nal.usda.gov/api-guide.html
    analyzer = USDANutritionAPI(api_key)
    rule = "=" * 60

    print("USDA Food Nutrition Analysis Tool 🍎")
    print(rule)
    print()

    # ------ Process Each Food Item ------
    # Failed lookups are kept as None so they can be counted afterwards
    collected = []
    for food_item in EXAMPLE_ANALYSIS_FOOD_LIST:
        print(f"Processing: {food_item}")
        collected.append(analyzer.display_nutrition_for_food(food_item))
        time.sleep(1)  # Pause after each request

    # ------ Export Results ------
    print(rule)
    print("Export Summary 📊")
    print(rule)

    export_to_csv(collected, "nutrition_results.csv")

    # ------ Summary Statistics ------
    success_count = sum(1 for entry in collected if entry is not None)
    failure_count = len(collected) - success_count

    print(f"Successfully processed: {success_count} foods ✅")
    if failure_count > 0:
        print(f"Failed to process: {failure_count} foods ❌")

    print()
    closing_lines = (
        "Analysis complete! Check the CSV file for detailed nutrition data 🎉",
        "Key fixes applied 💡",
        "  - Corrected milk serving size (244g vs 61g)",
        "  - Fixed potato serving size (170g vs 60g)",
        "  - Improved juice conversions using actual USDA ratios",
        "  - Filtered out guideline amounts",
        "  - Added serving size validation warnings",
    )
    for line in closing_lines:
        print(line)

In [12]:
# -----------------------------------------------------------------------------
# Cell 12: Display User Interface and Run Tool
# -----------------------------------------------------------------------------

# ------ Display User Interface Components ------
def _section_rule():
    """Returns a fresh horizontal rule widget used to separate option groups."""
    return widgets.HTML("<hr style='margin-top:10px; margin-bottom:10px;'>")

print("Please configure your search filters below and click the button to begin your search.")

# The panel is assembled group by group; the order below is the on-screen order.
search_option_widgets = [
    widgets.Label("Search Options:"),
    singular_plural_checkbox,
]
filter_option_widgets = [
    widgets.Label("Filter Options:"),
    keyword_filter_checkbox,
    leading_words_filter_checkbox,
    strict_comma_filter_checkbox,
]
output_option_widgets = [
    widgets.Label("Output Options:"),
    print_list_checkbox,
    search_button,
]

display(widgets.VBox(
    search_option_widgets
    + [_section_rule()]
    + filter_option_widgets
    + [_section_rule()]
    + output_option_widgets
))

# Search output is rendered below the controls.
display(output_area)

Please configure your search filters below and click the button to begin your search.


VBox(children=(Label(value='Search Options:'), Checkbox(value=False, description='Automatically generate singu…

Output()

In [13]:
# -----------------------------------------------------------------------------
# Cell 13: Usage Instructions and Examples
# -----------------------------------------------------------------------------

# Print the static usage guide for the notebook: the three tools, the
# configuration steps, the filter options and getting-started tips.
# This cell performs no computation; it only writes the text below.
print("""
🔍 USDA Food Data Central Search Tool - Usage Guide
==================================================

This comprehensive tool provides three main functionalities:

1. Interactive Food Search (Above Interface)
   - Search multiple food items with customizable filters
   - Generate singular/plural variants automatically
   - Export results as Python lists

2. Serving Size Finder (main_serving_finder function)
   - Find available household serving sizes for specific foods
   - Useful for meal planning and portion control

3. Nutrition Analysis (main_nutrition_analysis function)
   - Get detailed nutrition facts with smart default servings
   - Export results to CSV for further analysis
   - Includes data validation and warnings

📋 Configuration Steps:
======================

1. Set your API key in the variables:
   - Replace empty strings in api_key variables with your USDA FDC API key
   - Get free key at: https://fdc.nal.usda.gov/api-guide.html

2. Configure food lists in Cell 10:
   - For interactive search: Update the INTERACTIVE_FOOD_LIST_INPUT variable.
   - For serving/nutrition analysis: Update the EXAMPLE_ANALYSIS_FOOD_LIST variable.

🎯 Example Usage:
================

The food lists for all examples are now configured in a single location in Cell 10.

For instance, to change the foods for the interactive search, you would modify:
INTERACTIVE_FOOD_LIST_INPUT = ["banana", "lentils", "spinach"]

To change the foods for the standalone analysis functions, you would modify:
EXAMPLE_ANALYSIS_FOOD_LIST = [
    'Banana, raw',
    'Lentils, from canned',
    'Spinach, frozen, cooked, no added fat',
]

⚙️ Filter Options Explained:
============================

• Singular/Plural Processing: Automatically generates variants
  - "apple" → ["apple", "apples"]
  - "berry" → ["berry", "berries"]

• Keyword Filter: Prioritizes foods with descriptive keywords
  - Keywords: Raw, Frozen, Canned, Nfs, Plain, Unsalted

• Leading Words Filter: Results must start with search terms
  - "tofu" matches "Tofu, raw, regular" ✅
  - "tofu" skips "Soup with tofu" ❌

• Strict Comma Filter: Most restrictive matching
  - Finds exact matches or terms followed by a comma
  - "Rice, white" or exact "Rice" ✅
  - "Rice pudding" ❌

🚀 Getting Started:
==================

1. Add your API key to the variables
2. Configure your food lists in Cell 10
3. Select your preferred filters above
4. Click "Search USDA Database" button
5. For serving/nutrition analysis, run the respective functions

💡 Pro Tips:
============

• Use specific food names for better matches
• Enable singular/plural processing for broader coverage
• Strict filtering gives most relevant results
• Check CSV exports for detailed nutrition data
• Serving size validation helps identify unrealistic portions

Happy food searching! 🍎🥗🍞
""")


🔍 USDA Food Data Central Search Tool - Usage Guide

This comprehensive tool provides three main functionalities:

1. Interactive Food Search (Above Interface)
   - Search multiple food items with customizable filters
   - Generate singular/plural variants automatically
   - Export results as Python lists

2. Serving Size Finder (main_serving_finder function)
   - Find available household serving sizes for specific foods
   - Useful for meal planning and portion control

3. Nutrition Analysis (main_nutrition_analysis function)
   - Get detailed nutrition facts with smart default servings
   - Export results to CSV for further analysis

📋 Configuration Steps:

1. Set your API key in the variables:
   - Replace empty strings in api_key variables with your USDA FDC API key
   - Get free key at: https://fdc.nal.usda.gov/api-guide.html

2. Configure food lists in Cell 10:
   - For interactive search: Update the INTERACTIVE_FOOD_LIST_INPUT variable.
   - For serving/nutrition analysis: Updat

In [14]:
# ----------------------------------------------------------------------------
# Cell 14: Quick Test Functions
# ----------------------------------------------------------------------------

def quick_test_search():
    """
    Smoke test for the search path: runs one small "lentils" search against
    the Survey (FNDDS) dataset and reports how many results came back.
    """
    print("\U0001F9EA Running Quick Search Test...")
    print("=" * 40)

    # One small request: leading-words filter on, the other filters off
    search_options = {
        'query': "lentils",
        'data_type': "Survey (FNDDS)",
        'enable_keyword_filter': False,
        'enable_leading_words_filter': True,
        'enable_strict_comma_filter': False,
        'page_size': 5,
    }
    matches = USDAFoodSearcher(api_key).search_foods(**search_options)

    print(f"\nTest completed! Found {len(matches)} results.")
    print("If you see results above, the search function is working correctly ✅")

def quick_test_nutrition():
    """
    Smoke-test the nutrition analysis path with one fixed food name.

    Builds a USDANutritionAPI from the notebook-level ``api_key`` and asks it
    to display nutrition for "Lentils, From Canned". A truthy return value is
    reported as success; a falsy one prints troubleshooting hints instead.

    Returns:
        None. The outcome is communicated through printed output only.
    """
    # Header line followed by a 40-character rule.
    print("\U0001F9EA Running Quick Nutrition Test...", "=" * 40, sep="\n")

    # Uses whatever key is currently stored in the notebook-level api_key.
    lookup = USDANutritionAPI(api_key).display_nutrition_for_food("Lentils, From Canned")

    # Guard clause: any falsy result counts as a failed test.
    if not lookup:
        print("⚠️ Test failed - check your API key and internet connection")
        print("Replace 'api_key' with your actual API key for full functionality.")
        return

    print("✅ Nutrition analysis test successful!")

# Uncomment the lines below to run quick tests:
# quick_test_search()
# quick_test_nutrition()

# Closing banner for Cell 14. It is printed unconditionally whenever this cell
# runs and performs no checks of its own; the lines are emitted in order,
# one per output line, via a single print call.
print(
    "\n" + "=" * 60,
    "\U0001F4DA USDA Food Data Central Tool - Ready to Use!",
    "=" * 60,
    "✅ All classes and functions loaded successfully",
    "✅ User interface is active above",
    "✅ Code duplication has been eliminated",
    "✅ All original functionality preserved",
    # Leading newline leaves a blank line before the next-steps list.
    "\n\U0001F3AF Next Steps:",
    "1. Add your USDA API key to the variables",
    "2. Configure your food lists",
    "3. Use the interface above or call functions directly",
    "4. Run quick_test_search() or quick_test_nutrition() to verify setup",
    sep="\n",
)


📚 USDA Food Data Central Tool - Ready to Use!
✅ All classes and functions loaded successfully
✅ User interface is active above
✅ Code duplication has been eliminated
✅ All original functionality preserved

🎯 Next Steps:
1. Add your USDA API key to the variables
2. Configure your food lists
3. Use the interface above or call functions directly
4. Run quick_test_search() or quick_test_nutrition() to verify setup


In [15]:
# Run the search smoke test defined in Cell 14; it uses the notebook-level
# api_key and prints the matches for a fixed "lentils" query.
quick_test_search()

🧪 Running Quick Search Test...

=== Search Term: 'lentils'  |  Data Type: Survey (FNDDS) ===
Total matches found in the USDA database: 23 🔍
  1. Lentils, Nfs – FDC ID: 2707423
  2. Lentils, From Canned – FDC ID: 2707426
  3. Lentils, From Dried, Fat Added – FDC ID: 2707424
  4. Lentils, From Dried, No Added Fat – FDC ID: 2707425
--> Displayed 4 filtered results for this search term.

Test completed! Found 4 results.
If you see results above, the search function is working correctly ✅


In [16]:
# Run the nutrition smoke test defined in Cell 14; it uses the notebook-level
# api_key and looks up the fixed food name "Lentils, From Canned".
quick_test_nutrition()

🧪 Running Quick Nutrition Test...
Basic Info: Lentils
FDC ID: 2707426
Serving Size: 1 Cup (185.0g)
Key Nutrition Facts:
  - Calories: 307 kcal
  - Protein: 15.5 g
  - Fat: 12.7 g
  - Carbohydrates: 34.6 g
Available Measures: 1 cup (185.0g)

✅ Nutrition analysis test successful!
