In [2]:
import google.generativeai as genai
import json
import time
import pandas as pd

  from .autonotebook import tqdm as notebook_tqdm


## Functions

In [4]:
def get_gemini_api_key(config_path='gemini.config'):
    """Read the Gemini API key from a plain-text config file.

    The file is scanned line by line for the first line that starts with
    ``API_KEY=``; everything after that first ``=`` (with surrounding
    whitespace stripped) is returned as the key.

    Args:
        config_path: Path to the config file. Defaults to ``gemini.config``
            in the current working directory.

    Returns:
        The API key as a string, or ``None`` when the file is missing,
        cannot be read, or contains no ``API_KEY=`` line. Read problems are
        reported on stdout rather than raised.
    """
    try:
        with open(config_path, 'r') as f:
            for line in f:
                if line.startswith('API_KEY='):
                    # Split on the first '=' only, so a key that itself
                    # contains '=' (e.g. base64 padding) is returned intact.
                    return line.split('=', 1)[1].strip()
    except FileNotFoundError:
        print(f"Error: Config file not found at {config_path}")
        print("Please create a gemini.config file with your API key in the format:")
        print("API_KEY=your_api_key_here")
    except Exception as e:
        print(f"Error reading config file: {e}")

    # Reached when no API_KEY line was found or an error was reported above.
    return None

### Ask Gemini

In [8]:
def ask_gemini(batch):
    """Ask the Gemini model for zoological facts about a batch of names.

    Builds a prompt embedding ``batch``, sends it through the module-level
    ``model`` object (its ``generate_content`` method) and parses the reply
    text as JSON.

    Args:
        batch: Collection of scientific names to look up. It is interpolated
            into the prompt as-is.

    Returns:
        A list of parsed JSON values (one per animal the model returned).
        A single JSON object is wrapped in a one-element list so callers can
        always ``extend`` with the result. An empty list is returned for an
        empty batch, on any request/parsing error, or when the payload is
        neither a list nor an object; errors are reported on stdout.
    """
    # Nothing to look up: skip the API round-trip entirely.
    if isinstance(batch, (list, tuple)) and not batch:
        return []

    prompt = f"""
    Act as an expert in zoology. I need technical information about these animals. If it's an plant don't include it in the JSON: {batch}.
    
    For each animal, return a JSON object with the following exact fields:
    - "scientific_name": Name of the animal (the same scientific name that I am using as input).
    - "common_name"
    - "weight": Approximate weight in kg (only number and unit).
    - "size": Height or length in meters (only number and unit).
    - "diet": Carnivore, Herbivore, Omnivore, Insectivore.
    - "life_span": Life span in years (e.g. average number of years or range like "10-15 years").
    - "habitat": 5 habitats of the animal as maximum (e.g. "forest, grassland, etc."). Just one word for each habitat.
    - "continent": Continent where the animal is found (e.g. "Asia, Africa, Europe, South America, North America, Central America, Oceania, Antarctica").

    Return a list of JSON objects.
    """
    
    try:
        response = model.generate_content(prompt)
        parsed = json.loads(response.text)
    except Exception as e:
        # Best-effort: a failed batch is reported and skipped, not fatal.
        print(f"Error requesting batch: {e}")
        return []

    if isinstance(parsed, list):
        return parsed
    if isinstance(parsed, dict):
        # A lone object would make the caller's list.extend() add its keys
        # (plain strings) instead of the record itself.
        return [parsed]

    print(f"Error requesting batch: unexpected JSON payload of type {type(parsed).__name__}")
    return []

## Feature Extraction

In [1]:
# Index of the first animal to process (start of the batching range).
START_RECORD = 0
# Stop index of the batching range (exclusive); also used in the output file name.
END_RECORD = 1000
# Number of animals sent to the model per request (range step and slice width).
BATCH_SIZE = 10

In [None]:
# Load the API key from the config file.
# NOTE(review): this is None when the file or its API_KEY line is missing;
# confirm whether the client library has a usable fallback in that case.
API_KEY = get_gemini_api_key()

genai.configure(api_key=API_KEY)

# Ask the model to answer in JSON so the reply can be parsed directly.
model = genai.GenerativeModel('gemini-2.5-flash',
                              generation_config={"response_mime_type": "application/json"})

# Read the CSV file
df = pd.read_csv('../data/animals.csv')  # Update with your file path if needed

# Get all values from the scientific-name column as a list
animals = df['scientific_name'].tolist()

print(f"Starting with {len(animals)} animals...")

# Accumulates the records returned for every batch
total_response = []

# Never walk past the end of the data: a range that exceeds the list would
# otherwise produce empty slices and pointless API requests.
last_record = min(END_RECORD, len(animals))

for i in range(START_RECORD, last_record, BATCH_SIZE):

    # Cap the slice at last_record so the final batch does not spill into the
    # next run's range when the span is not a multiple of BATCH_SIZE.
    current_batch = animals[i:min(i + BATCH_SIZE, last_record)]

    print(f"Requesting batch {i//BATCH_SIZE + 1}: {current_batch}...")

    # Ask Gemini
    gemini_response = ask_gemini(current_batch)

    # Store the results of this batch
    total_response.extend(gemini_response)

    # (Optional) Short pause to avoid flooding the API when there are thousands of animals
    time.sleep(1)

# Save the results to a JSON file (explicit encoding so the output does not
# depend on the platform's default locale)
with open(f"../data/gemini_animal_features_{START_RECORD}_{END_RECORD}.json", "w", encoding="utf-8") as f:
    json.dump(total_response, f, indent=4)

print(f"Processed {len(total_response)} records and saved to gemini_animal_features_{START_RECORD}_{END_RECORD}.json")

Starting with 5159 animals...
Requesting batch 1: ['Abantennarius sanguineus', 'Abantis paradisea', 'Abbottina rivularis', 'Abisares viridipennis', 'Abramis brama', 'Abrus laevigatus', 'Abrus precatorius', 'Abutilon', 'Acada biseriata', 'Acalitus mallyi']...
Requesting batch 2: ['Acalypha wilkesiana', 'Acalyptratae', 'Acanalonia', 'Acanalonia conica', 'Acanthacris ruficornis', 'Acanthaspis obscura', 'Acanthepeira stellata', 'Acanthis flammea', 'Acanthis flammea flammea', 'Acanthocephala declivis']...
Requesting batch 3: ['Acanthocephala terminalis', 'Acanthocercus atricollis', 'Acanthocorini', 'Acanthogobius flavimanus', 'Acanthomorpha', 'Acanthoplus', 'Acanthoplus discoidalis', 'Acanthorhynchus tenuirostris', 'Acanthoscelides obtectus', 'Acanthus mollis']...
Requesting batch 4: ['Acari', 'Acariformes', 'Accipiter nisus', 'Accipiter nisus nisus', 'Accipiter striatus', 'Acer negundo', 'Acer pseudoplatanus', 'Acer rubrum', 'Acer saccharinum', 'Aceria']...
Requesting batch 5: ['Aceria alo