In [1]:
import pandas as pd
import requests
import time

# File paths
# Input CSV; taxon names are read from its first column.
input_csv: str = "species_79_labels.csv"
# Output CSV; receives one row per queried name with the resolved taxonomy.
output_csv: str = "worms_taxonomy_output.csv"

# Target taxonomic levels, ordered from most specific to most general.
levels = ['species', 'genus', 'family', 'order', 'class', 'phylum', 'kingdom']


def normalize_rank(rank):
    """Map a WoRMS rank label to one of the 7 target levels.

    Matching is exact (case-insensitive), not by substring: intermediate
    ranks such as "Superclass", "Subfamily" or "Infraorder" are NOT folded
    into the main level. Folding them would let a super-rank, which appears
    earlier in a root-to-leaf classification, occupy the slot of the real
    rank that follows it.

    Args:
        rank: Rank label as returned by WoRMS, e.g. "Class". May be None.

    Returns:
        The matching entry of ``levels``, or None when the label is missing
        or is not one of the target levels.
    """
    if not isinstance(rank, str):
        return None
    # Some labels carry a parenthetical alias, e.g. "Phylum (Division)";
    # only the part before the parenthesis names the rank.
    base = rank.partition('(')[0].strip().lower()
    return base if base in levels else None

def query_worms_taxonomy(name):
    """Query WoRMS and return a normalized taxonomy dictionary.

    Looks up ``name`` with an exact (non-fuzzy) match, takes the first
    record returned, fetches its classification and walks it from the root
    downwards, keeping the first name seen for each target level.

    Args:
        name: Scientific name to look up.

    Returns:
        A dict keyed by the entries of ``levels``. A level that is absent
        from the classification is filled with the name of the nearest
        higher level that is present (so a genus-level match repeats the
        genus name under 'species'); it stays None if no higher level is
        known. Returns None when ``name`` is blank or not a string, when
        WoRMS has no match, or when the request or response parsing fails
        (the failure is printed, not raised).
    """
    # Local import so the function does not depend on extra file-level imports.
    from urllib.parse import quote

    # Blank cells (e.g. NaN from pandas) would otherwise be sent as "nan".
    if not isinstance(name, str) or not name.strip():
        return None

    base_url = "https://www.marinespecies.org/rest"
    # Encode the name as a single path segment so characters such as
    # '/', '?' or '#' cannot alter the request URL.
    encoded_name = quote(name.strip(), safe="")
    try:
        response = requests.get(
            f"{base_url}/AphiaRecordsByName/{encoded_name}",
            params={"like": "false", "marine_only": "false"},
            timeout=10,
        )
        response.raise_for_status()
        # WoRMS signals "nothing found" with HTTP 204 and an empty body,
        # which is not valid JSON; treat it as a miss, not an error.
        if response.status_code == 204 or not response.content:
            return None
        records = response.json()
        if not records:
            return None
        aphia_id = records[0]['AphiaID']

        # Get full classification
        hierarchy_response = requests.get(
            f"{base_url}/AphiaClassificationByAphiaID/{aphia_id}",
            timeout=10,
        )
        hierarchy_response.raise_for_status()
        if hierarchy_response.status_code == 204 or not hierarchy_response.content:
            return None
        hierarchy = hierarchy_response.json()

        result = {lvl: None for lvl in levels}

        # The classification is a chain of nested nodes linked via 'child'.
        # Use distinct local names so the ``name`` argument stays intact
        # for the error message below.
        node = hierarchy
        while node:
            rank = normalize_rank(node.get('rank') or '')
            taxon = node.get('scientificname') or ''
            if rank and taxon and not result[rank]:
                result[rank] = taxon
            node = node.get('child')

        # Walk from kingdom down to species, carrying the most specific
        # name seen so far into any level that is still empty.
        last_known = None
        for lvl in reversed(levels):
            if result[lvl]:
                last_known = result[lvl]
            else:
                result[lvl] = last_known

        return result
    except (requests.RequestException, ValueError, KeyError, IndexError,
            TypeError, AttributeError) as e:
        # Best effort: one bad name must not abort a batch run.
        print(f"Error querying '{name}': {e}")
        return None

# Load input: taxon names are taken from the first column of the CSV.
df = pd.read_csv(input_csv)
# Skip missing and blank cells so they are not queried as the literal "nan",
# and trim surrounding whitespace from the remaining names.
taxon_names = [
    str(value).strip()
    for value in df.iloc[:, 0].dropna()
    if str(value).strip()
]

# Query and collect results
results = []
total = len(taxon_names)
for i, name in enumerate(taxon_names, start=1):
    print(f"[{i}/{total}] Querying: {name}")
    # Unmatched or failed lookups still get a row, with every level empty.
    result = query_worms_taxonomy(name) or {lvl: None for lvl in levels}
    result['original_input'] = name
    results.append(result)
    time.sleep(0.5)  # Rate limiting

# Save to CSV
# Passing columns= fixes the column order and keeps the header row even when
# there are no results (selecting columns from an empty frame raises KeyError).
out_df = pd.DataFrame(results, columns=['original_input'] + levels)
out_df.to_csv(output_csv, index=False)
print(f"\n✅ Taxonomy saved to: {output_csv}")


FileNotFoundError: [Errno 2] No such file or directory: 'species_79_labels.csv'