# Data collection

Most of the data used in this project was collected by manually downloading datasets from online sources and databases, because none of the data sources had public APIs for automatic extraction. Some data sources, however, required special approaches to manual collection; these are described below.

## Species richness

The cell below opens a small data-entry form for recording each state's overall species richness (an integer from 0 to 15), as read by eye from the geospatial map image of species richness over the United States. The entries are then saved in a tabular (CSV) format.

In [1]:
import tkinter as tk
from tkinter import ttk
import pandas as pd

class StateDataCollector:
    """Tkinter form for hand-entering one species-richness score per US state.

    The window offers a state selector, a numeric entry, an "Add Entry"
    button that records the pair in ``self.data``, and a "Save to CSV"
    button that writes the collected pairs to ``self.output_path``.
    Status and error messages are written to the text area at the bottom
    of the form.

    Args:
        root: The Tk root (or toplevel) window that hosts the form.
        output_path: Optional CSV destination. When omitted, the class-level
            default ``output_path`` is used.
    """

    # Inclusive bounds accepted for a species-richness score.
    MIN_VALUE = 0
    MAX_VALUE = 15

    # Default CSV destination; an instance may override it via ``output_path``.
    output_path = '../data/raw_data/species_richness_by_state.csv'

    def __init__(self, root, output_path=None):
        self.root = root
        self.root.title("State Species Richness Data Collector")

        if output_path is not None:
            self.output_path = output_path

        # Dictionary to store state:value pairs
        self.data = {}

        # Create the input frame
        input_frame = ttk.Frame(root, padding="10")
        input_frame.grid(row=0, column=0, sticky=(tk.W, tk.E, tk.N, tk.S))

        # Create state selector
        self.states = sorted([
            'Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California', 'Colorado',
            'Connecticut', 'Delaware', 'Florida', 'Georgia', 'Hawaii', 'Idaho',
            'Illinois', 'Indiana', 'Iowa', 'Kansas', 'Kentucky', 'Louisiana',
            'Maine', 'Maryland', 'Massachusetts', 'Michigan', 'Minnesota',
            'Mississippi', 'Missouri', 'Montana', 'Nebraska', 'Nevada',
            'New Hampshire', 'New Jersey', 'New Mexico', 'New York',
            'North Carolina', 'North Dakota', 'Ohio', 'Oklahoma', 'Oregon',
            'Pennsylvania', 'Rhode Island', 'South Carolina', 'South Dakota',
            'Tennessee', 'Texas', 'Utah', 'Vermont', 'Virginia', 'Washington',
            'West Virginia', 'Wisconsin', 'Wyoming'
        ])

        self.state_var = tk.StringVar()
        state_label = ttk.Label(input_frame, text="State:")
        state_label.grid(row=0, column=0, padx=5, pady=5)
        state_combo = ttk.Combobox(input_frame, textvariable=self.state_var, values=self.states)
        state_combo.grid(row=0, column=1, padx=5, pady=5)

        # Create value input
        self.value_var = tk.StringVar()
        value_label = ttk.Label(
            input_frame,
            text=f"Species Richness ({self.MIN_VALUE}-{self.MAX_VALUE}):",
        )
        value_label.grid(row=1, column=0, padx=5, pady=5)
        value_entry = ttk.Entry(input_frame, textvariable=self.value_var)
        value_entry.grid(row=1, column=1, padx=5, pady=5)

        # Create buttons
        button_frame = ttk.Frame(input_frame)
        button_frame.grid(row=2, column=0, columnspan=2, pady=10)

        add_button = ttk.Button(button_frame, text="Add Entry", command=self.add_entry)
        add_button.grid(row=0, column=0, padx=5)

        save_button = ttk.Button(button_frame, text="Save to CSV", command=self.save_data)
        save_button.grid(row=0, column=1, padx=5)

        # Create display area
        self.display = tk.Text(input_frame, height=10, width=40)
        self.display.grid(row=3, column=0, columnspan=2, pady=10)

    def _report(self, message):
        """Append a status or error line to the display area."""
        self.display.insert(tk.END, message)

    def add_entry(self):
        """Validate the current inputs and record them in ``self.data``.

        On success the pair is stored (replacing any earlier value for the
        same state), the display is refreshed and both inputs are cleared.
        On failure an error line is appended to the display and the inputs
        are left as typed so they can be corrected.
        """
        state = self.state_var.get().strip()
        raw_value = self.value_var.get().strip()

        if not state or not raw_value:
            self._report("Error: Please choose a state and enter a value\n")
            return

        # The combobox is editable, so free-typed text must be checked;
        # otherwise a typo would silently become its own row in the CSV.
        if state not in self.states:
            self._report(f"Error: Unknown state '{state}'\n")
            return

        try:
            value = int(raw_value)
        except ValueError:
            self._report("Error: Please enter a valid number\n")
            return

        if not self.MIN_VALUE <= value <= self.MAX_VALUE:
            self._report(
                f"Error: Value must be between {self.MIN_VALUE} and {self.MAX_VALUE}\n"
            )
            return

        self.data[state] = value
        self.update_display()
        # Clear inputs
        self.state_var.set('')
        self.value_var.set('')

    def update_display(self):
        """Replace the display contents with all entries, sorted by state."""
        # Text indices are "line.column" strings; "1.0" is the very start.
        self.display.delete("1.0", tk.END)
        for state, value in sorted(self.data.items()):
            self.display.insert(tk.END, f"{state}: {value}\n")

    def save_data(self):
        """Write the collected entries to ``self.output_path`` as CSV.

        The file has a ``state`` index column and a ``species_richness``
        column. A failure to write (for example a missing target directory)
        is reported in the display area instead of escaping the Tk callback,
        where it would only reach stderr.
        """
        import os  # local import: only needed to name the file in the status line

        df = pd.DataFrame.from_dict(self.data, orient='index', columns=['species_richness'])
        df.index.name = 'state'
        try:
            df.to_csv(self.output_path)
        except OSError as err:
            self._report(f"\nError: Could not save data: {err}\n")
            return
        self._report(f"\nData saved to {os.path.basename(self.output_path)}\n")

# Build the collector window and hand control to Tk's event loop.
# mainloop() blocks until the window is closed.
if __name__ == "__main__":
    root = tk.Tk()
    app = StateDataCollector(root)
    app.root.mainloop()

Alabama: 10
Arizona: 5
Arkansas: 8
California: 10
Colorado: 2
Connecticut: 5
Delaware: 5
Florida: 9
Georgia: 4
Idaho: 1
Illinois: 6
Indiana: 8
Iowa: 5
Kansas: 1
Kentucky: 9
Louisiana: 10
Maine: 2
Maryland: 6
Massachusetts: 4
Michigan: 3
Minnesota: 3
Mississippi: 8
Missouri: 7
Montana: 0
Nebraska: 1
Nevada: 2
New Hampshire: 4
New Jersey: 6
New Mexico: 1
New York: 9
North Carolina: 5
North Dakota: 1
Ohio: 4
Oklahoma: 2
Oregon: 2
Pennsylvania: 6
Rhode Island: 4
South Carolina: 7
South Dakota: 0
Tennessee: 11
Texas: 3
Utah: 3
Vermont: 4
Virginia: 10
Washington: 2
West Virginia: 10
Wisconsin: 6
Wyoming: 0