In [5]:
import pandas as pd
from geopy.distance import geodesic
from collections import defaultdict

# Read the telemetry workbook and prepare it in a single pipeline:
#   1. parse the 'timestamp' column (day-month-year hour:minute strings),
#   2. derive the full month name and the calendar year of every fix,
#   3. order the rows by bird identifier, then chronologically.
data = (
    pd.read_excel('./data/horn_bill_telemetry.xlsx')
    .assign(
        timestamp=lambda frame: pd.to_datetime(
            frame['timestamp'], format='%d-%m-%Y %H:%M'
        )
    )
    # Second assign so these lambdas see the already-parsed timestamps.
    .assign(
        month=lambda frame: frame['timestamp'].dt.strftime('%B'),
        year=lambda frame: frame['timestamp'].dt.year,
    )
    .sort_values(by=['individual-local-identifier', 'timestamp'])
)

# Proximity test used to decide whether a fix belongs to a cluster
def is_within_radius(point1, point2, radius_km=2):
    """Tell whether two points lie no more than ``radius_km`` kilometres apart.

    Args:
        point1: First point, handed unchanged to geopy's ``geodesic``.
        point2: Second point, handed unchanged to geopy's ``geodesic``.
        radius_km: Inclusive distance threshold in kilometres (default 2).

    Returns:
        The result of ``distance_km <= radius_km``; a distance exactly equal
        to the threshold counts as "within".
    """
    return geodesic(point1, point2).km <= radius_km

# Cluster registry: maps a cluster centre -- the (lat, long) of the first fix
# that opened the cluster -- to the list of visit records assigned to it.
# Clusters are shared across all birds.
clusters = defaultdict(list)

# A fix without coordinates cannot be placed in any cluster, so leave such
# rows out before doing any distance computation.
located_fixes = data.dropna(subset=['location-lat', 'location-long'])

# Walk every bird's fixes in their existing (sorted) order.
# groupby(sort=False) yields the birds in order of first appearance, which is
# the same order that iterating over .unique() produces.
for bird_id, bird_data in located_fixes.groupby('individual-local-identifier', sort=False):
    # Iterate the needed columns in lock-step instead of rebuilding a row
    # object with .iloc[i] for every field that is read.
    fixes = zip(
        bird_data['location-lat'],
        bird_data['location-long'],
        bird_data['timestamp'],
        bird_data['month'],
        bird_data['year'],
    )

    for lat, lon, visit_time, month, year in fixes:
        point = (lat, lon)

        # Greedy assignment: the first existing centre within the radius wins
        # (centres are tried in creation order). When none matches, the fix
        # itself becomes the centre of a new cluster. The search finishes
        # before `clusters` is modified, so the dict is never mutated while
        # it is being iterated.
        home_center = next(
            (center for center in clusters if is_within_radius(point, center)),
            point,
        )

        # Single place where a visit record is built, for both the
        # "joined an existing cluster" and the "opened a new cluster" case.
        clusters[home_center].append({
            'bird_id': bird_id,
            'visit_time': visit_time,
            'month': month,
            'year': year,
            'location_lat': lat,
            'location_long': lon
        })

def _tally_month_year(visit_records):
    """Count visit records per "<month>-<year>" label, keeping first-seen order."""
    tally = {}
    for record in visit_records:
        label = f"{record['month']}-{record['year']}"
        tally[label] = tally.get(label, 0) + 1
    return tally


# One summary row per cluster: the centre coordinates plus a mapping from
# "<month>-<year>" to the number of visit records that fall in that month.
visits_by_month_year = [
    {
        'cluster_center_lat': center[0],
        'cluster_center_long': center[1],
        'month_year_visits': _tally_month_year(records)
    }
    for center, records in clusters.items()
]

# Tabulate the per-cluster summaries
visits_by_month_year_df = pd.DataFrame(visits_by_month_year)

# Persist the table as an Excel workbook (without the index column)
output_file = './data/visits_by_month_year.xlsx'
visits_by_month_year_df.to_excel(output_file, index=False)

# Show the table in the cell output
print(visits_by_month_year_df)


    cluster_center_lat  cluster_center_long  \
0            26.939159            92.974570   
1            26.924494            92.989876   
2            26.928302            92.953082   
3            26.918439            92.934127   
4            26.956905            92.955370   
5            26.906733            92.911573   
6            26.931647            93.022020   
7            26.983876            92.982154   
8            26.958240            92.983440   
9            26.928639            92.917349   
10           26.948148            93.011115   
11           26.919334            93.048120   
12           26.906694            92.977679   
13           26.947722            92.935651   
14           26.990236            92.946242   
15           27.008855            92.956208   
16           27.021891            92.971048   
17           27.046332            92.978675   
18           27.063375            92.986077   
19           27.081959            92.994297   
20           

In [6]:
# Write `visits_by_month_df` to an Excel workbook under ./insights (index column omitted).
# NOTE(review): `visits_by_month_df` is not defined in the preceding cell, which
# builds `visits_by_month_year_df` -- confirm it is created in an earlier cell;
# otherwise this raises NameError on a fresh kernel (Restart & Run All).
# This also rebinds `output_file`, which the preceding cell set to a different path.
output_file = './insights/visits_by_month.xlsx'
visits_by_month_df.to_excel(output_file, index=False)