### DISTANCE TRAVELLED BY EACH `TAGGED BIRD` WITHIN A GIVEN DURATION


In [8]:
import pandas as pd
import plotly.express as px

# Read the bird speed analysis workbook, then derive a datetime `timestamp`
# column from the raw `start_time` values.
file_path = './insights/bird_speed_analysis.xlsx'
bird_data = pd.read_excel(file_path, engine='openpyxl')
bird_data = bird_data.assign(timestamp=pd.to_datetime(bird_data['start_time']))

def calculate_total_distance_by_interval(data, bird_id, start_date, end_date):
    """Return one bird's total ``distance_km`` per calendar day in a date range.

    Args:
        data: DataFrame with 'individual-local-identifier', 'timestamp'
            (datetime) and 'distance_km' columns.
        bird_id: Identifier of the bird to keep.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'year', 'month', 'day' and 'distance_km',
        one row per day that has at least one matching record.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    is_selected = (
        (data['individual-local-identifier'] == bird_id)
        & (timestamps >= start_date)
        & before_end
    )
    filtered_data = data.loc[is_selected].copy()

    # Calendar components become the grouping keys (and output columns).
    filtered_data['year'] = filtered_data['timestamp'].dt.year
    filtered_data['month'] = filtered_data['timestamp'].dt.month
    filtered_data['day'] = filtered_data['timestamp'].dt.day

    daily_totals = filtered_data.groupby(['year', 'month', 'day'])['distance_km'].sum()
    return daily_totals.reset_index()

# Query parameters for this cell.
bird_id = '1_bill'  # one of: 1_bill, 2_gabbar, 3_mogambo, 4_godfather, 5_rifle, 6_tkbhai
start_date = '2015-03-01'  # lower bound of the reporting window
end_date = '2016-07-01'  # upper bound of the reporting window

# Daily distance totals for the selected bird.
filtered_data = calculate_total_distance_by_interval(bird_data, bird_id, start_date, end_date)

# Collapse the year/month/day parts into one datetime column for the x-axis.
date_parts = filtered_data[['year', 'month', 'day']]
filtered_data['date'] = pd.to_datetime(date_parts)

# Bar chart of the daily totals.
axis_labels = {'date': 'Date', 'distance_km': 'Total Distance (km)'}
fig = px.bar(
    filtered_data,
    x='date',
    y='distance_km',
    title=f'Total Distance Traveled by {bird_id} from {start_date} to {end_date}',
    labels=axis_labels,
)
fig.update_layout(
    showlegend=False,
    xaxis_title='Date',
    yaxis_title='Total Distance (km)',
)

# Render the figure.
fig.show()

def calculate_total_distance_per_bird(data, start_date, end_date):
    """Return each bird's total ``distance_km`` per calendar month in a date range.

    Args:
        data: DataFrame with 'individual-local-identifier', 'timestamp'
            (datetime) and 'distance_km' columns.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'individual-local-identifier', 'year',
        'month' and 'distance_km'.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    filtered_data = data.loc[(timestamps >= start_date) & before_end].copy()

    # Year and month become grouping keys alongside the bird identifier.
    filtered_data['year'] = filtered_data['timestamp'].dt.year
    filtered_data['month'] = filtered_data['timestamp'].dt.month

    monthly_totals = filtered_data.groupby(
        ['individual-local-identifier', 'year', 'month']
    )['distance_km'].sum()
    return monthly_totals.reset_index()

# Monthly totals for every bird, narrowed to the selected one.
distance_data = calculate_total_distance_per_bird(bird_data, start_date, end_date)
is_selected_bird = distance_data['individual-local-identifier'] == bird_id
selected_bird_data = distance_data[is_selected_bird]

# Report one line per (year, month) for the selected bird.
print(f"Total distance traveled by bird '{bird_id}' from {start_date} to {end_date}:")
for year, month, distance in zip(
    selected_bird_data['year'],
    selected_bird_data['month'],
    selected_bird_data['distance_km'],
):
    print(f"In {year}-{month:02d}: {distance:.2f} km")

def calculate_total_distance_by_bird(data, start_date, end_date):
    """Return each bird's total ``distance_km`` over a date range.

    Args:
        data: DataFrame with 'individual-local-identifier', 'timestamp'
            (datetime) and 'distance_km' columns.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'individual-local-identifier' and
        'distance_km', one row per bird with records in the range.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    filtered_data = data.loc[(timestamps >= start_date) & before_end].copy()

    totals = filtered_data.groupby('individual-local-identifier')['distance_km'].sum()
    return totals.reset_index()

# Whole-window distance totals, one row per bird.
total_distance_by_bird = calculate_total_distance_by_bird(bird_data, start_date, end_date)

# Keep only the selected bird's row and report it.
selected_bird_total_distance = total_distance_by_bird[
    total_distance_by_bird['individual-local-identifier'] == bird_id
]

if selected_bird_total_distance.empty:
    print(f"No data available for bird '{bird_id}' from {start_date} to {end_date}.")
else:
    total_distance = selected_bird_total_distance['distance_km'].values[0]
    print(f"Total distance traveled by bird '{bird_id}' from {start_date} to {end_date} is {total_distance:.2f} km.")


Total distance traveled by bird '1_bill' from 2015-03-01 to 2016-07-01:
In 2015-03: 176.10 km
In 2015-04: 372.74 km
In 2015-05: 249.23 km
Total distance traveled by bird '1_bill' from 2015-03-01 to 2016-07-01 is 798.07 km.


In [9]:
import pandas as pd
import plotly.express as px

# Read the bird speed analysis workbook, then derive a datetime `timestamp`
# column from the raw `start_time` values.
file_path = './insights/bird_speed_analysis.xlsx'
bird_data = pd.read_excel(file_path, engine='openpyxl')
bird_data = bird_data.assign(timestamp=pd.to_datetime(bird_data['start_time']))

def calculate_total_distance_by_interval(data, bird_id, start_date, end_date):
    """Return one bird's total ``distance_km`` per calendar day in a date range.

    Args:
        data: DataFrame with 'individual-local-identifier', 'timestamp'
            (datetime) and 'distance_km' columns.
        bird_id: Identifier of the bird to keep.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'year', 'month', 'day' and 'distance_km',
        one row per day that has at least one matching record.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    is_selected = (
        (data['individual-local-identifier'] == bird_id)
        & (timestamps >= start_date)
        & before_end
    )
    filtered_data = data.loc[is_selected].copy()

    # Calendar components become the grouping keys (and output columns).
    filtered_data['year'] = filtered_data['timestamp'].dt.year
    filtered_data['month'] = filtered_data['timestamp'].dt.month
    filtered_data['day'] = filtered_data['timestamp'].dt.day

    daily_totals = filtered_data.groupby(['year', 'month', 'day'])['distance_km'].sum()
    return daily_totals.reset_index()

# Query parameters for this cell.
bird_id = '2_gabbar'  # one of: 1_bill, 2_gabbar, 3_mogambo, 4_godfather, 5_rifle, 6_tkbhai
start_date = '2015-03-01'  # lower bound of the reporting window
end_date = '2016-07-01'  # upper bound of the reporting window

# Daily distance totals for the selected bird.
filtered_data = calculate_total_distance_by_interval(bird_data, bird_id, start_date, end_date)

# Collapse the year/month/day parts into one datetime column for the x-axis.
date_parts = filtered_data[['year', 'month', 'day']]
filtered_data['date'] = pd.to_datetime(date_parts)

# Bar chart of the daily totals.
axis_labels = {'date': 'Date', 'distance_km': 'Total Distance (km)'}
fig = px.bar(
    filtered_data,
    x='date',
    y='distance_km',
    title=f'Total Distance Traveled by {bird_id} from {start_date} to {end_date}',
    labels=axis_labels,
)
fig.update_layout(
    showlegend=False,
    xaxis_title='Date',
    yaxis_title='Total Distance (km)',
)

# Render the figure.
fig.show()

def calculate_total_distance_per_bird(data, start_date, end_date):
    """Return each bird's total ``distance_km`` per calendar month in a date range.

    Args:
        data: DataFrame with 'individual-local-identifier', 'timestamp'
            (datetime) and 'distance_km' columns.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'individual-local-identifier', 'year',
        'month' and 'distance_km'.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    filtered_data = data.loc[(timestamps >= start_date) & before_end].copy()

    # Year and month become grouping keys alongside the bird identifier.
    filtered_data['year'] = filtered_data['timestamp'].dt.year
    filtered_data['month'] = filtered_data['timestamp'].dt.month

    monthly_totals = filtered_data.groupby(
        ['individual-local-identifier', 'year', 'month']
    )['distance_km'].sum()
    return monthly_totals.reset_index()

# Monthly totals for every bird, narrowed to the selected one.
distance_data = calculate_total_distance_per_bird(bird_data, start_date, end_date)
is_selected_bird = distance_data['individual-local-identifier'] == bird_id
selected_bird_data = distance_data[is_selected_bird]

# Report one line per (year, month) for the selected bird.
print(f"Total distance traveled by bird '{bird_id}' from {start_date} to {end_date}:")
for year, month, distance in zip(
    selected_bird_data['year'],
    selected_bird_data['month'],
    selected_bird_data['distance_km'],
):
    print(f"In {year}-{month:02d}: {distance:.2f} km")

def calculate_total_distance_by_bird(data, start_date, end_date):
    """Return each bird's total ``distance_km`` over a date range.

    Args:
        data: DataFrame with 'individual-local-identifier', 'timestamp'
            (datetime) and 'distance_km' columns.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'individual-local-identifier' and
        'distance_km', one row per bird with records in the range.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    filtered_data = data.loc[(timestamps >= start_date) & before_end].copy()

    totals = filtered_data.groupby('individual-local-identifier')['distance_km'].sum()
    return totals.reset_index()

# Whole-window distance totals, one row per bird.
total_distance_by_bird = calculate_total_distance_by_bird(bird_data, start_date, end_date)

# Keep only the selected bird's row and report it.
selected_bird_total_distance = total_distance_by_bird[
    total_distance_by_bird['individual-local-identifier'] == bird_id
]

if selected_bird_total_distance.empty:
    print(f"No data available for bird '{bird_id}' from {start_date} to {end_date}.")
else:
    total_distance = selected_bird_total_distance['distance_km'].values[0]
    print(f"Total distance traveled by bird '{bird_id}' from {start_date} to {end_date} is {total_distance:.2f} km.")


Total distance traveled by bird '2_gabbar' from 2015-03-01 to 2016-07-01:
In 2015-03: 80.25 km
In 2015-04: 1315.91 km
In 2015-05: 968.41 km
In 2015-06: 372.00 km
Total distance traveled by bird '2_gabbar' from 2015-03-01 to 2016-07-01 is 2736.57 km.


In [10]:
import pandas as pd
import plotly.express as px

# Read the bird speed analysis workbook, then derive a datetime `timestamp`
# column from the raw `start_time` values.
file_path = './insights/bird_speed_analysis.xlsx'
bird_data = pd.read_excel(file_path, engine='openpyxl')
bird_data = bird_data.assign(timestamp=pd.to_datetime(bird_data['start_time']))

def calculate_total_distance_by_interval(data, bird_id, start_date, end_date):
    """Return one bird's total ``distance_km`` per calendar day in a date range.

    Args:
        data: DataFrame with 'individual-local-identifier', 'timestamp'
            (datetime) and 'distance_km' columns.
        bird_id: Identifier of the bird to keep.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'year', 'month', 'day' and 'distance_km',
        one row per day that has at least one matching record.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    is_selected = (
        (data['individual-local-identifier'] == bird_id)
        & (timestamps >= start_date)
        & before_end
    )
    filtered_data = data.loc[is_selected].copy()

    # Calendar components become the grouping keys (and output columns).
    filtered_data['year'] = filtered_data['timestamp'].dt.year
    filtered_data['month'] = filtered_data['timestamp'].dt.month
    filtered_data['day'] = filtered_data['timestamp'].dt.day

    daily_totals = filtered_data.groupby(['year', 'month', 'day'])['distance_km'].sum()
    return daily_totals.reset_index()

# Query parameters for this cell.
bird_id = '3_mogambo'  # one of: 1_bill, 2_gabbar, 3_mogambo, 4_godfather, 5_rifle, 6_tkbhai
start_date = '2015-03-01'  # lower bound of the reporting window
end_date = '2016-07-01'  # upper bound of the reporting window

# Daily distance totals for the selected bird.
filtered_data = calculate_total_distance_by_interval(bird_data, bird_id, start_date, end_date)

# Collapse the year/month/day parts into one datetime column for the x-axis.
date_parts = filtered_data[['year', 'month', 'day']]
filtered_data['date'] = pd.to_datetime(date_parts)

# Bar chart of the daily totals.
axis_labels = {'date': 'Date', 'distance_km': 'Total Distance (km)'}
fig = px.bar(
    filtered_data,
    x='date',
    y='distance_km',
    title=f'Total Distance Traveled by {bird_id} from {start_date} to {end_date}',
    labels=axis_labels,
)
fig.update_layout(
    showlegend=False,
    xaxis_title='Date',
    yaxis_title='Total Distance (km)',
)

# Render the figure.
fig.show()

def calculate_total_distance_per_bird(data, start_date, end_date):
    """Return each bird's total ``distance_km`` per calendar month in a date range.

    Args:
        data: DataFrame with 'individual-local-identifier', 'timestamp'
            (datetime) and 'distance_km' columns.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'individual-local-identifier', 'year',
        'month' and 'distance_km'.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    filtered_data = data.loc[(timestamps >= start_date) & before_end].copy()

    # Year and month become grouping keys alongside the bird identifier.
    filtered_data['year'] = filtered_data['timestamp'].dt.year
    filtered_data['month'] = filtered_data['timestamp'].dt.month

    monthly_totals = filtered_data.groupby(
        ['individual-local-identifier', 'year', 'month']
    )['distance_km'].sum()
    return monthly_totals.reset_index()

# Monthly totals for every bird, narrowed to the selected one.
distance_data = calculate_total_distance_per_bird(bird_data, start_date, end_date)
is_selected_bird = distance_data['individual-local-identifier'] == bird_id
selected_bird_data = distance_data[is_selected_bird]

# Report one line per (year, month) for the selected bird.
print(f"Total distance traveled by bird '{bird_id}' from {start_date} to {end_date}:")
for year, month, distance in zip(
    selected_bird_data['year'],
    selected_bird_data['month'],
    selected_bird_data['distance_km'],
):
    print(f"In {year}-{month:02d}: {distance:.2f} km")

def calculate_total_distance_by_bird(data, start_date, end_date):
    """Return each bird's total ``distance_km`` over a date range.

    Args:
        data: DataFrame with 'individual-local-identifier', 'timestamp'
            (datetime) and 'distance_km' columns.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'individual-local-identifier' and
        'distance_km', one row per bird with records in the range.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    filtered_data = data.loc[(timestamps >= start_date) & before_end].copy()

    totals = filtered_data.groupby('individual-local-identifier')['distance_km'].sum()
    return totals.reset_index()

# Whole-window distance totals, one row per bird.
total_distance_by_bird = calculate_total_distance_by_bird(bird_data, start_date, end_date)

# Keep only the selected bird's row and report it.
selected_bird_total_distance = total_distance_by_bird[
    total_distance_by_bird['individual-local-identifier'] == bird_id
]

if selected_bird_total_distance.empty:
    print(f"No data available for bird '{bird_id}' from {start_date} to {end_date}.")
else:
    total_distance = selected_bird_total_distance['distance_km'].values[0]
    print(f"Total distance traveled by bird '{bird_id}' from {start_date} to {end_date} is {total_distance:.2f} km.")


Total distance traveled by bird '3_mogambo' from 2015-03-01 to 2016-07-01:
In 2015-11: 43.43 km
In 2015-12: 107.57 km
In 2016-01: 55.01 km
Total distance traveled by bird '3_mogambo' from 2015-03-01 to 2016-07-01 is 206.02 km.


In [11]:
import pandas as pd
import plotly.express as px

# Read the bird speed analysis workbook, then derive a datetime `timestamp`
# column from the raw `start_time` values.
file_path = './insights/bird_speed_analysis.xlsx'
bird_data = pd.read_excel(file_path, engine='openpyxl')
bird_data = bird_data.assign(timestamp=pd.to_datetime(bird_data['start_time']))

def calculate_total_distance_by_interval(data, bird_id, start_date, end_date):
    """Return one bird's total ``distance_km`` per calendar day in a date range.

    Args:
        data: DataFrame with 'individual-local-identifier', 'timestamp'
            (datetime) and 'distance_km' columns.
        bird_id: Identifier of the bird to keep.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'year', 'month', 'day' and 'distance_km',
        one row per day that has at least one matching record.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    is_selected = (
        (data['individual-local-identifier'] == bird_id)
        & (timestamps >= start_date)
        & before_end
    )
    filtered_data = data.loc[is_selected].copy()

    # Calendar components become the grouping keys (and output columns).
    filtered_data['year'] = filtered_data['timestamp'].dt.year
    filtered_data['month'] = filtered_data['timestamp'].dt.month
    filtered_data['day'] = filtered_data['timestamp'].dt.day

    daily_totals = filtered_data.groupby(['year', 'month', 'day'])['distance_km'].sum()
    return daily_totals.reset_index()

# Query parameters for this cell.
bird_id = '4_godfather'  # one of: 1_bill, 2_gabbar, 3_mogambo, 4_godfather, 5_rifle, 6_tkbhai
start_date = '2015-03-01'  # lower bound of the reporting window
end_date = '2016-07-01'  # upper bound of the reporting window

# Daily distance totals for the selected bird.
filtered_data = calculate_total_distance_by_interval(bird_data, bird_id, start_date, end_date)

# Collapse the year/month/day parts into one datetime column for the x-axis.
date_parts = filtered_data[['year', 'month', 'day']]
filtered_data['date'] = pd.to_datetime(date_parts)

# Bar chart of the daily totals.
axis_labels = {'date': 'Date', 'distance_km': 'Total Distance (km)'}
fig = px.bar(
    filtered_data,
    x='date',
    y='distance_km',
    title=f'Total Distance Traveled by {bird_id} from {start_date} to {end_date}',
    labels=axis_labels,
)
fig.update_layout(
    showlegend=False,
    xaxis_title='Date',
    yaxis_title='Total Distance (km)',
)

# Render the figure.
fig.show()

def calculate_total_distance_per_bird(data, start_date, end_date):
    """Return each bird's total ``distance_km`` per calendar month in a date range.

    Args:
        data: DataFrame with 'individual-local-identifier', 'timestamp'
            (datetime) and 'distance_km' columns.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'individual-local-identifier', 'year',
        'month' and 'distance_km'.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    filtered_data = data.loc[(timestamps >= start_date) & before_end].copy()

    # Year and month become grouping keys alongside the bird identifier.
    filtered_data['year'] = filtered_data['timestamp'].dt.year
    filtered_data['month'] = filtered_data['timestamp'].dt.month

    monthly_totals = filtered_data.groupby(
        ['individual-local-identifier', 'year', 'month']
    )['distance_km'].sum()
    return monthly_totals.reset_index()

# Monthly totals for every bird, narrowed to the selected one.
distance_data = calculate_total_distance_per_bird(bird_data, start_date, end_date)
is_selected_bird = distance_data['individual-local-identifier'] == bird_id
selected_bird_data = distance_data[is_selected_bird]

# Report one line per (year, month) for the selected bird.
print(f"Total distance traveled by bird '{bird_id}' from {start_date} to {end_date}:")
for year, month, distance in zip(
    selected_bird_data['year'],
    selected_bird_data['month'],
    selected_bird_data['distance_km'],
):
    print(f"In {year}-{month:02d}: {distance:.2f} km")

def calculate_total_distance_by_bird(data, start_date, end_date):
    """Return each bird's total ``distance_km`` over a date range.

    Args:
        data: DataFrame with 'individual-local-identifier', 'timestamp'
            (datetime) and 'distance_km' columns.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'individual-local-identifier' and
        'distance_km', one row per bird with records in the range.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    filtered_data = data.loc[(timestamps >= start_date) & before_end].copy()

    totals = filtered_data.groupby('individual-local-identifier')['distance_km'].sum()
    return totals.reset_index()

# Whole-window distance totals, one row per bird.
total_distance_by_bird = calculate_total_distance_by_bird(bird_data, start_date, end_date)

# Keep only the selected bird's row and report it.
selected_bird_total_distance = total_distance_by_bird[
    total_distance_by_bird['individual-local-identifier'] == bird_id
]

if selected_bird_total_distance.empty:
    print(f"No data available for bird '{bird_id}' from {start_date} to {end_date}.")
else:
    total_distance = selected_bird_total_distance['distance_km'].values[0]
    print(f"Total distance traveled by bird '{bird_id}' from {start_date} to {end_date} is {total_distance:.2f} km.")


Total distance traveled by bird '4_godfather' from 2015-03-01 to 2016-07-01:
In 2016-02: 94.91 km
In 2016-03: 416.07 km
In 2016-04: 82.35 km
Total distance traveled by bird '4_godfather' from 2015-03-01 to 2016-07-01 is 593.33 km.


In [12]:
import pandas as pd
import plotly.express as px

# Read the bird speed analysis workbook, then derive a datetime `timestamp`
# column from the raw `start_time` values.
file_path = './insights/bird_speed_analysis.xlsx'
bird_data = pd.read_excel(file_path, engine='openpyxl')
bird_data = bird_data.assign(timestamp=pd.to_datetime(bird_data['start_time']))

def calculate_total_distance_by_interval(data, bird_id, start_date, end_date):
    """Return one bird's total ``distance_km`` per calendar day in a date range.

    Args:
        data: DataFrame with 'individual-local-identifier', 'timestamp'
            (datetime) and 'distance_km' columns.
        bird_id: Identifier of the bird to keep.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'year', 'month', 'day' and 'distance_km',
        one row per day that has at least one matching record.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    is_selected = (
        (data['individual-local-identifier'] == bird_id)
        & (timestamps >= start_date)
        & before_end
    )
    filtered_data = data.loc[is_selected].copy()

    # Calendar components become the grouping keys (and output columns).
    filtered_data['year'] = filtered_data['timestamp'].dt.year
    filtered_data['month'] = filtered_data['timestamp'].dt.month
    filtered_data['day'] = filtered_data['timestamp'].dt.day

    daily_totals = filtered_data.groupby(['year', 'month', 'day'])['distance_km'].sum()
    return daily_totals.reset_index()

# Query parameters for this cell.
bird_id = '5_rifle'  # one of: 1_bill, 2_gabbar, 3_mogambo, 4_godfather, 5_rifle, 6_tkbhai
start_date = '2015-03-01'  # lower bound of the reporting window
end_date = '2016-07-01'  # upper bound of the reporting window

# Daily distance totals for the selected bird.
filtered_data = calculate_total_distance_by_interval(bird_data, bird_id, start_date, end_date)

# Collapse the year/month/day parts into one datetime column for the x-axis.
date_parts = filtered_data[['year', 'month', 'day']]
filtered_data['date'] = pd.to_datetime(date_parts)

# Bar chart of the daily totals.
axis_labels = {'date': 'Date', 'distance_km': 'Total Distance (km)'}
fig = px.bar(
    filtered_data,
    x='date',
    y='distance_km',
    title=f'Total Distance Traveled by {bird_id} from {start_date} to {end_date}',
    labels=axis_labels,
)
fig.update_layout(
    showlegend=False,
    xaxis_title='Date',
    yaxis_title='Total Distance (km)',
)

# Render the figure.
fig.show()

def calculate_total_distance_per_bird(data, start_date, end_date):
    """Return each bird's total ``distance_km`` per calendar month in a date range.

    Args:
        data: DataFrame with 'individual-local-identifier', 'timestamp'
            (datetime) and 'distance_km' columns.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'individual-local-identifier', 'year',
        'month' and 'distance_km'.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    filtered_data = data.loc[(timestamps >= start_date) & before_end].copy()

    # Year and month become grouping keys alongside the bird identifier.
    filtered_data['year'] = filtered_data['timestamp'].dt.year
    filtered_data['month'] = filtered_data['timestamp'].dt.month

    monthly_totals = filtered_data.groupby(
        ['individual-local-identifier', 'year', 'month']
    )['distance_km'].sum()
    return monthly_totals.reset_index()

# Monthly totals for every bird, narrowed to the selected one.
distance_data = calculate_total_distance_per_bird(bird_data, start_date, end_date)
is_selected_bird = distance_data['individual-local-identifier'] == bird_id
selected_bird_data = distance_data[is_selected_bird]

# Report one line per (year, month) for the selected bird.
print(f"Total distance traveled by bird '{bird_id}' from {start_date} to {end_date}:")
for year, month, distance in zip(
    selected_bird_data['year'],
    selected_bird_data['month'],
    selected_bird_data['distance_km'],
):
    print(f"In {year}-{month:02d}: {distance:.2f} km")

def calculate_total_distance_by_bird(data, start_date, end_date):
    """Return each bird's total ``distance_km`` over a date range.

    Args:
        data: DataFrame with 'individual-local-identifier', 'timestamp'
            (datetime) and 'distance_km' columns.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'individual-local-identifier' and
        'distance_km', one row per bird with records in the range.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    filtered_data = data.loc[(timestamps >= start_date) & before_end].copy()

    totals = filtered_data.groupby('individual-local-identifier')['distance_km'].sum()
    return totals.reset_index()

# Whole-window distance totals, one row per bird.
total_distance_by_bird = calculate_total_distance_by_bird(bird_data, start_date, end_date)

# Keep only the selected bird's row and report it.
selected_bird_total_distance = total_distance_by_bird[
    total_distance_by_bird['individual-local-identifier'] == bird_id
]

if selected_bird_total_distance.empty:
    print(f"No data available for bird '{bird_id}' from {start_date} to {end_date}.")
else:
    total_distance = selected_bird_total_distance['distance_km'].values[0]
    print(f"Total distance traveled by bird '{bird_id}' from {start_date} to {end_date} is {total_distance:.2f} km.")


Total distance traveled by bird '5_rifle' from 2015-03-01 to 2016-07-01:
In 2016-02: 49.22 km
In 2016-03: 332.60 km
Total distance traveled by bird '5_rifle' from 2015-03-01 to 2016-07-01 is 381.82 km.


In [2]:
import pandas as pd
import plotly.express as px

# Read the bird speed analysis workbook, then derive a datetime `timestamp`
# column from the raw `start_time` values.
file_path = './insights/bird_speed_analysis.xlsx'
bird_data = pd.read_excel(file_path, engine='openpyxl')
bird_data = bird_data.assign(timestamp=pd.to_datetime(bird_data['start_time']))

def calculate_total_distance_by_interval(data, bird_id, start_date, end_date):
    """Return one bird's total ``distance_km`` per calendar day in a date range.

    Args:
        data: DataFrame with 'individual-local-identifier', 'timestamp'
            (datetime) and 'distance_km' columns.
        bird_id: Identifier of the bird to keep.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'year', 'month', 'day' and 'distance_km',
        one row per day that has at least one matching record.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    is_selected = (
        (data['individual-local-identifier'] == bird_id)
        & (timestamps >= start_date)
        & before_end
    )
    filtered_data = data.loc[is_selected].copy()

    # Calendar components become the grouping keys (and output columns).
    filtered_data['year'] = filtered_data['timestamp'].dt.year
    filtered_data['month'] = filtered_data['timestamp'].dt.month
    filtered_data['day'] = filtered_data['timestamp'].dt.day

    daily_totals = filtered_data.groupby(['year', 'month', 'day'])['distance_km'].sum()
    return daily_totals.reset_index()

# Query parameters for this cell.
bird_id = '6_tkbhai'  # one of: 1_bill, 2_gabbar, 3_mogambo, 4_godfather, 5_rifle, 6_tkbhai
start_date = '2015-03-01'  # lower bound of the reporting window
end_date = '2016-07-01'  # upper bound of the reporting window

# Daily distance totals for the selected bird.
filtered_data = calculate_total_distance_by_interval(bird_data, bird_id, start_date, end_date)

# Collapse the year/month/day parts into one datetime column for the x-axis.
date_parts = filtered_data[['year', 'month', 'day']]
filtered_data['date'] = pd.to_datetime(date_parts)

# Bar chart of the daily totals.
axis_labels = {'date': 'Date', 'distance_km': 'Total Distance (km)'}
fig = px.bar(
    filtered_data,
    x='date',
    y='distance_km',
    title=f'Total Distance Traveled by {bird_id} from {start_date} to {end_date}',
    labels=axis_labels,
)
fig.update_layout(
    showlegend=False,
    xaxis_title='Date',
    yaxis_title='Total Distance (km)',
)

# Render the figure.
fig.show()

def calculate_total_distance_per_bird(data, start_date, end_date):
    """Return each bird's total ``distance_km`` per calendar month in a date range.

    Args:
        data: DataFrame with 'individual-local-identifier', 'timestamp'
            (datetime) and 'distance_km' columns.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'individual-local-identifier', 'year',
        'month' and 'distance_km'.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    filtered_data = data.loc[(timestamps >= start_date) & before_end].copy()

    # Year and month become grouping keys alongside the bird identifier.
    filtered_data['year'] = filtered_data['timestamp'].dt.year
    filtered_data['month'] = filtered_data['timestamp'].dt.month

    monthly_totals = filtered_data.groupby(
        ['individual-local-identifier', 'year', 'month']
    )['distance_km'].sum()
    return monthly_totals.reset_index()

# Monthly totals for every bird, narrowed to the selected one.
distance_data = calculate_total_distance_per_bird(bird_data, start_date, end_date)
is_selected_bird = distance_data['individual-local-identifier'] == bird_id
selected_bird_data = distance_data[is_selected_bird]

# Report one line per (year, month) for the selected bird.
print(f"Total distance traveled by bird '{bird_id}' from {start_date} to {end_date}:")
for year, month, distance in zip(
    selected_bird_data['year'],
    selected_bird_data['month'],
    selected_bird_data['distance_km'],
):
    print(f"In {year}-{month:02d}: {distance:.2f} km")

def calculate_total_distance_by_bird(data, start_date, end_date):
    """Return each bird's total ``distance_km`` over a date range.

    Args:
        data: DataFrame with 'individual-local-identifier', 'timestamp'
            (datetime) and 'distance_km' columns.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'individual-local-identifier' and
        'distance_km', one row per bird with records in the range.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    filtered_data = data.loc[(timestamps >= start_date) & before_end].copy()

    totals = filtered_data.groupby('individual-local-identifier')['distance_km'].sum()
    return totals.reset_index()

# Whole-window distance totals, one row per bird.
total_distance_by_bird = calculate_total_distance_by_bird(bird_data, start_date, end_date)

# Keep only the selected bird's row and report it.
selected_bird_total_distance = total_distance_by_bird[
    total_distance_by_bird['individual-local-identifier'] == bird_id
]

if selected_bird_total_distance.empty:
    print(f"No data available for bird '{bird_id}' from {start_date} to {end_date}.")
else:
    total_distance = selected_bird_total_distance['distance_km'].values[0]
    print(f"Total distance traveled by bird '{bird_id}' from {start_date} to {end_date} is {total_distance:.2f} km.")


Total distance traveled by bird '6_tkbhai' from 2015-03-01 to 2016-07-01:
In 2016-03: 301.22 km
In 2016-04: 160.85 km
In 2016-05: 42.27 km
Total distance traveled by bird '6_tkbhai' from 2015-03-01 to 2016-07-01 is 504.34 km.


### COMPARE DISTANCE TRAVELLED BY EACH BIRD FOR A GIVEN TIME PERIOD

In [2]:
import pandas as pd
import plotly.express as px

# Read the bird speed analysis workbook, then derive a datetime `timestamp`
# column from the raw `start_time` values.
file_path = './insights/bird_speed_analysis.xlsx'
bird_data = pd.read_excel(file_path, engine='openpyxl')
bird_data = bird_data.assign(timestamp=pd.to_datetime(bird_data['start_time']))

def calculate_total_distance_by_interval(data, start_date, end_date):
    """Return every bird's total ``distance_km`` per calendar day in a date range.

    Args:
        data: DataFrame with 'bird_id', 'timestamp' (datetime) and
            'distance_km' columns.
        start_date: Inclusive lower bound on 'timestamp'.
        end_date: Inclusive upper bound; a bound without a time-of-day part
            (e.g. '2016-07-01') covers that whole day.

    Returns:
        DataFrame with columns 'bird_id', 'year', 'month', 'day' and
        'distance_km'.
    """
    timestamps = data['timestamp']

    # A bare date parses to midnight, so a plain `<=` would silently drop
    # every record logged later on the end day; widen such a bound by a day.
    end_bound = pd.Timestamp(end_date)
    column_tz = getattr(timestamps.dtype, 'tz', None)
    if column_tz is not None and end_bound.tzinfo is None:
        # Interpret a naive bound in the column's timezone so tz-aware data
        # can still be filtered with plain date strings.
        end_bound = end_bound.tz_localize(column_tz)
    if end_bound == end_bound.normalize():
        before_end = timestamps < end_bound + pd.DateOffset(days=1)
    else:
        before_end = timestamps <= end_bound

    filtered_data = data.loc[(timestamps >= start_date) & before_end].copy()

    # Calendar components become grouping keys; month and day are stored as
    # pandas nullable integers.
    filtered_data['year'] = filtered_data['timestamp'].dt.year
    filtered_data['month'] = filtered_data['timestamp'].dt.month.astype('Int64')
    filtered_data['day'] = filtered_data['timestamp'].dt.day.astype('Int64')

    # NOTE(review): the other cells key birds by 'individual-local-identifier';
    # confirm the workbook really has a separate 'bird_id' column.
    daily_totals = filtered_data.groupby(
        ['bird_id', 'year', 'month', 'day']
    )['distance_km'].sum()
    return daily_totals.reset_index()

# Reporting window shared by every bird in the comparison.
start_date = '2015-02-01'  # lower bound of the window
end_date = '2016-07-01'  # upper bound of the window

# Daily distance totals for all birds.
filtered_data = calculate_total_distance_by_interval(bird_data, start_date, end_date)

# Collapse the year/month/day parts into one datetime column for the x-axis.
date_parts = filtered_data[['year', 'month', 'day']]
filtered_data['date'] = pd.to_datetime(date_parts)

# Line chart with one coloured trace per `bird_id` value.
axis_labels = {'date': 'Date', 'distance_km': 'Total Distance (km)', 'bird_id': 'Bird ID'}
fig = px.line(
    filtered_data,
    x='date',
    y='distance_km',
    color='bird_id',
    title=f'Total Distance Traveled by Each Bird from {start_date} to {end_date}',
    labels=axis_labels,
)
fig.update_layout(
    showlegend=True,
    xaxis_tickangle=45,  # slant the date labels so they stay readable
    xaxis_title='Date',
    yaxis_title='Total Distance (km)',
)

# Render the figure.
fig.show()


In [2]:
import pandas as pd
import plotly.express as px

# Read the workbook, parse `start_time` into a datetime `timestamp` column,
# and tag every record with its calendar year.
file_path = './insights/bird_speed_analysis.xlsx'
bird_data = pd.read_excel(file_path, engine='openpyxl')
bird_data = bird_data.assign(timestamp=pd.to_datetime(bird_data['start_time']))
bird_data = bird_data.assign(year=bird_data['timestamp'].dt.year)

# Per-bird distance totals for 2015 only.
distance_2015 = (
    bird_data[bird_data['year'] == 2015]
    .groupby('individual-local-identifier')['distance_km']
    .sum()
    .reset_index()
    .rename(columns={'distance_km': 'distance_2015_km'})
)

# Per-bird distance totals for 2016 only.
distance_2016 = (
    bird_data[bird_data['year'] == 2016]
    .groupby('individual-local-identifier')['distance_km']
    .sum()
    .reset_index()
    .rename(columns={'distance_km': 'distance_2016_km'})
)

# Per-bird distance totals across every record, whatever the year.
total_distance = (
    bird_data
    .groupby('individual-local-identifier')['distance_km']
    .sum()
    .reset_index()
    .rename(columns={'distance_km': 'total_distance_km'})
)

# Outer-join the three tables on the bird identifier; a bird missing from one
# of them gets 0 instead of NaN.
merged_data = distance_2015.merge(distance_2016, on='individual-local-identifier', how='outer')
merged_data = merged_data.merge(total_distance, on='individual-local-identifier', how='outer')
merged_data = merged_data.fillna(0)

# Reshape to long form (one row per bird and distance type) for a grouped bar plot.
melted_data = merged_data.melt(
    id_vars='individual-local-identifier',
    value_vars=['distance_2015_km', 'distance_2016_km', 'total_distance_km'],
    var_name='Distance Type',
    value_name='Distance (km)',
)

# Swap the column names for the legend labels shown on the chart.
distance_type_mapping = {
    'distance_2015_km': '2015',
    'distance_2016_km': '2016',
    'total_distance_km': 'Total',
}
melted_data['Distance Type'] = melted_data['Distance Type'].map(distance_type_mapping)

# Grouped bar chart: one group per bird, one bar per distance type.
fig = px.bar(
    melted_data,
    x='individual-local-identifier',
    y='Distance (km)',
    color='Distance Type',
    barmode='group',
    labels={'individual-local-identifier': 'Bird Identifier'},
    title='Total Distance Traveled by Each Bird in 2015, 2016, and Overall',
)

# Axis titles, y-axis tick spacing and the gaps between bars.
fig.update_layout(
    xaxis_title='Bird Identifier',
    yaxis_title='Distance (km)',
    yaxis={'dtick': 400},  # one y-axis tick every 400 km
    bargap=0.2,  # gap between bars at adjacent x positions
    bargroupgap=0.1,  # gap between bars inside the same group
)

# NOTE(review): this sets the width of each bar's outline, not the width of
# the bar itself; bar thickness is governed by bargap/bargroupgap above.
fig.update_traces(marker={'line': {'width': 0.3}})

fig.show()

# Per-bird distance totals across all records (recomputed for the pie chart).
total_distance_per_bird = (
    bird_data
    .groupby('individual-local-identifier')['distance_km']
    .sum()
    .reset_index()
)

# Pie chart of each bird's share of the combined distance.
fig_pie = px.pie(
    total_distance_per_bird,
    names='individual-local-identifier',
    values='distance_km',
    color='individual-local-identifier',  # one colour per bird
    color_discrete_sequence=px.colors.qualitative.Pastel,  # pastel palette
    title='Percentage of Total Distance Traveled by Each Bird',
)

# Label each slice with its percentage and bird name, and keep the legend.
fig_pie.update_traces(showlegend=True, textinfo='percent+label')

fig_pie.show()

### HOUR WISE ACTIVITY OF EACH `TAGGED BIRD`
1. Helps identify the hours of the day when each hornbill is most active

In [2]:
import pandas as pd
import plotly.express as px

# Read the bird speed analysis workbook.
file_path = './insights/bird_speed_analysis.xlsx'
data = pd.read_excel(file_path, engine='openpyxl')
df = pd.DataFrame(data)

# Parse `start_time` into a datetime `timestamp` column and keep its hour of day.
df['timestamp'] = pd.to_datetime(df['start_time'])
df['hour'] = df['timestamp'].dt.hour

# Total distance per bird for each hour of the day.
hourly_distance_by_bird = (
    df
    .groupby(['individual-local-identifier', 'hour'])['distance_km']
    .sum()
    .reset_index()
)

# Line chart with point markers: one trace per bird, hour of day on the x-axis.
fig = px.line(
    hourly_distance_by_bird,
    x='hour',
    y='distance_km',
    color='individual-local-identifier',
    markers=True,
    labels={'distance_km': 'Distance Traveled (km)', 'hour': 'Hour of the Day'},
    title='Bird Activity by Hour',
)

# Put an x-axis tick on every hour, starting at 0, and set the titles.
fig.update_layout(
    xaxis={'tickmode': 'linear', 'tick0': 0, 'dtick': 1},
    xaxis_title='Hour of the Day',
    yaxis_title='Distance Traveled (km)',
    legend_title='Bird Identifier',
)

# Render the figure.
fig.show()
