### BIRD-WISE TRAVEL PATTERN BETWEEN CLUSTERS

In [None]:
import pandas as pd

# Replace this with your actual data loading mechanism
data = pd.read_excel("./insights/data_with_clusters.xlsx")

# Ensure 'timestamp' is in datetime format so the per-bird sort is chronological
data['timestamp'] = pd.to_datetime(data['timestamp'])

# One record per bird: its identifier and a textual description of its path
travel_paths = []

# Get unique birds from 'individual-local-identifier'.
# Null identifiers are dropped: filtering on them yields an empty frame, and the
# "stayed" branch below would then fail on iloc[0].
unique_birds = data['individual-local-identifier'].dropna().unique()

# Iterate through each unique bird
for bird_id in unique_birds:
    # Rows of the current bird in time order; explicit copy because a column is
    # added to this frame below
    bird_data = (
        data[data['individual-local-identifier'] == bird_id]
        .sort_values(by='timestamp')
        .copy()
    )

    # Cluster of the preceding observation (NaN for the first row)
    bird_data['previous_cluster'] = bird_data['cluster'].shift(1)

    # A migration is a row whose cluster differs from a known previous cluster;
    # the first row has no previous cluster and is therefore excluded
    has_previous = bird_data['previous_cluster'].notna()
    cluster_differs = bird_data['cluster'] != bird_data['previous_cluster']
    migrations = bird_data[has_previous & cluster_differs]

    # Create a travel path for the current bird
    if not migrations.empty:
        # shift() upcasts an integer cluster column to float to hold the leading
        # NaN, which rendered steps as "2.0 to 3". The remaining rows have no
        # NaN, so casting back to the column's own dtype is safe.
        origin_clusters = migrations['previous_cluster'].astype(bird_data['cluster'].dtype)
        steps = origin_clusters.astype(str) + ' to ' + migrations['cluster'].astype(str)
        travel_path = {
            'individual-local-identifier': bird_id,
            'travel_path': ' -> '.join(steps)
        }
    else:
        # If no migrations, indicate the bird stayed in the same cluster
        current_cluster = bird_data['cluster'].iloc[0]  # Get the current cluster
        travel_path = {
            'individual-local-identifier': bird_id,
            'travel_path': f'Stayed in cluster {current_cluster}'
        }

    travel_paths.append(travel_path)

# Convert the list of travel paths to a DataFrame
travel_paths_df = pd.DataFrame(travel_paths)

# Print the travel paths for all birds
print(travel_paths_df)


  individual-local-identifier                       travel_path
0                      1_bill               Stayed in cluster 2
1                    2_gabbar              2.0 to 3 -> 3.0 to 2
2                   3_mogambo  2.0 to 1 -> 1.0 to 2 -> 2.0 to 0
3                 4_godfather               Stayed in cluster 0
4                     5_rifle               Stayed in cluster 0
5                    6_tkbhai              0.0 to 3 -> 3.0 to 0


## TO GET MIGRATION DATA BETWEEN CLUSTERS WITH TIME SPENT

In [1]:
import pandas as pd

# Read the clustered tracking data
data = pd.read_excel("./insights/data_with_clusters.xlsx")

# Wrap the loaded data in a DataFrame
df = pd.DataFrame(data)

# Timestamps must be datetimes so they can be sorted and subtracted
df['timestamp'] = pd.to_datetime(df['timestamp'])

# Collects one record per uninterrupted stay of a bird in a cluster
all_birds_cluster_times = []

# Process the birds one at a time
for individual_identifier in df['individual-local-identifier'].unique():
    # This bird's rows in time order, re-indexed 0..n-1 so that index labels
    # can be used as positions below
    is_current_bird = df['individual-local-identifier'] == individual_identifier
    bird_df = df[is_current_bird].copy()
    bird_df = bird_df.sort_values(by='timestamp').reset_index(drop=True)

    # A row starts a new stay when its cluster differs from the previous row's
    # (the first row always does, because its previous cluster is NaN)
    bird_df['previous_cluster'] = bird_df['cluster'].shift(1)
    bird_df['cluster_change'] = bird_df['cluster'] != bird_df['previous_cluster']

    # Interval from each observation to the next one (NaT on the last row)
    bird_df['time_in_cluster'] = bird_df['timestamp'].shift(-1) - bird_df['timestamp']

    # Each stay runs from one change row up to (not including) the next change
    # row; the final stay runs to the end of the frame
    stay_starts = list(bird_df[bird_df['cluster_change']].index)
    stay_stops = stay_starts[1:] + [len(bird_df)]

    for stay_number, (first_row, stop_row) in enumerate(zip(stay_starts, stay_stops), start=1):
        stay = bird_df.iloc[first_row:stop_row]

        # Record the stay together with the bird identifier
        all_birds_cluster_times.append({
            'individual-local-identifier': individual_identifier,
            'migration_index': stay_number,
            'cluster': stay['cluster'].iloc[0],
            'time_spent': stay['time_in_cluster'].sum(),
            'start_date': stay['timestamp'].iloc[0],
            'end_date': stay['timestamp'].iloc[-1]
        })

# Build the result table from the collected records
all_birds_cluster_times_df = pd.DataFrame(all_birds_cluster_times)

# Only the heading is printed; the table itself is written to CSV below
print("\nTime spent in each cluster before migration for all birds:")
# print(all_birds_cluster_times_df)

# Persist the per-stay table for the plotting cell
all_birds_cluster_times_df.to_csv("./insights/migration.csv", index=False)



Time spent in each cluster before migration for all birds:


### TO SIMPLIFY THE PATH OF MIGRATION IF IT'S COMPLEX


In [3]:
# Generate the path of travel based on cluster changes.
# NOTE(review): this reads df['cluster_change'], but the visible cells only add
# that column to the per-bird `bird_df`, never to `df`. Presumably `df` is meant
# to hold one bird's time-sorted rows with that column; confirm before running
# on a fresh kernel.
path_clusters = df.loc[df['cluster_change'], 'cluster'].tolist()

# The path must START at the first observed cluster. Compare against the head of
# the list rather than testing membership: the first cluster may reappear later
# in the path (e.g. 2 -> 3 -> 2) while still being missing from the front.
first_cluster = df.iloc[0]['cluster']
if not path_clusters or path_clusters[0] != first_cluster:
    path_clusters.insert(0, first_cluster)

# Add the last cluster (if needed) so the path ends where the data ends
last_cluster = df.iloc[-1]['cluster']
if last_cluster != path_clusters[-1]:
    path_clusters.append(last_cluster)


# Function to find and replace repeating patterns with a formula
def minimize_path(path):
    """Collapse immediately repeating runs in a path into ``(a->b)*k`` tokens.

    The path is scanned left to right. At each position the shortest unit that
    repeats back-to-back at least twice *starting at that position* is replaced
    by one token ``"(u1->u2->...)*k"``, where ``k`` is the number of consecutive
    repeats found there. Elements that start no repeat are emitted as ``str(x)``.

    Repeat counts are measured at the position being emitted. Reusing a count
    found elsewhere in the path would overstate the repeats and skip elements.

    Parameters
    ----------
    path : list
        Sequence of cluster labels (any values comparable with ``==``).

    Returns
    -------
    list of str
        Tokens of the minimized path, in order.
    """
    n = len(path)
    minimized_path = []
    i = 0
    while i < n:
        found = None
        # A unit can only repeat if two copies fit in what remains of the path
        for length in range(1, (n - i) // 2 + 1):
            pattern = path[i:i + length]
            count = 1
            nxt = i + length
            # A short or empty slice past the end never equals the pattern,
            # so this loop stops at the end of the path on its own
            while path[nxt:nxt + length] == pattern:
                count += 1
                nxt += length
            if count > 1:
                found = (pattern, count)
                break  # prefer the shortest repeating unit

        if found is None:
            minimized_path.append(str(path[i]))
            i += 1
        else:
            pattern, count = found
            minimized_path.append(f"({'->'.join(map(str, pattern))})*{count}")
            i += len(pattern) * count

    return minimized_path

# Collapse repeated runs in the cluster path, then show it twice:
# once as an arrow-joined string and once as the raw token list
minimized_path = minimize_path(path_clusters)
joined_path = ' -> '.join(minimized_path)
print(f"\n Path of Travel: {joined_path}")
print(minimized_path)



 Path of Travel: 2 -> 3 -> 2
['2', '3', '2']


## USE OF PLOT TO VISUALIZE MOVEMENT OF BIRDS


In [2]:
import pandas as pd
import plotly.express as px

# Read the per-stay table from ./insights/migration.csv
migration_df = pd.read_csv('./insights/migration.csv')

# Draw one figure per bird
for bird_id in migration_df['individual-local-identifier'].unique():
    # Rows belonging to the current bird
    belongs_to_bird = migration_df['individual-local-identifier'] == bird_id
    bird_df = migration_df[belongs_to_bird]

    # Extra columns shown in the hover tooltip
    hover_columns = {
        'time_spent': True,  # Show time spent
        'start_date': True,  # Show start date
        'end_date': True     # Show end date
    }

    # Line plot of cluster against stay number for the current bird
    fig = px.line(
        bird_df,
        x='migration_index',
        y='cluster',
        markers=True,
        title=f'Migration Path for {bird_id} with Time Stayed in Each Cluster',
        hover_data=hover_columns
    )

    # Label every point with the time spent and the date range of the stay
    for row_label, row in bird_df.iterrows():
        annotation_text = f"{row['time_spent']}<br>({row['start_date']} to {row['end_date']})"
        fig.add_annotation(
            x=row['migration_index'],
            y=row['cluster'],
            text=annotation_text,
            showarrow=True,
            arrowhead=2,
            ax=0,
            ay=-40,
            font={'color': 'black'}
        )

    # Axis settings: tick only at the stay numbers / cluster values present
    x_axis_config = {
        'tickvals': bird_df['migration_index'],
        'dtick': 1
    }
    y_axis_config = {'tickvals': bird_df['cluster']}

    fig.update_layout(
        xaxis_title='Migration Index',
        yaxis_title='Cluster Number',
        xaxis=x_axis_config,
        yaxis=y_axis_config,
        showlegend=False
    )

    # Render the figure for the current bird
    fig.show()


### TOTAL TIME SPENT IN EACH CLUSTER IRRESPECTIVE OF MIGRATION

In [3]:
import pandas as pd

# Read the clustered tracking data
data = pd.read_excel("./insights/data_with_clusters.xlsx")

# Wrap the loaded data in a DataFrame
df = pd.DataFrame(data)

# Timestamps must be datetimes so they can be sorted and subtracted
df['timestamp'] = pd.to_datetime(df['timestamp'])

# One small per-bird table is collected here and concatenated at the end
all_birds_total_time_spent = []

# Process the birds one at a time
for individual_identifier in df['individual-local-identifier'].unique():
    # This bird's rows in time order with a fresh 0..n-1 index
    bird_df = (
        df[df['individual-local-identifier'] == individual_identifier]
        .copy()
        .sort_values(by='timestamp')
        .reset_index(drop=True)
    )

    # Flag rows whose cluster differs from the previous row's
    bird_df['previous_cluster'] = bird_df['cluster'].shift(1)
    bird_df['cluster_change'] = bird_df['cluster'].ne(bird_df['previous_cluster'])

    # Interval from each observation to the next one (NaT on the last row)
    bird_df['time_in_cluster'] = bird_df['timestamp'].shift(-1) - bird_df['timestamp']

    # Sum the intervals per cluster, regardless of how many separate stays the
    # bird made there, and tag the rows with the bird identifier
    total_time_spent = (
        bird_df.groupby('cluster')['time_in_cluster']
        .sum()
        .reset_index()
        .rename(columns={'time_in_cluster': 'total_time_spent'})
        .assign(**{'individual-local-identifier': individual_identifier})
    )

    all_birds_total_time_spent.append(total_time_spent)

# Stack the per-bird tables into one DataFrame
all_birds_total_time_spent_df = pd.concat(all_birds_total_time_spent, ignore_index=True)

# Show the total time spent in each cluster for all birds
print("\nTotal time spent in each cluster for all birds:")
print(all_birds_total_time_spent_df)



Total time spent in each cluster for all birds:
   cluster total_time_spent individual-local-identifier
0        2 77 days 22:54:00                      1_bill
1        2 80 days 07:46:00                    2_gabbar
2        3  0 days 00:30:00                    2_gabbar
3        0 14 days 11:01:00                   3_mogambo
4        1 22 days 04:15:00                   3_mogambo
5        2 16 days 18:46:00                   3_mogambo
6        0 50 days 23:37:00                 4_godfather
7        0 19 days 00:44:00                     5_rifle
8        0 31 days 04:18:00                    6_tkbhai
9        3 34 days 21:15:00                    6_tkbhai
