In [None]:
import plotly.graph_objects as go

# Convert columns to categorical type
migration_counts['previous_cluster'] = migration_counts['previous_cluster'].astype('category')
migration_counts['cluster'] = migration_counts['cluster'].astype('category')

# Set a constant value for the width of the paths
constant_value = 1  # This sets the width of each path to be the same

# Determine colors for incoming and outgoing links
incoming_color = "rgba(0, 0, 255, 0.5)"  # blue color for incoming links
outgoing_color = "rgba(255, 0, 0, 0.5)"  # red color for outgoing links

# Create a list of colors for each link
link_colors = []
for _, row in migration_counts.iterrows():
    if row['previous_cluster'] == row['cluster']:
        link_colors.append(outgoing_color)
    else:
        link_colors.append(incoming_color)

# Create a Sankey diagram
fig = go.Figure(data=[go.Sankey(
    node=dict(
        pad=150,
        thickness=20,
        line=dict(color="black", width=0.5),  # set the width of the node borders
        label=migration_counts['previous_cluster'].unique().astype(str),
        color="blue"
    ),
    link=dict(
        source=migration_counts['previous_cluster'].cat.codes,
        target=migration_counts['cluster'].cat.codes,
        value=[constant_value] * len(migration_counts),  # use a constant value for all links
        color=link_colors  # set the colors of the links
    )
)])

# Update the layout
fig.update_layout(title_text="Cluster Migration Sankey Diagram", font_size=15)

# Show the plot
fig.show()


In [1]:
import plotly.graph_objects as go
import pandas as pd

# Sample data for demonstration; replace with your actual migration_counts DataFrame
data = {
    'previous_cluster': [0.0, 1.0, 2.0, 2.0, 2.0, 3.0, 3.0],
    'cluster': [3, 2, 0, 1, 3, 0, 2],
    'count': [265, 1, 1, 1, 1, 265, 1]
}
migration_counts = pd.DataFrame(data)

# Convert columns to categorical type
migration_counts['previous_cluster'] = migration_counts['previous_cluster'].astype('category')
migration_counts['cluster'] = migration_counts['cluster'].astype('category')

# Define colors for incoming and outgoing links
incoming_color = "rgba(255, 0, 0, 0.5)"  # Red color for incoming links
outgoing_color = "rgba(0, 0, 255, 0.5)"  # Blue color for outgoing links

# Get unique clusters
clusters = pd.concat([migration_counts['previous_cluster'], migration_counts['cluster']]).unique()

# Create plots for each cluster
for selected_cluster in clusters:
    # Filter the data for the selected cluster
    incoming_data = migration_counts[migration_counts['cluster'] == selected_cluster]
    outgoing_data = migration_counts[migration_counts['previous_cluster'] == selected_cluster]
    
    # Combine incoming and outgoing data
    filtered_data = pd.concat([incoming_data, outgoing_data])
    
    # Set a constant width for all paths
    constant_width = 10  # Adjust this value as needed for your plot
    
    # Create a list of colors for each link based on migration type
    link_colors = []
    for _, row in filtered_data.iterrows():
        if row['cluster'] == selected_cluster:
            link_colors.append(incoming_color)  # Color incoming links red
        else:
            link_colors.append(outgoing_color)  # Color outgoing links blue
    
    # Get unique labels for nodes
    labels = pd.concat([filtered_data['previous_cluster'], filtered_data['cluster']]).unique()
    
    # Create a mapping of cluster labels to indices
    label_to_index = {label: idx for idx, label in enumerate(labels)}
    
    # Map categorical codes to indices
    source_indices = filtered_data['previous_cluster'].map(label_to_index)
    target_indices = filtered_data['cluster'].map(label_to_index)
    
    # Create a Sankey diagram for the selected cluster
    fig = go.Figure(data=[go.Sankey(
        node=dict(
            pad=15,
            thickness=20,
            line=dict(color="black", width=0.5),  # Set the width of the node borders
            label=labels.astype(str),
            color="blue"
        ),
        link=dict(
            source=source_indices,
            target=target_indices,
            value=[constant_value] * len(migration_counts),  # Use the count values for link width
            color=link_colors  # Set the colors of the links
        )
    )])
    
    # Update the layout
    fig.update_layout(
        title_text=f"Cluster {selected_cluster} Migration Sankey Diagram",
        font_size=15
    )
    
    # Show the plot
    fig.show()


NameError: name 'constant_value' is not defined

In [17]:
import pandas as pd
import plotly.express as px

# Read the dataset from the provided path
file_path = './insights/data_with_clusters.xlsx'
data = pd.read_excel(file_path)

# Ensure 'timestamp' is in datetime format
data['timestamp'] = pd.to_datetime(data['timestamp'])

# Extract year and month from the 'timestamp' column
data['year'] = data['timestamp'].dt.year
data['month'] = data['timestamp'].dt.month

# Convert year to string for categorical y-axis
data['year_str'] = data['year'].astype(str)

# Group by 'cluster', 'year', 'month', and 'individual-taxon-canonical-name' to identify species existence
species_existence_monthly = data.groupby(['cluster', 'year_str', 'month', 'individual-taxon-canonical-name']).size().reset_index(name='count')

# Group by 'cluster', 'year', 'month', and 'individual-local-identifier' to track individual birds in each cluster
individual_existence_monthly = data.groupby(['cluster', 'year_str', 'month', 'individual-local-identifier']).size().reset_index(name='count')

# Define distinct shapes and colors
shapes = ['circle', 'square', 'diamond', 'cross', 'x']
colors = px.colors.qualitative.Plotly  # A set of distinct colors

# Plot 1: Species Existence Month-wise per Cluster (Scatter plot with distinct shapes and colors)
fig_species = px.scatter(species_existence_monthly, 
                        x='month', 
                        y='year_str', 
                        symbol='individual-taxon-canonical-name',  # Use shapes to differentiate species
                        color='individual-taxon-canonical-name',  # Use colors to differentiate species
                        facet_col='cluster', 
                        facet_col_wrap=4,  # Adjust the number of columns for facet plots
                        title='Species Existence in Clusters (Month-wise)',
                        labels={'count': 'Number of Records', 'individual-taxon-canonical-name': 'Species'},
                        symbol_sequence=shapes,  # Use a sequence of shapes
                        color_discrete_sequence=colors  # Use a sequence of colors
                        )

fig_species.update_layout(xaxis_title='Month', yaxis_title='Year',
                          xaxis_tickformat='%B',  # To show month names instead of numbers
                          yaxis_type='category',  # Use category type for y-axis
                          xaxis_title_standoff=25)

# Plot 2: Individual Existence in Each Cluster (Month-wise, Scatter plot with distinct shapes and colors)
fig_individuals = px.scatter(individual_existence_monthly, 
                            x='month', 
                            y='year_str', 
                            symbol='individual-local-identifier',  # Use shapes to differentiate individuals
                            color='individual-local-identifier',  # Use colors to differentiate individuals
                            facet_col='cluster', 
                            facet_col_wrap=4,  # Adjust the number of columns for facet plots
                            title='Individual Birds Existence in Clusters (Month-wise)',
                            labels={'count': 'Number of Records', 'individual-local-identifier': 'Individual Bird'},
                            symbol_sequence=shapes,  # Use a sequence of shapes
                            color_discrete_sequence=colors  # Use a sequence of colors
                            )

fig_individuals.update_layout(xaxis_title='Month', yaxis_title='Year',
                              xaxis_tickformat='%B',  # To show month names instead of numbers
                              yaxis_type='category',  # Use category type for y-axis
                              xaxis_title_standoff=25)

# Show the interactive plots
fig_species.show()
fig_individuals.show()
