In [None]:
import pandas as pd
import pm4py

In [None]:
# Load the event log from the Excel workbook into a DataFrame.
# The path is relative to the directory the notebook is run from.
log_path = "data/log.xlsx"
log = pd.read_excel(log_path)

**VARIANTS EXPLORER**

In [None]:
# Column roles handed to pm4py: which column names the activity, the case
# identifier and the event timestamp.
xes_columns = {
    'activity_key': 'concept:name',
    'case_id_key': 'case:concept:name',
    'timestamp_key': 'time:timestamp',
}
# The cells below treat the result as a mapping {tuple of activities: case count}.
variants = pm4py.get_variants(log, **xes_columns)

In [None]:
# Collect every distinct activity that occurs in at least one variant.
# Iterating `variants` yields its keys, each of which is a sequence of activities.
all_events = {activity for variant in variants for activity in variant}
all_events

In [None]:
# Rank the (variant, count) pairs from most to least frequent; ties on the
# count are broken by the variant itself, also in descending order.
variants_count = list(variants.items())
variants_count.sort(key=lambda entry: (entry[1], entry[0]), reverse=True)
# Show the ten most frequent variants
variants_count[:10]

In [None]:
import matplotlib.pyplot as plt
from matplotlib import patches

# Step 4: Assign a colour to each activity.
# - Activities are sorted first so the activity -> colour mapping is the same on
#   every run; iterating the raw set gives an order that can change between
#   kernel restarts.
# - The index wraps around the palette size: an integer index past the end of a
#   colormap is clipped to its last entry, so without the modulo every activity
#   beyond the 20th would share one colour. With more than 20 activities the
#   colours repeat cyclically instead.
distinct_colors = plt.colormaps['tab20']  # Qualitative palette with 20 entries
activity_colors = {
    event: distinct_colors(i % distinct_colors.N)
    for i, event in enumerate(sorted(all_events))
}
# Step 5: Keep only the most frequent variants for the chart.
# `variants_count` is already sorted by descending case count, so a slice is
# enough; change the bound to plot more or fewer rows.
top_variants = variants_count[:10]
# Step 6: Prepare data for visualization: split the (variant, count) pairs
# into two parallel lists.
variant_names = [variant[0] for variant in top_variants]
frequencies = [variant[1] for variant in top_variants]
# Convert each tuple of activities to a single readable string
variant_names = [' -> '.join(variant) for variant in variant_names]
# Step 7: Express each plotted variant as a share of ALL cases in the log.
# The denominator is taken over every variant, not just the plotted ones;
# summing only the top slice would make the shown percentages add up to 100%
# no matter how many cases fall outside it.
total_frequency = sum(count for _variant, count in variants_count)
percentages = [(freq / total_frequency) * 100 for freq in frequencies]
# --- Layout constants -------------------------------------------------------
# Distances are in data units. The x-limits are set to the figure width in
# inches, so one data unit is roughly one inch horizontally.
font_size = 12
char_width = 0.015 * font_size  # Box width reserved per character of a label
padding = 0.05  # Only a small fraction of this is added to each box (see _box_width)
row_start_x = 0.07  # Left margin before the frequency label
label_width = 1.9  # Horizontal space reserved for the "Freq: ..." label
box_height = 0.3  # Height of every activity box
box_gap = 0.02  # Horizontal gap between consecutive boxes
row_step = 0.35  # Vertical distance between consecutive rows


def _abbreviate(activity):
    """Return the label drawn in a box: the first three letters of each word, concatenated."""
    return ''.join(word[:3] for word in activity.split())


def _box_width(short_name):
    """Return the width of the box that holds `short_name`."""
    return char_width * len(short_name) + 0.05 * padding


# Step 8: Calculate dynamic figure size
num_variants = len(top_variants)
# Character-count heuristic for the width. `default=0` keeps this from raising
# when there are no variants at all.
max_variant_length = max(
    (len(''.join(word[:3] for word in variant.split())) for variant in variant_names),
    default=0,
)
# Horizontal extent each row actually needs: margin + frequency label + boxes.
row_extents = [
    row_start_x + label_width
    + sum(_box_width(_abbreviate(activity)) + box_gap for activity in trace)
    for trace, _count in top_variants
]
# The heuristic alone ignores the label offset and underestimates the box
# widths, so rows could run past the x-limit and get clipped. Use whichever is
# larger so no row is ever cut off.
fig_width = max(
    0.16 * max_variant_length,
    max(row_extents, default=row_start_x + label_width),
)
fig_height = 0.5 * max(num_variants, 1)  # At least one row high so the figure stays valid
fig, ax = plt.subplots(figsize=(fig_width, fig_height))

y_offset = 0  # Bottom edge of the current row; rows are stacked downwards
# Iterate over the activity tuples themselves rather than splitting the joined
# display strings, so an activity name containing ' -> ' is still looked up
# correctly in `activity_colors`.
for (trace, frequency), percentage in zip(top_variants, percentages):
    x_offset = row_start_x  # Every row starts at the left margin
    # Frequency and percentage label at the beginning of the row,
    # vertically centred on the boxes
    ax.text(x_offset, y_offset + box_height / 2, f"Freq: {frequency} ({percentage:.2f}%)",
            fontsize=font_size, va='center', ha='left', color='black')
    x_offset += label_width  # Move past the label
    for activity in trace:
        short_event_name = _abbreviate(activity)
        box_width = _box_width(short_event_name)
        # Coloured rectangle for this activity
        ax.add_patch(patches.Rectangle((x_offset, y_offset), box_width, box_height,
                                       color=activity_colors[activity], alpha=0.8))
        # Abbreviated name centred inside the rectangle
        ax.text(x_offset + box_width / 2, y_offset + box_height / 2, short_event_name,
                fontsize=font_size, va='center', ha='center', color='black')
        # Advance to the start of the next box
        x_offset += box_width + box_gap
    # Move down for the next row (next variant)
    y_offset -= row_step

# Step 9: Set limits and remove axes
ax.set_xlim(0, fig_width)
ax.set_ylim(y_offset, 0.4)  # y_offset is now one row step below the last row
ax.axis('off')  # Hide axes for cleaner visualization

# Step 10: Show the plot
ax.set_title('Trace Explorer with Frequency and Percentage', fontsize=16, loc='left')
fig.tight_layout()
plt.show()