In [21]:
import json
import pandas as pd
from collections import defaultdict
import re, os
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm

In [9]:
def read_json(file_path):
    """Load and return the parsed contents of a JSON file.

    Parameters
    ----------
    file_path : str or path-like
        Location of the JSON document to read.

    Returns
    -------
    object
        Whatever ``json.load`` produces for the document (dict, list, ...).

    Raises
    ------
    OSError
        If the file cannot be opened.
    json.JSONDecodeError
        If the content is not valid JSON.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding (e.g. cp1252 on Windows).
    with open(file_path, 'r', encoding='utf-8') as json_file:
        return json.load(json_file)

In [10]:
# --- Run configuration -------------------------------------------------
# `version` selects which processed data set is used, `method` is the prefix
# of the result files read from "fformations_each_version/".
version = "3"
method = "PO"

# Raw strings keep the Windows backslashes literal instead of relying on
# unrecognised escape sequences such as "\O" or "\V" (which Python warns about).
root_path = r"D:\OneDrive - Delft University of Technology\Thesis\DEBBIE_STARFISH_2223"
ff_file_path = "fformations_each_version/" + method + "_f-formations_psecond_" + version + ".json"
ge_file_path = "fformations_each_version/" + method + "_Group_encoding_" + version + ".json"
gc_file_path = "fformations_each_version/" + method + "_aggregated_frequent_formations_" + version + ".json"


# Each supported version has its own data directory and file-name pattern;
# the captured group of the pattern is the six-digit date in the file name.
if version == "3":
    directory_path = root_path + r"\Version 3_ Kalman_Fixed_Data + Overlap Filter\Synched_Data_GR0_2to2_ANGLE45\COTALK"
    # The dot before "CSV" is escaped so it only matches a literal ".".
    file_pattern = re.compile(r'DAYCOTALK_(\d{6})_COTALK0_22_DEN_072924_V21588968411\.CSV')  # 3

elif version == "2":
    directory_path = root_path + r"\Version 2_Kalman_Fixed_Data\Synched_Data_GR0_2to2_ANGLE45\COTALK"
    file_pattern = re.compile(r'DAYCOTALK_(\d{6})_COTALK0_22_DEN_072224_V22072780461\.CSV')  # 2

else:
    # Version "1" (and anything else) has no directory/pattern configured here.
    # Fail immediately with a clear message instead of printing and then
    # crashing below with a NameError on `directory_path`.
    raise ValueError(f"Unsupported version {version!r}: expected '2' or '3'")

# Sorted so the listing is deterministic regardless of the OS listing order.
files = sorted(f for f in os.listdir(directory_path) if file_pattern.match(f))
files, len(files)

(['DAYCOTALK_013023_COTALK0_22_DEN_072924_V21588968411.CSV',
  'DAYCOTALK_020123_COTALK0_22_DEN_072924_V21588968411.CSV',
  'DAYCOTALK_031323_COTALK0_22_DEN_072924_V21588968411.CSV',
  'DAYCOTALK_031523_COTALK0_22_DEN_072924_V21588968411.CSV',
  'DAYCOTALK_041723_COTALK0_22_DEN_072924_V21588968411.CSV',
  'DAYCOTALK_041923_COTALK0_22_DEN_072924_V21588968411.CSV',
  'DAYCOTALK_061523_COTALK0_22_DEN_072924_V21588968411.CSV',
  'DAYCOTALK_101922_COTALK0_22_DEN_072924_V21588968411.CSV',
  'DAYCOTALK_102122_COTALK0_22_DEN_072924_V21588968411.CSV',
  'DAYCOTALK_111422_COTALK0_22_DEN_072924_V21588968411.CSV',
  'DAYCOTALK_111622_COTALK0_22_DEN_072924_V21588968411.CSV',
  'DAYCOTALK_120522_COTALK0_22_DEN_072924_V21588968411.CSV',
  'DAYCOTALK_120722_COTALK0_22_DEN_072924_V21588968411.CSV'],
 13)

In [11]:
# Read the three JSON result files whose paths were built in the
# configuration cell, in the order: f-formations, group encodings, group counts.
fformations_ps, group_encodings, group_count = (
    read_json(json_path)
    for json_path in (ff_file_path, ge_file_path, gc_file_path)
)

In [12]:
def plot_scene(scene, time_key, ax=None, sigma=None, formations=None, save_path=None):
    """Draw every subject's position and heading for one scene.

    Subjects are discovered from the columns of ``scene`` that contain
    ``'_KC_O'``; for each one the first row of ``<subject>_KC_X``,
    ``<subject>_KC_Y`` and ``<subject>_KC_O`` is plotted as a point, an arrow
    and a text label. Subjects with non-finite values are reported via
    ``print`` and skipped.

    Parameters
    ----------
    scene : pandas.DataFrame
        Frame holding the ``*_KC_X`` / ``*_KC_Y`` / ``*_KC_O`` columns. Only
        its first row is read.
    time_key : hashable
        Key used to look up the groups in ``formations``; also shown in the
        plot title.
    ax : matplotlib axes, optional
        Axes to draw on. When omitted a new 10x8 figure is created.
    sigma : object, optional
        Only displayed in the title.
    formations : mapping, optional
        Maps a time key to a mapping ``group index -> members``. Members of
        the same group share a colour; the group index must be convertible
        to ``int`` and is used to index a colour array of length
        ``len(groups)``. Subjects in no group are drawn in grey.
    save_path : str, optional
        When given, the figure is written there (parent directories are
        created as needed). Otherwise ``plt.show()`` is called.

    Returns
    -------
    None
    """
    # Track ownership of the figure: only a figure created here is closed
    # after saving; a caller-supplied one is left open for the caller.
    created_figure = ax is None
    if created_figure:
        fig, ax = plt.subplots(figsize=(10, 8))
    else:
        # Reuse the figure the supplied axes belongs to, so saving works
        # instead of failing on an undefined `fig`.
        fig = ax.figure

    # Resolve the groups for this time key once; empty when unavailable.
    groups = {}
    colors = ()
    if formations and (time_key in formations.keys()):
        groups = formations[time_key]
        colors = cm.rainbow(np.linspace(0, 1, len(groups)))  # Color map for different groups

    # Extract subjects and their data
    subjects = [col.split('_')[0] for col in scene.columns if '_KC_O' in col]

    for subject in subjects:
        # Extract the X, Y, O coordinates for each subject
        x = scene[f'{subject}_KC_X'].values[0]
        y = scene[f'{subject}_KC_Y'].values[0]
        orientation = scene[f'{subject}_KC_O'].values[0]

        if not (np.isfinite(x) and np.isfinite(y) and np.isfinite(orientation)):
            print(f"Non-finite values detected for subject {subject}: x={x}, y={y}, orientation={orientation}")
            continue  # Skip this subject if values are non-finite

        group_color = 'grey'  # Default color for subjects not in any group
        for group_idx, group in groups.items():
            if subject in group:
                group_color = colors[int(group_idx)]
                break

        ax.scatter(x, y, label=subject, color=group_color)

        # The orientation is converted from degrees to radians and drawn as
        # an arrow of length 0.5 starting at the subject's position.
        orientation_rad = np.radians(orientation)
        dx = np.cos(orientation_rad)
        dy = np.sin(orientation_rad)
        ax.arrow(x, y, dx * 0.5, dy * 0.5, head_width=0.2, head_length=0.4, fc=group_color, ec='black')

        # Annotate subject
        ax.text(x, y, subject, fontsize=12, ha='right', color=group_color)

    # Set axis limits, ticks, and aspect ratio
    ax.set_xlim(0, 14.0)
    ax.set_ylim(0, 8.4)
    ax.set_xticks(range(0, 15, 1))
    ax.set_yticks(range(0, 9, 1))
    ax.set_aspect('equal', adjustable='box')
    ax.set_xlabel('KC_X')
    ax.set_ylabel('KC_Y')
    ax.set_title(f'{time_key} Sigma={sigma}')
    ax.legend(loc='lower left')
    ax.grid(True)

    # Save the figure if save_path is provided
    if save_path:
        # A bare file name has an empty dirname, which os.makedirs rejects.
        target_dir = os.path.dirname(save_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)
        fig.savefig(save_path, bbox_inches='tight')
        if created_figure:
            plt.close(fig)  # Close the figure after saving to avoid displaying it

    else:  # Display if no save path is provided
        plt.show()


In [13]:
# Sum each group's count over all days, then rank groups from most to
# least frequent.
allday_count = defaultdict(int)
for per_day_counts in group_count.values():
    for composition, occurrences in per_day_counts.items():
        allday_count[composition] += occurrences

sorted_allday_count = sorted(
    allday_count.items(),
    key=lambda pair: pair[1],
    reverse=True,
)

# Display the ranked (group, total count) pairs
sorted_allday_count

[("('32', '45')", 3169),
 ("('29', '30')", 2886),
 ("('41', '44')", 2355),
 ("('41', '42')", 2355),
 ("('41', '46')", 2248),
 ("('41', '43')", 2248),
 ("('27', '44')", 2209),
 ("('42', '46')", 2199),
 ("('28', '29')", 2185),
 ("('27', '29')", 2132),
 ("('32', '46')", 2092),
 ("('45', '46')", 2076),
 ("('32', '44')", 2061),
 ("('32', '41')", 1997),
 ("('28', '46')", 1892),
 ("('27', '31')", 1881),
 ("('32', '42')", 1865),
 ("('28', '30')", 1852),
 ("('42', '44')", 1840),
 ("('29', '41')", 1811),
 ("('41', '45')", 1760),
 ("('43', '44')", 1738),
 ("('29', '31')", 1737),
 ("('44', '45')", 1694),
 ("('28', '31')", 1669),
 ("('29', '44')", 1545),
 ("('31', '45')", 1541),
 ("('32', '43')", 1540),
 ("('27', '41')", 1533),
 ("('27', '46')", 1526),
 ("('44', '46')", 1526),
 ("('30', '43')", 1510),
 ("('27', '28')", 1458),
 ("('30', '32')", 1453),
 ("('42', '45')", 1381),
 ("('30', '46')", 1368),
 ("('27', '33')", 1349),
 ("('28', '41')", 1347),
 ("('43', '45')", 1346),
 ("('28', '42')", 1291),


In [14]:
# Load one coordinate CSV per day (day indices 0..12) into a list, so that
# coord_dfs[i] is the frame for day index i.
coord_dfs = [
    pd.read_csv(
        "fformations_each_version/coord/" + method + "_coordinates_" + str(day_index) + "_" + version + ".csv"
    )
    for day_index in range(13)
]

In [15]:
# Collect, for every frequent group of interest, all (day, timestamp) pairs
# at which exactly that group appears among the detected f-formations.
# Layout: {group composition (JSON string): [(day, timestamp), ...]}
group_trace = defaultdict(list)

# Alternative selection that also includes the pair 29/30:
# high_fre_groups = ['["32", "45"]', '["29", "30"]', '["27", "32", "43"]', '["41", "42", "46"]']
high_fre_groups = ['["32", "45"]', '["27", "32", "43"]', '["41", "42", "46"]']

for target in high_fre_groups:
    for day, all_fformations in fformations_ps.items():
        for timestamp, fformations in all_fformations.items():
            # Encode every group as a JSON list of members in numeric order,
            # the same form the targets are written in.
            encoded_groups = {
                json.dumps(sorted(members, key=int))
                for members in fformations.values()
            }
            if target not in encoded_groups:
                continue
            group_trace[target].append((day, timestamp))
group_trace

defaultdict(list,
            {'["32", "45"]': [('1', '2023-01-30 09:47:10.100000'),
              ('1', '2023-01-30 09:47:11.100000'),
              ('1', '2023-01-30 09:47:12.100000'),
              ('1', '2023-01-30 09:47:13.100000'),
              ('1', '2023-01-30 09:47:14.100000'),
              ('1', '2023-01-30 09:47:15.100000'),
              ('1', '2023-01-30 09:47:20.100000'),
              ('1', '2023-01-30 09:47:24.100000'),
              ('1', '2023-01-30 09:47:25.100000'),
              ('1', '2023-01-30 09:47:36.100000'),
              ('1', '2023-01-30 09:47:37.100000'),
              ('1', '2023-01-30 09:47:38.100000'),
              ('1', '2023-01-30 09:47:39.100000'),
              ('1', '2023-01-30 09:47:42.100000'),
              ('1', '2023-01-30 09:47:43.100000'),
              ('1', '2023-01-30 09:47:44.100000'),
              ('1', '2023-01-30 09:47:50.100000'),
              ('1', '2023-01-30 09:47:54.100000'),
              ('1', '2023-01-30 09:47:58.100000'

In [None]:
# Render and save one picture per recorded occurrence of each frequent group.
output_folder = 'high_fre_fformation_pics'
max_pics_per_group = 500  # upper bound on the number of pictures saved per group

# Day indices whose TIME column has already been converted to datetimes, so
# the conversion runs once per day instead of once per picture.
parsed_days = set()

for group, times in group_trace.items():
    # `group` is a JSON-encoded member list such as '["32", "45"]'; decoding
    # it keeps every member in the file name (fixed character positions only
    # covered the first two members, so larger groups could collide).
    group_label = "_".join(str(member) for member in json.loads(group))

    # Slicing caps the output at exactly `max_pics_per_group` pictures.
    for i, (day_key, timestamp) in enumerate(times[:max_pics_per_group]):
        day = int(day_key) - 1  # day keys are offset by one relative to coord_dfs indices
        coords = coord_dfs[day]
        if day not in parsed_days:
            coords['TIME'] = pd.to_datetime(coords['TIME'])
            parsed_days.add(day)
        coord_per_scene = coords[coords['TIME'] == pd.Timestamp(timestamp)]

        # Create the file name for saving
        save_path = os.path.join(output_folder, f'{group_label}_time_{i}.png')

        # Call plot_scene with the save_path
        plot_scene(coord_per_scene, formations=fformations_ps[str(day+1)], time_key=timestamp, sigma=1.0, save_path=save_path)

Non-finite values detected for subject 29: x=nan, y=nan, orientation=nan
Non-finite values detected for subject 29: x=nan, y=nan, orientation=nan
Non-finite values detected for subject 29: x=nan, y=nan, orientation=nan
Non-finite values detected for subject 44: x=nan, y=nan, orientation=nan
Non-finite values detected for subject 44: x=nan, y=nan, orientation=nan
Non-finite values detected for subject 45: x=nan, y=nan, orientation=nan
Non-finite values detected for subject 44: x=nan, y=nan, orientation=nan
Non-finite values detected for subject 45: x=nan, y=nan, orientation=nan
Non-finite values detected for subject 44: x=nan, y=nan, orientation=nan
Non-finite values detected for subject 45: x=nan, y=nan, orientation=nan
Non-finite values detected for subject 44: x=nan, y=nan, orientation=nan
Non-finite values detected for subject 45: x=nan, y=nan, orientation=nan
Non-finite values detected for subject 44: x=nan, y=nan, orientation=nan
Non-finite values detected for subject 45: x=nan, y