In [111]:
# read the data
import pandas as pd
df = pd.read_csv('df_sa_time.csv')

In [112]:
#(prepare)Phase Distance Calculation
import pandas as pd
import numpy as np

# Initialize variables to store the previous position
def calculate_distances(group):
    prev_posX = None
    prev_posZ = None

    for index, row in group.iterrows():
        if row['phase'] in ['planning', 'navigation']:
            if prev_posX is not None and prev_posZ is not None:
                # Calculate Euclidean distance between current and previous position
                distance = np.sqrt((row['posX'] - prev_posX)**2 + (row['posZ'] - prev_posZ)**2)
                if row['phase'] == 'planning':
                    group.at[index, 'planning_distance'] = distance
                else:
                    group.at[index, 'navigation_distance'] = distance
            # Update previous position
            prev_posX = row['posX']
            prev_posZ = row['posZ']
        else:
            # Reset previous position when phase is not planning or navigation
            prev_posX = None
            prev_posZ = None

    return group

# Apply the function to each group
df = df.groupby('trajectory_Id').apply(calculate_distances)

print(df)


To preserve the previous behavior, use

	>>> .groupby(..., group_keys=False)


	>>> .groupby(..., group_keys=True)
  df = df.groupby('trajectory_Id').apply(calculate_distances)


        Unnamed: 0.2  Unnamed: 0.1  Unnamed: 0                 DateTime  \
0                  0             0           0  2023-09-21 16:44:31.485   
1                  1             1           1  2023-09-21 16:44:31.486   
2                  2             2           2  2023-09-21 16:44:31.507   
3                  3             3           3  2023-09-21 16:44:31.535   
4                  4             4           4  2023-09-21 16:44:31.561   
...              ...           ...         ...                      ...   
159732        159732        159732      159732  2023-10-31 17:18:32.424   
159733        159733        159733      159733  2023-10-31 17:18:32.811   
159734        159734        159734      159734  2023-10-31 17:18:33.355   
159735        159735        159735      159735  2023-10-31 17:18:33.907   
159736        159736        159736      159736  2023-10-31 17:18:33.907   

             posX       posZ                 taskState  \
0      -1260.3060  -908.9736  setNavigati

In [113]:
# (prepare) Function to assign event numbers to consecutive blocks
def assign_event_numbers(df, column_name, event_name, target_value):
    event_counter = 0
    prev_value = None
    event_list = []

    for value in df[column_name]:
        if value == target_value:
            if prev_value != target_value:
                event_counter += 1
            event_list.append(event_counter)
        else:
            event_list.append(0)
        prev_value = value
    
    df[event_name] = event_list
    return df

# Assign inactive_event for consecutive rows where lockscreen is 1
df = assign_event_numbers(df, 'lockscreen', 'inactive_event', 1)

# Assign active_event for consecutive rows where lockscreen is 0
df = assign_event_numbers(df, 'lockscreen', 'active_event', 0)

print(df)

        Unnamed: 0.2  Unnamed: 0.1  Unnamed: 0                 DateTime  \
0                  0             0           0  2023-09-21 16:44:31.485   
1                  1             1           1  2023-09-21 16:44:31.486   
2                  2             2           2  2023-09-21 16:44:31.507   
3                  3             3           3  2023-09-21 16:44:31.535   
4                  4             4           4  2023-09-21 16:44:31.561   
...              ...           ...         ...                      ...   
159732        159732        159732      159732  2023-10-31 17:18:32.424   
159733        159733        159733      159733  2023-10-31 17:18:32.811   
159734        159734        159734      159734  2023-10-31 17:18:33.355   
159735        159735        159735      159735  2023-10-31 17:18:33.907   
159736        159736        159736      159736  2023-10-31 17:18:33.907   

             posX       posZ                 taskState  \
0      -1260.3060  -908.9736  setNavigati

In [30]:
# (result) feature of Route completion rate

# # Step 1: Calculate the total navigation distance for each trajectory
trajectory_distance = df.groupby('trajectory_Id')['navigation_distance'].sum().reset_index()
trajectory_distance.columns = ['trajectory_Id', 'trajectory_distance']

# Step 2: Calculate the event distance for each inactive_event
# Function to calculate the required distance
def calculate_distance(sub_df):
    # Find the rows where mapInteractions is 'mapLog:isScreenLocked+True'
    event_1_rows = sub_df[sub_df['mapInteractions'] == 'mapLog:isScreenLocked+True']
    
    # Initialize a list to store distances
    distances = []
    
    # Iterate over event_1_rows and calculate distance
    for _, event_1_row in event_1_rows.iterrows():
        event_1_index = event_1_row.name
        first_row_index = sub_df.index[0]
        
        # Sum the navigation_distance from the first row to the current event_1_row
        distance = sub_df.loc[first_row_index:event_1_index, 'navigation_distance'].sum()
        
        distances.append({
            'trajectory_Id': event_1_row['trajectory_Id'],
            'event_1_index': event_1_index,
            'distance': distance
        })
    
    return pd.DataFrame(distances)

# Group by trajectory_Id and apply the function
result_df = df.groupby('trajectory_Id').apply(calculate_distance).reset_index(drop=True)
result_df = result_df.merge(trajectory_distance, how= 'left', on= 'trajectory_Id')
result_df['completion_rate'] = (result_df['distance'] / result_df ['trajectory_distance']).round(2)

print(result_df)


      trajectory_Id  event_1_index    distance  trajectory_distance  \
0               1.0           25.0   14.430352           144.604313   
1               1.0           58.0   42.912766           144.604313   
2               2.0          169.0   20.557475           309.866157   
3               2.0          226.0   74.331894           309.866157   
4               2.0          352.0  225.905361           309.866157   
...             ...            ...         ...                  ...   
3118          863.0       159607.0    4.887072           163.027811   
3119          863.0       159630.0   30.624935           163.027811   
3120          863.0       159655.0   64.018226           163.027811   
3121          863.0       159679.0   90.653448           163.027811   
3122          863.0       159705.0  124.019408           163.027811   

      completion_rate  
0                0.10  
1                0.30  
2                0.07  
3                0.24  
4                0.73  
...

In [114]:
# renew
# Step 1: Calculate the total navigation distance for each trajectory
trajectory_distance = df.groupby('trajectory_Id')['navigation_distance'].sum().reset_index()
trajectory_distance.columns = ['trajectory_Id', 'trajectory_distance']

# Step 2: Calculate the event distance for each inactive_event
def calculate_distance(sub_df):
    # Find the rows where mapInteractions is 'mapLog:isScreenLocked+True'
    event_1_rows = sub_df[sub_df['mapInteractions'] == 'mapLog:isScreenLocked+True']
    
    # Initialize a list to store distances
    distances = []
    
    # Iterate over event_1_rows and calculate distance
    for _, event_1_row in event_1_rows.iterrows():
        event_1_index = event_1_row.name
        first_row_index = sub_df.index[0]
        
        # Sum the navigation_distance from the first row to the current event_1_row
        distance = sub_df.loc[first_row_index:event_1_index, 'navigation_distance'].sum()
        
        distances.append({
            'trajectory_Id': event_1_row['trajectory_Id'],
            'event_1_index': event_1_index,
            'distance': distance,
            'inactive_event': event_1_row['inactive_event']
        })
    
    return pd.DataFrame(distances)

# Group by trajectory_Id and apply the function
result_df = df.groupby('trajectory_Id').apply(calculate_distance).reset_index(drop=True)

# Merge with trajectory_distance to get the total navigation distance for each trajectory
result_df = result_df.merge(trajectory_distance, how='left', on='trajectory_Id')

# Calculate the completion rate
result_df['completion_rate'] = (result_df['distance'] / result_df['trajectory_distance']).round(2)

# Display the result
print(result_df)

      trajectory_Id  event_1_index    distance  inactive_event  \
0               1.0           25.0   14.430352             1.0   
1               1.0           58.0   42.912766             2.0   
2               2.0          169.0   20.557475             3.0   
3               2.0          226.0   74.331894             4.0   
4               2.0          352.0  225.905361             5.0   
...             ...            ...         ...             ...   
3118          863.0       159607.0    4.887072          3119.0   
3119          863.0       159630.0   30.624935          3120.0   
3120          863.0       159655.0   64.018226          3121.0   
3121          863.0       159679.0   90.653448          3122.0   
3122          863.0       159705.0  124.019408          3123.0   

      trajectory_distance  completion_rate  
0              144.604313             0.10  
1              144.604313             0.30  
2              309.866157             0.07  
3              309.866157  

In [74]:
# (result) feature of previous active time.

import pandas as pd

df = pd.read_csv('df_sa_time.csv')

# Convert DateTime to datetime object
df['DateTime'] = pd.to_datetime(df['DateTime'])

# Initialize event number and previous active time
time_previous = []

event_number = 0

event_start = False

# Group by 'trajectory_Id' and iterate over each group
for trajectory_id, group in df.groupby('trajectory_Id'):
    # Initialize event number and previous active time for each group
    # event_number = 0
    previous_active_time = 0
    # event_start = False

    for i, row in group.iterrows():
        if row['lockscreen'] == 0:
            if not event_start:
                event_number += 1
                event_start = True
                previous_active_time = 0
            previous_active_time += row['active_time']
        else:
            time_previous.append({
                'trajectory_Id': trajectory_id,
                'event': event_number,
                'time_previous': previous_active_time
            })
            event_start = False

time_previous_df = pd.DataFrame(time_previous)
time_previous_df = time_previous_df.drop_duplicates(subset=['event'])

# Reset index
time_previous_df = time_previous_df.reset_index(drop=True)
# Display the results
print(time_previous_df.head(20))

    trajectory_Id  event  time_previous
0               1      1          9.889
1               1      2          9.850
2               2      3         10.155
3               2      4         10.262
4               2      5          9.969
5               3      6         13.382
6               3      7         10.024
7               3      8         13.707
8               3      9          9.530
9               3     10          9.671
10              3     11         10.086
11              4     12         10.040
12              4     13          9.678
13              4     14          9.934
14              4     15         23.105
15              4     16          9.489
16              4     17          9.864
17              5     18          9.730
18              5     19         10.121
19              5     20          9.782


In [58]:
# (prepare) distinguish inactive phase and active phase
df = pd.read_csv('df_sa_time.csv')

# (prepare) Function to assign event numbers to consecutive blocks
def assign_event_numbers(df, column_name, event_name, target_value):
    event_counter = 0
    prev_value = None
    event_list = []

    for value in df[column_name]:
        if value == target_value:
            if prev_value != target_value:
                event_counter += 1
            event_list.append(event_counter)
        else:
            event_list.append(0)
        prev_value = value
    
    df[event_name] = event_list
    return df

# Assign inactive_event for consecutive rows where lockscreen is 1
df = assign_event_numbers(df, 'lockscreen', 'inactive_event', 1)

# Assign active_event for consecutive rows where lockscreen is 0
df = assign_event_numbers(df, 'lockscreen', 'active_event', 0)


df_inactive = df[df['inactive_event'] != 0]
df_active = df[df['inactive_event'] == 0]



df_inactive.to_csv('df_inactive.csv')
df_active.to_csv('df_active.csv')

In [76]:
# (prepare) distinguish inactive phase and active phase

# use to ignore the last active phase
df = pd.read_csv('df_sa_time.csv')
def filter_last_consecutive_zero(group):
    # Identify changes in 'lockscreen'
    group['lockscreen_change'] = group['lockscreen'].ne(group['lockscreen'].shift())
    
    # Mark the end of consecutive segments
    group['segment_end'] = group['lockscreen_change'].cumsum()
    
    # Get the last value of each segment
    last_values = group.groupby('segment_end')['lockscreen'].last()
    
    # Check if the last value is '0'
    last_segment = last_values.iloc[-1] if not last_values.empty else None
    
    # Filter out rows if the last segment value is '0'
    if last_segment == 0:
        return group[group['segment_end'] != group['segment_end'].max()]
    else:
        return group

# Apply the function to each group
filtered_df = df.groupby('trajectory_Id', group_keys=False).apply(filter_last_consecutive_zero)

# Drop the helper columns
filtered_df = filtered_df.drop(columns=['lockscreen_change', 'segment_end'])

print(filtered_df)

#___________________________________________________________
# (prepare) Function to assign event numbers to consecutive blocks
def assign_event_numbers(df, column_name, event_name, target_value):
    event_counter = 0
    prev_value = None
    event_list = []

    for value in df[column_name]:
        if value == target_value:
            if prev_value != target_value:
                event_counter += 1
            event_list.append(event_counter)
        else:
            event_list.append(0)
        prev_value = value
    
    df[event_name] = event_list
    return df

# Assign inactive_event for consecutive rows where lockscreen is 1
filtered_df = assign_event_numbers(filtered_df, 'lockscreen', 'inactive_event', 1)

# Assign active_event for consecutive rows where lockscreen is 0
filtered_df = assign_event_numbers(filtered_df, 'lockscreen', 'active_event', 0)


df_inactive = filtered_df[filtered_df['inactive_event'] != 0]
df_active = filtered_df[filtered_df['inactive_event'] == 0]



df_inactive.to_csv('df_inactive.csv')
df_active.to_csv('df_active.csv')


        Unnamed: 0.2  Unnamed: 0.1  Unnamed: 0                 DateTime  \
0                  0             0           0  2023-09-21 16:44:31.485   
1                  1             1           1  2023-09-21 16:44:31.486   
2                  2             2           2  2023-09-21 16:44:31.507   
3                  3             3           3  2023-09-21 16:44:31.535   
4                  4             4           4  2023-09-21 16:44:31.561   
...              ...           ...         ...                      ...   
159727        159727        159727      159727  2023-10-31 17:18:30.136   
159728        159728        159728      159728  2023-10-31 17:18:30.675   
159729        159729        159729      159729  2023-10-31 17:18:31.209   
159730        159730        159730      159730  2023-10-31 17:18:31.738   
159731        159731        159731      159731  2023-10-31 17:18:32.269   

             posX       posZ                 taskState  \
0      -1260.3060  -908.9736  setNavigati

In [33]:
# (prepare) feature of whether cross the road current phase _ create road network
from shapely.geometry import Point, LineString

# Define road network using nodes and edges (graph representation)
nodes = {
    'v11': Point(-1250, -632),    # E1
    'v12': Point(-1250, -932),
    'v13': Point(-1250, -1232),

    'v21': Point(-1150, -532),  # E1
    'v22': Point(-1150, -632),
    'v23': Point(-1150, -782),
    'v24': Point(-1150, -932),
    'v25': Point(-1150, -1032),
    'v26': Point(-1150, -1132),
    'v27': Point(-1150, -1232),

    'v31': Point(-1050, -532),
    'v32': Point(-1050, -782),
    'v33': Point(-1050, -1032),
    'v34': Point(-1050, -1132),

    'v41': Point(-950, -532),
    'v42': Point(-950, -632),
    'v43': Point(-950, -782),
    'v44': Point(-950, -932),
    'v45': Point(-950, -1032),
    'v46': Point(-950, -1132),
    'v47': Point(-950, -1232),

    'v51': Point(-850, -532),
    'v52': Point(-850, -632),
    'v53': Point(-850, -732),
    'v54': Point(-850, -832),
    'v55': Point(-850, -932),
    'v56': Point(-850, -1032),
    'v57': Point(-850, -1132),
    'v58': Point(-850, -1232),

    'v61': Point(-750, -532),
    'v62': Point(-750, -732),
    'v63': Point(-750, -832),
    'v64': Point(-750, -1032),
    'v65': Point(-750, -1132),

    'v71': Point(-650, -1132),
    'v72': Point(-650, -1232),

    'v81': Point(-600, -532), #symmetric 
    'v82': Point(-600, -592),
    'v83': Point(-600, -672),
    'v84': Point(-600, -732),
    'v85': Point(-600, -832),
    'v86': Point(-600, -932),
    'v87': Point(-600, -1032),

    'v-71': Point(-550, -1132),
    'v-72': Point(-550, -1232),

    'v-61': Point(-450, -532),
    'v-62': Point(-450, -732),
    'v-63': Point(-450, -832),
    'v-64': Point(-450, -1032),
    'v-65': Point(-450, -1132),

    'v-51': Point(-350, -532),
    'v-52': Point(-350, -632),
    'v-53': Point(-350, -732),
    'v-54': Point(-350, -832),
    'v-55': Point(-350, -932),
    'v-56': Point(-350, -1032),
    'v-57': Point(-308, -1132),
    'v-58': Point(-308, -1232),

    'v-41': Point(-250, -532),
    'v-42': Point(-250, -632),
    'v-43': Point(-250, -732),
    'v-44': Point(-250, -832),
    'v-45': Point(-250, -932),
    'v-46': Point(-250, -1032),
    'v-47': Point(-250, -1132),
    'v-48': Point(-250, -1232),
    
    'v-31': Point(-150, -532),
    'v-32': Point(-150, -732),
    'v-33': Point(-150, -832),
    'v-34': Point(-150, -1032),
    'v-35': Point(-150, -1132),


    'v-21': Point(-50, -532),  # E1
    'v-22': Point(-50, -632),
    'v-23': Point(-50, -732),
    'v-24': Point(-50, -832),
    'v-25': Point(-50, -932),
    'v-26': Point(-50, -1032),
    'v-27': Point(-50, -1132),
    'v-28': Point(-50, -1232),

    'v-11': Point(50, -632),    # E1
    'v-12': Point(50, -932),
    'v-13': Point(50, -1232),

    # circle
    'c11': Point(-642, -632),    
    'c12': Point(-558, -632),


}

#Define road segments as LineString objects between nodes
road_segments = {'v11' :LineString([nodes['v11'], nodes['v12']]),
                 'v12' :LineString([nodes['v12'], nodes['v13']]),
                 
                 
                 'v21' :LineString([nodes['v21'], nodes['v22']]),
                 'v22' :LineString([nodes['v22'], nodes['v23']]),
                 'v23' :LineString([nodes['v23'], nodes['v24']]),
                 'v24' :LineString([nodes['v24'], nodes['v25']]),
                 'v25' :LineString([nodes['v26'], nodes['v27']]),

                 'v31' :LineString([nodes['v31'], nodes['v32']]),
                 'v32' :LineString([nodes['v32'], nodes['v33']]),
                 'v33' :LineString([nodes['v33'], nodes['v34']]),

                 'v41' :LineString([nodes['v41'], nodes['v42']]),
                 'v42' :LineString([nodes['v42'], nodes['v43']]),
                 'v43' :LineString([nodes['v43'], nodes['v44']]),
                 'v44' :LineString([nodes['v44'], nodes['v45']]),
                 'v45' :LineString([nodes['v46'], nodes['v47']]),

                 'v51' :LineString([nodes['v51'], nodes['v52']]),
                 'v52' :LineString([nodes['v52'], nodes['v53']]),
                 'v53' :LineString([nodes['v54'], nodes['v55']]),
                 'v54' :LineString([nodes['v55'], nodes['v56']]),
                 'v55' :LineString([nodes['v57'], nodes['v58']]),

                 'v61' :LineString([nodes['v62'], nodes['v63']]),
                 'v62' :LineString([nodes['v64'], nodes['v65']]),

                 'v71' :LineString([nodes['v71'], nodes['v72']]),

                 'v81' :LineString([nodes['v81'], nodes['v82']]),
                 'v82' :LineString([nodes['v83'], nodes['v84']]),
                 'v83' :LineString([nodes['v85'], nodes['v86']]),
                 'v84' :LineString([nodes['v86'], nodes['v87']]),

                 'v-11' :LineString([nodes['v-11'], nodes['v-12']]),
                 'v-12' :LineString([nodes['v-12'], nodes['v-13']]),

                 'v-21' :LineString([nodes['v-21'], nodes['v-22']]),
                 'v-22' :LineString([nodes['v-22'], nodes['v-23']]),
                 'v-23' :LineString([nodes['v-23'], nodes['v-24']]),
                 'v-24' :LineString([nodes['v-24'], nodes['v-25']]),
                 'v-25' :LineString([nodes['v-25'], nodes['v-26']]),
                 'v-26' :LineString([nodes['v-27'], nodes['v-28']]),

                 'v-31' :LineString([nodes['v-31'], nodes['v-32']]),
                 'v-32' :LineString([nodes['v-32'], nodes['v-33']]),
                 'v-33' :LineString([nodes['v-33'], nodes['v-34']]),
                 'v-34' :LineString([nodes['v-34'], nodes['v-35']]),

                 'v-41' :LineString([nodes['v-41'], nodes['v-42']]),
                 'v-42' :LineString([nodes['v-42'], nodes['v-43']]),
                 'v-43' :LineString([nodes['v-43'], nodes['v-44']]),
                 'v-44' :LineString([nodes['v-44'], nodes['v-45']]),
                 'v-45' :LineString([nodes['v-45'], nodes['v-46']]),
                 'v-46' :LineString([nodes['v-47'], nodes['v-48']]),


                 'v-51' :LineString([nodes['v-51'], nodes['v-52']]),
                 'v-52' :LineString([nodes['v-52'], nodes['v-53']]),
                 'v-53' :LineString([nodes['v-54'], nodes['v-55']]),
                 'v-54' :LineString([nodes['v-55'], nodes['v-56']]),
                 'v-55' :LineString([nodes['v-57'], nodes['v-58']]),

                 'v-61' :LineString([nodes['v-62'], nodes['v-63']]),
                 'v-62' :LineString([nodes['v-64'], nodes['v-65']]),

                 'v-71' :LineString([nodes['v-71'], nodes['v-72']]),


                #  'v-11' :LineString([nodes['v-11'], nodes['v-13']]),
                #  'v-21' :LineString([nodes['v-21'], nodes['v-25']]),
                #  'v-22' :LineString([nodes['v-26'], nodes['v-27']]),
                #  'v-31' :LineString([nodes['v-31'], nodes['v-34']]),
                #  'v-41' :LineString([nodes['v-41'], nodes['v-45']]),
                #  'v-42' :LineString([nodes['v-46'], nodes['v-47']]),
                #  'v-51' :LineString([nodes['v-51'], nodes['v-53']]),
                #  'v-52' :LineString([nodes['v-54'], nodes['v-56']]),
                #  'v-53' :LineString([nodes['v-57'], nodes['v-58']]),
                #  'v-61' :LineString([nodes['v-62'], nodes['v-63']]),
                #  'v-62' :LineString([nodes['v-64'], nodes['v-65']]),
                #  'v-71' :LineString([nodes['v-71'], nodes['v-72']]),

                 #horizontal
                 'h11' : LineString([nodes['v21'], nodes['v31']]),
                 'h12' : LineString([nodes['v31'], nodes['v41']]),
                 'h13' : LineString([nodes['v51'], nodes['v81']]),
                 'h14' : LineString([nodes['v81'], nodes['v-51']]),
                 'h15' : LineString([nodes['v-41'], nodes['v-31']]),
                 'h16' : LineString([nodes['v-31'], nodes['v-21']]),

                 'h21' : LineString([nodes['v11'], nodes['v22']]),
                 'h22' : LineString([nodes['v42'], nodes['v52']]),
                 'h23' : LineString([nodes['v52'], nodes['c11']]),
                 'h24' : LineString([nodes['c12'], nodes['v-52']]),
                 'h25' : LineString([nodes['v-22'], nodes['v-11']]),    

                 'h31' : LineString([nodes['v53'], nodes['v62']]),
                 'h32' : LineString([nodes['v62'], nodes['v84']]),
                 'h33' : LineString([nodes['v84'], nodes['v-62']]),
                 'h34' : LineString([nodes['v-62'], nodes['v-53']]),
                 'h35' : LineString([nodes['v-43'], nodes['v-32']]),
                 'h36' : LineString([nodes['v-32'], nodes['v-23']]),

                 'h41' : LineString([nodes['v23'], nodes['v32']]),
                 'h42' : LineString([nodes['v32'], nodes['v43']]),

                 'h51' : LineString([nodes['v54'], nodes['v63']]),
                 'h52' : LineString([nodes['v63'], nodes['v85']]),
                 'h53' : LineString([nodes['v85'], nodes['v-63']]),
                 'h54' : LineString([nodes['v-63'], nodes['v-54']]),
                 'h55' : LineString([nodes['v-44'], nodes['v-33']]),
                 'h56' : LineString([nodes['v-33'], nodes['v-24']]),

                 'h61' : LineString([nodes['v12'], nodes['v24']]),
                 'h62' : LineString([nodes['v44'], nodes['v55']]),
                 'h63' : LineString([nodes['v55'], nodes['v86']]),
                 'h64' : LineString([nodes['v86'], nodes['v-55']]),
                 'h65' : LineString([nodes['v-55'], nodes['v-45']]),
                 'h66' : LineString([nodes['v-12'], nodes['v-25']]),

                 'h71' : LineString([nodes['v25'], nodes['v33']]),
                 'h72' : LineString([nodes['v33'], nodes['v45']]),
                 'h73' : LineString([nodes['v56'], nodes['v64']]),
                 'h74' : LineString([nodes['v64'], nodes['v87']]),
                 'h75' : LineString([nodes['v87'], nodes['v-64']]),
                 'h76' : LineString([nodes['v-64'], nodes['v-56']]),
                 'h77' : LineString([nodes['v-46'], nodes['v-34']]),
                 'h78' : LineString([nodes['v-34'], nodes['v-26']]),

                 'h81' : LineString([nodes['v26'], nodes['v34']]),
                 'h82' : LineString([nodes['v34'], nodes['v46']]),
                 'h83' : LineString([nodes['v46'], nodes['v57']]),
                 'h84' : LineString([nodes['v57'], nodes['v65']]),
                 'h85' : LineString([nodes['v65'], nodes['v71']]),
                 'h86' : LineString([nodes['v-65'], nodes['v-71']]),
                 'h87' : LineString([nodes['v-65'], nodes['v-57']]),
                 'h88' : LineString([nodes['v-57'], nodes['v-47']]),
                 'h89' : LineString([nodes['v-47'], nodes['v-35']]),
                 'h810' : LineString([nodes['v-35'], nodes['v-27']]),

                 'h91' : LineString([nodes['v13'], nodes['v27']]),
                 'h92' : LineString([nodes['v27'], nodes['v47']]),
                 'h93' : LineString([nodes['v58'], nodes['v72']]),
                 'h94' : LineString([nodes['v72'], nodes['v-72']]),
                 'h95' : LineString([nodes['v-72'], nodes['v-58']]),
                 'h96' : LineString([nodes['v-48'], nodes['v-28']]),
                 'h97' : LineString([nodes['v-28'], nodes['v-13']]),

                 #circle
                 'c1' : LineString([nodes['c11'], nodes['v82']]),
                 'c2' : LineString([nodes['c12'], nodes['v82']]),
                 'c3' : LineString([nodes['c12'], nodes['v83']]),
                 'c4' : LineString([nodes['c11'], nodes['v83']]),
             }


In [105]:
# (prepare) inactive Define a function to create a LineString from DataFrame rows
df = pd.read_csv('df_inactive.csv')
df = df[df['inactive_event'] !=839 ]

def create_linestring(group):
    return LineString(zip(group['posX'], group['posZ']))

# Group by `inactive_event` and create LineString for each group
grouped = df.groupby('inactive_event').apply(lambda group: create_linestring(group))

  df = pd.read_csv('df_inactive.csv')


In [120]:
# (result) crossroad inactive phase
def check_intersections(inactive_event_lines, road_segments):
    results = []
    for inactive_event, line_geom in inactive_event_lines.items():
        # Check if this LineString intersects with any road segment
        intersects = any(line_geom.intersects(road_geom) for road_geom in road_segments.values())
        results.append({
            'inactive_event': inactive_event,
            'ifcross_inactive': 1 if intersects else 0
        })
    return pd.DataFrame(results)

# Get intersection results
intersection_inactive = check_intersections(grouped, road_segments)

print(intersection_inactive)

      inactive_event  ifcross_inactive
0                  1                 0
1                  2                 1
2                  3                 0
3                  4                 1
4                  5                 0
...              ...               ...
3117            3119                 0
3118            3120                 0
3119            3121                 0
3120            3122                 0
3121            3123                 1

[3122 rows x 2 columns]


In [100]:
# (prepare) active Define a function 
df = pd.read_csv('df_active.csv')

def create_linestring(group):
    return LineString(zip(group['posX'], group['posZ']))

# Group by `inactive_event` and create LineString for each group
grouped = df.groupby('active_event').apply(lambda group: create_linestring(group))

In [102]:
# (result) crossroad active phase
def check_intersections(inactive_event_lines, road_segments):
    results = []
    for inactive_event, line_geom in inactive_event_lines.items():
        # Check if this LineString intersects with any road segment
        intersects = any(line_geom.intersects(road_geom) for road_geom in road_segments.values())
        results.append({
            'inactive_event': inactive_event,
            'ifcross_preactive': 1 if intersects else 0
        })
    return pd.DataFrame(results)

# Get intersection results
intersection_preactive = check_intersections(grouped, road_segments)
print(intersection_preactive)

      inactive_event  ifcross_preactive
0                  1                  0
1                  2                  1
2                  3                  1
3                  4                  0
4                  5                  0
...              ...                ...
3118            3119                  0
3119            3120                  0
3120            3121                  0
3121            3122                  0
3122            3123                  0

[3123 rows x 2 columns]


In [97]:
# feature of ifshortcuts
import pandas as pd

# Load data from CSV files
df_shortcuts = pd.read_csv('df_shortcuts.csv')
df_inactive = pd.read_csv('df_inactive.csv')

# Check if trajectory_Id in df_inactive exists in df_shortcuts
df_inactive['is_shortcut'] = df_inactive['trajectory_Id'].isin(df_shortcuts['trajectory_Id']).astype(int)

# Group by inactive_event and get the max value of is_shortcut (1 if any trajectory_Id is a shortcut)
result_shortcut = df_inactive.groupby('inactive_event')['is_shortcut'].max().reset_index()

# Display the result
print(result_shortcut)



      inactive_event  is_shortcut
0                  1            0
1                  2            0
2                  3            0
3                  4            0
4                  5            0
...              ...          ...
3118            3119            0
3119            3120            0
3120            3121            0
3121            3122            0
3122            3123            0

[3123 rows x 2 columns]


  df_inactive = pd.read_csv('df_inactive.csv')


In [None]:
# feature of whether passing poi

In [125]:
# merge
import pandas as pd
df = pd.read_csv('result_individual.csv')

df_m1 = df.merge(result_shortcut, how= 'left', left_on= 'event_number', right_on = 'inactive_event')
df_m2 = df_m1.merge(intersection_preactive, how= 'left', left_on= 'event_number', right_on = 'inactive_event')
df_m3 = df_m2.merge(intersection_inactive, how= 'left', left_on= 'event_number', right_on = 'inactive_event')
df_m4 = df_m3.merge(result_df, how= 'left', left_on= 'event_number', right_on = 'inactive_event')

columns_to_drop = [
    'Unnamed: 0',
    'inactive_event_y',
    'inactive_event_x',
    'trajectory_Id_y',
    'event_1_index',
    'distance',
    'trajectory_distance'
]

df_m4 = df_m4.drop(columns=columns_to_drop)


df_m4 = df_m4[df_m4['event_number'] != 1668]
print(df_m4)
df_m4.to_csv('result_indi_env.csv')


      event_number  participant  trajectory_Id_x  traffic_density  map_type  \
0                1            8                1                0         0   
1                2            8                1                0         0   
2                3            8                2                0         0   
3                4            8                2                0         0   
4                5            8                2                0         0   
...            ...          ...              ...              ...       ...   
3113          3119           63              863                1         1   
3114          3120           63              863                1         1   
3115          3121           63              863                1         1   
3116          3122           63              863                1         1   
3117          3123           63              863                1         1   

      total_inactive_time  state  sbsod_score  gend

  df_m4 = df_m3.merge(result_df, how= 'left', left_on= 'event_number', right_on = 'inactive_event')
