In [2]:
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())
import importlib

# Both locations come from the environment (populated by load_dotenv above).
# Fail early with a clear message: an unset variable would otherwise surface
# as an opaque TypeError from os.chdir(None) / os.path.join(None, ...).
data_path = os.getenv('DATA_PATH')
project_path = os.getenv('PROJECT_PATH')
_missing_env = [
    name
    for name, value in (('DATA_PATH', data_path), ('PROJECT_PATH', project_path))
    if not value
]
if _missing_env:
    raise EnvironmentError(
        f"Missing environment variable(s): {', '.join(_missing_env)}"
    )
# Presumably needed so the `src.*` imports below resolve from the project root.
os.chdir(project_path)

import pandas as pd


import src.models.trajectory
importlib.reload(src.models.trajectory)
from src.models.trajectory import Trajectory

import src.models.trajectories
importlib.reload(src.models.trajectories)
from src.models.trajectories import Trajectories

import src.utils.px_scatter
importlib.reload(src.utils.px_scatter)
from src.utils.px_scatter import plot_timeline

import src.utils.px_scatter_mapbox
importlib.reload(src.utils.px_scatter_mapbox)
from src.utils.px_scatter_mapbox import plot_map


import random
# Alphabetically ordered names of the sub-directories found directly under data_path.
users = sorted(
    entry
    for entry in os.listdir(data_path)
    if os.path.isdir(os.path.join(data_path, entry))
)

In [3]:
# Pick a single user to inspect and load that user's trajectories.
# The same id is joined onto data_path when the labels are read later on.
user = '020'
t = Trajectories.from_user(user_id=user)

In [4]:
# Read the label intervals stored under this user's directory; the displayed
# frame has one row per interval (start_datetime, end_datetime, label).
df_labels = t.extract_labels(user_path=os.path.join(data_path, user))
df_labels

Unnamed: 0,start_datetime,end_datetime,label
0,2011-08-27 06:13:01,2011-08-27 08:01:37,walk
0,2011-08-27 09:34:43,2011-08-27 14:50:30,walk
0,2011-08-27 14:50:31,2011-08-27 15:01:58,bus
0,2011-08-27 15:01:59,2011-08-27 15:31:43,walk
0,2011-08-28 04:33:31,2011-08-28 04:44:25,walk
...,...,...,...
0,2012-02-09 12:46:07,2012-02-09 13:23:29,bus
0,2012-02-16 11:45:05,2012-02-16 12:01:01,bus
0,2012-02-17 01:49:29,2012-02-17 02:28:54,bus
0,2012-02-20 14:10:33,2012-02-20 14:29:59,bus


In [5]:
# Spot-check one raw record (200th from the end) of the fourth trajectory.
t.trajectories[3].records[-200]

Record(user_id='020', latitude=39.97788, longitude=116.332475, altitude=0, datetime=Timestamp('2011-08-27 15:08:38'), timestamp=1314457718.0, label=None, trajectory_id=None)

In [6]:
# Tabular view of the fourth trajectory without its 'label' column (labels are
# attached from df_labels further down).
# Use a non-mutating drop: `.df` may hand back the trajectory's own frame
# (not verifiable from here), and an in-place drop would both alter that object
# and raise KeyError when this cell is run a second time.
records_df = t.trajectories[3].df.drop(columns=['label'])
records_df

Unnamed: 0,user_id,trajectory_id,datetime,latitude,longitude,altitude,timestamp,time_diff,distance,speed
0,020,,2011-08-27 09:34:43,39.978668,116.307982,0,1.314438e+09,0.0,0.000000,0.000000
1,020,,2011-08-27 09:34:44,39.978653,116.308022,0,1.314438e+09,1.0,3.801128,3.801128
2,020,,2011-08-27 09:34:45,39.978640,116.308008,0,1.314438e+09,1.0,1.867869,1.867869
3,020,,2011-08-27 09:34:46,39.978623,116.307967,0,1.314438e+09,1.0,4.011535,4.011535
4,020,,2011-08-27 09:34:47,39.978628,116.307992,0,1.314438e+09,1.0,2.206496,2.206496
...,...,...,...,...,...,...,...,...,...,...
1335,020,,2011-08-27 15:31:39,39.978083,116.330227,0,1.314459e+09,1.0,0.339590,0.339590
1336,020,,2011-08-27 15:31:40,39.978083,116.330213,0,1.314459e+09,1.0,1.138949,1.138949
1337,020,,2011-08-27 15:31:41,39.978022,116.330275,0,1.314459e+09,1.0,8.638920,8.638920
1338,020,,2011-08-27 15:31:42,39.978018,116.330272,0,1.314459e+09,1.0,0.466969,0.466969


In [7]:
# Attach to every record the label interval with the latest start_datetime that
# is <= the record's datetime. merge_asof requires both sides sorted on their keys.
# NOTE: the backward match does not look at end_datetime, so a record that falls
# after an interval has ended still receives that interval's label; compare
# against end_datetime before trusting the 'label' column.
_label_columns = ['start_datetime', 'end_datetime', 'label']
df_labels = df_labels.sort_values('start_datetime')
records_df = pd.merge_asof(
    # Drop label columns left over from a previous run of this cell so that
    # re-running it merges onto clean records instead of colliding with the
    # incoming columns.
    records_df.drop(columns=_label_columns, errors='ignore').sort_values('datetime'),
    df_labels,
    left_on='datetime',
    right_on='start_datetime',
    direction='backward',
    suffixes=('', '_label'),
)

In [8]:
# Inspect the merged frame: each record now carries start_datetime, end_datetime and label.
records_df

Unnamed: 0,user_id,trajectory_id,datetime,latitude,longitude,altitude,timestamp,time_diff,distance,speed,start_datetime,end_datetime,label
0,020,,2011-08-27 09:34:43,39.978668,116.307982,0,1.314438e+09,0.0,0.000000,0.000000,2011-08-27 09:34:43,2011-08-27 14:50:30,walk
1,020,,2011-08-27 09:34:44,39.978653,116.308022,0,1.314438e+09,1.0,3.801128,3.801128,2011-08-27 09:34:43,2011-08-27 14:50:30,walk
2,020,,2011-08-27 09:34:45,39.978640,116.308008,0,1.314438e+09,1.0,1.867869,1.867869,2011-08-27 09:34:43,2011-08-27 14:50:30,walk
3,020,,2011-08-27 09:34:46,39.978623,116.307967,0,1.314438e+09,1.0,4.011535,4.011535,2011-08-27 09:34:43,2011-08-27 14:50:30,walk
4,020,,2011-08-27 09:34:47,39.978628,116.307992,0,1.314438e+09,1.0,2.206496,2.206496,2011-08-27 09:34:43,2011-08-27 14:50:30,walk
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1335,020,,2011-08-27 15:31:39,39.978083,116.330227,0,1.314459e+09,1.0,0.339590,0.339590,2011-08-27 15:01:59,2011-08-27 15:31:43,walk
1336,020,,2011-08-27 15:31:40,39.978083,116.330213,0,1.314459e+09,1.0,1.138949,1.138949,2011-08-27 15:01:59,2011-08-27 15:31:43,walk
1337,020,,2011-08-27 15:31:41,39.978022,116.330275,0,1.314459e+09,1.0,8.638920,8.638920,2011-08-27 15:01:59,2011-08-27 15:31:43,walk
1338,020,,2011-08-27 15:31:42,39.978018,116.330272,0,1.314459e+09,1.0,0.466969,0.466969,2011-08-27 15:01:59,2011-08-27 15:31:43,walk


In [9]:
# Step 3: keep the label only for records that fall inside the matched label
# interval (both bounds inclusive). The backward as-of merge matches on
# start_datetime alone, so records after an interval's end_datetime must be
# cleared here. Rows with no matched interval compare as False and end up
# unlabeled as well.
mask = (
    (records_df['datetime'] >= records_df['start_datetime'])
    & (records_df['datetime'] <= records_df['end_datetime'])
)
# .where keeps the value where mask is True and inserts NaN elsewhere; assign
# returns a new frame, so re-running this cell yields the same result.
records_df = records_df.assign(label=records_df['label'].where(mask))
records_df

Unnamed: 0,user_id,trajectory_id,datetime,latitude,longitude,altitude,timestamp,time_diff,distance,speed,start_datetime,end_datetime,label
0,020,,2011-08-27 09:34:43,39.978668,116.307982,0,1.314438e+09,0.0,0.000000,0.000000,2011-08-27 09:34:43,2011-08-27 14:50:30,walk
1,020,,2011-08-27 09:34:44,39.978653,116.308022,0,1.314438e+09,1.0,3.801128,3.801128,2011-08-27 09:34:43,2011-08-27 14:50:30,walk
2,020,,2011-08-27 09:34:45,39.978640,116.308008,0,1.314438e+09,1.0,1.867869,1.867869,2011-08-27 09:34:43,2011-08-27 14:50:30,walk
3,020,,2011-08-27 09:34:46,39.978623,116.307967,0,1.314438e+09,1.0,4.011535,4.011535,2011-08-27 09:34:43,2011-08-27 14:50:30,walk
4,020,,2011-08-27 09:34:47,39.978628,116.307992,0,1.314438e+09,1.0,2.206496,2.206496,2011-08-27 09:34:43,2011-08-27 14:50:30,walk
...,...,...,...,...,...,...,...,...,...,...,...,...,...
1335,020,,2011-08-27 15:31:39,39.978083,116.330227,0,1.314459e+09,1.0,0.339590,0.339590,2011-08-27 15:01:59,2011-08-27 15:31:43,walk
1336,020,,2011-08-27 15:31:40,39.978083,116.330213,0,1.314459e+09,1.0,1.138949,1.138949,2011-08-27 15:01:59,2011-08-27 15:31:43,walk
1337,020,,2011-08-27 15:31:41,39.978022,116.330275,0,1.314459e+09,1.0,8.638920,8.638920,2011-08-27 15:01:59,2011-08-27 15:31:43,walk
1338,020,,2011-08-27 15:31:42,39.978018,116.330272,0,1.314459e+09,1.0,0.466969,0.466969,2011-08-27 15:01:59,2011-08-27 15:31:43,walk


In [10]:
# Random preview of up to 10 rows. The fixed seed keeps the preview identical
# across reruns, and the min() guard avoids the ValueError that sample() raises
# when asked for more rows than the frame holds.
records_df.sample(n=min(10, len(records_df)), random_state=42)

Unnamed: 0,user_id,trajectory_id,datetime,latitude,longitude,altitude,timestamp,time_diff,distance,speed,start_datetime,end_datetime,label
1199,20,,2011-08-27 15:25:02,39.978417,116.330752,0,1314459000.0,1.0,0.795307,0.795307,2011-08-27 15:01:59,2011-08-27 15:31:43,walk
526,20,,2011-08-27 14:54:32,39.974673,116.313107,0,1314457000.0,1.0,5.742931,5.742931,2011-08-27 14:50:31,2011-08-27 15:01:58,bus
850,20,,2011-08-27 15:03:48,39.976002,116.328948,0,1314457000.0,1.0,1.689678,1.689678,2011-08-27 15:01:59,2011-08-27 15:31:43,walk
730,20,,2011-08-27 15:01:48,39.975237,116.329447,0,1314457000.0,1.0,1.153932,1.153932,2011-08-27 14:50:31,2011-08-27 15:01:58,bus
867,20,,2011-08-27 15:04:05,39.976227,116.328913,0,1314457000.0,1.0,2.266495,2.266495,2011-08-27 15:01:59,2011-08-27 15:31:43,walk
1320,20,,2011-08-27 15:31:24,39.978063,116.330232,0,1314459000.0,1.0,1.479768,1.479768,2011-08-27 15:01:59,2011-08-27 15:31:43,walk
218,20,,2011-08-27 10:04:32,39.978957,116.305313,0,1314439000.0,1.0,0.339587,0.339587,2011-08-27 09:34:43,2011-08-27 14:50:30,walk
151,20,,2011-08-27 09:37:34,39.97881,116.307178,0,1314438000.0,1.0,2.260619,2.260619,2011-08-27 09:34:43,2011-08-27 14:50:30,walk
665,20,,2011-08-27 14:58:18,39.975112,116.322445,0,1314457000.0,1.0,11.585685,11.585685,2011-08-27 14:50:31,2011-08-27 15:01:58,bus
1140,20,,2011-08-27 15:08:38,39.97788,116.332475,0,1314458000.0,1.0,1.671587,1.671587,2011-08-27 15:01:59,2011-08-27 15:31:43,walk


In [16]:
import plotly.graph_objects as go

# Plot every record on a Mapbox map. Marker colour is the integer category code
# of the record's label, and the view is centred on the mean coordinate.
label_codes = records_df['label'].astype('category').cat.codes
map_center = dict(
    lat=records_df['latitude'].mean(),
    lon=records_df['longitude'].mean(),
)

fig = go.Figure(
    data=go.Scattermapbox(
        lat=records_df['latitude'],
        lon=records_df['longitude'],
        mode='markers',
        marker=dict(size=10, color=label_codes),
    )
)
fig.update_layout(
    template="plotly_dark",
    margin=dict(l=0, r=0, t=0, b=0),
    mapbox=dict(
        # The token is read from the environment rather than hard-coded.
        accesstoken=os.getenv('MAPBOX_TOKEN'),
        center=map_center,
        zoom=8,
    ),
)
fig.show()

In [137]:
import pandas as pd
import plotly.graph_objs as go

def _row_hover_text(frame: pd.DataFrame) -> list:
    """Return one '<br>'-joined "column: value" string per row of ``frame``."""
    return [
        '<br>'.join(f'{column}: {value}' for column, value in zip(frame.columns, row))
        for row in frame.itertuples(index=False, name=None)
    ]


# NOTE: this definition replaces the `plot_timeline` name imported from
# src.utils.px_scatter in the setup cell for the remainder of the notebook.
def plot_timeline(
    df: pd.DataFrame,
    y_data: str,
    mode: str = 'markers',
    height: int = 250,
    marker: dict = dict(color='red', size=5),
) -> go.Figure:
    """Plot ``df[y_data]`` against ``df['datetime']`` with one trace per label.

    Args:
        df: Records to plot. Must contain the columns 'datetime', 'label'
            and ``y_data``.
        y_data: Name of the column drawn on the y axis (also the axis title).
        mode: Plotly scatter mode, e.g. 'markers' or 'lines+markers'.
        height: Figure height in pixels.
        marker: Marker options. Only 'size' (default 5) and 'opacity'
            (default 1) are read; 'color' is ignored because every trace takes
            its colour from the per-label colour map. The default dict is
            never mutated.

    Returns:
        A dark-themed figure with one trace for each label in the colour map
        (added even when it has no rows, so the legend stays stable), plus a
        gray 'other / unlabeled' trace when rows carry a missing label or one
        outside the colour map. For an empty ``df`` a placeholder figure
        titled "No data available" is returned.

    Raises:
        KeyError: If 'label', 'datetime' or ``y_data`` is not a column of ``df``.
    """
    if df.empty:
        return go.Figure(go.Scatter(), layout=dict(title="No data available"))

    # Define color map
    color_map = {'walk': 'blue', 'bike': 'green', 'bus': 'red', 'car': 'orange', 'train': 'purple', 'subway': 'black'}

    size = marker.get('size', 5)
    opacity = marker.get('opacity', 1)

    fig = go.Figure()

    def add_group(group_df: pd.DataFrame, name: str, color: str) -> None:
        fig.add_trace(
            go.Scatter(
                x=group_df['datetime'],
                y=group_df[y_data],
                mode=mode,
                marker=dict(size=size, color=color, opacity=opacity),
                line=dict(width=2, color=color),
                name=name,
                hoverinfo='text',
                # One hover string per point; passing the column index here
                # would show column names instead of the point's values.
                hovertext=_row_hover_text(group_df),
                showlegend=True,
                visible=True,
            )
        )

    # Add traces for each label
    for label, color in color_map.items():
        add_group(df[df['label'] == label], label, color)

    # Rows whose label is missing or not in the colour map would otherwise
    # vanish from the plot without any indication.
    leftover = df[~df['label'].isin(list(color_map))]
    if not leftover.empty:
        add_group(leftover, 'other / unlabeled', 'gray')

    fig.update_layout(
        yaxis=dict(
            title=y_data,
        ),
        template='plotly_dark',
        margin=dict(l=0, r=0, t=0, b=0),
        showlegend=True,
        legend=dict(
            title="Traces:",
            x=0,
            y=1,
            xanchor='left',
            yanchor='top',
            orientation='h',
        ),
        height=height,
    )
    return fig

# Speed over time for the labelled records, drawn as connected markers.
plot_timeline(df=records_df, y_data='speed', mode='lines+markers')