In [1]:
import os
from dotenv import load_dotenv, find_dotenv
load_dotenv(find_dotenv())
import importlib


# Read the project root and the dataset directory from the environment.
project_path = os.getenv('PROJECT_PATH')
data_path = os.getenv('DATA_PATH')

# os.getenv returns None for an unset variable; fail here with a clear message
# instead of an opaque TypeError from os.chdir / os.path.join further down.
if project_path is None or data_path is None:
    raise EnvironmentError(
        'PROJECT_PATH and DATA_PATH must both be set '
        '(for example in the .env file) before running this notebook'
    )

# Work from the project root; presumably this is what makes the `src`
# package importable in the statements that follow -- verify.
os.chdir(project_path)

from src.models.record import Record
from src.models.trajectory import Trajectory
from src.models.trajectories import Trajectories
from src.utils.parsers import RecordParser, PltRecordParser


In [5]:
# Pick one .plt file from a single user's 'Trajectory' directory.

user_id = '170'
user_path = os.path.join(data_path, user_id)
trajectory_dir = os.path.join(user_path, 'Trajectory')

# os.listdir returns entries in arbitrary order, so sort the paths to make
# the "first" file the same on every run and on every machine.
records_files_paths = sorted(
    os.path.join(trajectory_dir, file_name)
    for file_name in os.listdir(trajectory_dir)
    if file_name.endswith('.plt')
)
if not records_files_paths:
    raise FileNotFoundError('No .plt files found in ' + trajectory_dir)

file_path = records_files_paths[0]
file_path

'/home/yohann/projects/geolife/data/geolife/170/Trajectory/20080512012705.plt'

In [6]:
# Use the Trajectory class to read the file at file_path, parsing its
# records with PltRecordParser.
t = Trajectory.from_file(
    file_path=file_path,
    user_id=user_id,
    # Derive the id from user_id so it stays consistent if the user changes
    # (evaluates to '170_models_test' for user '170').
    id=user_id + '_models_test',
    parser=PltRecordParser()
)

In [7]:
# Display the summary features of the trajectory `t`.
t.features

{'id': '170_models_test',
 'user_id': '170',
 'count': 991,
 'start_datetime': Timestamp('2008-05-12 01:27:05'),
 'end_datetime': Timestamp('2008-05-12 01:55:35'),
 'duration': Timedelta('0 days 00:28:30')}

In [8]:
# Load all the trajectory files of one user through the Trajectories class.
user_id = '000'
trajs = Trajectories.from_user(data_path=data_path, user_id=user_id)

In [9]:
# Display the features table of the loaded trajectories.
trajs.features

Unnamed: 0,id,user_id,count,start_datetime,end_datetime,duration
0,000_0,000,908,2008-10-23 02:53:04,2008-10-23 11:11:12,0 days 08:18:08
1,000_1,000,244,2008-10-24 02:09:59,2008-10-24 02:47:06,0 days 00:37:07
2,000_2,000,745,2008-10-26 13:44:07,2008-10-26 15:04:07,0 days 01:20:00
3,000_3,000,50,2008-10-27 11:54:49,2008-10-27 12:05:54,0 days 00:11:05
4,000_4,000,1477,2008-10-28 00:38:26,2008-10-28 05:03:42,0 days 04:25:16
...,...,...,...,...,...,...
166,000_166,000,94,2009-07-01 17:19:36,2009-07-01 17:27:11,0 days 00:07:35
167,000_167,000,5757,2009-07-02 02:25:30,2009-07-02 14:17:46,0 days 11:52:16
168,000_168,000,1788,2009-07-03 00:28:00,2009-07-03 08:37:23,0 days 08:09:23
169,000_169,000,436,2009-07-04 04:26:34,2009-07-04 09:49:18,0 days 05:22:44


In [10]:
# Peek at 10 random records. random_state pins the draw so the displayed rows
# are the same on every run (assumes trajs.df is a pandas DataFrame -- verify).
trajs.df.sample(10, random_state=0)

Unnamed: 0,user_id,trajectory_id,label,datetime,latitude,longitude,altitude,timestamp
497,0,000_11,,2008-11-12 04:26:32,40.009424,116.320634,-32,1226464000.0
2065,0,000_48,,2009-04-05 10:00:08,39.940943,116.373682,138,1238926000.0
169,0,000_44,,2009-04-01 05:24:52,39.999998,116.327435,148,1238563000.0
2103,0,000_80,,2009-04-30 07:47:27,39.938293,116.319072,174,1241078000.0
544,0,000_159,,2009-06-27 11:45:00,31.295924,121.544868,57,1246103000.0
9,0,000_92,,2009-05-08 10:05:16,39.999503,116.327139,218,1241777000.0
404,0,000_151,,2009-06-22 13:16:15,40.007793,116.31662,111,1245677000.0
2927,0,000_80,,2009-04-30 10:46:08,39.951005,116.329102,161,1241088000.0
724,0,000_0,,2008-10-23 10:13:01,40.007717,116.318842,71,1224757000.0
243,0,000_97,,2009-05-11 09:47:58,39.991736,116.32742,266,1242035000.0


In [36]:
# Dimensions of the combined records table held in trajs.df.
trajs.df.shape

(173870, 8)

In [11]:
# Records of a single trajectory: the one at position 5 in trajs.trajectories.
trajs.trajectories[5].df

Unnamed: 0,user_id,trajectory_id,label,datetime,latitude,longitude,altitude,timestamp
0,0,000_5,,2008-10-29 09:21:38,39.994238,116.326786,492,1225272000.0
1,0,000_5,,2008-10-29 09:21:43,39.994315,116.326758,492,1225272000.0
2,0,000_5,,2008-10-29 09:21:48,39.994414,116.326719,492,1225272000.0
3,0,000_5,,2008-10-29 09:21:53,39.994429,116.3267,492,1225272000.0
4,0,000_5,,2008-10-29 09:21:58,39.99445,116.326712,492,1225272000.0
5,0,000_5,,2008-10-29 09:22:03,39.994467,116.32675,492,1225272000.0
6,0,000_5,,2008-10-29 09:22:08,39.994501,116.326781,492,1225272000.0
7,0,000_5,,2008-10-29 09:22:13,39.994548,116.326814,492,1225272000.0
8,0,000_5,,2008-10-29 09:22:18,39.994531,116.326856,492,1225272000.0
9,0,000_5,,2008-10-29 09:22:23,39.994501,116.326873,492,1225272000.0


In [None]:
import importlib

# Reload each plotting helper module before importing from it, so that edits
# to its source take effect without restarting the kernel.
import src.utils.px_scatter_mapbox
importlib.reload(src.utils.px_scatter_mapbox)
from src.utils.px_scatter_mapbox import plot_map

import src.utils.px_scatter
importlib.reload(src.utils.px_scatter)
from src.utils.px_scatter import plot_timeline

# Latitude timeline for the first 10000 records.
# Map view of the same slice: plot_map(trajs.df[:10000]).show()
plot_timeline(df=trajs.df[:10000], y_data='latitude').show()


In [37]:
# NOTE(review): `index` is not referenced by any other visible cell -- confirm
# it is still needed.
index = 0

In [9]:
import random

import src.models.trajectories
import src.utils.px_scatter
import src.utils.px_scatter_mapbox

# Reload the project modules so the latest source edits are in effect, then
# re-bind the names imported from them.
importlib.reload(src.models.trajectories)
importlib.reload(src.utils.px_scatter)
importlib.reload(src.utils.px_scatter_mapbox)

from src.models.trajectories import Trajectories
from src.utils.px_scatter import plot_timeline
from src.utils.px_scatter_mapbox import plot_map

# Names of the sub-directories of data_path (presumably one per user id -- verify).
users = [
    entry
    for entry in os.listdir(data_path)
    if os.path.isdir(os.path.join(data_path, entry))
]

# Load the trajectories of user 123 and display their features table.
user_123_trajectories = Trajectories.from_user(user_id='123')
user_123_trajectories.features

Unnamed: 0,id,user_id,count,start_datetime,end_datetime,duration
0,123_0,123,15,2009-09-23 09:20:00,2009-09-23 09:21:10,0 days 00:01:10
1,123_1,123,3076,2009-09-23 15:43:35,2009-09-23 22:04:18,0 days 06:20:43
2,123_2,123,2160,2009-09-24 18:30:29,2009-09-24 23:19:54,0 days 04:49:25
3,123_3,123,2023,2009-09-26 19:09:29,2009-09-26 22:52:17,0 days 03:42:48
4,123_4,123,1554,2009-09-27 08:33:34,2009-09-27 16:12:13,0 days 07:38:39


In [10]:
# Plot user 123's records as a latitude timeline and on a map.
# Reuse `user_123_trajectories`, loaded in the preceding cell, instead of
# calling Trajectories.from_user(user_id='123') again for each plot.
plot_timeline(
    df=user_123_trajectories.df,
    y_data='latitude',
).show()
plot_map(user_123_trajectories.df).show()

In [None]:
# NOTE(review): bare name -- evaluating it only displays the object's repr;
# looks like leftover scratch, confirm whether this cell is still needed.
plot_map