In [26]:
    
import random
import datetime
import logging
import os
import time

import psycopg2
from dotenv import load_dotenv
from influxdb_client import InfluxDBClient
from pymongo import MongoClient
from pymongo.database import Database, Collection
import json
import logger_setup

# mongo
import folium


def get_results_folium(mongo_output=None, influx_tables=None, mobility_output=None, bounding_boxes_to_display=None,
                       display_large_results=False):
    """Draw query results from the three databases on a single folium map.

    Args:
        mongo_output: sequence of GeoJSON feature dicts. Each one is rendered with
            ``folium.GeoJson`` and needs ``properties.MMSI``, ``properties.VesselName``
            and ``properties.BaseDateTime`` for its popup.
        influx_tables: iterable of tables; each table is an iterable of records that can
            be indexed with ``"MMSI"``, ``"_time"``, ``"lat"`` and ``"lon"``.
        mobility_output: sequence of rows whose first three items are
            ``(mmsi, GeoJSON point string, timestamp)``.
        bounding_boxes_to_display: iterable of objects exposing ``get_coords()``
            (returning ``min_lon, min_lat, max_lon, max_lat``) and ``get_id()``.
        display_large_results: pass a truthy value to draw more than 3000 points.

    Returns:
        The populated ``folium.Map``, or ``None`` (after printing a notice) when more
        than 3000 points were supplied and ``display_large_results`` is falsy.
    """
    max_points = 3000

    # None stands for "nothing to draw"; this avoids shared mutable default arguments.
    mongo_output = [] if mongo_output is None else mongo_output
    mobility_output = [] if mobility_output is None else mobility_output
    bounding_boxes_to_display = [] if bounding_boxes_to_display is None else bounding_boxes_to_display
    # Materialise each Influx table once so its records can be counted for the size
    # guard and then drawn, even when a table is a one-shot iterator.
    influx_tables = [list(table) for table in ([] if influx_tables is None else influx_tables)]

    # Count individual records, not tables: a single table may hold thousands of points.
    no_points = len(mongo_output) + sum(len(records) for records in influx_tables) + len(mobility_output)
    if no_points > max_points and not display_large_results:
        print(f"There are many results to display ({no_points}). If you are certain you want to do that change display_large_results to True.")
        return None

    # Initial view of the map (lat, lon) before the user pans or zooms.
    map_center = (42, -95)

    map_attributions = ('&copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a> '
                        'contributors, &copy; <a href="http://cartodb.com/attributions">CartoDB</a>')

    folium_map = folium.Map(location=map_center,
                            attr=map_attributions,
                            zoom_start=5,
                            control_scale=True,
                            height=800,
                            width=1400)

    # MongoDB documents are already GeoJSON features, so they are added as-is.
    for entry in mongo_output:
        geojson = folium.GeoJson(entry)
        popup = folium.Popup(
            f"mmsi: {entry['properties']['MMSI']}, name {entry['properties']['VesselName']}, time: {entry['properties']['BaseDateTime']}")
        popup.add_to(geojson)
        geojson.add_to(folium_map)

    # InfluxDB records carry the position in separate "lat" / "lon" columns.
    for records in influx_tables:
        for record in records:
            popup = folium.Popup(f"mmsi: {record['MMSI']}, time: {record['_time']}")
            folium.Marker(location=[record["lat"], record["lon"]],
                          popup=popup,
                          icon=folium.Icon(color='blue', icon='ship', prefix='fa')).add_to(folium_map)

    # MobilityDB rows: (mmsi, GeoJSON point string, timestamp).
    for row in mobility_output:
        mmsi = row[0]
        geojson_string = row[1]
        timestamp = row[2]

        # Parse the GeoJSON string to a Python dictionary
        geojson_obj = json.loads(geojson_string)

        # The code reads coordinates as [lon, lat]; folium expects [lat, lon].
        coordinates = geojson_obj['coordinates']
        lat, lon = coordinates[1], coordinates[0]

        popup_content = f"MMSI: {mmsi}<br>Timestamp: {timestamp}"

        folium.Marker(
            location=[lat, lon],
            popup=popup_content
        ).add_to(folium_map)

    for bbox in bounding_boxes_to_display:
        min_lon, min_lat, max_lon, max_lat = bbox.get_coords()

        # Draw the bounding box as a translucent rectangle labelled with its id.
        folium.Rectangle(
            bounds=[(min_lat, min_lon), (max_lat, max_lon)],
            color="blue",
            fill=True,
            popup=bbox.get_id(),
            fill_opacity=0.2
        ).add_to(folium_map)

    return folium_map

# Interesting vessel (presumably an MMSI):
# 215131000

# Configure logging at INFO level through the project's logger_setup helper and keep a
# handle on the root logger for the cells below.
logger_setup.setup_logging(level=logging.INFO)
logger = logging.getLogger()
# Load variables from a .env file into the process environment; the setup cells read them.
load_dotenv()

True

---

# Setup

## Mongo setup

In [11]:
# MongoDB connection settings. The port is read from MONGO_PORT and falls back to "55000".
mongo_database = "temp"
mongo_collection = "aisdata"
mongo_url = "".join(("mongodb://localhost:", os.environ.get("MONGO_PORT", "55000")))

# Echo the effective settings so the notebook output records what was queried.
logger.info(f"MongoDB endpoint: {mongo_url}")
logger.info(f"MongoDB database name: {mongo_database}")
logger.info(f"MongoDB collection name: {mongo_collection}")

2024-09-06 21:55:31,138 - INFO: MongoDB endpoint: mongodb://localhost:55001
2024-09-06 21:55:31,140 - INFO: MongoDB database name: temp
2024-09-06 21:55:31,141 - INFO: MongoDB collection name: aisdata


## Influx setup

In [4]:

# InfluxDB connection settings, read from the environment.
influx_token = os.environ.get("API_INFLUX_KEY")
influx_org = os.environ.get("INFLUX_ORG_ID")
influx_url = "http://localhost:" + os.environ.get("INFLUX_PORT", "55000")

# Never write the API token itself to the log: notebook outputs are saved and shared.
# Only report whether a token was found.
logger.debug(f"InfluxDB Token: {'<set>' if influx_token else '<missing>'}")
logger.debug(f"InfluxDB Organization id: {influx_org}")
logger.info(f"InfluxDB  Database endpoint: {influx_url}")

2024-09-06 21:50:52,557 - INFO: InfluxDB  Database endpoint: http://localhost:55002


## MobilityDB setup

In [5]:
# MobilityDB (accessed through psycopg2) connection settings. Each value comes from its
# MOBILITY_* environment variable and is None when that variable is not set.
(
    mobility_host,
    mobility_port,
    mobility_user,
    mobility_password,
    mobility_database,
) = map(
    os.environ.get,
    (
        "MOBILITY_HOST",
        "MOBILITY_PORT",
        "MOBILITY_USER",
        "MOBILITY_PASSWORD",
        "MOBILITY_DATABASE",
    ),
)

# Queries

---

## Spatial queries

---

### Datasets

In [46]:
class Bbox:
    """Rectangle described by minimum/maximum longitude and latitude, plus an identifier."""

    def __init__(self, min_lon, min_lat, max_lon, max_lat, id: int):
        """Store the four bounds and the identifier unchanged."""
        self.min_lon, self.min_lat = min_lon, min_lat
        self.max_lon, self.max_lat = max_lon, max_lat
        self.id = id

    def get_coords(self):
        """Return a new list ``[min_lon, min_lat, max_lon, max_lat]``."""
        lower = [self.min_lon, self.min_lat]
        upper = [self.max_lon, self.max_lat]
        return lower + upper

    def get_id(self):
        """Return the identifier given at construction."""
        return self.id


# Benchmark regions for the spatial queries.
# Arguments are Bbox(min_lon, min_lat, max_lon, max_lat, id); ids equal the list index.
# The trailing comment on each entry is the author's label for the region.
bounding_boxes = [
    Bbox(-123.247925, 48.136125, -122.739476, 48.362910, 0),  # Puget Sound, Washington
    Bbox(-123.016525, 37.639830, -122.283450, 37.929824, 1),  # San Francisco Bay, California
    Bbox(-76.510574, 37.973348, -75.962608, 38.393338, 2),  # Chesapeake Bay, Maryland/Virginia
    Bbox(-88.161018, 30.334953, -87.927567, 30.639975, 3),  # Mobile Bay, Alabama
    Bbox(-95.104218, 29.327599, -94.617409, 29.623018, 4),  # Galveston Bay, Texas
    Bbox(-82.775543, 27.599938, -82.320755, 27.934847, 5),  # Tampa Bay, Florida
    Bbox(-122.019295, 36.776848, -121.819153, 37.018274, 6),  # Monterey Bay, California
    Bbox(-71.484741, 41.454842, -71.173431, 41.735072, 7),  # Narragansett Bay, Rhode Island
    Bbox(-117.253113, 32.600235, -117.085083, 32.736514, 8),  # San Diego Bay, California
    Bbox(-88.135986, 44.474116, -87.745605, 44.794497, 9),  # Green Bay
    Bbox(-80.45290918232058, 29.060643707480367, -78.32704059023007, 31.29195079716895, 10),  # Georgia coast
    Bbox(-77.36586166597581, 31.282283517600803, -75.27345529761102, 33.760865420475, 11),  # North Carolina coast
    Bbox(-74.51771061340146, 34.75075477385059, -71.85177110891351, 37.752840882799006, 12),  # Delaware Bay area
    Bbox(-73.8132903076812, 39.28679980551155, -69.84951150110152, 40.36716788459955, 13),  # Long Island Sound, New York
    Bbox(-87.53216792628709, 42.614159443972795, -86.57811342936832, 43.95974516921851, 14),  # Lake Michigan
    Bbox(-88.8185473646937, 47.31402814776524, -85.97885758914987, 48.36369038185671, 15),  # Lake Superior
    Bbox(-119.28398346805034, 28.592051721892147, -116.88328729269529, 32.65616790678931, 16),  # Baja California coast
    Bbox(-115.22516221315561, 21.159217960533027, -112.50145598046407, 25.734344868162523, 17),  # Gulf of California 0
    Bbox(-111.04125138719715, 20.598750857797, -105.33212892067695, 22.5805686602515, 18),  # Northern Gulf of California 0
    Bbox(-96.79281541240942, 25.885529478254256, -93.32496228352338, 28.099129345180913, 19),  # Gulf of Mexico
    Bbox(-88.54476002663944, 27.4284592236325, -84.0568785413999, 30.09273196827901, 20),  # Gulf of Mexico, Alabama
    Bbox(-128.9871203472166, 40.000911885515904, -125.04987957500578, 48.818149529347096, 21),  # Alaska coast -- NOTE(review): latitudes 40.0-48.8 look too far south for Alaska; confirm the label
    Bbox(-126.4987002469207, 35.49916842114703, -122.76107863128735, 38.76867969769498, 22),  # Off the coast of California
    Bbox(-90.41838995337584, 30.039892740717292, -89.89240461303508, 30.37633672889592, 23),  # Mississippi coast
    Bbox(-123.02323990734207, 32.99070496259917, -119.56454319294964, 35.24101423046929, 24)  # Northern California coast
]
 
class Timespan:
    """A start/end pair of datetimes tagged with an identifier."""

    def __init__(self, start: datetime.datetime, end: datetime.datetime, id: int):
        """Store the two endpoints and the identifier unchanged."""
        self.start, self.end = start, end
        self.id = id

    def get_start_end(self):
        """Return the endpoints as a ``(start, end)`` tuple."""
        endpoints = (self.start, self.end)
        return endpoints

    def get_id(self):
        """Return the identifier given at construction."""
        return self.id

def generate_random_timespans(count, day=None, min_duration=10, max_duration=180, rng=None):
    """Generate ``count`` random Timespan objects that all lie within one calendar day.

    Args:
        count: number of timespans to create; their ids run from 0 to ``count - 1``.
        day: ``datetime.date`` (or ``datetime.datetime``) whose calendar day the spans
            fall on. Defaults to 2020-12-31.
        min_duration: smallest duration that can be drawn, in minutes (inclusive).
        max_duration: largest duration that can be drawn, in minutes (inclusive).
        rng: object with a ``randint(a, b)`` method, e.g. ``random.Random(seed)``, for
            reproducible output. Defaults to the module-level ``random`` generator.

    Returns:
        A list of ``Timespan`` objects in creation order.

    Note:
        A span is cut off at 23:59:59 of ``day``, so a span that starts late in the day
        can be shorter than ``min_duration`` (down to zero length).
    """
    if day is None:
        day = datetime.date(2020, 12, 31)
    if rng is None:
        rng = random

    # Last representable second of the day; spans are clamped to it.
    day_end = datetime.datetime(day.year, day.month, day.day, 23, 59, 59)

    timespans = []
    for i in range(count):
        # Random start anywhere between 00:00:00 and 23:59:59.
        # The draw order (hour, minute, second, duration) is kept stable so a seeded
        # generator always produces the same sequence.
        start_hour = rng.randint(0, 23)
        start_minute = rng.randint(0, 59)
        start_second = rng.randint(0, 59)
        start_time = datetime.datetime(day.year, day.month, day.day, start_hour, start_minute, start_second)

        duration_minutes = rng.randint(min_duration, max_duration)
        # Ensure the end time does not run past the end of the day.
        end_time = min(start_time + datetime.timedelta(minutes=duration_minutes), day_end)

        timespans.append(Timespan(start=start_time, end=end_time, id=i))

    return timespans


# Fixed benchmark timespans, all on 2020-12-31.
# ids 0-9 grow from 1 to 10 minutes; ids 10-49 are between 35 and 55 minutes long.
# Trailing comments read: start - end - duration.
timespans = [
    Timespan(start=datetime.datetime(2020, 12, 31, 0, 0, 0), end=datetime.datetime(2020, 12, 31, 0, 1, 0), id=0),  # 00:00:00 - 00:01:00 - 0 hours 1 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 0, 45, 0), end=datetime.datetime(2020, 12, 31, 0, 47, 0), id=1),  # 00:45:00 - 00:47:00 - 0 hours 2 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 1, 30, 0), end=datetime.datetime(2020, 12, 31, 1, 33, 0), id=2),   # 01:30:00 - 01:33:00 - 0 hours 3 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 2, 15, 0), end=datetime.datetime(2020, 12, 31, 2, 19, 0), id=3),   # 02:15:00 - 02:19:00 - 0 hours 4 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 3, 10, 0), end=datetime.datetime(2020, 12, 31, 3, 15, 0), id=4),   # 03:10:00 - 03:15:00 - 0 hours 5 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 4, 15, 0), end=datetime.datetime(2020, 12, 31, 4, 21, 0), id=5),   # 04:15:00 - 04:21:00 - 0 hours 6 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 5, 20, 0), end=datetime.datetime(2020, 12, 31, 5, 27, 0), id=6),  # 05:20:00 - 05:27:00 - 0 hours 7 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 6, 25, 0), end=datetime.datetime(2020, 12, 31, 6, 33, 0), id=7),  # 06:25:00 - 06:33:00 - 0 hours 8 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 7, 30, 0), end=datetime.datetime(2020, 12, 31, 7, 39, 0), id=8),  # 07:30:00 - 07:39:00 - 0 hours 9 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 8, 30, 0), end=datetime.datetime(2020, 12, 31, 8, 40, 0), id=9),   # 08:30:00 - 08:40:00 - 0 hours 10 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 9, 15, 0), end=datetime.datetime(2020, 12, 31, 9, 55, 0), id=10),  # 09:15:00 - 09:55:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 10, 5, 0), end=datetime.datetime(2020, 12, 31, 10, 40, 0), id=11), # 10:05:00 - 10:40:00 - 0 hours 35 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 11, 0, 0), end=datetime.datetime(2020, 12, 31, 11, 35, 0), id=12), # 11:00:00 - 11:35:00 - 0 hours 35 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 11, 50, 0), end=datetime.datetime(2020, 12, 31, 12, 30, 0), id=13), # 11:50:00 - 12:30:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 12, 45, 0), end=datetime.datetime(2020, 12, 31, 13, 20, 0), id=14), # 12:45:00 - 13:20:00 - 0 hours 35 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 13, 35, 0), end=datetime.datetime(2020, 12, 31, 14, 20, 0), id=15), # 13:35:00 - 14:20:00 - 0 hours 45 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 14, 35, 0), end=datetime.datetime(2020, 12, 31, 15, 15, 0), id=16), # 14:35:00 - 15:15:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 15, 30, 0), end=datetime.datetime(2020, 12, 31, 16, 15, 0), id=17), # 15:30:00 - 16:15:00 - 0 hours 45 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 16, 25, 0), end=datetime.datetime(2020, 12, 31, 17, 0, 0), id=18),  # 16:25:00 - 17:00:00 - 0 hours 35 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 17, 15, 0), end=datetime.datetime(2020, 12, 31, 17, 55, 0), id=19), # 17:15:00 - 17:55:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 18, 10, 0), end=datetime.datetime(2020, 12, 31, 18, 50, 0), id=20), # 18:10:00 - 18:50:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 19, 5, 0), end=datetime.datetime(2020, 12, 31, 19, 40, 0), id=21),  # 19:05:00 - 19:40:00 - 0 hours 35 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 19, 50, 0), end=datetime.datetime(2020, 12, 31, 20, 30, 0), id=22), # 19:50:00 - 20:30:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 20, 45, 0), end=datetime.datetime(2020, 12, 31, 21, 20, 0), id=23), # 20:45:00 - 21:20:00 - 0 hours 35 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 21, 35, 0), end=datetime.datetime(2020, 12, 31, 22, 10, 0), id=24), # 21:35:00 - 22:10:00 - 0 hours 35 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 22, 25, 0), end=datetime.datetime(2020, 12, 31, 23, 0, 0), id=25),  # 22:25:00 - 23:00:00 - 0 hours 35 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 23, 10, 0), end=datetime.datetime(2020, 12, 31, 23, 55, 0), id=26), # 23:10:00 - 23:55:00 - 0 hours 45 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 0, 10, 0), end=datetime.datetime(2020, 12, 31, 0, 55, 0), id=27),  # 00:10:00 - 00:55:00 - 0 hours 45 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 1, 5, 0), end=datetime.datetime(2020, 12, 31, 1, 45, 0), id=28),   # 01:05:00 - 01:45:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 2, 0, 0), end=datetime.datetime(2020, 12, 31, 2, 40, 0), id=29),   # 02:00:00 - 02:40:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 3, 0, 0), end=datetime.datetime(2020, 12, 31, 3, 50, 0), id=30),   # 03:00:00 - 03:50:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 4, 0, 0), end=datetime.datetime(2020, 12, 31, 4, 35, 0), id=31),   # 04:00:00 - 04:35:00 - 0 hours 35 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 5, 0, 0), end=datetime.datetime(2020, 12, 31, 5, 40, 0), id=32),   # 05:00:00 - 05:40:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 6, 0, 0), end=datetime.datetime(2020, 12, 31, 6, 50, 0), id=33),   # 06:00:00 - 06:50:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 7, 0, 0), end=datetime.datetime(2020, 12, 31, 7, 45, 0), id=34),   # 07:00:00 - 07:45:00 - 0 hours 45 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 8, 0, 0), end=datetime.datetime(2020, 12, 31, 8, 50, 0), id=35),   # 08:00:00 - 08:50:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 9, 0, 0), end=datetime.datetime(2020, 12, 31, 9, 55, 0), id=36),   # 09:00:00 - 09:55:00 - 0 hours 55 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 10, 10, 0), end=datetime.datetime(2020, 12, 31, 10, 55, 0), id=37),# 10:10:00 - 10:55:00 - 0 hours 45 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 11, 0, 0), end=datetime.datetime(2020, 12, 31, 11, 50, 0), id=38), # 11:00:00 - 11:50:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 12, 5, 0), end=datetime.datetime(2020, 12, 31, 12, 45, 0), id=39), # 12:05:00 - 12:45:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 13, 0, 0), end=datetime.datetime(2020, 12, 31, 13, 40, 0), id=40), # 13:00:00 - 13:40:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 14, 0, 0), end=datetime.datetime(2020, 12, 31, 14, 50, 0), id=41), # 14:00:00 - 14:50:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 15, 5, 0), end=datetime.datetime(2020, 12, 31, 15, 50, 0), id=42), # 15:05:00 - 15:50:00 - 0 hours 45 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 16, 0, 0), end=datetime.datetime(2020, 12, 31, 16, 55, 0), id=43), # 16:00:00 - 16:55:00 - 0 hours 55 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 17, 5, 0), end=datetime.datetime(2020, 12, 31, 17, 55, 0), id=44), # 17:05:00 - 17:55:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 18, 10, 0), end=datetime.datetime(2020, 12, 31, 19, 0, 0), id=45), # 18:10:00 - 19:00:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 19, 10, 0), end=datetime.datetime(2020, 12, 31, 20, 0, 0), id=46), # 19:10:00 - 20:00:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 20, 10, 0), end=datetime.datetime(2020, 12, 31, 21, 0, 0), id=47), # 20:10:00 - 21:00:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 21, 10, 0), end=datetime.datetime(2020, 12, 31, 22, 0, 0), id=48), # 21:10:00 - 22:00:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 22, 10, 0), end=datetime.datetime(2020, 12, 31, 23, 0, 0), id=49), # 22:10:00 - 23:00:00 - 0 hours 50 minutes
]


---

### MongoDB

In [13]:
# Mongo: time a $geoWithin / $box query for every bounding box.
# mongo_spatial_results[k] holds the documents returned for bounding_boxes[k];
# mongo_spatial_times[k] holds the matching elapsed time in seconds.

client: MongoClient = MongoClient(mongo_url)
mongo_spatial_times = []
mongo_spatial_results = []
try:
    db: Database = client[mongo_database]
    collection: Collection = db[mongo_collection]

    logger.info("Running Mongo spatial")
    for bbox in bounding_boxes:
        min_lon, min_lat, max_lon, max_lat = bbox.get_coords()

        # $box takes the two opposite corners as [lon, lat] pairs.
        query = {
            'geometry': {
                '$geoWithin': {
                    '$box': [
                        [min_lon, min_lat],
                        [max_lon, max_lat]
                    ]
                }
            }
        }
        # perf_counter() is monotonic, so the measurement cannot be skewed by clock
        # adjustments. list() drains the cursor, so fetching is part of the timing.
        start_time = time.perf_counter()
        mongo_results = list(collection.find(query, {'_id': False}))
        end_time = time.perf_counter()
        logger.info(f"{bbox.get_id()}. Query time: {end_time - start_time}, no. of results: {len(mongo_results)}, bbox {bbox.get_id()}")
        mongo_spatial_times.append(end_time - start_time)
        mongo_spatial_results.append(mongo_results)

    # Guard against an empty bounding_boxes list (would otherwise divide by zero).
    if mongo_spatial_times:
        logger.info(f"Mongo spatial average time: {sum(mongo_spatial_times) / len(mongo_spatial_times)}")
finally:
    # Release the connection even when a query fails or the cell is interrupted.
    client.close()

2024-09-06 21:57:50,631 - INFO: Running Mongo spatial
2024-09-06 21:57:50,633 - INFO: {"message": "Waiting for suitable server to become available", "selector": "Primary()", "operation": "find", "topologyDescription": "<TopologyDescription id: 66db5ebe40f62ad908ee49c9, topology_type: Unknown, servers: [<ServerDescription ('localhost', 55001) server_type: Unknown, rtt: None>]>", "clientId": {"$oid": "66db5ebe40f62ad908ee49c9"}, "remainingTimeMS": 29}
2024-09-06 21:57:58,976 - INFO: 0. Query time: 8.343759775161743, no. of results: 2611, bbox 0
2024-09-06 21:58:07,702 - INFO: 1. Query time: 8.724345922470093, no. of results: 94798, bbox 1
2024-09-06 21:58:16,402 - INFO: 2. Query time: 8.699569702148438, no. of results: 3813, bbox 2
2024-09-06 21:58:24,360 - INFO: 3. Query time: 7.957097291946411, no. of results: 3242, bbox 3
2024-09-06 21:58:35,242 - INFO: 4. Query time: 10.880998849868774, no. of results: 102114, bbox 4
2024-09-06 21:58:43,899 - INFO: 5. Query time: 8.656105995178223, n



---

### InfluxDB

In [None]:
# Start from an empty list of InfluxDB query results.
influx_time_results = []

In [8]:
# Influx: time geo.filterRows over the first bounding box at increasing grid levels.
# The endpoint (influx_url), token and organisation come from the "Influx setup" cell;
# they are deliberately not re-hardcoded here.

bucket = "aisdata_s2indexed_lvl24"
start_date = "2020-12-31T00:00:00Z"
stop_date = "2020-12-31T00:00:59Z"
strict = "true"  # interpolated verbatim into the Flux script
levels = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]

# Only the first bounding box is benchmarked; the level is what varies.
bbox = bounding_boxes[0]
min_lon, min_lat, max_lon, max_lat = bbox.get_coords()

# Results of this spatial benchmark, kept apart from the temporal InfluxDB results.
influx_spatial_results = []
influx_spatial_times = []

# timeout is in milliseconds (the 600000 ms setting shows up as a ~600 s read timeout).
client = InfluxDBClient(url=influx_url, token=influx_token, org=influx_org,
                        timeout=600000)
try:
    query_api = client.query_api()
    for level in levels:
        query = f"""
    import "experimental/geo"
    
    region = {{
        minLat: {min_lat},
        maxLat: {max_lat},
        minLon: {min_lon},  
        maxLon: {max_lon},
    }}
    
    from(bucket: "{bucket}")
        |> range(start: {start_date}, stop: {stop_date})
        |> filter(fn: (r) => r._measurement == "vessels_ais_31_12")
        |> geo.filterRows(region: region, level: {level}, strict: {strict})
    """

        # Time only the query itself; bookkeeping happens after the clock is stopped.
        start_time = time.perf_counter()
        influx_results = query_api.query(query=query)
        end_time = time.perf_counter()

        influx_spatial_results.append(influx_results)
        influx_spatial_times.append(end_time - start_time)

        record_count = sum(len(table.records) for table in influx_results)
        logger.info(f"Query took {end_time - start_time} seconds, no. of results: {record_count}. Level: {level}")
finally:
    # Close the client even when a query times out or the cell is interrupted.
    client.close()


2024-08-26 21:54:03,167 - INFO: Query took 109.31931138038635 seconds, no. of results: 2. Level: 10
2024-08-26 21:55:52,684 - INFO: Query took 109.51598715782166 seconds, no. of results: 2. Level: 11
2024-08-26 21:57:37,217 - INFO: Query took 104.5322802066803 seconds, no. of results: 2. Level: 12
2024-08-26 21:59:23,008 - INFO: Query took 105.78923559188843 seconds, no. of results: 2. Level: 13
2024-08-26 22:01:13,345 - INFO: Query took 110.3360652923584 seconds, no. of results: 2. Level: 14
2024-08-26 22:03:25,101 - INFO: Query took 131.7553825378418 seconds, no. of results: 2. Level: 15
2024-08-26 22:07:06,040 - INFO: Query took 220.93793106079102 seconds, no. of results: 2. Level: 16
2024-08-26 22:16:02,907 - INFO: Query took 536.8655359745026 seconds, no. of results: 2. Level: 17


ReadTimeoutError: HTTPConnectionPool(host='localhost', port=55002): Read timed out. (read timeout=599.9994445479997)

In [None]:
# Notes from manual runs of the Influx spatial query (kept as comments so the cell
# does not evaluate to a string):
#
# 109 seconds, 71 seconds for level 10. Terrible.
# Level 10 -    72s     2 results
# Level 11 -    122s    2 results
# Level 12 -    113s    2 results
# Level 13 -    134s    2 results
# Level 14 -    122s    2 results
# Level 15 -    139s    2 results
# Level 16 -    222s    2 results
# Level 17 -    573s    2 results
# Level 18 -    timeout after 600 seconds
# Level 19 -
# Level 20 -
# Level 24 - does not work

---

### MobilityDB

In [14]:
# Mobility: time a bounding-box query for every bounding box.
# mobility_spatial_results[k] holds the rows returned for bounding_boxes[k];
# mobility_spatial_times[k] holds the matching elapsed time in seconds.
logger.info("Running Mobility spatial")

# The envelope corners are bound as named query parameters instead of being formatted
# into the SQL text. The inner query keeps routes that intersect the envelope; the
# outer one keeps only the individual instants that fall inside it.
mobility_spatial_sql = '''
select mmsi, st_asgeojson(geom), timestamp from
(
select mmsi, unnest(instants(route))::geometry as geom, starttimestamp(unnest(instants(route))) as timestamp 
from aggregated_vessel_positions
where eintersects(st_setsrid(ST_MakeEnvelope(%(min_lon)s, %(min_lat)s, %(max_lon)s, %(max_lat)s), 4326), route)
)
where st_intersects(st_setsrid(ST_MakeEnvelope(%(min_lon)s, %(min_lat)s, %(max_lon)s, %(max_lat)s), 4326), geom);
'''
rows_limit_per_fetch = 5000

mobility_spatial_times = []
mobility_spatial_results = []

conn = psycopg2.connect(
    database=mobility_database,
    host=mobility_host,
    user=mobility_user,
    password=mobility_password,
    port=mobility_port,
)
try:
    with conn.cursor() as cursor:
        for bbox in bounding_boxes:
            min_lon, min_lat, max_lon, max_lat = bbox.get_coords()
            # perf_counter() is monotonic; fetching the rows is part of the measurement.
            start_time = time.perf_counter()
            cursor.execute(mobility_spatial_sql, {
                "min_lon": min_lon,
                "min_lat": min_lat,
                "max_lon": max_lon,
                "max_lat": max_lat,
            })

            # Pull the result set in chunks of rows_limit_per_fetch rows.
            mobility_result = []
            while True:
                rows_queried = cursor.fetchmany(size=rows_limit_per_fetch)
                if not rows_queried:
                    break
                mobility_result += rows_queried
            end_time = time.perf_counter()
            logger.info(f"{bbox.get_id()}. Query time: {end_time - start_time}, no. of results: {len(mobility_result)}, bbox {bbox.get_id()}")
            mobility_spatial_times.append(end_time - start_time)
            mobility_spatial_results.append(mobility_result)
finally:
    # Close the connection even when a query fails or the cell is interrupted.
    conn.close()

# Guard against an empty bounding_boxes list (would otherwise divide by zero).
if mobility_spatial_times:
    logger.info(f"Mobility spatial average time: {sum(mobility_spatial_times) / len(mobility_spatial_times)}")


2024-09-06 22:01:26,156 - INFO: Running Mobility spatial
2024-09-06 22:01:27,591 - INFO: 0. Query time: 1.3774068355560303, no. of results: 2608, bbox 0
2024-09-06 22:01:28,797 - INFO: 1. Query time: 1.2048773765563965, no. of results: 90446, bbox 1
2024-09-06 22:01:29,475 - INFO: 2. Query time: 0.6769051551818848, no. of results: 3618, bbox 2
2024-09-06 22:01:30,234 - INFO: 3. Query time: 0.7580959796905518, no. of results: 2839, bbox 3
2024-09-06 22:01:31,698 - INFO: 4. Query time: 1.4630646705627441, no. of results: 96856, bbox 4
2024-09-06 22:01:32,700 - INFO: 5. Query time: 0.9999806880950928, no. of results: 54516, bbox 5
2024-09-06 22:01:33,372 - INFO: 6. Query time: 0.6715102195739746, no. of results: 953, bbox 6
2024-09-06 22:01:34,250 - INFO: 7. Query time: 0.8773725032806396, no. of results: 26290, bbox 7
2024-09-06 22:01:36,012 - INFO: 8. Query time: 1.7612102031707764, no. of results: 106132, bbox 8
2024-09-06 22:01:36,657 - INFO: 9. Query time: 0.644883394241333, no. of r

In [None]:
# Row count of the most recent MobilityDB query (mobility_result is rebound on every loop iteration).
len(mobility_result)

---

## Temporal queries

---

### MongoDB

In [16]:
# Mongo: time a BaseDateTime range query for every timespan.
# mongo_time_results[k] holds the documents returned for timespans[k];
# mongo_time_times[k] holds the matching elapsed time in seconds.

client: MongoClient = MongoClient(mongo_url)
mongo_time_results = []
mongo_time_times = []
try:
    db: Database = client[mongo_database]
    collection: Collection = db[mongo_collection]

    logger.info("Running Mongo time")
    for timespan in timespans:
        start, end = timespan.get_start_end()
        # The documents are GeoJSON features whose timestamp lives under "properties"
        # (the map popup reads entry['properties']['BaseDateTime']), so the filter must
        # use the dotted path. A top-level "BaseDateTime" filter matches nothing.
        # NOTE(review): this compares against datetime objects and therefore assumes
        # BaseDateTime is stored as a BSON date. If the loader stored ISO strings,
        # compare against start.isoformat() / end.isoformat() instead -- confirm.
        query = {
            "properties.BaseDateTime": {
                "$gte": start,
                "$lte": end
            }
        }
        # perf_counter() is monotonic; list() drains the cursor inside the timed region.
        start_time = time.perf_counter()
        mongo_time_result = list(collection.find(query, {'_id': False}))
        end_time = time.perf_counter()
        logger.info(f"{timespan.get_id()}. Query time: {end_time - start_time}, no. of results: {len(mongo_time_result)}, timespan {timespan.get_id()}")
        mongo_time_times.append(end_time - start_time)
        # Keep every timespan's result instead of overwriting the accumulator.
        mongo_time_results.append(mongo_time_result)

    # Guard against an empty timespans list (would otherwise divide by zero).
    if mongo_time_times:
        logger.info(f"Mongo time average time: {sum(mongo_time_times) / len(mongo_time_times)}")
finally:
    # Release the connection even when a query fails or the cell is interrupted.
    client.close()

2024-09-03 22:12:21,928 - INFO: Running Mongo spatial
2024-09-03 22:12:21,930 - INFO: {"message": "Waiting for suitable server to become available", "selector": "Primary()", "operation": "find", "topologyDescription": "<TopologyDescription id: 66d76da5a7f106175c627032, topology_type: Unknown, servers: [<ServerDescription ('localhost', 55001) server_type: Unknown, rtt: None>]>", "clientId": {"$oid": "66d76da5a7f106175c627032"}, "remainingTimeMS": 29}
2024-09-03 22:12:21,944 - INFO: 0. Query time: 0.014941930770874023, no. of results: 0, timespan 0
2024-09-03 22:12:21,948 - INFO: 1. Query time: 0.002562284469604492, no. of results: 0, timespan 1
2024-09-03 22:12:21,951 - INFO: 2. Query time: 0.0019047260284423828, no. of results: 0, timespan 2
2024-09-03 22:12:21,953 - INFO: 3. Query time: 0.001966714859008789, no. of results: 0, timespan 3
2024-09-03 22:12:21,956 - INFO: 4. Query time: 0.0018055438995361328, no. of results: 0, timespan 4
2024-09-03 22:12:21,958 - INFO: 5. Query time: 0.

---

### InfluxDB

In [73]:
# Influx: time a range query for every timespan.
# influx_time_results[k] holds the tables returned for timespans[k];
# influx_time_times[k] holds the matching elapsed time in seconds.
influx_time_results = []
influx_time_times = []
bucket = "temp_bucket_2"

# timeout is given in milliseconds.
client = InfluxDBClient(url=influx_url, token=influx_token, org=influx_org,
                        timeout=600000)
try:
    query_api = client.query_api()
    for timespan in timespans:
        start, end = timespan.get_start_end()

        # The timespans are naive datetimes; "Z" is appended so Flux reads them as UTC.
        # The fields are stored as "LAT" / "LON", so after the pivot they are renamed to
        # the lower-case "lat" / "lon" columns that keep() selects and that the map code
        # reads. Without the rename, keep() would drop the coordinates entirely.
        query = f"""
    from(bucket: "{bucket}")
      |> range(start: {start.isoformat()}Z, stop: {end.isoformat()}Z)
      |> filter(fn: (r) => r._measurement == "vessels_ais_31_12")
      |> filter(fn: (r) => r._field == "LAT" or r._field == "LON")
      |> pivot(rowKey: ["_time"], columnKey: ["_field"], valueColumn: "_value")
      |> rename(columns: {{LAT: "lat", LON: "lon"}})
      |> keep(columns: ["_time", "MMSI", "lat", "lon"])
    """

        # perf_counter() is monotonic; only the query call itself is timed.
        start_time = time.perf_counter()
        influx_time_result = query_api.query(query=query)
        end_time = time.perf_counter()

        influx_time_results.append(influx_time_result)
        influx_time_times.append(end_time - start_time)
        record_count = sum(len(table.records) for table in influx_time_result)
        logger.info(f"{timespan.get_id()}. Query time: {end_time - start_time}, no. of results: {record_count}, timespan {timespan.get_id()}")
finally:
    # Close the client even when a query fails or the cell is interrupted.
    client.close()


2024-09-06 23:09:05,111 - INFO: 0. Query time: 2.045814037322998, no. of results: 5306, timespan 0
2024-09-06 23:09:07,080 - INFO: 1. Query time: 1.9648048877716064, no. of results: 9651, timespan 1
2024-09-06 23:09:09,161 - INFO: 2. Query time: 2.0773916244506836, no. of results: 14230, timespan 2
2024-09-06 23:09:11,770 - INFO: 3. Query time: 2.6053249835968018, no. of results: 19212, timespan 3
2024-09-06 23:09:14,581 - INFO: 4. Query time: 2.8080878257751465, no. of results: 22983, timespan 4
2024-09-06 23:09:17,600 - INFO: 5. Query time: 3.015068531036377, no. of results: 27930, timespan 5
2024-09-06 23:09:21,950 - INFO: 6. Query time: 4.344555377960205, no. of results: 31990, timespan 6
2024-09-06 23:09:26,072 - INFO: 7. Query time: 4.1177568435668945, no. of results: 36551, timespan 7


KeyboardInterrupt: 

In [47]:
# Mobility: time a temporal restriction query for every timespan.
# mobility_time_results[k] holds the rows returned for timespans[k];
# mobility_time_times[k] holds the matching elapsed time in seconds.
logger.info("Running Mobility time")

# The span literal is bound as a query parameter instead of being formatted into the
# SQL text. Each route is restricted to the span and then unnested into its instants.
mobility_time_sql = '''
SELECT 
    mmsi, 
    st_asgeojson(instance::geometry),
    starttimestamp(instance)
FROM 
    (
    SELECT
        mmsi,
        unnest(instants(attime(route, tstzspan(%(span)s)))) as instance 
    FROM
        aggregated_vessel_positions
    );
'''
rows_limit_per_fetch = 5000

mobility_time_results = []
mobility_time_times = []

conn = psycopg2.connect(
    database=mobility_database,
    host=mobility_host,
    user=mobility_user,
    password=mobility_password,
    port=mobility_port,
)
try:
    with conn.cursor() as cursor:
        for timespan in timespans:
            start, end = timespan.get_start_end()
            # Square brackets mark both bounds of the span as inclusive.
            span = f"[{start.isoformat()}, {end.isoformat()}]"
            # perf_counter() is monotonic; fetching the rows is part of the measurement.
            start_time = time.perf_counter()
            cursor.execute(mobility_time_sql, {"span": span})

            # Pull the result set in chunks of rows_limit_per_fetch rows.
            mobility_result = []
            while True:
                rows_queried = cursor.fetchmany(size=rows_limit_per_fetch)
                if not rows_queried:
                    break
                mobility_result += rows_queried
            end_time = time.perf_counter()
            logger.info(f"{timespan.get_id()}. Query time: {end_time - start_time}, no. of results: {len(mobility_result)}, timespan {timespan.get_id()}")
            mobility_time_times.append(end_time - start_time)
            mobility_time_results.append(mobility_result)
finally:
    # Close the connection even when a query fails or the cell is interrupted.
    conn.close()

# Guard against an empty timespans list (would otherwise divide by zero).
if mobility_time_times:
    logger.info(f"Mobility time average time: {sum(mobility_time_times) / len(mobility_time_times)}")


2024-09-06 22:43:34,222 - INFO: Running Mobility time
2024-09-06 22:43:35,699 - INFO: 0. Query time: 0.5085461139678955, no. of results: 10638, timespan 0
2024-09-06 22:43:36,117 - INFO: 1. Query time: 0.4168670177459717, no. of results: 29615, timespan 1
2024-09-06 22:43:36,594 - INFO: 2. Query time: 0.47581958770751953, no. of results: 34099, timespan 2
2024-09-06 22:43:37,020 - INFO: 3. Query time: 0.4250805377960205, no. of results: 38848, timespan 3
2024-09-06 22:43:37,558 - INFO: 4. Query time: 0.5375618934631348, no. of results: 42554, timespan 4
2024-09-06 22:43:38,105 - INFO: 5. Query time: 0.5453696250915527, no. of results: 47360, timespan 5
2024-09-06 22:43:38,632 - INFO: 6. Query time: 0.5263233184814453, no. of results: 51258, timespan 6
2024-09-06 22:43:39,187 - INFO: 7. Query time: 0.5540046691894531, no. of results: 55595, timespan 7
2024-09-06 22:43:39,757 - INFO: 8. Query time: 0.5693964958190918, no. of results: 60088, timespan 8
2024-09-06 22:43:40,313 - INFO: 9. Q

In [42]:
# Check how a timespan endpoint serialises: the datetimes are naive, so isoformat()
# yields no UTC suffix.
s,e = timespans[0].get_start_end()
print(s.isoformat())
# Format with a trailing "Z" (which the Influx range() call appends), e.g. 2020-12-31T13:30:00Z

2020-12-31T00:00:00


---

### MobilityDB

---

## Spatiotemporal queries

---

### MongoDB

In [None]:
# Mongo

---

### InfluxDB

In [None]:
# Influx

---

### MobilityDB

In [None]:
# Mobility

---

## Results

---

In [None]:
# Results

# Visualisations

---

In [None]:
# Index of the bounding box whose MongoDB spatial results are drawn.
i = 6

# Draw the MongoDB results for bounding box i together with the outline of that box.
# get_results_folium returns None (after printing a notice) when more than 3000 points are passed.
m: folium.Map = get_results_folium(mongo_output=mongo_spatial_results[i], bounding_boxes_to_display=bounding_boxes[i:i + 1])
m

In [49]:
# Draw the first 2500 rows of the MobilityDB temporal result at index 5;
# the truthy display_large_results value switches the size guard off.
m: folium.Map = get_results_folium(mobility_output=mobility_time_results[5][:2500], display_large_results=1)#, bounding_boxes_to_display=bounding_boxes[0:1])
m

In [None]:
# Draw only the outlines of all benchmark bounding boxes.
m: folium.Map = get_results_folium(bounding_boxes_to_display=bounding_boxes)
m

In [None]:
# Sizes of the most recent per-query results; both names are rebound on every loop
# iteration, so they hold only the last query's rows.
print(len(mongo_results))
print(len(mobility_result))