In [18]:
    
import random
import datetime
import logging
import os
import time

import psycopg2
from dotenv import load_dotenv
from influxdb_client import InfluxDBClient
from pymongo import MongoClient
from pymongo.database import Database, Collection
import json
import logger_setup

# mongo
import folium


def get_results_folium(mongo_output=[], influx_tables=[], mobility_output=[], bounding_boxes_to_display=[],
                       display_large_results=False):
    no_points = len(mongo_output) + len(influx_tables) + len(mobility_output)
    if no_points > 3000 and display_large_results == False:
        print(f"There are many results to display ({no_points}). If you are certain you want to do that change display_large_results to True.")
        return
    CHICAGO_COORDINATES = (42, -95)

    map_attributions = ('&copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a> '
                        'contributors, &copy; <a href="http://cartodb.com/attributions">CartoDB</a>')

    folium_map = folium.Map(location=CHICAGO_COORDINATES,
                            attr=map_attributions,
                            zoom_start=5,
                            control_scale=True,
                            height=800,
                            width=1400)
    
    for entry in mongo_output:
        entry['properties']['BaseDateTime'] = entry['properties']['BaseDateTime'].isoformat()
        geojson = (folium.GeoJson(entry))
        popup = folium.Popup(
            f"mmsi: {entry['properties']['MMSI']}, name {entry['properties']['VesselName']}, time: {entry['properties']['BaseDateTime']}")
        popup.add_to(geojson)
        geojson.add_to(folium_map)

    for table in influx_tables:
        for record in table:
            popup = folium.Popup(f"mmsi: {record['MMSI']}, time: {record['_time']}")
            folium.Marker(location=[record["lat"], record["lon"]],
                          popup=popup,
                          icon=folium.Icon(color='blue', icon='ship', prefix='fa')).add_to(folium_map)

    for mobility_output in mobility_output:
        mmsi = mobility_output[0]
        geojson_string = mobility_output[1]
        timestamp = mobility_output[2]

        # Parse the GeoJSON string to a Python dictionary
        geojson_obj = json.loads(geojson_string)

        # Extract coordinates
        coordinates = geojson_obj['coordinates']
        lat, lon = coordinates[1], coordinates[0]

        # Create the popup content
        popup_content = f"MMSI: {mmsi}<br>Timestamp: {timestamp}"

        # Add the point to the map
        folium.Marker(
            location=[lat, lon],
            popup=popup_content
        ).add_to(folium_map)

    for bbox in bounding_boxes_to_display:
        coords = bbox.get_coords()
        min_lon, min_lat, max_lon, max_lat = coords

        # Create the bounding box as a rectangle
        folium.Rectangle(
            bounds=[(min_lat, min_lon), (max_lat, max_lon)],
            color="blue",  # You can change the color as needed
            fill=True,
            popup=bbox.get_id(),
            fill_opacity=0.2
        ).add_to(folium_map)

    return folium_map

class Bbox:
    def __init__(self, min_lon, min_lat, max_lon, max_lat, id: int):
        self.min_lon = min_lon
        self.min_lat = min_lat
        self.max_lon = max_lon
        self.max_lat = max_lat
        self.id = id

    def get_coords(self):
        return [self.min_lon, self.min_lat, self.max_lon, self.max_lat]
    
    def get_id(self):
        return self.id

class Timespan:
    def __init__(self, start: datetime.datetime, end: datetime.datetime, id: int):
        self.start = start
        self.end = end
        self.id = id
    
    def get_start_end(self):
        return self.start, self.end
    
    def get_id(self):
        return self.id

def generate_random_timespans(count):
    timespans = []
    for i in range(count):
        # Generate random start time between 00:00 and 23:00
        start_hour = random.randint(0, 23)
        start_minute = random.randint(0, 59)
        start_second = random.randint(0, 59)
        start_time = datetime.datetime(2020, 12, 31, start_hour, start_minute, start_second)

        # Generate random duration between 10 minutes and 3 hours
        min_duration = 10
        max_duration = 180
        duration_minutes = random.randint(min_duration, max_duration)
        end_time = start_time + datetime.timedelta(minutes=duration_minutes)

        # Ensure end time does not exceed the day
        if end_time > datetime.datetime(2020, 12, 31, 23, 59, 59):
            end_time = datetime.datetime(2020, 12, 31, 23, 59, 59)

        # Create the Timespan object and append to the list
        timespans.append(
            Timespan(
                start=start_time,
                end=end_time,
                id=i
            )
        )
    
    return timespans

def get_time_passed(end_time : float, start_time : float) -> float:
    time_passed = start_time - end_time
    return time_passed

def get_time_per_result(time_passed: float, no_results: float) -> float:
    time_per_result = time_passed / no_results
    return time_per_result

def log_spatial_info(iteration: int, iterations_no: int, time_passed: float, time_per_result: float, no_results: int):
    logger.info(f"{iteration}/{iterations_no}. Query time: {round(time_passed, 5)}, query time per result {round(time_per_result,5)} no. of results: {no_results}, bbox {iteration}")

def log_time_info(iteration: int, iterations_no: int, time_passed: float, time_per_result: float, no_results: int, timespan: Timespan):
    start, end = timespan.get_start_end()
    logger.info(f"{iteration}/{iterations_no}. Query time: {round(time_passed, 5)}, query time per result {round(time_per_result,5)} no. of results: {no_results}, timespan: {start}-{end}")

# Ciekawy statek:
# 215131000

logger_setup.setup_logging(level=logging.INFO)
logger = logging.getLogger()
load_dotenv()

True

---

# Setup

## Mongo setup

In [2]:
mongo_url = "mongodb://localhost:" + os.environ.get("MONGO_PORT", "55000")
mongo_database = "temp"
mongo_collection = "aisdata31-12-2020"

logger.info(f"MongoDB endpoint: {mongo_url}")
logger.info(f"MongoDB database name: {mongo_database}")
logger.info(f"MongoDB collection name: {mongo_collection}")

2024-09-15 19:27:49,953 - INFO: MongoDB endpoint: mongodb://localhost:55001
2024-09-15 19:27:49,955 - INFO: MongoDB database name: temp
2024-09-15 19:27:49,955 - INFO: MongoDB collection name: aisdata31-12-2020


## Influx setup

In [3]:

influx_token = os.environ.get("API_INFLUX_KEY")
influx_org = os.environ.get("INFLUX_ORG_ID")
influx_url = "http://localhost:" + os.environ.get("INFLUX_PORT", "55000")

logger.info(f"InfluxDB Token: {influx_token}")
logger.info(f"InfluxDB Organization id: {influx_org}")
logger.info(f"InfluxDB Database endpoint: {influx_url}")

2024-09-15 19:27:50,977 - INFO: InfluxDB Token: EJq62wZIuaOddyb4RZtujdg4Pv_o2lO5SNAdB5Dme5rK1bNkniAgMLnxLLugzT-epKiE4NVI71oMuZkfdj4ewg==
2024-09-15 19:27:50,979 - INFO: InfluxDB Organization id: bc3f6fcfff4173ac
2024-09-15 19:27:50,980 - INFO: InfluxDB Database endpoint: http://localhost:55002


## MobilityDB setup

In [4]:
mobility_host = os.environ.get("MOBILITY_HOST")
mobility_port = os.environ.get("MOBILITY_PORT")
mobility_user = os.environ.get("MOBILITY_USER")
mobility_password = os.environ.get("MOBILITY_PASSWORD")
mobility_database = os.environ.get("MOBILITY_DATABASE")

logger.info(f"MobilityDB endpoint: {mobility_host}:{mobility_port}")
logger.info(f"MobilityDB user name: {mobility_user}")
logger.info(f"MobilityDB database name: {mobility_database}")

2024-09-15 19:27:52,956 - INFO: MobilityDB endpoint: 172.17.0.1:55000
2024-09-15 19:27:52,958 - INFO: MobilityDB user name: postgres
2024-09-15 19:27:52,959 - INFO: MobilityDB database name: aisdata


# Queries

---

## Spatial queries

---

### Datasest

In [5]:


bounding_boxes = [
    Bbox(-123.247925, 48.136125, -122.739476, 48.362910, 0),  # Puget Sound, Washington
    Bbox(-123.016525, 37.639830, -122.283450, 37.929824, 1),  # San Francisco Bay, California
    Bbox(-76.510574, 37.973348, -75.962608, 38.393338, 2),  # Chesapeake Bay, Maryland/Virginia
    Bbox(-88.161018, 30.334953, -87.927567, 30.639975, 3),  # Mobile Bay, Alabama
    Bbox(-95.104218, 29.327599, -94.617409, 29.623018, 4),  # Galveston Bay, Texas
    Bbox(-82.775543, 27.599938, -82.320755, 27.934847, 5),  # Tampa Bay, Florida
    Bbox(-122.019295, 36.776848, -121.819153, 37.018274, 6),  # Monterey Bay, California
    Bbox(-71.484741, 41.454842, -71.173431, 41.735072, 7),  # Narragansett Bay, Rhode Island
    Bbox(-117.253113, 32.600235, -117.085083, 32.736514, 8),  # San Diego Bay, California
    Bbox(-88.135986, 44.474116, -87.745605, 44.794497, 9),  # Green Bay
    Bbox(-80.45290918232058, 29.060643707480367, -78.32704059023007, 31.29195079716895, 10),  # Georgia coast
    Bbox(-77.36586166597581, 31.282283517600803, -75.27345529761102, 33.760865420475, 11),  # North Carolina coast
    Bbox(-74.51771061340146, 34.75075477385059, -71.85177110891351, 37.752840882799006, 12),  # Delaware Bay area
    Bbox(-73.8132903076812, 39.28679980551155, -69.84951150110152, 40.36716788459955, 13),  # Long Island Sound, New York
    Bbox(-87.53216792628709, 42.614159443972795, -86.57811342936832, 43.95974516921851, 14),  # Lake Michigan
    Bbox(-88.8185473646937, 47.31402814776524, -85.97885758914987, 48.36369038185671, 15),  # Lake Superior
    Bbox(-119.28398346805034, 28.592051721892147, -116.88328729269529, 32.65616790678931, 16),  # Baja California coast
    Bbox(-115.22516221315561, 21.159217960533027, -112.50145598046407, 25.734344868162523, 17),  # Gulf of California 0
    Bbox(-111.04125138719715, 20.598750857797, -105.33212892067695, 22.5805686602515, 18),  # Northern Gulf of California 0
    Bbox(-96.79281541240942, 25.885529478254256, -93.32496228352338, 28.099129345180913, 19),  # Gulf of Mexico
    Bbox(-88.54476002663944, 27.4284592236325, -84.0568785413999, 30.09273196827901, 20),  # Gulf of Mexico, Alabama
    Bbox(-128.9871203472166, 40.000911885515904, -125.04987957500578, 48.818149529347096, 21),  # Alaska coast
    Bbox(-126.4987002469207, 35.49916842114703, -122.76107863128735, 38.76867969769498, 22),  # Off the coast of California
    Bbox(-90.41838995337584, 30.039892740717292, -89.89240461303508, 30.37633672889592, 23),  # Mississippi coast
    Bbox(-123.02323990734207, 32.99070496259917, -119.56454319294964, 35.24101423046929, 24)  # Northern California coast
]
 



timespans = [
    Timespan(start=datetime.datetime(2020, 12, 31, 0, 0, 0), end=datetime.datetime(2020, 12, 31, 0, 1, 0), id=0),  # 00:00:00 - 00:30:00 - 0 hours 1 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 0, 45, 0), end=datetime.datetime(2020, 12, 31, 0, 47, 0), id=1),  # 00:45:00 - 01:15:00 - 0 hours 2 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 1, 30, 0), end=datetime.datetime(2020, 12, 31, 1, 33, 0), id=2),   # 01:30:00 - 02:00:00 - 0 hours 3 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 2, 15, 0), end=datetime.datetime(2020, 12, 31, 2, 19, 0), id=3),   # 02:15:00 - 03:00:00 - 0 hours 4 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 3, 10, 0), end=datetime.datetime(2020, 12, 31, 3, 15, 0), id=4),   # 03:10:00 - 04:05:00 - 0 hours 5 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 4, 15, 0), end=datetime.datetime(2020, 12, 31, 4, 21, 0), id=5),   # 04:15:00 - 05:00:00 - 0 hours 6 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 5, 20, 0), end=datetime.datetime(2020, 12, 31, 5, 27, 0), id=6),  # 05:20:00 - 06:10:00 - 0 hours 7 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 6, 25, 0), end=datetime.datetime(2020, 12, 31, 6, 33, 0), id=7),  # 06:25:00 - 07:10:00 - 0 hours 8 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 7, 30, 0), end=datetime.datetime(2020, 12, 31, 7, 39, 0), id=8),  # 07:30:00 - 08:15:00 - 0 hours 9 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 8, 30, 0), end=datetime.datetime(2020, 12, 31, 8, 40, 0), id=9),   # 08:30:00 - 09:00:00 - 0 hours 10 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 9, 15, 0), end=datetime.datetime(2020, 12, 31, 9, 55, 0), id=10),  # 09:15:00 - 09:55:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 10, 5, 0), end=datetime.datetime(2020, 12, 31, 10, 40, 0), id=11), # 10:05:00 - 10:40:00 - 0 hours 35 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 11, 0, 0), end=datetime.datetime(2020, 12, 31, 11, 35, 0), id=12), # 11:00:00 - 11:35:00 - 0 hours 35 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 11, 50, 0), end=datetime.datetime(2020, 12, 31, 12, 30, 0), id=13), # 11:50:00 - 12:30:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 12, 45, 0), end=datetime.datetime(2020, 12, 31, 13, 20, 0), id=14), # 12:45:00 - 13:20:00 - 0 hours 35 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 13, 35, 0), end=datetime.datetime(2020, 12, 31, 14, 20, 0), id=15), # 13:35:00 - 14:20:00 - 0 hours 45 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 14, 35, 0), end=datetime.datetime(2020, 12, 31, 15, 15, 0), id=16), # 14:35:00 - 15:15:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 15, 30, 0), end=datetime.datetime(2020, 12, 31, 16, 15, 0), id=17), # 15:30:00 - 16:15:00 - 0 hours 45 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 16, 25, 0), end=datetime.datetime(2020, 12, 31, 17, 0, 0), id=18),  # 16:25:00 - 17:00:00 - 0 hours 35 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 17, 15, 0), end=datetime.datetime(2020, 12, 31, 17, 55, 0), id=19), # 17:15:00 - 17:55:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 18, 10, 0), end=datetime.datetime(2020, 12, 31, 18, 50, 0), id=20), # 18:10:00 - 18:50:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 19, 5, 0), end=datetime.datetime(2020, 12, 31, 19, 40, 0), id=21),  # 19:05:00 - 19:40:00 - 0 hours 35 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 19, 50, 0), end=datetime.datetime(2020, 12, 31, 20, 30, 0), id=22), # 19:50:00 - 20:30:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 20, 45, 0), end=datetime.datetime(2020, 12, 31, 21, 20, 0), id=23), # 20:45:00 - 21:20:00 - 0 hours 35 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 21, 35, 0), end=datetime.datetime(2020, 12, 31, 22, 10, 0), id=24), # 21:35:00 - 22:10:00 - 0 hours 35 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 22, 25, 0), end=datetime.datetime(2020, 12, 31, 23, 0, 0), id=25),  # 22:25:00 - 23:00:00 - 0 hours 35 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 23, 10, 0), end=datetime.datetime(2020, 12, 31, 23, 55, 0), id=26), # 23:10:00 - 23:55:00 - 0 hours 45 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 0, 10, 0), end=datetime.datetime(2020, 12, 31, 0, 55, 0), id=27),  # 00:10:00 - 00:55:00 - 0 hours 45 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 1, 5, 0), end=datetime.datetime(2020, 12, 31, 1, 45, 0), id=28),   # 01:05:00 - 01:45:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 2, 0, 0), end=datetime.datetime(2020, 12, 31, 2, 40, 0), id=29),   # 02:00:00 - 02:40:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 3, 0, 0), end=datetime.datetime(2020, 12, 31, 3, 50, 0), id=30),   # 03:00:00 - 03:50:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 4, 0, 0), end=datetime.datetime(2020, 12, 31, 4, 35, 0), id=31),   # 04:00:00 - 04:35:00 - 0 hours 35 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 5, 0, 0), end=datetime.datetime(2020, 12, 31, 5, 40, 0), id=32),   # 05:00:00 - 05:40:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 6, 0, 0), end=datetime.datetime(2020, 12, 31, 6, 50, 0), id=33),   # 06:00:00 - 06:50:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 7, 0, 0), end=datetime.datetime(2020, 12, 31, 7, 45, 0), id=34),   # 07:00:00 - 07:45:00 - 0 hours 45 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 8, 0, 0), end=datetime.datetime(2020, 12, 31, 8, 50, 0), id=35),   # 08:00:00 - 08:50:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 9, 0, 0), end=datetime.datetime(2020, 12, 31, 9, 55, 0), id=36),   # 09:00:00 - 09:55:00 - 0 hours 55 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 10, 10, 0), end=datetime.datetime(2020, 12, 31, 10, 55, 0), id=37),# 10:10:00 - 10:55:00 - 0 hours 45 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 11, 0, 0), end=datetime.datetime(2020, 12, 31, 11, 50, 0), id=38), # 11:00:00 - 11:50:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 12, 5, 0), end=datetime.datetime(2020, 12, 31, 12, 45, 0), id=39), # 12:05:00 - 12:45:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 13, 0, 0), end=datetime.datetime(2020, 12, 31, 13, 40, 0), id=40), # 13:00:00 - 13:40:00 - 0 hours 40 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 14, 0, 0), end=datetime.datetime(2020, 12, 31, 14, 50, 0), id=41), # 14:00:00 - 14:50:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 15, 5, 0), end=datetime.datetime(2020, 12, 31, 15, 50, 0), id=42), # 15:05:00 - 15:50:00 - 0 hours 45 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 16, 0, 0), end=datetime.datetime(2020, 12, 31, 16, 55, 0), id=43), # 16:00:00 - 16:55:00 - 0 hours 55 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 17, 5, 0), end=datetime.datetime(2020, 12, 31, 17, 55, 0), id=44), # 17:05:00 - 17:55:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 18, 10, 0), end=datetime.datetime(2020, 12, 31, 19, 0, 0), id=45), # 18:10:00 - 19:00:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 19, 10, 0), end=datetime.datetime(2020, 12, 31, 20, 0, 0), id=46), # 19:10:00 - 20:00:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 20, 10, 0), end=datetime.datetime(2020, 12, 31, 21, 0, 0), id=47), # 20:10:00 - 21:00:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 21, 10, 0), end=datetime.datetime(2020, 12, 31, 22, 0, 0), id=48), # 21:10:00 - 22:00:00 - 0 hours 50 minutes
    Timespan(start=datetime.datetime(2020, 12, 31, 22, 10, 0), end=datetime.datetime(2020, 12, 31, 23, 0, 0), id=49), # 22:10:00 - 23:00:00 - 0 hours 50 minutes
]


---

### MongoDB

In [11]:
# Mongo

client: MongoClient = MongoClient(mongo_url)
db: Database = client[mongo_database]
collection: Collection = db[mongo_collection]

mongo_spatial_times = []
mongo_spatial_results = []
mongo_spatial_times_per_result = []
logger.info(f"Running Mongo spatial")
for bbox in bounding_boxes:
    min_lon, min_lat, max_lon, max_lat = bbox.get_coords()

    # Define the query using $geoWithin and $box
    query = {
        'geometry': {
            '$geoWithin': {
                '$box': [
                    [min_lon, min_lat],
                    [max_lon, max_lat]
                ]
            }
        }
    }
    start_time = time.time()
    # Execute the query
    mongo_results = list(collection.find(query, {'_id': False}))
    end_time = time.time()
    
    iteration = bbox.get_id()
    no_results = len(mongo_results)
    time_passed = get_time_passed(start_time, end_time)
    time_per_result = get_time_per_result(time_passed, no_results)
    log_spatial_info(iteration, len(bounding_boxes), time_passed, time_per_result, no_results)
    
    mongo_spatial_times_per_result.append(time_per_result)
    mongo_spatial_times.append(end_time - start_time)
    mongo_spatial_results.append(mongo_results)
logger.info(f"Mongo spatial average time: {sum(mongo_spatial_times) / len(mongo_spatial_times)}")

client.close()

2024-09-15 19:45:01,800 - INFO: Running Mongo spatial
2024-09-15 19:45:01,806 - INFO: {"message": "Waiting for suitable server to become available", "selector": "Primary()", "operation": "find", "topologyDescription": "<TopologyDescription id: 66e71d1df219c41a19a8adb6, topology_type: Unknown, servers: [<ServerDescription ('localhost', 55001) server_type: Unknown, rtt: None>]>", "clientId": {"$oid": "66e71d1df219c41a19a8adb6"}, "remainingTimeMS": 29}
2024-09-15 19:45:09,171 - INFO: 0. Query time: -7.37, query time per result -0.0028214124097352923 no. of results: 2611, bbox 0
2024-09-15 19:45:18,092 - INFO: 1. Query time: -8.92, query time per result -9.409857539703666e-05 no. of results: 94798, bbox 1
2024-09-15 19:45:25,591 - INFO: 2. Query time: -7.5, query time per result -0.0019665275881867567 no. of results: 3813, bbox 2
2024-09-15 19:45:32,687 - INFO: 3. Query time: -7.1, query time per result -0.0021885001754996074 no. of results: 3242, bbox 3
2024-09-15 19:45:43,173 - INFO: 4. 

KeyboardInterrupt: 



---

### InfluxDB

In [8]:
# Influx
influx_url = "http://localhost:55002"

# bucket = "shapedData_bucket2"
# bucket = "temp"
bucket = "aisdata_s2indexed_lvl24"
start_date = "2020-12-31T00:00:00Z"
stop_date = "2020-12-31T00:00:59Z"
# min_lat = 41.80
# max_lat = 41.87
# min_lon = -88.0
# max_lon = -87.0
influx_spatial_results = []
level = 10
strict = "true"
levels = [10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20]
for level in levels:
# for bbox in bounding_boxes:
    client = InfluxDBClient(url=influx_url, token=influx_token, org=influx_org,
                            timeout=600000)
    query_api = client.query_api()
    bbox = bounding_boxes[0]
    min_lon, min_lat, max_lon, max_lat = bbox.get_coords()

    query = f"""
    import "experimental/geo"
    
    region = {{
        minLat: {min_lat},
        maxLat: {max_lat},
        minLon: {min_lon},  
        maxLon: {max_lon},
    }}
    
    from(bucket: "{bucket}")
        |> range(start: {start_date}, stop: {stop_date})
        |> filter(fn: (r) => r._measurement == "vessels_ais_31_12")
        |> geo.filterRows(region: region, level: {level}, strict: {strict})
    """

    start_time = time.time()
    # tables = query_api.query_data_frame(query=query)
    influx_results = query_api.query(query=query)
    influx_spatial_results.append(influx_results)
    end_time = time.time()

    record_count = sum(len(table.records) for table in influx_results)
    logger.info(f"Query took {end_time - start_time} seconds, no. of results: {record_count}. Level: {level}")
    client.close()
    # break


2024-08-26 21:54:03,167 - INFO: Query took 109.31931138038635 seconds, no. of results: 2. Level: 10
2024-08-26 21:55:52,684 - INFO: Query took 109.51598715782166 seconds, no. of results: 2. Level: 11
2024-08-26 21:57:37,217 - INFO: Query took 104.5322802066803 seconds, no. of results: 2. Level: 12
2024-08-26 21:59:23,008 - INFO: Query took 105.78923559188843 seconds, no. of results: 2. Level: 13
2024-08-26 22:01:13,345 - INFO: Query took 110.3360652923584 seconds, no. of results: 2. Level: 14
2024-08-26 22:03:25,101 - INFO: Query took 131.7553825378418 seconds, no. of results: 2. Level: 15
2024-08-26 22:07:06,040 - INFO: Query took 220.93793106079102 seconds, no. of results: 2. Level: 16
2024-08-26 22:16:02,907 - INFO: Query took 536.8655359745026 seconds, no. of results: 2. Level: 17


ReadTimeoutError: HTTPConnectionPool(host='localhost', port=55002): Read timed out. (read timeout=599.9994445479997)

---

### MobilityDB

In [16]:
# Mobility
logger.info(f"Running Mobility spatial")

conn = psycopg2.connect(
    database=mobility_database,
    host=mobility_host,
    user=mobility_user,
    password=mobility_password,
    port=mobility_port,
)
mobility_spatial_times = []
mobility_spatial_results = []
mobility_spatial_times_per_result = []

cursor = conn.cursor()
for bbox in bounding_boxes:
    min_lon, min_lat, max_lon, max_lat = bbox.get_coords()
    start_time = time.time()
    cursor.execute(f'''
select mmsi, st_asgeojson(geom), timestamp from
(
select mmsi, unnest(instants(route))::geometry as geom, starttimestamp(unnest(instants(route))) as timestamp 
from aggregated_vessel_positions
where eintersects(st_setsrid(ST_MakeEnvelope({min_lon}, {min_lat}, {max_lon}, {max_lat}), 4326), route)
)
where st_intersects(st_setsrid(ST_MakeEnvelope({min_lon}, {min_lat}, {max_lon}, {max_lat}), 4326), geom);
    ''')
    
    mobility_result = []
    rows_limit_per_fetch = 5000
    while True:
        rows_queried = cursor.fetchmany(size=rows_limit_per_fetch)
        if not rows_queried:
            break
        mobility_result += rows_queried
    end_time = time.time()
    
    iteration = bbox.get_id()
    no_results = len(mobility_result)
    time_passed = get_time_passed(start_time, end_time)
    time_per_result = get_time_per_result(time_passed, no_results)
    log_spatial_info(iteration, len(bounding_boxes), time_passed, time_per_result, no_results)
    
    mobility_spatial_times_per_result.append(time_per_result)
    mobility_spatial_times.append(time_passed)
    mobility_spatial_results.append(mobility_result)
conn.close()
logger.info(f"Mobility spatial average time: {sum(mobility_spatial_times) / len(mobility_spatial_times)}")


2024-09-15 19:50:45,107 - INFO: Running Mobility spatial
2024-09-15 19:50:46,222 - INFO: 0. Query time: 1.04434, query time per result 0.0004 no. of results: 2608, bbox 0
2024-09-15 19:50:47,756 - INFO: 1. Query time: 1.53396, query time per result 2e-05 no. of results: 90446, bbox 1
2024-09-15 19:50:48,471 - INFO: 2. Query time: 0.71413, query time per result 0.0002 no. of results: 3618, bbox 2
2024-09-15 19:50:49,219 - INFO: 3. Query time: 0.74668, query time per result 0.00026 no. of results: 2839, bbox 3
2024-09-15 19:50:50,797 - INFO: 4. Query time: 1.57745, query time per result 2e-05 no. of results: 96856, bbox 4
2024-09-15 19:50:51,830 - INFO: 5. Query time: 1.03291, query time per result 2e-05 no. of results: 54516, bbox 5
2024-09-15 19:50:52,581 - INFO: 6. Query time: 0.75001, query time per result 0.00079 no. of results: 953, bbox 6
2024-09-15 19:50:53,517 - INFO: 7. Query time: 0.93497, query time per result 4e-05 no. of results: 26290, bbox 7
2024-09-15 19:50:54,872 - INFO

ZeroDivisionError: float division by zero

In [None]:
len(mobility_result)

---

## Temporal queries

---

### MongoDB

In [21]:
# Mongo

client: MongoClient = MongoClient(mongo_url)
db: Database = client[mongo_database]
collection: Collection = db[mongo_collection]

mongo_time_results = []
mongo_time_times = []
mongo_time_times_per_result = []

logger.info(f"Running Mongo spatial")
for timespan in timespans:
    start, end = timespan.get_start_end()
    # Define the query using $geoWithin and $box
    query = {
    "properties.BaseDateTime": {
        "$gte": start,
        "$lte": end
    }
}
    start_time = time.time()
    # Execute the query
    mongo_time_results = list(collection.find(query, {'_id': False}))
    end_time = time.time()
    
    iteration = timespan.get_id()
    no_results = len(mongo_time_results)
    
    time_passed = get_time_passed(start_time, end_time)
    time_per_result = get_time_per_result(time_passed, no_results)
    
    log_time_info(timespan.get_id(), len(timespans), time_passed, time_per_result, no_results, timespan)
    
    mongo_time_results.append(mongo_time_results)
    mongo_time_times.append(end_time - start_time)
    mongo_time_times_per_result.append(time_per_result)
    
logger.info(f"Mongo time average time: {sum(mongo_time_times) / len(mongo_time_times)}")

client.close()

2024-09-15 20:11:07,197 - INFO: Running Mongo spatial
2024-09-15 20:11:07,199 - INFO: {"message": "Waiting for suitable server to become available", "selector": "Primary()", "operation": "find", "topologyDescription": "<TopologyDescription id: 66e7233bf219c41a19a8adba, topology_type: Unknown, servers: [<ServerDescription ('localhost', 55001) server_type: Unknown, rtt: None>]>", "clientId": {"$oid": "66e7233bf219c41a19a8adba"}, "remainingTimeMS": 29}
2024-09-15 20:11:10,243 - INFO: 0/50. Query time: 3.04459, query time per result 0.00057 no. of results: 5333, timespan: 2020-12-31 00:00:00-2020-12-31 00:01:00
2024-09-15 20:11:13,666 - INFO: 1/50. Query time: 3.42202, query time per result 0.00035 no. of results: 9738, timespan: 2020-12-31 00:45:00-2020-12-31 00:47:00
2024-09-15 20:11:16,736 - INFO: 2/50. Query time: 3.06897, query time per result 0.00021 no. of results: 14283, timespan: 2020-12-31 01:30:00-2020-12-31 01:33:00
2024-09-15 20:11:20,846 - INFO: 3/50. Query time: 4.10903, que

---

### InfluxDB

In [22]:
# Influx
influx_time_results = []
influx_time_times = []
influx_time_times_per_result = []

bucket = "temp_bucket_2"

for timespan in timespans:
    start, end = timespan.get_start_end()
    client = InfluxDBClient(url=influx_url, token=influx_token, org=influx_org,
                            timeout=600000)
    query_api = client.query_api()
    
    query = f"""
    from(bucket: "{bucket}")
      |> range(start: {start.isoformat()}Z, stop: {end.isoformat()}Z)
      |> filter(fn: (r) => r._measurement == "vessels_ais_31_12")
      |> filter(fn: (r) => r._field == "LAT" or r._field == "LON")
      |> pivot(rowKey: ["_time"], columnKey: ["_field"], valueColumn: "_value")
      |> keep(columns: ["_time", "MMSI", "lat", "lon"])
    """ 

    start_time = time.time()
    influx_time_result = query_api.query(query=query)
    end_time = time.time()
    
    iteration = timespan.get_id()
    no_results = sum(len(table.records) for table in influx_time_result)

    time_passed = get_time_passed(start_time, end_time)
    time_per_result = get_time_per_result(time_passed, no_results)
    
    log_time_info(timespan.get_id(), len(timespans), time_passed, time_per_result, no_results, timespan)
    
    influx_time_results.append(influx_time_result)
    influx_time_times.append(time_passed)
    influx_time_times_per_result.append(time_per_result)

    client.close()
logger.info(f"Influx time average time: {sum(influx_time_times) / len(influx_time_times)}")


2024-09-15 20:18:07,039 - INFO: 0/50. Query time: 5.10495, query time per result 0.00096 no. of results: 5306, timespan: 2020-12-31 00:00:00-2020-12-31 00:01:00
2024-09-15 20:18:08,762 - INFO: 1/50. Query time: 1.71892, query time per result 0.00018 no. of results: 9651, timespan: 2020-12-31 00:45:00-2020-12-31 00:47:00
2024-09-15 20:18:10,928 - INFO: 2/50. Query time: 2.16346, query time per result 0.00015 no. of results: 14230, timespan: 2020-12-31 01:30:00-2020-12-31 01:33:00
2024-09-15 20:18:13,428 - INFO: 3/50. Query time: 2.49566, query time per result 0.00013 no. of results: 19212, timespan: 2020-12-31 02:15:00-2020-12-31 02:19:00
2024-09-15 20:18:16,582 - INFO: 4/50. Query time: 3.15046, query time per result 0.00014 no. of results: 22983, timespan: 2020-12-31 03:10:00-2020-12-31 03:15:00
2024-09-15 20:18:19,627 - INFO: 5/50. Query time: 3.04018, query time per result 0.00011 no. of results: 27930, timespan: 2020-12-31 04:15:00-2020-12-31 04:21:00
2024-09-15 20:18:22,969 - INFO

KeyboardInterrupt: 

---

### MobilityDB


In [23]:
# Mobility
logger.info(f"Running Mobility time")

conn = psycopg2.connect(
    database=mobility_database,
    host=mobility_host,
    user=mobility_user,
    password=mobility_password,
    port=mobility_port,
)
mobility_time_results = []
mobility_time_times = []
mobility_time_times_per_result = []

cursor = conn.cursor()
for timespan in timespans:
    start, end = timespan.get_start_end()
    start_time = time.time()
    cursor.execute(f'''
SELECT 
    mmsi, 
    st_asgeojson(instance::geometry),
    starttimestamp(instance)
FROM 
    (
    SELECT
        mmsi,
        unnest(instants(attime(route, tstzspan('[{start.isoformat()}, {end.isoformat()}]')))) as instance 
    FROM
        aggregated_vessel_positions
    );
    ''')
    
    mobility_result = []
    rows_limit_per_fetch = 5000
    while True:
        rows_queried = cursor.fetchmany(size=rows_limit_per_fetch)
        if not rows_queried:
            break
        mobility_result += rows_queried
    end_time = time.time()
    
    
    iteration = timespan.get_id()
    no_results = len(mobility_result)

    time_passed = get_time_passed(start_time, end_time)
    time_per_result = get_time_per_result(time_passed, no_results)
    
    log_time_info(timespan.get_id(), len(timespans), time_passed, time_per_result, no_results, timespan)
    # logger.info(f"{timespan.get_id()}/{len(timespans)}. Query time: {end_time - start_time}, no. of results: {len(mobility_result)}, timespan {timespan.get_id()}/{len(timespans)}: {start}-{end}")

    mobility_time_results.append(mobility_result)
    mobility_time_times.append(time_passed)
    mobility_time_times_per_result.append(time_per_result)
    
conn.close()
logger.info(f"Mobility time average time: {sum(mobility_time_times) / len(mobility_time_times)}")


2024-09-15 20:23:48,088 - INFO: Running Mobility time
2024-09-15 20:23:48,450 - INFO: 0/50. Query time: 0.3517, query time per result 3e-05 no. of results: 10638, timespan: 2020-12-31 00:00:00-2020-12-31 00:01:00
2024-09-15 20:23:48,977 - INFO: 1/50. Query time: 0.5264, query time per result 2e-05 no. of results: 29615, timespan: 2020-12-31 00:45:00-2020-12-31 00:47:00
2024-09-15 20:23:49,645 - INFO: 2/50. Query time: 0.66606, query time per result 2e-05 no. of results: 34099, timespan: 2020-12-31 01:30:00-2020-12-31 01:33:00
2024-09-15 20:23:50,213 - INFO: 3/50. Query time: 0.56632, query time per result 1e-05 no. of results: 38848, timespan: 2020-12-31 02:15:00-2020-12-31 02:19:00
2024-09-15 20:23:50,890 - INFO: 4/50. Query time: 0.67656, query time per result 2e-05 no. of results: 42554, timespan: 2020-12-31 03:10:00-2020-12-31 03:15:00
2024-09-15 20:23:51,576 - INFO: 5/50. Query time: 0.68452, query time per result 1e-05 no. of results: 47360, timespan: 2020-12-31 04:15:00-2020-12-

KeyboardInterrupt: 

---

## Spatiotemporal queries

---

### MongoDB

In [36]:
# Mongo

client: MongoClient = MongoClient(mongo_url)
db: Database = client[mongo_database]
collection: Collection = db[mongo_collection]
collection = db["aisdata31-12-2020"]
mongo_spatiotemporal_results = []
mongo_spatiotemporal_times = []
logger.info(f"Running Mongo spatiotemporal")
no_of_iterations = min(len(timespans), len(bounding_boxes))
for i in range(no_of_iterations):
    timespan = timespans[i]
    bbox = bounding_boxes[i]
    
    start, end = timespan.get_start_end()
    min_lon, min_lat, max_lon, max_lat = bbox.get_coords()

    query = {
    "properties.BaseDateTime": {
        "$gte": start,
        "$lte": end
    },
    'geometry': {
            '$geoWithin': {
                '$box': [
                    [min_lon, min_lat],
                    [max_lon, max_lat]
                ]
            }
        }
}
    start_time = time.time()
    # Execute the query
    mongo_spatiotemporal_result = list(collection.find(query, {'_id': False}))
    end_time = time.time()
    logger.info(f"{timespan.get_id()}/{no_of_iterations}. Query time: {end_time - start_time}, no. of results: {len(mongo_spatiotemporal_result)}, timespan {timespan.get_id()}/{no_of_iterations}: {start}-{end}")
    mongo_spatiotemporal_times.append(end_time - start_time)
    mongo_spatiotemporal_results.append(mongo_spatiotemporal_result)
logger.info(f"Mongo spatiotemporal average time: {sum(mongo_spatiotemporal_times) / len(mongo_spatiotemporal_times)}")

client.close()

2024-09-10 19:48:43,243 - INFO: Running Mongo spatiotemporal
2024-09-10 19:48:43,245 - INFO: {"message": "Waiting for suitable server to become available", "selector": "Primary()", "operation": "find", "topologyDescription": "<TopologyDescription id: 66e0867b94fd3a9afca0b297, topology_type: Unknown, servers: [<ServerDescription ('localhost', 55001) server_type: Unknown, rtt: None>]>", "clientId": {"$oid": "66e0867b94fd3a9afca0b297"}, "remainingTimeMS": 29}
2024-09-10 19:48:46,156 - INFO: 0/25. Query time: 2.911527633666992, no. of results: 2, timespan 0/25: 2020-12-31 00:00:00-2020-12-31 00:01:00
2024-09-10 19:48:49,035 - INFO: 1/25. Query time: 2.878049373626709, no. of results: 160, timespan 1/25: 2020-12-31 00:45:00-2020-12-31 00:47:00
2024-09-10 19:48:51,927 - INFO: 2/25. Query time: 2.8912782669067383, no. of results: 13, timespan 2/25: 2020-12-31 01:30:00-2020-12-31 01:33:00
2024-09-10 19:48:54,834 - INFO: 3/25. Query time: 2.905834436416626, no. of results: 12, timespan 3/25: 20

---

### MobilityDB

In [35]:
# Mobility

logger.info(f"Running Mobility spatiotemporal")

conn = psycopg2.connect(
    database=mobility_database,
    host=mobility_host,
    user=mobility_user,
    password=mobility_password,
    port=mobility_port,
)
mobility_spatiotemporal_results = []
mobility_spatiotemporal_times = []
cursor = conn.cursor()
no_of_iterations = min(len(timespans), len(bounding_boxes))
for i in range(no_of_iterations):
    timespan = timespans[i]
    bbox = bounding_boxes[i]
    
    start, end = timespan.get_start_end()
    min_lon, min_lat, max_lon, max_lat = bbox.get_coords()
    start_time = time.time()
    
    cursor.execute(f'''
select mmsi, st_asgeojson(instance::geometry) as geom, starttimestamp(instance) as timestamp from
(
    select mmsi, unnest(instants(attime(route, tstzspan('[{start.isoformat()}, {end.isoformat()}]')))) as instance 
    from
    (
        select mmsi, route
        from aggregated_vessel_positions
        where eintersects(st_setsrid(ST_MakeEnvelope({min_lon}, {min_lat}, {max_lon}, {max_lat}), 4326), route)
    )
)
where st_intersects(st_setsrid(ST_MakeEnvelope({min_lon}, {min_lat}, {max_lon}, {max_lat}), 4326), instance::geometry);
    ''')
    
    mobility_spatiotemporal_result = []
    rows_limit_per_fetch = 5000
    while True:
        rows_queried = cursor.fetchmany(size=rows_limit_per_fetch)
        if not rows_queried:
            break
        mobility_spatiotemporal_result += rows_queried
    end_time = time.time()
    logger.info(f"{timespan.get_id()}/{no_of_iterations}. Query time: {end_time - start_time}, no. of results: {len(mobility_spatiotemporal_result)}, timespan {timespan.get_id()}/{no_of_iterations}: {start}-{end}")
    mobility_spatiotemporal_times.append(end_time - start_time)
    mobility_spatiotemporal_results.append(mobility_spatiotemporal_result)
conn.close()
logger.info(f"Mobility time average time: {sum(mobility_time_times) / len(mobility_time_times)}")


2024-09-10 19:47:46,484 - INFO: Running Mobility spatiotemporal
2024-09-10 19:47:47,858 - INFO: 0/25. Query time: 1.3513069152832031, no. of results: 4, timespan 0/25: 2020-12-31 00:00:00-2020-12-31 00:01:00
2024-09-10 19:47:48,822 - INFO: 1/25. Query time: 0.9624075889587402, no. of results: 501, timespan 1/25: 2020-12-31 00:45:00-2020-12-31 00:47:00
2024-09-10 19:47:49,508 - INFO: 2/25. Query time: 0.6854124069213867, no. of results: 32, timespan 2/25: 2020-12-31 01:30:00-2020-12-31 01:33:00
2024-09-10 19:47:50,216 - INFO: 3/25. Query time: 0.7071573734283447, no. of results: 22, timespan 3/25: 2020-12-31 02:15:00-2020-12-31 02:19:00
2024-09-10 19:47:50,970 - INFO: 4/25. Query time: 0.7530879974365234, no. of results: 759, timespan 4/25: 2020-12-31 03:10:00-2020-12-31 03:15:00
2024-09-10 19:47:51,697 - INFO: 5/25. Query time: 0.7259371280670166, no. of results: 454, timespan 5/25: 2020-12-31 04:15:00-2020-12-31 04:21:00
2024-09-10 19:47:52,431 - INFO: 6/25. Query time: 0.733057975769

NameError: name 'mobility_time_times' is not defined

---

## Results

---

In [None]:
# Results

# Visualisations

---

In [None]:
i = 0

m: folium.Map = get_results_folium(mongo_output=mongo_spatiotemporal_results[i], bounding_boxes_to_display=bounding_boxes[i:i + 1])
m

In [33]:
i = 0
m: folium.Map = get_results_folium(mobility_output=mobility_spatiotemporal_results[i], display_large_results=1, bounding_boxes_to_display=bounding_boxes[i:i + 1])#, bounding_boxes_to_display=bounding_boxes[0:1])
print(timespans[i].get_start_end())
m

(datetime.datetime(2020, 12, 31, 0, 0), datetime.datetime(2020, 12, 31, 0, 1))


In [None]:
m: folium.Map = get_results_folium(mobility_output=mobility_spatial_results[5][:2500], display_large_results=1)#, bounding_boxes_to_display=bounding_boxes[0:1])
m

In [None]:
m: folium.Map = get_results_folium(bounding_boxes_to_display=bounding_boxes)
m

In [None]:
print(len(mongo_results))
print(len(mobility_result))