# Set up

## Imports

In [40]:
import json

from bson import SON

import logger_setup
import logging
import pymongo
from pymongo import MongoClient, GEO2D
from pymongo.database import Database, Collection
from csv_reader import ais_csv_to_gdf, split_df_by_batch_size


In [2]:
# Configure logging through the project-local `logger_setup` helper, requesting INFO level.
# NOTE(review): handlers/format are whatever `setup_logger` installs -- not visible in this notebook.
logger_setup.setup_logger(level=logging.INFO)
# `getLogger()` with no name returns the root logger; later cells can log through `logger`.
logger = logging.getLogger()

# Code

### Create GeoJSON

In [3]:
# Load the AIS CSV for 2020-12-31 into a geodataframe via the project helper.
main_gdf = ais_csv_to_gdf("data/AIS_2020_12_31.csv")

# Store timestamps as plain 'YYYY-MM-DD HH:MM:SS' strings.
# NOTE(review): presumably so the values serialise cleanly into the GeoJSON feature dicts -- confirm.
main_gdf['BaseDateTime'] = main_gdf['BaseDateTime'].dt.strftime('%Y-%m-%d %H:%M:%S')

# Split into batches of at most 1,000,000 rows so each insert stays a manageable size.
gdfs = split_df_by_batch_size(main_gdf, 1_000_000)

# Report the row count of every batch. This replaces the previously commented-out
# check that built a full geo dict per batch just to count its features; each row
# becomes one feature, so len() gives the same numbers without the conversion cost.
for batch in gdfs:
    print(len(batch))

1000000
1000000
1000000
1000000
1000000
1000000
420411


In [21]:
# Connection settings shared by every database cell below.
# Target database and collection (both are the scratch name "temp").
collection_name = "temp"
database_name = "temp"
# MongoDB instance to connect to.
mongo_url = "mongodb://localhost:55001/"

In [9]:
# Start from a clean slate: drop the target collection if it already exists.
# The client is used as a context manager so the connection is always closed,
# even when the server is unreachable and `drop()` raises.
with MongoClient(mongo_url) as client:
    db: Database = client[database_name]
    collection: Collection = db[collection_name]

    collection.drop()

2024-05-07 19:46:13,590 - INFO: {"message": "Waiting for suitable server to become available", "selector": "<function writable_server_selector at 0x78dfce6a2560>", "operation": "drop", "topologyDescription": "<TopologyDescription id: 663a68e5e7a0f2055d7df16a, topology_type: Unknown, servers: [<ServerDescription ('localhost', 55000) server_type: Unknown, rtt: None>]>", "clientId": {"$oid": "663a68e5e7a0f2055d7df16a"}, "remainingTimeMS": 29}


### Insert into database

In [10]:
# Insert every batch as a list of GeoJSON feature documents.
# The context manager guarantees the client is closed even if an insert fails part-way.
with MongoClient(mongo_url) as client:
    db: Database = client[database_name]
    collection: Collection = db[collection_name]

    for i, gdf in enumerate(gdfs, start=1):
        print(f"Processing {i} / {len(gdfs)} geodataframe.")
        # Each row of the batch becomes one feature dict; only the features are stored.
        features = gdf.to_geo_dict(drop_id=True)["features"]
        # `insert_many` rejects an empty document list, so skip empty batches.
        if features:
            collection.insert_many(features)

Processing 1 / 7 geodataframe.
Processing 2 / 7 geodataframe.
Processing 3 / 7 geodataframe.
Processing 4 / 7 geodataframe.
Processing 5 / 7 geodataframe.
Processing 6 / 7 geodataframe.
Processing 7 / 7 geodataframe.


### Create spatial index

In [17]:
# Build a legacy 2d geospatial index over the coordinate pairs of each feature,
# which the `$near` query in the retrieval cell relies on.
# The context manager guarantees the client is closed even if index creation fails.
with MongoClient(mongo_url) as client:
    db: Database = client[database_name]
    collection: Collection = db[collection_name]

    collection.create_index([("geometry.coordinates", GEO2D)])

2024-05-07 20:06:13,543 - INFO: {"message": "Waiting for suitable server to become available", "selector": "<function writable_server_selector at 0x78dfce6a2560>", "operation": "createIndexes", "topologyDescription": "<TopologyDescription id: 663a6d95e7a0f2055ddfe92c, topology_type: Unknown, servers: [<ServerDescription ('localhost', 55000) server_type: Unknown, rtt: None>]>", "clientId": {"$oid": "663a6d95e7a0f2055ddfe92c"}, "remainingTimeMS": 29}


### Retrieve from database

In [56]:
# Query point as a legacy coordinate pair in [longitude, latitude] order,
# matching the order of `geometry.coordinates` in the stored features.
search_longitude, search_latitude = -87.68, 41.85
search_location = [search_longitude, search_latitude]

In [57]:
from pprint import pprint

# Search radius passed to `$maxDistance`.
# NOTE(review): with a 2d index and legacy coordinate pairs the unit follows the
# coordinate system (degrees here, which would be a very wide radius) -- confirm.
max_distance = 10
# Upper bound on the number of documents pulled back into the notebook.
max_results = 1000

# Proximity query on the coordinate pair; SON keeps `$near` ahead of `$maxDistance`.
# Alternative (fixed-radius circle, unsorted):
#   {"geometry.coordinates": {"$within": {"$center": [search_location, 10]}}}
query = {"geometry.coordinates": SON([("$near", search_location), ("$maxDistance", max_distance)])}

# The context manager guarantees the client is closed even if the query fails.
with MongoClient(mongo_url) as client:
    db: Database = client[database_name]
    collection: Collection = db[collection_name]

    # `_id` is excluded so the documents are plain GeoJSON features; the cursor is
    # materialised before the client closes because `results` is reused for the map.
    results = list(collection.find(query, {'_id': False}).limit(max_results))

for doc in results:
    pprint(doc)

2024-05-07 20:42:05,936 - INFO: {"message": "Waiting for suitable server to become available", "selector": "Primary()", "operation": "find", "topologyDescription": "<TopologyDescription id: 663a75fde7a0f2055ddfe944, topology_type: Unknown, servers: [<ServerDescription ('localhost', 55001) server_type: Unknown, rtt: None>]>", "clientId": {"$oid": "663a75fde7a0f2055ddfe944"}, "remainingTimeMS": 29}


{'geometry': {'coordinates': [-87.6754, 41.84179], 'type': 'Point'},
 'properties': {'BaseDateTime': '2020-12-31 21:59:59',
                'COG': 53.4,
                'CallSign': 'WDJ6110',
                'Cargo': None,
                'Draft': None,
                'Heading': 511.0,
                'IMO': None,
                'LAT': 41.84179,
                'LON': -87.6754,
                'Length': None,
                'MMSI': 367794470,
                'SOG': 3.8,
                'Status': 15.0,
                'TransceiverClass': 'A',
                'VesselName': 'GWYNETH ANNE',
                'VesselType': 31.0,
                'Width': None},
 'type': 'Feature'}
{'geometry': {'coordinates': [-87.67538, 41.84177], 'type': 'Point'},
 'properties': {'BaseDateTime': '2020-12-31 23:01:46',
                'COG': 57.3,
                'CallSign': 'WDH6184',
                'Cargo': None,
                'Draft': None,
                'Heading': 60.0,
                'IMO': None

In [None]:
import folium

# Map centre as (latitude, longitude) -- folium's order, i.e. the reverse of `search_location`.
CHICAGO_COORDINATES = (41.85, -87.68)

# Attribution text for the optional CartoDB tiles (only needed if the `attr`/`tiles`
# options below are re-enabled).
map_attributions = ('&copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a> '
                    'contributors, &copy; <a href="http://cartodb.com/attributions">CartoDB</a>')

m = folium.Map(location=CHICAGO_COORDINATES,
               # attr=map_attributions,
               # tiles='Cartodb Positron',  #'OpenStreetMap',
               zoom_start=5,
               control_scale=True,
               height=1000,
               width=1000)

# Add all query results as ONE GeoJSON layer instead of one layer per document:
# the rendered map is the same, but the generated HTML/JS is far smaller.
folium.GeoJson({"type": "FeatureCollection", "features": results}).add_to(m)

m