# Set up

## Imports

In [2]:
from pprint import pprint
from bson import SON
from dotenv import load_dotenv
import os 
import logger_setup
import logging
from pymongo import MongoClient, GEO2D
from pymongo.database import Database, Collection
from csv_reader import ais_csv_to_gdf, split_df_by_batch_size


In [3]:
# Configure logging through the project helper, then grab the root logger
# (logging.getLogger() without a name) for use in the cells below.
log_level = logging.INFO
logger_setup.setup_logging(level=log_level)
logger = logging.getLogger()

In [4]:
# Load variables from a local .env file (if any) into the process environment
# so MONGO_PORT can be picked up below.
load_dotenv()

# PyMongo only accepts connection strings that start with "mongodb://" or
# "mongodb+srv://"; an "http://" prefix is rejected with InvalidURI when the
# client is constructed.
mongo_url = "mongodb://localhost:" + os.environ.get("MONGO_PORT", "55000")

logger.info(f"MongoDB endpoint: {mongo_url}")

# Target database / collection used by every cell below.
database_name = "temp"
collection_name = "aisdata"

2024-12-01 19:06:04,394 - INFO: MongoDB endpoint: http://localhost:55001


# Code

### Create geojson

In [5]:
# Read the AIS CSV through the project helper and replace the timestamp
# column with plain 'YYYY-MM-DD HH:MM:SS' strings.
timestamp_format = '%Y-%m-%d %H:%M:%S'
main_gdf = ais_csv_to_gdf("data/AIS_2020_12_31.csv")
base_date_time = main_gdf['BaseDateTime']
main_gdf['BaseDateTime'] = base_date_time.dt.strftime(timestamp_format)

# Split the frame into batches; the second argument is the batch size passed
# to the helper.
batch_size = 1_000_000
gdfs = split_df_by_batch_size(main_gdf, batch_size)

# To sanity-check a batch, inspect for example
#     len(gdf.to_geo_dict(drop_id=True)["features"])
# (possibly gdf._to_geo(drop_id=True) instead of to_geo_dict).

KeyboardInterrupt: 

### Drop collection

In [9]:
# Drop the target collection. The client is used as a context manager so the
# connection is closed even if drop() raises.
with MongoClient(mongo_url) as client:
    db: Database = client[database_name]
    collection: Collection = db[collection_name]

    collection.drop()

2024-05-07 19:46:13,590 - INFO: {"message": "Waiting for suitable server to become available", "selector": "<function writable_server_selector at 0x78dfce6a2560>", "operation": "drop", "topologyDescription": "<TopologyDescription id: 663a68e5e7a0f2055d7df16a, topology_type: Unknown, servers: [<ServerDescription ('localhost', 55000) server_type: Unknown, rtt: None>]>", "clientId": {"$oid": "663a68e5e7a0f2055d7df16a"}, "remainingTimeMS": 29}


### Insert into database

This uploads raw data into the specified collection

In [1]:
# Upload every batch as a list of GeoJSON features. The client is used as a
# context manager so the connection is closed even when an insert fails
# part-way through.
with MongoClient(mongo_url) as client:
    db: Database = client[database_name]
    collection: Collection = db[collection_name]

    for i, gdf in enumerate(gdfs):
        print(f"Processing {i + 1} / {len(gdfs)} geodataframe.")
        # Possibly gdf._to_geo(drop_id=True)
        features = gdf.to_geo_dict(drop_id=True)["features"]
        if not features:
            # insert_many() raises TypeError for an empty document list,
            # so an empty batch is skipped instead of aborting the upload.
            continue
        collection.insert_many(features)

NameError: name 'MongoClient' is not defined

### Create spatial index

This creates a spatial index over the specified collection.

In [17]:
# Build a legacy 2d index on the coordinate pairs stored under
# "geometry.coordinates". The client is used as a context manager so the
# connection is closed even if index creation fails.
with MongoClient(mongo_url) as client:
    db: Database = client[database_name]
    collection: Collection = db[collection_name]

    collection.create_index([("geometry.coordinates", GEO2D)])

2024-05-07 20:06:13,543 - INFO: {"message": "Waiting for suitable server to become available", "selector": "<function writable_server_selector at 0x78dfce6a2560>", "operation": "createIndexes", "topologyDescription": "<TopologyDescription id: 663a6d95e7a0f2055ddfe92c, topology_type: Unknown, servers: [<ServerDescription ('localhost', 55000) server_type: Unknown, rtt: None>]>", "clientId": {"$oid": "663a6d95e7a0f2055ddfe92c"}, "remainingTimeMS": 29}


# Fix the dates

In [7]:
# Convert the string timestamps stored at import time into BSON dates.
#
# The endpoint is built from MONGO_PORT directly, falling back to 55001 (the
# port this cell used to hard-code), so it follows the configured port
# without going through mongo_url.
# client: MongoClient = MongoClient(mongo_url)
with MongoClient("localhost:" + os.environ.get("MONGO_PORT", "55001")) as client:
    db: Database = client[database_name]
    collection: Collection = db[collection_name]

    # The update is passed as a pipeline (a list) so that $set can read the
    # document's current value through "$properties.BaseDateTime".
    date_fix_result = collection.update_many(
        { "properties.BaseDateTime": { "$exists": True } },
        [
            {
                "$set": {
                    "properties.BaseDateTime": {
                        "$toDate": "$properties.BaseDateTime"
                    }
                }
            }
        ]
    )

# Last expression of the cell: display the UpdateResult.
date_fix_result

2024-12-01 19:07:25,570 - INFO: {"message": "Waiting for suitable server to become available", "selector": "<function writable_server_selector at 0x7cc9a3fd1000>", "operation": "update", "topologyDescription": "<TopologyDescription id: 674ca5ddea944ceb7938ed76, topology_type: Unknown, servers: [<ServerDescription ('localhost', 55001) server_type: Unknown, rtt: None>]>", "clientId": {"$oid": "674ca5ddea944ceb7938ed76"}, "remainingTimeMS": 29}


UpdateResult({'n': 6420411, 'nModified': 6420411, 'ok': 1.0, 'updatedExisting': True}, acknowledged=True)

### Retrieve from database

These are some of the queries that are possible with this collection.

In [7]:
# Query centre as a [longitude, latitude] pair, the same order as the
# documents' geometry.coordinates arrays.
search_lon, search_lat = -87.68, 41.85
search_location = [search_lon, search_lat]

In [11]:
# Fetch up to 1500 documents near search_location. The client is used as a
# context manager so the connection is closed even if the query raises; the
# cursor is materialised into a list before the connection goes away.
with MongoClient(mongo_url) as client:
    db: Database = client[database_name]
    collection: Collection = db[collection_name]

    # Alternative: everything inside a circle around the search location.
    # query = {"geometry.coordinates": {"$within": {"$center": [search_location, 10]}}}
    # SON keeps "$near" ahead of "$maxDistance" in the query document.
    # NOTE(review): the unit of $maxDistance depends on the index/coordinate
    # type — confirm that 10 is the intended radius.
    query = {"geometry.coordinates": SON([("$near", search_location), ("$maxDistance", 10)])}

    results = list(collection.find(query, {'_id': False}).limit(1500))

# Printing happens after the connection has been released.
for doc in results:
    pprint(doc)

2024-05-29 11:26:24,877 - INFO: {"message": "Waiting for suitable server to become available", "selector": "Primary()", "operation": "find", "topologyDescription": "<TopologyDescription id: 6656f4c05f57b519ac8728ef, topology_type: Unknown, servers: [<ServerDescription ('localhost', 55000) server_type: Unknown, rtt: None>]>", "clientId": {"$oid": "6656f4c05f57b519ac8728ef"}, "remainingTimeMS": 29}


{'geometry': {'coordinates': [-87.6754, 41.84179], 'type': 'Point'},
 'properties': {'BaseDateTime': '2020-12-31 21:59:59',
                'COG': 53.4,
                'CallSign': 'WDJ6110',
                'Cargo': None,
                'Draft': None,
                'Heading': 511.0,
                'IMO': None,
                'LAT': 41.84179,
                'LON': -87.6754,
                'Length': None,
                'MMSI': 367794470,
                'SOG': 3.8,
                'Status': 15.0,
                'TransceiverClass': 'A',
                'VesselName': 'GWYNETH ANNE',
                'VesselType': 31.0,
                'Width': None},
 'type': 'Feature'}
{'geometry': {'coordinates': [-87.67538, 41.84177], 'type': 'Point'},
 'properties': {'BaseDateTime': '2020-12-31 23:01:46',
                'COG': 57.3,
                'CallSign': 'WDH6184',
                'Cargo': None,
                'Draft': None,
                'Heading': 60.0,
                'IMO': None

In [21]:
import folium

# Map centre as (latitude, longitude), the order folium expects.
CHICAGO_COORDINATES = (41.85, -87.68)

# Attribution text for the optional CartoDB tiles (only needed if the
# commented-out `attr` / `tiles` arguments below are enabled).
map_attributions = ('&copy; <a href="http://www.openstreetmap.org/copyright">OpenStreetMap</a> '
                    'contributors, &copy; <a href="http://cartodb.com/attributions">CartoDB</a>')

m = folium.Map(location=CHICAGO_COORDINATES,
               # attr=map_attributions,
               # tiles='Cartodb Positron',  #'OpenStreetMap',
               zoom_start=5,
               control_scale=True,
               height=800,
               width=1400)

for result in results:
    # Once BaseDateTime has been converted to a date in the database it comes
    # back as a datetime object, which is not JSON serializable and makes the
    # GeoJson layer fail to render. Date-like values are turned back into
    # strings on a copy, leaving `results` itself untouched.
    properties = {
        key: value.strftime('%Y-%m-%d %H:%M:%S') if hasattr(value, 'strftime') else value
        for key, value in result['properties'].items()
    }
    feature = {**result, 'properties': properties}

    geojson = folium.GeoJson(feature)
    # Vessel names come from external AIS data; parse_html=True makes folium
    # escape the text instead of injecting it into the page as raw HTML.
    popup = folium.Popup(
        f"mmsi: {properties['MMSI']}, name {properties['VesselName']}, time: {properties['BaseDateTime']}",
        parse_html=True,
    )
    popup.add_to(geojson)
    geojson.add_to(m)

m