# US-Ignite San Diego ETL Notebook

In [19]:
#import modules
from cartoframes.auth import set_default_credentials, Credentials
from cartoframes.viz import Map, Layer
from cartoframes.data import Dataset
from cartoframes.viz.helpers import size_continuous_layer
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

# import CityIq and libraries for timing
from cityiq import CityIq
import time
import json

#load .env with credentials
from dotenv import load_dotenv
load_dotenv()

import os
# Read the CARTO connection settings from the environment (load_dotenv() was
# called above to load the .env file).
# NOTE(review): USERNAME is often already set by the OS/shell, and load_dotenv()
# does not override existing variables by default -- confirm that the CARTO
# username is the value actually picked up here.
BASE_URL, API_KEY, USERNAME = (
    os.getenv(name) for name in ("BASE_URL", "API_KEY", "USERNAME")
)

# Register the default credentials for cartoframes.
set_default_credentials(base_url=BASE_URL, api_key=API_KEY)

# Explicit credentials object, passed to the Dataset uploads further down.
credentials = Credentials(USERNAME, API_KEY)


## Extract: Bring Data into Notebook

In [3]:
# Query window for events, expressed as epoch time in milliseconds: the window
# ends when this cell is run and starts one hour (3,600,000 ms) earlier.
endTime = 1000 * int(time.time())
startTime = endTime - 3600000

# Create the CityIq client and fetch its token.
myCIQ = CityIq("City")
myCIQ.fetchToken()

# Fetch metadata for the assets that report PEDEVT events, then read the
# assets back from the client.
myCIQ.fetchMetadata("assets", "pedestrian", "eventTypes:PEDEVT")
san_diego_pedestrian_sensor_metadata = myCIQ.getAssets()

## Transform
Add data-transformation code here (removing outliers, null values, etc.). It is probably best to do this kind of work on the GeoPandas dataframe.

In [4]:
# Build a table from the raw asset metadata and drop every row that has a
# missing value in any column.
sensor_metadata_complete = pd.DataFrame(san_diego_pedestrian_sensor_metadata).dropna()

# "coordinates" holds "<lat>:<lng>" strings; split on the first ":" only and
# store both halves as float columns.
latlng = sensor_metadata_complete["coordinates"].str.split(":", n=1, expand=True)
san_diego_pedestrian_sensor_metadata_df = sensor_metadata_complete.assign(
    latitude=latlng[0].astype(float),
    longitude=latlng[1].astype(float),
)

# Point geometries take x = longitude, y = latitude.
sensor_points = gpd.points_from_xy(
    san_diego_pedestrian_sensor_metadata_df["longitude"],
    san_diego_pedestrian_sensor_metadata_df["latitude"],
)
san_diego_pedestrian_sensor_metadata_gdf = gpd.GeoDataFrame(
    san_diego_pedestrian_sensor_metadata_df,
    geometry=sensor_points,
)
print(san_diego_pedestrian_sensor_metadata_gdf)

   assetType                              assetUid  \
0     CAMERA  00423a29-2ad6-4776-9bb6-c9be6e31a964   
1     CAMERA  03a7eaf3-c758-4526-b8a4-1db39bb6697c   
2     CAMERA  05330a7b-5b72-4135-9680-b650ede1cb32   
3     CAMERA  06fc5f10-5081-41e7-bf25-e71728ae18fb   
4     CAMERA  083ea2b1-0b82-4c5a-a620-db0ca3808a59   
5     CAMERA  0b977f7b-92d4-4081-a09f-5274a4489758   
6     CAMERA  0de8fc8b-8078-4005-9303-a6433be727d1   
7     CAMERA  139e7832-20f2-46c0-823d-6fb4cde7f11c   
8     CAMERA  18ba13cd-5070-419d-a713-06fba3ed047c   
9     CAMERA  18fa1950-1e69-4097-bcad-634e73bb1ec5   
10    CAMERA  1a059929-fdc8-4c8a-81e3-0e72c75e40aa   
11    CAMERA  1b576c8d-2b0c-4e44-991d-98dadc839bcc   
12    CAMERA  28d2b864-c954-4686-9f00-76f69a5a5705   
13    CAMERA  2ffd6259-98f0-491a-8257-10374ac2c041   
14    CAMERA  321a03e9-0146-4d29-95b0-6fc9201f6308   
15    CAMERA  32659876-9f3f-4b5a-bc93-89e1926f3f62   
16    CAMERA  327867d7-02af-41f1-9ccf-4874eb002a54   
17    CAMERA  33ac5ea9-38e0-

## Load: Upload to CARTO

In [5]:
# Wrap the sensor GeoDataFrame in a cartoframes Dataset and upload it to CARTO,
# replacing the table if it already exists.
san_diego_pedestrian_sensor_metadata_d = Dataset(san_diego_pedestrian_sensor_metadata_gdf)
san_diego_pedestrian_sensor_metadata_d.upload(
    table_name='san_diego_pedestrian_sensor_metadata',
    if_exists='replace',
    credentials=credentials,
)

The following columns were changed in the CARTO copy of this dataframe:
[1massetType[0m -> [1massettype[0m
[1massetUid[0m -> [1massetuid[0m
[1meventTypes[0m -> [1meventtypes[0m
[1mmediaType[0m -> [1mmediatype[0m
[1mparentAssetUid[0m -> [1mparentassetuid[0m




<cartoframes.data.dataset.Dataset at 0x123519cf8>

## ETL for loading Events

In [17]:
# Fields copied out of each event's nested "properties" and "measures" dicts so
# that every one of them becomes its own top-level column in the events table.
EVENT_PROPERTY_FIELDS = ("directionUnit", "speedUnit", "eventUid")
EVENT_MEASURE_FIELDS = (
    "counter_direction_speed",
    "counter_direction_pedestrianCount",
    "pedestrianCount",
    "counter_direction",
    "speed",
    "direction",
)

# empty list to collect events
san_diego_pedestrian_sensor_events_list = []
# empty list of aggregates (not populated in this cell)
san_diego_pedestrian_sensor_aggregate_list = []

# Extract: fetch the PEDEVT events of every sensor for the startTime..endTime window.
for _, row in san_diego_pedestrian_sensor_metadata_gdf.iterrows():
    myCIQ.fetchEvents("assets", row.assetUid, "PEDEVT", startTime, endTime, pageSize=500)
    for event in myCIQ.getEvents():
        # Tag the event with its sensor's location, then flatten the nested fields.
        event["latitude"] = row.latitude
        event["longitude"] = row.longitude
        for field in EVENT_PROPERTY_FIELDS:
            event[field] = event["properties"][field]
        for field in EVENT_MEASURE_FIELDS:
            event[field] = event["measures"][field]
        san_diego_pedestrian_sensor_events_list.append(event)

# Transform: one row per event; rows with any missing value are discarded.
san_diego_pedestrian_sensor_events_df = pd.DataFrame(san_diego_pedestrian_sensor_events_list).dropna()

# Sum pedestrianCount per sensor. reset_index() returns a new frame, so its
# result has to be kept: that is what turns assetUid back into a regular column
# for the merge below (the original call discarded it, leaving assetUid in the
# index).
grouped_SD_ped_sensor_events_df = (
    san_diego_pedestrian_sensor_events_df
    .groupby('assetUid')['pedestrianCount']
    .sum()
    .rename('pedestrianCount_sum')
    .reset_index()
)

# Join the counts back to the sensor metadata. pd.merge defaults to an inner
# join, so sensors without any remaining events are left out.
merged_metadata_ped_counts_df = pd.merge(
    san_diego_pedestrian_sensor_metadata_df,
    grouped_SD_ped_sensor_events_df,
    on='assetUid',
)

# Point geometries take x = longitude, y = latitude.
merged_metadata_ped_counts_gdf = gpd.GeoDataFrame(
    merged_metadata_ped_counts_df,
    geometry=gpd.points_from_xy(
        merged_metadata_ped_counts_df.longitude,
        merged_metadata_ped_counts_df.latitude,
    ),
)

# Load: upload the per-sensor totals to CARTO, replacing the table if it exists.
merged_metadata_ped_counts_d = Dataset(merged_metadata_ped_counts_gdf)
merged_metadata_ped_counts_d.upload(table_name='merged_metadata_ped_counts_san_diego', if_exists='replace', credentials=credentials)

    
#san_diego_pedestrian_sensor_events_gdf = gpd.GeoDataFrame(san_diego_pedestrian_sensor_events_df, geometry=gpd.points_from_xy(san_diego_pedestrian_sensor_events_df.longitude, san_diego_pedestrian_sensor_events_df.latitude))
#san_diego_pedestrian_sensor_events_d = Dataset(san_diego_pedestrian_sensor_events_gdf)
#san_diego_pedestrian_sensor_events_d.upload(table_name='san_diego_pedestrian_sensor_events', if_exists='append', credentials=credentials)

    

The following columns were changed in the CARTO copy of this dataframe:
[1massetType[0m -> [1massettype[0m
[1massetUid[0m -> [1massetuid[0m
[1meventTypes[0m -> [1meventtypes[0m
[1mmediaType[0m -> [1mmediatype[0m
[1mparentAssetUid[0m -> [1mparentassetuid[0m
[1mpedestrianCount_sum[0m -> [1mpedestriancount_sum[0m




<cartoframes.data.dataset.Dataset at 0x125bcdc18>

## Make a quick map using one of CARTO's helper functions

In [20]:
# Build the layer with the size_continuous_layer helper from the uploaded
# dataset, its 'pedestrianCount_sum' column and the label 'Pedestrian Count'.
pedestrian_count_layer = size_continuous_layer(
    merged_metadata_ped_counts_d,
    'pedestrianCount_sum',
    'Pedestrian Count',
)

# Keep the Map as the cell's last expression so the notebook renders it.
Map(pedestrian_count_layer)