# Let's Pull some Data

I created this notebook so that I can isolate my SQL queries from the rest of my code.

In [26]:
# Set up the env.
!conda init
!conda env list ## to see the availble options
!conda activate civil_unrest ## to activate the targeted env


no change     /opt/anaconda3/condabin/conda
no change     /opt/anaconda3/bin/conda
no change     /opt/anaconda3/bin/conda-env
no change     /opt/anaconda3/bin/activate
no change     /opt/anaconda3/bin/deactivate
no change     /opt/anaconda3/etc/profile.d/conda.sh
no change     /opt/anaconda3/etc/fish/conf.d/conda.fish
no change     /opt/anaconda3/shell/condabin/Conda.psm1
no change     /opt/anaconda3/shell/condabin/conda-hook.ps1
no change     /opt/anaconda3/lib/python3.12/site-packages/xontrib/conda.xsh
no change     /opt/anaconda3/etc/profile.d/conda.csh
no change     /Users/warrenweissbluth/.bash_profile
No action taken.

# conda environments:
#
base                   /opt/anaconda3
civil_unrest         * /opt/anaconda3/envs/civil_unrest


CondaError: Run 'conda init' before 'conda activate'



In [27]:
# Import the necessary module
from google.cloud import bigquery
import pandas as pd
from matplotlib import pyplot as plt
import folium
from folium.plugins import HeatMap
import numpy as np
from matplotlib import colors
from geopy.distance import geodesic


## First let's ensure we are pulling useful information. We want information along the path of the commuter

In [28]:
def expand_rectangle_by_mile(start_lat, start_lon, end_lat, end_lon, margin_miles=0.5):
    """
    Build the bounding box of two points and pad it outward on every side.

    With the default margin of 1/2 mile per side, the box grows by 1 mile in
    total along both the latitude and the longitude direction.

    The two input points may be given in any order: the returned "start"
    corner always holds the smaller latitude/longitude and the "end" corner
    the larger ones.

    Parameters:
        start_lat (float): Latitude of the first point.
        start_lon (float): Longitude of the first point.
        end_lat (float): Latitude of the second point.
        end_lon (float): Longitude of the second point.
        margin_miles (float): Distance, in miles, by which each side of the
            box is pushed outward. Defaults to 0.5.

    Returns:
        dict: Padded rectangle coordinates.
              {"start_lat": float, "start_lon": float, "end_lat": float, "end_lon": float}
    """
    # Convert the margin from miles to degrees by walking from the start point:
    # bearing 0 (north) gives the latitude offset, bearing 90 (east) the
    # longitude offset. Both offsets are measured at the start point only.
    margin = geodesic(miles=margin_miles)
    origin = (start_lat, start_lon)
    lat_shift = margin.destination(origin, 0).latitude - start_lat
    lon_shift = margin.destination(origin, 90).longitude - start_lon

    # Normalise to min/max corners first, then push each corner outward.
    return {
        "start_lat": min(start_lat, end_lat) - lat_shift,
        "start_lon": min(start_lon, end_lon) - lon_shift,
        "end_lat": max(start_lat, end_lat) + lat_shift,
        "end_lon": max(start_lon, end_lon) + lon_shift,
    }

# Commute endpoints used throughout the notebook
start_lat = 41.91118832433419   # nearby my home
start_lon = -87.67514378155508
end_lat = 41.87300017458362     # nearby my school
end_lon = -87.62765043486581

# Padded bounding box around the two endpoints
expanded_coords = expand_rectangle_by_mile(
    start_lat=start_lat,
    start_lon=start_lon,
    end_lat=end_lat,
    end_lon=end_lon,
)
print(expanded_coords)


{'start_lat': 41.8657555523296, 'start_lon': -87.68484261360975, 'end_lat': 41.91843294658821, 'end_lon': -87.61795160281115}


In [29]:
# Two opposite corners of the expanded rectangle, as [lat, lon] pairs
lower_corner = [expanded_coords['start_lat'], expanded_coords['start_lon']]
upper_corner = [expanded_coords['end_lat'], expanded_coords['end_lon']]

# Centre the map on the midpoint between the two corners
mid_lat = (lower_corner[0] + upper_corner[0]) / 2
mid_lon = (lower_corner[1] + upper_corner[1]) / 2
m = folium.Map(location=[mid_lat, mid_lon], zoom_start=13)

# Shade the expanded rectangle
search_area = folium.Rectangle(
    bounds=[lower_corner, upper_corner],
    color='blue',
    fill=True,
    fill_color='blue',
    fill_opacity=0.2,
)
search_area.add_to(m)

# Pin the commute endpoints: green for the start, red for the end
endpoint_pins = (
    ([start_lat, start_lon], 'Starting Point', 'green'),
    ([end_lat, end_lon], 'Ending Point', 'red'),
)
for pin_location, pin_label, pin_color in endpoint_pins:
    folium.Marker(
        location=pin_location,
        popup=pin_label,
        icon=folium.Icon(color=pin_color),
    ).add_to(m)

# Display the map
m

### Now let's Pull the Data Set
* I want the 2,000 most recent events
* I want only events that occurred near the commuting route

In [30]:
# Log in to Google Cloud
# !gcloud auth application-default login

In [31]:
# Initialize BigQuery client with the project ID
client = bigquery.Client(project="civil-unrest-predictor")

# Pull the 2,000 most recent events (SQLDATE on or after 2015-01-01) with the
# listed event codes whose action location falls inside the expanded rectangle.
# The rectangle limits are sent as named query parameters (@min_lat, ...) rather
# than being formatted into the SQL text, so the query string stays constant.
query = """
SELECT
    SQLDATE,
    EventCode,
    ActionGeo_FullName,
    ActionGeo_Lat,
    ActionGeo_Long,
    AvgTone
FROM
    `gdelt-bq.full.events`
WHERE
    EventCode IN ('145', '1451', '1452', '1453', '1454')
    AND ActionGeo_Lat BETWEEN @min_lat AND @max_lat
    AND ActionGeo_Long BETWEEN @min_lon AND @max_lon
    AND CAST(SQLDATE AS STRING) >= '20150101'
ORDER BY
    SQLDATE DESC
LIMIT 2000;
"""

# Bind the bounding-box corners to the placeholders used in the query
job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ScalarQueryParameter("min_lat", "FLOAT64", expanded_coords['start_lat']),
        bigquery.ScalarQueryParameter("max_lat", "FLOAT64", expanded_coords['end_lat']),
        bigquery.ScalarQueryParameter("min_lon", "FLOAT64", expanded_coords['start_lon']),
        bigquery.ScalarQueryParameter("max_lon", "FLOAT64", expanded_coords['end_lon']),
    ]
)

# Execute the query
query_job = client.query(query, job_config=job_config)

# Wait for the job to finish and convert the results to a DataFrame
data = query_job.result().to_dataframe()

In [32]:
# Preview the query result; the notebook display elides the middle rows of a long frame
data

Unnamed: 0,SQLDATE,EventCode,ActionGeo_FullName,ActionGeo_Lat,ActionGeo_Long,AvgTone
0,20240823,145,"Union Park, Illinois, United States",41.8839,-87.6648,-3.046968
1,20240822,145,"Union Park, Illinois, United States",41.8839,-87.6648,0.000000
2,20240820,145,"Union Park, Illinois, United States",41.8839,-87.6648,-4.319654
3,20240820,145,"Union Park, Illinois, United States",41.8839,-87.6648,-4.319654
4,20240627,145,"Buckingham Fountain, Illinois, United States",41.8756,-87.6189,-7.052186
...,...,...,...,...,...,...
142,20160320,145,"University Of Illinois At Chicago, Illinois, U...",41.8720,-87.6492,-7.417219
143,20160313,145,"University Of Illinois At Chicago, Illinois, U...",41.8720,-87.6492,-8.571429
144,20160313,145,"University Of Illinois At Chicago, Illinois, U...",41.8720,-87.6492,-8.571429
145,20160312,145,"Chicago Loop, Illinois, United States",41.8811,-87.6298,-2.366864


In [33]:
# Save the query result to data.csv in the working directory; index=False omits the row-index column
data.to_csv('data.csv', index=False)

In [40]:
# Look up the 'Reds' colormap.
# plt.get_cmap replaces plt.cm.get_cmap, which is deprecated and emits a
# deprecation warning.
colormap = plt.get_cmap('Reds')

# Normalize the AvgTone values to the range [0, 1]
# (lowest AvgTone -> 0 -> lightest red, highest AvgTone -> 1 -> darkest red)
norm = colors.Normalize(vmin=data['AvgTone'].min(), vmax=data['AvgTone'].max())

# Add one circle per event to the existing map `m`.
# NOTE: this mutates `m`, so re-running the cell stacks duplicate circles;
# re-run the cell that builds `m` first to start from a clean map.
for _, row in data.iterrows():
    # Get the color based on the normalized AvgTone value
    color = colors.rgb2hex(colormap(norm(row['AvgTone'])))
    folium.Circle(
        location=[row['ActionGeo_Lat'], row['ActionGeo_Long']],
        radius=100,
        color=color,
        fill=True,
        fill_color=color,
        fill_opacity=0.7,
        popup=f"Date: {row['SQLDATE']}, EventCode: {row['EventCode']}, AvgTone: {row['AvgTone']}"
    ).add_to(m)

# Display the updated map
m

  colormap = plt.cm.get_cmap('Reds')
