# Starting From Scratch to Keep Things Simple



In [14]:
from geopy.distance import geodesic, distance
from geopy import Point

# Home and school coordinates (decimal degrees) that anchor the search area
home_lat, home_lon = 41.91118832433419, -87.67514378155508
school_lat, school_lon = 41.87300017458362, -87.62765043486581

# Padding added on every side of the tight home/school bounding box
BUFFER_MILES = 3

# Tight bounding box around the two points
south, north = sorted((home_lat, school_lat))
west, east = sorted((home_lon, school_lon))
ne_corner = Point(north, east)
sw_corner = Point(south, west)

# Move BUFFER_MILES away from the tight corners along each compass bearing
# (0 = north, 90 = east, 180 = south, 270 = west). Every edge is derived from
# the *unshifted* corner, so the four results do not depend on the order in
# which they are computed.
buffer = distance(miles=BUFFER_MILES)
max_lat = buffer.destination(ne_corner, 0).latitude
max_lon = buffer.destination(ne_corner, 90).longitude
min_lat = buffer.destination(sw_corner, 180).latitude
min_lon = buffer.destination(sw_corner, 270).longitude

In [15]:
import folium

# Base map, centred on the home coordinates
m = folium.Map(location=[home_lat, home_lon], zoom_start=13)

# Pin home (blue) and school (red), in that order
landmarks = (
    ('Home', 'blue', home_lat, home_lon),
    ('School', 'red', school_lat, school_lon),
)
for pin_label, pin_color, pin_lat, pin_lon in landmarks:
    pin_icon = folium.Icon(color=pin_color)
    folium.Marker([pin_lat, pin_lon], popup=pin_label, icon=pin_icon).add_to(m)

# Shade the bounding box in translucent green
bbox_sw = [min_lat, min_lon]
bbox_ne = [max_lat, max_lon]
bbox_layer = folium.Rectangle(
    bounds=[bbox_sw, bbox_ne],
    color='green',
    fill=True,
    fill_opacity=0.2
)
bbox_layer.add_to(m)

# Last expression of the cell: renders the map
m

In [16]:
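# Log in to Google Cloud before creating the BigQuery client.
# Uncomment and run the line below if application-default credentials are not set up yet.
#!gcloud auth application-default login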

In [17]:
from google.cloud import bigquery

# Initialize BigQuery client with the project ID
client = bigquery.Client(project="civil-unrest-predictor")

# Select events from the public GDELT events table whose EventCode is in the
# list below and whose action location falls inside the bounding box.
# NOTE(review): the codes look like CAMEO event codes; confirm the selection
# against the GDELT codebook.
# The box edges are supplied as named query parameters (@min_lat, ...) instead
# of being formatted into the SQL text, so the statement itself is constant and
# BigQuery receives the bounds as typed FLOAT64 values.
query = """
SELECT
    GlobalEventID,
    SQLDATE,
    DATEADDED,
    IsRootEvent,
    EventCode,
    EventBaseCode,
    EventRootCode,
    GoldsteinScale,
    NumMentions,
    AvgTone,
    ActionGeo_FullName,
    ActionGeo_Lat,
    ActionGeo_Long
FROM
    `gdelt-bq.full.events`
WHERE
    EventCode IN ('145', '1451', '1452', '1453', '1454', '024', '0241', '0242', '0243', '0244', '0252', '0253',
                  '090', '091', '092', '093', '094', '104', '1041', '1042', '1043', '1044', '1052', '1053',
                  '111', '113', '114', '1232', '1233', '1234', '1242', '1243', '133', '1383', '139',
                  '141', '1411', '1412', '1413', '1414', '144', '143', '1723', '1724', '175',
                  '182', '183', '184')
    AND ActionGeo_Lat BETWEEN @min_lat AND @max_lat
    AND ActionGeo_Long BETWEEN @min_lon AND @max_lon
ORDER BY
    SQLDATE DESC
LIMIT 1000000;
"""

# Bind the bounding-box edges to the @-placeholders used in the query
job_config = bigquery.QueryJobConfig(
    query_parameters=[
        bigquery.ScalarQueryParameter("min_lat", "FLOAT64", min_lat),
        bigquery.ScalarQueryParameter("max_lat", "FLOAT64", max_lat),
        bigquery.ScalarQueryParameter("min_lon", "FLOAT64", min_lon),
        bigquery.ScalarQueryParameter("max_lon", "FLOAT64", max_lon),
    ]
)

# Execute the query
query_job = client.query(query, job_config=job_config)

# Wait for completion and convert the results to a DataFrame
data = query_job.result().to_dataframe()


In [18]:
# Preview the first rows of the query result
display(data.head())

Unnamed: 0,GlobalEventID,SQLDATE,DATEADDED,IsRootEvent,EventCode,EventBaseCode,EventRootCode,GoldsteinScale,NumMentions,AvgTone,ActionGeo_FullName,ActionGeo_Lat,ActionGeo_Long
0,1218008600,20241229,20241229,1,111,111,11,-2.0,3,6.336634,"Chicago, Illinois, United States",41.85,-87.6501
1,1218059434,20241229,20241229,1,90,90,9,-2.0,10,-6.25,"Chicago, Illinois, United States",41.85,-87.6501
2,1217968727,20241229,20241229,1,111,111,11,-2.0,4,0.806952,"Chicago, Illinois, United States",41.85,-87.6501
3,1218064446,20241229,20241229,1,90,90,9,-2.0,10,0.884956,"Chicago, Illinois, United States",41.85,-87.6501
4,1218009043,20241229,20241229,1,111,111,11,-2.0,3,6.336634,"Chicago, Illinois, United States",41.85,-87.6501


In [19]:
from datetime import datetime
import pandas as pd

# Rename 'SQLDATE' to 'Date', then parse the YYYYMMDD values into datetimes.
# Doing the rename first (and not in place) makes the cell safe to re-run:
# rename() ignores a missing 'SQLDATE' key and to_datetime() passes an
# already-converted datetime column through unchanged, whereas the previous
# convert-then-rename-in-place order raised KeyError on a second run.
data = data.rename(columns={'SQLDATE': 'Date'})
data['Date'] = pd.to_datetime(data['Date'], format='%Y%m%d')

In [20]:
# Parse the YYYYMMDD values in DATEADDED and store them back as datetimes
date_added = pd.to_datetime(data['DATEADDED'], format='%Y%m%d')
data['DATEADDED'] = date_added

In [21]:
# Show the frame after the date conversions (the rendered output elides the middle rows)
display(data)

Unnamed: 0,GlobalEventID,Date,DATEADDED,IsRootEvent,EventCode,EventBaseCode,EventRootCode,GoldsteinScale,NumMentions,AvgTone,ActionGeo_FullName,ActionGeo_Lat,ActionGeo_Long
0,1218008600,2024-12-29,2024-12-29,1,111,111,11,-2.0,3,6.336634,"Chicago, Illinois, United States",41.8500,-87.6501
1,1218059434,2024-12-29,2024-12-29,1,090,090,09,-2.0,10,-6.250000,"Chicago, Illinois, United States",41.8500,-87.6501
2,1217968727,2024-12-29,2024-12-29,1,111,111,11,-2.0,4,0.806952,"Chicago, Illinois, United States",41.8500,-87.6501
3,1218064446,2024-12-29,2024-12-29,1,090,090,09,-2.0,10,0.884956,"Chicago, Illinois, United States",41.8500,-87.6501
4,1218009043,2024-12-29,2024-12-29,1,111,111,11,-2.0,3,6.336634,"Chicago, Illinois, United States",41.8500,-87.6501
...,...,...,...,...,...,...,...,...,...,...,...,...,...
211382,896123210,1920-01-01,2020-01-01,1,114,114,11,-2.0,2,-5.252918,"Chicago, Illinois, United States",41.8500,-87.6501
211383,896167951,1920-01-01,2020-01-01,1,182,182,18,-9.5,5,-6.410256,"Chicago, Illinois, United States",41.8500,-87.6501
211384,896076819,1920-01-01,2020-01-01,1,182,182,18,-9.5,4,-7.142857,"East Garfield Park, Illinois, United States",41.8809,-87.7028
211385,896076841,1920-01-01,2020-01-01,1,182,182,18,-9.5,3,-7.142857,"East Garfield Park, Illinois, United States",41.8809,-87.7028


In [22]:
from pathlib import Path

# Write the events frame to csv/data.csv without the index column.
# to_csv() does not create missing parent directories, so make sure 'csv/'
# exists first; otherwise the write fails on a fresh checkout.
output_path = Path('csv/data.csv')
output_path.parent.mkdir(parents=True, exist_ok=True)
data.to_csv(output_path, index=False)