In [None]:
pip install earthengine-api



In [None]:
import os

import ee

# Authenticate with Google Earth Engine
ee.Authenticate()
# Initialize the Earth Engine API.
# The Cloud project can be overridden by setting the EE_PROJECT_ID environment
# variable; when it is unset the original project id is used.
project_id = os.environ.get('EE_PROJECT_ID', 'bright-link-438701-h9')
ee.Initialize(project=project_id)


*** Earth Engine *** Share your feedback by taking our Annual Developer Satisfaction Survey: https://google.qualtrics.com/jfe/form/SV_0JLhFqfSY1uiEaW?source=Init


In [None]:
pip install rasterio xlsxwriter pydrive2



In [None]:
import os
from datetime import datetime, timedelta

import pandas as pd
import rasterio

# Crime dataset.
# NOTE(review): the path presumes Google Drive is mounted at /content/drive — confirm before running.
crime_data_path = "/content/drive/MyDrive/CS524/crime_data_2013_onward.csv"

# Parse 'date' (values that cannot be parsed become NaT), then discard every
# record that lacks a latitude, a longitude, or a valid date.
crimes = (
    pd.read_csv(crime_data_path)
    .assign(date=lambda frame: pd.to_datetime(frame['date'], errors='coerce'))
    .dropna(subset=['latitude', 'longitude', 'date'])
)


In [None]:
# Common prefix of the monthly VIIRS GeoTIFF files; a '<YYYY>-<MM>.tif' suffix
# is appended to it to form the path of each month's raster.
base_path = '/content/drive/MyDrive/CS524/VIIRS/VIIRS_Chicago_'

# fetch light pollution from GeoTIFF
def get_light_pollution(lat, lon, tiff_path):
    """Return the band-1 raster value at a coordinate, or None if unavailable.

    Parameters
    ----------
    lat, lon : float
        Coordinates of the point to sample. They are handed to the dataset as
        (x=lon, y=lat), so they must be expressed in the raster's CRS
        (presumably geographic longitude/latitude — TODO confirm).
    tiff_path : str or None
        Path of the GeoTIFF to sample. ``None`` short-circuits to ``None``.

    Returns
    -------
    The value stored in band 1 at the pixel containing the point, or ``None``
    when ``tiff_path`` is ``None``, when the point falls outside the raster,
    or when the file cannot be read (the error is printed, not raised).
    """
    if tiff_path is None:
        return None
    try:
        with rasterio.open(tiff_path) as src:
            row, col = src.index(lon, lat)
            # index() does not clamp to the raster extent: a point outside the
            # raster can yield a negative row/col, which NumPy indexing would
            # silently wrap to the opposite edge and return a bogus value.
            if not (0 <= row < src.height and 0 <= col < src.width):
                return None
            return src.read(1)[row, col]
    except Exception as e:
        print(f"Error reading light pollution from {tiff_path} for {lat}, {lon}: {e}")
        return None

# Months for which VIIRS has no data; crimes in these months are skipped
# because their "current month" radiance cannot be looked up.
skip_months = [5, 6, 7]
crime_columns = ['id', 'primary_type', 'description', 'date', 'latitude', 'longitude']
crime_dates = crimes['date']

# Loop through each year
for year in range(2013, 2024):
    print(f"Processing year {year}...")

    in_year = crime_dates.dt.year == year
    # keep crimes that occurred between 5 PM and 5 AM
    at_night = (crime_dates.dt.hour >= 17) | (crime_dates.dt.hour < 5)
    in_skipped_month = crime_dates.dt.month.isin(skip_months)

    skipped_rows = int((in_year & at_night & in_skipped_month).sum())
    crimes_year = crimes.loc[in_year & at_night & ~in_skipped_month, crime_columns]

    # Rasters a crime in this year may need: every month of the year plus the
    # neighbouring December/January. Only files that exist are registered, so a
    # lookup for a missing month yields None instead of a failed open per crime.
    month_keys = (
        [f'{year - 1}-12']
        + [f'{year}-{m:02d}' for m in range(1, 13)]
        + [f'{year + 1}-01']
    )
    monthly_tiff_paths = {}
    missing_months = []
    for month_key in month_keys:
        tiff_path = f'{base_path}{month_key}.tif'
        if os.path.exists(tiff_path):
            monthly_tiff_paths[month_key] = tiff_path
        else:
            missing_months.append(month_key)
    if missing_months:
        print(f"No VIIRS GeoTIFF found for: {', '.join(missing_months)}")

    # Calculate LPI for each crime record
    results = []
    for crime in crimes_year.itertuples(index=False):
        crime_date = crime.date
        lat, lon = crime.latitude, crime.longitude

        # Calendar-month arithmetic. Shifting by a fixed 30 days is wrong near
        # month boundaries (Mar 31 - 30d is still March; Jan 31 + 30d is March).
        month = crime_date.to_period('M')
        current_month = month.strftime('%Y-%m')
        previous_month = (month - 1).strftime('%Y-%m')
        next_month = (month + 1).strftime('%Y-%m')

        light_prev = get_light_pollution(lat, lon, monthly_tiff_paths.get(previous_month))
        light_curr = get_light_pollution(lat, lon, monthly_tiff_paths.get(current_month))
        light_next = get_light_pollution(lat, lon, monthly_tiff_paths.get(next_month))

        # Formula for LPI: mean of the neighbouring months relative to the
        # current month. Undefined when any value is missing or the current
        # radiance is not positive.
        if light_prev is not None and light_curr is not None and light_next is not None and light_curr > 0:
            lpi = (light_prev + light_next) / (2 * light_curr)
        else:
            lpi = None

        results.append({
            'crime_id': crime.id,
            'primary_type': crime.primary_type,
            'description': crime.description,
            'date': crime_date,
            'latitude': lat,
            'longitude': lon,
            'light_pollution_previous': light_prev,
            'light_pollution_current': light_curr,
            'light_pollution_next': light_next,
            'light_pollution_index': lpi
        })

    # One summary line per year instead of one line per crime, which floods
    # the cell output.
    print(f"Processed {len(results)} crimes; skipped {skipped_rows} in months without VIIRS data")
    lpi_data = pd.DataFrame(results)

    output_path = f"/content/drive/MyDrive/CS524/LPI/light_pollution_crime_data_{year}.xlsx"
    with pd.ExcelWriter(output_path, engine='xlsxwriter') as writer:
        lpi_data.to_excel(writer, index=False, sheet_name='Data')
    print(f"Light Pollution Index dataset for {year} saved to {output_path}")


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
Previous: 2015-12, Current: 2016-01, Next: 2016-02
Previous: 2015-12, Current: 2016-01, Next: 2016-02
Previous: 2015-12, Current: 2016-01, Next: 2016-02
Previous: 2015-12, Current: 2016-01, Next: 2016-02
Previous: 2015-12, Current: 2016-01, Next: 2016-02
Previous: 2015-12, Current: 2016-01, Next: 2016-02
Previous: 2015-12, Current: 2016-01, Next: 2016-02
Previous: 2015-12, Current: 2016-01, Next: 2016-02
Previous: 2015-12, Current: 2016-01, Next: 2016-02
Previous: 2015-12, Current: 2016-01, Next: 2016-02
Previous: 2015-12, Current: 2016-01, Next: 2016-02
Previous: 2015-12, Current: 2016-01, Next: 2016-02
Previous: 2015-12, Current: 2016-01, Next: 2016-02
Previous: 2015-12, Current: 2016-01, Next: 2016-02
Previous: 2015-12, Current: 2016-01, Next: 2016-02
Previous: 2015-12, Current: 2016-01, Next: 2016-02
Previous: 2015-12, Current: 2016-01, Next: 2016-02
Previous: 2015-12, Current: 2016-01, Next: 2016-02
Previous: 2015-12