In [1]:
#import libraries
import numpy as np
import pandas as pd
import ast
import requests
import urllib.parse

In [2]:
#load the gzip-compressed visits export into a dataframe
visits_csv_path = "your_data_may_12_2022_0506pm.csv.gz"
visits_df = pd.read_csv(visits_csv_path, compression = "gzip")

In [3]:
#convert the week start/end columns from strings to datetimes
#infer_datetime_format is not passed: it is deprecated since pandas 2.0,
#where consistent-format inference is already the default
for date_column in ("date_range_start", "date_range_end"):
    visits_df[date_column] = pd.to_datetime(visits_df[date_column])

In [4]:
#get lat and long for unique streets
#lat/lon receive one entry per unique address (NaN when the lookup fails);
#adds collects the addresses that could not be geocoded, in encounter order
lat = []
lon = []
adds = []

#query-string form of the Nominatim search endpoint; the older
#/search/<query> path form is no longer served by the public instance
nominatim_url = 'https://nominatim.openstreetmap.org/search'
#Nominatim's usage policy asks clients to send an identifying User-Agent;
#replace this value with one that identifies your own project
nominatim_headers = {'User-Agent': 'safegraph-visits-geocoding-notebook'}

for str_addr in visits_df["street_address"].unique():
    try:
        reply = requests.get(nominatim_url,
                             params = {'q': str_addr, 'format': 'json'},
                             headers = nominatim_headers,
                             timeout = 30)
        reply.raise_for_status()
        response = reply.json()
        #use the first match; an empty result list raises IndexError
        match_lat, match_lon = response[0]["lat"], response[0]["lon"]
    except (requests.RequestException, ValueError, LookupError, TypeError):
        #network error, bad payload or no match: record the address as failed
        adds.append(str_addr)
        lat.append(np.nan)
        lon.append(np.nan)
    else:
        #append only after both values were read, so lat and lon stay aligned
        lat.append(match_lat)
        lon.append(match_lon)

In [5]:
#hand-written replacement addresses for the lookups that returned no response
#order matters: they are paired by position with the failed addresses in adds
adds_sub = [
    '1650 Cochrane Circle, Fort Carson',
    '6110 Martinez St',
    '6520 Specker Ave',
    'AAFES, Chiles Ave',
    '1804 Prussman Blvd',
    'Martinez St, Fort Carson',
    '1925 Specker Ave',
    '900 Magrath Ave',
    'Ware Street, Fort Carson',
    '1637 Flint St, Fort Carson',
    'Fort st, Fort Carson',
    'McCullough Circle, Fort Carson',
    '5115 Chiles Ave',
    '980 O\'Connell Boulevard, Fort Carson',
    '5553 Fort St, Fort Carson',
    'Martinez St, Fort Carson',
    'Wilderness Road, Colorado',
    'Butts Rd, Fort Carson',
    '6980 Quinn St',
    '1725 Woodfill Rd',
    '1511 chiles Ave, Fort Carson',
    '1553 Wetzel Ave, Fort Carson',
]

In [6]:
#map each address that failed geocoding to its hand-written substitute
#the pairing is purely positional and zip() would silently drop the surplus
#of the longer list, so refuse to continue when the two lists differ in length
if len(adds) != len(adds_sub):
    raise ValueError(
        f"{len(adds)} addresses failed geocoding but {len(adds_sub)} "
        "substitutes are listed; the positional pairing would be wrong"
    )
adds_dict = dict(zip(adds, adds_sub))

In [7]:
#swap in the substitute for every address that has one; keep the rest as-is
def _substitute_address(address):
    """Return the substitute for address from adds_dict, or address itself."""
    return adds_dict.get(address, address)

visits_df["street_address"] = visits_df["street_address"].map(_substitute_address)

In [8]:
#get lat and long for unique streets
#add_lat/add_lon map each unique street address to the coordinate returned
#by Nominatim, or to NaN when the lookup fails
add_lat = {}
add_lon = {}

#query-string form of the Nominatim search endpoint; the older
#/search/<query> path form is no longer served by the public instance
nominatim_url = 'https://nominatim.openstreetmap.org/search'
#Nominatim's usage policy asks clients to send an identifying User-Agent;
#replace this value with one that identifies your own project
nominatim_headers = {'User-Agent': 'safegraph-visits-geocoding-notebook'}

for str_addr in visits_df["street_address"].unique():
    try:
        reply = requests.get(nominatim_url,
                             params = {'q': str_addr, 'format': 'json'},
                             headers = nominatim_headers,
                             timeout = 30)
        reply.raise_for_status()
        response = reply.json()
        #use the first match; an empty result list raises IndexError
        match_lat, match_lon = response[0]["lat"], response[0]["lon"]
    except (requests.RequestException, ValueError, LookupError, TypeError):
        #network error, bad payload or no match: store NaN for this address
        match_lat = match_lon = np.nan
    add_lat[str_addr] = match_lat
    add_lon[str_addr] = match_lon

In [9]:
#getting lat and long for the data
#look every row's address up in the geocoding dictionaries; an address
#missing from them raises KeyError rather than passing silently
street_addresses = visits_df["street_address"]
#the lookups store coordinates as returned by the API (text) or NaN, so
#convert to float to get numeric columns instead of object columns
visits_df["latitude"] = pd.to_numeric(street_addresses.map(lambda address: add_lat[address]))
visits_df["longitude"] = pd.to_numeric(street_addresses.map(lambda address: add_lon[address]))

In [10]:
#count the missing values in each column
visits_df.isna().sum()

placekey                                         0
parent_placekey                               3599
location_name                                    0
street_address                                   0
city                                             0
region                                           0
postal_code                                      0
iso_country_code                                 0
safegraph_brand_ids                           2702
brands                                        2702
date_range_start                                 0
date_range_end                                   0
raw_visit_counts                                 0
raw_visitor_counts                               0
visits_by_day                                    0
visits_by_each_hour                              0
poi_cbg                                          0
visitor_home_cbgs                                0
visitor_home_aggregation                         0
visitor_daytime_cbgs           

In [11]:
#order the weekly records chronologically by the start of their date range
visits_df = visits_df.sort_values("date_range_start")

In [12]:
#the list-valued columns are read as strings; parse them into Python objects
for list_column in ("visits_by_day", "visits_by_each_hour"):
    visits_df[list_column] = visits_df[list_column].apply(ast.literal_eval)

In [13]:
#one output row per entry of visits_by_day; other columns are repeated
visits_df_split = visits_df.explode(column = "visits_by_day")

In [14]:
#add the day index (0, 1, 2, ...) of each exploded row within its week
#explode() repeats the source row's index label once per list entry, so a
#running count per label gives the day offset without assuming that every
#record has exactly 7 entries or that visits_df still has the same row count
#assumes the index labels of visits_df are unique (true for the default
#read_csv index)
visits_df_split["date_idx"] = visits_df_split.groupby(level = 0).cumcount()

In [15]:
#turn each day index into a timedelta of that many days
visits_df_split["date"] = pd.to_timedelta(visits_df_split["date_idx"], unit = "D")

In [16]:
#calendar date of each row = start of its week + the day offset stored in "date"
week_start = visits_df_split["date_range_start"]
day_offset = visits_df_split["date"]
visits_df_split["date"] = week_start + day_offset



In [17]:
#keep only the 24 hourly counts that belong to each row's own day
def _hours_for_day(row):
    """Return the 24-entry slice of the row's hourly list for its date_idx."""
    first_hour = row.date_idx * 24
    return row.visits_by_each_hour[first_hour: first_hour + 24]

visits_df_split['visits_by_each_hour'] = visits_df_split.apply(_hours_for_day, axis = 1)

In [18]:
#preview the computed date column as a one-column dataframe
visits_df_split.loc[:, ["date"]]

Unnamed: 0,date
672,2020-09-28 00:00:00-06:00
672,2020-09-29 00:00:00-06:00
672,2020-09-30 00:00:00-06:00
672,2020-10-01 00:00:00-06:00
672,2020-10-02 00:00:00-06:00
...,...
0,2021-09-22 00:00:00-06:00
0,2021-09-23 00:00:00-06:00
0,2021-09-24 00:00:00-06:00
0,2021-09-25 00:00:00-06:00


In [19]:
#add week
#convert the dates to UTC timestamps and take their ISO week number
#infer_datetime_format is not passed: it is deprecated since pandas 2.0
dates_utc = pd.to_datetime(visits_df_split['date'], utc = True)
visits_df_split['week'] = dates_utc.dt.isocalendar().week

In [20]:
#add month
#convert the dates to UTC timestamps and take their calendar month (1-12)
#infer_datetime_format is not passed: it is deprecated since pandas 2.0
dates_utc = pd.to_datetime(visits_df_split['date'], utc = True)
visits_df_split['month'] = dates_utc.dt.month

In [21]:
#save the per-day table to disk
output_csv_path = "safegraph.csv"
visits_df_split.to_csv(output_csv_path)

In [22]:
#display the final per-day table as the cell output
visits_df_split

Unnamed: 0,placekey,parent_placekey,location_name,street_address,city,region,postal_code,iso_country_code,safegraph_brand_ids,brands,...,normalized_visits_by_total_visits,normalized_visits_by_total_visitors,normalized_visits_by_region_naics_visits,normalized_visits_by_region_naics_visitors,latitude,longitude,date_idx,date,week,month
672,222-22c@5q9-7d4-mx5,,Out of the Norm Designs,6110 Martinez St,Fort Carson,CO,80913,US,,,...,0.000018,0.000047,0.001594,0.002099,38.7370794797105,-104.796681,0,2020-09-28 00:00:00-06:00,40,9
672,222-22c@5q9-7d4-mx5,,Out of the Norm Designs,6110 Martinez St,Fort Carson,CO,80913,US,,,...,0.000018,0.000047,0.001594,0.002099,38.7370794797105,-104.796681,1,2020-09-29 00:00:00-06:00,40,9
672,222-22c@5q9-7d4-mx5,,Out of the Norm Designs,6110 Martinez St,Fort Carson,CO,80913,US,,,...,0.000018,0.000047,0.001594,0.002099,38.7370794797105,-104.796681,2,2020-09-30 00:00:00-06:00,40,9
672,222-22c@5q9-7d4-mx5,,Out of the Norm Designs,6110 Martinez St,Fort Carson,CO,80913,US,,,...,0.000018,0.000047,0.001594,0.002099,38.7370794797105,-104.796681,3,2020-10-01 00:00:00-06:00,40,10
672,222-22c@5q9-7d4-mx5,,Out of the Norm Designs,6110 Martinez St,Fort Carson,CO,80913,US,,,...,0.000018,0.000047,0.001594,0.002099,38.7370794797105,-104.796681,4,2020-10-02 00:00:00-06:00,40,10
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,zzy-222@5q9-7cw-q75,,Evans ACH FT Carson,"1650 Cochrane Circle, Fort Carson",Fort Carson,CO,80913,US,,,...,0.000118,0.000220,0.011520,0.023156,38.71985545,-104.79725291870511,2,2021-09-22 00:00:00-06:00,38,9
0,zzy-222@5q9-7cw-q75,,Evans ACH FT Carson,"1650 Cochrane Circle, Fort Carson",Fort Carson,CO,80913,US,,,...,0.000118,0.000220,0.011520,0.023156,38.71985545,-104.79725291870511,3,2021-09-23 00:00:00-06:00,38,9
0,zzy-222@5q9-7cw-q75,,Evans ACH FT Carson,"1650 Cochrane Circle, Fort Carson",Fort Carson,CO,80913,US,,,...,0.000118,0.000220,0.011520,0.023156,38.71985545,-104.79725291870511,4,2021-09-24 00:00:00-06:00,38,9
0,zzy-222@5q9-7cw-q75,,Evans ACH FT Carson,"1650 Cochrane Circle, Fort Carson",Fort Carson,CO,80913,US,,,...,0.000118,0.000220,0.011520,0.023156,38.71985545,-104.79725291870511,5,2021-09-25 00:00:00-06:00,38,9
