In [175]:
import pandas as pd
import numpy as np

from geopy.geocoders import Nominatim
from geopy.exc import GeocoderTimedOut
import re

import warnings
warnings.filterwarnings("ignore")

In [162]:
# Load raw data
# NOTE: the location below is an absolute path on one machine; adjust it when
# running the notebook elsewhere.
raw_csv_path = '/Users/vanessafarias/RecommendME/data/raw/thrift_stores.csv'
data = pd.read_csv(raw_csv_path)
print(data.shape)
data.head()

(38, 4)


Unnamed: 0,section,name,url,address
0,Hand-Picked Cool,Annex x LOCAL,https://www.annexvintage.com/us/,5364 Saint-Laurent Boulevard
1,Hand-Picked Cool,Ex-Voto,https://exvoto.ca/en,6534 Saint-Laurent Boulevard
2,Hand-Picked Cool,LNF,https://www.lnfshop.com/,5319 Park Avenue
3,Hand-Picked Cool,Lazy Vintage,https://www.lazymtl.com/,"1682 Mont-Royal Avenue East, 3730 Ontario Stre..."
4,Hand-Picked Cool,Le Ninety,https://www.instagram.com/le.ninety/?hl=en,4361 Saint-Denis Street


In [230]:
def correct_address(address):
    """Return the address OpenStreetMap (Nominatim) reports for `address`.

    ", Canada" is appended to the query before it is sent to the geocoder.

    Parameters
    ----------
    address : str
        Free-form street address.

    Returns
    -------
    str or None
        The `address` attribute of the geocoder's match. None when `address`
        is not a non-blank string, when the geocoder finds no match, or when
        the request times out.
    """
    # Missing values (None / NaN) and blank strings cannot be geocoded; return
    # early instead of letting `address + ", Canada"` raise a TypeError.
    if not isinstance(address, str) or not address.strip():
        return None

    # Initialize the Nominatim geocoder
    geolocator = Nominatim(user_agent="canada_address_validator")
    try:
        location = geolocator.geocode(address + ", Canada", timeout=10)
    except GeocoderTimedOut:
        return None
    return location.address if location else None
    
def geocode_address(address):
    """Return the (latitude, longitude) OpenStreetMap (Nominatim) reports for `address`.

    ", Canada" is appended to the query before it is sent to the geocoder.

    Parameters
    ----------
    address : str
        Free-form street address.

    Returns
    -------
    tuple
        `(latitude, longitude)` of the geocoder's match. `(None, None)` when
        `address` is not a non-blank string, when the geocoder finds no match,
        or when the request times out.
    """
    # Missing values (None / NaN) and blank strings cannot be geocoded; return
    # early instead of letting `address + ", Canada"` raise a TypeError.
    if not isinstance(address, str) or not address.strip():
        return None, None

    # Initialize the Nominatim geocoder
    geolocator = Nominatim(user_agent="canada_address_validator")
    try:
        location = geolocator.geocode(address + ", Canada", timeout=10)
    except GeocoderTimedOut:
        return None, None
    if location:
        return location.latitude, location.longitude
    return None, None
    
def clean_address(address):
    """Remove unit designators such as ", #201" or " #12B" from an address string."""
    # Optional comma, optional whitespace, then "#" followed by digits and any
    # trailing word characters.
    unit_pattern = re.compile(r",?\s*#\d+\w*")
    return unit_pattern.sub("", address)

def extract_neighborhoods(address):
    """Pick the 3rd and 4th ", "-separated components of an address.

    Parameters
    ----------
    address : str or None
        Address string whose components are separated by ", ".

    Returns
    -------
    pandas.Series
        Two values: the components at positions 2 and 3. A position that does
        not exist is None. Both are None when `address` is not a string (for
        example the None produced for an address that could not be geocoded).
    """
    # A failed geocoding leaves None/NaN in the column; there is nothing to split.
    if not isinstance(address, str):
        return pd.Series([None, None])

    parts = address.split(", ")  # Split by commas
    # Pad so that positions 2 and 3 always exist, then take exactly those two.
    padded = parts + [None, None, None, None]
    return pd.Series(padded[2:4])

In [231]:
# Fix data formatting

# Store 1
# Row 3 lists two locations in a single comma-separated address string, so the
# row is duplicated and each copy keeps one of the two addresses.
row_to_duplicate = data.loc[[3]]  # Select row 3
df = pd.concat([data, row_to_duplicate], ignore_index=True)

# Label of the appended copy (38 for the 38-row raw file). Looked up rather
# than hard-coded so the cell still targets the right row if the CSV grows.
appended_label = df.index[-1]

# Fix address formatting using `.at[]` to avoid chained indexing.
# Each piece is stripped so the second address does not keep the space that
# followed the comma.
store_1_addresses = [part.strip() for part in data.at[3, 'address'].split(',')]
df.at[3, 'address'] = store_1_addresses[0]
df.at[appended_label, 'address'] = store_1_addresses[1]

df = df.drop_duplicates()

# Store 2
# Fix data formatting
# Row 8 is duplicated and the two copies are given the two addresses below.
row_to_duplicate = data.loc[[8]]  # Select row 8
df2 = pd.concat([df, row_to_duplicate], ignore_index=True)

empire_exchange_1 = '5225 Saint-Laurent Boulevard'
empire_exchange_2 = '6796 Saint-Laurent Boulevard'

# Fix address formatting using `.at[]` to avoid chained indexing.
# The appended copy's label is looked up instead of hard-coding 39: assigning
# through `.at[]` to a label that does not exist would silently add a new,
# mostly-empty row.
df2.at[8, 'address'] = empire_exchange_1
df2.at[df2.index[-1], 'address'] = empire_exchange_2

df2 = df2.drop_duplicates()

# translate key words in addresses
# Only whole words are replaced, so names that merely contain one of the
# keywords (e.g. "Westmount") are left intact. `regex=True` is passed
# explicitly because the default of `str.replace` differs between pandas versions.
df2['address'] = (
    df2['address']
    .str.replace(r'\bEast\b', 'Est', regex=True)
    .str.replace(r'\bWest\b', 'Ouest', regex=True)
    .str.replace(r'\bStreet\b', 'Rue', regex=True)
)

In [232]:
# Remove unit # in address
df2['address'] = df2['address'].apply(clean_address)

# Specify city and province to improve accuracy.
# The suffix is appended only where it is missing, so re-running this cell
# does not stack ", Montreal, Quebec" a second time.
CITY_SUFFIX = ', Montreal, Quebec'
needs_suffix = ~df2['address'].str.endswith(CITY_SUFFIX, na=False)
df2.loc[needs_suffix, 'address'] = df2.loc[needs_suffix, 'address'] + CITY_SUFFIX

# Geocode
# Each of the two calls below sends its own request per row.
df2['corrected_address'] = df2['address'].apply(correct_address)
df2['coordinates'] = df2['address'].apply(geocode_address)

In [233]:
# Derive two neighborhood candidates from the geocoder's address
df2[["neighborhood", "neighborhood_2"]] = df2["corrected_address"].apply(extract_neighborhoods)

# Correct neighborhood: for the rows listed here, use the second candidate
rows_using_second = [8, 13, 15, 16, 17, 18, 19, 20, 22, 32, 38]
df2.loc[rows_using_second, 'neighborhood'] = df2.loc[rows_using_second, 'neighborhood_2']

# The second candidate column is no longer needed
df2.drop(columns=['neighborhood_2'], inplace=True)

In [236]:
# Split each coordinates tuple into its own columns: element 0 -> latitude,
# element 1 -> longitude.
for position, column in enumerate(['latitude', 'longitude']):
    df2[column] = df2['coordinates'].str.get(position)

In [237]:
# Display the frame with the geocoding results and the derived columns
df2

Unnamed: 0,section,name,url,address,corrected_address,coordinates,neighborhood,latitude,longitude
0,Hand-Picked Cool,Annex x LOCAL,https://www.annexvintage.com/us/,"5364 Saint-Laurent Boulevard, Montreal, Quebec","5364, Boulevard Saint-Laurent, Mile-End, Le Pl...","(45.5248006, -73.5970816)",Mile-End,45.524801,-73.597082
1,Hand-Picked Cool,Ex-Voto,https://exvoto.ca/en,"6534 Saint-Laurent Boulevard, Montreal, Quebec","6534, Boulevard Saint-Laurent, La Petite-Patri...","(45.5307855, -73.6101907)",La Petite-Patrie,45.530786,-73.610191
2,Hand-Picked Cool,LNF,https://www.lnfshop.com/,"5319 Park Avenue, Montreal, Quebec","Avenue du Parc, Parc-Extension, Villeray–Saint...","(45.5300931, -73.62239595)",Villeray–Saint-Michel–Parc-Extension,45.530093,-73.622396
3,Hand-Picked Cool,Lazy Vintage,https://www.lazymtl.com/,"1682 Mont-Royal Avenue Est, Montreal, Quebec","1682, Avenue du Mont-Royal Est, Le Plateau-Mon...","(45.5324619, -73.5747044)",Le Plateau-Mont-Royal,45.532462,-73.574704
4,Hand-Picked Cool,Le Ninety,https://www.instagram.com/le.ninety/?hl=en,"4361 Saint-Denis Rue, Montreal, Quebec","4361, Rue Saint-Denis, Le Plateau-Mont-Royal, ...","(45.5232092, -73.5807711)",Le Plateau-Mont-Royal,45.523209,-73.580771
5,Hand-Picked Cool,Bohème Vintage,https://bohemevintage.com/,"206 Saint-Viateur Ouest, Montreal, Quebec","206, Rue Saint-Viateur Ouest, Mile-End, Le Pla...","(45.5231636, -73.6011042)",Mile-End,45.523164,-73.601104
6,Hand-Picked Cool,Shwap Club,https://www.shwapclub.com/,"4710 Saint-Ambroise Rue, Montreal, Quebec","OODA Technologies Inc., 4710, Rue Saint-Ambroi...","(45.4699455, -73.5860855)",Rue Saint-Ambroise,45.469946,-73.586085
7,Hand-Picked Cool,Common Sort,https://commonsort.com/,"3667 Saint-Laurent Boulevard, Montreal, Quebec","3667, Boulevard Saint-Laurent, Le Plateau-Mont...","(45.5145924, -73.5737081)",Le Plateau-Mont-Royal,45.514592,-73.573708
8,Hand-Picked Cool,Empire Exchange,https://empiremtl.com/,"5225 Saint-Laurent Boulevard, Montreal, Quebec","Empire de l'échange, 5225, Boulevard Saint-Lau...","(45.524204, -73.5950112)",Mile-End,45.524204,-73.595011
9,Restored & Upcycled,Citizen Vintage,https://www.citizenvintage.com/,"5330 Saint-Laurent Boulevard, Montreal, Quebec","5330, Boulevard Saint-Laurent, Mile-End, Le Pl...","(45.5245895, -73.5966767)",Mile-End,45.524589,-73.596677
