In [9]:
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime #for working with times objects
from datetime import timedelta #for working with times objects
import math
from math import radians, sin, cos, sqrt, atan2
import random
import glob

In [2]:
# Load the cleaned trip table and the cleaned points-of-interest table.
# low_memory=False disables pandas' chunked parsing so each column's dtype
# is inferred from the whole file.
Phil, POI = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in ('cleaned_Phil.csv', 'POI_PA_Cleaned.csv')
)

In [5]:
# Print the schema summary first, then leave head() as the cell's final
# expression: Jupyter only renders the value of the last expression in a
# cell, so a head() call placed before info() is silently discarded.
POI.info()
POI.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4222 entries, 0 to 4221
Data columns (total 7 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   id        4222 non-null   int64  
 1   lat       4222 non-null   float64
 2   lon       4222 non-null   float64
 3   operator  412 non-null    object 
 4   amenity   3882 non-null   object 
 5   atm       53 non-null     object 
 6   tourism   310 non-null    object 
dtypes: float64(2), int64(1), object(4)
memory usage: 231.0+ KB


In [3]:
# Unique [coordinate string, station id] pairs taken from the trip start
# columns. Per the original author's note, start stations = end stations,
# so only the start columns are consulted.
unique_coordinates = (
    Phil.loc[:, ["start_coordinates", "start_station"]]
    .drop_duplicates()
    .to_numpy()
    .tolist()
)

In [35]:
#Haversine formula for distance between two points on a sphere 

def haversine_formula(lat1, lon1, lat2, lon2):
    """Return the great-circle distance between two points, in kilometres.

    The distance is computed with the haversine formula on a sphere of
    radius 6371 km.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the first point, in decimal degrees.
    lat2, lon2 : float
        Latitude and longitude of the second point, in decimal degrees.

    Returns
    -------
    float
        Distance along the sphere's surface in kilometres. NaN inputs
        propagate to a NaN result.
    """
    R = 6371.0  # earth radius in kilometres

    # Degrees -> radians for the trigonometric functions.
    lat1, lon1, lat2, lon2 = map(radians, (lat1, lon1, lat2, lon2))

    delta_lat = lat2 - lat1
    delta_lon = lon2 - lon1

    a = sin(delta_lat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(delta_lon / 2) ** 2

    # For (near-)antipodal points floating-point rounding can leave `a` a
    # hair above 1, which would make sqrt(1 - a) raise ValueError. Clamp the
    # upper bound only; the explicit comparison lets NaN pass through
    # unchanged instead of being turned into a valid-looking number.
    if a > 1.0:
        a = 1.0

    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    return R * c

In [36]:
#counter for POI for each station

def count_POI(station, POI_data, radius_km=0.2):
    """Count the points of interest lying within a radius of a station.

    Parameters
    ----------
    station : sequence
        ``station[0]`` must be a ``"lat,lon"`` string in decimal degrees;
        any further elements are ignored here.
    POI_data : pandas.DataFrame
        Table with numeric ``'lat'`` and ``'lon'`` columns in decimal degrees.
    radius_km : float, optional
        Search radius in kilometres. Defaults to 0.2 (200 metres).

    Returns
    -------
    int
        Number of rows of ``POI_data`` whose haversine distance to the
        station is at most ``radius_km``. Rows with NaN coordinates are
        never counted.
    """
    R = 6371.0  # earth radius in kilometres

    lat1, lon1 = (float(part) for part in station[0].split(","))
    lat1, lon1 = np.radians(lat1), np.radians(lon1)

    # Whole-column arrays: the haversine formula is evaluated for all POIs
    # at once instead of one DataFrame row at a time via iterrows(). It is
    # written out in NumPy here because the math-module trig functions only
    # accept scalars.
    lat2 = np.radians(POI_data['lat'].to_numpy(dtype=float))
    lon2 = np.radians(POI_data['lon'].to_numpy(dtype=float))

    a = (
        np.sin((lat2 - lat1) / 2) ** 2
        + np.cos(lat1) * np.cos(lat2) * np.sin((lon2 - lon1) / 2) ** 2
    )
    # Guard against rounding pushing `a` just above 1 (antipodal points);
    # np.minimum propagates NaN, so missing coordinates stay NaN.
    a = np.minimum(a, 1.0)
    distance = R * 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))

    # Comparisons with NaN are False, so incomplete rows are not counted.
    return int(np.count_nonzero(distance <= radius_km))

In [37]:
# One record per unique [coordinates, station id] pair: the station id
# (second element) alongside the number of POIs that count_POI reports.
POI_counts = [
    {'Station': entry[1], 'POI_Count': count_POI(entry, POI)}
    for entry in unique_coordinates
]



In [38]:
# Order the records by station id on a copy, leaving POI_counts untouched,
# then display the sorted list as the cell's result.
sorted_POI_counts = list(POI_counts)
sorted_POI_counts.sort(key=lambda record: record["Station"])
sorted_POI_counts

[{'Station': 3004, 'POI_Count': 43},
 {'Station': 3005, 'POI_Count': 48},
 {'Station': 3006, 'POI_Count': 23},
 {'Station': 3007, 'POI_Count': 9},
 {'Station': 3008, 'POI_Count': 9},
 {'Station': 3009, 'POI_Count': 22},
 {'Station': 3010, 'POI_Count': 31},
 {'Station': 3011, 'POI_Count': 5},
 {'Station': 3012, 'POI_Count': 20},
 {'Station': 3013, 'POI_Count': 4},
 {'Station': 3014, 'POI_Count': 10},
 {'Station': 3015, 'POI_Count': 17},
 {'Station': 3016, 'POI_Count': 1},
 {'Station': 3017, 'POI_Count': 0},
 {'Station': 3018, 'POI_Count': 64},
 {'Station': 3019, 'POI_Count': 7},
 {'Station': 3020, 'POI_Count': 7},
 {'Station': 3021, 'POI_Count': 53},
 {'Station': 3022, 'POI_Count': 35},
 {'Station': 3024, 'POI_Count': 43},
 {'Station': 3025, 'POI_Count': 8},
 {'Station': 3026, 'POI_Count': 16},
 {'Station': 3028, 'POI_Count': 26},
 {'Station': 3029, 'POI_Count': 44},
 {'Station': 3030, 'POI_Count': 15},
 {'Station': 3031, 'POI_Count': 37},
 {'Station': 3032, 'POI_Count': 31},
 {'Station

In [42]:
# Tabulate the per-station counts and write them to CSV without the
# DataFrame's row index.
Counted_POI = pd.DataFrame(data=sorted_POI_counts)
Counted_POI.to_csv(path_or_buf="Counted_POI.csv", index=False)