In [1]:
import os

import pandas as pd

In [2]:
# Maps each raw matrix CSV file name to the stem used when naming its output
# files. Every entry follows the "<amenity>_<housing>" pattern, so the mapping
# is generated rather than spelled out; iteration order is amenity-major with
# "hdb" before "private".
datasets = {
    f"{amenity}_{housing}_matrix.csv": f"{amenity}_{housing}"
    for amenity in ("bus", "hawker", "malls", "mrtlrt", "parks", "school", "supermarket")
    for housing in ("hdb", "private")
}

# Shortest Distances

In [3]:
def shortestdistance(file_name, output_name, input_dir="Raw Matrix", output_dir="Shortest Matrix"):
    """Keep the minimum-distance row of every group and save the result as CSV.

    Reads ``<input_dir>/<file_name>``, groups the rows by the second and third
    columns (selected by position), keeps the row with the smallest
    ``distance`` in each group, drops the first column, and writes the result
    to ``<output_dir>/<output_name>_shortest.csv``. A short preview is printed.

    Parameters
    ----------
    file_name : str
        Name of the input CSV inside ``input_dir``. It must contain a
        ``distance`` column and at least three columns in total.
    output_name : str
        Stem of the output file; ``'_shortest.csv'`` is appended.
    input_dir : str, optional
        Directory holding the input CSV. Defaults to ``"Raw Matrix"``.
    output_dir : str, optional
        Directory receiving the output CSV. It is created if it does not
        exist. Defaults to ``"Shortest Matrix"``.

    Returns
    -------
    None
    """
    df = pd.read_csv(os.path.join(input_dir, file_name))

    # Columns are addressed by position. Column 0 is discarded below --
    # presumably a saved row index from an earlier to_csv (TODO confirm) --
    # and columns 1 and 2 identify the group.
    group_cols = [df.columns[1], df.columns[2]]
    nearest_rows = df.groupby(group_cols)['distance'].idxmin()

    # Chained instead of inplace=True so no intermediate frame is mutated.
    shortest_distances = (
        df.loc[nearest_rows]
        .reset_index(drop=True)
        .drop(columns=df.columns[0])
    )

    # to_csv raises OSError when the target directory is missing.
    os.makedirs(output_dir, exist_ok=True)
    # The index is written as well (to_csv default), which keeps the output
    # file layout identical to what this function has always produced.
    shortest_distances.to_csv(os.path.join(output_dir, output_name + '_shortest.csv'))
    print("----------")
    print(file_name)
    print(shortest_distances.head())

In [4]:
# Run shortestdistance once per (input CSV name, output stem) pair in `datasets`.
for matrix_csv, output_stem in datasets.items():
    shortestdistance(matrix_csv, output_stem)
        

----------
bus_hdb_matrix.csv
    hdb_lat     hdb_lon   bus_lat     bus_lon  distance
0  1.270380  103.823236  1.271277  103.823118  0.100105
1  1.270919  103.822685  1.271277  103.823118  0.062350
2  1.271409  103.810888  1.270735  103.810200  0.106798
3  1.271463  103.825683  1.271354  103.824739  0.105799
4  1.271691  103.809852  1.271166  103.809519  0.068855
----------
bus_private_matrix.csv
   private_lat  private_lon   bus_lat     bus_lon  distance
0     1.239785   103.837277  1.255769  103.824031  2.301543
1     1.240058   103.830936  1.255769  103.824031  1.899588
2     1.241285   103.840100  1.255769  103.824031  2.400685
3     1.242347   103.838161  1.255769  103.824031  2.162376
4     1.242964   103.837106  1.255769  103.824031  2.030359
----------
hawker_hdb_matrix.csv
    hdb_lat     hdb_lon  hawker_lat  hawker_lon  distance
0  1.270380  103.823236    1.272716  103.822117  0.286806
1  1.270919  103.822685    1.272716  103.822117  0.208560
2  1.271409  103.810888    1.2732

# Counts Within Distance Thresholds

In [5]:
def count_within_thresholds(file_name, output_name, thresholds=(0.1, 0.3, 0.5, 1, 1.5, 2.0),
                            input_dir="Raw Matrix", output_dir="Count Matrix"):
    """Count, per group, how many rows have ``distance`` at or below each threshold.

    Reads ``<input_dir>/<file_name>``, groups the rows by the second and third
    columns (selected by position), and for every threshold counts the rows in
    each group whose ``distance`` is ``<=`` that threshold. The result has one
    row per group, the two grouping columns first, then one ``within_<t>``
    column per threshold. It is written without the index to
    ``<output_dir>/<output_name>_counts.csv`` and a short preview is printed.

    Parameters
    ----------
    file_name : str
        Name of the input CSV inside ``input_dir``. It must contain a
        ``distance`` column and at least three columns in total.
    output_name : str
        Stem of the output file; ``'_counts.csv'`` is appended.
    thresholds : iterable of numbers, optional
        Distance cut-offs. Each value is formatted into its column name as
        written, so ``1`` yields ``within_1`` and ``2.0`` yields ``within_2.0``.
    input_dir : str, optional
        Directory holding the input CSV. Defaults to ``"Raw Matrix"``.
    output_dir : str, optional
        Directory receiving the output CSV. It is created if it does not
        exist. Defaults to ``"Count Matrix"``.

    Returns
    -------
    None
    """
    df = pd.read_csv(os.path.join(input_dir, file_name))

    # Grouping keys are taken by position (columns 1 and 2); passing them as
    # named Series makes their names become the index level names.
    group_keys = [df[df.columns[1]], df[df.columns[2]]]

    # One 0/1 indicator column per threshold, summed in a single groupby,
    # instead of re-grouping and running a Python lambda for every threshold.
    # The explicit int64 cast keeps the counts integral in the written CSV.
    within = pd.DataFrame(
        {
            f'within_{threshold}': df['distance'].le(threshold).astype('int64')
            for threshold in thresholds
        },
        index=df.index,
    )
    counts_df = within.groupby(group_keys).sum().reset_index()

    # to_csv raises OSError when the target directory is missing.
    os.makedirs(output_dir, exist_ok=True)
    counts_df.to_csv(os.path.join(output_dir, output_name + '_counts.csv'), index=False)

    print("----------")
    print(file_name)
    print(counts_df.head())


In [6]:
# Run count_within_thresholds once per (input CSV name, output stem) pair in `datasets`.
for matrix_csv, output_stem in datasets.items():
    count_within_thresholds(matrix_csv, output_stem)
        

----------
bus_hdb_matrix.csv
    hdb_lat     hdb_lon  within_0.1  within_0.3  within_0.5  within_1  \
0  1.270380  103.823236           0           5          12        35   
1  1.270919  103.822685           1           5          13        36   
2  1.271409  103.810888           0           6          12        28   
3  1.271463  103.825683           0           7          16        39   
4  1.271691  103.809852           1           7          13        32   

   within_1.5  within_2.0  
0          71         114  
1          73         121  
2          73         118  
3          72         125  
4          69         118  
----------
bus_private_matrix.csv
   private_lat  private_lon  within_0.1  within_0.3  within_0.5  within_1  \
0     1.239785   103.837277           0           0           0         0   
1     1.240058   103.830936           0           0           0         0   
2     1.241285   103.840100           0           0           0         0   
3     1.242347   103.

----------
supermarket_private_matrix.csv
   private_lat  private_lon  within_0.1  within_0.3  within_0.5  within_1  \
0     1.239785   103.837277           0           0           0         1   
1     1.240058   103.830936           0           0           0         0   
2     1.241285   103.840100           0           0           0         2   
3     1.242347   103.838161           0           0           0         2   
4     1.242964   103.837106           0           0           0         2   

   within_1.5  within_2.0  
0           2           2  
1           4           4  
2           2           2  
3           2           2  
4           2           2  
