# The code to get the utils data file

In [28]:
import pandas as pd
import geopandas as gpd

### Loading the data

In [29]:
df_pop = pd.read_csv('data_raw\population.csv')

In [30]:
df_pop = (
    df_pop[df_pop['region_type'] == 'Neighbourhood']
    .groupby('region', as_index=False)['inhabitants'].sum()
)

In [31]:
df_light = (
    gpd.read_file('data_raw\lighting.geojson')
    .drop(['object_nummer',
            'mast_product',
            'straat',
            'armatuur_product',
            'lamp_product',
            'mast_hoogte'],
            axis=1)
)

In [32]:
df_work = (
    gpd.read_file('data_raw\work_locations.geojson')
    .drop(['identificatie',
            'validatiecode',
            'locatie',
            'woonplaats',
            'gemeente',
            'type_bedrijf'],
            axis=1)
    .rename(columns={'objectid': 'workplace_count'})
)

In [33]:
df_sport = (
    gpd.read_file('data_raw\sport_buildings.geojson')
    .drop(['IDENTIFICATIE',
            'NAAM',
            'STRAAT',
            'HUISNUMMER',
            'HUISLETTER',
            'HUISNUMMER_TOEVOEGING',
            'POSTCODE',
            'PLAATS',
            'SPORT_SOORT',
            'HOOFDBELEIDSVELD'],
            axis=1)
    .rename(columns={'OBJECTID': 'sport_building_count'})
)

In [34]:
df_regions = (
    gpd.read_file('data_raw\Buurten.geojson')
        .drop(['OBJECTID',
                'BUURTCODE',
                'WOONPLAATS',
                'GOUWCODE',
                'GEMEENTE',
                'GEMEENTECODE',
                'Shape__Area',
                'Shape__Length',
                'WOONPLAATSCODE',
                'OPPERVLAKTE'], axis=1)
        .rename(columns={'GOUW': 'WIJK'})
)

In [35]:
df_regions.head()

Unnamed: 0,BUURT,WIJK,geometry
0,Kievitsloop,Breda noord-west,"POLYGON ((4.75141 51.61890, 4.75136 51.61882, ..."
1,Princenhage,Breda west,"POLYGON ((4.74373 51.57291, 4.74363 51.57176, ..."
2,Heusdenhout,Breda oost,"POLYGON ((4.82923 51.59268, 4.82853 51.59194, ..."
3,Buitengebied Bavel,Bavel,"POLYGON ((4.87119 51.56827, 4.87135 51.56790, ..."
4,Chassé,Breda centrum,"POLYGON ((4.78630 51.58690, 4.78298 51.58366, ..."


In [36]:
df_drugs = (
    pd.read_csv('data_raw\drugscords.csv')
        .drop(['totalScore',
               'reviewsCount',
               'street','city','state',
               'countryCode',
               'website',
               'phone',
               'categoryName',
               'url'],
                axis=1)
        
)

In [37]:
df_drugs = (
    gpd.GeoDataFrame(df_drugs,
                    geometry=gpd.points_from_xy(df_drugs['longitude'],
                                                df_drugs['latitude'],
                                                crs="EPSG:4326"))
        .drop(['latitude', 'longitude'], axis=1)
)

In [38]:
df_drugs.head()

Unnamed: 0,title,geometry
0,the Baron,POINT (4.78469 51.59150)
1,Pax,POINT (4.76606 51.59057)
2,Coffeeshop Paradise,POINT (4.77811 51.58868)
3,Sky Coffeeshop,POINT (4.76005 51.58442)
4,The Cat,POINT (4.80523 51.58700)


### Joining the data

In [39]:
df_join = df_regions.sjoin(df_light, how='inner').drop(['index_right'], axis=1)

In [40]:
df_join = df_join.dissolve(by=['WIJK', 'BUURT'], aggfunc='count', as_index=False)

In [41]:
df_join = df_join.rename(columns={'OBJECTID': 'light_count'})

In [42]:
df_join = df_join.sjoin(df_sport, how='left').drop('index_right', axis=1)

In [43]:
df_join = df_join.dissolve(by=['WIJK', 'BUURT', 'light_count'], aggfunc='count', as_index=False)

In [44]:
df_join = df_join.sjoin(df_work, how='left').drop('index_right', axis=1)

In [45]:
df_join = df_join.dissolve(by=['WIJK',
                                 'BUURT',
                                 'light_count', 
                                 'sport_building_count'],
                                 aggfunc='count',
                                 as_index=False)

In [46]:
df_join = df_join.sjoin(df_drugs, how='left')

In [47]:
df_join = df_join.dissolve(by=['WIJK',
                                'BUURT',
                                'light_count', 
                                'sport_building_count',
                                'workplace_count'],
                                aggfunc='count',
                                as_index=False)

In [48]:
df_join = df_join.merge(df_pop, left_on='BUURT', right_on='region', how='inner')

### Renaming, dropping and changing the names of some columns

In [49]:
df_join = df_join.drop('region', axis=1)

In [50]:
df_join = df_join.rename(columns={'WIJK': 'regions', 'BUURT': 'neighborhood'})

In [51]:
centre = df_join[df_join['neighborhood'] == 'City']['geometry'].to_crs('EPSG:3310').centroid.iloc[0]

df_join = df_join.assign(
    light_per_1000 = lambda df: df['light_count'] / (df['inhabitants'] / 1000),
    sport_building_per_1000 = lambda df: df['sport_building_count'] / (df['inhabitants'] / 1000),
    area_sqkm = lambda df: df['geometry'].to_crs('EPSG:3310').area / 1_000_000,
    distance_from_centre_km = lambda df: df['geometry'].to_crs('EPSG:3310').centroid.distance(centre) / 1000
)

In [52]:
df_join = df_join.drop(['index_right', 'title'], axis=1)

In [53]:
df_join

Unnamed: 0,regions,neighborhood,light_count,sport_building_count,workplace_count,geometry,inhabitants,light_per_1000,sport_building_per_1000,area_sqkm,distance_from_centre_km
0,Bavel,Bavel,1253,2,2,"POLYGON ((4.83844 51.56120, 4.83911 51.56024, ...",5445,230.119376,0.367309,1.750374,4.59422
1,Bavel,Buitengebied Bavel,308,6,1,"POLYGON ((4.87112 51.56696, 4.87096 51.56694, ...",570,540.350877,10.526316,8.559273,5.384812
2,Bavel,Nieuw Wolfslaar,515,0,0,"POLYGON ((4.81440 51.55980, 4.81426 51.55978, ...",2365,217.758985,0.0,1.027985,3.831573
3,Breda centrum,Chassé,647,0,3,"POLYGON ((4.78176 51.58262, 4.78164 51.58253, ...",3365,192.273403,0.0,0.408947,0.566532
4,Breda centrum,City,668,0,3,"POLYGON ((4.78086 51.58674, 4.78031 51.58705, ...",2575,259.417476,0.0,0.435064,0.0
5,Breda centrum,Fellenoord,240,0,2,"POLYGON ((4.77310 51.58228, 4.77310 51.58229, ...",1565,153.354633,0.0,0.277823,0.648226
6,Breda centrum,Schorsmolen,329,0,1,"POLYGON ((4.77039 51.58832, 4.77033 51.58828, ...",3450,95.362319,0.0,0.276399,0.584711
7,Breda centrum,Station,318,0,3,"POLYGON ((4.79096 51.59455, 4.79091 51.59450, ...",2310,137.662338,0.0,0.291627,0.692898
8,Breda centrum,Valkenberg,402,0,3,"POLYGON ((4.78866 51.59307, 4.78870 51.59305, ...",2005,200.498753,0.0,0.414723,0.594466
9,Breda noord,Belcrum,637,0,4,"POLYGON ((4.77500 51.59479, 4.77479 51.59477, ...",4005,159.051186,0.0,0.532119,0.935204


### Saving the data to a GeoJSON file

In [54]:
df_join.to_file('data_raw\\utils.geojson', driver='GeoJSON')