# The code for the neighborhood planner

In [768]:
import geopandas as gpd
import pandas as pd

## setting the base tuples with the start weight and sorting order (ascending = *boolean*)

In [769]:
sport_building_weight = (3, False)
distance_from_centre_weight = (3, False)
green_score_weight = (3, False)
livability_score_weight = (3, False)
jobs_count_weight = (3, False)
price_weight = (3, True)
proximity_score_weight = (3, False)
density_weight = (3, True)
crime_and_nuisance_weight = (3, True)

## Creating a dictionary with the values for every feature

In [770]:
weights = {
    'sport_building_count': sport_building_weight,
    'distance_from_centre_km': distance_from_centre_weight,
    'green_score': green_score_weight,
    'livability_score': livability_score_weight,
    'jobs_count': jobs_count_weight,
    'price_2022': price_weight,
    'proximity_score': proximity_score_weight,
    'density': density_weight,
    'crime_and_nuisance': crime_and_nuisance_weight
}

## Instantiating the class for the sorting code

In [771]:
class neighborhood_sorter():
    '''
    This is a class that stores all the code for chosing the best neighborhood
    based on weighted feature scores. The lower the score of a neighborhood,
    The better match with the user's preferences.

    Input while instantiating:
        df: a dataframe that contains all the merged data (don't touch that)
        weights: a dictionary containing all the column names,
            weights and sorting directions for each feature
    
    Output:
        df: a Pandas DataFrame containing all the neiborhoods
        sorted ascending (from best match to worst) with the scores
    '''
    def __init__(self,
                 df,
                 weights=weights):
        
        self.df = df
        self.weights = weights
    
    def preprocess_data(self):
        '''
        A function that assigns new columns, drops unused ones and returns cleaned
        dataframe ready for assigning scores to features.

        Output:
            df: a Pandas DataFrame
        '''
        df = self.df.assign(
            density = self.df['inhabitants'] / self.df['area_sqkm'],
            crime_and_nuisance = self.df['Total felonies'] + self.df['Total nuisance registrations'])

        df = df.drop(['Accidents (road)',
            'Encroachment on public order', 'Fraud (other)', 'Horizontal Fraud',
            'Human trafficking', 'Nature and landscape', 'Quality of life (other)',
            'Road (other)', 'Spatial planning', 'Special Laws',
            'Transport of hazardous substances', 'Under the influence (water)',
            'Abuse', 'Air (other)', 'Animals', 'Arms Trade', 'Building materials',
            'Cybercrime', 'Discrimination', 'Domestic Violation',
            'Drug trafficking', 'Drugs/drink nuisance', 'Fire/Explosion',
            'Fireworks', 'Food safety', 'Home theft/burglary', 'Immigration care',
            'Most', 'Motor Vehicle Theft', 'Murder, Manslaughter',
            'Neighbor rumor (relationship problems)', 'Open violence (person)',
            'Other property crimes', 'People smuggling', 'Pesticides',
            'Pickpocketing', 'Robbery', 'Shoplifting', 'Soil', 'Street robbery',
            'Structure of the Environmental Management Act',
            'Theft from/from motor vehicles',
            'Theft of mopeds, mopeds and bicycles',
            'Theft/burglary box/garage/shed', 'Theft/burglary of companies, etc.',
            'Thefts (water)', 'Threat', 'Total felonies',
            'Under the influence (air)', 'Under the influence (road)',
            'Vertical Fraud', 'Waste', 'Water'], axis=1)
        
        df = df.drop(['Total nuisance registrations',
                    'Nuisance by confused person',
                    'Youth nuisance report',
                    'Nuisance due to alcohol/drugs',
                    'Nuisance drifters',
                    'Public intoxication',
                    'Noise nuisance catering',
                    'Noise nuisance event',
                    'Other noise nuisance'],
                    axis=1)
        
        df = df.drop(['Childcare',
                    'Education',
                    'Health and well-being',
                    'Hospitality',
                    'Retail',
                    'inhabitants',
                    'light_count',
                    'light_per_1000',
                    'workplace_count',
                    'sport_building_per_1000',
                    'area_sqkm',
                    'drug_store_count'],
                    axis=1)
        
        return df
    
    def create_scores(self, df):
        '''
        A function that assigns points for each feature based on the neiborhood's place (after sorting)
        
        Input:
            df: a Pandas DataFrame preprocessed with 'preprocess_data' function
        
        Output:
            df_scores: a Pandas DataFrame with all the points for every feature
        '''
        df_scores = df[['neighborhood']]
        for feature, (weight, asc_bool) in self.weights.items():
            df_merge = df.sort_values(feature, ascending=asc_bool, ignore_index=True)[['neighborhood']]
            df_merge = df_merge.assign(score = weight * pd.Series([x + 1 for x in range(56)]))
            df_scores = df_scores.merge(df_merge, how='left', on='neighborhood', suffixes=(None, f'_{feature}'))
        return df_scores
    
    def summarize_scores(self):
        '''
        A function that summarizes the score columns created by 'create_scores' function.

        Output:
            df: a Pandas DataFrame containing all the neiborhoods
                sorted ascending (from best match to worst) with the scores
        '''
        df = self.preprocess_data(self.df)
        df = self.create_scores(df)
        df = df.assign(total = (
        df['score']+
        df['score_distance_from_centre_km']+
        df['score_distance_from_centre_km']+
        df['score_livability_score']+
        df['score_jobs_count']+
        df['score_price_2022']+
        df['score_proximity_score']+
        df['score_density']+
        df['score_crime_and_nuisance']))

        return df[['neighborhood', 'total']].sort_values('total')

## reading the data from a file

In [772]:
df = gpd.read_file('data_merged\\full_join.geojson')

## Using the sorter to create neiborhood ranking for the default weights (3 in for every feature)

In [773]:
sorter = neighborhood_sorter(df)
sums = sorter.summarize_scores()
sums

Unnamed: 0,neighborhood,total
49,Buitengebied Prinsenbeek,462
19,Hagebeemd,465
1,Buitengebied Bavel,543
47,Overakker,546
32,Effen-Rith,558
24,Muizenberg,585
46,Mastbos,591
38,Princenhage,615
37,Liesbos,621
28,Hoogeind,636
