### Here we assign workplaces to people 
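The number of people in each polygon is known, so workplaces are generated there. Each working-age person is then assigned one of these workplaces: by default a randomly chosen one within 15 km of their household or, optionally, the closest one.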

In [1]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
from shapely.geometry import Point
from tqdm import tqdm
from geopy.distance import great_circle
import random

In [2]:
# SOME AUXILIARY FUNCTIONS
def isSchoolAge(row):
    """Return True when the row's 'age' lies in the inclusive range 7..17."""
    return 7 <= row['age'] <= 17


def isWorkingAge(row):
    """Tell whether the row describes a person of working age.

    A person qualifies when 'age' is at least 18 and does not exceed the
    upper bound for their 'sex': 60 for 'F', 55 for 'M'. Any other 'sex'
    value never qualifies.
    """
    # NOTE(review): the upper bounds are 60 for 'F' and 55 for 'M' --
    # confirm with the data owner that these two limits are not swapped.
    age = row['age']
    if not age >= 18:
        return False

    sex = row['sex']
    if sex == 'F':
        return age <= 60
    if sex == 'M':
        return age <= 55
    return False


def rowPointLocation(row):
    """Build a Point from the row's 'latitude' and 'longitude' entries.

    Both entries are converted to float; latitude is passed as the first
    coordinate and longitude as the second.
    """
    lat = float(row['latitude'])
    lon = float(row['longitude'])
    return Point(lat, lon)


def findDistToObj(row, point):
    """Return the great-circle distance, in km, from `point` to the object in `row`.

    `row` must provide 'latitude' and 'longitude'; `point` is read through
    its `x` and `y` attributes, in that order.
    """
    target = rowPointLocation(row)
    origin_coords = (point.x, point.y)
    target_coords = (target.x, target.y)
    return great_circle(origin_coords, target_coords).km

In [3]:
city_name = 'chelyabinsk'

# Tab-separated inputs and the output file, all inside the per-city data directory.
input_dir_workplaces = '../data/initial_' + city_name + '/workplaces_' + city_name + '.txt'
input_dir_people = '../data/initial_' + city_name + '/people_' + city_name + '_splitted_dwellings.txt'
input_dir_households = '../data/initial_' + city_name + '/households.txt'
output_dir = '../data/initial_' + city_name + '/people_' + city_name + '_assigned_workplaces.txt'


people_df = pd.read_csv(input_dir_people, sep='\t')
households_df = pd.read_csv(input_dir_households, sep='\t')
workplaces_df = pd.read_csv(input_dir_workplaces, sep='\t')


# Household id -> Point(latitude, longitude).
# The coordinate columns are zipped directly instead of going through
# DataFrame.iterrows(), which allocates a Series for every row.
list_keys = households_df['sp_id'].tolist()
list_values = [
    Point(lat, lon)
    for lat, lon in tqdm(
        zip(households_df['latitude'], households_df['longitude']),
        total=len(households_df),
    )
]

hh_points_dic = dict(zip(list_keys, list_values))

1189525it [00:45, 25936.93it/s]


In [4]:
def _pickWorkplaceWithinDistance(distances, max_distance_km, max_tries):
    # Draws random workplace labels from `distances` until one lies within
    # max_distance_km. After max_tries unsuccessful draws the last drawn label
    # is returned anyway, so the caller always gets a workplace.
    chosen_index = None
    for _ in range(max(1, max_tries)):
        chosen_index = np.random.choice(distances.index, size=1)[0]
        if distances.loc[chosen_index] <= max_distance_km:
            return chosen_index
    print('Too much tries to choose working place')
    return chosen_index


def assignWorkplaces(df_orig, hh_points_dic, workplaces_df_orig, *,
                     assign_type='15km', max_distance_km=15, max_tries=100,
                     seed=None) -> pd.DataFrame:
    """Assign a workplace id to every working-age person in `df_orig`.

    People are filtered with `isWorkingAge`. For each of them the distance
    from their household point to every workplace that still has free
    capacity is computed with `findDistToObj`, and one workplace is chosen.
    A workplace is removed from the pool once its number of assigned workers
    reaches its 'size'; processing stops early when the pool becomes empty.

    Args:
        df_orig: people table with 'age', 'sex' and 'sp_hh_id' columns. It is
            updated in place; the ids are written through `DataFrame.update`,
            so a 'work_id' column is expected to exist already.
        hh_points_dic: mapping from household id to its location point.
        workplaces_df_orig: workplaces table with 'sp_id', 'latitude',
            'longitude' and 'size' columns. It is not modified.
        assign_type: 'closest' picks the nearest remaining workplace; any
            other value picks a random workplace within `max_distance_km`.
        max_distance_km: radius used by the random mode.
        max_tries: number of random draws per person before the last draw is
            accepted regardless of its distance.
        seed: when not None, seeds NumPy's global random generator once
            before any draw so that a run can be reproduced.

    Returns:
        `df_orig`, after the assigned ids were written into 'work_id' and the
        table was saved as a tab-separated file to the module-level
        `output_dir`.
    """
    print("Assigning workplaces started...")
    print("CLOSEST WORKPLACE" if assign_type == 'closest'
          else "IN DISTANCE <= {}km".format(max_distance_km))

    # Draws are made with np.random.choice, so NumPy's generator is the one
    # that has to be seeded, and only once: re-seeding before every draw
    # would make every draw return the same workplace.
    if seed is not None:
        np.random.seed(seed)

    df = df_orig[df_orig.apply(isWorkingAge, axis=1)]
    print('Removed people without work...')

    close_work_ids = []
    workers_num = {}  # workplace 'sp_id' -> number of people assigned so far

    print("Workplaces, total: {}".format(workplaces_df_orig['size'].sum()))
    print("Working people, total: {}".format(df.shape[0]))

    # Working copy of the workplaces; 'distances' is refreshed per person.
    workplaces_df = workplaces_df_orig.assign(distances=0.0)
    print(workplaces_df)
    print(df.shape)

    # Only the household id of each person is needed inside the loop.
    for hh_id in tqdm(df['sp_hh_id'].tolist()):
        person_point = hh_points_dic[hh_id]
        workplaces_df['distances'] = workplaces_df.apply(
            findDistToObj, args=(person_point,), axis=1)

        if assign_type == 'closest':
            chosen_index = workplaces_df['distances'].idxmin()
        else:
            chosen_index = _pickWorkplaceWithinDistance(
                workplaces_df['distances'], max_distance_km, max_tries)

        close_workplace = workplaces_df.loc[chosen_index]
        work_id = close_workplace['sp_id']
        close_work_ids.append(work_id)

        workers_num[work_id] = workers_num.get(work_id, 0) + 1
        # '>=' rather than '==' so that a workplace whose size is zero or
        # fractional is still taken out of the pool once it is full.
        if workers_num[work_id] >= close_workplace['size']:
            workplaces_df = workplaces_df.drop(chosen_index)
            if workplaces_df.shape[0] == 0:
                print("All workplaces filled to capacity!")
                break

    # close_work_ids follows the order of df, so it lines up with the first
    # len(close_work_ids) labels of df.index.
    assigned = pd.DataFrame(close_work_ids, columns=['work_id'],
                            index=df.index[:len(close_work_ids)])
    df_orig.update(assigned)
    df_orig.to_csv(output_dir, sep='\t')
    return df_orig


import time

# Run the assignment over the loaded tables and report the wall-clock time in minutes.
start = time.time()
df_assigned = assignWorkplaces(people_df, hh_points_dic, workplaces_df)
end = time.time()
elapsed_minutes = (end - start) / 60
print("Time of execution: {} min".format(elapsed_minutes))

Assigning workplaces started...
IN DISTANCE <= 15km
Removed people without work...
Workplaces, total: 867317.0
Working people, total: 637555
     sp_id   latitude  longitude  size  distances
0      1.0  55.186586  61.597408  25.0        0.0
1      2.0  55.159652  61.570179  31.0        0.0
2      3.0  55.177610  61.570179  41.0        0.0
3      4.0  55.195560  61.570179  41.0        0.0
4      5.0  55.213502  61.570179  28.0        0.0
..     ...        ...        ...   ...        ...
179  180.0  55.042728  61.216206   3.0        0.0
180  181.0  55.060739  61.216206   1.0        0.0
181  182.0  55.033720  61.188977  34.0        0.0
182  183.0  55.051734  61.188977  33.0        0.0
183  184.0  55.042728  61.161748  26.0        0.0

[184 rows x 5 columns]
(637555, 8)


100%|██████████████████████████████████| 637555/637555 [43:54<00:00, 242.02it/s]


Time of execution: 44.073092528184254 min


In [5]:
# Scratch cell exercising idxmin, random label choice and drop on workplaces_df.
# Label of the row with the smallest 'size'.
min_idx = workplaces_df['size'].idxmin()
workplaces_df.loc[min_idx]
# One random row label, returned as a length-1 array.
chosen_idx = np.random.choice(workplaces_df.index, size=1)
# NOTE: this rebinds the module-level workplaces_df with that random row removed.
workplaces_df = workplaces_df.drop(chosen_idx)
# Another random label, drawn from the reduced frame; as the last expression
# it is the value the notebook displays.
np.random.choice(workplaces_df.index, size=1)[0]

1

In [6]:
# Distinct values present in the 'work_id' column of the assignment result.
df_assigned['work_id'].unique()

array(['X', 173.0, 182.0, 34.0, 162.0, 51.0, 88.0, 89.0, 35.0, 49.0, 80.0,
       38.0, 139.0, 21.0, 120.0, 81.0, 149.0, 140.0, 180.0, 152.0, 107.0,
       174.0, 64.0, 105.0, 175.0, 82.0, 118.0, 161.0, 37.0, 172.0, 134.0,
       67.0, 63.0, 99.0, 119.0, 70.0, 103.0, 150.0, 148.0, 125.0, 135.0,
       151.0, 87.0, 176.0, 160.0, 121.0, 153.0, 163.0, 68.0, 137.0, 65.0,
       164.0, 123.0, 100.0, 9.0, 20.0, 85.0, 183.0, 117.0, 104.0, 184.0,
       53.0, 181.0, 98.0, 138.0, 8.0, 122.0, 101.0, 48.0, 23.0, 7.0, 36.0,
       22.0, 83.0, 69.0, 86.0, 136.0, 124.0, 106.0, 84.0, 50.0, 165.0,
       52.0, 24.0, 39.0, 66.0, 108.0, 102.0, 54.0, 141.0, 71.0, 10.0,
       40.0, 11.0, 90.0, 126.0, 177.0, 109.0, 154.0, 178.0, 167.0, 166.0,
       142.0, 155.0, 12.0, 72.0, 25.0, 91.0, 127.0, 26.0, 55.0, 179.0,
       168.0, 2.0, 41.0, 56.0, 110.0, 73.0, 156.0, 128.0, 143.0, 92.0,
       57.0, 13.0, 5.0, 42.0, 28.0, 1.0, 74.0, 43.0, 14.0, 3.0, 4.0, 27.0,
       15.0, 58.0, 29.0, 111.0, 129.0, 93.0, 144.0

In [7]:
# just some tests
# for i in range(100):
#     person_row = df_assigned.iloc[i]
#     hh_id = person_row['sp_hh_id']
#     work_id = person_row['work_id']
#     if person_row['work_id'] != 'X':
#         work = Point(workplaces_df[workplaces_df['sp_id'] == work_id].latitude, workplaces_df[workplaces_df['sp_id'] == work_id].longitude)  
#         hh =  Point(households_df[households_df['sp_id'] == hh_id].iloc[0].latitude, households_df[households_df['sp_id'] == hh_id].iloc[0].longitude) 
#         dist = great_circle((work.x, work.y), (hh.x, hh.y)).km
#         if (dist > 20):
#             print(person_row, dist)
