In [None]:
import warnings
warnings.filterwarnings('ignore')

import requests
import overpass

import pandas as pd
import numpy as np
import geopandas as gpd
from shapely.geometry import Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon
from geopy import distance

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error
from sklearn.linear_model import LinearRegression

# Overpass API endpoint that every OpenStreetMap query in this notebook is sent to.
overpass_url = "http://overpass-api.de/api/interpreter"

from geopy.geocoders import Nominatim
from geopy.extra.rate_limiter import RateLimiter

# Nominatim client shared by the two rate-limited wrappers below.
# NOTE(review): the user agent is a hard-coded e-mail address; consider moving it to configuration.
geolocator = Nominatim(user_agent = 'chelych@mail.com')
# Reverse geocoding (coordinates -> address), at most one request per second.
# Errors are swallowed and reported as a None result, so callers must check for None.
reverse = RateLimiter(geolocator.reverse, min_delay_seconds=1,error_wait_seconds=2.0,swallow_exceptions=True,
                      return_value_on_exception=None)

# Forward geocoding (place name -> coordinates) with the same throttling and
# the same None-on-error behaviour.
getcen = RateLimiter(geolocator.geocode, min_delay_seconds=1,error_wait_seconds=2.0,swallow_exceptions=True,
                      return_value_on_exception=None)


In [None]:
from functools import partial
import pyproj
from shapely.ops import transform

# Geographic (longitude/latitude, WGS84) projection that the buffers are expressed in.
proj_wgs84 = pyproj.Proj('+proj=longlat +datum=WGS84')

def geodesic_point_buffer(lon, lat, m):
    """Return a circle of radius ``m`` around a point as a lon/lat polygon.

    The circle is drawn around the origin of an azimuthal-equidistant
    projection centred on the point (so its radius is in the projection's
    linear units, metres) and then re-projected to WGS84 longitude/latitude.

    Parameters
    ----------
    lon, lat : float
        Longitude and latitude of the centre, in degrees.
    m : float
        Buffer radius in metres.

    Returns
    -------
    shapely.geometry.Polygon
        Buffer polygon with (lon, lat) vertices.
    """
    aeqd_proj = pyproj.Proj(
        '+proj=aeqd +lat_0={lat} +lon_0={lon} +x_0=0 +y_0=0'.format(lon=lon, lat=lat))
    # `pyproj.transform` is deprecated; a Transformer is its documented replacement.
    # always_xy=True keeps the (x=lon, y=lat) axis order the old call used.
    to_wgs84 = pyproj.Transformer.from_proj(aeqd_proj, proj_wgs84, always_xy=True).transform
    buf = Point(0, 0).buffer(m)
    return transform(to_wgs84, buf)


### Read data

In [None]:
# Training table; later cells read its `lat`, `lon`, `point_id` and `target` columns.
train = pd.read_csv('data/mf_geo_train.csv')


In [None]:
# Interactive Kepler.gl map of the raw training table (visual inspection only;
# nothing below reads the `kepler` object).
from keplergl import KeplerGl
kepler = KeplerGl()
kepler.add_data(train)
# Bare last expression so the notebook renders the map widget.
kepler

In [None]:

# Reverse-geocode every training point to a city name via the rate-limited
# `reverse` wrapper created in the setup cell (one Nominatim request per second).
#
# Naming rule: use the address' `city`; fall back to `state` when there is no
# city or the "city" is actually a district ("район").
# `reverse` swallows geocoder errors and returns None, so a failed lookup is
# recorded as a missing city instead of crashing on `None.raw`.
train_cities = []
for lat, lon in zip(train['lat'], train['lon']):
    location = reverse('{}, {}'.format(lat, lon))
    address = {}
    if location is not None:
        address = location.raw.get('address') or {}
    city = address.get('city')
    if city is None or "район" in city or "Район" in city:
        train_cities.append(address.get('state'))
    else:
        train_cities.append(city)
train['City'] = train_cities
# Strip the administrative prefix so the value matches the OSM area name.
train['City'] = train['City'].str.replace('городской округ ', '', regex=False)

In [None]:
# 700 m buffer polygon around every training point, in `train` row order.
_train_points_700m = gpd.GeoDataFrame(
    geometry=[Point(lon, lat) for lon, lat in zip(train['lon'], train['lat'])]
)
geodesic_buffers_700m = gpd.GeoDataFrame()
geodesic_buffers_700m['geometry'] = _train_points_700m.apply(
    lambda row: geodesic_point_buffer(row['geometry'].x, row['geometry'].y, 700),
    axis=1,
)

In [None]:
# Download subway and light-rail station nodes for every city in the training set.
#
# * `err` collects cities whose response was not valid JSON so the next cell can
#   retry them. Only ValueError (what `response.json()` raises on a non-JSON
#   body) is caught; the old bare `except:` also hid KeyboardInterrupt and
#   programming errors.
# * Cities are taken from `dropna().unique()` so missing city names are skipped
#   and the request order is deterministic.
err = []
subway_frames = [pd.DataFrame(columns = ['lat','lon','name','city'])]
for i in train['City'].dropna().unique():

    overpass_query = '''
    [out:json];
    area[name="{}"]->.b;
    node(area.b)[railway=station][station=subway];
    out center;
    node(area.b)[station=light_rail];
    out meta;
    '''.format(i)

    response = requests.get(overpass_url,
                            params={'data': overpass_query})
    try:
        subway = response.json()
    except ValueError:
        err.append(i)
        print(i)
        continue
    elements = subway['elements']
    subway_osm = pd.DataFrame(elements).join(
        pd.DataFrame([x.get('tags', {}) for x in elements]),
        lsuffix='_left', rsuffix='_right')
    if 'lat' not in subway_osm.columns:
        # No station nodes were returned for this city.
        continue
    # reindex (not [[...]]) so a city whose stations all lack a `name` tag
    # yields NaN names instead of a KeyError.
    subway_osm = subway_osm.reindex(columns=['lat', 'lon', 'name'])
    subway_osm['city'] = i
    subway_frames.append(subway_osm)
subway_data = pd.concat(subway_frames, axis=0).reset_index(drop=True)

In [None]:
# Retry the subway / light-rail download for the cities that failed in the
# previous cell and append the results to `subway_data`.
#
# The failed list is snapshotted and `err` is rebuilt from scratch. The old
# loop appended to `err` while iterating over it, so a city that kept failing
# was retried forever. After this cell `err` holds only the cities that are
# still failing; re-run the cell to try them again.
pending_cities, err = list(err), []
subway_retry_frames = [subway_data]
for i in pending_cities:

    overpass_query = '''
    [out:json];
    area[name="{}"]->.b;
    node(area.b)[railway=station][station=subway];
    out center;
    node(area.b)[station=light_rail];
    out meta;
    '''.format(i)

    response = requests.get(overpass_url,
                            params={'data': overpass_query})
    try:
        subway = response.json()
    except ValueError:
        # Body was not JSON; narrowed from a bare `except:`.
        err.append(i)
        print(i)
        continue
    elements = subway['elements']
    subway_osm = pd.DataFrame(elements).join(
        pd.DataFrame([x.get('tags', {}) for x in elements]),
        lsuffix='_left', rsuffix='_right')
    if 'lat' not in subway_osm.columns:
        continue
    # reindex tolerates a result set in which no station carries a `name` tag.
    subway_osm = subway_osm.reindex(columns=['lat', 'lon', 'name'])
    subway_osm['city'] = i
    subway_retry_frames.append(subway_osm)
subway_data = pd.concat(subway_retry_frames, axis=0).reset_index(drop=True)

In [None]:
# Per training point: number of subway / light-rail stations inside its 700 m
# buffer (`stations_cnt`) and the geodesic distance in metres, rounded to 3
# decimals, to the nearest of them (`min_dist_2station`, 0 when none is in range).
#
# The columns are built as lists and assigned once. The previous
# `train[col][i] = value` chained assignment is a silent no-op under pandas
# Copy-on-Write, and its warning was hidden by the global warnings filter.
subway_sites = [
    (Point(lon, lat), (lat, lon))
    for lat, lon in zip(subway_data['lat'], subway_data['lon'])
]
stations_cnt = []
min_dist_2station = []
for lat, lon, buf in zip(train['lat'], train['lon'], geodesic_buffers_700m.geometry):
    dists = [
        round(distance.geodesic((lat, lon), coords).m, 3)
        for site, coords in subway_sites
        if site.within(buf)
    ]
    stations_cnt.append(len(dists))
    min_dist_2station.append(min(dists) if dists else 0)
train['stations_cnt'] = stations_cnt
train['min_dist_2station'] = min_dist_2station

In [None]:
# 100 m buffer polygon around every training point, in `train` row order.
_train_points_100m = gpd.GeoDataFrame(
    geometry=[Point(lon, lat) for lon, lat in zip(train['lon'], train['lat'])]
)
geodesic_buffers_100m = gpd.GeoDataFrame()
geodesic_buffers_100m['geometry'] = _train_points_100m.apply(
    lambda row: geodesic_point_buffer(row['geometry'].x, row['geometry'].y, 100),
    axis=1,
)

In [None]:
# Download every OSM bus-stop node for each city in the training set.
# Cities whose response is not valid JSON are remembered in `err` for the retry cell.
bus_data = pd.DataFrame(columns = ['lat','lon','name','city'])
err = []
for city in set(train.City.values):
    overpass_query = '''
    [out:json];
    area[name="{}"]->.b;
    node(area.b)[highway=bus_stop];
    out meta;
    '''.format(city)
    response = requests.get(overpass_url, params={'data': overpass_query})
    print(city)
    try:
        bus = response.json()
    except ValueError:
        err.append(city)
        continue
    # One row per returned element, with its tag dictionary expanded into columns.
    bus_osm = pd.DataFrame(bus['elements'])
    tag_columns = pd.DataFrame([element['tags'] for element in bus['elements']])
    bus_osm = bus_osm.join(tag_columns, lsuffix='_left', rsuffix='_right')
    if 'lat' not in bus_osm.columns:
        # Nothing with coordinates came back for this city.
        continue
    bus_osm = bus_osm[['lat','lon','name']]
    bus_osm['city'] = city
    bus_data = pd.concat([bus_data, bus_osm], axis=0)
bus_data = bus_data.reset_index(drop=True)

In [None]:
# Retry the bus-stop download for the cities that failed in the previous cell
# and append the results to `bus_data`.
#
# The failed list is snapshotted and `err` is rebuilt from scratch. The old
# loop appended to `err` while iterating over it, so a city that kept failing
# was retried forever. After this cell `err` holds only the cities that are
# still failing; re-run the cell to try them again.
pending_cities, err = list(err), []
bus_retry_frames = [bus_data]
for i in pending_cities:

    overpass_query = '''
    [out:json];
    area[name="{}"]->.b;
    node(area.b)[highway=bus_stop];
    out meta;
    '''.format(i)

    response = requests.get(overpass_url,
                            params={'data': overpass_query})
    print(i)
    try:
        bus = response.json()
    except ValueError:
        err.append(i)
        continue
    elements = bus['elements']
    bus_osm = pd.DataFrame(elements).join(
        pd.DataFrame([x.get('tags', {}) for x in elements]),
        lsuffix='_left', rsuffix='_right')
    if 'lat' not in bus_osm.columns:
        continue
    # reindex tolerates a result set in which no stop carries a `name` tag.
    bus_osm = bus_osm.reindex(columns=['lat', 'lon', 'name'])
    bus_osm['city'] = i
    bus_retry_frames.append(bus_osm)
bus_data = pd.concat(bus_retry_frames, axis=0).reset_index(drop=True)

In [None]:
# `busstop_cnt`: number of bus stops inside each training point's 100 m buffer.
#
# Stop geometries are built once instead of once per (point, stop) pair, and the
# column is assigned in one go. The previous `train['busstop_cnt'][i] = k`
# chained assignment is a silent no-op under pandas Copy-on-Write.
bus_points = [Point(lon, lat) for lon, lat in zip(bus_data['lon'], bus_data['lat'])]
train['busstop_cnt'] = [
    sum(1 for stop in bus_points if stop.within(buf))
    for buf in geodesic_buffers_100m.geometry
]

In [None]:
# Geocode the centre of every city in the training set ("<city>, Россия") into
# `City_Center`, a frame with columns `City` and `Point` = (lat, lon).
#
# The limiter is created once, before the loop. Building a new RateLimiter on
# every iteration discarded its "last call" timestamp, so the 1 s minimum delay
# between Nominatim requests was never applied.
getcen = RateLimiter(geolocator.geocode, min_delay_seconds=1,error_wait_seconds=2.0,swallow_exceptions=True,
                      return_value_on_exception=None)
il = []
pointl = []
for i in train['City'].dropna().unique():
    center = getcen(i + ', Россия')
    if center is None:
        # getcen swallows geocoder errors and returns None; fail with the city
        # name rather than an opaque AttributeError on `None.raw`.
        raise RuntimeError('Could not geocode the centre of {!r}'.format(i))
    point = (float(center.raw.get('lat')) , float(center.raw.get('lon')))
    il.append(i)
    pointl.append(point)
City_Center = pd.DataFrame({'City': il,
                            'Point': pointl})

In [None]:
# `distance_to_center`: geodesic distance in metres (3 decimals) from each
# training point to the geocoded centre of its city.
#
# A left join keeps exactly one row per training row, in order. The previous
# inner join silently dropped rows without a matching centre, after which the
# positional loop over `range(len(train))` read the wrong rows or raised KeyError.
# Points without a centre get NaN. The column is assigned once instead of via
# chained `train[col][i] = d`, which is a no-op under pandas Copy-on-Write.
tc = train.merge(City_Center, how='left', on='City', validate='many_to_one')
train['distance_to_center'] = [
    round(distance.geodesic((lat, lon), center).m, 3) if isinstance(center, tuple) else np.nan
    for lat, lon, center in zip(tc['lat'], tc['lon'], tc['Point'])
]

In [None]:
# 150 m buffer polygon around every training point, in `train` row order.
_train_points_150m = gpd.GeoDataFrame(
    geometry=[Point(lon, lat) for lon, lat in zip(train['lon'], train['lat'])]
)
geodesic_buffers_150m = gpd.GeoDataFrame()
geodesic_buffers_150m['geometry'] = _train_points_150m.apply(
    lambda row: geodesic_point_buffer(row['geometry'].x, row['geometry'].y, 150),
    axis=1,
)

In [None]:
# `neighbors`: for each training point, how many OTHER training points have a
# 150 m buffer that intersects this point's 150 m buffer.
#
# The column is assigned once. The previous `train['neighbors'][i] = k` chained
# assignment is a silent no-op under pandas Copy-on-Write.
buffers_150m = list(geodesic_buffers_150m.geometry)
train['neighbors'] = [
    sum(1 for j, other in enumerate(buffers_150m) if j != i and other.intersects(buf))
    for i, buf in enumerate(buffers_150m)
]

In [None]:
# Download bus-station nodes and train-station nodes (excluding light rail)
# for each city in the training set into `rail_data`.
rail_data = pd.DataFrame(columns = ['lat','lon','name','city'])
for city in set(train.City.values):
    overpass_query = '''
    [out:json];
    area[name="{}"]->.b;
    node(area.b)[amenity=bus_station];
    out center;
    node(area.b)[railway=station][train=yes][station!=light_rail];
    out center;
    '''.format(city)
    response = requests.get(overpass_url, params={'data': overpass_query})
    rail = response.json()
    # One row per returned element, with its tag dictionary expanded into columns.
    rail_osm = pd.DataFrame(rail['elements'])
    tag_columns = pd.DataFrame([element['tags'] for element in rail['elements']])
    rail_osm = rail_osm.join(tag_columns, lsuffix='_left', rsuffix='_right')
    if 'lat' not in rail_osm.columns:
        # Nothing with coordinates came back for this city.
        continue
    rail_osm = rail_osm[['lat','lon','name']]
    rail_osm['city'] = city
    rail_data = pd.concat([rail_data, rail_osm], axis=0)
rail_data = rail_data.reset_index(drop=True)

In [None]:
# 1000 m buffer polygon around every training point, in `train` row order.
_train_points_1000m = gpd.GeoDataFrame(
    geometry=[Point(lon, lat) for lon, lat in zip(train['lon'], train['lat'])]
)
geodesic_buffers_1000m = gpd.GeoDataFrame()
geodesic_buffers_1000m['geometry'] = _train_points_1000m.apply(
    lambda row: geodesic_point_buffer(row['geometry'].x, row['geometry'].y, 1000),
    axis=1,
)

In [None]:
# Per training point: number of `rail_data` stations inside its 1000 m buffer
# (`rails_cnt`) and the geodesic distance in metres, rounded to 3 decimals, to
# the nearest of them (`min_dist_2rail`, 0 when none is in range).
#
# The columns are built as lists and assigned once. The previous
# `train[col][i] = value` chained assignment is a silent no-op under pandas
# Copy-on-Write.
rail_sites = [
    (Point(lon, lat), (lat, lon))
    for lat, lon in zip(rail_data['lat'], rail_data['lon'])
]
rails_cnt = []
min_dist_2rail = []
for lat, lon, buf in zip(train['lat'], train['lon'], geodesic_buffers_1000m.geometry):
    dists = [
        round(distance.geodesic((lat, lon), coords).m, 3)
        for site, coords in rail_sites
        if site.within(buf)
    ]
    rails_cnt.append(len(dists))
    min_dist_2rail.append(min(dists) if dists else 0)
train['rails_cnt'] = rails_cnt
train['min_dist_2rail'] = min_dist_2rail

In [None]:
# Download hotel, hostel, motel and hospital nodes for each city in the
# training set into `amenity_data` (columns: lat, lon, city).
#
# Fix: the tag columns are now joined onto `amenity_osm`. The old code joined
# onto `rail_osm`, a frame left over from the rail-station cell, so
# `amenity_data` held rail stations of one city repeated for every city.
amenity_frames = [pd.DataFrame(columns = ['lat','lon','city'])]
for i in train['City'].dropna().unique():

    overpass_query = '''
    [out:json];
    area[name="{}"]->.b;
    node(area.b)[tourism=hotel];
    out center;
    node(area.b)[tourism=hostel];
    out center;
    node(area.b)[tourism=motel];
    out center;
    node(area.b)[amenity=hospital];
    out center;
    '''.format(i)

    response = requests.get(overpass_url,
                            params={'data': overpass_query})
    amenity = response.json()
    elements = amenity['elements']
    amenity_osm = pd.DataFrame(elements)
    amenity_osm = amenity_osm.join(
        pd.DataFrame([x.get('tags', {}) for x in elements]),
        lsuffix='_left', rsuffix='_right')
    if 'lat' not in amenity_osm.columns:
        # Nothing with coordinates came back for this city.
        continue
    amenity_osm = amenity_osm[['lat','lon']].assign(city=i)
    amenity_frames.append(amenity_osm)
amenity_data = pd.concat(amenity_frames, axis=0).reset_index(drop=True)

In [None]:
# 2000 m buffer polygon around every TRAINING point, in `train` row order.
# Despite the `_t` suffix this is built from `train`; the test section later
# rebinds the same name to buffers built from `test`.
_train_points_2000m = gpd.GeoDataFrame(
    geometry=[Point(lon, lat) for lon, lat in zip(train['lon'], train['lat'])]
)
geodesic_buffers_2000m_t = gpd.GeoDataFrame()
geodesic_buffers_2000m_t['geometry'] = _train_points_2000m.apply(
    lambda row: geodesic_point_buffer(row['geometry'].x, row['geometry'].y, 2000),
    axis=1,
)

In [None]:
# Per training point: number of `amenity_data` entries inside its 2000 m buffer
# (`amenity_cnt`) and the geodesic distance in metres, rounded to 3 decimals, to
# the nearest of them (`min_amenity_2rail`, 0 when none is in range).
# The column name `min_amenity_2rail` is kept as-is so the test features match.
#
# The columns are built as lists and assigned once. The previous
# `train[col][i] = value` chained assignment is a silent no-op under pandas
# Copy-on-Write.
amenity_sites = [
    (Point(lon, lat), (lat, lon))
    for lat, lon in zip(amenity_data['lat'], amenity_data['lon'])
]
amenity_cnt = []
min_amenity_2rail = []
for lat, lon, buf in zip(train['lat'], train['lon'], geodesic_buffers_2000m_t.geometry):
    dists = [
        round(distance.geodesic((lat, lon), coords).m, 3)
        for site, coords in amenity_sites
        if site.within(buf)
    ]
    amenity_cnt.append(len(dists))
    min_amenity_2rail.append(min(dists) if dists else 0)
train['amenity_cnt'] = amenity_cnt
train['min_amenity_2rail'] = min_amenity_2rail

In [None]:
# Display the training table with all engineered feature columns added above.
train

### Fit model

In [None]:
# Hold out a validation split and one-hot encode the `City` column.
# random_state is fixed so the split, and therefore the validation MAE below,
# is reproducible across kernel restarts.
X_train, X_valid, y_train, y_valid = train_test_split(
    train.drop('target', axis=1), train[['target']], random_state=42)
# Each part is encoded separately, so the two frames may end up with different
# City_* columns; the next cell reconciles them.
X_train = pd.get_dummies(X_train, prefix='City',columns=['City'])
X_valid = pd.get_dummies(X_valid, prefix='City',columns=['City'])

In [None]:
# Keep only the columns that exist in both frames: drop from X_train whatever
# X_valid lacks, then drop from X_valid whatever the reduced X_train lacks.
X_train = X_train.drop(
    [column for column in X_train.columns if column not in X_valid.columns], axis=1)
X_valid = X_valid.drop(
    [column for column in X_valid.columns if column not in X_train.columns], axis=1)

In [None]:
# Fit ordinary least squares on every remaining column except `point_id`
# (presumably a row identifier rather than a feature — TODO confirm).
model = LinearRegression().fit(X_train.drop('point_id', axis=1), y_train)

In [None]:
# Mean absolute error of the fitted model on the held-out validation split.
mean_absolute_error(y_valid, model.predict(X_valid.drop('point_id', axis=1)))

### Build features for the test set


In [None]:
# Test table; the cells below add the same feature columns that were built for `train`.
test = pd.read_csv('data/mf_geo_test.csv')

In [None]:
# Reverse-geocode every test point to a city name with the rate-limited
# `reverse` wrapper, using the same rule as for the training set: take the
# address' `city`, or `state` when there is no city or the "city" is a
# district ("район").
# `reverse` swallows geocoder errors and returns None, so a failed lookup is
# recorded as a missing city instead of crashing on `None.raw`.
at = []
for lat, lon in zip(test['lat'], test['lon']):
    location = reverse('{}, {}'.format(lat, lon))
    address = (location.raw.get('address') or {}) if location is not None else {}
    city = address.get('city')
    if city is None or "район" in city or "Район" in city:
        at.append(address.get('state'))
    else:
        at.append(city)
test['City'] = at
# Strip the administrative prefix so the value matches the OSM area name.
test['City'] = test['City'].str.replace('городской округ ', '', regex=False)

In [None]:
# 700 m buffer polygon around every test point, in `test` row order.
_test_points_700m = gpd.GeoDataFrame(
    geometry=[Point(lon, lat) for lon, lat in zip(test['lon'], test['lat'])]
)
geodesic_buffers_700m_t = gpd.GeoDataFrame()
geodesic_buffers_700m_t['geometry'] = _test_points_700m.apply(
    lambda row: geodesic_point_buffer(row['geometry'].x, row['geometry'].y, 700),
    axis=1,
)

In [None]:
# Download subway and light-rail station nodes for every city in the test set.
#
# Cities whose response is not valid JSON are printed and collected in `err`.
# NOTE(review): no retry cell follows for subway data in this section, and
# `err` is reset by the bus-stop cell, so cities printed here are missing from
# `subway_data_t` unless this cell is re-run.
# Only ValueError (what `response.json()` raises on a non-JSON body) is caught;
# the old bare `except:` also hid KeyboardInterrupt and programming errors.
err = []
subway_frames_t = [pd.DataFrame(columns = ['lat','lon','name','city'])]
for i in test['City'].dropna().unique():

    overpass_query = '''
    [out:json];
    area[name="{}"]->.b;
    node(area.b)[railway=station][station=subway];
    out center;
    node(area.b)[station=light_rail];
    out meta;
    '''.format(i)

    response = requests.get(overpass_url,
                            params={'data': overpass_query})
    try:
        subway_t = response.json()
    except ValueError:
        err.append(i)
        print(i)
        continue
    elements = subway_t['elements']
    subway_osm_t = pd.DataFrame(elements).join(
        pd.DataFrame([x.get('tags', {}) for x in elements]),
        lsuffix='_left', rsuffix='_right')
    if 'lat' not in subway_osm_t.columns:
        # No station nodes were returned for this city.
        continue
    # reindex (not [[...]]) so a city whose stations all lack a `name` tag
    # yields NaN names instead of a KeyError.
    subway_osm_t = subway_osm_t.reindex(columns=['lat', 'lon', 'name'])
    subway_osm_t['city'] = i
    subway_frames_t.append(subway_osm_t)
subway_data_t = pd.concat(subway_frames_t, axis=0).reset_index(drop=True)

In [None]:
# Per test point: number of subway / light-rail stations inside its 700 m
# buffer (`stations_cnt`) and the geodesic distance in metres, rounded to 3
# decimals, to the nearest of them (`min_dist_2station`, 0 when none is in range).
#
# Fixes:
# * the old loop reset `mm` instead of `mm_t`, so a point with no station in
#   range inherited the previous point's minimum distance instead of 0;
# * `test[col][i] = value` chained assignment is a silent no-op under pandas
#   Copy-on-Write, so the columns are now built as lists and assigned once.
subway_sites_t = [
    (Point(lon, lat), (lat, lon))
    for lat, lon in zip(subway_data_t['lat'], subway_data_t['lon'])
]
stations_cnt_t = []
min_dist_2station_t = []
for lat, lon, buf in zip(test['lat'], test['lon'], geodesic_buffers_700m_t.geometry):
    dists = [
        round(distance.geodesic((lat, lon), coords).m, 3)
        for site, coords in subway_sites_t
        if site.within(buf)
    ]
    stations_cnt_t.append(len(dists))
    min_dist_2station_t.append(min(dists) if dists else 0)
test['stations_cnt'] = stations_cnt_t
test['min_dist_2station'] = min_dist_2station_t

In [None]:
# 100 m buffer polygon around every test point, in `test` row order.
_test_points_100m = gpd.GeoDataFrame(
    geometry=[Point(lon, lat) for lon, lat in zip(test['lon'], test['lat'])]
)
geodesic_buffers_100m_t = gpd.GeoDataFrame()
geodesic_buffers_100m_t['geometry'] = _test_points_100m.apply(
    lambda row: geodesic_point_buffer(row['geometry'].x, row['geometry'].y, 100),
    axis=1,
)

In [None]:
# Download every OSM bus-stop node for each city in the test set.
# Cities whose response is not valid JSON are remembered in `err` for the retry cell.
err = []
bus_data_t = pd.DataFrame(columns = ['lat','lon','name','city'])
for city in set(test.City.values):
    overpass_query = '''
    [out:json];
    area[name="{}"]->.b;
    node(area.b)[highway=bus_stop];
    out meta;
    '''.format(city)
    response = requests.get(overpass_url, params={'data': overpass_query})
    print(city)
    try:
        bus_t = response.json()
    except ValueError:
        err.append(city)
        continue
    # One row per returned element, with its tag dictionary expanded into columns.
    bus_osm_t = pd.DataFrame(bus_t['elements'])
    tag_columns = pd.DataFrame([element['tags'] for element in bus_t['elements']])
    bus_osm_t = bus_osm_t.join(tag_columns, lsuffix='_left', rsuffix='_right')
    if 'lat' not in bus_osm_t.columns:
        # Nothing with coordinates came back for this city.
        continue
    bus_osm_t = bus_osm_t[['lat','lon','name']]
    bus_osm_t['city'] = city
    bus_data_t = pd.concat([bus_data_t, bus_osm_t], axis=0)
bus_data_t = bus_data_t.reset_index(drop=True)

In [None]:
# Retry the bus-stop download for the test-set cities that failed in the
# previous cell and append the results to `bus_data_t`.
#
# The failed list is snapshotted and `err` is rebuilt from scratch. The old
# loop appended to `err` while iterating over it, so a city that kept failing
# was retried forever. After this cell `err` holds only the cities that are
# still failing; re-run the cell to try them again.
pending_cities, err = list(err), []
bus_retry_frames_t = [bus_data_t]
for i in pending_cities:

    overpass_query = '''
    [out:json];
    area[name="{}"]->.b;
    node(area.b)[highway=bus_stop];
    out meta;
    '''.format(i)

    response = requests.get(overpass_url,
                            params={'data': overpass_query})
    print(i)
    try:
        bus_t = response.json()
    except ValueError:
        err.append(i)
        continue
    elements = bus_t['elements']
    bus_osm_t = pd.DataFrame(elements).join(
        pd.DataFrame([x.get('tags', {}) for x in elements]),
        lsuffix='_left', rsuffix='_right')
    if 'lat' not in bus_osm_t.columns:
        continue
    # reindex tolerates a result set in which no stop carries a `name` tag.
    bus_osm_t = bus_osm_t.reindex(columns=['lat', 'lon', 'name'])
    bus_osm_t['city'] = i
    bus_retry_frames_t.append(bus_osm_t)
bus_data_t = pd.concat(bus_retry_frames_t, axis=0).reset_index(drop=True)

In [None]:
# `busstop_cnt`: number of bus stops inside each test point's 100 m buffer.
#
# Stop geometries are built once instead of once per (point, stop) pair, and the
# column is assigned in one go. The previous `test['busstop_cnt'][i] = k_t`
# chained assignment is a silent no-op under pandas Copy-on-Write.
bus_points_t = [Point(lon, lat) for lon, lat in zip(bus_data_t['lon'], bus_data_t['lat'])]
test['busstop_cnt'] = [
    sum(1 for stop in bus_points_t if stop.within(buf))
    for buf in geodesic_buffers_100m_t.geometry
]

In [None]:
# Geocode the centre of every city in the test set ("<city>, Россия") into
# `City_Center_t`, a frame with columns `City` and `Point` = (lat, lon).
# Uses the rate-limited `getcen` wrapper, which returns None when the geocoder
# fails or finds nothing.
il_t = []
pointl_t = []

for i in test['City'].dropna().unique():
    center = getcen(i + ', Россия')
    if center is None:
        # Fail with the city name rather than an opaque AttributeError on `None.raw`.
        raise RuntimeError('Could not geocode the centre of {!r}'.format(i))
    point = (float(center.raw.get('lat')) , float(center.raw.get('lon')))
    il_t.append(i)
    pointl_t.append(point)
City_Center_t = pd.DataFrame({'City': il_t,
                            'Point': pointl_t})

In [None]:
# `distance_to_center`: geodesic distance in metres (3 decimals) from each test
# point to the geocoded centre of its city.
#
# A left join keeps exactly one row per test row, in order. The previous inner
# join silently dropped rows without a matching centre, after which the
# positional loop over `range(len(test))` read the wrong rows or raised KeyError.
# Points without a centre get NaN. The column is assigned once instead of via
# chained `test[col][i] = d`, which is a no-op under pandas Copy-on-Write.
tc_t = test.merge(City_Center_t, how='left', on='City', validate='many_to_one')
test['distance_to_center'] = [
    round(distance.geodesic((lat, lon), center).m, 3) if isinstance(center, tuple) else np.nan
    for lat, lon, center in zip(tc_t['lat'], tc_t['lon'], tc_t['Point'])
]

In [None]:
# 150 m buffer polygon around every test point, in `test` row order.
_test_points_150m = gpd.GeoDataFrame(
    geometry=[Point(lon, lat) for lon, lat in zip(test['lon'], test['lat'])]
)
geodesic_buffers_150m_t = gpd.GeoDataFrame()
geodesic_buffers_150m_t['geometry'] = _test_points_150m.apply(
    lambda row: geodesic_point_buffer(row['geometry'].x, row['geometry'].y, 150),
    axis=1,
)

In [None]:
# `neighbors`: for each test point, how many OTHER test points have a 150 m
# buffer that intersects this point's 150 m buffer.
#
# The column is assigned once. The previous `test['neighbors'][i] = k` chained
# assignment is a silent no-op under pandas Copy-on-Write.
buffers_150m_t = list(geodesic_buffers_150m_t.geometry)
test['neighbors'] = [
    sum(1 for j, other in enumerate(buffers_150m_t) if j != i and other.intersects(buf))
    for i, buf in enumerate(buffers_150m_t)
]

In [None]:
# Download bus-station nodes and train-station nodes (excluding light rail)
# for each city in the test set into `rail_data_t`.
rail_data_t = pd.DataFrame(columns = ['lat','lon','name','city'])
for city in set(test.City.values):
    overpass_query = '''
    [out:json];
    area[name="{}"]->.b;
    node(area.b)[amenity=bus_station];
    out center;
    node(area.b)[railway=station][train=yes][station!=light_rail];
    out center;
    '''.format(city)
    response = requests.get(overpass_url, params={'data': overpass_query})
    rail = response.json()
    # One row per returned element, with its tag dictionary expanded into columns.
    rail_osm = pd.DataFrame(rail['elements'])
    tag_columns = pd.DataFrame([element['tags'] for element in rail['elements']])
    rail_osm = rail_osm.join(tag_columns, lsuffix='_left', rsuffix='_right')
    if 'lat' not in rail_osm.columns:
        # Nothing with coordinates came back for this city.
        continue
    rail_osm = rail_osm[['lat','lon','name']]
    rail_osm['city'] = city
    rail_data_t = pd.concat([rail_data_t, rail_osm], axis=0)
rail_data_t = rail_data_t.reset_index(drop=True)

In [None]:
# 1000 m buffer polygon around every test point, in `test` row order.
_test_points_1000m = gpd.GeoDataFrame(
    geometry=[Point(lon, lat) for lon, lat in zip(test['lon'], test['lat'])]
)
geodesic_buffers_1000m_t = gpd.GeoDataFrame()
geodesic_buffers_1000m_t['geometry'] = _test_points_1000m.apply(
    lambda row: geodesic_point_buffer(row['geometry'].x, row['geometry'].y, 1000),
    axis=1,
)

In [None]:
# Per test point: number of `rail_data_t` stations inside its 1000 m buffer
# (`rails_cnt`) and the geodesic distance in metres, rounded to 3 decimals, to
# the nearest of them (`min_dist_2rail`, 0 when none is in range).
#
# The columns are built as lists and assigned once. The previous
# `test[col][i] = value` chained assignment is a silent no-op under pandas
# Copy-on-Write.
rail_sites_t = [
    (Point(lon, lat), (lat, lon))
    for lat, lon in zip(rail_data_t['lat'], rail_data_t['lon'])
]
rails_cnt_t = []
min_dist_2rail_t = []
for lat, lon, buf in zip(test['lat'], test['lon'], geodesic_buffers_1000m_t.geometry):
    dists = [
        round(distance.geodesic((lat, lon), coords).m, 3)
        for site, coords in rail_sites_t
        if site.within(buf)
    ]
    rails_cnt_t.append(len(dists))
    min_dist_2rail_t.append(min(dists) if dists else 0)
test['rails_cnt'] = rails_cnt_t
test['min_dist_2rail'] = min_dist_2rail_t

In [None]:
# Download hotel, hostel, motel and hospital nodes for each city in the test
# set. This rebinds `amenity_data` (columns: lat, lon, city), replacing the
# training-set table of the same name.
#
# Fix: the tag columns are now joined onto `amenity_osm`. The old code joined
# onto `rail_osm`, a frame left over from the rail-station cell, so
# `amenity_data` held rail stations of one city repeated for every city.
amenity_frames_t = [pd.DataFrame(columns = ['lat','lon','city'])]
for i in test['City'].dropna().unique():

    overpass_query = '''
    [out:json];
    area[name="{}"]->.b;
    node(area.b)[tourism=hotel];
    out center;
    node(area.b)[tourism=hostel];
    out center;
    node(area.b)[tourism=motel];
    out center;
    node(area.b)[amenity=hospital];
    out center;
    '''.format(i)

    response = requests.get(overpass_url,
                            params={'data': overpass_query})
    amenity = response.json()
    elements = amenity['elements']
    amenity_osm = pd.DataFrame(elements)
    amenity_osm = amenity_osm.join(
        pd.DataFrame([x.get('tags', {}) for x in elements]),
        lsuffix='_left', rsuffix='_right')
    if 'lat' not in amenity_osm.columns:
        # Nothing with coordinates came back for this city.
        continue
    amenity_osm = amenity_osm[['lat','lon']].assign(city=i)
    amenity_frames_t.append(amenity_osm)
amenity_data = pd.concat(amenity_frames_t, axis=0).reset_index(drop=True)

In [None]:
# 2000 m buffer polygon around every test point, in `test` row order.
# Rebinds the name that the training section used for its own 2000 m buffers.
_test_points_2000m = gpd.GeoDataFrame(
    geometry=[Point(lon, lat) for lon, lat in zip(test['lon'], test['lat'])]
)
geodesic_buffers_2000m_t = gpd.GeoDataFrame()
geodesic_buffers_2000m_t['geometry'] = _test_points_2000m.apply(
    lambda row: geodesic_point_buffer(row['geometry'].x, row['geometry'].y, 2000),
    axis=1,
)

In [None]:
# Per test point: number of `amenity_data` entries inside its 2000 m buffer
# (`amenity_cnt`) and the geodesic distance in metres, rounded to 3 decimals, to
# the nearest of them (`min_amenity_2rail`, 0 when none is in range).
# The column name `min_amenity_2rail` is kept as-is so it matches the training features.
#
# The columns are built as lists and assigned once. The previous
# `test[col][i] = value` chained assignment is a silent no-op under pandas
# Copy-on-Write.
amenity_sites_t = [
    (Point(lon, lat), (lat, lon))
    for lat, lon in zip(amenity_data['lat'], amenity_data['lon'])
]
amenity_cnt_t = []
min_amenity_2rail_t = []
for lat, lon, buf in zip(test['lat'], test['lon'], geodesic_buffers_2000m_t.geometry):
    dists = [
        round(distance.geodesic((lat, lon), coords).m, 3)
        for site, coords in amenity_sites_t
        if site.within(buf)
    ]
    amenity_cnt_t.append(len(dists))
    min_amenity_2rail_t.append(min(dists) if dists else 0)
test['amenity_cnt'] = amenity_cnt_t
test['min_amenity_2rail'] = min_amenity_2rail_t

In [None]:
# Sanity check: show the test rows that have at least one rail / bus station in range
# (min_dist_2rail is 0 when none was found).
test[test['min_dist_2rail'] >0]

### Forecast

In [None]:
# Final fit uses the whole training table; the test table is prepared the same way.
# Targets stay one-column DataFrames; features are everything except `target`,
# with `City` one-hot encoded separately per table.
y_train = train[['target']]
y_test = test[['target']]
X_train = pd.get_dummies(train.drop('target', axis=1), columns=['City'], prefix='City')
X_test = pd.get_dummies(test.drop('target', axis=1), columns=['City'], prefix='City')


In [None]:
# Keep only the columns that exist in both frames: drop from X_train whatever
# X_test lacks, then drop from X_test whatever the reduced X_train lacks.
X_train = X_train.drop(
    [column for column in X_train.columns if column not in X_test.columns], axis=1)
X_test = X_test.drop(
    [column for column in X_test.columns if column not in X_train.columns], axis=1)

In [None]:
# Refit on the full training table (every shared column except `point_id`);
# this rebinds `model`, replacing the validation-split model fitted earlier.
model = LinearRegression().fit(X_train.drop('point_id', axis=1), y_train)

In [None]:
# Mean absolute error against the `target` column of the test file.
# NOTE(review): only meaningful if that column holds real labels — confirm.
mean_absolute_error(y_test, model.predict(X_test.drop('point_id', axis=1)))

### Make submission

In [None]:
# Reuse an earlier submission file as the template and overwrite its `target`
# column with the new predictions.
# NOTE(review): assumes the template rows are in the same order as `test` — confirm.
submission = pd.read_csv('data/my_submission_01.csv')
submission['target'] = model.predict(X_test.drop('point_id', axis=1))
# Written under a new name so the template file is left untouched.
submission.to_csv('data/my_submission_03.csv', index=False)