In [1]:
import pandas as pd
import numpy as np
from geopy.distance import geodesic 
import math
from sklearn.metrics import mean_absolute_error, r2_score, median_absolute_error
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV
from sklearn.ensemble import RandomForestRegressor
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
%matplotlib inline
%config InlineBackend.figure_format = 'retina' 
import warnings
warnings.filterwarnings('ignore')

In [2]:
def get_azimuth(latitude, longitude, center=(55.7522, 37.6156)):
    """Return the initial great-circle bearing from ``center`` to a point.

    The bearing is measured in degrees clockwise from north (north = 0,
    east = 90, south = 180, west = 270) on a spherical model.

    Parameters
    ----------
    latitude, longitude : float
        Coordinates of the target point, in decimal degrees.
    center : sequence of two floats, optional
        ``(latitude, longitude)`` of the reference point, in decimal degrees.
        Defaults to the coordinates previously hard-coded in this function.

    Returns
    -------
    float
        Bearing in degrees, rounded to 2 decimal places. Because of the
        rounding, a bearing just below 360 may be returned as 360.0.
        When the target coincides with ``center`` the bearing is undefined
        and 0.0 is returned.
    """
    lat1 = math.radians(center[0])
    lat2 = math.radians(latitude)
    delta = math.radians(longitude) - math.radians(center[1])

    cl1 = math.cos(lat1)
    cl2 = math.cos(lat2)
    sl1 = math.sin(lat1)
    sl2 = math.sin(lat2)
    cdelta = math.cos(delta)
    sdelta = math.sin(delta)

    # North (x) and east (y) components of the initial direction.
    x = (cl1 * sl2) - (sl1 * cl2 * cdelta)
    y = sdelta * cl2

    # atan2 picks the correct quadrant and is well defined for x == 0
    # (due east / due west / coincident points), where the previous
    # atan(-y / x) formulation raised ZeroDivisionError.
    angledeg = math.degrees(math.atan2(y, x)) % 360.0

    return round(angledeg, 2)


In [3]:
# Read the source dataset from a CSV file located next to the notebook.
file_path = 'moscow_dataset_2020.csv'
df = pd.read_csv(filepath_or_buffer=file_path)

# Show the first 5 rows of the DataFrame.
df.head(n=5)

Unnamed: 0,wallsMaterial,floorNumber,floorsTotal,totalArea,kitchenArea,latitude,longitude,price
0,brick,1,5.0,18.0,3.0,55.723379,37.628577,5600000
1,brick,1,5.0,15.0,3.0,55.72598,37.671031,4650000
2,brick,1,5.0,11.9,1.5,55.735976,37.657817,2990000
3,brick,1,7.0,18.4,3.0,55.786698,37.595321,4390000
4,brick,2,5.0,17.6,2.0,55.767894,37.66592,4890000


In [4]:
# New column: price per square metre, i.e. each flat's price divided
# row-wise by its total area, rounded to a whole number.
df['priceMetr'] = (df['price'] / df['totalArea']).round(0)

# Latitude and longitude of the city centre; for every flat compute the
# distance from the centre (in metres) and the azimuth relative to it.
city_center_coordinates = [55.7522, 37.6156]
df['distance'] = [
    geodesic(city_center_coordinates, [lat, lon]).meters
    for lat, lon in zip(df['latitude'], df['longitude'])
]
df['azimuth'] = [
    get_azimuth(lat, lon)
    for lat, lon in zip(df['latitude'], df['longitude'])
]

# Keep both derived features as whole numbers.
df['distance'] = df['distance'].round(0)
df['azimuth'] = df['azimuth'].round(0)

In [5]:
# Persist the DataFrame with the added feature columns to a new CSV file.
df.to_csv(path_or_buf="./moscow_dataset_2020_add_feature.csv")