# Evaluating distance

#### 1 step. Manual data collection

In [None]:
#-----------------------------

import pandas as pd

# Empty frame with the schema of the saved locations file: one row per
# location (latitude, longitude, object type). collect_data() writes df1
# to 'locations_data.csv'.
columns = ['latitude', 'longitude', 'type']
df1 = pd.DataFrame(columns=columns)

def collect_data():
    """Interactively collect object coordinates and save them to CSV.

    Repeatedly asks for an object type ('school', 'kindergarten' or
    'pharmacy'), then for any number of "latitude, longitude" pairs for
    that type. Typing 'exit' at the coordinate prompt returns to the
    object-type prompt; typing 'exit' at the object-type prompt ends the
    session.

    Every accepted pair is appended as a row to the module-level ``df1``
    frame, which is written to 'locations_data.csv' (without the index)
    when the session ends.
    """
    valid_types = ('school', 'kindergarten', 'pharmacy')

    while True:
        # strip() so that stray spaces around the answer are not rejected.
        object_type = input("Enter object type (school, kindergarten, pharmacy) or 'exit' to quit: ").strip().lower()
        if object_type == 'exit':
            break
        if object_type not in valid_types:
            print("Invalid object type. Please enter 'school', 'kindergarten', or 'pharmacy'.")
            continue

        print(f"You selected: {object_type}. Enter coordinates (latitude and longitude) for this object.")

        while True:
            coordinates = input("Enter latitude and longitude separated by a comma (e.g., 53.30361562179967, 69.39193949334926) or 'exit' to change object: ")

            if coordinates.strip().lower() == 'exit':
                break

            try:
                # Raises ValueError both for non-numeric parts and for a
                # wrong number of comma-separated values.
                latitude, longitude = map(float, coordinates.split(','))
            except ValueError:
                print("Input error! Please enter valid data in the format latitude, longitude.")
                continue

            # Append to df1 -- the frame that is saved below. (The previous
            # code wrote to an undefined `df`, raising NameError.)
            df1.loc[len(df1)] = [latitude, longitude, object_type]
            print(f"Data for {object_type} successfully added!")

    df1.to_csv('locations_data.csv', index=False)
    print("Data saved to 'locations_data.csv'.")

# Start the interactive session; it blocks on input() until 'exit' is typed
# at the object-type prompt, then saves df1 to 'locations_data.csv'.
collect_data()


#### 2 step. Load the data

In [10]:
#-----------------------------

# df1: object locations (latitude, longitude, type) -- presumably the file
#      produced by collect_data(), moved to the "Clean data" folder; verify.
# df:  apartment listings with per-apartment latitude/longitude.
# NOTE(review): both paths are absolute and machine-specific, so this cell
# only runs where these exact folders exist.
df1 = pd.read_csv('C:/Users/User/Desktop/Assignment 7-8/Data/Clean data/locations_data.csv')
df = pd.read_csv('C:/Users/User/Desktop/Assignment 7-8/Data cleaning and visualization/apartment_price_in_Kokshetau.csv')
# Bare expression: shown as the cell output when run in a notebook.
df

Unnamed: 0.1,Unnamed: 0,price,area,flat_toilets,balcony,current_floors,total_floors,ceiling,dorm,mortgage,year,type_of_house,repair_status,street,latitude,longitude,is_in_kokshetau
0,0,9500000,39.0,совмещенный,лоджия,4.0,5.0,2.7,нет,нет,1979,кирпичный,частично,"Юбилейный 43, Кокшетау",53.269046,69.424945,True
1,1,39000000,76.1,раздельный,нет,4.0,10.0,3.0,нет,нет,2021,кирпичный,частично,"Кенесары 41а, Кокшетау",53.262199,69.369769,True
2,2,29990000,75.0,раздельный,нет,9.0,10.0,3.0,нет,нет,2024,кирпичный,полностью,"Сарыарка 2г, Кокшетау",53.320954,69.385960,True
3,3,10900000,37.0,совмещенный,балкон,1.0,5.0,2.7,нет,нет,1966,кирпичный,частично,"Абылай хана, Кокшетау",53.285470,69.381409,True
4,4,13400000,50.8,совмещенный,нет,6.0,6.0,2.7,нет,нет,1990,панельный,частично,"Абылай хана 20, Кокшетау",53.300842,69.387853,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5115,5115,20500000,57.1,раздельный,нет,4.0,5.0,2.7,нет,нет,1986,кирпичный,частично,"Микрорайон васильковский 16, Кокшетау",53.315894,69.392442,True
5116,5116,8500000,48.9,раздельный,нет,2.0,4.0,2.7,да,нет,1962,кирпичный,частично,"Пушкина 11, Кокшетау",53.287913,69.404818,True
5117,5117,9500000,40.3,совмещенный,нет,1.0,4.0,2.7,нет,нет,1964,кирпичный,полностью,"Пушкина 9 а, Кокшетау",53.288519,69.405605,True
5118,5118,10000000,42.0,совмещенный,нет,3.0,5.0,2.7,да,нет,1982,кирпичный,частично,"Назарбаева 27, Кокшетау",53.292727,69.386308,True


#### 3 step. Calculating the distance to the nearest school, kindergarten and pharmacy

In [11]:
#-----------------------------

import math

def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Uses the haversine formula on a sphere of radius 6371 km.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.

    Returns:
        The distance along the sphere's surface, in kilometres.
    """
    R = 6371  # Earth radius in kilometres
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    delta_phi = math.radians(lat2 - lat1)
    delta_lambda = math.radians(lon2 - lon1)

    a = math.sin(delta_phi / 2)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(delta_lambda / 2)**2
    # Rounding can push `a` a hair above 1 for (near-)antipodal points,
    # which would make sqrt(1 - a) raise ValueError; cap it at 1.
    a = min(1.0, a)
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return R * c

def calculate_min_distances(df, df1):
    """Add nearest-school/kindergarten/pharmacy distance columns to ``df``.

    For every row of ``df`` the haversine distance to each location in
    ``df1`` is computed and the minimum per location type is kept.

    Args:
        df: Apartments; must have 'latitude' and 'longitude' columns.
        df1: Locations; must have 'latitude', 'longitude' and 'type'
            columns. Only rows whose 'type' is 'school', 'kindergarten'
            or 'pharmacy' are used.

    Returns:
        The same ``df`` object, modified in place with three new columns:
        'distance_to_nearest_school', 'distance_to_nearest_kindergarten'
        and 'distance_to_nearest_pharmacy'. A column holds ``inf`` where
        ``df1`` contains no location of that type.
    """
    column_by_type = {
        'school': 'distance_to_nearest_school',
        'kindergarten': 'distance_to_nearest_kindergarten',
        'pharmacy': 'distance_to_nearest_pharmacy',
    }

    # Group the location coordinates by type once, instead of re-scanning
    # df1 (and re-checking each row's type) for every apartment.
    coords_by_type = {kind: [] for kind in column_by_type}
    for lat, lon, kind in zip(df1['latitude'], df1['longitude'], df1['type']):
        if kind in coords_by_type:
            coords_by_type[kind].append((lat, lon))

    nearest = {kind: [] for kind in column_by_type}
    for apt_lat, apt_lon in zip(df['latitude'], df['longitude']):
        for kind, coords in coords_by_type.items():
            best = float('inf')
            for lat, lon in coords:
                distance = haversine(apt_lat, apt_lon, lat, lon)
                # Explicit comparison keeps the running minimum unchanged
                # when a distance is NaN, as min(best, distance) would.
                if distance < best:
                    best = distance
            nearest[kind].append(best)

    for kind, column in column_by_type.items():
        df[column] = nearest[kind]
    return df

# calculate_min_distances adds the three distance columns to df in place and
# returns that same frame, so df_with_distances and df are the same object.
df_with_distances = calculate_min_distances(df, df1)


# Show the price, the coordinates and the three new distance columns.
print(df_with_distances[['price', 'latitude', 'longitude', 'distance_to_nearest_school', 
                         'distance_to_nearest_kindergarten', 'distance_to_nearest_pharmacy']])


         price   latitude  longitude  distance_to_nearest_school  \
0      9500000  53.269046  69.424945                    0.335102   
1     39000000  53.262199  69.369769                    0.738231   
2     29990000  53.320954  69.385960                    0.393297   
3     10900000  53.285470  69.381409                    0.560507   
4     13400000  53.300842  69.387853                    0.785759   
...        ...        ...        ...                         ...   
5115  20500000  53.315894  69.392442                    0.100153   
5116   8500000  53.287913  69.404818                    0.364670   
5117   9500000  53.288519  69.405605                    0.340605   
5118  10000000  53.292727  69.386308                    0.686371   
5119  22000000  53.297802  69.384596                    0.819713   

      distance_to_nearest_kindergarten  distance_to_nearest_pharmacy  
0                             0.336834                      0.468495  
1                             2.777238   

#### 4 step. Calculating the distance to the center 

In [12]:
#-----------------------------


# Reference point used as "the center" for distance_to_center, in decimal
# degrees. NOTE(review): which landmark this point is isn't stated -- confirm.
center_lat = 53.28383411414435
center_lon = 69.37488758063544

def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in kilometres between two points.

    Uses the haversine formula on a sphere of radius 6371 km.

    Args:
        lat1, lon1: Latitude and longitude of the first point, in degrees.
        lat2, lon2: Latitude and longitude of the second point, in degrees.

    Returns:
        The distance along the sphere's surface, in kilometres.
    """
    R = 6371  # Earth radius in kilometres
    phi1 = math.radians(lat1)
    phi2 = math.radians(lat2)
    delta_phi = math.radians(lat2 - lat1)
    delta_lambda = math.radians(lon2 - lon1)

    a = math.sin(delta_phi / 2)**2 + math.cos(phi1) * math.cos(phi2) * math.sin(delta_lambda / 2)**2
    # Rounding can push `a` a hair above 1 for (near-)antipodal points,
    # which would make sqrt(1 - a) raise ValueError; cap it at 1.
    a = min(1.0, a)
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))

    return R * c


# Row by row, compute the haversine distance (km) from each apartment's
# coordinates to the center point and store it in a new column.
df['distance_to_center'] = df.apply(lambda row: haversine(row['latitude'], row['longitude'], center_lat, center_lon), axis=1)



#### 5 step. Final DataFrame with the distances to the nearest objects and to the center

In [13]:
#-----------------------------


# Final inspection of the frame: contents, missing values per column, and
# column dtypes / non-null counts.
print(df)
print()
print(df.isnull().sum())
print()
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() only appended a stray "None" line to the output.
df.info()

      Unnamed: 0     price  area flat_toilets balcony  current_floors  \
0              0   9500000  39.0  совмещенный  лоджия             4.0   
1              1  39000000  76.1   раздельный     нет             4.0   
2              2  29990000  75.0   раздельный     нет             9.0   
3              3  10900000  37.0  совмещенный  балкон             1.0   
4              4  13400000  50.8  совмещенный     нет             6.0   
...          ...       ...   ...          ...     ...             ...   
5115        5115  20500000  57.1   раздельный     нет             4.0   
5116        5116   8500000  48.9   раздельный     нет             2.0   
5117        5117   9500000  40.3  совмещенный     нет             1.0   
5118        5118  10000000  42.0  совмещенный     нет             3.0   
5119        5119  22000000  67.5   раздельный     нет             1.0   

      total_floors  ceiling dorm  mortgage  ...  type_of_house repair_status  \
0              5.0      2.7  нет       нет 