In [2]:
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import os
import sys
import pandas as pd
from tqdm import tqdm


def calc_distance(lat1, lon1, lat2, lon2, distance):
    x, y = abs(lon1 - lon2) * 88800, abs(lat1 - lat2) * 111320
    z = (x**2 + y**2) ** (1 / 2)
    return distance > z


def tagging(buildings, label):
    new_tags = [
        Tag(
            building_id=building.id,
            label=label,
        )
        for building in buildings
    ]
    session.add_all(new_tags)


HOSPITAL_DISTANCE = 3000
SUBWAY_STATION_DISTANCE = 500
BUS_STATION_DISTANCE = 50

from models import Address, Hospital, BusStation, Subway, Building, Tag

engine = create_engine(os.getenv("DATABASE_URL"), echo=False)
Session = sessionmaker(bind=engine)
session = Session()

In [3]:
results = session.query(Address).all()
address_df = pd.DataFrame(row.__dict__ for row in results)
address_df = address_df[["latitude", "longitude", "id"]]
address_df

Unnamed: 0,latitude,longitude,id
0,37.5495,127.152,1
1,37.5177,126.874,2
2,37.4903,126.857,3
3,37.5447,127.143,4
4,37.4982,126.937,5
...,...,...,...
43871,37.5998,126.925,43872
43872,37.5368,126.845,43873
43873,37.5588,126.902,43874
43874,37.4656,126.892,43875


In [4]:
results = session.query(Hospital).all()
hospital_df = pd.DataFrame(row.__dict__ for row in results)
hospital_df = hospital_df[["latitude", "longitude"]]
hospital_df

Unnamed: 0,latitude,longitude
0,37.5183,126.937
1,37.6336,126.916
2,37.4856,126.957
3,37.5010,127.051
4,37.4856,127.040
...,...,...
67,37.5235,126.910
68,37.5599,127.045
69,37.5353,127.084
70,37.5284,126.864


In [5]:
results = session.query(BusStation).all()
bus_station_df = pd.DataFrame(row.__dict__ for row in results)
bus_station_df = bus_station_df[["latitude", "longitude"]]
bus_station_df

Unnamed: 0,latitude,longitude
0,37.5698,126.988
1,37.5794,126.997
2,37.5826,126.998
3,37.5686,126.988
4,37.5862,127.002
...,...,...
11285,37.5504,127.139
11286,37.5506,127.140
11287,37.5336,127.124
11288,37.5362,127.125


In [6]:
results = session.query(Subway).all()
subway_station_df = pd.DataFrame(row.__dict__ for row in results)
subway_station_df = subway_station_df[["latitude", "longitude"]]
subway_station_df

Unnamed: 0,latitude,longitude
0,37.5532,126.973
1,37.5636,126.975
2,37.5702,126.983
3,37.5704,126.992
4,37.5710,127.002
...,...,...
271,37.4516,127.160
272,37.4451,127.157
273,37.4410,127.148
274,37.4376,127.141


In [7]:
for data in tqdm(address_df.itertuples()):
    buildings = session.query(Building).filter_by(address_id=data.id).all()
    if any(
        calc_distance(
            data.latitude,
            data.longitude,
            row.latitude,
            row.longitude,
            SUBWAY_STATION_DISTANCE,
        )
        for row in subway_station_df.itertuples()
    ):
        tagging(buildings, "역세권")
    if any(
        calc_distance(
            data.latitude,
            data.longitude,
            row.latitude,
            row.longitude,
            BUS_STATION_DISTANCE,
        )
        for row in bus_station_df.itertuples()
    ):
        tagging(buildings, "버세권")
    if any(
        calc_distance(
            data.latitude,
            data.longitude,
            row.latitude,
            row.longitude,
            HOSPITAL_DISTANCE,
        )
        for row in hospital_df.itertuples()
    ):
        tagging(buildings, "병세권")


session.commit()
session.close()

43876it [12:10, 60.08it/s]
