In [1]:
import pandas as pd
from datetime import datetime
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report

In [2]:
# Load the two input tables from CSV files in the working directory:
# first the parking-lot table, then the complaint-report table.
parks_df, reports_df = (
    pd.read_csv(csv_name) for csv_name in ('parks.csv', 'reports.csv')
)

In [3]:
from datetime import datetime

# Build one timestamp column from the separate date and time columns.
# Both parts are cast to text and joined with a single space; anything
# pandas cannot parse becomes NaT because of errors="coerce".
received_date_text = reports_df["민원접수일"].astype(str)
received_time_text = reports_df["민원접수시간"].astype(str)
reports_df["datetime"] = pd.to_datetime(
    received_date_text + " " + received_time_text, errors="coerce"
)

In [4]:
def is_within_operating_hours(report_time, start_str, end_str):
    """Return True when ``report_time`` falls inside the operating window.

    Parameters
    ----------
    report_time : datetime-like
        Any object exposing ``.time()`` (e.g. ``datetime.datetime`` or
        ``pandas.Timestamp``). Only the time of day is used.
    start_str, end_str : str
        Opening and closing times formatted as ``"%H:%M:%S"``.

    Returns
    -------
    bool
        ``True`` if the time of day lies within the window (both bounds
        inclusive). A window whose end is earlier than its start is treated
        as wrapping past midnight (e.g. ``22:00:00``-``06:00:00``).
        ``False`` when both bounds are ``"00:00:00"`` or when any input
        cannot be interpreted (wrong type, bad format, NaT, ...).
    """
    # A 00:00:00-00:00:00 window is never counted as "within hours".
    if start_str == "00:00:00" and end_str == "00:00:00":
        return False
    try:
        start = datetime.strptime(start_str, "%H:%M:%S").time()
        end = datetime.strptime(end_str, "%H:%M:%S").time()
        current = report_time.time()
    except (ValueError, TypeError, AttributeError):
        # Unparseable bounds or a missing timestamp: best-effort False,
        # without swallowing KeyboardInterrupt/SystemExit.
        return False
    if start <= end:
        return start <= current <= end
    # Overnight window: inside if after opening OR before closing.
    return current >= start or current <= end

def time_to_float(tstr):
    """Convert an ``"%H:%M:%S"`` time string to fractional hours.

    Parameters
    ----------
    tstr : str
        Time of day formatted as ``"%H:%M:%S"``.

    Returns
    -------
    float
        Hours since midnight including the minute and second fractions
        (e.g. ``"09:30:00"`` -> ``9.5``). ``0.0`` when ``tstr`` is not a
        string or does not match the format.
    """
    try:
        t = datetime.strptime(tstr, "%H:%M:%S").time()
    except (ValueError, TypeError):
        # Missing or malformed value: fall back to 0.0 instead of failing,
        # without swallowing KeyboardInterrupt/SystemExit.
        return 0.0
    # Seconds are included so "23:59:59" maps close to 24 rather than 23.98.
    return t.hour + t.minute / 60 + t.second / 3600


In [13]:
import pandas as pd
import numpy as np

# 0) Clean the parks coordinates and convert them to radians (done once).
parks_df = parks_df.dropna(subset=['위도','경도']).reset_index(drop=True)
parks_rad = np.radians(parks_df[['위도','경도']].values)  # shape=(n_parks,2)

# 1) Clean the reports and draw a reproducible random sample.
# NOTE(review): the original comment said 100,000 rows while the code sampled
# 1,000,000 — the code's value is kept here; confirm the intended size.
MAX_SAMPLE_ROWS = 1000000
reports_df['datetime'] = pd.to_datetime(reports_df['datetime'])
reports_df = reports_df.dropna(subset=['위도','경도']).reset_index(drop=True)
# DataFrame.sample raises ValueError when n exceeds the number of rows
# (sampling without replacement), so cap n at the rows actually available.
reports_sample = reports_df.sample(
    n=min(MAX_SAMPLE_ROWS, len(reports_df)), random_state=42
).reset_index(drop=True)

# 2) Haversine great-circle distance (all angular inputs are in radians).
def haversine_rad(lat1, lon1, lat2_arr, lon2_arr, R=6371000):
    """Great-circle distance from one point to one or many other points.

    Parameters
    ----------
    lat1, lon1 : float
        Latitude and longitude of the reference point, in radians.
    lat2_arr, lon2_arr : float or numpy.ndarray
        Latitudes and longitudes of the target point(s), in radians.
    R : float, optional
        Sphere radius; the result is expressed in the same unit. The default
        6371000 is the Earth's mean radius in metres.

    Returns
    -------
    float or numpy.ndarray
        Distance(s) along the sphere, broadcast to the shape of the targets.
    """
    dlat = lat2_arr - lat1
    dlon = lon2_arr - lon1
    a = np.sin(dlat / 2.0)**2 + np.cos(lat1) * np.cos(lat2_arr) * np.sin(dlon / 2.0)**2
    # Floating-point rounding can push `a` marginally outside [0, 1]
    # (e.g. for near-antipodal points), which would turn the arcsin into NaN.
    a = np.clip(a, 0.0, 1.0)
    return 2 * R * np.arcsin(np.sqrt(a))

records = []

# 3) For every sampled report, compute the distance to all parking lots and
#    keep the nearest one.
# The coordinate conversion is loop-invariant per column, so convert all
# report coordinates to radians once instead of once per row.
report_lat_rad = np.radians(reports_sample['위도'].to_numpy(dtype=float))
report_lon_rad = np.radians(reports_sample['경도'].to_numpy(dtype=float))
park_lat_rad = parks_rad[:, 0]  # parks latitude (radians)
park_lon_rad = parks_rad[:, 1]  # parks longitude (radians)

# Iterating over plain arrays avoids building a Series per row as
# DataFrame.iterrows() does.
for lat1, lon1, report_time in zip(report_lat_rad, report_lon_rad, reports_sample['datetime']):
    # 3-1) Vectorised haversine against every parking lot
    dists = haversine_rad(lat1, lon1, park_lat_rad, park_lon_rad)

    # 3-2) Index of the shortest distance and the matching parking-lot row
    idx_min = np.argmin(dists)
    nearest = parks_df.iloc[idx_min]
    dist_m = dists[idx_min]

    # 3-3) Was the report filed inside the nearest lot's weekday hours?
    label = is_within_operating_hours(
        report_time,
        nearest['평일시작'],
        nearest['평일종료']
    )

    # 3-4) Hourly fee, defaulting to 0 when the column is absent or the value
    #      is missing. NaN is truthy, so `value or 0` alone would let NaN
    #      through into the feature table.
    fee = nearest.get('1시간 요금', 0)
    if pd.isna(fee) or not fee:
        fee = 0

    # 3-5) Store the feature row
    records.append({
        'distance': dist_m,
        '시작시간': time_to_float(nearest['평일시작']),
        '종료시간': time_to_float(nearest['평일종료']),
        '요금': fee,
        'label': int(label)
    })

from pandas import ExcelWriter

# 4) Turn the accumulated per-report records into a DataFrame
df = pd.DataFrame(records)

# Write the table (including its index column) to a single-sheet Excel
# workbook through the openpyxl engine, then report the destination path.
output_file = 'parking_complaint_analysis.xlsx'
writer = ExcelWriter(output_file, engine='openpyxl')
with writer:
    df.to_excel(excel_writer=writer, sheet_name='분석결과', index=True)
print(f"✅ 파일로 저장 완료: {output_file}")




✅ 파일로 저장 완료: parking_complaint_analysis.xlsx


In [14]:
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# 1) Prepare the data: feature matrix, target vector and a 70/30 split
feature_columns = ["distance", "시작시간", "종료시간", "요금"]
X = df[feature_columns]
y = df["label"]
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

# 2) Fit the logistic-regression model (fit returns the estimator itself)
#    and predict on the held-out rows
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = model.predict(X_test)

# 3) Get the classification report as a nested dict rather than text
report_dict = classification_report(y_test, y_pred, output_dict=True)

# 4) Convert to a DataFrame with one row per report entry
report_df = pd.DataFrame(report_dict).transpose()

from pandas import ExcelWriter

# Save the classification-report table (row labels kept as the first column)
# to a single-sheet Excel workbook through the openpyxl engine.
output_file = "classification_report.xlsx"
writer = ExcelWriter(output_file, engine='openpyxl')
with writer:
    report_df.to_excel(excel_writer=writer, sheet_name='분석결과', index=True)
print(f"✅ 파일로 저장 완료: {output_file}")


✅ 파일로 저장 완료: classification_report.xlsx
