In [None]:
# Mount Google Drive inside the Colab VM so files under /content/drive become readable.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
cd /content/drive/Othercomputers/내 노트북/2024-1/데이터마이닝/final project

In [None]:
!pip install workalendar

In [None]:
import pandas as pd
from workalendar.asia import SouthKorea

## **시계열정보 및 벚꽃개화시기**

### **데이터 불러오기**

In [None]:
# Read the base table and convert its 'date' column from year-month-day text to datetime64.
base = pd.read_csv('hangang+people.csv')
base = base.assign(date=pd.to_datetime(base['date'], format='%Y-%m-%d'))
base.head()

### **연도, 월, 요일, 주말여부, 공휴일, 계절, 벚꽃개화시기 생성**

In [None]:
# Split the timestamp into calendar parts; `weekday` uses pandas' numbering (Monday=0 ... Sunday=6).
base = base.assign(
    year=base['date'].dt.year,
    month=base['date'].dt.month,
    day=base['date'].dt.day,
    weekday=base['date'].dt.weekday,
)

In [None]:
# Weekend flag: 1 when weekday is Saturday (5) or Sunday (6), otherwise 0.
base['weekend'] = base['weekday'].isin([5, 6]).astype('int64')

In [None]:
# Build a (date, holiday_name) table of South Korean holidays from workalendar,
# restricted to the window 2020-01-01 .. 2023-05-31 (both ends inclusive).
cal = SouthKorea()

start_date = pd.Timestamp('2020-01-01').date()
end_date = pd.Timestamp('2023-05-31').date()

# cal.holidays(year) is iterated as (date, name) pairs; out-of-window dates are filtered out.
holidays_list = [
    (holiday_date, holiday_name)
    for year in range(start_date.year, end_date.year + 1)
    for holiday_date, holiday_name in cal.holidays(year)
    if start_date <= holiday_date <= end_date
]

holidays_df = pd.DataFrame(holidays_list, columns=['date', 'holiday_name'])
holidays_df['date'] = pd.to_datetime(holidays_df['date'], format='%Y-%m-%d')
holidays_df.head()

In [None]:
# Attach the holiday name to every row by date and derive a 0/1 holiday flag.
# A `holiday_name` column left over from an earlier run of this cell is removed first;
# otherwise the merge would produce `holiday_name_x` / `holiday_name_y` and the flag
# computation below would fail with a KeyError.
base = base.drop(columns=['holiday_name'], errors='ignore')

# Keep a single holiday per date so that a date carrying two holiday names cannot
# duplicate rows of `base` in the left join (the first listed name is retained).
holidays_unique = holidays_df.drop_duplicates(subset='date')
base = base.merge(holidays_unique, how='left', on='date')

# 1 when the date matched a holiday, 0 otherwise.
base['holiday'] = base['holiday_name'].notna().astype(int)

In [None]:
# Label each row with a season according to its month number.
months_by_season = {
    'spring': [3, 4, 5],
    'summer': [6, 7, 8],
    'fall': [9, 10, 11],
    'winter': [12, 1, 2],
}
for season_name, season_months in months_by_season.items():
    base.loc[base['month'].isin(season_months), 'season'] = season_name

In [None]:
# One (start, end) window per year; a row gets cherry_blossom = 1 when its date falls
# inside any of the windows (both ends inclusive), otherwise 0.
blossom_windows = [
    ('2020-03-27', '2020-04-10'),
    ('2021-03-24', '2021-04-07'),
    ('2022-04-04', '2022-04-18'),
    ('2023-03-25', '2023-04-08'),
]

combined_date_range = pd.DatetimeIndex(
    [day for start, end in blossom_windows for day in pd.date_range(start=start, end=end)]
)

base['cherry_blossom'] = base['date'].isin(combined_date_range).astype('int64')

In [None]:
# Display the frame to eyeball the engineered calendar columns.
base

In [None]:
# Missing values per column; `holiday_name` is NaN on rows the left merge did not match to a holiday.
base.isna().sum()

In [None]:
# Column dtypes and non-null counts.
base.info()

## **시계열정보 없는 데이터**

### **한강공원기본+주차장정보**

In [None]:
# Load the park-basics + parking-lot table (per the file name) from the working directory.
# NOTE(review): presumably one row per park, without a date column — confirm.
df1 = pd.read_csv('한강공원기본+주차장정보.csv')
df1

In [None]:
# Inner join on every column name the two frames have in common (no explicit key is given).
df = pd.merge(base, df1, how='inner')
df

In [None]:
# Missing values per column after the merge.
df.isna().sum()

In [None]:
# Column dtypes and non-null counts after the merge.
df.info()

### **버스+지하철 정보**

In [None]:
# Load the bus + subway table (per the file name) from the working directory.
df2 = pd.read_csv('한강버스+지하철정보.csv')
df2

In [None]:
# Inner join on every column name the two frames have in common (no explicit key is given).
df = pd.merge(df, df2, how='inner')
df

In [None]:
# Missing values per column after adding the transit columns.
df.isna().sum()

In [None]:
# Column dtypes and non-null counts after adding the transit columns.
df.info()

## **한강체육시설**

### **데이터 불러오기**

In [None]:
# Load the sports-facility table (per the file name) from the working directory.
df3 = pd.read_csv('한강체육시설.csv')
df3

In [None]:
# Inner join on every column name the two frames have in common (no explicit key is given).
df = pd.merge(df, df3, how='inner')
df

In [None]:
# Missing values per column after adding the sports-facility columns.
df.isna().sum()

In [None]:
# Column dtypes and non-null counts after adding the sports-facility columns.
df.info()

### **이용기간 반영**

In [None]:
# Set `climbing` to 0 for 강서 and 뚝섬 rows in January and February.
in_park = df['hangang'].isin(['강서', '뚝섬'])
in_months = df['month'].isin([1, 2])
df.loc[in_park & in_months, 'climbing'] = 0

In [None]:
# Set `baseball` to 0 for 난지 rows in January and February.
in_park = df['hangang'].isin(['난지'])
in_months = df['month'].isin([1, 2])
df.loc[in_park & in_months, 'baseball'] = 0

In [None]:
# Subtract 1 from `soccer` for 이촌 rows whose weekday is 1 or 3 (Tuesday / Thursday).
in_park = df['hangang'].isin(['이촌'])
on_weekdays = df['weekday'].isin([1, 3])
df.loc[in_park & on_weekdays, 'soccer'] -= 1

In [None]:
# Set `parkgolf` to 0 for 여의도 rows that are in Dec–Mar or fall on a Monday (weekday 0).
in_park = df['hangang'].isin(['여의도'])
in_months = df['month'].isin([1, 2, 3, 12])
on_monday = df['weekday'].isin([0])
df.loc[in_park & (in_months | on_monday), 'parkgolf'] = 0

In [None]:
# Missing values per column after the sports-facility adjustments.
df.isna().sum()

In [None]:
# Column dtypes and non-null counts after the sports-facility adjustments.
df.info()

## **한강그외시설**

### **데이터 불러오기**

In [None]:
# Load the table of remaining (non-sports) facilities, per the file name, from the working directory.
df4 = pd.read_csv('한강그외시설.csv')
df4

In [None]:
# Inner join on every column name the two frames have in common (no explicit key is given).
df = pd.merge(df, df4, how='inner')
df

In [None]:
# Missing values per column after adding the other-facility columns.
df.isna().sum()

In [None]:
# Column dtypes and non-null counts after adding the other-facility columns.
df.info()

### **이용기간 반영**

In [None]:
# Subtract 3 from `toilet` for 뚝섬 rows in December, January and February.
in_park = df['hangang'].isin(['뚝섬'])
in_months = df['month'].isin([1, 2, 12])
df.loc[in_park & in_months, 'toilet'] -= 3

---

In [None]:
# Set `busking` to 0 for 광나루 rows in November through March.
in_park = df['hangang'].isin(['광나루'])
in_months = df['month'].isin([1, 2, 3, 11, 12])
df.loc[in_park & in_months, 'busking'] = 0

In [None]:
# Set `stage` to 0 for 여의도 rows in November through April.
in_park = df['hangang'].isin(['여의도'])
in_months = df['month'].isin([1, 2, 3, 4, 11, 12])
df.loc[in_park & in_months, 'stage'] = 0

In [None]:
# Subtract 1 from `learning_space` for 여의도 rows that are in March–November or whose
# holiday name is one of the three listed holidays.
# The whole boolean expression sits inside parentheses: the statement previously ended on
# a bare `&` and continued on the next line, which Python rejects as a SyntaxError.
condition = (
    df['hangang'].isin(['여의도'])
    & (
        df['month'].isin([3, 4, 5, 6, 7, 8, 9, 10, 11])
        | df['holiday_name'].isin(['New year', "Korean New Year's Day", 'Christmas Day'])
    )
)
df.loc[condition, 'learning_space'] = df.loc[condition, 'learning_space'] - 1

In [None]:
# Subtract 1 from `learning_space` for 여의도 rows that are in Dec–Mar, fall on a Monday
# (weekday 0), or are flagged as a holiday.
in_park = df['hangang'].isin(['여의도'])
in_months = df['month'].isin([1, 2, 3, 12])
on_monday = df['weekday'].isin([0])
is_holiday = df['holiday'] == 1
df.loc[in_park & (in_months | on_monday | is_holiday), 'learning_space'] -= 1

In [None]:
# Explicit dates on which 뚝섬's `learning_space` is zeroed, in addition to every Monday.
hol_list = pd.to_datetime(
    ['2020-01-25', '2021-02-12', '2022-02-01', '2023-01-22', '2020-10-01', '2021-09-21', '2022-09-10'],
    format='%Y-%m-%d',
)

in_park = df['hangang'].isin(['뚝섬'])
on_monday = df['weekday'].isin([0])
on_listed_date = df['date'].isin(hol_list)
df.loc[in_park & (on_monday | on_listed_date), 'learning_space'] = 0

---

In [None]:
# Set `swimming_pool` to 0 for the listed parks on every date outside 2022-06-24 .. 2022-08-21.
pool_dates = pd.date_range(start='2022-06-24', end='2022-08-21')

in_park = df['hangang'].isin(['뚝섬', '광나루', '여의도', '잠원', '난지', '양화'])
outside_window = ~df['date'].isin(pool_dates)
df.loc[in_park & outside_window, 'swimming_pool'] = 0

In [None]:
# Set `snow_sled` to 0 for 뚝섬 and 잠원 on every date outside 2022-12-23 .. 2023-02-12.
sled_dates = pd.date_range(start='2022-12-23', end='2023-02-12')

in_park = df['hangang'].isin(['뚝섬', '잠원'])
outside_window = ~df['date'].isin(sled_dates)
df.loc[in_park & outside_window, 'snow_sled'] = 0

In [None]:
# Set `camping` to 0 for 난지 rows on the second Tuesday (weekday 1) of each month.
# Two fixes over the previous form:
#   * the expression is parenthesized — it used to end a line on a bare `&`, a SyntaxError;
#   * the "second occurrence" test uses the day of month (the second occurrence of any
#     weekday always falls on day 8..14) instead of a dense rank over the days present
#     in the data, which picks the wrong date whenever an earlier Tuesday of the month
#     is missing from `df`.
condition = (
    df['hangang'].isin(['난지'])
    & (df['weekday'] == 1)
    & df['day'].between(8, 14)
)
df.loc[condition, 'camping'] = 0

In [None]:
# Subtract 1 from `bicycle` for 광나루, 난지 and 이촌 rows that are in Dec–Feb, or in
# November on weekday 0–3 (Monday–Thursday).
# The expression is parenthesized because the statement previously ended on a bare `&`
# and continued on the next line, which Python rejects as a SyntaxError.
condition = (
    df['hangang'].isin(['광나루', '난지', '이촌'])
    & (
        df['month'].isin([1, 2, 12])
        | (df['month'].isin([11]) & df['weekday'].isin([0, 1, 2, 3]))
    )
)
df.loc[condition, 'bicycle'] = df.loc[condition, 'bicycle'] - 1

In [None]:
# Set `bicycle` to 0 for 강서, 양화, 잠실 and 잠원 rows that are in Dec–Feb, or in
# November on weekday 0–3 (Monday–Thursday).
# The expression is parenthesized because the statement previously ended on a bare `&`
# and continued on the next line, which Python rejects as a SyntaxError.
condition = (
    df['hangang'].isin(['강서', '양화', '잠실', '잠원'])
    & (
        df['month'].isin([1, 2, 12])
        | (df['month'].isin([11]) & df['weekday'].isin([0, 1, 2, 3]))
    )
)
df.loc[condition, 'bicycle'] = 0

In [None]:
# Set `bicycle` to 0 for 뚝섬, 반포 and 여의도 rows in November through February.
in_park = df['hangang'].isin(['뚝섬', '반포', '여의도'])
in_months = df['month'].isin([1, 2, 11, 12])
df.loc[in_park & in_months, 'bicycle'] = 0

In [None]:
# Subtract 1 from `fountain` for 난지 and 이촌 rows in November through April.
in_park = df['hangang'].isin(['난지', '이촌'])
in_months = df['month'].isin([1, 2, 3, 4, 11, 12])
df.loc[in_park & in_months, 'fountain'] -= 1

In [None]:
# Subtract 1 from `fountain` for 난지 and 뚝섬 rows in every month except July and August.
in_park = df['hangang'].isin(['난지', '뚝섬'])
in_months = df['month'].isin([1, 2, 3, 4, 5, 6, 9, 10, 11, 12])
df.loc[in_park & in_months, 'fountain'] -= 1

In [None]:
# Subtract 3 from `fountain` for 뚝섬 rows in November through April.
in_park = df['hangang'].isin(['뚝섬'])
in_months = df['month'].isin([1, 2, 3, 4, 11, 12])
df.loc[in_park & in_months, 'fountain'] -= 3

In [None]:
# Subtract 1 from `fountain` for 뚝섬, 여의도 and 반포 rows in November through March.
in_park = df['hangang'].isin(['뚝섬', '여의도', '반포'])
in_months = df['month'].isin([1, 2, 3, 11, 12])
df.loc[in_park & in_months, 'fountain'] -= 1

In [None]:
# Subtract 2 from `fountain` for 여의도 rows in November through April.
in_park = df['hangang'].isin(['여의도'])
in_months = df['month'].isin([1, 2, 3, 4, 11, 12])
df.loc[in_park & in_months, 'fountain'] -= 2

---

In [None]:
# Subtract 2 from `water_facility` for 반포 and 망원 rows that fall on a Monday (weekday 0).
in_park = df['hangang'].isin(['반포', '망원'])
on_monday = df['weekday'].isin([0])
df.loc[in_park & on_monday, 'water_facility'] -= 2

In [None]:
# Subtract 1 from `water_facility` for 반포 rows in January.
in_park = df['hangang'].isin(['반포'])
in_months = df['month'].isin([1])
df.loc[in_park & in_months, 'water_facility'] -= 1

In [None]:
# Subtract 1 from `water_facility` for 반포 rows in November through February.
in_park = df['hangang'].isin(['반포'])
in_months = df['month'].isin([1, 2, 11, 12])
df.loc[in_park & in_months, 'water_facility'] -= 1

In [None]:
# Subtract 1 from `water_facility` for the six listed parks in November through March.
in_park = df['hangang'].isin(['여의도', '난지', '뚝섬', '이촌', '잠원', '양화'])
in_months = df['month'].isin([1, 2, 3, 11, 12])
df.loc[in_park & in_months, 'water_facility'] -= 1

In [None]:
# Subtract 1 from `water_facility` for 여의도 and 이촌 rows in December through February.
in_park = df['hangang'].isin(['여의도', '이촌'])
in_months = df['month'].isin([1, 2, 12])
df.loc[in_park & in_months, 'water_facility'] -= 1

In [None]:
# Subtract 1 from `water_facility` for 난지 rows that fall on a Monday (weekday 0).
in_park = df['hangang'].isin(['난지'])
on_monday = df['weekday'].isin([0])
df.loc[in_park & on_monday, 'water_facility'] -= 1

In [None]:
# Subtract 1 from `water_facility` for 뚝섬 rows in November through April.
in_park = df['hangang'].isin(['뚝섬'])
in_months = df['month'].isin([1, 2, 3, 4, 11, 12])
df.loc[in_park & in_months, 'water_facility'] -= 1

In [None]:
# Subtract 1 from `water_facility` for 뚝섬 and 양화 rows in January and February.
in_park = df['hangang'].isin(['뚝섬', '양화'])
in_months = df['month'].isin([1, 2])
df.loc[in_park & in_months, 'water_facility'] -= 1

In [None]:
# Subtract 1 from `water_facility` for 잠원 rows that are in November–March or on a weekend.
in_park = df['hangang'].isin(['잠원'])
in_months = df['month'].isin([1, 2, 3, 11, 12])
on_weekend = df['weekend'] == 1
df.loc[in_park & (in_months | on_weekend), 'water_facility'] -= 1

In [None]:
# Subtract 1 from `water_facility` for 잠원 rows in December through March.
in_park = df['hangang'].isin(['잠원'])
in_months = df['month'].isin([1, 2, 3, 12])
df.loc[in_park & in_months, 'water_facility'] -= 1

In [None]:
# Subtract 1 from `water_facility` for 잠원 rows in January through March.
in_park = df['hangang'].isin(['잠원'])
in_months = df['month'].isin([1, 2, 3])
df.loc[in_park & in_months, 'water_facility'] -= 1

---

In [None]:
# Subtract 1 from `nature` for 난지 rows whose date falls inside one of the yearly windows
# below (both ends inclusive); the 2023 window stops at 2023-05-31.
nature_windows = [
    ('2020-02-20', '2020-06-30'),
    ('2021-02-20', '2021-06-30'),
    ('2022-02-20', '2022-06-30'),
    ('2023-02-20', '2023-05-31'),
]

combined_date_range = pd.DatetimeIndex(
    [day for start, end in nature_windows for day in pd.date_range(start=start, end=end)]
)

in_park = df['hangang'].isin(['난지'])
in_window = df['date'].isin(combined_date_range)
df.loc[in_park & in_window, 'nature'] -= 1

## **최종 데이터 저장**

In [None]:
# Remove the helper columns that were only needed while building the features above.
df = df.drop(['day', 'holiday_name'], axis='columns')
df

In [None]:
# Missing values per column in the final frame.
df.isna().sum()

In [None]:
# Total number of missing cells across the whole final frame.
df.isna().sum().sum()

In [None]:
# Column dtypes and non-null counts of the final frame.
df.info()

In [None]:
# Write the final table without the index, comma-separated, encoded as cp949.
# NOTE(review): readers must pass encoding='cp949' when loading this file; confirm that is what downstream code does.
df.to_csv('hangang+people+timeseries+cherryblossom+park.csv', index=False, sep=',', encoding='cp949')