In [None]:
# Install the `fonts-nanum` package through apt (-y answers prompts automatically).
!sudo apt-get install -y fonts-nanum
# Rebuild the system font cache (-f: force, -v: verbose).
!sudo fc-cache -fv
# Remove matplotlib's cache directory.
# NOTE(review): presumably so matplotlib re-scans fonts and finds the newly
# installed one; a runtime restart may still be required — confirm.
!rm ~/.cache/matplotlib -rf

In [None]:
# Mount Google Drive at /content/drive; the `cd` cell below switches into a
# folder under this mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Render all figure text with the NanumBarunGothic family (the plots below use
# Korean titles and park names as labels).
plt.rcParams['font.family'] = 'NanumBarunGothic'
# Draw negative tick labels with the ASCII hyphen instead of the Unicode minus.
# NOTE(review): presumably because the font lacks the Unicode minus glyph — confirm.
plt.rcParams['axes.unicode_minus'] = False

In [None]:
cd /content/drive/MyDrive/24-1 데마 플젝/DATA

In [None]:
# Load the dataset from the current working directory (set by the `cd` cell above).
df = pd.read_csv('df+13334+74.csv')
# Bare last expression: rendered as the cell's output.
df

In [None]:
# Display the column labels of the loaded frame.
df.columns

## **2. Time-Series, Histogram, Density plot, Boxplot**

### **2.1 방문객 수 정보**
  - 'people'

In [None]:
# Parse the 'date' column to datetime, overwriting it in place; later cells
# sort by it and use the .dt accessor on it.
df['date'] = pd.to_datetime(df['date'])

In [None]:
# Visitor counts per park on the unfiltered data: a time series on top and a
# box plot below.

# Order rows by park, then by date, so each park's line is drawn chronologically.
df = df.sort_values(['hangang', 'date']).reset_index(drop=True)

# One fixed colour per park, shared by both panels.
color_map = {
    '강서': '#980000',
    '광나루': '#CC3D3D',
    '난지': '#CC723D',
    '뚝섬': '#CCA63D',
    '망원': '#9FC93C',
    '반포': '#47C83E',
    '양화': '#3DB7CC',
    '여의도': '#4174D9',
    '이촌': '#4641D9',
    '잠실': '#7E41D9',
    '잠원': '#D941C5'
}

columns = ['people']
num_columns = len(columns)

titles = ['방문자 수']

# One subplot row per plot type: time series, box plot.
num_rows = 2
# squeeze=False keeps `axs` two-dimensional even with a single column, so
# axs[row, i] stays valid if more columns are added to `columns`.
fig, axs = plt.subplots(num_rows, num_columns, figsize=(12, 6 * num_rows), squeeze=False)

for i, (column, title) in enumerate(zip(columns, titles)):

    # Time-series plot: one line per park.
    for hangang in color_map.keys():
        subset = df[df['hangang'] == hangang]
        axs[0, i].plot(subset['date'], subset[column], label=f'{hangang}', color=color_map[hangang])
    # Show the title prepared in `titles`, as the other sections of this notebook do.
    axs[0, i].set_title(f'{title}', fontsize=20)
    axs[0, i].legend(title='Hangang')

    # Box plot: one box per park, coloured with the same map.
    sns.boxplot(y='hangang', x=column, data=df, palette=color_map, ax=axs[1, i], hue='hangang', legend=False)
    axs[1, i].tick_params(axis='y', labelsize=14)

plt.tight_layout()
plt.show()

In [None]:
# Keep only the rows whose 'people' value lies inside the per-park fences
# [Q1 - 1.5*IQR, Q3 + 1.5*IQR]. The quartiles are computed once per park and
# broadcast back to the rows, so the frame is filtered with a single boolean
# mask instead of being grown by repeated pd.concat calls onto an empty frame.
people_by_park = df.groupby('hangang')['people']

# Map each row's park label to that park's quartile.
q1_per_row = df['hangang'].map(people_by_park.quantile(0.25))
q3_per_row = df['hangang'].map(people_by_park.quantile(0.75))
iqr_per_row = q3_per_row - q1_per_row

lower_fence = q1_per_row - 1.5 * iqr_per_row
upper_fence = q3_per_row + 1.5 * iqr_per_row

# between() is inclusive on both ends, i.e. lower_fence <= people <= upper_fence.
# Rows keep the order they have in df; the index is renumbered from 0.
filtered_df = df[df['people'].between(lower_fence, upper_fence)].reset_index(drop=True)

In [None]:
# Summary statistics of the visitor counts after outlier removal.
filtered_df['people'].describe()

In [None]:
# Show the row(s) on which the outlier-filtered visitor count reaches its maximum.
# The maximum is computed rather than hard-coded, so the cell stays correct
# when the data or the filtering rule changes.
filtered_df[filtered_df['people'] == filtered_df['people'].max()]

In [None]:
# Build a new frame without the rows that fall outside the box-plot fences
# [Q1 - 1.5*IQR, Q3 + 1.5*IQR], computed separately for each park. One boolean
# mask is used instead of growing the frame with pd.concat inside a loop.
people_by_park = df.groupby('hangang')['people']
q1_per_row = df['hangang'].map(people_by_park.quantile(0.25))
q3_per_row = df['hangang'].map(people_by_park.quantile(0.75))
iqr_per_row = q3_per_row - q1_per_row
lower_fence = q1_per_row - 1.5 * iqr_per_row
upper_fence = q3_per_row + 1.5 * iqr_per_row
# between() is inclusive on both ends; rows keep their order from df.
filtered_df = df[df['people'].between(lower_fence, upper_fence)].reset_index(drop=True)

# One fixed colour per park, shared by all panels.
color_map = {
    '강서': '#980000',
    '광나루': '#CC3D3D',
    '난지': '#CC723D',
    '뚝섬': '#CCA63D',
    '망원': '#9FC93C',
    '반포': '#47C83E',
    '양화': '#3DB7CC',
    '여의도': '#4174D9',
    '이촌': '#4641D9',
    '잠실': '#7E41D9',
    '잠원': '#D941C5'
}

columns = ['people']
num_columns = len(columns)

# One subplot row per plot type: time series, histogram, density plot, box plot.
num_rows = 4
# squeeze=False keeps `axs` two-dimensional even with a single column, so the
# per-column indexing below stays valid if `columns` grows.
fig, axs = plt.subplots(num_rows, num_columns, figsize=(12, 4 * num_rows), squeeze=False)

for i, column in enumerate(columns):
    ts_ax, hist_ax, kde_ax, box_ax = axs[:, i]

    # Each park is filtered once and drawn on the three per-park panels;
    # parks are visited in color_map order on every axis.
    for hangang, color in color_map.items():
        subset = filtered_df[filtered_df['hangang'] == hangang]
        ts_ax.plot(subset['date'], subset[column], label=f'{hangang}', color=color)
        sns.histplot(subset[column], kde=False, color=color, label=hangang, ax=hist_ax)
        sns.kdeplot(subset[column], color=color, label=hangang, ax=kde_ax)

    # Time-series panel
    ts_ax.set_title(f'{column} Time-Series by Hangang')
    ts_ax.set_xlabel('Date')
    ts_ax.set_ylabel(column)
    ts_ax.legend(title='Hangang')

    # Histogram panel
    hist_ax.set_title(f'{column} Histogram by Hangang')
    hist_ax.set_xlabel(column)
    hist_ax.set_ylabel('Count')
    hist_ax.legend(title='Hangang')

    # Density panel
    kde_ax.set_title(f'{column} Density Plot by Hangang')
    kde_ax.set_xlabel(column)
    kde_ax.set_ylabel('Density')
    kde_ax.legend(title='Hangang')

    # Box-plot panel: one box per park.
    sns.boxplot(y='hangang', x=column, data=filtered_df, palette=color_map, ax=box_ax, hue='hangang', legend=False)
    box_ax.set_title(f'{column} Boxplot by Hangang')
    box_ax.set_ylabel('Hangang')
    box_ax.set_xlabel(column)

plt.tight_layout()
plt.show()

### **2.2 주변 상권 정보**

In [None]:
# Derive the calendar quarter (as a string) from the parsed 'date' column.
df['quarter'] = df['date'].dt.quarter.astype(str)
# Keep the first row of every (year, quarter, park) combination.
# .copy() makes df_quarter an independent frame, so adding a column to it
# below does not raise SettingWithCopyWarning.
df_quarter = df.drop_duplicates(['year', 'quarter', 'hangang']).copy()
# Label such as '<year>-<quarter>'; 'quarter' is already a string at this point.
df_quarter['year_quarter'] = df_quarter['year'].astype(str) + '-' + df_quarter['quarter']

- 점포 수
  - restaurant_n : 'bunsik_n', 'restaurant_n', 'chicken_n', 'fastfood_n', 'pub_n'
  - cafe_n : 'bread_n', 'cafe_n'

In [None]:
# Store counts around each park: quarterly time series, density plot and box
# plot, one subplot column per store category.

# Re-sort the daily frame by park and date (it feeds the box plots).
df = df.sort_values(['hangang', 'date']).reset_index(drop=True)

# One fixed colour per park.
color_map = {
    '강서': '#980000',
    '광나루': '#CC3D3D',
    '난지': '#CC723D',
    '뚝섬': '#CCA63D',
    '망원': '#9FC93C',
    '반포': '#47C83E',
    '양화': '#3DB7CC',
    '여의도': '#4174D9',
    '이촌': '#4641D9',
    '잠실': '#7E41D9',
    '잠원': '#D941C5'
}

columns = ['restaurant_n', 'cafe_n', 'orak_n', 'convenience_store_n']
num_columns = len(columns)

titles = ['식당 수', '카페 수', '오락시설 수', '편의점 수']

# Three subplot rows: time series, density plot, box plot.
num_rows = 3
fig, axs = plt.subplots(num_rows, num_columns, figsize=(4 * num_columns, 4 * num_rows))

for i, (column, title) in enumerate(zip(columns, titles)):
    ts_ax, kde_ax, box_ax = axs[0, i], axs[1, i], axs[2, i]

    # Per-park curves come from the one-row-per-quarter frame.
    for hangang, color in color_map.items():
        subset = df_quarter[df_quarter['hangang'] == hangang]
        ts_ax.plot(subset['year_quarter'], subset[column], label=hangang, color=color)
        sns.kdeplot(subset[column], color=color, label=hangang, ax=kde_ax)

    # Time-series panel
    ts_ax.set_title(title, fontsize=20)
    ts_ax.set_xlabel('Quarter')
    ts_ax.legend(title='Hangang')
    ts_ax.tick_params(axis='x', rotation=45)

    # Density panel
    kde_ax.set_xlabel(column)
    kde_ax.set_ylabel('Density')
    kde_ax.legend(title='Hangang')

    # Box-plot panel: drawn from the daily frame `df`, not from df_quarter.
    sns.boxplot(y='hangang', x=column, data=df, palette=color_map, ax=box_ax, hue='hangang', legend=False)
    box_ax.tick_params(axis='y', labelsize=14)

plt.tight_layout()
plt.show()

- 점포 매출액
  - restaurant_revenue : 'bunsik_revenue', 'restaurant_revenue', 'chicken_revenue', 'fastfood_revenue', 'pub_revenue'
  - cafe_revenue : 'bread_revenue', 'cafe_revenue'
  - 로그 변환 적용 (※ 아래 셀의 코드에는 로그 변환 단계가 없음 — 입력 컬럼이 이미 로그 변환된 값인지 확인 필요)


In [None]:
# Store revenue around each park: quarterly time series, density plot and box
# plot, one subplot column per store category.
# NOTE(review): the section heading mentions a log transform, but this cell
# plots the columns as they are — confirm whether the values are already
# transformed upstream.

# Re-sort the daily frame by park and date (it feeds the box plots).
df = df.sort_values(['hangang', 'date']).reset_index(drop=True)

# One fixed colour per park.
color_map = {
    '강서': '#980000',
    '광나루': '#CC3D3D',
    '난지': '#CC723D',
    '뚝섬': '#CCA63D',
    '망원': '#9FC93C',
    '반포': '#47C83E',
    '양화': '#3DB7CC',
    '여의도': '#4174D9',
    '이촌': '#4641D9',
    '잠실': '#7E41D9',
    '잠원': '#D941C5'
}

columns = ['restaurant_revenue', 'cafe_revenue', 'orak_revenue', 'convenience_store_revenue']
num_columns = len(columns)

titles = ['식당 매출 총액', '카페 매출 총액', '오락시설 매출 총액', '편의점 매출 총액']

# Three subplot rows: time series, density plot, box plot.
num_rows = 3
fig, axs = plt.subplots(num_rows, num_columns, figsize=(4 * num_columns, 4 * num_rows))

for i, (column, title) in enumerate(zip(columns, titles)):
    series_ax = axs[0, i]
    density_ax = axs[1, i]
    box_ax = axs[2, i]

    # Time-series panel: one line per park from the quarterly frame.
    for hangang, color in color_map.items():
        quarterly = df_quarter[df_quarter['hangang'] == hangang]
        series_ax.plot(quarterly['year_quarter'], quarterly[column], label=hangang, color=color)
    series_ax.set_title(title, fontsize=20)
    series_ax.set_xlabel('Quarter')
    series_ax.legend(title='Hangang')
    series_ax.tick_params(axis='x', rotation=45)

    # Density panel: one curve per park from the quarterly frame.
    for hangang, color in color_map.items():
        subset = df_quarter[df_quarter['hangang'] == hangang]
        sns.kdeplot(subset[column], color=color, label=hangang, ax=density_ax)
    density_ax.set_xlabel(column)
    density_ax.set_ylabel('Density')
    density_ax.legend(title='Hangang')

    # Box-plot panel: drawn from the daily frame `df`, not from df_quarter.
    sns.boxplot(y='hangang', x=column, data=df, palette=color_map, ax=box_ax, hue='hangang', legend=False)
    box_ax.tick_params(axis='y', labelsize=14)

plt.tight_layout()
plt.show()

### **2.3 대기환경/날씨 정보**
  - 날씨 : 'avg_temp', 'rain', 'avg_wind', 'temp_diff', 'humidity'

In [None]:
# Weather variables per park: daily time series on top and box plot below,
# one subplot column per variable.

# Sort by park and date so each line is drawn chronologically.
df = df.sort_values(['hangang', 'date']).reset_index(drop=True)

# One fixed colour per park.
color_map = {
    '강서': '#980000',
    '광나루': '#CC3D3D',
    '난지': '#CC723D',
    '뚝섬': '#CCA63D',
    '망원': '#9FC93C',
    '반포': '#47C83E',
    '양화': '#3DB7CC',
    '여의도': '#4174D9',
    '이촌': '#4641D9',
    '잠실': '#7E41D9',
    '잠원': '#D941C5'
}

columns = ['avg_temp', 'rain', 'avg_wind', 'temp_diff', 'humidity']
num_columns = len(columns)

titles = ['평균 기온', '강수량', '평균 풍속', '일교차', '습도']

# Two subplot rows: time series, box plot.
num_rows = 2
fig, axs = plt.subplots(num_rows, num_columns, figsize=(6 * num_columns, 5 * num_rows))

for i, (column, title) in enumerate(zip(columns, titles)):
    ts_ax, box_ax = axs[0, i], axs[1, i]

    # Time-series panel: semi-transparent line per park.
    for hangang, color in color_map.items():
        park_rows = df[df['hangang'] == hangang]
        ts_ax.plot(park_rows['date'], park_rows[column], label=hangang, color=color, alpha=0.7)
    ts_ax.set_title(title, fontsize=24)
    ts_ax.tick_params(axis='x', rotation=45)
    ts_ax.legend(title='Hangang')

    # Box-plot panel: one box per park.
    sns.boxplot(y='hangang', x=column, data=df, palette=color_map, ax=box_ax, hue='hangang', legend=False)
    box_ax.set_ylabel('Hangang')
    box_ax.tick_params(axis='y', labelsize=14)

plt.tight_layout()
plt.show()

### **2.4 소비 정보**
  - 'drink_cost', 'leisure_cost', 'food_cost'

In [None]:
# Spending variables per park: quarterly time series on top and box plot
# below, one subplot column per spending category.

# Re-sort the daily frame by park and date (it feeds the box plots).
df = df.sort_values(['hangang', 'date']).reset_index(drop=True)

# One fixed colour per park.
color_map = {
    '강서': '#980000',
    '광나루': '#CC3D3D',
    '난지': '#CC723D',
    '뚝섬': '#CCA63D',
    '망원': '#9FC93C',
    '반포': '#47C83E',
    '양화': '#3DB7CC',
    '여의도': '#4174D9',
    '이촌': '#4641D9',
    '잠실': '#7E41D9',
    '잠원': '#D941C5'
}

columns = ['drink_cost', 'leisure_cost', 'food_cost']
num_columns = len(columns)

titles = ['유흥 지출 총액', '여가 문화 지출 총액', '음식 지출 총액']

# Two subplot rows: time series, box plot.
num_rows = 2
fig, axs = plt.subplots(num_rows, num_columns, figsize=(4 * num_columns, 3 * num_rows))

for i, (column, title) in enumerate(zip(columns, titles)):
    ts_ax, box_ax = axs[0, i], axs[1, i]

    # Time-series panel: one line per park from the quarterly frame.
    for hangang, color in color_map.items():
        quarterly = df_quarter[df_quarter['hangang'] == hangang]
        ts_ax.plot(quarterly['year_quarter'], quarterly[column], label=hangang, color=color)
    ts_ax.set_title(title, fontsize=16)
    ts_ax.set_xlabel('Quarter')
    ts_ax.legend(title='Hangang')
    ts_ax.tick_params(axis='x', rotation=45)

    # Box-plot panel: drawn from the daily frame `df`, not from df_quarter.
    sns.boxplot(y='hangang', x=column, data=df, palette=color_map, ax=box_ax, hue='hangang', legend=False)
    box_ax.tick_params(axis='y', labelsize=12)

plt.tight_layout()
plt.show()

### **2.5 축제 정보**
  - 'buzz_sum', 'buzz_max'

In [None]:
# Festival search-buzz variables per park, drawn from the outlier-filtered
# frame: daily time series on top and box plot below.

# Sort by park and date so each line is drawn chronologically.
filtered_df = filtered_df.sort_values(['hangang', 'date']).reset_index(drop=True)

# One fixed colour per park.
color_map = {
    '강서': '#980000',
    '광나루': '#CC3D3D',
    '난지': '#CC723D',
    '뚝섬': '#CCA63D',
    '망원': '#9FC93C',
    '반포': '#47C83E',
    '양화': '#3DB7CC',
    '여의도': '#4174D9',
    '이촌': '#4641D9',
    '잠실': '#7E41D9',
    '잠원': '#D941C5'
}

columns = ['buzz_sum', 'buzz_max']
num_columns = len(columns)

titles = ['행사 검색 버즈량 합', '행사 검색 버즈량 최대값']

# Two subplot rows: time series, box plot.
num_rows = 2
fig, axs = plt.subplots(num_rows, num_columns, figsize=(4 * num_columns, 4 * num_rows))

for i, (column, title) in enumerate(zip(columns, titles)):
    ts_ax, box_ax = axs[0, i], axs[1, i]

    # Time-series panel: semi-transparent line per park.
    for hangang, color in color_map.items():
        park_rows = filtered_df[filtered_df['hangang'] == hangang]
        ts_ax.plot(park_rows['date'], park_rows[column], label=hangang, color=color, alpha=0.7)
    ts_ax.set_title(title, fontsize=16)
    ts_ax.tick_params(axis='x', rotation=45)
    ts_ax.legend(title='Hangang')

    # Box-plot panel: one box per park.
    sns.boxplot(y='hangang', x=column, data=filtered_df, palette=color_map, ax=box_ax, hue='hangang', legend=False)
    box_ax.set_ylabel('Hangang')
    box_ax.tick_params(axis='y', labelsize=14)

plt.tight_layout()
plt.show()

### **2.6 코로나 정보**
  - 'covid_people'

In [None]:
# COVID case counts per park: daily time series on top and box plot below.

# Sort by park and date so each line is drawn chronologically.
df = df.sort_values(['hangang', 'date']).reset_index(drop=True)

# One fixed colour per park.
color_map = {
    '강서': '#980000',
    '광나루': '#CC3D3D',
    '난지': '#CC723D',
    '뚝섬': '#CCA63D',
    '망원': '#9FC93C',
    '반포': '#47C83E',
    '양화': '#3DB7CC',
    '여의도': '#4174D9',
    '이촌': '#4641D9',
    '잠실': '#7E41D9',
    '잠원': '#D941C5'
}

columns = ['covid_people']
num_columns = len(columns)

titles = ['코로나 확진자 수']

# Two subplot rows: time series, box plot.
num_rows = 2
# squeeze=False keeps `axs` two-dimensional even with a single column, so
# axs[row, i] stays valid if more columns are added to `columns`.
fig, axs = plt.subplots(num_rows, num_columns, figsize=(4 * num_columns, 4 * num_rows), squeeze=False)

for i, (column, title) in enumerate(zip(columns, titles)):

    # Time-series panel: semi-transparent line per park.
    for hangang in color_map.keys():
        subset = df[df['hangang'] == hangang]
        axs[0, i].plot(subset['date'], subset[column], label=f'{hangang}', color=color_map[hangang], alpha=0.7)
    axs[0, i].set_title(f'{title}', fontsize=16)
    axs[0, i].tick_params(axis='x', rotation=45)
    axs[0, i].legend(title='Hangang')

    # Box-plot panel: one box per park.
    sns.boxplot(y='hangang', x=column, data=df, palette=color_map, ax=axs[1, i], hue='hangang', legend=False)
    axs[1, i].set_ylabel('Hangang')
    axs[1, i].tick_params(axis='y', labelsize=14)

plt.tight_layout()
plt.show()