In [3]:
import pandas as pd
import numpy as np
import os 
import geopandas as gpd
import plotly.express as px
from scipy.spatial import KDTree
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Load the marine-forest shapefile into a GeoDataFrame.
mf = gpd.read_file('./Marine_forest/Marine_forest.shp')

# Number of rows whose '면적', '구역' and '규모' values are all equal; this is
# computed before the two redundant columns are dropped.
count = len(mf[(mf['면적'] == mf['구역']) & (mf['면적'] == mf['규모'])])

# Korean column name -> English column name.
col_names = {'시도명': 'province','시군구명':'city','시군구코드':'city_code','위도':'latitude','경도':'longitude','마을어장':'town','면적':'area'}
mf = mf.drop(columns=['구역','규모']).rename(columns=col_names)

# The latitude/longitude columns shipped with the file contained errors,
# so both are overwritten with the coordinates of the geometry column.
mf['latitude'] = mf['geometry'].y
mf['longitude'] = mf['geometry'].x

상관관계 분석 및 관련 논문을 참조한 결과,

1) 물리적 원인
가장 유력한 원인은 수온의 상승과 이로 인한 바닷속의 용존산소량의 감소
이 밖에도 다시마와 같은 해조류를 섭취하는 조식동물의 지나친 증식과 오염된 담수의 유입

2) 화학적 원인
해수의 저염수화(바다에 담수가 많이 유입되어 해수의 염도가 낮아짐)와 
해양오염(불투명도 증가와 오염물질의 확산)

따라서 저희가 가지고 있는 데이터셋에서는
1. 수온, 2. 용존산소, 3. 염분, 4. 부유물질 농도 순으로 살펴보면 될 것 같습니다.

칼럼별 상관계수 측정

In [85]:
# Load the merged observations and drop the station, time, position, depth
# and nutrient columns listed below, which are left out of the correlation
# analysis.
df = pd.read_csv('./data/merged_data.csv').drop(
    columns=['station', 'date_time', 'longitude', 'latitude','bot.depth[m]','depth(text)','depth(m)','전체수심(m)','유분[mg/L]','용존무기질소[μg/L]','아질산성질소[μg/L]', '인산염인[μg/L]',
       '질산성질소[μg/L]','암모니아성 질소[μg/L]']
)

# Show which columns remain in the analysis.
print(df.columns)

# Pairwise correlation (DataFrame.corr default method) rendered as a heatmap.
correlation_matrix = df.corr()
title = "Pearson Correlation Matrix"
fig = px.imshow(correlation_matrix, aspect="auto", title=title)
fig.update_layout(
    width=600,
    height=600,
    margin=dict(t=50, b=50, l=50, r=50),
)
fig.show()


Index(['총질소[μg/L]', '염분[psu]', '수온[℃]', '클로로필-a[μg/L]', '부유물질 농도[mg/L]',
       '용존산소[mg/L]', '수소이온농도[무단위]', '규산염[μg/L]', '투명도[m]', '화학적산소요구량[mg/L]',
       '총인[μg/L]', '부유물질 농도[μg/L]'],
      dtype='object')


In [86]:
# Kendall rank correlation between the remaining columns of `df`.
# The heatmap must be drawn from this matrix (not from the Pearson
# `correlation_matrix`) so that it matches its title.
kendall_corr = df.corr(method='kendall')
title = "Kendall Correlation Matrix"
fig = px.imshow(kendall_corr, aspect="auto", title=title)
fig.update_layout(width=600, height=600, margin=dict(t=50, b=50, l=50, r=50))
fig.show()


In [87]:
# Spearman rank correlation between the remaining columns of `df`.
# The heatmap must be drawn from this matrix (not from the Pearson
# `correlation_matrix`) so that it matches its title.
spearman_corr = df.corr(method='spearman')
title = "Spearman Correlation Matrix"
fig = px.imshow(spearman_corr, aspect="auto", title=title)
fig.update_layout(width=600, height=600, margin=dict(t=50, b=50, l=50, r=50))
fig.show()


# 바다숲 지형정보

바다숲 전체

In [88]:
# Map of every marine-forest row whose '시설년도' is 2022 or earlier.
title = '2013~2022년 바다숲 조성사업 현황'
scaling_factor = 1

# NOTE(review): the plot below reads the plain 'latitude'/'longitude' columns
# rather than the reprojected geometry — confirm the reprojection is needed.
buff = mf.loc[mf['시설년도'] <= 2022].to_crs(epsg=32652)

# Fixed latitude, median longitude of the selected rows.
center = {'lat': 35.8, 'lon': buff['longitude'].median()}

# Marker size and colour both follow the 'area' column.
fig = px.scatter_mapbox(
    buff,
    lat=buff['latitude'],
    lon=buff['longitude'],
    size=(buff['area']* 10000)*scaling_factor,
    color=buff['area'],
    size_max=20,
    zoom=5,
    center=center,
    title=title,
)
fig.update_layout(
    mapbox_style="open-street-map",
    margin={"r":10,"t":70,"l":20,"b":20},
    width=1000,
    height=800,
    legend=dict(orientation="h",yanchor="bottom",y=1.02,xanchor="right",x=1.15),
)
fig.show()

특정 연도마다 해양지형정보 측정기와 바다숲 위치정보

In [36]:
def map_scatter(title, ocean_loc):
    """Show marine-forest sites together with the points of one observation CSV.

    Args:
        title: Figure title (formatted into a string before use).
        ocean_loc: Path string of a CSV file with 'latitude' and 'longitude'
            columns. Characters ``[-8:-4]`` of the path must form a four-digit
            year (e.g. ``'.../2020.csv'``); only rows of the module-level
            ``mf`` whose '시설년도' is at most that year are drawn.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    stations = pd.read_csv(f'{ocean_loc}')

    # The year is taken from the file name, e.g. '2020' from '2020.csv'.
    cutoff_year = int(ocean_loc[-8:-4])
    forests = mf.loc[mf['시설년도'] <= cutoff_year].to_crs(epsg=32652)

    marker_scale = 1
    fig = px.scatter_mapbox(
        forests,
        lat=forests['latitude'],
        lon=forests['longitude'],
        size=(forests['area']* 10000)*marker_scale,
        color=forests['area'],
        size_max=30,
        zoom=5,
        center={'lat': 35.8, 'lon': forests['longitude'].median()},
        title=f'{title}',
    )

    # Overlay the CSV points as small red markers.
    station_markers = go.Scattermapbox(
        lat=stations['latitude'],
        lon=stations['longitude'],
        mode='markers',
        marker=dict(size=5, color='red'),
        name='Surface Data',
    )
    fig.add_trace(station_markers)

    fig.update_layout(
        mapbox_style="open-street-map",
        margin={"r":10,"t":70,"l":20,"b":20},
        width=800,
        height=800,
        legend=dict(orientation="h",yanchor="bottom",y=1.02,xanchor="right",x=1.15),
    )
    fig.show()
# Column list kept for reference:
#['클로로필-a[μg/L]','수소이온농도[무단위]', '규산염[μg/L]', '투명도[m]', '화학적산소요구량[mg/L]','총인[μg/L]', '부유물질 농도[mg/L]']
# Priority order: 1. water temperature, 2. dissolved oxygen, 3. salinity, 4. suspended-solids concentration
def sc_by_col(ocean_path, origin_path, radius,title):
    """Compare two observation CSVs per sea region and per measured column.

    For each of five fixed columns one subplot row is drawn. The first three
    cells of a row hold, for each sea region, the monthly ('%Y-%m') mean of
    ``origin_path`` and of ``ocean_path`` as two lines. The last cell is a bar
    chart of ``mean(origin) - mean(ocean)`` per region over all rows.

    Args:
        ocean_path: CSV with 'date_time', 'longitude', 'latitude' and the
            plotted columns; its lines are labelled '바다숲 반경{radius}km'.
        origin_path: CSV with the same columns; its lines are labelled
            '{region} 전체'.
        radius: Value interpolated into the legend label only; it is not used
            in any computation.
        title: Figure title.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    columns_to_plot = ['수온[℃]','용존산소[mg/L]','염분[psu]','총질소[μg/L]', '부유물질 농도[mg/L]']

    # Longitude range and latitude range of the East, West and South sea.
    regions = {
        '동해': [(128.5, 130), (34.5, 38.5)],
        '서해': [(124.5, 127), (34, 38)],
        '남해': [(126, 130), (33, 35)]
    }

    def _load_by_month(path):
        """Read a CSV and replace 'date_time' by its 'YYYY-MM' string."""
        frame = pd.read_csv(path)
        frame['date_time'] = pd.to_datetime(frame['date_time'], format='mixed').dt.strftime('%Y-%m')
        return frame

    def _within(frame, lon_range, lat_range):
        """Rows of ``frame`` lying inside the inclusive lon/lat ranges."""
        return frame[(frame['longitude'] >= lon_range[0]) & (frame['longitude'] <= lon_range[1]) &
                     (frame['latitude'] >= lat_range[0]) & (frame['latitude'] <= lat_range[1])]

    ocean_df = _load_by_month(ocean_path)
    origin = _load_by_month(origin_path)

    # The region filter does not depend on the plotted column, so it is
    # evaluated once per region and reused by the line plots and the bars.
    ocean_by_region = {region: _within(ocean_df, lon_range, lat_range)
                       for region, (lon_range, lat_range) in regions.items()}
    origin_by_region = {region: _within(origin, lon_range, lat_range)
                        for region, (lon_range, lat_range) in regions.items()}

    # One title per region plus one for the difference bar chart, row by row.
    subplot_titles = []
    for col in columns_to_plot:
        subplot_titles.extend(f"{col} ({region})" for region in regions)
        subplot_titles.append(f"{col} (차이)")

    total_cols = len(regions) + 1  # +1 for the bar chart
    fig = make_subplots(rows=len(columns_to_plot), cols=total_cols, shared_xaxes=True, subplot_titles=subplot_titles)

    # Line plots: monthly mean of each column, origin first, then ocean.
    for i, col in enumerate(columns_to_plot):
        for j, region in enumerate(regions):
            origin_avg = origin_by_region[region].groupby('date_time')[col].mean().reset_index()
            ocean_avg = ocean_by_region[region].groupby('date_time')[col].mean().reset_index()

            fig.add_trace(go.Scatter(x=origin_avg['date_time'], y=origin_avg[col], mode="lines",
                                     name=f"{region} 전체"), row=i+1, col=j+1)
            fig.add_trace(go.Scatter(x=ocean_avg['date_time'], y=ocean_avg[col], mode="lines",
                                     name=f"바다숲 반경{radius}km"), row=i+1, col=j+1)

    # Bar charts: overall mean of origin minus overall mean of ocean, per region.
    diffs = {
        region: origin_by_region[region][columns_to_plot].mean() - ocean_by_region[region][columns_to_plot].mean()
        for region in regions
    }
    for i, col in enumerate(columns_to_plot):
        y_values = [diffs[region][col] for region in regions]
        fig.add_trace(go.Bar(x=list(regions.keys()), y=y_values, name=col), row=i+1, col=total_cols)

    # Layout and display.
    fig.update_layout(title = {
        'text' :title,
        'y':0.95,
        'x':0.5,
        'xanchor': 'center',
        'yanchor': 'top',
        'font': {
        'size': 24,
        'color': 'black',
        }},width=1500, height=100*len(columns_to_plot), margin=dict(t=100, b=40, l=40, r=40))
    fig.show()

def plot_difference(merged_data_path, columns_to_plot,title):
    """Plot the year-over-year change of regional means as bar charts.

    Args:
        merged_data_path: CSV with 'date_time', 'longitude', 'latitude' and
            every column named in ``columns_to_plot``.
        columns_to_plot: List of column names; one subplot row per column.
        title: Figure title.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    data = pd.read_csv(merged_data_path)
    data['date_time'] = pd.to_datetime(data['date_time'], format='mixed')
    data['year'] = data['date_time'].dt.year

    years = sorted(data['year'].unique())
    # The earliest year has no predecessor to compare with, so it is skipped.
    compared_years = years[1:]

    # Longitude range and latitude range of the West, South and East sea.
    regions = {
        '서해': [(124.5, 127), (34, 38)],
        '남해': [(126, 130), (33, 35)],
        '동해': [(128.5, 130), (34.5, 38.5)]
    }

    def _mean_for(bounds, year):
        """Column means of the rows of one year inside one region."""
        lon_range, lat_range = bounds
        rows = data[(data['year'] == year) &
                    (data['longitude'] >= lon_range[0]) & (data['longitude'] <= lon_range[1]) &
                    (data['latitude'] >= lat_range[0]) & (data['latitude'] <= lat_range[1])]
        return rows[columns_to_plot].mean()

    # diffs[region][year] = mean(year) - mean(year - 1)
    diffs = {
        region: {year: _mean_for(bounds, year) - _mean_for(bounds, year - 1)
                 for year in compared_years}
        for region, bounds in regions.items()
    }

    # One subplot per (column, region) pair.
    panel_titles = [f"{col} ({region})" for col in columns_to_plot for region in regions]
    fig = make_subplots(rows=len(columns_to_plot), cols=len(regions), subplot_titles=panel_titles)

    for row_idx, col in enumerate(columns_to_plot, start=1):
        for col_idx, region in enumerate(regions, start=1):
            y_values = [diffs[region][year][col] for year in compared_years]
            fig.add_trace(go.Bar(x=compared_years, y=y_values, name=f"{region} {col}"), row=row_idx, col=col_idx)

    fig.update_layout(title=title, height=200*len(columns_to_plot), width = 1000)
    fig.show()

def plot_monthly_difference(merged_data_path, columns_to_plot,title):
    """Plot, per month, the change of regional means versus the same month a year earlier.

    Args:
        merged_data_path: CSV with 'date_time', 'longitude', 'latitude' and
            every column named in ``columns_to_plot``.
        columns_to_plot: List of column names; one subplot row per column.
        title: Figure title.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    data = pd.read_csv(merged_data_path)
    data['date_time'] = pd.to_datetime(data['date_time'], format='mixed')
    data['year'] = data['date_time'].dt.year
    data['month'] = data['date_time'].dt.month

    years = sorted(data['year'].unique())
    months = range(1, 13)
    # Every (year, month) pair that is compared; the earliest year is skipped
    # because it has no previous year.
    periods = [(year, month) for year in years[1:] for month in months]

    # Longitude range and latitude range of the West, South and East sea.
    regions = {
        '서해': [(124.5, 127), (34, 38)],
        '남해': [(126, 130), (33, 35)],
        '동해': [(128.5, 130), (34.5, 38.5)]
    }

    def _mean_for(bounds, year, month):
        """Column means of the rows of one year and month inside one region."""
        lon_range, lat_range = bounds
        rows = data[(data['year'] == year) & (data['month'] == month) &
                    (data['longitude'] >= lon_range[0]) & (data['longitude'] <= lon_range[1]) &
                    (data['latitude'] >= lat_range[0]) & (data['latitude'] <= lat_range[1])]
        return rows[columns_to_plot].mean()

    # diffs[region]["<year>-<month>"] = mean(year, month) - mean(year - 1, month)
    diffs = {region: {} for region in regions}
    for region, bounds in regions.items():
        for year, month in periods:
            diffs[region][f"{year}-{month}"] = _mean_for(bounds, year, month) - _mean_for(bounds, year - 1, month)

    # One subplot per (column, region) pair.
    panel_titles = [f"{col} ({region})" for col in columns_to_plot for region in regions]
    fig = make_subplots(rows=len(columns_to_plot), cols=len(regions), subplot_titles=panel_titles)

    for row_idx, col in enumerate(columns_to_plot, start=1):
        for col_idx, region in enumerate(regions, start=1):
            x_values = [f"{year}-{month}" for year, month in periods]
            y_values = [diffs[region][label][col] for label in x_values]
            fig.add_trace(go.Bar(x=x_values, y=y_values, marker=dict(color='red')), row=row_idx, col=col_idx)

    fig.update_layout(title=title, height=200*len(columns_to_plot), width = 1200, showlegend=False)
    fig.show()




## 연도별 해양지형정보

In [16]:
def year_month_plot(merged_data_path, columns_to_plot,year,title):
    """Plot the monthly mean of each column for one year, per sea region.

    Args:
        merged_data_path: CSV with 'date_time', 'longitude', 'latitude' and
            every column named in ``columns_to_plot``.
        columns_to_plot: List of column names; one subplot row per column.
        year: Calendar year whose rows are plotted.
        title: Figure title.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    data = pd.read_csv(merged_data_path)

    # Parse with format='mixed', like the other loaders in this notebook, so
    # that timestamps written in more than one format do not raise.
    data['date_time'] = pd.to_datetime(data['date_time'], format='mixed')

    # Keep the requested year only. The explicit copy makes the frame
    # independent of the unfiltered data before a column is added to it.
    data = data[data['date_time'].dt.year == year].copy()
    data['month'] = data['date_time'].dt.month

    # Longitude range and latitude range of the East, West and South sea.
    regions = {
        '동해': [(128.5, 130), (34.5, 38.5)],
        '서해': [(124.5, 127), (34, 38)],
        '남해': [(126, 130), (33, 35)]
    }

    # One subplot per (column, region) pair.
    fig = make_subplots(rows=len(columns_to_plot), cols=len(regions),
                        subplot_titles=[f"{region} {col}" for col in columns_to_plot for region in regions],
                        shared_xaxes=True)

    for j, (region, (lon_range, lat_range)) in enumerate(regions.items(), start=1):
        # Rows inside the inclusive lon/lat ranges of this region.
        region_df = data[(data['longitude'] >= lon_range[0]) & (data['longitude'] <= lon_range[1]) &
                         (data['latitude'] >= lat_range[0]) & (data['latitude'] <= lat_range[1])]

        # Mean of every plotted column per month.
        monthly_avg = region_df.groupby('month')[columns_to_plot].mean().reset_index()

        for i, col in enumerate(columns_to_plot, start=1):
            fig.add_trace(
                go.Scatter(x=monthly_avg['month'], y=monthly_avg[col], mode='lines+markers', name=f"{region} {col}"),
                row=i, col=j
            )

    fig.update_layout(title=title,
                      barmode='group',showlegend=False,
                      height=800, width = 800,)

    fig.show()


In [17]:
# Columns to visualise.
columns_to_plot = ['수온[℃]', '용존산소[mg/L]', '염분[psu]', '총질소[μg/L]', '부유물질 농도[mg/L]']

# NOTE(review): the input files are named 2020.csv while the year 2010 is
# plotted — presumably the files also contain earlier years; confirm.
year_month_plot(
    merged_data_path="data/surface/merged/2020.csv",
    columns_to_plot=columns_to_plot,
    year=2010,
    title="2010년 해양지형정보(표층)",
)
year_month_plot(
    merged_data_path="data/low/merged/2020.csv",
    columns_to_plot=columns_to_plot,
    year=2010,
    title="2010년 해양지형정보(저층)",
)

In [19]:
# Columns to visualise.
columns_to_plot = ['수온[℃]', '용존산소[mg/L]', '염분[psu]', '총질소[μg/L]', '부유물질 농도[mg/L]']

# NOTE(review): the input files are named 2020.csv while the year 2015 is
# plotted — presumably the files also contain earlier years; confirm.
year_month_plot(
    merged_data_path="data/surface/merged/2020.csv",
    columns_to_plot=columns_to_plot,
    year=2015,
    title="2015년 해양지형정보(표층)",
)
year_month_plot(
    merged_data_path="data/low/merged/2020.csv",
    columns_to_plot=columns_to_plot,
    year=2015,
    title="2015년 해양지형정보(저층)",
)

In [29]:
import pandas as pd
import plotly.graph_objs as go
from plotly.subplots import make_subplots

def plot_year_comparison(df, year1, year2, columns_to_plot):
    """Compare the monthly means of two years as grouped bars, per sea region.

    The input frame is not modified: the parsed timestamps and the month
    column are added to a copy.

    Args:
        df: DataFrame with 'date_time', 'longitude', 'latitude' and every
            column named in ``columns_to_plot``.
        year1: First calendar year (drawn in 'rgb(55, 83, 109)').
        year2: Second calendar year (drawn in 'rgb(26, 118, 255)').
        columns_to_plot: List of column names; one subplot row per column.

    Returns:
        None. The figure is displayed with ``fig.show()``.
    """
    # Parse with format='mixed', like the other loaders in this notebook, so
    # that timestamps written in more than one format do not raise.
    timestamps = pd.to_datetime(df['date_time'], format='mixed')
    # assign() returns a new frame, leaving the caller's DataFrame untouched.
    data = df.assign(date_time=timestamps, month=timestamps.dt.month)

    # Longitude range and latitude range of the East, West and South sea.
    regions = {
        '동해': [(128.5, 130), (34.5, 38.5)],
        '서해': [(124.5, 127), (34, 38)],
        '남해': [(126, 130), (33, 35)]
    }

    # One subplot per (column, region) pair.
    fig = make_subplots(rows=len(columns_to_plot), cols=len(regions),
                        subplot_titles=[f"{region} {col}" for col in columns_to_plot for region in regions],
                        shared_xaxes=True)

    # Bars are added in this order inside every subplot: year1, then year2.
    year_styles = ((year1, 'rgb(55, 83, 109)'), (year2, 'rgb(26, 118, 255)'))

    for j, (region, (lon_range, lat_range)) in enumerate(regions.items(), start=1):
        # Rows inside the inclusive lon/lat ranges of this region.
        in_region = data[(data['longitude'] >= lon_range[0]) & (data['longitude'] <= lon_range[1]) &
                         (data['latitude'] >= lat_range[0]) & (data['latitude'] <= lat_range[1])]

        # Monthly mean of every plotted column, one frame per year.
        monthly_frames = [
            in_region[in_region['date_time'].dt.year == year]
            .groupby('month')[columns_to_plot].mean().reset_index()
            for year, _ in year_styles
        ]

        for i, col in enumerate(columns_to_plot, start=1):
            for (year, color), monthly_avg in zip(year_styles, monthly_frames):
                fig.add_trace(
                    go.Bar(x=monthly_avg['month'], y=monthly_avg[col], name=f"{year} {region} {col}", marker_color=color),
                    row=i, col=j
                )

    fig.update_layout(barmode='group',showlegend=False,
                      height=800, width = 800, title_text=f"{year1}년 {year2}년 해양지형정보 비교")

    fig.show()

# Surface-layer data used for the two-year comparison.
df = pd.read_csv("data/surface/merged/2020.csv")

# Columns to visualise.
columns_to_plot = ['수온[℃]', '용존산소[mg/L]', '염분[psu]', '총질소[μg/L]', '부유물질 농도[mg/L]']

# Draw the 2010 versus 2015 comparison.
plot_year_comparison(
    df=df,
    year1=2010,
    year2=2015,
    columns_to_plot=columns_to_plot,
)


ValueError: Invalid property specified for object of type plotly.graph_objs.Layout: 'showleged'

Did you mean "showlegend"?

    Valid properties:
        activeselection
            :class:`plotly.graph_objects.layout.Activeselection`
            instance or dict with compatible properties
        activeshape
            :class:`plotly.graph_objects.layout.Activeshape`
            instance or dict with compatible properties
        annotations
            A tuple of
            :class:`plotly.graph_objects.layout.Annotation`
            instances or dicts with compatible properties
        annotationdefaults
            When used in a template (as
            layout.template.layout.annotationdefaults), sets the
            default property values to use for elements of
            layout.annotations
        autosize
            Determines whether or not a layout width or height that
            has been left undefined by the user is initialized on
            each relayout. Note that, regardless of this attribute,
            an undefined layout width or height is always
            initialized on the first call to plot.
        autotypenumbers
            Using "strict" a numeric string in trace data is not
            converted to a number. Using *convert types* a numeric
            string in trace data may be treated as a number during
            automatic axis `type` detection. This is the default
            value; however it could be overridden for individual
            axes.
        bargap
            Sets the gap (in plot fraction) between bars of
            adjacent location coordinates.
        bargroupgap
            Sets the gap (in plot fraction) between bars of the
            same location coordinate.
        barmode
            Determines how bars at the same location coordinate are
            displayed on the graph. With "stack", the bars are
            stacked on top of one another With "relative", the bars
            are stacked on top of one another, with negative values
            below the axis, positive values above With "group", the
            bars are plotted next to one another centered around
            the shared location. With "overlay", the bars are
            plotted over one another, you might need to reduce
            "opacity" to see multiple bars.
        barnorm
            Sets the normalization for bar traces on the graph.
            With "fraction", the value of each bar is divided by
            the sum of all values at that location coordinate.
            "percent" is the same but multiplied by 100 to show
            percentages.
        boxgap
            Sets the gap (in plot fraction) between boxes of
            adjacent location coordinates. Has no effect on traces
            that have "width" set.
        boxgroupgap
            Sets the gap (in plot fraction) between boxes of the
            same location coordinate. Has no effect on traces that
            have "width" set.
        boxmode
            Determines how boxes at the same location coordinate
            are displayed on the graph. If "group", the boxes are
            plotted next to one another centered around the shared
            location. If "overlay", the boxes are plotted over one
            another, you might need to set "opacity" to see them
            multiple boxes. Has no effect on traces that have
            "width" set.
        calendar
            Sets the default calendar system to use for
            interpreting and displaying dates throughout the plot.
        clickmode
            Determines the mode of single click interactions.
            "event" is the default value and emits the
            `plotly_click` event. In addition this mode emits the
            `plotly_selected` event in drag modes "lasso" and
            "select", but with no event data attached (kept for
            compatibility reasons). The "select" flag enables
            selecting single data points via click. This mode also
            supports persistent selections, meaning that pressing
            Shift while clicking, adds to / subtracts from an
            existing selection. "select" with `hovermode`: "x" can
            be confusing, consider explicitly setting `hovermode`:
            "closest" when using this feature. Selection events are
            sent accordingly as long as "event" flag is set as
            well. When the "event" flag is missing, `plotly_click`
            and `plotly_selected` events are not fired.
        coloraxis
            :class:`plotly.graph_objects.layout.Coloraxis` instance
            or dict with compatible properties
        colorscale
            :class:`plotly.graph_objects.layout.Colorscale`
            instance or dict with compatible properties
        colorway
            Sets the default trace colors.
        computed
            Placeholder for exporting automargin-impacting values
            namely `margin.t`, `margin.b`, `margin.l` and
            `margin.r` in "full-json" mode.
        datarevision
            If provided, a changed value tells `Plotly.react` that
            one or more data arrays has changed. This way you can
            modify arrays in-place rather than making a complete
            new copy for an incremental change. If NOT provided,
            `Plotly.react` assumes that data arrays are being
            treated as immutable, thus any data array with a
            different identity from its predecessor contains new
            data.
        dragmode
            Determines the mode of drag interactions. "select" and
            "lasso" apply only to scatter traces with markers or
            text. "orbit" and "turntable" apply only to 3D scenes.
        editrevision
            Controls persistence of user-driven changes in
            `editable: true` configuration, other than trace names
            and axis titles. Defaults to `layout.uirevision`.
        extendfunnelareacolors
            If `true`, the funnelarea slice colors (whether given
            by `funnelareacolorway` or inherited from `colorway`)
            will be extended to three times its original length by
            first repeating every color 20% lighter then each color
            20% darker. This is intended to reduce the likelihood
            of reusing the same color when you have many slices,
            but you can set `false` to disable. Colors provided in
            the trace, using `marker.colors`, are never extended.
        extendiciclecolors
            If `true`, the icicle slice colors (whether given by
            `iciclecolorway` or inherited from `colorway`) will be
            extended to three times its original length by first
            repeating every color 20% lighter then each color 20%
            darker. This is intended to reduce the likelihood of
            reusing the same color when you have many slices, but
            you can set `false` to disable. Colors provided in the
            trace, using `marker.colors`, are never extended.
        extendpiecolors
            If `true`, the pie slice colors (whether given by
            `piecolorway` or inherited from `colorway`) will be
            extended to three times its original length by first
            repeating every color 20% lighter then each color 20%
            darker. This is intended to reduce the likelihood of
            reusing the same color when you have many slices, but
            you can set `false` to disable. Colors provided in the
            trace, using `marker.colors`, are never extended.
        extendsunburstcolors
            If `true`, the sunburst slice colors (whether given by
            `sunburstcolorway` or inherited from `colorway`) will
            be extended to three times its original length by first
            repeating every color 20% lighter then each color 20%
            darker. This is intended to reduce the likelihood of
            reusing the same color when you have many slices, but
            you can set `false` to disable. Colors provided in the
            trace, using `marker.colors`, are never extended.
        extendtreemapcolors
            If `true`, the treemap slice colors (whether given by
            `treemapcolorway` or inherited from `colorway`) will be
            extended to three times its original length by first
            repeating every color 20% lighter then each color 20%
            darker. This is intended to reduce the likelihood of
            reusing the same color when you have many slices, but
            you can set `false` to disable. Colors provided in the
            trace, using `marker.colors`, are never extended.
        font
            Sets the global font. Note that fonts used in traces
            and other layout components inherit from the global
            font.
        funnelareacolorway
            Sets the default funnelarea slice colors. Defaults to
            the main `colorway` used for trace colors. If you
            specify a new list here it can still be extended with
            lighter and darker colors, see
            `extendfunnelareacolors`.
        funnelgap
            Sets the gap (in plot fraction) between bars of
            adjacent location coordinates.
        funnelgroupgap
            Sets the gap (in plot fraction) between bars of the
            same location coordinate.
        funnelmode
            Determines how bars at the same location coordinate are
            displayed on the graph. With "stack", the bars are
            stacked on top of one another With "group", the bars
            are plotted next to one another centered around the
            shared location. With "overlay", the bars are plotted
            over one another, you might need to reduce "opacity" to
            see multiple bars.
        geo
            :class:`plotly.graph_objects.layout.Geo` instance or
            dict with compatible properties
        grid
            :class:`plotly.graph_objects.layout.Grid` instance or
            dict with compatible properties
        height
            Sets the plot's height (in px).
        hiddenlabels
            hiddenlabels is the funnelarea & pie chart analog of
            visible:'legendonly' but it can contain many labels,
            and can simultaneously hide slices from several
            pies/funnelarea charts
        hiddenlabelssrc
            Sets the source reference on Chart Studio Cloud for
            `hiddenlabels`.
        hidesources
            Determines whether or not a text link citing the data
            source is placed at the bottom-right cored of the
            figure. Has only an effect only on graphs that have
            been generated via forked graphs from the Chart Studio
            Cloud (at https://chart-studio.plotly.com or on-
            premise).
        hoverdistance
            Sets the default distance (in pixels) to look for data
            to add hover labels (-1 means no cutoff, 0 means no
            looking for data). This is only a real distance for
            hovering on point-like objects, like scatter points.
            For area-like objects (bars, scatter fills, etc)
            hovering is on inside the area and off outside, but
            these objects will not supersede hover on point-like
            objects in case of conflict.
        hoverlabel
            :class:`plotly.graph_objects.layout.Hoverlabel`
            instance or dict with compatible properties
        hovermode
            Determines the mode of hover interactions. If
            "closest", a single hoverlabel will appear for the
            "closest" point within the `hoverdistance`. If "x" (or
            "y"), multiple hoverlabels will appear for multiple
            points at the "closest" x- (or y-) coordinate within
            the `hoverdistance`, with the caveat that no more than
            one hoverlabel will appear per trace. If *x unified*
            (or *y unified*), a single hoverlabel will appear
            multiple points at the closest x- (or y-) coordinate
            within the `hoverdistance` with the caveat that no more
            than one hoverlabel will appear per trace. In this
            mode, spikelines are enabled by default perpendicular
            to the specified axis. If false, hover interactions are
            disabled.
        iciclecolorway
            Sets the default icicle slice colors. Defaults to the
            main `colorway` used for trace colors. If you specify a
            new list here it can still be extended with lighter and
            darker colors, see `extendiciclecolors`.
        images
            A tuple of :class:`plotly.graph_objects.layout.Image`
            instances or dicts with compatible properties
        imagedefaults
            When used in a template (as
            layout.template.layout.imagedefaults), sets the default
            property values to use for elements of layout.images
        legend
            :class:`plotly.graph_objects.layout.Legend` instance or
            dict with compatible properties
        mapbox
            :class:`plotly.graph_objects.layout.Mapbox` instance or
            dict with compatible properties
        margin
            :class:`plotly.graph_objects.layout.Margin` instance or
            dict with compatible properties
        meta
            Assigns extra meta information that can be used in
            various `text` attributes. Attributes such as the
            graph, axis and colorbar `title.text`, annotation
            `text` `trace.name` in legend items, `rangeselector`,
            `updatemenus` and `sliders` `label` text all support
            `meta`. One can access `meta` fields using template
            strings: `%{meta[i]}` where `i` is the index of the
            `meta` item in question. `meta` can also be an object
            for example `{key: value}` which can be accessed
            %{meta[key]}.
        metasrc
            Sets the source reference on Chart Studio Cloud for
            `meta`.
        minreducedheight
            Minimum height of the plot with margin.automargin
            applied (in px)
        minreducedwidth
            Minimum width of the plot with margin.automargin
            applied (in px)
        modebar
            :class:`plotly.graph_objects.layout.Modebar` instance
            or dict with compatible properties
        newselection
            :class:`plotly.graph_objects.layout.Newselection`
            instance or dict with compatible properties
        newshape
            :class:`plotly.graph_objects.layout.Newshape` instance
            or dict with compatible properties
        paper_bgcolor
            Sets the background color of the paper where the graph
            is drawn.
        piecolorway
            Sets the default pie slice colors. Defaults to the main
            `colorway` used for trace colors. If you specify a new
            list here it can still be extended with lighter and
            darker colors, see `extendpiecolors`.
        plot_bgcolor
            Sets the background color of the plotting area in-
            between x and y axes.
        polar
            :class:`plotly.graph_objects.layout.Polar` instance or
            dict with compatible properties
        scattergap
            Sets the gap (in plot fraction) between scatter points
            of adjacent location coordinates. Defaults to `bargap`.
        scattermode
            Determines how scatter points at the same location
            coordinate are displayed on the graph. With "group",
            the scatter points are plotted next to one another
            centered around the shared location. With "overlay",
            the scatter points are plotted over one another, you
            might need to reduce "opacity" to see multiple scatter
            points.
        scene
            :class:`plotly.graph_objects.layout.Scene` instance or
            dict with compatible properties
        selectdirection
            When `dragmode` is set to "select", this limits the
            selection of the drag to horizontal, vertical or
            diagonal. "h" only allows horizontal selection, "v"
            only vertical, "d" only diagonal and "any" sets no
            limit.
        selectionrevision
            Controls persistence of user-driven changes in selected
            points from all traces.
        selections
            A tuple of
            :class:`plotly.graph_objects.layout.Selection`
            instances or dicts with compatible properties
        selectiondefaults
            When used in a template (as
            layout.template.layout.selectiondefaults), sets the
            default property values to use for elements of
            layout.selections
        separators
            Sets the decimal and thousand separators. For example,
            *. * puts a '.' before decimals and a space between
            thousands. In English locales, dflt is ".," but other
            locales may alter this default.
        shapes
            A tuple of :class:`plotly.graph_objects.layout.Shape`
            instances or dicts with compatible properties
        shapedefaults
            When used in a template (as
            layout.template.layout.shapedefaults), sets the default
            property values to use for elements of layout.shapes
        showlegend
            Determines whether or not a legend is drawn. Default is
            `true` if there is a trace to show and any of these: a)
            Two or more traces would by default be shown in the
            legend. b) One pie trace is shown in the legend. c) One
            trace is explicitly given with `showlegend: true`.
        sliders
            A tuple of :class:`plotly.graph_objects.layout.Slider`
            instances or dicts with compatible properties
        sliderdefaults
            When used in a template (as
            layout.template.layout.sliderdefaults), sets the
            default property values to use for elements of
            layout.sliders
        smith
            :class:`plotly.graph_objects.layout.Smith` instance or
            dict with compatible properties
        spikedistance
            Sets the default distance (in pixels) to look for data
            to draw spikelines to (-1 means no cutoff, 0 means no
            looking for data). As with hoverdistance, distance does
            not apply to area-like objects. In addition, some
            objects can be hovered on but will not generate
            spikelines, such as scatter fills.
        sunburstcolorway
            Sets the default sunburst slice colors. Defaults to the
            main `colorway` used for trace colors. If you specify a
            new list here it can still be extended with lighter and
            darker colors, see `extendsunburstcolors`.
        template
            Default attributes to be applied to the plot. This
            should be a dict with format: `{'layout':
            layoutTemplate, 'data': {trace_type: [traceTemplate,
            ...], ...}}` where `layoutTemplate` is a dict matching
            the structure of `figure.layout` and `traceTemplate` is
            a dict matching the structure of the trace with type
            `trace_type` (e.g. 'scatter'). Alternatively, this may
            be specified as an instance of
            plotly.graph_objs.layout.Template.  Trace templates are
            applied cyclically to traces of each type. Container
            arrays (eg `annotations`) have special handling: An
            object ending in `defaults` (eg `annotationdefaults`)
            is applied to each array item. But if an item has a
            `templateitemname` key we look in the template array
            for an item with matching `name` and apply that
            instead. If no matching `name` is found we mark the
            item invisible. Any named template item not referenced
            is appended to the end of the array, so this can be
            used to add a watermark annotation or a logo image, for
            example. To omit one of these items on the plot, make
            an item with matching `templateitemname` and `visible:
            false`.
        ternary
            :class:`plotly.graph_objects.layout.Ternary` instance
            or dict with compatible properties
        title
            :class:`plotly.graph_objects.layout.Title` instance or
            dict with compatible properties
        titlefont
            Deprecated: Please use layout.title.font instead. Sets
            the title font. Note that the title's font used to be
            customized by the now deprecated `titlefont` attribute.
        transition
            Sets transition options used during Plotly.react
            updates.
        treemapcolorway
            Sets the default treemap slice colors. Defaults to the
            main `colorway` used for trace colors. If you specify a
            new list here it can still be extended with lighter and
            darker colors, see `extendtreemapcolors`.
        uirevision
            Used to allow user interactions with the plot to
            persist after `Plotly.react` calls that are unaware of
            these interactions. If `uirevision` is omitted, or if
            it is given and it changed from the previous
            `Plotly.react` call, the exact new figure is used. If
            `uirevision` is truthy and did NOT change, any
            attribute that has been affected by user interactions
            and did not receive a different value in the new figure
            will keep the interaction value. `layout.uirevision`
            attribute serves as the default for `uirevision`
            attributes in various sub-containers. For finer control
            you can set these sub-attributes directly. For example,
            if your app separately controls the data on the x and y
            axes you might set `xaxis.uirevision=*time*` and
            `yaxis.uirevision=*cost*`. Then if only the y data is
            changed, you can update `yaxis.uirevision=*quantity*`
            and the y axis range will reset but the x axis range
            will retain any user-driven zoom.
        uniformtext
            :class:`plotly.graph_objects.layout.Uniformtext`
            instance or dict with compatible properties
        updatemenus
            A tuple of
            :class:`plotly.graph_objects.layout.Updatemenu`
            instances or dicts with compatible properties
        updatemenudefaults
            When used in a template (as
            layout.template.layout.updatemenudefaults), sets the
            default property values to use for elements of
            layout.updatemenus
        violingap
            Sets the gap (in plot fraction) between violins of
            adjacent location coordinates. Has no effect on traces
            that have "width" set.
        violingroupgap
            Sets the gap (in plot fraction) between violins of the
            same location coordinate. Has no effect on traces that
            have "width" set.
        violinmode
            Determines how violins at the same location coordinate
            are displayed on the graph. If "group", the violins are
            plotted next to one another centered around the shared
            location. If "overlay", the violins are plotted over
            one another, you might need to set "opacity" to see
            them multiple violins. Has no effect on traces that
            have "width" set.
        waterfallgap
            Sets the gap (in plot fraction) between bars of
            adjacent location coordinates.
        waterfallgroupgap
            Sets the gap (in plot fraction) between bars of the
            same location coordinate.
        waterfallmode
            Determines how bars at the same location coordinate are
            displayed on the graph. With "group", the bars are
            plotted next to one another centered around the shared
            location. With "overlay", the bars are plotted over one
            another, you might need to reduce "opacity" to see
            multiple bars.
        width
            Sets the plot's width (in px).
        xaxis
            :class:`plotly.graph_objects.layout.XAxis` instance or
            dict with compatible properties
        yaxis
            :class:`plotly.graph_objects.layout.YAxis` instance or
            dict with compatible properties
        
Did you mean "showlegend"?

Bad property path:
showleged
^^^^^^^^^

In [54]:
from scipy.stats import ttest_ind

def analyze_difference(data_path, columns_to_analyze, title, split_year=2018):
    """Compare column means before/after a split year and t-test the difference.

    Reads the CSV at ``data_path``, derives a ``year`` column from its
    ``date_time`` column, and splits the rows into ``year < split_year``
    ("before") and ``year >= split_year`` ("after"; the split year itself
    falls into this group). The per-column means of both groups are drawn as
    a grouped bar chart, and an independent two-sample t-test
    (``scipy.stats.ttest_ind`` with its default settings) is run per column
    on the non-null values of each group.

    Args:
        data_path: Path of the CSV file. It must contain ``date_time`` and
            every column named in ``columns_to_analyze``.
        columns_to_analyze: Names of the columns to compare.
        title: Title of the bar chart.
        split_year: First year of the "after" group. Defaults to 2018, the
            value that used to be hard-coded, so existing calls are unchanged.

    Returns:
        None. The figure is shown and one result line per column is printed.
    """
    # Load the data and derive the calendar year of every measurement.
    df = pd.read_csv(data_path)
    df['date_time'] = pd.to_datetime(df['date_time'], format='mixed')
    df['year'] = df['date_time'].dt.year

    # Split around `split_year`. Two explicit comparisons are used so rows
    # without a valid year end up in neither group.
    before = df[df['year'] < split_year]
    after = df[df['year'] >= split_year]

    # Group means per analysed column.
    avg_before = before[columns_to_analyze].mean()
    avg_after = after[columns_to_analyze].mean()

    # Independent two-sample t-test per column, ignoring missing values.
    test_results = [
        ttest_ind(before[col].dropna(), after[col].dropna())
        for col in columns_to_analyze
    ]

    # Grouped bar chart of the two sets of means.
    fig = go.Figure(data=[
        go.Bar(name=f'Before {split_year}', x=columns_to_analyze, y=avg_before),
        go.Bar(name=f'After {split_year}', x=columns_to_analyze, y=avg_after)
    ])
    fig.update_layout(title=title,
                      barmode='group',
                      height=600, width=600)
    fig.show()

    # Report the t statistic and p-value of every column.
    for col, (t, p) in zip(columns_to_analyze, test_results):
        print(f"For {col}: t-value = {t:.2f}, p-value = {p:.4f}")

# Example usage: run the before/after comparison for the surface and bottom layers.
columns_to_analyze = ['수온[℃]', '용존산소[mg/L]', '염분[psu]', '총질소[μg/L]', '부유물질 농도[mg/L]']
layer_runs = (
    ("data/surface/0.05/2020.csv", "2018년도 기준 바다숲 설치 이전 이후 비교(표층)"),
    ("data/low/0.05/2020.csv", "2018년도 기준 바다숲 설치 이전 이후 비교(저층)"),
)
for csv_path, chart_title in layer_runs:
    analyze_difference(csv_path, columns_to_analyze, chart_title)

For 수온[℃]: t-value = -5.62, p-value = 0.0000
For 용존산소[mg/L]: t-value = -1.73, p-value = 0.0834
For 염분[psu]: t-value = -3.77, p-value = 0.0002
For 총질소[μg/L]: t-value = 25.86, p-value = 0.0000
For 부유물질 농도[mg/L]: t-value = -4.36, p-value = 0.0000


For 수온[℃]: t-value = -7.93, p-value = 0.0000
For 용존산소[mg/L]: t-value = 0.77, p-value = 0.4405
For 염분[psu]: t-value = -2.89, p-value = 0.0039
For 총질소[μg/L]: t-value = 27.30, p-value = 0.0000
For 부유물질 농도[mg/L]: t-value = -4.36, p-value = 0.0000


p-value: p-value는 귀무 가설 (두 그룹 간의 차이가 없다는 가설)이 참일 때 관측된 통계치 (t-value)와 같거나 더 극단적인 통계치가 관측될 확률을 나타냅니다. 일반적으로 p-value가 0.05보다 작으면, 두 그룹 간의 차이가 통계적으로 유의미하다고 판단합니다.

t-value: t-value는 두 그룹 간의 평균 차이의 크기를 표준 오차로 나눈 값입니다. t-value의 절댓값이 크면 클수록 두 그룹 간의 차이가 크다는 것을 의미합니다. t-value의 부호는 그룹 간의 차이의 방향을 나타냅니다.


수온[℃]: p-value = 0.0000 (< 0.05) 이므로 두 그룹 간의 차이는 통계적으로 유의미합니다.

용존산소[mg/L]: p-value = 0.0834 (> 0.05) 이므로 두 그룹 간의 차이는 통계적으로 유의미하지 않습니다.

염분[psu]: p-value = 0.0002 (< 0.05) 이므로 두 그룹 간의 차이는 통계적으로 유의미합니다.

총질소[μg/L]: p-value = 0.0000 (< 0.05) 이므로 두 그룹 간의 차이는 통계적으로 유의미합니다.

부유물질 농도[mg/L]: p-value = 0.0000 (< 0.05) 이므로 두 그룹 간의 차이는 통계적으로 유의미합니다.

따라서, '용존산소[mg/L]'을 제외한 모든 변수에서 2018년 이전과 이후의 차이는 통계적으로 유의미합니다.

In [35]:
# Example usage: yearly difference plots for the surface and bottom layers.
columns_to_plot = ['수온[℃]', '용존산소[mg/L]', '염분[psu]', '총질소[μg/L]', '부유물질 농도[mg/L]']
yearly_runs = (
    ('data/surface/merged/2020.csv', "연도별 차이(표층)"),
    ('data/low/merged/2020.csv', "연도별 차이(저층)"),
)
for csv_path, chart_title in yearly_runs:
    plot_difference(csv_path, columns_to_plot, chart_title)

In [37]:
# Example usage: monthly difference plots for the surface and bottom layers.
columns_to_plot = ['수온[℃]', '용존산소[mg/L]', '염분[psu]', '총질소[μg/L]', '부유물질 농도[mg/L]']
monthly_runs = (
    ('data/surface/merged/2020.csv', "월별 차이 (표층)"),
    ('data/low/merged/2020.csv', "월별 차이 (저층)"),
)
for csv_path, chart_title in monthly_runs:
    plot_monthly_difference(csv_path, columns_to_plot, chart_title)

## 2010년~ 2020년 해양환경 변화 시각화

해양지형정보 한 포인트당 겹치는 바다숲의 개수와 그것이 미치는 영향에 대해 알아봄
바다숲은 통상적으로 5년 이상 되어야 효과가 나타난다고 보는 것 같음

먼저 KDTree를 활용해 좌표 간 거리를 계산하고, 특정 radius 안에 들어온 station을 df로 만든 뒤 각 포인트마다 정보의 변화를 찾기

['수온[℃]','용존산소[mg/L]','염분[psu]','총질소[μg/L]', '부유물질 농도[mg/L]']가 사용됨. 더 많은 정보를 가져올 수도 있음

## 2018년

차이가 양의 값을 가지면 전체의 평균이 더 큽니다.

In [93]:
# NOTE: this whole cell is disabled (every statement is commented out). When enabled it
# prints the columns and station counts, then draws several measurement time series
# for the first 10 stations of the 2018 surface file in a 5x2 subplot grid.
# NOTE(review): the '염분[psu]' trace below reuses the '화학적산소요구량' legend label,
# which looks like a copy-paste slip; fix it if this cell is ever re-enabled.
# ocean_df = pd.read_csv(f'data/surface/0.05/2018.csv')
# origin = pd.read_csv('./data/2018.csv')
# print(ocean_df.columns)
# print(len(origin['station'].unique().tolist()))
# unique_values = ocean_df['station'].unique().tolist()
# print("개수:",len(unique_values))
# fig = make_subplots(rows=5, cols=2)


# for i, d in enumerate(unique_values[:10]):
#     row = i // 2 + 1  # 1-based subplot row
#     col = i % 2 + 1   # 1-based subplot column
#     x = ocean_df.loc[ocean_df['station'] == d].sort_values(by='date_time').reset_index()
    
#     fig.add_trace(go.Scatter(x=x['date_time'], y=x['용존산소[mg/L]'], name=f'용존산소 {d}'), row=row, col=col)
#     fig.add_trace(go.Scatter(x=x['date_time'], y=x['총질소[μg/L]'], name=f'총질소 {d}'), row=row, col=col)
#     fig.add_trace(go.Scatter(x=x['date_time'], y=x['총인[μg/L]'], name=f'총인 {d}'), row=row, col=col)
#     fig.add_trace(go.Scatter(x=x['date_time'], y=x['용존무기질소[μg/L]'], name=f'용존무기질소 {d}'), row=row, col=col)
#     fig.add_trace(go.Scatter(x=x['date_time'], y=x['화학적산소요구량[mg/L]'], name=f'화학적산소요구량 {d}'), row=row, col=col)
#     fig.add_trace(go.Scatter(x=x['date_time'], y=x['염분[psu]'], name=f'화학적산소요구량 {d}'), row=row, col=col)
#     fig.add_trace(go.Scatter(x=x['date_time'], y=x['아질산성질소[μg/L]'], name=f'아질산성질소 {d}'), row=row, col=col)

# fig.update_layout(width=1200, height=1200, margin=dict(t=50, b=50, l=50, r=50))

# fig.show()

Index(['station', 'date_time', 'longitude', 'latitude', '총질소[μg/L]', '염분[psu]',
       '수온[℃]', '클로로필-a[μg/L]', '부유물질 농도[mg/L]', '용존산소[mg/L]', '수소이온농도[무단위]',
       '규산염[μg/L]', '아질산성질소[μg/L]', '인산염인[μg/L]', '질산성질소[μg/L]', '투명도[m]',
       '용존무기질소[μg/L]', '화학적산소요구량[mg/L]', '암모니아성 질소[μg/L]', '총인[μg/L]',
       'overlap_count', '유분[mg/L]', '부유물질 농도[μg/L]'],
      dtype='object')


425
개수: 116


In [92]:
# 2018 map for the surface 0.05 file; the title labels this radius as 5.555 km.
# map_scatter is defined elsewhere in the notebook (presumably plots stations and marine-forest sites — confirm).
map_scatter("2018년도 해양지형정보, 바다숲 위치(5.555km)",'data/surface/0.05/2018.csv')

In [148]:
# 2018 surface layer, 5 km radius: surface 0.05 file together with the surface merged file.
# sc_by_col is defined elsewhere in the notebook; presumably it compares the near-forest
# stations with the overall average per column — confirm against its definition.
sc_by_col('data/surface/0.05/2018.csv','./data/surface/merged/2018.csv',5,'2018년 바다숲 반경 5km 표층')

저층- 5km반경- 2018년도 까지

차이가 양의 값을 가지면 전체의 평균이 더 큽니다.

In [149]:
# 2018 bottom layer, 5 km radius.
# NOTE(review): the first argument is the low-layer file, but the second is the *surface*
# merged file. Confirm whether a low-layer merged file was intended here
# (cf. 'data/low/merged/2020.csv' used elsewhere in this notebook).
sc_by_col('data/low/0.05/2018.csv','./data/surface/merged/2018.csv',5, '2018년 바다숲 반경 5km 저층')

1km

In [95]:
# 2018 map for the surface 0.01 file; the title labels this radius as 1 km.
map_scatter("2018년도 해양지형정보, 바다숲 위치(1km)",'data/surface/0.01/2018.csv')

In [152]:
# 2018, 1 km radius. Both inputs are surface-layer files, so the title is labelled
# 표층 (surface) to match the data; it previously said 저층 (bottom layer).
sc_by_col('data/surface/0.01/2018.csv','./data/surface/merged/2018.csv',1,"2018년 바다숲 반경 1km 표층")

## 2019

In [154]:
# 2019 overview map followed by the surface- and bottom-layer comparisons (5 km radius).
# The chart titles now say 2019 to match the 2019 input files (they previously said 2018).
map_scatter("2019년도 해양지형정보, 바다숲 위치(5.555km)",'data/surface/0.05/2019.csv')
sc_by_col('data/surface/0.05/2019.csv','./data/surface/merged/2019.csv',5,'2019년 바다숲 반경 5km 표층')
# The bottom-layer (저층) call used to re-read the surface file, reproducing the surface
# result under a 저층 title. It now reads the low-layer file, following the 2018 저층 call.
# Assumes 'data/low/0.05/2019.csv' exists alongside the 2018/2020 low-layer files.
# NOTE(review): the reference file is still the surface merged CSV, as in the 2018 저층
# call; confirm whether a low-layer merged file should be used instead.
sc_by_col('data/low/0.05/2019.csv','./data/surface/merged/2019.csv',5,'2019년 바다숲 반경 5km 저층')

## 2020

In [155]:
# 2020 overview map followed by the surface- and bottom-layer comparisons (5 km radius).
map_scatter("2020년도 해양지형정보, 바다숲 위치(5.555km)",'data/surface/0.05/2020.csv')
sc_by_col('data/surface/0.05/2020.csv','./data/surface/merged/2020.csv',5,'2020년 바다숲 반경 5km 표층')
# The bottom-layer (저층) call used to re-read the surface file, reproducing the surface
# result under a 저층 title. It now reads 'data/low/0.05/2020.csv', the low-layer file
# already used by analyze_difference in this notebook.
# NOTE(review): the reference file is still the surface merged CSV, as in the 2018 저층
# call; confirm whether 'data/low/merged/2020.csv' should be used instead.
sc_by_col('data/low/0.05/2020.csv','./data/surface/merged/2020.csv',5,'2020년 바다숲 반경 5km 저층')