In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
# import cufflinks as cf


## 생활폐기물

In [None]:
# Household-waste generation data: one CP949-encoded CSV per year.
hld_19_path = 'data/waste/household/생활_19.csv'
hld_20_path = 'data/waste/household/생활_20.csv'
hld_19 = pd.read_csv(hld_19_path, encoding='cp949')
hld_20 = pd.read_csv(hld_20_path, encoding='cp949')
# Show both shapes side by side as a quick sanity check.
hld_19.shape, hld_20.shape

((756, 5), (756, 5))

In [None]:
# Give both yearly tables the same column names (the raw headers differ between
# the two exports), then display the resulting columns to check them.
hld_19_renames = {'시도(1)': '행정구명', '항목별(1)': '배출된방식', '항목별(2)': '폐기물종류', '항목별(3)': '폐기물'}
hld_20_renames = {'구분(1)': '행정구명', '구분(2)': '배출된방식', '구분(3)': '폐기물종류', '구분(4)': '폐기물'}
hld_19 = hld_19.rename(columns=hld_19_renames)
hld_20 = hld_20.rename(columns=hld_20_renames)
hld_19.columns, hld_20.columns


(Index(['행정구명', '배출된방식', '폐기물종류', '폐기물', '2019'], dtype='object'),
 Index(['행정구명', '배출된방식', '폐기물종류', '폐기물', '2020'], dtype='object'))

In [None]:
# Tidy data: keep the four descriptive columns as identifiers and melt every
# remaining column into `variable` / `value` pairs.
# The frames are rebound in place, so running this cell twice melts them again.
hld_id_cols = ['행정구명', '배출된방식', '폐기물종류', '폐기물']
hld_19 = pd.melt(hld_19, id_vars=hld_id_cols)
hld_20 = pd.melt(hld_20, id_vars=hld_id_cols)


In [None]:
# Preview the first five rows of the reshaped 2020 table.
hld_20.head(n=5)

Unnamed: 0,행정구명,배출된방식,폐기물종류,폐기물,variable,value
0,전국,합계,소계,소계,2020,17303386.4
1,전국,종량제방식 등 혼합배출,소계,소계,2020,8041965.2
2,전국,종량제방식 등 혼합배출,가연성,소계,2020,6929046.9
3,전국,종량제방식 등 혼합배출,가연성,폐지류,2020,1829611.5
4,전국,종량제방식 등 혼합배출,가연성,폐합성수지류,2020,1706740.2


## 대기오염

In [None]:
# Fine-dust (PM2.5) data
# Unit: μg/m³
dust_path = 'data/waste/air/미세먼지_PM2.5_1920.csv'
dust = pd.read_csv(dust_path, encoding='cp949')
dust.head(n=2)

Unnamed: 0,구분(1),구분(2),2019.01,2019.02,2019.03,2019.04,2019.05,2019.06,2019.07,2019.08,...,2020.03,2020.04,2020.05,2020.06,2020.07,2020.08,2020.09,2020.10,2020.11,2020.12
0,총계,소계,35,34,39,20,25,20,17,17,...,21,18,18,19,12,14,12,17,21,24
1,서울특별시,서울특별시,38,35,45,21,29,20,19,16,...,25,21,19,21,13,14,11,17,24,27


In [None]:
# Sulfurous-acid-gas data
# Unit: ppm
acid_path = 'data/waste/air/아황산_1920.csv'
acid = pd.read_csv(acid_path, encoding='cp949')
acid.head(n=2)

Unnamed: 0,구분(1),구분(2),2019.01,2019.02,2019.03,2019.04,2019.05,2019.06,2019.07,2019.08,...,2020.03,2020.04,2020.05,2020.06,2020.07,2020.08,2020.09,2020.10,2020.11,2020.12
0,총계,소계,0.005,0.004,0.004,0.004,0.004,0.004,0.003,0.003,...,0.003,0.003,0.003,0.003,0.003,0.003,0.003,0.003,0.003,0.003
1,서울특별시,서울특별시,0.005,0.005,0.005,0.004,0.005,0.004,0.004,0.004,...,0.003,0.003,0.003,0.003,0.002,0.003,0.003,0.003,0.003,0.004


In [None]:
# Ozone data
ozone_path = 'data/waste/air/오존_월별_도시별_대기오염도_20220608213127.csv'
ozone = pd.read_csv(ozone_path, encoding='cp949')
ozone.head(n=2)

Unnamed: 0,구분(1),구분(2),2019.01,2019.02,2019.03,2019.04,2019.05,2019.06,2019.07,2019.08,...,2020.03,2020.04,2020.05,2020.06,2020.07,2020.08,2020.09,2020.10,2020.11,2020.12
0,총계,소계,0.019,0.025,0.036,0.036,0.05,0.042,0.032,0.032,...,0.033,0.042,0.041,0.045,0.031,0.023,0.032,0.028,0.021,0.02
1,서울특별시,서울특별시,0.012,0.018,0.028,0.029,0.043,0.039,0.031,0.03,...,0.027,0.036,0.034,0.044,0.031,0.019,0.028,0.022,0.015,0.014


In [None]:
# Tidy data: turn every monthly column into a row. The former column label goes
# into '시점' and each pollutant gets its own value column name.
air_id_cols = ['구분(1)', '구분(2)']
air_dust = pd.melt(dust, id_vars=air_id_cols, var_name='시점', value_name='미세먼지')
air_acid = pd.melt(acid, id_vars=air_id_cols, var_name='시점', value_name='아황산가스')
air_oz = pd.melt(ozone, id_vars=air_id_cols, var_name='시점', value_name='오존')

In [None]:
# Rename the two generic identifier columns; the same mapping applies to all
# three pollutant tables. The final line displays the columns for verification.
air_renames = {'구분(1)': '행정구명', '구분(2)': '도시명'}
air_dust = air_dust.rename(columns=air_renames)
air_acid = air_acid.rename(columns=air_renames)
air_oz = air_oz.rename(columns=air_renames)
air_dust.columns, air_acid.columns, air_oz.columns

(Index(['행정구명', '도시명', '시점', '미세먼지'], dtype='object'),
 Index(['행정구명', '도시명', '시점', '아황산가스'], dtype='object'),
 Index(['행정구명', '도시명', '시점', '오존'], dtype='object'))

In [None]:
# Combine the three pollutant tables into one frame keyed on region, city and
# period. `merge` defaults to an inner join, so only keys present in all three
# tables survive.
air_keys = ['행정구명', '도시명', '시점']
air = air_dust.merge(air_acid, on=air_keys).merge(air_oz, on=air_keys)
air.head(n=2)

Unnamed: 0,행정구명,도시명,시점,미세먼지,아황산가스,오존
0,총계,소계,2019.01,35,0.005,0.019
1,서울특별시,서울특별시,2019.01,38,0.005,0.012


## 토양오염

In [None]:
# Soil-contamination data; `header=1` takes the column names from the second
# row of each sheet.
soil_19_path = 'data/waste/soil/토양오염 실태조사_검색결과_20220608 (1).xlsx'
soil_20_path = 'data/waste/soil/토양오염 실태조사_검색결과_20220608.xlsx'
soil_19 = pd.read_excel(soil_19_path, header=1)
soil_20 = pd.read_excel(soil_20_path, header=1)
soil_19.shape, soil_20.shape

In [None]:
# Keep only the needed columns: the same set of columns is dropped from both
# yearly tables, then the remaining columns are displayed.
soil_unused_cols = ['조사지역종류', '지점명칭', '지목', '지역구분', '면적(㎡)', '시료번호', '비고']
soil_19 = soil_19.drop(columns=soil_unused_cols)
soil_20 = soil_20.drop(columns=soil_unused_cols)
soil_19.columns, soil_20.columns

(Index(['년도', '시도', '시료깊이(m)', 'Cd', 'Cu', 'As', 'Hg', 'Pb', 'Cr6+', 'Zn', 'Ni',
        'F', '유기인', 'PCB', 'CN', '페놀', '벤젠', '톨루엔', '에틸벤젠', '크실렌', 'TPH', 'TCE',
        'PCE', '벤조(a)피렌', '1,2-디클로로에탄', 'pH'],
       dtype='object'),
 Index(['년도', '시도', '시료깊이(m)', 'Cd', 'Cu', 'As', 'Hg', 'Pb', 'Cr6+', 'Zn', 'Ni',
        'F', '유기인', 'PCB', 'CN', '페놀', '벤젠', '톨루엔', '에틸벤젠', '크실렌', 'TPH', 'TCE',
        'PCE', '벤조(a)피렌', '1,2-디클로로에탄', 'pH'],
       dtype='object'))

In [None]:
# Preview the first two rows of the trimmed 2019 soil table.
soil_19.head(n=2)

Unnamed: 0,년도,시도,시료깊이(m),Cd,Cu,As,Hg,Pb,Cr6+,Zn,Ni,F,유기인,PCB,CN,페놀,벤젠,톨루엔,에틸벤젠,크실렌,TPH,TCE,PCE,벤조(a)피렌,"1,2-디클로로에탄",pH
0,2019,서울특별시,0~1,0.87,69.6,6.89,0.11,106.6,0.7,226.9,10.1,200,0.0,0.0,0.0,0.0,-,-,-,-,-,0.0,0.0,-,0.0,7.5
1,2019,서울특별시,1-2,0.0,6.8,5.35,0.02,16.1,0.0,33.9,10.4,162,0.0,0.0,0.0,0.02,-,-,-,-,-,0.0,0.0,-,0.0,7.7


In [None]:
# Preview the first two rows of the trimmed 2020 soil table.
soil_20.head(n=2)

Unnamed: 0,년도,시도,시료깊이(m),Cd,Cu,As,Hg,Pb,Cr6+,Zn,Ni,F,유기인,PCB,CN,페놀,벤젠,톨루엔,에틸벤젠,크실렌,TPH,TCE,PCE,벤조(a)피렌,"1,2-디클로로에탄",pH
0,2020,서울특별시,2~3,0.94,229.0,11.91,1.31,511.7,0.0,363.0,19.0,-,-,-,-,-,-,-,-,-,-,-,-,-,-,8.7
1,2020,서울특별시,2~3,1.88,43.2,4.16,0.13,60.9,0.0,557.5,19.3,-,0.00,-,-,0.02,0.0,0.0,0.0,0.0,73,0.0,0.0,-,0.0,11.0


In [None]:
# Tidy data: one row per (sample, measured item). The item name goes into
# '토양항목'; the measurement keeps the default `value` column name.
soil_id_cols = ['년도', '시도', '시료깊이(m)']
soil_cols = [
    'Cd', 'Cu', 'As', 'Hg', 'Pb', 'Cr6+', 'Zn', 'Ni',
    'F', '유기인', 'PCB', 'CN', '페놀', '벤젠', '톨루엔', '에틸벤젠', '크실렌', 'TPH', 'TCE',
    'PCE', '벤조(a)피렌', '1,2-디클로로에탄', 'pH',
]
soil_19 = pd.melt(soil_19, id_vars=soil_id_cols, value_vars=soil_cols, var_name='토양항목')
soil_20 = pd.melt(soil_20, id_vars=soil_id_cols, value_vars=soil_cols, var_name='토양항목')


In [None]:
# Preview the melted 2020 soil table. The original cell ended in a dangling
# `soil_20.`, which is a SyntaxError; the recorded output (five rows) matches
# a default `.head()` call.
soil_20.head()

Unnamed: 0,년도,시도,시료깊이(m),토양항목,value
0,2020,서울특별시,2~3,Cd,0.94
1,2020,서울특별시,2~3,Cd,1.88
2,2020,서울특별시,2~3,Cd,0.57
3,2020,서울특별시,2~3,Cd,0.14
4,2020,서울특별시,3~4,Cd,0.2


## 수질오염

In [None]:
# Groundwater quality standards: average concentration per item.
water_path = 'data/waste/water/지하수_1920.csv'
water = pd.read_csv(water_path, encoding='cp949')
water.shape

(1080, 5)

In [None]:
# Rename the generic identifier columns to descriptive names and preview the result.
water_renames = {'구분(1)': '행정구명', '구분(2)': '분기', '항목': '수질항목'}
water = water.rename(columns=water_renames)
water.head(n=2)

Unnamed: 0,행정구명,분기,수질항목,2019,2020
0,전국,전체,pH (pH),7.1,7.1
1,전국,전체,총대장균군 (군수/100mL),122.0,506.0


In [None]:
# Tidy data: melt the per-year columns into `variable` / `value` rows, keeping the
# three renamed identifier columns. The result is bound to a new name so it is not
# thrown away after being displayed; `water` itself stays in its wide form, so the
# cell can be re-run safely.
water_tidy = water.melt(id_vars=['행정구명', '분기', '수질항목'])
water_tidy

Unnamed: 0,행정구명,분기,수질항목,variable,value
0,전국,전체,pH (pH),2019,7.1
1,전국,전체,총대장균군 (군수/100mL),2019,122
2,전국,전체,질산성질소NO3-N (mg/L),2019,3.6
3,전국,전체,염소이온Cl- (mg/L),2019,67.8
4,전국,전체,카드뮴Cd (mg/L),2019,0.000
...,...,...,...,...,...
2155,제주특별자치도,하반기,1.1.1-트리클로로에탄TCE111 (mg/L),2020,불검출
2156,제주특별자치도,하반기,벤젠Benzene (mg/L),2020,불검출
2157,제주특별자치도,하반기,톨루엔Toluene (mg/L),2020,불검출
2158,제주특별자치도,하반기,에틸벤젠Ethyl Benzene (mg/L),2020,불검출


## 사업장 폐기물

#### 발생 - 생활 폐기물

In [None]:
# Load the 2019 data (CP949-encoded CSV) and preview the first five rows.
bis_ocr_hld_19_path = 'data/business/발생_사업장_생활/폐기물_발생현황_사업장생활계폐기물_20220609133950.csv'
bis_ocr_hld_19 = pd.read_csv(bis_ocr_hld_19_path, encoding='cp949')
bis_ocr_hld_19.head(n=5)


Unnamed: 0,시도(1),항목별(1),항목별(2),항목별(3),2019
0,합계,총계,소계,소계,12049.3
1,합계,종량제방식에의한혼합배출,소계,소계,8536.5
2,합계,종량제방식에의한혼합배출,가연성,소계,6113.3
3,합계,종량제방식에의한혼합배출,가연성,음식물류폐기물,276.4
4,합계,종량제방식에의한혼합배출,가연성,폐지류,122.4


In [None]:
# Inspect the distinct labels of two category columns. Only the last expression
# of a cell is displayed, so the first call produces no visible output.
# Waste name
bis_ocr_hld_19.loc[:, '항목별(3)'].unique()
# Waste type
bis_ocr_hld_19.loc[:, '항목별(2)'].unique()

array(['소계', '가연성', '불연성', '건설폐재류', '폐지류', '폐유리병류', '금속캔', '폐합성수지류',
       '폐고무류', '폐전기전자제품', '폐전지류', '영농폐기물', '폐형광등', '고철류', '폐의류', '폐섬유류',
       '폐가구류', '폐식용유', '기타'], dtype=object)

In [None]:
# Load the 2020 data (CP949-encoded CSV) and preview the first five rows.
bis_ocr_hld_20_path = 'data/business/발생_사업장_생활/폐기물_발생현황_사업장비배출시설계_생활계_폐기물_20220609134420.csv'
bis_ocr_hld_20 = pd.read_csv(bis_ocr_hld_20_path, encoding='cp949')
bis_ocr_hld_20.head(n=5)

Unnamed: 0,구분(1),구분(2),구분(3),구분(4),2020
0,전국,합계,소계,소계,5241248.2
1,전국,종량제방식 등 혼합배출,소계,소계,477063.9
2,전국,종량제방식 등 혼합배출,가연성,소계,387627.2
3,전국,종량제방식 등 혼합배출,가연성,폐지류,28884.3
4,전국,종량제방식 등 혼합배출,가연성,폐합성수지류,190611.3


#### 발생 - 배출시설 폐기물

In [None]:
# Load the 2019 data (CP949-encoded CSV) and preview the first five rows.
bis_ocr_dis_19_path = 'data/business/발생_사업장_배출시설/폐기물_발생현황_사업장배출시설계폐기물_20220609134310.csv'
bis_ocr_dis_19 = pd.read_csv(bis_ocr_dis_19_path, encoding='cp949')
bis_ocr_dis_19.head(n=5)


Unnamed: 0,시도(1),항목별(1),항목별(2),항목별(3),2019
0,합계,총계,소계,소계,202619.0
1,합계,가연성,소계,소계,47262.6
2,합계,가연성,폐지류,소계,29.2
3,합계,가연성,폐목재류,소계,3620.8
4,합계,가연성,폐섬유류,소계,216.6


In [None]:
# Inspect the distinct labels of two category columns. Only the last expression
# of a cell is displayed, so the first call produces no visible output.
# Treatment? (meaning of this column is still unconfirmed)
bis_ocr_dis_19.loc[:, '항목별(3)'].unique()
# Waste name
bis_ocr_dis_19.loc[:, '항목별(2)'].unique()

array(['소계', '폐지류', '폐목재류', '폐섬유류', '폐합성수지류', ' 폐합성고무류', '폐전기전자제품류',
       '유기성오니류', '동식물성잔재물', '폐식용유', '기타', '폐금속류', '광재류', '연소잔재물', '소각재',
       '분진류', '폐주물사및폐사', '폐석재·폐콘크리트류', '폐석회석고및폐석회', '폐흡착재폐흡수재', '폐촉매',
       '유리·도자기편류', '무기성오니류'], dtype=object)

In [None]:
# Load the 2020 data (CP949-encoded CSV) and preview the first five rows.
bis_ocr_dis_20_path = 'data/business/발생_사업장_배출시설/폐기물_발생현황_사업장배출시설계폐기물_20220609134029.csv'
bis_ocr_dis_20 = pd.read_csv(bis_ocr_dis_20_path, encoding='cp949')
bis_ocr_dis_20.head(n=5)


Unnamed: 0,구분(1),구분(2),구분(3),구분(4),2020
0,전국,총계,소계,소계,80868004.6
1,전국,가연성,소계,소계,20307280.6
2,전국,가연성,폐지류,소계,41501.6
3,전국,가연성,폐목재류,소계,1547507.3
4,전국,가연성,폐섬유류,소계,66099.2


#### 처리 - 생활 폐기물

In [None]:
# Load the 2019 data (CP949-encoded CSV) and preview the first five rows.
bis_prs_hld_19_path = 'data/business/처리_사업장_생활/폐기물_처리주체별_처리현황_사업장생활계폐기물_20220609134830.csv'
bis_prs_hld_19 = pd.read_csv(bis_prs_hld_19_path, encoding='cp949')
bis_prs_hld_19.head(n=5)

Unnamed: 0,시도(1),항목별(1),항목별(2),2019
0,합계,총계,계,12049.3
1,합계,총계,매립,582.5
2,합계,총계,소각,1817.7
3,합계,총계,재활용,8726.1
4,합계,총계,기타,923.0


In [None]:
# Inspect the distinct labels of two category columns. Only the last expression
# of a cell is displayed, so the first call produces no visible output.
# Which kind of operator handled the treatment
bis_prs_hld_19.loc[:, '항목별(1)'].unique()
# Treatment method
bis_prs_hld_19.loc[:, '항목별(2)'].unique()

array(['계', '매립', '소각', '재활용', '기타', '소계'], dtype=object)

In [None]:
# Load the 2020 data (CP949-encoded CSV) and preview the first five rows.
bis_prs_hld_20_path = 'data/business/처리_사업장_생활/폐기물_처리주체별_처리현황_사업장비배출시설계_생활계_폐기물_20220609134732.csv'
bis_prs_hld_20 = pd.read_csv(bis_prs_hld_20_path, encoding='cp949')
bis_prs_hld_20.head(n=5)

Unnamed: 0,구분(1),항목별(1),항목별(2),2020
0,합계,총계,소계,5241248.2
1,합계,총계,재활용,3656151.5
2,합계,총계,소각,695063.9
3,합계,총계,매립,228078.8
4,합계,총계,기타,661954.0


#### 처리 - 배출시설 폐기물

In [None]:
# Load the 2019 data (CP949-encoded CSV) and preview the first five rows.
bis_prs_dis_19_path = 'data/business/처리_사업장_배출시설/폐기물_처리주체별_처리현황_사업장배출시설계폐기물_20220609134937.csv'
bis_prs_dis_19 = pd.read_csv(bis_prs_dis_19_path, encoding='cp949')
bis_prs_dis_19.head(n=5)

Unnamed: 0,시도별(1),항목별(1),항목별(2),2019
0,총계,계,소계,202619.0
1,총계,계,매립,18575.7
2,총계,계,소각,8182.9
3,총계,계,재활용,167299.5
4,총계,계,기타,8560.9


In [None]:
# Inspect the distinct labels of the three category columns. Only the last
# expression of a cell is displayed, so just the region labels are shown.
bis_prs_dis_19.loc[:, '항목별(1)'].unique()
bis_prs_dis_19.loc[:, '항목별(2)'].unique()
bis_prs_dis_19.loc[:, '시도별(1)'].unique()

array(['총계', '서울', '부산', '대구', '인천', '광주', '대전', '울산', '세종', '경기', '강원',
       '충북', '충남', '전북', '전남', '경북', '경남', '제주'], dtype=object)

In [None]:
# Load the 2020 data (CP949-encoded CSV) and preview the first five rows.
bis_prs_dis_20_path = 'data/business/처리_사업장_배출시설/폐기물_처리주체별_처리현황_사업장배출시설계폐기물_20220609135052.csv'
bis_prs_dis_20 = pd.read_csv(bis_prs_dis_20_path, encoding='cp949')
bis_prs_dis_20.head(n=5)

Unnamed: 0,구분(1),항목별(1),항목별(2),2020
0,총계,총계,소계,80868004.6
1,총계,총계,재활용,68207209.2
2,총계,총계,소각,3480607.9
3,총계,총계,매립,5657671.1
4,총계,총계,기타,3522516.4


## 생활 폐기물

#### 발생 - 생활

In [None]:
# Load the 2019 data (CP949-encoded CSV) and preview the first five rows.
# FIXME(review): this path is identical to the one loaded into `hld_ocr_20` further
# down, and the recorded preview shows a `2020` value column with '구분(n)' headers.
# This cell therefore appears to load the 2020 export, not 2019 — confirm the
# correct 2019 file name and replace the path.
hld_ocr_19= pd.read_csv(
    'data/household/발생_생활/폐기물_발생현황_생활폐기물_20220609141524.csv', encoding='cp949')
hld_ocr_19.head()

Unnamed: 0,구분(1),구분(2),구분(3),구분(4),2020
0,전국,합계,소계,소계,17303386.4
1,전국,종량제방식 등 혼합배출,소계,소계,8041965.2
2,전국,종량제방식 등 혼합배출,가연성,소계,6929046.9
3,전국,종량제방식 등 혼합배출,가연성,폐지류,1829611.5
4,전국,종량제방식 등 혼합배출,가연성,폐합성수지류,1706740.2


In [None]:
# Inspect the distinct labels of the three category columns. Only the last
# expression of a cell is displayed, so just the waste names are shown.
# Discharge method
hld_ocr_19.loc[:, '구분(2)'].unique()
# Waste type
hld_ocr_19.loc[:, '구분(3)'].unique()
# Waste name
hld_ocr_19.loc[:, '구분(4)'].unique()


array(['소계', '폐지류', '폐합성수지류', '폐고무류', '폐섬유류', '음식물류 폐기물', '폐목재류', '기타',
       '폐금속류', '폐유리류', '폐토사류', '폐타일 및 도자기류', '연탄재', '건설폐재류(공사장생활폐기물)',
       '종이팩', '비닐류', '발포수지류', 'PET병', '농약용기류', '폐비닐'], dtype=object)

In [None]:
# Load the 2020 data (CP949-encoded CSV) and preview the first five rows.
hld_ocr_20_path = 'data/household/발생_생활/폐기물_발생현황_생활폐기물_20220609141524.csv'
hld_ocr_20 = pd.read_csv(hld_ocr_20_path, encoding='cp949')
hld_ocr_20.head(n=5)

Unnamed: 0,구분(1),구분(2),구분(3),구분(4),2020
0,전국,합계,소계,소계,17303386.4
1,전국,종량제방식 등 혼합배출,소계,소계,8041965.2
2,전국,종량제방식 등 혼합배출,가연성,소계,6929046.9
3,전국,종량제방식 등 혼합배출,가연성,폐지류,1829611.5
4,전국,종량제방식 등 혼합배출,가연성,폐합성수지류,1706740.2


#### 처리 - 생활

In [None]:
# Load the 2019 data (CP949-encoded CSV) and preview the first five rows.
hld_prs_19_path = 'data/household/처리_생활/폐기물_처리주체별_처리현황_생활폐기물_20220609141718.csv'
hld_prs_19 = pd.read_csv(hld_prs_19_path, encoding='cp949')
hld_prs_19.head(n=5)

Unnamed: 0,시도(1),항목별(1),항목별(2),2019
0,합계,총계,계,45912.1
1,합계,총계,매립,6753.5
2,합계,총계,소각,13101.4
3,합계,총계,재활용,25887.2
4,합계,총계,기타,170.0


In [None]:
# Inspect the distinct labels of two category columns. Only the last expression
# of a cell is displayed, so the first call produces no visible output.
# Handling operator
hld_prs_19.loc[:, '항목별(1)'].unique()
# Treatment method
hld_prs_19.loc[:, '항목별(2)'].unique()

array(['계', '매립', '소각', '재활용', '기타', '소계'], dtype=object)

In [None]:
# Load the 2020 data (CP949-encoded CSV) and preview the first five rows.
hld_prs_20_path = 'data/household/처리_생활/폐기물_처리주체별_처리현황_생활폐기물_20220609141754.csv'
hld_prs_20 = pd.read_csv(hld_prs_20_path, encoding='cp949')
hld_prs_20.head(n=5)

Unnamed: 0,구분(1),항목별(1),항목별(2),2020
0,합계,총계,소계,17303386.4
1,합계,총계,재활용,9762502.9
2,합계,총계,소각,5058818.7
3,합계,총계,매립,2424367.8
4,합계,총계,기타,57697.0


In [None]:
# List the distinct labels found in the '항목별(1)' column of the 2020 table.
hld_prs_20.loc[:, '항목별(1)'].unique()

array(['총계', '공공처리', '자가처리', '위탁처리'], dtype=object)

In [None]:
# Look at the first five rows of the 2019 table again before aggregating.
hld_prs_19.head(n=5)

Unnamed: 0,시도(1),항목별(1),항목별(2),2019
0,합계,총계,계,45912.1
1,합계,총계,매립,6753.5
2,합계,총계,소각,13101.4
3,합계,총계,재활용,25887.2
4,합계,총계,기타,170.0


In [None]:
# Look at the first five rows of the 2020 table again before aggregating.
bis_prs_hld_20.head(n=5)


Unnamed: 0,구분(1),항목별(1),항목별(2),2020
0,합계,총계,소계,5241248.2
1,합계,총계,재활용,3656151.5
2,합계,총계,소각,695063.9
3,합계,총계,매립,228078.8
4,합계,총계,기타,661954.0


In [None]:
def _sum_by_region_and_method(frame, region_col):
    """Sum the value column(s) of `frame` per (region, '항목별(2)') pair.

    Args:
        frame: a treatment table that still has the '항목별(1)' and '항목별(2)'
            columns plus `region_col`.
        region_col: name of the region column, which differs between exports.

    Returns:
        DataFrame indexed by (`region_col`, '항목별(2)') holding the summed
        remaining columns.

    Raises:
        KeyError: if an expected column is missing, e.g. when this cell is re-run
            on frames that were already aggregated.
    """
    # '항목별(1)' holds text labels. Left in place, `sum()` may concatenate its strings
    # per group (depending on the pandas version), which pollutes the exported CSV.
    # Drop it up front so only the value column is aggregated.
    values_only = frame.drop(columns='항목별(1)')
    return values_only.groupby(by=[region_col, '항목별(2)']).sum()


# NOTE(review): the recorded unique() output for hld_prs_20 shows that '항목별(1)'
# contains a grand-total label ('총계') next to the individual categories, so summing
# across that column probably double-counts. Confirm whether total rows should be
# filtered out before aggregating.
hld_prs_19 = _sum_by_region_and_method(hld_prs_19, '시도(1)')
hld_prs_20 = _sum_by_region_and_method(hld_prs_20, '구분(1)')
bis_prs_dis_19 = _sum_by_region_and_method(bis_prs_dis_19, '시도별(1)')
bis_prs_dis_20 = _sum_by_region_and_method(bis_prs_dis_20, '구분(1)')
bis_prs_hld_19 = _sum_by_region_and_method(bis_prs_hld_19, '시도(1)')
bis_prs_hld_20 = _sum_by_region_and_method(bis_prs_hld_20, '구분(1)')

In [None]:
# Write each aggregated table to its own CSV, keeping the group-by index as
# leading columns (`index=True`). The `output/` directory must already exist.
csv_exports = {
    'output/household_19.csv': hld_prs_19,
    'output/household_20.csv': hld_prs_20,
    'output/business_dis_19.csv': bis_prs_dis_19,
    'output/business_dis_20.csv': bis_prs_dis_20,
    'output/business_hld_19.csv': bis_prs_hld_19,
    'output/business_hld_20.csv': bis_prs_hld_20,
}
for out_path, frame in csv_exports.items():
    frame.to_csv(out_path, index=True)
