In [1]:
import pandas as pd
import numpy as np

# 경고무시
import warnings
warnings.filterwarnings(action='ignore')

In [2]:
class FM:
    """Yearly forest-management (FM) area from forest-tending (thinning) and planting records.

    Both inputs are reduced to a common set of columns (``PNU``, ``PlanArea``,
    ``JijukArea``, ``GeoArea``, ``year``; planting additionally keeps
    ``p_project_name``). For every requested year the records up to and
    including that year are cleaned, the usable area of each record is capped
    by its cadastral area, and the per-``PNU`` maximum is reported.
    """

    # Source column -> internal column name for the thinning table.
    _THIN_COLUMNS = {
        'PNU': 'PNU',
        'plan_area': 'PlanArea',
        't_jijuk_area': 'JijukArea',
        't_shape_area': 'GeoArea',
        't_year': 'year',
    }
    # Source column -> internal column name for the planting table.
    _PLANT_COLUMNS = {
        'PNU': 'PNU',
        'p_plan_area': 'PlanArea',
        'p_jijuk_area': 'JijukArea',
        'p_shape_area': 'GeoArea',
        'p_year': 'year',
        'p_project_name': 'p_project_name',
    }

    def __init__(self, thinDF, plantDF, year):
        """Select and rename the required columns of both input tables.

        Args:
            thinDF: thinning DataFrame with columns 'PNU', 'plan_area',
                't_jijuk_area', 't_shape_area', 't_year'.
            plantDF: planting DataFrame with columns 'PNU', 'p_plan_area',
                'p_jijuk_area', 'p_shape_area', 'p_year', 'p_project_name'.
            year: last year (inclusive) processed by ``total``.

        Raises:
            KeyError: a required column is missing.
            TypeError: an input cannot be indexed by column labels.
        """
        try:
            self.thinDF = thinDF[list(self._THIN_COLUMNS)].rename(columns=self._THIN_COLUMNS)
            self.plantDF = plantDF[list(self._PLANT_COLUMNS)].rename(columns=self._PLANT_COLUMNS)
        except (KeyError, TypeError):
            # Keep the user-facing hint, but re-raise: swallowing the error left
            # a half-initialised object that only failed later with an
            # unrelated AttributeError.
            print("❗ 오류 ❗ : 데이터프레임에 변수가 있는지 확인해주세요.")
            print("숲가꾸기 : 'PNU', 'plan_area', 't_jijuk_area', 't_shape_area', 't_year'")
            print("조림 : 'PNU', 'p_plan_area', 'p_jijuk_area', 'p_shape_area', 'p_year', 'p_project_name'")
            raise
        self.year = year

    def preprocessing_null_zero(self, df):
        """Drop records without a work area and fill missing cadastral areas.

        Rows whose ``PlanArea`` is null or 0 are removed and the index is reset.
        A ``JijukArea`` of 0 is treated as missing; missing values are replaced
        by ``GeoArea``. The added ``JijukNan`` column is "Nan" for rows whose
        cadastral area was missing (they are excluded from the unit correction)
        and "999" otherwise.

        Args:
            df: frame with ``PlanArea``, ``JijukArea`` and ``GeoArea`` columns.

        Returns:
            A new DataFrame; ``df`` itself is not modified.
        """
        # Work area is null or 0 -> drop the record.
        df = df.dropna(subset=["PlanArea"])
        df = df[df['PlanArea'] != 0].reset_index(drop=True)

        # Cadastral area 0 -> null.
        jijuk = df['JijukArea'].replace(0, np.nan)
        # Remember which rows had no cadastral area before it is filled in.
        df['JijukNan'] = np.where(jijuk.isnull(), "Nan", "999")
        # Assign the filled column back explicitly. The previous
        # `df.JijukArea.fillna(..., inplace=True)` is chained assignment and
        # leaves the frame untouched under pandas Copy-on-Write.
        df['JijukArea'] = jijuk.fillna(df['GeoArea'])
        return df

    def unit_correction_area(self, df):
        """Undo a factor-10000 unit mismatch between work area and cadastral area.

        A row is a candidate when ``log10(PlanArea / JijukArea) == 4`` and its
        cadastral area was not missing (``JijukNan != 'Nan'``). ``GeoArea`` then
        decides which side is wrong by comparing the truncated ``log10`` of the
        values:

        * ``JijukArea`` matches ``GeoArea`` but ``PlanArea`` does not ->
          ``PlanArea`` is divided by 10000;
        * ``PlanArea`` matches ``GeoArea`` but ``JijukArea`` does not ->
          ``JijukArea`` is multiplied by 10000.

        Args:
            df: frame produced by ``preprocessing_null_zero``.

        Returns:
            A corrected copy of ``df`` without the ``JijukNan`` helper column.
            The input frame is left unchanged.
        """
        df = df.copy()

        def magnitude(values):
            # Truncated log10, kept as float so NaN/inf areas make the
            # comparison False instead of failing in an integer cast.
            return np.trunc(np.log10(values))

        # NOTE(review): this is an exact floating-point comparison, kept as in
        # the original; only ratios whose log10 is exactly 4 qualify.
        ratio_is_1e4 = (np.log10(df['PlanArea'] / df['JijukArea']) == 4) & (df['JijukNan'] != 'Nan')
        jijuk_like_geo = (magnitude(df['JijukArea']) - magnitude(df['GeoArea'])) == 0
        plan_like_geo = (magnitude(df['PlanArea']) - magnitude(df['GeoArea'])) == 0

        shrink_plan = ratio_is_1e4 & jijuk_like_geo & ~plan_like_geo
        grow_jijuk = ratio_is_1e4 & ~jijuk_like_geo & plan_like_geo

        df.loc[shrink_plan, 'PlanArea'] = df.loc[shrink_plan, 'PlanArea'] / 10000
        df.loc[grow_jijuk, 'JijukArea'] = df.loc[grow_jijuk, 'JijukArea'] * 10000

        return df.drop(columns=['JijukNan'])

    def comparison_result(self, df):
        """Add ``comparison_result``: the smaller of work area and cadastral area.

        Rows where neither comparison holds (a NaN on either side) receive
        ``np.select``'s default value 0. The column is added to ``df`` in place
        and ``df`` is returned.
        """
        jijuk, plan = df['JijukArea'], df['PlanArea']
        df['comparison_result'] = np.select([jijuk >= plan, jijuk < plan], [plan, jijuk])
        return df

    # Maximum-practice method
    def max_practice_method(self, df) -> pd.DataFrame:
        """Collapse to one row per ``PNU`` holding the column-wise maximum.

        Each column is maximised independently, so a result row may combine
        values that came from different input rows.
        """
        # df['max_area_final'] = df['comparison_result']
        return df.groupby('PNU').max().reset_index()

    def thinning(self, year):
        """Return the per-``PNU`` thinning result for all records with ``year <= year``."""
        records = self.thinDF[self.thinDF['year'] <= year]
        cleaned = self.preprocessing_null_zero(records)
        corrected = self.unit_correction_area(cleaned)
        capped = self.comparison_result(corrected)
        return self.max_practice_method(capped)

    def drop_replanting(self, df):
        """Keep only rows whose ``p_project_name`` equals 0.

        The data also contains NaN ``p_project_name`` values; this filter drops
        them. Filtering on "not 2 and not 3" instead would keep both 0 and NaN.
        """
        return df[df['p_project_name'] == 0]

    def groupBySum(self, df):
        """Sum every column per (``year``, ``PNU``).

        For planting it is assumed that, supplementary planting aside, works on
        the same parcel do not overlap, so their areas within a PNU are added.
        """
        return df.groupby(['year', 'PNU']).sum().reset_index()

    def comparison_result_2(self, df):
        """Cap the yearly per-PNU area sum by the cadastral area.

        ``comparison_result_x`` is the usable area of the individual record and
        ``comparison_result_y`` the sum over the same PNU within the same year.
        That sum must not exceed ``JijukArea``, so the smaller of
        ``comparison_result_y`` and ``JijukArea`` is written to
        ``comparison_result`` (0 where neither comparison holds).
        """
        jijuk, yearly_sum = df['JijukArea'], df['comparison_result_y']
        df['comparison_result'] = np.select(
            [jijuk >= yearly_sum, jijuk < yearly_sum],
            [yearly_sum, jijuk])
        return df

    def planting(self, year):
        """Return the per-``PNU`` planting result for all records with ``year <= year``.

        Records are cleaned, restricted to ``p_project_name == 0``, unit
        corrected and capped individually; the capped areas are then summed per
        (year, PNU), capped again by the cadastral area and reduced to the
        per-PNU maximum.
        """
        records = self.plantDF[self.plantDF['year'] <= year]
        cleaned = self.preprocessing_null_zero(records)
        cleaned = self.drop_replanting(cleaned)
        corrected = self.unit_correction_area(cleaned)
        capped = self.comparison_result(corrected)

        yearly_sum = self.groupBySum(capped)
        # The shared 'comparison_result' column gets the _x (record) and
        # _y (yearly sum) suffixes from the merge.
        merged = pd.merge(
            capped,
            yearly_sum[['PNU', 'year', 'comparison_result']],
            on=["PNU", "year"],
        ).sort_values('PNU')
        merged = self.comparison_result_2(merged)

        result = self.max_practice_method(merged)
        return result[['PNU', 'PlanArea', 'JijukArea', 'GeoArea', 'year', 'comparison_result']]

    def total(self, filepath=None, start_year=1990):
        """Print, and optionally save, the combined FM area for every year.

        For each year from ``start_year`` to ``self.year`` (inclusive) the
        thinning and planting results are concatenated, ``max_area_final`` is
        set to ``comparison_result`` and the per-PNU maximum is taken. The sum
        of ``max_area_final`` is printed.

        Args:
            filepath: directory for the yearly ``FM_<year>.csv`` files; nothing
                is written when it is falsy.
            start_year: first year to process. The default of 1990 keeps the
                previous hard-coded loop start.
                NOTE(review): the original inline comment said years are
                considered from 2003; pass ``start_year=2003`` for that range.
        """
        for i in range(start_year, self.year + 1):
            maxPractice_thinning = self.thinning(i)
            maxPractice_planting = self.planting(i)

            df_maxPractice_all = pd.concat([maxPractice_thinning, maxPractice_planting])

            df_maxPractice_all['max_area_final'] = df_maxPractice_all['comparison_result']
            df_result = self.max_practice_method(df_maxPractice_all)

            if filepath:
                df_result.to_csv(f'{filepath}/FM_{i}.csv', index=False)
                print('===============================================')
                print(f"FM_{i}.csv saved")
            print(f'FM_{i}_area : ', df_result.max_area_final.sum())
            

# 실행

In [3]:
# Load the tab-separated thinning and planting tables; identifier-like
# columns are read as strings.
t_0320_df_sas = pd.read_csv(
    '숲가꾸기0320.txt',
    sep='\t',
    dtype={"PNU": str, "GeoArea": float, "permit_num": str},
)
p_0320_df_sas = pd.read_csv(
    '조림0320.txt',
    sep='\t',
    dtype={"PNU": str, "permit_num": str, "start_year": str},
)

**해당 변수 필요**
- 숲가꾸기 : 'PNU', 'plan_area', 't_jijuk_area', 't_shape_area', 't_year'
- 조림 : 'PNU', 'p_plan_area', 'p_jijuk_area', 'p_shape_area', 'p_year', 'p_project_name'

In [4]:
# variable = FM(thinning data, planting data, last year)
fm = FM(thinDF=t_0320_df_sas, plantDF=p_0320_df_sas, year=2020)

In [5]:
# Compute the yearly forest-management area for thinning and planting combined.
# Pass a directory as the argument to save one file per year in it;
# without an argument no files are written.
fm.total(filepath=None)

FM_2003_area :  1374202611.807755
FM_2004_area :  2632421386.367024
FM_2005_area :  3916547615.290738
FM_2006_area :  4955002512.099842
FM_2007_area :  6108312014.190181
FM_2008_area :  7527644754.59168
FM_2009_area :  9243525957.766006
FM_2010_area :  10661530572.692896
FM_2011_area :  12029503897.322435
FM_2012_area :  13554615583.746122
FM_2013_area :  14808799388.518696
FM_2014_area :  15686748302.394009
FM_2015_area :  16429420472.668344
FM_2016_area :  17042820114.894192
FM_2017_area :  17638889801.25374
FM_2018_area :  18067351694.57828
FM_2019_area :  18402769147.02925
FM_2020_area :  18846361118.652336


In [6]:
# Save the yearly files into the "test" folder
fm.total(filepath="test")

FM_2003.csv saved
FM_2003_area :  1374202611.807755
FM_2004.csv saved
FM_2004_area :  2632421386.367024
FM_2005.csv saved
FM_2005_area :  3916547615.290738
FM_2006.csv saved
FM_2006_area :  4955002512.099842
FM_2007.csv saved
FM_2007_area :  6108312014.190181
FM_2008.csv saved
FM_2008_area :  7527644754.59168
FM_2009.csv saved
FM_2009_area :  9243525957.766006
FM_2010.csv saved
FM_2010_area :  10661530572.692896
FM_2011.csv saved
FM_2011_area :  12029503897.322435
FM_2012.csv saved
FM_2012_area :  13554615583.746122
FM_2013.csv saved
FM_2013_area :  14808799388.518696
FM_2014.csv saved
FM_2014_area :  15686748302.394009
FM_2015.csv saved
FM_2015_area :  16429420472.668344
FM_2016.csv saved
FM_2016_area :  17042820114.894192
FM_2017.csv saved
FM_2017_area :  17638889801.25374
FM_2018.csv saved
FM_2018_area :  18067351694.57828
FM_2019.csv saved
FM_2019_area :  18402769147.02925
FM_2020.csv saved
FM_2020_area :  18846361118.652336


In [5]:
# To get the thinning data only:
# fm.thinning(year)
# returns the data up to and including the given year
fm.thinning(year=2014)

Unnamed: 0,PNU,PlanArea,JijukArea,GeoArea,year,comparison_result
0,1111010100100070027,1549.0,1484.259592,1484.259592,2009,1484.259592
1,1111010100200040038,15977.0,16847.624124,16847.624124,2009,15977.000000
2,1111010100200040041,23898.0,22859.777368,22859.777368,2009,22859.777368
3,1111010100200070002,4828.0,4766.048979,4766.048979,2009,4766.048979
4,1111010100200070014,751.0,690.110072,690.110072,2009,690.110072
...,...,...,...,...,...,...
689620,5013032026200060003,500.0,1983.000000,2060.566288,2012,500.000000
689621,5013032026200090000,6800.0,7323.000000,7184.672606,2012,6800.000000
689622,5013032026200100000,5000.0,7140.000000,7162.509354,2012,5000.000000
689623,5013032026200110000,20700.0,61785.000000,62107.095926,2012,20700.000000


In [6]:
# To get the planting data only:
# fm.planting(year)
# returns the data up to and including the given year
fm.planting(year=2014)

Unnamed: 0,PNU,PlanArea,JijukArea,GeoArea,year,comparison_result
0,1111018700200030004,5000.0,50493.0000,51452.849049,2012,5000.0000
1,1111018700200030012,5000.0,20374.0000,20966.193629,2012,5000.0000
2,1114016200200370225,3500.0,6259.0000,6757.384741,2014,6259.0000
3,1114016200200510000,10000.0,12955.0000,13181.161375,2014,10000.0000
4,1117013100107260180,10000.0,19365.0000,19224.047621,2014,10000.0000
...,...,...,...,...,...,...
57482,5013032024131510000,20000.0,20549.0000,20392.660847,2012,20000.0000
57483,5013032024200010000,50000.0,292535.0000,292443.032470,2009,50000.0000
57484,5013032024200870024,80000.0,632854.0000,636880.004940,2007,280000.0000
57485,5013032024200870029,80000.0,163233.2445,163233.244500,2011,163233.2445
