In [1]:
#import geopandas as gpd
import matplotlib.pyplot as plt
import requests
import pandas as pd
import numpy as np
import json
import re
# Set pandas' chained-assignment mode to None for the whole session (the default is 'warn').
pd.set_option("mode.chained_assignment", None)

## 資料清洗

In [2]:
# Load the raw 2011~2019 fruit-production CSV from ../data (decoded as utf8) and show it.
df = pd.read_csv(
    filepath_or_buffer="../data/產量_臺灣地區果品生產概況(2011~2019).csv",
    encoding="utf8",
)
df

Unnamed: 0,年度,地區別,果品類別,種植株數,收穫株數,種植面積_公頃,收穫面積_公頃,每株平均產量_公斤,每公頃平均產量_公斤,產量_公噸
0,2019,新北市,香蕉,108619,102262,74.99,71.01,13.74,19793.48,1405.535
1,2019,台北市,香蕉,12830,12830,10.36,10.36,12.55,15543.15,161.027
2,2019,桃園市,香蕉,44420,43940,57.11,56.47,18.85,14666.65,828.226
3,2019,台中市,香蕉,574129,560455,629.71,614.39,15.54,14178.52,8711.141
4,2019,台南市,香蕉,1481610,1476420,1062.65,1057.46,16.26,22703.58,24008.13
...,...,...,...,...,...,...,...,...,...,...
7066,2011,基隆市,其他水果,327,327,1.01,1.01,10,3237.62,3.27
7067,2011,新竹市,其他水果,2538,2538,1.84,1.84,10.56,14560.87,26.792
7068,2011,嘉義市,其他水果,25711,25711,38.43,38.43,16.66,11147.46,428.397
7069,2011,金門縣,其他水果,0,0,0,0,0,0,0


### 保留鳳梨資料

In [3]:
# Keep only the rows whose 果品類別 (fruit category) column equals 鳳梨 (pineapple).
df_pineapple = df.loc[df["果品類別"].eq("鳳梨")]
df_pineapple

Unnamed: 0,年度,地區別,果品類別,種植株數,收穫株數,種植面積_公頃,收穫面積_公頃,每株平均產量_公斤,每公頃平均產量_公斤,產量_公噸
24,2019,新北市,鳳梨,72196,44500,2.61,1.65,1,26869.7,44.335
25,2019,台北市,鳳梨,0,0,0,0,0,0,0
26,2019,桃園市,鳳梨,2900,2280,0.1,0.08,1.18,33650,2.692
27,2019,台中市,鳳梨,5030287,4484837,145.14,129.24,1.37,47638.13,6156.752
28,2019,台南市,鳳梨,60682552,44679852,1761.48,1299.79,1.56,53561.26,69618.392
...,...,...,...,...,...,...,...,...,...,...
6340,2011,基隆市,鳳梨,0,0,0,0,0,0,0
6341,2011,新竹市,鳳梨,0,0,0,0,0,0,0
6342,2011,嘉義市,鳳梨,2282398,2282398,65.23,65.23,1.1,38492.1,2510.84
6343,2011,金門縣,鳳梨,0,0,0,0,0,0,0


### 去除不必要的地區別：「臺灣省」、「福建省」

In [4]:
# Drop the rows whose 地區別 (region) is 臺灣省 or 福建省; every other region is kept.
# NOTE(review): the displayed data spells other regions with 台 (e.g. 台北市) —
# confirm the province rows really use 臺, otherwise this filter matches nothing.
df_pineapple = df_pineapple[~df_pineapple["地區別"].isin(["臺灣省", "福建省"])]
df_pineapple

Unnamed: 0,年度,地區別,果品類別,種植株數,收穫株數,種植面積_公頃,收穫面積_公頃,每株平均產量_公斤,每公頃平均產量_公斤,產量_公噸
24,2019,新北市,鳳梨,72196,44500,2.61,1.65,1,26869.7,44.335
25,2019,台北市,鳳梨,0,0,0,0,0,0,0
26,2019,桃園市,鳳梨,2900,2280,0.1,0.08,1.18,33650,2.692
27,2019,台中市,鳳梨,5030287,4484837,145.14,129.24,1.37,47638.13,6156.752
28,2019,台南市,鳳梨,60682552,44679852,1761.48,1299.79,1.56,53561.26,69618.392
...,...,...,...,...,...,...,...,...,...,...
6340,2011,基隆市,鳳梨,0,0,0,0,0,0,0
6341,2011,新竹市,鳳梨,0,0,0,0,0,0,0
6342,2011,嘉義市,鳳梨,2282398,2282398,65.23,65.23,1.1,38492.1,2510.84
6343,2011,金門縣,鳳梨,0,0,0,0,0,0,0


In [5]:
# Swap every "-" cell in the frame for 0.
df_pineapple = df_pineapple.replace(to_replace="-", value=0)

In [6]:
# Measurement columns that get converted with pd.to_numeric below.
target_cols = [
    '種植株數',
    '收穫株數',
    '種植面積_公頃',
    '收穫面積_公頃',
    '每株平均產量_公斤',
    '每公頃平均產量_公斤',
    '產量_公噸',
]

# Convert each listed column to a numeric dtype, one column at a time.
df_pineapple[target_cols] = df_pineapple[target_cols].apply(pd.to_numeric, axis=0)

# Round the production column (產量_公噸) to two decimals by applying the
# built-in round() to every element.
df_pineapple["產量_公噸"] = df_pineapple["產量_公噸"].map(lambda tonnes: round(tonnes, 2))
df_pineapple

Unnamed: 0,年度,地區別,果品類別,種植株數,收穫株數,種植面積_公頃,收穫面積_公頃,每株平均產量_公斤,每公頃平均產量_公斤,產量_公噸
24,2019,新北市,鳳梨,72196,44500,2.61,1.65,1.00,26869.70,44.34
25,2019,台北市,鳳梨,0,0,0.00,0.00,0.00,0.00,0.00
26,2019,桃園市,鳳梨,2900,2280,0.10,0.08,1.18,33650.00,2.69
27,2019,台中市,鳳梨,5030287,4484837,145.14,129.24,1.37,47638.13,6156.75
28,2019,台南市,鳳梨,60682552,44679852,1761.48,1299.79,1.56,53561.26,69618.39
...,...,...,...,...,...,...,...,...,...,...
6340,2011,基隆市,鳳梨,0,0,0.00,0.00,0.00,0.00,0.00
6341,2011,新竹市,鳳梨,0,0,0.00,0.00,0.00,0.00,0.00
6342,2011,嘉義市,鳳梨,2282398,2282398,65.23,65.23,1.10,38492.10,2510.84
6343,2011,金門縣,鳳梨,0,0,0.00,0.00,0.00,0.00,0.00


In [7]:
# Per-region mean of planted area, harvested area and production over all rows
# (the 2011~2019 data), rounded to two decimals and ordered by production, largest first.
(
    df_pineapple
    .groupby("地區別")[["種植面積_公頃", "收穫面積_公頃", "產量_公噸"]]
    .mean()
    .round(2)
    .sort_values(by="產量_公噸", ascending=False)
)

Unnamed: 0_level_0,種植面積_公頃,收穫面積_公頃,產量_公噸
地區別,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
屏東縣,3046.36,2672.03,136102.2
南投縣,1254.58,1159.52,71803.47
嘉義縣,1424.33,1176.22,59038.04
台南市,1535.54,1275.49,58815.03
高雄市,1429.3,1039.15,52652.9
雲林縣,752.31,657.49,34468.16
台東縣,346.08,333.89,14275.9
彰化縣,251.15,230.1,11622.35
花蓮縣,240.65,230.9,7412.41
台中市,121.59,117.38,5479.71


In [8]:
# Write the cleaned pineapple frame to a CSV under "cleaned data", without the index column.
df_pineapple.to_csv(
    path_or_buf="cleaned data/產量_臺灣鳳梨生產概況(2011~2019).csv",
    index=False,
)