In [2]:
!pip install pandas numpy catboost openpyxl
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [42]:
import pandas as pd
import numpy as np

# Location of the Excel workbook on the mounted Google Drive.
# NOTE(review): the variable is named `train_path`, but the file name
# ("test-reindex-test_3") suggests this is the test split — confirm.
train_path = '/content/drive/MyDrive/Kaggle/simple/test-reindex-test_3.xlsx'

# Load the workbook into a DataFrame; later cells mutate `train` in place.
train = pd.read_excel(train_path)

In [43]:
# 1. Median of the unit-price column (單價元平方公尺), used below as a fill value.
#    It is computed before step 3 runs, so rows whose unit price is 0 still
#    take part in the median.
median_price_per_sqm = train['單價元平方公尺'].median()
print(f"單價元平方公尺中位數: {median_price_per_sqm}")

# 2. Rows with a missing unit price whose building type (建物型態) is '工廠'
#    receive the median.
train.loc[(train['單價元平方公尺'].isnull()) & (train['建物型態'] == '工廠'), '單價元平方公尺'] = median_price_per_sqm

# 3. Rows whose unit price is exactly 0 also receive the median.
train.loc[train['單價元平方公尺'] == 0, '單價元平方公尺'] = median_price_per_sqm

# 4. Every unit price that is still missing becomes 0.
#    The result is assigned back to the column: calling fillna(..., inplace=True)
#    on `train[col]` operates on an intermediate object, emits a FutureWarning
#    today, and leaves `train` unchanged under pandas 3.0 copy-on-write.
train['單價元平方公尺'] = train['單價元平方公尺'].fillna(0)

# 5. 計算總價元的邏輯
def calculate_total_price(row):
    """Compute the estimated total price (總價元) for a single record.

    Args:
        row: A mapping-like record (a DataFrame row when used with
            ``DataFrame.apply(..., axis=1)``) providing the keys
            '車位移轉總面積平方公尺' (parking area), '車位總價元' (parking price),
            '建物移轉總面積平方公尺' (building area) and '單價元平方公尺' (unit price).

    Returns:
        The total price, chosen by one of three rules:
        * parking area == 0 and parking price != 0:
          building area * unit price
        * parking area != 0 and parking price == 0:
          building area * unit price + parking price
        * otherwise:
          (building area - parking area) * unit price + parking price
    """
    parking_area = row['車位移轉總面積平方公尺']
    parking_price = row['車位總價元']
    building_area = row['建物移轉總面積平方公尺']
    unit_price = row['單價元平方公尺']

    # A parking price is recorded but no parking area: price the whole
    # building area at the unit price and leave the parking price out.
    if parking_area == 0 and parking_price != 0:
        return building_area * unit_price

    # A parking area is recorded but its price is 0: the parking area is not
    # subtracted. parking_price is 0 on this path; the addition is kept so the
    # numeric result is exactly what it was before.
    if parking_area != 0 and parking_price == 0:
        return building_area * unit_price + parking_price

    # General case: price the non-parking area, then add the parking price.
    return (building_area - parking_area) * unit_price + parking_price

# Compute the total price (總價元) row by row with calculate_total_price.
train['總價元'] = train.apply(calculate_total_price, axis=1)

# Round each total to the nearest multiple of 10,000 (ndigits=-4).
# NOTE(review): the original comment asks for half-up rounding (四捨五入), but
# round() resolves exact ties toward the even multiple (e.g. 25000 -> 20000);
# confirm which behaviour is intended.
train['總價元'] = train['總價元'].apply(lambda x: round(x, -4))

# Build the submission from the id column (編號) and the computed total price,
# then write it to Google Drive without the index. 'utf-8-sig' prepends a BOM,
# presumably so spreadsheet tools detect the encoding of the Chinese headers.
submission = train[['編號', '總價元']]
submission_path = '/content/drive/MyDrive/Kaggle/simple/submission_10.csv'
submission.to_csv(submission_path, index=False, encoding='utf-8-sig')

print(f"Submission saved to {submission_path}")


單價元平方公尺中位數: 99833.0


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  train['單價元平方公尺'].fillna(0, inplace=True)


Submission saved to /content/drive/MyDrive/Kaggle/simple/submission_10.csv
