In [1]:
import pandas as pd

def _print_overview(df):
    """Print the column names of ``df``, then its first rows and a separator line."""
    print("Columns:")
    for col in df.columns:
        print(f"- {col}")

    print("\nPreview (first 5 rows):")
    print(df.head())  # df holds at most 5 rows already; head() keeps the preview bounded
    print("\n" + "=" * 60 + "\n")


def show_columns(file_path):
    """
    Print every column name found in a .csv or .xlsx file, plus a preview
    of the first 5 rows of the table (CSV) or of each worksheet (Excel).

    Parameters
    ----------
    file_path : str or path-like
        Location of the file to inspect. The extension is matched
        case-insensitively, so ``data.CSV`` and ``book.XLSX`` are accepted.

    Returns
    -------
    None
        All results are written to standard output.

    Raises
    ------
    ValueError
        If the file name does not end in ``.csv`` or ``.xlsx``.
    """
    # Normalise once: str() lets pathlib.Path objects through, lower() makes
    # the extension test case-insensitive.
    lowered = str(file_path).lower()

    if lowered.endswith(".csv"):
        # Only the first 5 data rows are parsed, so large files stay cheap.
        df = pd.read_csv(file_path, nrows=5)
        print(f"File: {file_path}")
        print("File Type: CSV\n")
        _print_overview(df)

    elif lowered.endswith(".xlsx"):
        # The context manager closes the workbook handle even if a sheet fails
        # to parse; the open handle is reused for every sheet instead of
        # re-opening the file once per sheet.
        with pd.ExcelFile(file_path) as xls:
            print(f"File: {file_path}")
            print("File Type: Excel")
            print(f"Found {len(xls.sheet_names)} sheets: {xls.sheet_names}\n")

            for sheet in xls.sheet_names:
                df = pd.read_excel(xls, sheet_name=sheet, nrows=5)
                print(f"Sheet: {sheet}")
                _print_overview(df)

    else:
        raise ValueError("Only .csv and .xlsx are supported")

In [2]:
# Example 1: list the columns and preview every sheet of the
# "Summary of PyGRF results 2.xlsx" workbook.
show_columns(r"D:\Github\PyGRF-Result-automation\Step1-VarList\data\summary\Summary of PyGRF results 2.xlsx")

File: D:\Github\PyGRF-Result-automation\Step1-VarList\data\summary\Summary of PyGRF results 2.xlsx
File Type: Excel
Found 3 sheets: ['Global Elbow Features', 'Local SHAP Elbow Features', 'Local MDI Elbow Features']

Sheet: Global Elbow Features
Columns:
- City
- Var
- MDI
- SHAP

Preview (first 5 rows):
      City          Var  MDI  SHAP
0      NYC  house_price   60    78
1       DC  house_price   80    86
2       LA  house_price   66    60
3       SF  house_price   78    82
4  Houston  house_price   82    73


Sheet: Local SHAP Elbow Features
Columns:
- City
- Var
- Mean
- Median
- STD
- IQR

Preview (first 5 rows):
      City          Var  Mean  Median  STD  IQR
0      NYC  house_price    98      78  103   81
1       DC  house_price   101      94  108   91
2       LA  house_price    97      95   96   84
3       SF  house_price    84      67   89   76
4  Houston  house_price   110     124  108   94


Sheet: Local MDI Elbow Features
Columns:
- City
- Var
- Mean
- Median
- STD
- IQR

Pr

In [3]:
# Example 2: list the columns and preview every sheet of the
# "Summary of PyGRF results.xlsx" workbook.
show_columns(r"D:\Github\PyGRF-Result-automation\Step1-VarList\data\summary\Summary of PyGRF results.xlsx")

File: D:\Github\PyGRF-Result-automation\Step1-VarList\data\summary\Summary of PyGRF results.xlsx
File Type: Excel
Found 3 sheets: ['Global Elbow Features', 'Local SHAP Elbow Features', 'Local MDI Elbow Features']

Sheet: Global Elbow Features
Columns:
- City
- MDI
- SHAP

Preview (first 5 rows):
      City  MDI  SHAP
0      NYC   61    60
1       DC   64    61
2       LA   50    47
3       SF   60    62
4  Houston   60    63


Sheet: Local SHAP Elbow Features
Columns:
- City
- Mean
- Median
- STD
- IQR

Preview (first 5 rows):
      City  Mean  Median  STD  IQR
0      NYC    72      63   83   65
1       DC    87      83   89   78
2       LA    76      77   80   70
3       SF    67      57   73   62
4  Houston    92     102   86   82


Sheet: Local MDI Elbow Features
Columns:
- City
- Mean
- Median
- STD
- IQR

Preview (first 5 rows):
      City  Mean  Median  STD  IQR
0      NYC    65      60   77   65
1       DC    80      79   86   75
2       LA    79      85   83   67
3       SF    

In [4]:
# Example 3: list the columns and preview the first rows of the
# Chicago census-tract ranked-features CSV.
show_columns(r"D:\Github\PyGRF-Result-automation\Step1-VarList\data\ranked_features\Chicago_CensusTract_HousePrice_ranked_features.csv")

File: D:\Github\PyGRF-Result-automation\Step1-VarList\data\ranked_features\Chicago_CensusTract_HousePrice_ranked_features.csv
File Type: CSV

Columns:
- feature
- global_rank
- local_mean_rank
- local_median_rank
- local_std_rank
- local_iqr_rank

Preview (first 5 rows):
                                             feature  global_rank  \
0   MedianRentVSIncome_MED_RENT_AS_PCT_OF_MED_INCOME          1.0   
1  DistributionOfJobs_Proportion of jobs for work...          2.0   
2  DistributionOfJobs_Proportion of jobs for work...          3.0   
3  RecreationalFacilityDensityByArea_SOURCE_OVERL...          4.0   
4         JobAccessByDifferentTravelModes_Home_RATIO          5.0   

   local_mean_rank  local_median_rank  local_std_rank  local_iqr_rank  
0              1.0                1.0            19.0            15.0  
1             12.0               11.0            10.0            17.0  
2             13.0               14.0            12.0            20.0  
3              2.0       

In [5]:
# Example 4: list the columns and preview the first rows of the
# Chicago census-tract merged CSV.
show_columns(r"D:\Github\PyGRF-Result-automation\Step2-Filtered\data\Chicago\CensusTract\Chicago_CensusTract_merged.csv")

File: D:\Github\PyGRF-Result-automation\Step2-Filtered\data\Chicago\CensusTract\Chicago_CensusTract_merged.csv
File Type: CSV

Columns:
- Area_ID
- GEOID
- AccessToCBD_6_Chicago_CBD_1_Core_OVERLAP_RATIO_CAR_15
- AccessToCBD_6_Chicago_CBD_1_Core_OVERLAP_RATIO_CAR_30
- AccessToCBD_6_Chicago_CBD_1_Core_OVERLAP_RATIO_WALK,TRANSIT_15
- AccessToCBD_6_Chicago_CBD_1_Core_OVERLAP_RATIO_WALK,TRANSIT_30
- AccessToCBD_6_Chicago_CBD_1_Core_OVERLAP_RATIO_WALK_15
- AccessToCBD_6_Chicago_CBD_1_Core_OVERLAP_RATIO_WALK_30
- AccessToCBD_6_Chicago_CBD_2_Greater_OVERLAP_RATIO_CAR_15
- AccessToCBD_6_Chicago_CBD_2_Greater_OVERLAP_RATIO_CAR_30
- AccessToCBD_6_Chicago_CBD_2_Greater_OVERLAP_RATIO_WALK,TRANSIT_15
- AccessToCBD_6_Chicago_CBD_2_Greater_OVERLAP_RATIO_WALK,TRANSIT_30
- AccessToCBD_6_Chicago_CBD_2_Greater_OVERLAP_RATIO_WALK_15
- AccessToCBD_6_Chicago_CBD_2_Greater_OVERLAP_RATIO_WALK_30
- AccessToCBD_POPULATION
- AccessVariousJobOpportunities_JOB_OPPORTUNITY_SUM_DIVIDED_BY_POPULATION_PER_1000_CAR_15
-