# In [2]:
import pandas as pd
import os

# Function to get the latest CSV file in the directory
def get_latest_csv(directory):
    """Return the name of the most recently modified CSV file in *directory*.

    Only the top level of *directory* is scanned. An entry qualifies when its
    name ends in ``.csv`` (case-sensitive) and it is a regular file.

    Args:
        directory: Path of the directory to scan.

    Returns:
        The bare file name (NOT joined with *directory*) of the qualifying
        entry with the greatest modification time.

    Raises:
        ValueError: If *directory* contains no CSV file.
    """
    # Map file name -> full path so the path is built once per entry.
    candidates = {}
    for name in os.listdir(directory):
        path = os.path.join(directory, name)
        # Skip directories that merely happen to be named "*.csv".
        if name.endswith('.csv') and os.path.isfile(path):
            candidates[name] = path

    if not candidates:
        # max() on an empty sequence raises an opaque ValueError; keep the
        # exception type but say what actually went wrong.
        raise ValueError(f"No CSV files found in directory: {directory!r}")

    return max(candidates, key=lambda name: os.path.getmtime(candidates[name]))

# Directory containing the CSV files
directory_path = './'  # Adjust to your directory path

# Get the latest CSV file (a bare file name, relative to directory_path)
latest_csv = get_latest_csv(directory_path)

# Load the latest CSV file. get_latest_csv returns only the file name, so it
# has to be joined with directory_path; reading the bare name would only work
# while directory_path is the current working directory.
df = pd.read_csv(os.path.join(directory_path, latest_csv), encoding='utf-8')

# Mapping keywords into parts.
# Keys are output column names; values are substrings looked for (with a
# case-sensitive `in` test) inside each address part. Dict order matters:
# classify_parts stops at the first key with a matching hint, so e.g. '(Block'
# is claimed by 'Phase' before 'Block' can claim it for 'Tower/Block'.
# 'Building' has no hints because it is always taken from the first part.
# NOTE(review): 'Browdway' is kept verbatim — presumably it mirrors a spelling
# in the data; confirm before "correcting" it.
keywords = {
    'Building': [],
    'Phase': ['Phase', 'Site', '(Block'],
    'Tower/Block': ['Tower', 'Block', 'Path', 'Street', 'Browdway',
                    'Celestial Avenue', 'Carpark', 'Road', 'Lane',
                    'Drive', 'Vista Avenue', 'Mansion', 'House',
                    'Sky One'],
    'Floor': ['Floor', '/F', 'LG', 'B/M', 'U/L', 'UCP', 'P3', 'P2',
              'Podium', 'PODIUM', 'U/G', 'P7', 'P4', 'UG1'],
    'Flat': ['Flat', 'Apartment', 'Duplex']
}

# Substrings that mark a part as a floor-type description.
floor_types = ['Upper Floor', 'Middle Floor', 'Lower Floor']

# Output column order shared by every return path of classify_parts.
_PART_COLUMNS = ['Building', 'Phase', 'Tower/Block', 'Floor', 'Flat', 'Floor_Type']


# Classify parts with structured backward filling targeting the rightmost columns
def classify_parts(parts):
    """Split a '・'-separated address into six positional fields.

    Args:
        parts: The raw address value. Anything that is not a ``str`` (for
            example a missing value) is treated as "no address".

    Returns:
        A 6-element list ordered as
        ``[Building, Phase, Tower/Block, Floor, Flat, Floor_Type]``:

        * not a string -> six ``None`` values;
        * exactly five parts -> the parts in order, with ``Floor_Type`` set
          to ``None`` (no keyword matching is done in this case);
        * otherwise -> the first part is the Building, every other part is
          matched against ``floor_types`` and ``keywords``, and parts that
          matched nothing are back-filled into the still-empty columns.
    """
    if not isinstance(parts, str):
        return [None] * 6  # Return None for all columns if parts is not a string

    parts_list = parts.split('・')
    if len(parts_list) == 5:
        return parts_list + [None]  # Adding None for Floor_Type

    structured = dict.fromkeys(_PART_COLUMNS)
    structured['Building'] = parts_list[0]  # Assign the first part to Building

    # Identify and assign parts by keywords and floor types; collect the rest
    # (in address order) for the backward fill below.
    unused_parts = []
    for part in parts_list[1:]:
        matched = False

        if any(ft in part for ft in floor_types):
            structured['Floor_Type'] = part
            matched = True
            # NOTE(review): no early exit here, so a floor-type part is also
            # run through the keyword table. Every entry of floor_types
            # contains the hint 'Floor', so such a part ends up in both
            # 'Floor_Type' and 'Floor'. Kept as in the original; confirm it
            # is intended.

        for key, hints in keywords.items():
            if any(hint in part for hint in hints):
                # A later part matching the same key overwrites an earlier one.
                structured[key] = part
                matched = True
                break

        if not matched:
            unused_parts.append(part)

    # Backward filling for unused parts: walk the columns right-to-left and
    # hand each empty one the RIGHTMOST remaining unused part, so leftover
    # parts keep their address order (e.g. 'Bldg・12・A' -> Floor='12',
    # Flat='A'), consistent with the positional mapping of the 5-part case.
    # Unused parts beyond the available empty columns are dropped.
    for key in ('Flat', 'Floor', 'Tower/Block', 'Phase'):
        if unused_parts and structured[key] is None:
            structured[key] = unused_parts.pop()

    return [structured[key] for key in _PART_COLUMNS]

# Apply the function to Address column and expand to new structure.
# The frame is built directly from the list of 6-element rows instead of
# `.apply(pd.Series)`, which constructs one Series per row; index=df.index
# keeps the new columns aligned with the existing rows.
part_columns = ['Building', 'Phase', 'Tower/Block', 'Floor', 'Flat', 'Floor_Type']
df[part_columns] = pd.DataFrame(
    [classify_parts(address) for address in df['Address']],
    index=df.index,
    columns=part_columns,
)

# Generate output file name based on input file name. Only the trailing
# '.csv' is swapped: str.replace would rewrite every '.csv' occurrence in the
# name, and would leave the name unchanged (overwriting the input) if the
# suffix were missing.
input_suffix = '.csv'
if latest_csv.endswith(input_suffix):
    output_stem = latest_csv[:-len(input_suffix)]
else:
    output_stem = latest_csv
output_file_name = output_stem + '_lv_2.csv'

# Output and save the result.
# NOTE(review): the output also ends in '.csv' and is written relative to the
# current working directory; if that is the directory being scanned, the next
# run will presumably pick this file up as the "latest" CSV — confirm.
df.to_csv(output_file_name, index=False)
