# In [None]:
import re

import pandas as pd

def remove_text_and_keep_number(column, text_to_remove):
    """Strip a literal substring from each value and keep the first run of digits.

    Args:
        column: pandas Series accessed through ``.str``, so it must hold
            string values.
        text_to_remove: Literal text (not a regex) removed from every value
            before the digits are extracted.

    Returns:
        A Series of digit strings (still text, not converted to numbers).
        Values that are missing or contain no digits come back as NaN.
    """
    stripped = column.str.replace(text_to_remove, '', regex=False)
    # Raw string: '\d' in a normal literal is an invalid escape sequence and
    # triggers a SyntaxWarning on recent Python versions.
    return stripped.str.extract(r'(\d+)', expand=False)

def remove_rows_with_keywords(df, column, keywords):
    """Drop the rows whose ``column`` value contains any of ``keywords``.

    Matching is case-insensitive and literal: each keyword is escaped, so
    characters such as ``.`` or ``+`` are not interpreted as regex syntax.

    Args:
        df: DataFrame to filter.
        column: Name of the column to search; accessed through ``.str``.
        keywords: Iterable of substrings. ``None`` and empty entries are
            ignored.

    Returns:
        A new DataFrame without the matching rows. Rows whose value is
        missing are kept (``na=False``). When no usable keyword is given,
        an unfiltered copy of ``df`` is returned.
    """
    terms = [
        re.escape(str(keyword))
        for keyword in keywords
        if keyword is not None and str(keyword) != ''
    ]
    # An empty pattern (or an empty alternative such as 'a|') matches every
    # string, which would silently delete all non-null rows.
    if not terms:
        return df.copy()

    pattern = '|'.join(terms)
    matches = df[column].str.contains(pattern, case=False, na=False)
    return df[~matches]

def clean_data(df, text_to_remove, keywords):
    """Deduplicate, filter, and normalize a raw DataFrame.

    Steps, each applied only when the relevant column exists:

    1. Drop rows whose 'title' repeats an earlier row's title (rows with a
       missing title are kept).
    2. Drop rows whose 'Deal' text contains any of ``keywords``.
    3. Reduce the remaining 'Deal' values to their first run of digits after
       removing ``text_to_remove``.

    Args:
        df: Input DataFrame; it is not modified.
        text_to_remove: Literal text stripped from 'Deal' before the digits
            are extracted.
        keywords: Substrings that mark a 'Deal' row for removal.

    Returns:
        A new, cleaned DataFrame.
    """
    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()

    if 'title' in df.columns:
        titles = df['title']
        # Assigning `titles.drop_duplicates()` back to the column only blanks
        # the repeated titles to NaN (index alignment); drop the rows instead.
        repeated = titles.duplicated(keep='first') & titles.notna()
        df = df[~repeated]

    if 'Deal' in df.columns:
        # Filter on the raw text first: once 'Deal' is reduced to digits the
        # keywords can no longer be found in it.
        df = remove_rows_with_keywords(df, 'Deal', keywords).copy()
        df['Deal'] = remove_text_and_keep_number(df['Deal'], text_to_remove)

    return df

def main():
    """Read the raw workbook, clean it, and save the result to a new workbook."""
    # Text stripped from 'Deal' values, and keywords whose rows are deleted
    text_to_remove, keywords = '人付款', ['看过', 'keyword2', 'keyword3']

    # Load the source spreadsheet
    raw_df = pd.read_excel('Pet memorial data (unclean).xlsx')

    # Clean and write out in one pass, then report where the file went
    output_file = '5507 cleaned_data.xlsx'
    clean_data(raw_df, text_to_remove, keywords).to_excel(output_file, index=False)
    print(f"Cleaned data saved to {output_file}")

# Run the cleaning pipeline only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()