In [1]:
import pandas as pd

def tag_rows_from_sheet1(file_path):
    """
    Tag each row of Sheet1 by whether its key also appears in Sheet2.

    Only the 'key' and 'value' columns are read from the sheets named
    'Sheet1' and 'Sheet2'. A row of Sheet1 is considered to "exist" in
    Sheet2 when its 'key' value occurs anywhere in Sheet2's 'key' column.

    Args:
        file_path (str): The path to the Excel file.

    Returns:
        pandas.DataFrame: The Sheet1 data ('key', 'value') plus an integer
            column 'exists_in_sheet2' that is 1 when the row's key is present
            in Sheet2 and 0 otherwise. Row count and order match Sheet1.
    """
    # Read both sheets into DataFrames
    df1 = pd.read_excel(file_path, sheet_name='Sheet1', usecols=['key', 'value'])
    df2 = pd.read_excel(file_path, sheet_name='Sheet2', usecols=['key', 'value'])

    # A plain membership test is used instead of a left merge: a merge emits
    # one output row per matching Sheet2 row, so duplicate keys in Sheet2
    # would change the row count and misalign the flag with Sheet1's rows.
    key_in_sheet2 = df1['key'].isin(df2['key'])

    # assign() returns a new frame, so there is no helper column to drop.
    return df1.assign(exists_in_sheet2=key_in_sheet2.astype(int))

In [2]:
# Workbook to process and the two sheets read from it:
# sheets[0] is loaded into df1 and sheets[1] into df2 in the next cell.
file_path = '360-zi-category.xlsx'
sheets = [
    "360-zi-category",
    "most-frequent-zi",
]

In [4]:
# Open the workbook once and parse both sheets from the same handle, rather
# than letting pd.read_excel reopen and re-read the file for each sheet.
with pd.ExcelFile(file_path) as workbook:
    df1 = workbook.parse(sheet_name=sheets[0])
    df2 = workbook.parse(sheet_name=sheets[1])

In [5]:
# Display the column labels of both frames to confirm what was loaded.
df1.columns, df2.columns 

(Index(['Zi', 'Category'], dtype='object'), Index(['part'], dtype='object'))

In [6]:
# Collect the "part" entries as plain strings. Blank spreadsheet cells are
# read as NaN (a float), which would make a later "".join(tagged_zi) raise
# TypeError, so drop them and coerce the remaining values to str.
tagged_zi = df2["part"].dropna().astype(str).to_list()

In [9]:
# Concatenate every entry and remove all whitespace characters:
# str.split() with no arguments splits on runs of whitespace, so re-joining
# the pieces leaves only the non-whitespace characters, in order.
tagged_zi_str = "".join("".join(tagged_zi).split())

In [10]:
# Display the whitespace-free string of tagged characters for inspection.
tagged_zi_str

'氵水木艹草口扌手亻人阝城月土地女虫忄心日王⺮竹纟讠言辶钅金火心目禾足疒病石一又山宀家鸟攵贝巾车衤衣犭力米隹十田酉页马大刂刀羽欠广人穴丿礻示羊殳言冖革罒白饣食刀厂耳立寸糸戈非皿子冫水雨八彳亠尸方丷衣丶夫分且鬼几工丨彡走鱼斤黑厶弓犬囗干门甫士由占小令'

In [12]:
# Flag each Zi that is one of the individual characters in tagged_zi_str.
# Membership is tested against a set of characters rather than with
# `x in tagged_zi_str`, because a substring test has three pitfalls:
#   * an empty string is "in" every string, so it would be flagged 'Y';
#   * a multi-character value could match across two adjacent entries;
#   * a blank cell (NaN, a float) raises TypeError.
# With a set, anything that is not exactly one tagged character gets ''.
tagged_chars = set(tagged_zi_str)
df1["common_flag"] = df1["Zi"].map(lambda zi: 'Y' if zi in tagged_chars else '')

In [14]:
# Add an empty "baidu_url" column (every row holds the empty string).
df1 = df1.assign(baidu_url="")

In [15]:
# Display the frame, now including the common_flag and baidu_url columns.
df1 

Unnamed: 0,Zi,Category,common_flag,baidu_url
0,金,Nature,Y,
1,木,Nature,Y,
2,水,"Nature, Social-Activity",Y,
3,火,Nature,Y,
4,土,Nature,Y,
...,...,...,...,...
346,之,Abstract-Literature,,
347,乎,Abstract-Literature,,
348,者,Abstract-Literature,,
349,矣,Abstract-Literature,,


In [16]:
# Write the tagged frame to a new workbook; index=False leaves the
# DataFrame index out of the output file.
output_path = '360-zi-category-v2.xlsx'
df1.to_excel(output_path, index=False)