In [1]:
import pandas as pd
import chardet

def detect_encoding(file_path):
    """Guess the text encoding of a file from its leading bytes.

    Reads at most the first 100000 bytes of ``file_path`` in binary mode and
    passes them to ``chardet.detect``.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The value stored under the ``"encoding"`` key of chardet's result.
        NOTE(review): chardet may report ``None`` here when it cannot decide;
        confirm callers tolerate that.
    """
    with open(file_path, "rb") as handle:
        sample = handle.read(100000)
    guess = chardet.detect(sample)
    return guess["encoding"]

# Input CSV files to load; each file name is also the key used in `dataframes`.
files = ["meal_data_2016_2021.csv", "weather_data_2016_2021.csv", "special_day_data_2016_2021.csv"]

# Read every file with its detected encoding and normalise the column names.
dataframes = {}
for file in files:
    encoding = detect_encoding(file)
    print(f"📂 Detected encoding for {file}: {encoding}")
    df = pd.read_csv(file, encoding=encoding, header=0)
    # Remove a byte-order mark BEFORE trimming whitespace: U+FEFF is not
    # whitespace for str.strip(), so stripping first would leave any spaces
    # that sat between the BOM and the column name (e.g. "\ufeff Date").
    # regex=False makes the BOM a literal match on every pandas version.
    df.columns = df.columns.str.replace("\ufeff", "", regex=False).str.strip()
    dataframes[file] = df

# Convenient names for the three loaded frames (same objects as in `dataframes`).
meal_df = dataframes["meal_data_2016_2021.csv"]
weather_df = dataframes["weather_data_2016_2021.csv"]
special_day_df = dataframes["special_day_data_2016_2021.csv"]

# Convert 'Date' to datetime in every loaded frame. The assignment mutates the
# frame objects themselves, so meal_df / weather_df / special_day_df see the
# converted column. Values that cannot be parsed become NaT (errors="coerce").
for source_name, df in dataframes.items():
    if "Date" not in df.columns:
        # Name the offending file; interpolating the DataFrame itself would
        # dump its whole repr into the message without saying which file it is.
        raise KeyError(f"🚨 ERROR: 'Date' column not found in {source_name}")
    df["Date"] = pd.to_datetime(df["Date"], errors="coerce")

# Left-join the weather columns, then the special-day columns, onto the meal
# rows using 'Date' as the key; every meal row is kept.
with_weather = pd.merge(meal_df, weather_df, how="left", on="Date")
merged_df = pd.merge(with_weather, special_day_df, how="left", on="Date")

# Add 'special_day': 1 when 'Special_Day_Name' holds a value, 0 otherwise.
has_special_day = merged_df["Special_Day_Name"].notna()
merged_df["special_day"] = has_special_day.astype(int)

# Sort by date.
merged_df = merged_df.sort_values("Date")

# Save to a new file (written as UTF-8 with a BOM, without the index column).
merged_df.to_csv("merged_data.csv", encoding="utf-8-sig", index=False)

print("✅ Merging complete! The file 'merged_data.csv' has been created.")


📂 Detected encoding for meal_data_2016_2021.csv: EUC-KR
📂 Detected encoding for weather_data_2016_2021.csv: EUC-KR
📂 Detected encoding for special_day_data_2016_2021.csv: EUC-KR
✅ Merging complete! The file 'merged_data.csv' has been created.
