In [1]:
import os
import pandas as pd

# Consolidate the per-event CrisisNLP CrowdFlower TSV files into one TSV.
#
# Each sub-folder of `base_dir` is treated as one event; the first `.tsv` file
# found in it is loaded, tagged with an 'event' column holding the folder
# name, and all loaded frames are concatenated and written to `interim_data`.

# Base directory, resolved relative to the current working directory —
# adjust if needed.
base_dir = os.path.abspath(os.path.join(
    os.getcwd(), '..', '..', 'data', 'raw_data', 'CRISIS_NLP',
    'CrisisNLP_labeled_data_crowdflower_v2', 'CrisisNLP_labeled_data_crowdflower',
))

print("📁 Base directory:", base_dir)

# One DataFrame per successfully loaded event folder.
all_dfs = []

# os.listdir() returns entries in arbitrary order; sort so the row order of
# the combined file is reproducible across machines and runs.
for event_folder in sorted(os.listdir(base_dir)):
    event_path = os.path.join(base_dir, event_folder)

    # Skip stray files sitting next to the event folders.
    if not os.path.isdir(event_path):
        continue

    print(f"\n📂 Processing event: {event_folder}")
    # Sorted for the same reason as above: it makes "the first .tsv file"
    # a deterministic choice when a folder holds more than one.
    files = sorted(os.listdir(event_path))
    print("📄 Files:", files)

    # Dynamically find the first .tsv file (case-insensitive)
    tsv_file = next((f for f in files if f.lower().endswith('.tsv')), None)

    if tsv_file is None:
        print("⚠️ No TSV file found in:", event_folder)
        continue

    tsv_path = os.path.join(event_path, tsv_file)

    # Only the read itself is guarded. The broad catch is a deliberate
    # best-effort: one unreadable or malformed file is reported and skipped
    # so the remaining events are still consolidated.
    try:
        df = pd.read_csv(tsv_path, sep='\t', encoding='utf-8')
    except Exception as e:
        print(f"❌ Failed to read {tsv_file}")
        print("   Error:", e)
    else:
        # Remember which event each row came from.
        df['event'] = event_folder
        all_dfs.append(df)
        print(f"✅ Loaded: {tsv_file} ({len(df)} rows)")

# Save combined file
if all_dfs:
    combined_df = pd.concat(all_dfs, ignore_index=True)
    # Walk up from base_dir to the `data` directory, then into `interim_data`.
    # abspath() collapses the '..' segments so the printed path is readable
    # and safe to hand to os.makedirs().
    output_path = os.path.abspath(os.path.join(
        base_dir, '..', '..', '..', '..', 'interim_data',
        'crisis_consolidated_crowdflower.tsv',
    ))
    # to_csv() does not create missing directories; make sure the target
    # folder exists so a fresh checkout does not fail after all the loading.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    combined_df.to_csv(output_path, sep='\t', index=False)
    print(f"\n✅ Combined TSV saved to:\n{output_path}")
else:
    print("\n⚠️ No data was loaded.")


📁 Base directory: c:\Users\MUHAMMAD ZAIN\Desktop\DisasterInsight_AI\DisasterInsight_AI_Global_Real-Time_Disaster_Analytics_Platform\data\raw_data\CRISIS_NLP\CrisisNLP_labeled_data_crowdflower_v2\CrisisNLP_labeled_data_crowdflower

📂 Processing event: 2013_Pakistan_eq
📄 Files: ['2013_Pakistan_eq_CF_labeled_data.tsv', 'labeling-instructions.txt']
✅ Loaded: 2013_Pakistan_eq_CF_labeled_data.tsv (1881 rows)

📂 Processing event: 2014_California_Earthquake
📄 Files: ['2014_California_Earthquake_CF_labeled_data.tsv', 'labeling-instructions.txt']
✅ Loaded: 2014_California_Earthquake_CF_labeled_data.tsv (1701 rows)

📂 Processing event: 2014_Chile_Earthquake_cl
📄 Files: ['2014_Chile_Earthquake_cl_labeled_data.tsv', 'labeling-instructions.txt']
✅ Loaded: 2014_Chile_Earthquake_cl_labeled_data.tsv (1585 rows)

📂 Processing event: 2014_Chile_Earthquake_en
📄 Files: ['2014_Chile_Earthquake_en_CF_labeled_data.tsv', 'labeling-instructions.txt']
✅ Loaded: 2014_Chile_Earthquake_en_CF_labeled_data.tsv (1932 