In [6]:
import pandas as pd

def merge_csv_files(file_paths, key_column='source_file'):
    """
    Merge multiple CSV files based on a shared key column, combining features from all files.

    Args:
        file_paths (list of str): List of file paths to the CSV files to be merged.
        key_column (str): The name of the column to merge the files on (default is 'source_file').

    Returns:
        pd.DataFrame: A DataFrame containing the merged data.
    """
    # Initialize an empty DataFrame for merging
    merged_df = pd.DataFrame()

    for file_path in file_paths:
        # Read the current CSV file
        current_df = pd.read_csv(file_path)

        # Ensure the key column exists
        if key_column not in current_df.columns:
            raise ValueError(f"Key column '{key_column}' not found in file: {file_path}")

        # Merge with the accumulated DataFrame
        if merged_df.empty:
            merged_df = current_df
        else:
            merged_df = pd.merge(merged_df, current_df, on=key_column, how='outer')

    return merged_df

In [7]:
# Directory prefix for the input CSV files; a later cell prepends it to each file name,
# so the trailing slash is required.
# NOTE(review): this is an absolute, machine-specific path — consider a configurable
# or relative data directory so the notebook runs on other machines.
src = "/home/udayan/AaFiles/Speech/SpeakerIdentification-Assamese/jupyterNotebooks/datasets/"

In [8]:
# Feature CSVs to combine; the commented-out entries are currently excluded.
files = [
    "articulation_old.csv",
    # "glottal.csv",
    # "phonation.csv",
    "prosody_old.csv",
]

In [9]:
# Turn the bare file names into full paths under `src`.
# Entries that already start with `src` are left untouched, so re-running this cell
# does not prepend the directory a second time.
files = [name if name.startswith(src) else src + name for name in files]

In [10]:
# Outer-merge all selected CSVs on the default key column ('source_file').
merged_data = merge_csv_files(files)

In [11]:
# Write the merged table to CSV without the DataFrame index.
# The path is relative, so it resolves against the kernel's current working directory.
# NOTE(review): inputs are read from the absolute `src` directory while the output
# path is relative — confirm both are meant to point at the same `datasets/` folder.
merged_data.to_csv("datasets/articulation_prosody_old.csv", index=False)