# 1. Import Necessary Libraries

In [15]:
from pathlib import Path
import pandas as pd
import seaborn as sns
import logging
import matplotlib.pyplot as plt
from scipy.stats import f_oneway

# Directory (relative to the notebook's working directory) that holds the
# per-language "<language>_features.csv" tables.
RELATIVE_FEATURES_DIR = "../data/features"

# NOTE(review): these two switches are not read by the cells visible here;
# presumably they control how child speakers are filtered/plotted elsewhere
# in the notebook -- confirm against the cells that use them.
REMOVE_KIDS = True
SEPARATE_KIDS_PLOTS = False

# Language codes used as the filename prefix of each features CSV.
LANGUAGES = ['es', 'hi', 'ar', 'ja']

In [17]:
# Collect the pitch_mean column of every configured language so the groups
# can be compared with a one-way ANOVA.
pitch_means = []

for language in LANGUAGES:
    file_path = Path(RELATIVE_FEATURES_DIR) / f'{language}_features.csv'

    # Skip languages whose feature table has not been generated yet.
    if not file_path.exists():
        print(f"File {language}_features.csv does not exist. Please run the feature extraction script first.")
        continue

    df_features = pd.read_csv(file_path)

    # Skip tables that lack the column under test.
    if 'pitch_mean' not in df_features.columns:
        print(f"pitch_mean column not found in {language}_features.csv")
        continue

    # A single NaN in any group would turn the F-statistic and p-value into
    # NaN, so missing values are dropped before the group is recorded.
    language_pitch = df_features['pitch_mean'].dropna()
    if language_pitch.empty:
        print(f"pitch_mean column in {language}_features.csv has no valid values")
        continue

    pitch_means.append(language_pitch)

# Run the ANOVA only when every configured language contributed a group.
# f_oneway also needs at least two groups, so an empty or single-language
# configuration falls through to the "cannot be performed" message instead
# of raising.
if len(pitch_means) == len(LANGUAGES) and len(pitch_means) >= 2:
    f_statistic, p_value = f_oneway(*pitch_means)
    print(f"F-statistic: {f_statistic}, P-value: {p_value}")
else:
    print("ANOVA cannot be performed, ensure all languages have valid data.")


F-statistic: 7753.598106003851, P-value: 0.0
