# Cross-Lingual Transferability of Voice Analysis Models: a Parkinson's Disease Case Study

## Preliminaries

### Imports

In [None]:
from typing import Tuple, List, Dict, Union

In [None]:
import os
from ast import literal_eval

In [None]:
import pandas as pd

In [None]:
%matplotlib inline
from matplotlib import pyplot as plt
import seaborn as sns

### Constants

In [None]:
# Directories of the two experiment runs (English -> Hindi and Hindi -> English).
# Each one is joined with 'scores.csv' when the results are loaded further down.
# NOTE(review): both are empty here, so both loads resolve to the same
# 'scores.csv' in the current working directory -- set them before running.
EN_TO_HI_EXPERIMENTS_DIR: str = ''
HI_TO_EN_EXPERIMENTS_DIR: str = ''

In [None]:
# Directory the exported figures (PDF files) are written to.
OUT_DIR_PATH: str = './experiments/results'
# `makedirs` also creates the missing parent directory (`./experiments`), which
# plain `mkdir` cannot do, and `exist_ok=True` makes re-running the cell a no-op
# without a separate (race-prone) existence check.
os.makedirs(OUT_DIR_PATH, exist_ok=True)

In [None]:
# Lookup tables translating the raw codes stored in the results files into the
# human-readable labels used in the converted DataFrames and in the figures.

# Input feature set code -> label.
FEATURES_MAPPING: Dict[str, str] = {
    'spectral': 'Acoustic and Spectral',
    'vggish': 'VGGish',
    'soundnet': 'SoundNet',
    'wav2vec': 'Wav2Vec 2.0'
}
# Pooling approach code -> label.
POOLING_MAPPING: Dict[str, str] = {
    'avg': 'Avg. pooling',
    'max': 'Max pooling',
    'flatten': 'Flattening'
}
# Domain adaptation flag -> label.
ADAPTATION_MAPPING: Dict[bool, str] = {
    False: 'None',
    True: 'CORrelation ALignment (CORAL)'
}
# Evaluated data set code -> label.
DATA_MAPPING: Dict[str, str] = {
    'src': 'Source data (test split)',
    'tgt': 'Target data'
}
# Metric code -> label.
METRICS_MAPPING: Dict[str, str] = {
    'accuracy': 'Accuracy',
    'precision': 'Precision',
    'recall': 'Recall',
    'fscore': '$F_1$-score',
    'specificity': 'Specificity',
    'roc_auc': 'ROC AUC',
    'support': 'Support',
    'fpr': 'False positives rate',
    'tpr': 'True positives rate',
    'roc_thresholds': 'Threshold (from ROC curve)',
    'precisions': 'Precision score',
    'recalls': 'Recall score',
    'pr_rc_thresholds': 'Threshold (from precision-recall curve)',
    'confusion_matrix': 'Confusion matrix'
}

In [None]:
# Column names of the converted results DataFrame, in the same order as the
# tuple returned by `convert_series`.
DF_COLUMNS: List[str] = [
    'Input features',
    'Pooling approach',
    'Domain adaptation',
    'Data set',
    'Metric',
    'Value',
]

### Utility function(s)

In [None]:
def convert_series(raw_series: pd.Series) -> Tuple[
    str, str, str, str, str, Union[float, int, List[float], List[List[int]]]
]:
    """Translate one raw results row into its presentation form.

    The codes found under 'feature', 'pooling', 'adaptation', 'data_set' and
    'metric' are replaced by the labels of the corresponding mapping tables,
    and the content of 'value' is parsed into a Python object.

    :param raw_series: row of the raw results table; it must provide the
        entries 'feature', 'pooling', 'adaptation', 'data_set', 'metric'
        and 'value'.
    :return: tuple (feature, pooling, adaptation, data set, metric, value),
        i.e. one element per name in `DF_COLUMNS`.
    :raises KeyError: if a code is missing from its mapping table.
    :raises ValueError: if 'value' is a string that is not a valid Python
        literal (raised by `literal_eval`).
    """
    feature: str = FEATURES_MAPPING[raw_series['feature']]
    pooling: str = POOLING_MAPPING[raw_series['pooling']]
    adaptation: str = ADAPTATION_MAPPING[raw_series['adaptation']]
    data: str = DATA_MAPPING[raw_series['data_set']]
    metric: str = METRICS_MAPPING[raw_series['metric']]

    raw_value = raw_series['value']
    # `literal_eval` only accepts strings (or AST nodes): a value the CSV
    # reader has already parsed to a number is passed through unchanged
    # instead of raising ValueError.
    value = literal_eval(raw_value) if isinstance(raw_value, str) else raw_value

    return feature, pooling, adaptation, data, metric, value

In [None]:
def convert_results_df(raw_df: pd.DataFrame) -> pd.DataFrame:
    """Convert a raw results table into the presentation DataFrame.

    Every row of `raw_df` is passed through `convert_series`, which maps the
    raw codes to readable labels and parses the stored value; the resulting
    tuples become the rows of a new DataFrame whose columns are `DF_COLUMNS`.

    :param raw_df: raw results table (one row per stored score).
    :return: new DataFrame with columns `DF_COLUMNS`; it has no rows when
        `raw_df` has none.
    """
    # The rows must go through `convert_series`: handing the raw rows to the
    # constructor together with `columns=DF_COLUMNS` leaves the codes
    # untranslated and the values unparsed.
    return pd.DataFrame(
        [convert_series(row) for _, row in raw_df.iterrows()],
        columns=DF_COLUMNS
    )

## Data

Distribution of duration (in seconds) of audio files

## Results
Load DataFrames with results

In [None]:
# Read the scores of the English -> Hindi experiments and convert them to the
# presentation format in a single step.
df_en_to_hi: pd.DataFrame = convert_results_df(
    pd.read_csv(os.path.join(EN_TO_HI_EXPERIMENTS_DIR, 'scores.csv'))
)

In [None]:
# Read the scores of the Hindi -> English experiments and convert them to the
# presentation format in a single step.
df_hi_to_en = convert_results_df(
    pd.read_csv(os.path.join(HI_TO_EN_EXPERIMENTS_DIR, 'scores.csv'))
)

### Detection in English

### Transferring from English to Hindi

### Detection in Hindi

In [None]:
# Keep only the rows obtained without domain adaptation on the source data
# (test split) of the Hindi -> English experiments.
df = df_hi_to_en.loc[
    df_hi_to_en['Domain adaptation'].eq(ADAPTATION_MAPPING[False])
    & df_hi_to_en['Data set'].eq(DATA_MAPPING['src'])
]

#### Metrics comparison

In [None]:
# Only scalar scores can be drawn as bars: the curve-valued metrics (ROC and
# precision-recall points, thresholds) and the confusion matrix hold lists.
# NOTE(review): 'support' is left out as well because it is a count rather than
# a score and would dominate the shared y axis -- add it back if it is wanted.
score_labels = [
    METRICS_MAPPING[key]
    for key in ('accuracy', 'precision', 'recall', 'fscore', 'specificity', 'roc_auc')
]
# The 'Metric' column holds the display labels, not the raw metric codes.
scores_df = df[df['Metric'].isin(score_labels)].copy()
scores_df['Value'] = scores_df['Value'].astype(float)

# One row per pooling approach and one column per input feature set. The grid
# is sized from the mappings that drive the loops below, and `squeeze=False`
# keeps `axes` two-dimensional whatever the grid shape is.
fig, axes = plt.subplots(
    nrows=len(POOLING_MAPPING),
    ncols=len(FEATURES_MAPPING),
    figsize=(12, 24),
    sharex=True,
    sharey='row',
    squeeze=False
)

for col, feature in enumerate(FEATURES_MAPPING.values()):
    for row, pooling in enumerate(POOLING_MAPPING.values()):
        ax = axes[row][col]
        sns.barplot(
            data=scores_df[
                (scores_df['Input features'] == feature) & (scores_df['Pooling approach'] == pooling)
            ],
            x='Metric',
            y='Value',
            ax=ax,
            linewidth=1.,
            edgecolor='0',
            order=score_labels
        )
        ax.set_title(f'Input features: {feature}, Pooling approach: {pooling}')
        ax.set_xlabel('Metric')
        ax.set_ylabel('Score')
plt.tight_layout()
plt.show()

In [None]:
# Export the metrics-comparison figure built in the previous cell as a PDF
# inside the output directory.
fig.savefig(os.path.join(OUT_DIR_PATH, 'hi_cls_results.pdf'))

#### ROC curve

In [None]:
# NOTE(review): no ROC plotting code is visible in this section, so `fig` still
# refers to the figure created under "Metrics comparison" and that figure is
# what gets written here -- TODO add the ROC curve plot before this cell.
fig.savefig(os.path.join(OUT_DIR_PATH, 'hi_roc_curve.pdf'))

#### Precision-recall curve

In [None]:
# NOTE(review): no precision-recall plotting code is visible in this section,
# so `fig` still refers to a figure created in an earlier cell -- TODO add the
# precision-recall curve plot before this cell.
fig.savefig(os.path.join(OUT_DIR_PATH, 'hi_pr_curve.pdf'))

#### Confusion matrix

In [None]:
# NOTE(review): no confusion-matrix plotting code is visible in this section,
# so `fig` still refers to a figure created in an earlier cell -- TODO add the
# confusion matrix plot before this cell.
fig.savefig(os.path.join(OUT_DIR_PATH, 'hi_cm.pdf'))

### Transferring from Hindi to English