In [None]:
pip install pandas numpy matplotlib seaborn nltk tqdm librosa soundfile praat-parselmouth opensmile

In [6]:
def create_integrated_stance_vector(row):
    # ------ DIMENSIONAL COMPONENTS ------
    
    # VALENCE (positive-negative) - blend acoustic and lexical features
    valence = (
        0.3 * standardize(row['std_pitch']) +             # Pitch variability
        0.2 * standardize(row['speech_rate']) +           # Speech rate
        0.3 * row['posemo'] -                             # Positive emotion words
        0.2 * row['negemo']                               # Negative emotion words
    )
    
    # AROUSAL (activation) - blend acoustic and lexical features
    arousal = (
        0.3 * standardize(row['mean_energy']) +           # Energy level
        0.2 * standardize(row['speech_rate']) +           # Speech rate
        0.2 * standardize(row['std_energy']) +            # Energy variability
        0.1 * (row['posemo'] + row['negemo']) +           # Emotional intensity
        0.1 * row['interrog'] +                           # Questions indicate engagement
        0.1 * row['has_exclamation']                      # Exclamations indicate high arousal
    )
    
    # DOMINANCE (power/control) - blend acoustic and lexical features
    dominance = (
        0.2 * standardize(row['mean_energy']) +           # Energy level
        0.2 * row['certain'] -                            # Certainty words
        0.2 * row['tentat'] +                             # Absence of tentative language
        0.2 * row['power'] +                              # Power-related words
        0.1 * (1 - row['has_question_mark']) +            # Absence of questions
        0.1 * row['i']                                    # Self-reference ("I" statements)
    )
    
    # ------ CATEGORICAL EMOTION COMPONENTS ------
    
    # Initialize emotion scores
    emotions = {
        'happiness': 0.1,
        'sadness': 0.1,
        'anger': 0.1,
        'fear': 0.1,
        'disgust': 0.1,
        'surprise': 0.1
    }
    
    # HAPPINESS - combine acoustic and lexical cues
    happiness_score = (
        0.3 * standardize(max(0, row['mean_pitch'])) +     # Higher pitch
        0.2 * standardize(max(0, row['std_pitch'])) +      # Pitch variability
        0.2 * standardize(max(0, row['speech_rate'])) +    # Faster speech
        0.3 * row['posemo']                                # Positive emotion words
    )
    emotions['happiness'] += max(0, happiness_score)
    
    # SADNESS - combine acoustic and lexical cues
    sadness_score = (
        0.3 * standardize(min(0, -row['mean_pitch'])) +    # Lower pitch
        0.2 * standardize(min(0, -row['std_pitch'])) +     # Less pitch variability
        0.2 * standardize(min(0, -row['speech_rate'])) +   # Slower speech
        0.3 * row['sad']                                   # Sadness-related words
    )
    emotions['sadness'] += max(0, sadness_score)
    
    # ANGER - combine acoustic and lexical cues
    anger_score = (
        0.3 * standardize(max(0, row['mean_energy'])) +    # Higher energy
        0.2 * standardize(max(0, row['std_energy'])) +     # Energy variability
        0.2 * row['anger'] +                               # Anger-related words
        0.2 * row['swear'] +                               # Swear words
        0.1 * row['negate']                                # Negations
    )
    emotions['anger'] += max(0, anger_score)
    
    # FEAR - combine acoustic and lexical cues
    fear_score = (
        0.3 * standardize(max(0, row['std_pitch'])) +      # Pitch variability
        0.2 * standardize(max(0, row['speech_rate'])) +    # Faster speech
        0.3 * row['anx'] +                                 # Anxiety-related words
        0.1 * row['tentat'] +                              # Tentative language
        0.1 * row['risk']                                  # Risk-related words
    )
    emotions['fear'] += max(0, fear_score)
    
    # DISGUST - harder to detect, but using available cues
    disgust_score = (
        0.4 * row['negemo'] +                              # Negative emotion words
        0.3 * standardize(min(0, -row['mean_pitch'])) +    # Lower pitch
        0.3 * row['negate']                                # Negations
    )
    emotions['disgust'] += max(0, disgust_score)
    
    # SURPRISE - combine acoustic and lexical cues
    surprise_score = (
        0.3 * standardize(max(0, row['max_pitch'])) +      # Pitch peaks
        0.3 * standardize(max(0, row['std_energy'])) +     # Energy variability
        0.2 * row['interrog'] +                            # Questions
        0.2 * row['has_exclamation']                       # Exclamations
    )
    emotions['surprise'] += max(0, surprise_score)
    
    # Normalize emotion probabilities
    total = sum(emotions.values())
    emotions = {k: v/total for k, v in emotions.items()}
    
    # ------ DIALOG ACT SPECIFIC COMPONENTS ------
    
    # CERTAINTY - relevant for statements, opinions, agreements
    certainty = (
        0.3 * row['certain'] -
        0.3 * row['tentat'] -
        0.2 * row['interrog'] +
        0.2 * (1 - row['has_question_mark'])
    )
    
    # AGREEMENT - relevant for agreement, acceptance, understanding
    agreement = (
        0.4 * row['assent'] +
        0.3 * row['has_affirmation_word'] -
        0.3 * row['has_negation_word']
    )
    
    # ENGAGEMENT - relevant for backchannels, questions
    engagement = (
        0.3 * row['interrog'] +
        0.2 * row['has_question_word'] +
        0.2 * row['has_you_know'] +
        0.3 * row['has_do_you']
    )
    
    # Construct the final stance vector - map all to 0-1 scale
    sigmoid = lambda x: 1 / (1 + np.exp(-x))
    
    # Return the complete stance vector
    return {
        # Dimensional components
        'valence': sigmoid(valence),
        'arousal': sigmoid(arousal),
        'dominance': sigmoid(dominance),
        
        # Categorical emotion components
        'happiness': emotions['happiness'],
        'sadness': emotions['sadness'],
        'anger': emotions['anger'],
        'fear': emotions['fear'],
        'disgust': emotions['disgust'],
        'surprise': emotions['surprise'],
        
        # Dialog act specific components
        'certainty': sigmoid(certainty),
        'agreement': sigmoid(agreement),
        'engagement': sigmoid(engagement)
    }

# Helper function to standardize values (assumes they are already z-scored)
def standardize(value):
    return max(-3, min(3, value)) / 3  # Clip to [-3, 3] and scale to [-1, 1]

In [8]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler

# Load both datasets
speech_df = pd.read_csv('speech_features_train_with_speech_rate.csv')
text_df = pd.read_csv('text_features_train.csv')

# Merge on dialog_id, speaker, and time information
merged_df = pd.merge(
    speech_df, 
    text_df,
    on=['dialog_id', 'speaker', 'da_tag', 'start_time', 'end_time'],
    how='inner'
)

# Apply the function to create stance vectors
df_stance = merged_df.apply(create_integrated_stance_vector, axis=1, result_type='expand')

# Add dialog act labels
df_stance['da_tag'] = merged_df['da_tag']

# Save the stance vectors to a CSV file
df_stance.to_csv('stance_vectors.csv', index=False)

In [9]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score, f1_score
from sklearn.preprocessing import StandardScaler

# Load the stance vectors (assuming you've already created them)
df_stance = pd.read_csv('stance_vectors.csv')

# Prepare features and target
X = df_stance.drop('da_tag', axis=1)
y = df_stance['da_tag']

# Split data into train, validation, and test sets
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)
X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp)

# Train classifier
clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)

# Evaluate on validation set
y_val_pred = clf.predict(X_val)
val_accuracy = accuracy_score(y_val, y_val_pred)
val_macro_f1 = f1_score(y_val, y_val_pred, average='macro')

print(f"Validation Set Metrics:")
print(f"Accuracy: {val_accuracy:.4f}")
print(f"Macro F1 Score: {val_macro_f1:.4f}")
print("\nDetailed Classification Report (Validation Set):")
print(classification_report(y_val, y_val_pred))

# Final evaluation on test set
y_test_pred = clf.predict(X_test)
test_accuracy = accuracy_score(y_test, y_test_pred)
test_macro_f1 = f1_score(y_test, y_test_pred, average='macro')

print(f"\nTest Set Metrics:")
print(f"Accuracy: {test_accuracy:.4f}")
print(f"Macro F1 Score: {test_macro_f1:.4f}")

# Feature importance analysis
feature_importance = pd.DataFrame({
    'feature': X.columns,
    'importance': clf.feature_importances_
}).sort_values('importance', ascending=False)

print("\nTop 10 most important features:")
print(feature_importance.head(10))

Validation Set Metrics:
Accuracy: 0.7250
Macro F1 Score: 0.4160

Detailed Classification Report (Validation Set):
              precision    recall  f1-score   support

           %       0.32      0.24      0.28       615
          aa       0.44      0.27      0.33       470
           b       0.73      0.83      0.78      1489
          ba       0.43      0.33      0.37       160
          fc       0.13      0.07      0.10        95
          ny       0.13      0.03      0.06       115
          qy       0.54      0.26      0.35       188
          sd       0.64      0.81      0.71      2873
          sv       0.34      0.17      0.23      1058
           x       0.93      0.97      0.95      4054

    accuracy                           0.73     11117
   macro avg       0.47      0.40      0.42     11117
weighted avg       0.69      0.73      0.70     11117


Test Set Metrics:
Accuracy: 0.7222
Macro F1 Score: 0.4170

Top 10 most important features:
      feature  importance
1     aro