In [None]:
Sure, let's go ahead with the steps to rename the original file, apply the corrections, and save the corrected data to a new CSV file.

### Step-by-Step Code to Apply Corrections and Save

1. **Rename the Original File**: Rename `development_scene_annotations.csv` to `development_scene_annotations.csv.orig`.
2. **Apply the Corrections**: Use the correction function to update the labels.
3. **Save the Corrected Data**: Save the corrected DataFrame to a new `development_scene_annotations.csv`.

### Step 1: Rename the Original File

```python

In [None]:
import os

# Placeholder paths: point these at the annotations CSV and at the name its
# untouched backup should get.
original_file_path = '/path/to/development_scene_annotations.csv'
backup_file_path = '/path/to/development_scene_annotations.csv.orig'

# Move the original aside only once. Step 3 later writes the corrected data to
# the same path as original_file_path, so an unconditional rename on a re-run
# would replace the pristine backup with already-corrected data (os.rename
# silently overwrites an existing destination on POSIX).
if os.path.exists(backup_file_path):
    print(f"Backup already exists, leaving it untouched: {backup_file_path}")
else:
    os.rename(original_file_path, backup_file_path)

In [None]:
```

### Step 2: Apply the Corrections

```python

In [None]:
import pandas as pd

# Load the untouched annotations from the .orig backup. backup_file_path is
# defined in the Step 1 cell, so that cell must have been run first.
scene_annotations_df = pd.read_csv(backup_file_path)

# Filename fragment -> command label. Lookup walks this dict in insertion order
# and stops at the first fragment contained in the filename.
command_mapping = {
    'Ofen_aus': 'Ofen aus',
    'Radio_an': 'Radio an',
    'Alarm_an': 'Alarm an',
    'Radio_aus': 'Radio aus',
    'Fernseher_aus': 'Fernseher aus',
    'Staubsauger_an': 'Staubsauger an',
    'Staubsauger_aus': 'Staubsauger aus',
    # Add more mappings as necessary
}


def auto_correct_label(row):
    """Return the label implied by the row's filename, or its current label.

    Args:
        row: Mapping-like record (for example a DataFrame row) exposing
            'filename' and 'command' entries.

    Returns:
        The mapped label of the first ``command_mapping`` key that occurs as a
        substring of ``row['filename']``; ``row['command']`` when none does.
    """
    source_name = row['filename']
    first_hit = next(
        (fragment for fragment in command_mapping if fragment in source_name),
        None,
    )
    if first_hit is None:
        # No known fragment in the filename: keep the label as it is.
        return row['command']
    return command_mapping[first_hit]

# Relabel every row: axis=1 passes each row to auto_correct_label, and the
# returned labels overwrite the existing 'command' column.
scene_annotations_df['command'] = scene_annotations_df.apply(auto_correct_label, axis=1)

In [None]:
```

### Step 3: Save the Corrected Data

```python

In [None]:
# Save the corrected DataFrame to a new CSV file at the original file's
# (placeholder) path; index=False keeps the DataFrame row index out of the CSV.
corrected_file_path = '/path/to/development_scene_annotations.csv'
scene_annotations_df.to_csv(corrected_file_path, index=False)

In [None]:
```

### Full Script

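Here is the full script. Unlike Step 2, it does not use the filename-to-label mapping: it assumes the original has already been renamed to `.orig`, works on a `.csv.0` copy of it, and derives each label by parsing the commands out of the filename: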

```python

In [22]:
import os
import re
import shutil
import pandas as pd

# Paths to the untouched backup (.orig), the scratch working copy made from it
# (.0), and the destination of the corrected annotations.
original_file_path = '../dataset/development_scene_annotations.csv.orig'
working_copy_path = '../dataset/development_scene_annotations.csv.0'
corrected_file_path = '../dataset/development_scene_annotations.csv'

# Step 1: Create a working copy of the original file, so the .orig backup is
# only ever read from.
shutil.copy(original_file_path, working_copy_path)

# Step 2: Load the working copy into a DataFrame
df = pd.read_csv(working_copy_path)

# Define the pattern to parse the filename:
#   group 1 = leading digits,
#   group 2 = the true/false value following "speech_",
#   group 3 = one or more "<Name>_<an|aus>" pairs, each optionally followed by "_".
filename_pattern = re.compile(r'(\d+)_speech_(true|false)_((?:[a-zA-ZäöüÄÖÜß]+_(?:an|aus)_?)+)', re.UNICODE)

# A single "<Name>_<an|aus>" pair as it appears inside group 3 of filename_pattern.
_command_pair_pattern = re.compile(r'([a-zA-ZäöüÄÖÜß]+)_(an|aus)', re.UNICODE)


# Function to parse filename and extract commands
def parse_filename(filename):
    """Extract the ordered command labels encoded in an annotation filename.

    Args:
        filename: Name such as '1003_speech_false_Licht_aus'. Non-string
            values (for example NaN from an empty CSV cell) are treated as
            unparseable.

    Returns:
        A list such as ['Licht aus', 'Radio an'], in the order the pairs
        appear in the filename; an empty list when the filename does not
        match ``filename_pattern``.
    """
    if not isinstance(filename, str):
        return []

    match = filename_pattern.match(filename)
    if not match:
        return []

    # Extract the pairs with a regex rather than split('_'): group 3 may end
    # in "_" (the pattern's optional separator also matches before a
    # non-command suffix), which made the split produce an odd-length list
    # and the pairwise indexing raise IndexError.
    return [
        f"{name} {state}"
        for name, state in _command_pair_pattern.findall(match.group(3))
    ]

# Parse the commands from filenames and add to the DataFrame
df['parsed_commands'] = df['filename'].apply(parse_filename)

# Step 3: Order rows by filename, then by start time within each filename.
# A two-key sort gives the same layout as grouping by filename and sorting each
# group by 'start', without groupby.apply: passing the grouping column to the
# applied function is deprecated since pandas 2.2, and once it is excluded the
# 'filename' column would be lost after reset_index(drop=True).
grouped = df.sort_values(by=['filename', 'start']).reset_index(drop=True)

# Step 4: Relabel each segment from the command order encoded in the filename
def assign_labels(group):
    """Overwrite 'command' positionally with the group's parsed commands.

    Args:
        group: DataFrame holding the rows of one filename, with 'filename',
            'command' and 'parsed_commands' columns. The command list is read
            from the first row.

    Returns:
        A copy of ``group`` with a fresh 0..n-1 index in which row ``k`` has
        received the ``k``-th parsed command. Rows beyond the number of
        parsed commands keep their label and a warning is printed for each.
    """
    labels = group['parsed_commands'].iloc[0]
    relabelled = group.reset_index(drop=True)
    label_count = len(labels)
    for position in range(len(relabelled)):
        if position >= label_count:
            print(f"Warning: More segments than commands in {relabelled['filename'].iloc[0]}")
            continue
        relabelled.at[position, 'command'] = labels[position]
    return relabelled

# Apply the label assignment function one filename at a time. Iterating over the
# groups (instead of groupby.apply) hands assign_labels the complete sub-frame,
# including the 'filename' column it reads for its warning; groupby.apply's
# passing of the grouping column is deprecated since pandas 2.2.
labelled_groups = [assign_labels(group) for _, group in grouped.groupby('filename')]
if labelled_groups:
    corrected_df = pd.concat(labelled_groups, ignore_index=True)
else:
    # No groups to relabel: keep an empty frame with the same columns
    # (pd.concat raises on an empty list).
    corrected_df = grouped.iloc[0:0].copy()

# Drop the temporary column
corrected_df = corrected_df.drop(columns=['parsed_commands'])

# Step 5: Save the corrected DataFrame to a new CSV file
corrected_df.to_csv(corrected_file_path, index=False)

# Verify the saved corrections
print("Label corrections applied and saved successfully.")
print(corrected_df.head())


Label corrections applied and saved successfully.
                        filename       command     start       end
0    1003_speech_false_Licht_aus     Licht aus  12.20090  13.57599
1       1008_speech_true_Ofen_an       Ofen an   6.90112   8.52638
2      1010_speech_true_Radio_an      Radio an  13.03100  14.03146
3  1011_speech_true_Fernseher_an  Fernseher an  14.11030  15.36121
4   1012_speech_true_Heizung_aus   Heizung aus  11.20520  12.70590


In [17]:
```

Run this script to apply the corrections and save the updated labels to `development_scene_annotations.csv`. Let me know if you need any further assistance!

SyntaxError: invalid syntax (954856008.py, line 1)