##### `os` lets us walk through the file system, and `shutil` provides high-level operations on files and directories (copy, move, delete).

In [1]:
import os
import shutil
from typing import Generator, List

### How it works - Example

The `get_chapters` function takes the root directory of a dataset and returns all directories that do not have subdirectories (final chapter directories). Here's an example:

**Root directory structure:**
```text
datasets/
└── LibriSpeech/
    └── dev-clean/
        ├── 1272/
        │   ├── 128104/
        │   └── 128105/
        └── 1988/
            ├── 147956/
            └── 147957/
```
**Would return:**
```text
[
    'datasets/LibriSpeech/dev-clean/1272/128104',
    'datasets/LibriSpeech/dev-clean/1272/128105',
    'datasets/LibriSpeech/dev-clean/1988/147956',
    'datasets/LibriSpeech/dev-clean/1988/147957'
]
```

In [18]:
def get_chapters(root_directory: str) -> list[str]:
    """Collect every leaf directory (chapter) found under the root directory.

    A directory counts as a chapter when it contains no subdirectories.
    The tree is walked in sorted order so that the returned list -- and
    therefore any grouping built from it -- is reproducible across runs.

    Args:
        root_directory (str): path to the root directory

    Returns:
        list[str]: paths to the chapter directories, in sorted traversal
            order. The root itself is returned when it has no
            subdirectories; the list is empty when the root does not exist.
    """
    chapters = []
    for act_root, sub_dirs, _files in os.walk(root_directory):
        # os.walk (top-down) descends into sub_dirs in the order of this
        # list, so sorting it in place fixes the traversal order instead of
        # relying on whatever order the operating system lists entries in.
        sub_dirs.sort()
        if not sub_dirs:
            chapters.append(act_root)
    return chapters

### How it works - Example
The `get_groups` function takes a list of chapters and splits it into groups of a specified length.

Assume you have the following list of chapter paths:

```text
[
    'datasets/LibriSpeech/dev-clean/1272/128104',
    'datasets/LibriSpeech/dev-clean/1272/128105',
    'datasets/LibriSpeech/dev-clean/1988/147956',
    'datasets/LibriSpeech/dev-clean/1988/147957',
    'datasets/LibriSpeech/dev-clean/2010/128202'
]
```
Calling `list(get_groups(list_of_chapters, 2))` **would return** (the function itself is a generator, so `list(...)` is needed to materialize the groups):

```text
[
    ['datasets/LibriSpeech/dev-clean/1272/128104', 'datasets/LibriSpeech/dev-clean/1272/128105'],
    ['datasets/LibriSpeech/dev-clean/1988/147956', 'datasets/LibriSpeech/dev-clean/1988/147957'],
    ['datasets/LibriSpeech/dev-clean/2010/128202']
]
```

In [None]:
def get_groups(list_of_chapters: List[str],
               length_group: int
               ) -> Generator[List[str], None, None]:
    """Split the list of chapters into consecutive groups of a given length.

    Every group holds ``length_group`` chapters except possibly the last
    one, which holds the remainder. An empty input yields no groups.

    Args:
        list_of_chapters (List[str]): list of chapters to be grouped
        length_group (int): maximum number of chapters per group (>= 1)

    Yields:
        List[str]: the next group of chapters, in input order

    Raises:
        ValueError: if ``length_group`` is smaller than 1. Because this is
            a generator, the error surfaces when iteration starts.
    """
    # A negative step would make range() empty and silently drop every
    # chapter; a zero step raises an unhelpful range() error. Reject both.
    if length_group < 1:
        raise ValueError(
            f"length_group must be a positive integer, got {length_group!r}"
        )
    for i in range(0, len(list_of_chapters), length_group):
        yield list_of_chapters[i:i + length_group]

### How it works - Example

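The `move_chapters` function takes a list of grouped chapter paths and copies 
each chapter into a new directory structure under a destination path. Despite its name, the function copies with `shutil.copytree`, so the original chapter directories are left in place.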

Assume the following input:
```text
[
    ['datasets/LibriSpeech/dev-clean/1272/128104', 'datasets/LibriSpeech/dev-clean/1272/128105'],
    ['datasets/LibriSpeech/dev-clean/1988/147956', 'datasets/LibriSpeech/dev-clean/1988/147957'],
    ['datasets/LibriSpeech/dev-clean/2010/128202']
]
```
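Calling `move_chapters(groups, dest_directory)` with `dest_directory = "datasets/LibriSpeech/chapter-groups"` **would create the following directory structure** (the function itself returns `None`):
```text
datasets/
└── LibriSpeech/
    └── chapter-groups/
        ├── group_1/
        │   ├── 128104/
        │   └── 128105/
        ├── group_2/
        │   ├── 147956/
        │   └── 147957/
        └── group_3/
            └── 128202/
```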

In [None]:
def move_chapters(groups: list[list[str]], dest_directory: str) -> None:
    """Copy every chapter into a numbered group directory under the destination.

    Group number ``i`` (counting from 1) is written to
    ``<dest_directory>/group_<i>/`` and each chapter keeps its own directory
    name inside it. Despite the function's name, chapters are copied with
    ``shutil.copytree``: the source directories are left in place.

    The function can be re-run on an existing destination; chapters that
    were already copied are copied over again instead of raising.

    Args:
        groups (list[list[str]]): groups of chapter paths
        dest_directory (str): path to the destination directory (created
            if it does not exist)

    Raises:
        FileExistsError: if two chapters of the same group share the same
            directory name and would therefore end up in the same place.
    """
    os.makedirs(dest_directory, exist_ok=True)

    # Copy each group of chapters to its own directory
    for i, group in enumerate(groups):
        group_name = f"group_{i+1}"
        dest_group = os.path.join(dest_directory, group_name)
        os.makedirs(dest_group, exist_ok=True)

        # Names already used in this group during this call; lets us tell a
        # real collision apart from leftovers of a previous run.
        used_names = set()

        # Copy each chapter in the group to the new directory
        for chapter in group:
            # normpath strips a trailing separator, which would otherwise
            # make basename return "" and target the group directory itself.
            name_chapter = os.path.basename(os.path.normpath(chapter))
            new_directory = os.path.join(dest_group, name_chapter)
            if name_chapter in used_names:
                raise FileExistsError(
                    f"Chapter name {name_chapter!r} occurs more than once in "
                    f"group {i + 1}; cannot copy {chapter} to {new_directory}"
                )
            used_names.add(name_chapter)
            # dirs_exist_ok keeps the notebook re-runnable after a previous
            # (possibly interrupted) run already created the destination.
            shutil.copytree(chapter, new_directory, dirs_exist_ok=True)
            print(f"Copied {chapter} to {new_directory}")

        print(f"\nGroup {i + 1} completed ({len(group)} chapters) in {dest_group}.\n")

In [None]:
# Ensure that you have the correct path to your root directory
root_directory = "datasets/LibriSpeech/dev-clean"
# Ensure that you have the correct path until "datasets/LibriSpeech/"
dest_directory = "datasets/LibriSpeech/chapter-groups"
# Number of chapters placed in each group_N directory
GROUP_SIZE = 100

# Fail fast on a wrong path: os.walk yields nothing for a missing directory,
# which would otherwise turn the whole run into a silent no-op.
if not os.path.isdir(root_directory):
    raise FileNotFoundError(f"Root directory not found: {root_directory!r}")

chapters = get_chapters(root_directory)
groups = list(get_groups(chapters, GROUP_SIZE))
move_chapters(groups, dest_directory)