In [2]:
import os
import bz2

In [4]:
def extract_bz2_to_txt(input_directory, output_directory):
    """
    Decompress every .bz2 file under input_directory into a UTF-8 .txt file.

    The subdirectory layout found below input_directory is mirrored under
    output_directory, and each "<name>.bz2" is written as "<name>.txt" in the
    matching subdirectory. One "Extracted ..." line is printed per file.

    Parameters:
    - input_directory: The root directory containing .bz2 files in subdirectories.
    - output_directory: The directory to save extracted .txt files
      (created if it does not exist).

    Returns:
    - None. The results are the files written to disk.

    Errors raised while opening, decompressing, decoding (UTF-8) or writing a
    file are not caught here and propagate to the caller.
    """
    suffix = '.bz2'
    # Number of decoded characters copied per read; keeps memory use bounded
    # instead of holding a whole decompressed file in memory at once.
    chunk_chars = 1024 * 1024

    # Ensure the output directory exists
    os.makedirs(output_directory, exist_ok=True)

    # Walk through all subdirectories and files
    for subdir, _, files in os.walk(input_directory):
        bz2_names = [name for name in files if name.endswith(suffix)]
        if not bz2_names:
            # Only directories that actually hold .bz2 files are mirrored
            continue

        # Mirror the input subdirectory structure in the output directory
        # (computed once per directory rather than once per file)
        relative_path = os.path.relpath(subdir, input_directory)
        output_subdir = os.path.join(output_directory, relative_path)
        os.makedirs(output_subdir, exist_ok=True)

        for name in bz2_names:
            bz2_file_path = os.path.join(subdir, name)

            # Swap only the trailing extension; str.replace() would also
            # rewrite any ".bz2" appearing earlier in the file name.
            txt_file_path = os.path.join(output_subdir, name[:-len(suffix)] + '.txt')

            # Stream from .bz2 to .txt chunk by chunk; read() returns ''
            # at end of file, which terminates the iterator.
            with bz2.open(bz2_file_path, 'rt', encoding='utf-8') as bz2_file, \
                    open(txt_file_path, 'w', encoding='utf-8') as txt_file:
                for chunk in iter(lambda: bz2_file.read(chunk_chars), ''):
                    txt_file.write(chunk)

            print(f"Extracted {bz2_file_path} to {txt_file_path}")

In [6]:
# Where the plain-text copies are written, and where the .bz2 files are read from.
# NOTE(review): these are absolute, machine-specific paths; adjust before running elsewhere.
output_directory = "C:/Users/Hi/My Works/My Py Scripts/Git Repos/29_Tamil Wiki/txt_articles"
input_directory = "C:/Users/Hi/My Works/My Py Scripts/Git Repos/29_Tamil Wiki/extracted_articles"

# Convert every .bz2 file under the input tree into a .txt file in the output tree
extract_bz2_to_txt(
    input_directory=input_directory,
    output_directory=output_directory,
)

Extracted C:/Users/Hi/My Works/My Py Scripts/Git Repos/29_Tamil Wiki/extracted_articles\AA\wiki_00.bz2 to C:/Users/Hi/My Works/My Py Scripts/Git Repos/29_Tamil Wiki/txt_articles\AA\wiki_00.txt
Extracted C:/Users/Hi/My Works/My Py Scripts/Git Repos/29_Tamil Wiki/extracted_articles\AA\wiki_01.bz2 to C:/Users/Hi/My Works/My Py Scripts/Git Repos/29_Tamil Wiki/txt_articles\AA\wiki_01.txt
Extracted C:/Users/Hi/My Works/My Py Scripts/Git Repos/29_Tamil Wiki/extracted_articles\AA\wiki_02.bz2 to C:/Users/Hi/My Works/My Py Scripts/Git Repos/29_Tamil Wiki/txt_articles\AA\wiki_02.txt
Extracted C:/Users/Hi/My Works/My Py Scripts/Git Repos/29_Tamil Wiki/extracted_articles\AA\wiki_03.bz2 to C:/Users/Hi/My Works/My Py Scripts/Git Repos/29_Tamil Wiki/txt_articles\AA\wiki_03.txt
Extracted C:/Users/Hi/My Works/My Py Scripts/Git Repos/29_Tamil Wiki/extracted_articles\AA\wiki_04.bz2 to C:/Users/Hi/My Works/My Py Scripts/Git Repos/29_Tamil Wiki/txt_articles\AA\wiki_04.txt
Extracted C:/Users/Hi/My Works/My P