In [1]:
# Mount Google Drive into the Colab runtime at /content/drive so that later
# cells can read and write files under that path.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install xlsxwriter

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting xlsxwriter
  Downloading XlsxWriter-3.1.0-py3-none-any.whl (152 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m152.7/152.7 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: xlsxwriter
Successfully installed xlsxwriter-3.1.0


In [3]:
import os
import pandas as pd
from tqdm import tqdm
from openpyxl.styles import Font
from openpyxl import load_workbook

In [4]:
import warnings
warnings.filterwarnings('ignore') # setting ignore as a parameter

In [5]:
def generate_dialogue_file(root_path, folder_id, dialogue_id, num_rows=100):
    """Create an empty, pre-formatted dialogue workbook on disk.

    Writes ``<root_path>/<folder_id>/<dialogue_id>.xlsx`` with a single sheet
    named ``Sheet1``. The sheet has three header columns ("語者一", "語者二",
    "情感" - i.e. speaker 1, speaker 2, emotion) followed by ``num_rows`` rows
    whose cells each contain a single space. Columns A:B are widened, every
    row gets a fixed height, and all cells use a 17-pt font (bold for the
    header row).

    Args:
        root_path: Directory under which the folder is created.
        folder_id: Name of the sub-folder that receives the workbook.
        dialogue_id: File name (without extension) of the workbook.
        num_rows: Number of blank data rows to pre-fill. Defaults to 100.

    Returns:
        None. The function is called for its side effect of writing the file.
    """
    column_names = ("語者一", "語者二", "情感")
    df = pd.DataFrame({name: [" "] * num_rows for name in column_names})

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    folder_path = os.path.join(root_path, folder_id)
    os.makedirs(folder_path, exist_ok=True)
    file_path = os.path.join(folder_path, f"{dialogue_id}.xlsx")

    # set_column / set_row are XlsxWriter worksheet methods, so the engine is
    # pinned explicitly rather than relying on pandas' default selection.
    # The context manager closes (and thereby saves) the file even on error;
    # ExcelWriter.save() and to_excel(encoding=...) no longer exist in
    # pandas >= 2.0.
    with pd.ExcelWriter(file_path, engine="xlsxwriter") as writer:
        df.to_excel(writer, sheet_name="Sheet1", index=False)
        sheet = writer.sheets["Sheet1"]
        sheet.set_column("A:B", 80)
        # +1 accounts for the header row, so the last data row is sized too.
        for row_idx in range(num_rows + 1):
            sheet.set_row(row_idx, 25)

    # Re-open the saved file with openpyxl to override the fonts of the cells
    # pandas has already written (including its own header styling).
    workbook = load_workbook(file_path)
    sheet = workbook["Sheet1"]
    header_font, body_font = Font(size=17, bold=True), Font(size=17)
    for row_idx, row in enumerate(sheet.iter_rows()):
        row_font = header_font if row_idx == 0 else body_font
        for cell in row:
            cell.font = row_font
    workbook.save(file_path)

In [6]:
def generate_all_dialogue_files(root_path, total_dialogue_count, folder_size=100):
    """Generate every dialogue workbook, grouped into fixed-size folders.

    Dialogues are numbered ``0 .. total_dialogue_count - 1`` and named
    ``dialogue-NNNN``. They are placed in folders named
    ``dialogue SSSS-EEEE``, where ``SSSS`` is the first index of the folder
    and ``EEEE`` is ``SSSS + folder_size - 1``. The folder name always shows
    the full nominal range, even when the final folder is only partly filled
    because ``total_dialogue_count`` is not a multiple of ``folder_size``.

    Args:
        root_path: Directory that receives the folders; created if missing.
        total_dialogue_count: Total number of workbooks to generate.
        folder_size: Number of workbooks per folder. Defaults to 100.

    Returns:
        None. Progress is shown with tqdm (one step per folder), and the
        first and last file of each folder are printed.
    """
    os.makedirs(root_path, exist_ok=True)
    for start in tqdm(range(0, total_dialogue_count, folder_size)):
        folder_id = f"dialogue {start:04d}-{start + folder_size - 1:04d}"
        # Clamp the last folder so a count that is not a multiple of
        # folder_size does not run past the final dialogue index.
        stop = min(start + folder_size, total_dialogue_count)
        for index in range(start, stop):
            dialogue_id = f"dialogue-{index:04d}"
            generate_dialogue_file(root_path, folder_id, dialogue_id)
            # Only print the first and last file name in each folder
            if index == start or index == stop - 1:
                print(folder_id, dialogue_id)

In [7]:
# Entry point: generate the full set of empty dialogue workbooks on the
# mounted Drive.
total_dialogue_count = 2000
root_path = "/content/drive/MyDrive/論文資料完整版/dialogues"
generate_all_dialogue_files(
    root_path=root_path,
    total_dialogue_count=total_dialogue_count,
)

  0%|          | 0/20 [00:00<?, ?it/s]

dialogue 0000-0099 dialogue-0000


  5%|▌         | 1/20 [00:09<02:58,  9.41s/it]

dialogue 0000-0099 dialogue-0099
dialogue 0100-0199 dialogue-0100


 10%|█         | 2/20 [00:20<03:06, 10.35s/it]

dialogue 0100-0199 dialogue-0199
dialogue 0200-0299 dialogue-0200


 15%|█▌        | 3/20 [00:29<02:46,  9.80s/it]

dialogue 0200-0299 dialogue-0299
dialogue 0300-0399 dialogue-0300


 20%|██        | 4/20 [00:36<02:15,  8.50s/it]

dialogue 0300-0399 dialogue-0399
dialogue 0400-0499 dialogue-0400


 25%|██▌       | 5/20 [00:43<02:01,  8.10s/it]

dialogue 0400-0499 dialogue-0499
dialogue 0500-0599 dialogue-0500


 30%|███       | 6/20 [00:49<01:45,  7.55s/it]

dialogue 0500-0599 dialogue-0599
dialogue 0600-0699 dialogue-0600


 35%|███▌      | 7/20 [00:57<01:37,  7.47s/it]

dialogue 0600-0699 dialogue-0699
dialogue 0700-0799 dialogue-0700


 40%|████      | 8/20 [01:03<01:25,  7.14s/it]

dialogue 0700-0799 dialogue-0799
dialogue 0800-0899 dialogue-0800


 45%|████▌     | 9/20 [01:11<01:19,  7.21s/it]

dialogue 0800-0899 dialogue-0899
dialogue 0900-0999 dialogue-0900


 50%|█████     | 10/20 [01:17<01:09,  6.99s/it]

dialogue 0900-0999 dialogue-0999
dialogue 1000-1099 dialogue-1000


 55%|█████▌    | 11/20 [01:24<01:04,  7.13s/it]

dialogue 1000-1099 dialogue-1099
dialogue 1100-1199 dialogue-1100


 60%|██████    | 12/20 [01:31<00:55,  6.89s/it]

dialogue 1100-1199 dialogue-1199
dialogue 1200-1299 dialogue-1200


 65%|██████▌   | 13/20 [01:38<00:49,  7.06s/it]

dialogue 1200-1299 dialogue-1299
dialogue 1300-1399 dialogue-1300


 70%|███████   | 14/20 [01:45<00:41,  6.91s/it]

dialogue 1300-1399 dialogue-1399
dialogue 1400-1499 dialogue-1400


 75%|███████▌  | 15/20 [01:52<00:35,  7.11s/it]

dialogue 1400-1499 dialogue-1499
dialogue 1500-1599 dialogue-1500


 80%|████████  | 16/20 [02:00<00:28,  7.14s/it]

dialogue 1500-1599 dialogue-1599
dialogue 1600-1699 dialogue-1600


 85%|████████▌ | 17/20 [02:07<00:21,  7.21s/it]

dialogue 1600-1699 dialogue-1699
dialogue 1700-1799 dialogue-1700


 90%|█████████ | 18/20 [02:14<00:14,  7.05s/it]

dialogue 1700-1799 dialogue-1799
dialogue 1800-1899 dialogue-1800


 95%|█████████▌| 19/20 [02:21<00:07,  7.21s/it]

dialogue 1800-1899 dialogue-1899
dialogue 1900-1999 dialogue-1900


100%|██████████| 20/20 [02:28<00:00,  7.44s/it]

dialogue 1900-1999 dialogue-1999



