In [1]:
import tensorflow as tf
import os
import json
import glob
from transformers import pipeline

# Fail fast unless TensorFlow reports the first GPU; otherwise announce it.
device_name = tf.test.gpu_device_name()
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

# Utility functions
def run_with_gpu(function, *extra_args, gpu=True, which_gpu="/GPU:0", **extra_kwargs):
    """
    Call ``function`` and return its result, optionally inside a TensorFlow
    device scope.

    :param function: Callable to invoke.
    :param extra_args: Positional arguments forwarded to ``function``.
    :param gpu: When true (the default) the call runs inside
        ``tf.device(which_gpu)``; when false ``function`` is called directly.
    :param which_gpu: Device string handed to ``tf.device``.
    :param extra_kwargs: Keyword arguments forwarded to ``function``.
    :return: Whatever ``function`` returns.
    """
    if not gpu:
        return function(*extra_args, **extra_kwargs)
    with tf.device(which_gpu):
        return function(*extra_args, **extra_kwargs)

Found GPU at: /device:GPU:0


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Taken from https://github.com/yinruiqing/pyannote-whisper
# Combines pyannote.audio diarization output (rttm data) with transcription segments to create a speaker-labelled transcription
from pyannote.core import Segment, Annotation, Timeline


def get_text_with_timestamp(transcribe_res):
    """
    Pair every transcription segment's time span with its text.

    Reads ``transcribe_res['segments']``; each item must provide the keys
    ``'start'``, ``'end'`` and ``'text'``.

    :return: List of ``(Segment(start, end), text)`` tuples, in input order.
    """
    return [
        (Segment(entry['start'], entry['end']), entry['text'])
        for entry in transcribe_res['segments']
    ]


def add_speaker_info_to_text(timestamp_texts, ann):
    """
    Attach a speaker label to every ``(segment, text)`` pair.

    The label for a pair is ``ann.crop(segment).argmax()``.
    NOTE(review): presumably the dominant speaker within the segment
    (pyannote ``Annotation.argmax``) -- confirm against the pyannote docs.

    :return: List of ``(segment, speaker, text)`` tuples, in input order.
    """
    return [
        (span, ann.crop(span).argmax(), words)
        for span, words in timestamp_texts
    ]


def merge_cache(text_cache):
    """
    Collapse buffered ``(segment, speaker, text)`` entries into one entry.

    The merged entry spans from the first entry's start to the last entry's
    end, carries the first entry's speaker, and concatenates all texts with no
    separator. ``text_cache`` must be non-empty.

    :return: ``(Segment(start, end), speaker, sentence)``.
    """
    joined = ''.join(entry[-1] for entry in text_cache)
    first, last = text_cache[0], text_cache[-1]
    return Segment(first[0].start, last[0].end), first[1], joined


# Characters treated as sentence terminators: merge_sentence flushes its buffer
# when a segment's text ends with one of these.
PUNC_SENT_END = ['.', '?', '!']


def merge_sentence(spk_text):
    """
    Group consecutive ``(segment, speaker, text)`` entries into sentence-level
    entries.

    Entries are buffered and flushed through ``merge_cache`` when the speaker
    changes or when an entry's text ends with a character from
    ``PUNC_SENT_END``. Anything still buffered after the last entry is flushed
    as a final item.

    :param spk_text: Iterable of ``(segment, speaker, text)`` tuples.
    :return: List of merged ``(segment, speaker, sentence)`` tuples.
    """
    merged_spk_text = []
    pre_spk = None
    text_cache = []
    # endswith() with a tuple is False for empty text, whereas text[-1] would
    # raise IndexError on a segment with no text.
    sentence_endings = tuple(PUNC_SENT_END)
    for seg, spk, text in spk_text:
        entry = (seg, spk, text)
        if spk != pre_spk and pre_spk is not None and text_cache:
            # Speaker changed: emit what the previous speaker said and start a
            # new buffer with this entry. This branch does not look at
            # sentence-ending punctuation, so the new entry stays buffered
            # until a later entry flushes it.
            merged_spk_text.append(merge_cache(text_cache))
            text_cache = [entry]
        elif text.endswith(sentence_endings):
            # Sentence finished: emit the buffer including this entry.
            text_cache.append(entry)
            merged_spk_text.append(merge_cache(text_cache))
            text_cache = []
        else:
            text_cache.append(entry)
        pre_spk = spk
    if text_cache:
        merged_spk_text.append(merge_cache(text_cache))
    return merged_spk_text


def diarize_text(transcribe_res, diarization_result):
    """
    Build a speaker-labelled, sentence-merged transcript.

    Chains the helpers in order: ``get_text_with_timestamp`` on
    ``transcribe_res``, ``add_speaker_info_to_text`` with
    ``diarization_result``, then ``merge_sentence``.

    :return: The list returned by ``merge_sentence``.
    """
    labelled = add_speaker_info_to_text(
        get_text_with_timestamp(transcribe_res),
        diarization_result,
    )
    return merge_sentence(labelled)


def write_to_txt(spk_sent, file):
    """
    Write merged sentences to a text file, one per line.

    Each line has the form ``"<start> <end> <speaker> <sentence>"`` with start
    and end formatted to two decimals.

    :param spk_sent: Iterable of ``(segment, speaker, sentence)`` tuples; each
        segment must expose numeric ``start`` and ``end`` attributes.
    :param file: Path of the output file; it is overwritten if it exists.
    """
    # Explicit UTF-8 so non-ASCII transcript text is written the same way on
    # every platform instead of depending on the locale's default encoding.
    with open(file, 'w', encoding='utf-8') as fp:
        fp.writelines(
            f'{seg.start:.2f} {seg.end:.2f} {spk} {sentence}\n'
            for seg, spk, sentence in spk_sent
        )

In [3]:
def baseline_summary(path_to_source):
    """
    Summarize every ``.txt`` transcript in a directory and save each result as
    JSON.

    For each ``*.txt`` file directly inside ``path_to_source`` a file
    ``./baseline_sum/<name>.json`` is written, where ``<name>`` is the source
    file name without its extension. The JSON object has the keys
    ``"filename"``, ``"transcript"`` and ``"summary"``.

    :param path_to_source: Directory holding the raw .txt files, with or
        without a trailing path separator.
    """
    summarizer = pipeline("summarization", model="philschmid/bart-large-cnn-samsum", truncation=True)

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("./baseline_sum", exist_ok=True)

    for filename in glob.glob(os.path.join(path_to_source, "*.txt")):
        # Derive the stem from the path itself rather than a fixed "/" depth,
        # so any directory depth, OS separator, or dotted file name works.
        txt_raw = os.path.splitext(os.path.basename(filename))[0]

        # NOTE(review): "unicode_escape" decodes bytes as Latin-1 and interprets
        # backslash escapes; if the transcripts are UTF-8 this garbles non-ASCII
        # text -- confirm the intended encoding.
        with open(filename, encoding="unicode_escape") as f:
            read_data = f.read()

        result_dict = {
            "filename": filename,
            "transcript": read_data,
            # NOTE(review): run_with_gpu only opens a tf.device scope; if the
            # pipeline is backed by PyTorch, device placement must be requested
            # via pipeline(..., device=...) instead -- confirm.
            "summary": run_with_gpu(summarizer, read_data),
        }
        with open(f"./baseline_sum/{txt_raw}.json", "w") as fp:
            json.dump(result_dict, fp)





    
# Summarize the transcripts stored under ./data/ (results are written to ./baseline_sum/).
baseline_summary("./data/")



Downloading (…)lve/main/config.json:   0%|          | 0.00/1.63k [00:00<?, ?B/s]Downloading (…)lve/main/config.json: 100%|██████████| 1.63k/1.63k [00:00<00:00, 1.03MB/s]
Downloading (…)"pytorch_model.bin";:   0%|          | 0.00/1.63G [00:00<?, ?B/s]Downloading (…)"pytorch_model.bin";:   3%|▎         | 52.4M/1.63G [00:00<00:03, 442MB/s]Downloading (…)"pytorch_model.bin";:   6%|▋         | 105M/1.63G [00:00<00:03, 441MB/s] Downloading (…)"pytorch_model.bin";:  10%|▉         | 157M/1.63G [00:00<00:03, 456MB/s]Downloading (…)"pytorch_model.bin";:  13%|█▎        | 210M/1.63G [00:00<00:03, 462MB/s]Downloading (…)"pytorch_model.bin";:  16%|█▌        | 262M/1.63G [00:00<00:03, 447MB/s]Downloading (…)"pytorch_model.bin";:  20%|█▉        | 325M/1.63G [00:00<00:02, 479MB/s]Downloading (…)"pytorch_model.bin";:  24%|██▍       | 388M/1.63G [00:00<00:02, 503MB/s]Downloading (…)"pytorch_model.bin";:  27%|██▋       | 440M/1.63G [00:00<00:02, 480MB/s]Downloading (…)"pytorch_model.bin";:  30