In [1]:
from transformers import pipeline, T5Tokenizer, TFT5ForConditionalGeneration
import tensorflow as tf

# Confirm that the imports above resolved and report the active TensorFlow version.
print(
    "Libraries imported successfully!",
    f"TensorFlow version: {tf.__version__}",
    sep="\n",
)

: 

In [None]:
# Cell 2: Our Sample Data (Input)
# (Please re-run this cell to ensure we're using this text)
#
# MEETING_TRANSCRIPT is a hard-coded, speaker-labelled sample transcript.
# Later cells read it by name (the summarization call and the FLAN-T5 prompt),
# so this cell has to run before them.
# The text starts on the line after the opening triple quotes, so the string
# begins and ends with a newline character.

MEETING_TRANSCRIPT = """
Tom: Okay everyone, let's kick off. The main goal today is to finalize the new marketing slogan for the Q4 launch. Sarah, what does your team have?

Sarah: Thanks, Tom. We've narrowed it down to three options. "Innovation for Tomorrow," "Your Future, Our Passion," and "Simply Better." The data suggests "Simply Better" is resonating most with our test groups.

Alex: I agree. It's clean and direct. "Innovation for Tomorrow" is too generic.

Tom: Good point, Alex. Let's go with "Simply Better." Sarah, can you please get the final design assets to the web team?

Sarah: Will do. I'll have them sent over by end-of-day Friday.

Alex: I also have an action item. I will coordinate with the legal team to get the trademark paperwork started for "Simply Better." I should have an update on that by our next meeting.

Tom: Perfect. That's all for today. Great work, team.
"""

# Simple confirmation that the cell executed.
print("Sample transcript loaded.")

In [None]:
# Cell 3: Task 1 - Summarize the sample transcript

print("Loading summarization model... (This may take a moment on first run)")

# Build a Hugging Face summarization pipeline around the
# facebook/bart-large-cnn checkpoint.
summarizer = pipeline(task="summarization", model="facebook/bart-large-cnn")

# Sampling is disabled, and the generated summary length is bounded by
# min_length / max_length.
summary_output = summarizer(
    MEETING_TRANSCRIPT,
    do_sample=False,
    min_length=30,
    max_length=90,
)

# The result is indexed as a list of dicts; the text of the first (only)
# summary is stored under 'summary_text'.
print(
    "\n--- ✅ MEETING SUMMARY ---",
    summary_output[0]['summary_text'],
    sep="\n",
)

In [None]:
# Cell 4: Task 2 - Extract Action Items (Using a Simple Prompt)

print("\nLoading FLAN-T5 model with a new prompt...")

# Tokenizer and TensorFlow model are loaded from the same FLAN-T5 checkpoint.
# from_pt=True requests loading the model from its PyTorch weights.
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-base")
model = TFT5ForConditionalGeneration.from_pretrained("google/flan-t5-base", from_pt=True)

# Deliberately minimal prompt: a single plain instruction, the transcript, and
# an answer cue -- no output-format instructions.
prompt = f"""
What are the assigned tasks for this transcript.

Transcript:
{MEETING_TRANSCRIPT}

Assigned Tasks:
"""

# Encode the prompt as TensorFlow tensors, clipping it to at most 1024 tokens.
inputs = tokenizer.encode(
    prompt,
    truncation=True,
    max_length=1024,
    return_tensors="tf",
)

# Beam search with 4 beams; the generated sequence is capped at 200 tokens.
outputs = model.generate(
    inputs,
    num_beams=4,
    early_stopping=True,
    max_length=200,
)

# Turn the first generated sequence back into text, dropping special tokens.
action_items_text = tokenizer.decode(outputs[0], skip_special_tokens=True)

print("\n--- ✅ ACTION ITEMS ---", action_items_text, sep="\n")

In [None]:
# Cell 6: Full Pipeline (Audio -> Text -> Summary) - Corrected for Long Audio

from transformers import pipeline
import librosa
import tensorflow as tf 

# --- 1. INPUT: recording to process (path is relative to the working directory) ---
AUDIO_FILE_PATH = "A1-044-LYRA-WHERE-DO-YOU-GO-IN-THE-MORNING.mp3"

print(f"Loading audio file: {AUDIO_FILE_PATH}...")

# --- 2. LOAD: decode the file and resample it to 16 kHz ---
try:
    input_audio_array, sample_rate = librosa.load(AUDIO_FILE_PATH, sr=16000)
except Exception as e:
    # Point at the most likely cause, show the underlying error, then let the
    # original exception propagate so the cell stops here.
    print(
        f"Error loading audio file. Make sure '{AUDIO_FILE_PATH}' is in the same directory.",
        f"Error: {e}",
        sep="\n",
    )
    raise
else:
    print("Audio loaded and resampled to 16kHz successfully.")

# --- 3. TASK 1: AUDIO-TO-TEXT (ASR) ---
print("\nLoading Whisper ASR model...")

asr_pipeline = pipeline(task="automatic-speech-recognition", model="openai/whisper-base")

print("Transcribing audio... (This may take a moment)")

# chunk_length_s=30 tells the pipeline to split the waveform into 30-second
# chunks automatically, which is what allows long recordings to be transcribed.
transcribed_output = asr_pipeline(input_audio_array, chunk_length_s=30)

# The transcription is read from the 'text' entry of the pipeline result.
transcribed_text = transcribed_output["text"]

print("\n--- ✅ TRANSCRIBED TEXT ---", transcribed_text, sep="\n")


# --- 4. TASK 2: TEXT-TO-SUMMARY ---
print("\nLoading summarization model...")

summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

# The transcript of a long recording can contain more tokens than the
# facebook/bart-large-cnn encoder accepts (1024 positions). Without truncation
# the call fails with an index error instead of returning a summary, so ask the
# pipeline's tokenizer to clip the input to the model's maximum length.
# NOTE: with truncation only the leading part of a very long transcript is
# summarized. TODO: summarize chunk-by-chunk if full coverage is required.
summary_output = summarizer(
    transcribed_text,
    max_length=150,
    min_length=30,
    do_sample=False,
    truncation=True,
)

# The result is indexed as a list of dicts; the summary text of the first
# (only) input is stored under 'summary_text'.
print("\n--- ✅ FINAL SUMMARY (FROM AUDIO) ---")
print(summary_output[0]['summary_text'])