# 1. Installing Dependencies

In [None]:
# ───────────────────────────────────────────────────────────────────────────────
# Cell 1: Install all dependencies
# ───────────────────────────────────────────────────────────────────────────────
# Python packages. moviepy is pinned to the 1.x line because video_creator.py
# imports `moviepy.editor` and calls `set_audio`, which are 1.x APIs.
%pip install -q spacy diffusers accelerate safetensors ffmpeg-python moviepy==1.0.3 gtts soundfile pydub git+https://github.com/coqui-ai/TTS.git
!python -m spacy download en_core_web_sm

# System packages: git-lfs for the checkpoint clone, ffmpeg for encoding,
# libsndfile1 / espeak-ng for the audio stack.
!apt-get update -qq && apt-get install -y git-lfs ffmpeg libsndfile1 espeak-ng
!git-lfs install

# Clone the motion-adapter checkpoints only once so the cell can be re-run safely
# (a plain `git clone` fails when the target directory already exists).
![ -d /content/AnimateDiff-Lightning ] || git clone https://huggingface.co/ByteDance/AnimateDiff-Lightning /content/AnimateDiff-Lightning

# UI / tunnelling helpers.
%pip install -q pyngrok streamlit-lottie streamlit-extras

# Keep this pin last so it wins over whatever numpy version the installs above pulled in.
%pip install -q numpy==1.26.4

  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m73.5/73.5 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.7/3.7 MB[0m [31m43.1 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m101.6/101.6 kB[0m [31m8.6 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m15.3/15.3 MB[0m [31m113.5 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m 

# 2. Text Preprocessing

In [1]:
%%bash
# ───────────────────────────────────────────────────────────────────────────────
# Cell 2: preprocessing.py
# (%%bash must be the very first line of the cell for IPython to treat it as a
#  cell magic; these banner lines are ordinary bash comments.)
# ───────────────────────────────────────────────────────────────────────────────
cat << 'EOF' > preprocessing.py
import re
import spacy
from typing import List

# Load the small English spaCy pipeline once, at import time, so every call that
# parses text through `nlp` shares the same loaded model.
nlp = spacy.load("en_core_web_sm")

def clear_context_only(text: str,
                       pronouns: tuple = ("he","him","his","she","her")) -> List[str]:
    """
    Turn raw story text into a list of self-contained, de-duplicated sentences.

    1. Strip UI artifacts/numbering lines (blank lines, lines starting with
       "----", and lines that start with an "<digits>." list marker).
    2. Take the first noun chunk of the first sentence as the antecedent.
    3. Replace every listed pronoun (case-insensitively) with that antecedent;
       "his" becomes "<antecedent>'s", every other pronoun becomes the bare
       antecedent.
    4. Split on '.', drop empty and number-only fragments, dedupe
       (case-insensitively, first occurrence wins) and return the sentences,
       each terminated with a single '.'.

    Args:
        text: Raw, possibly multi-line story text.
        pronouns: Pronouns to replace with the antecedent.

    Returns:
        The resolved sentences in their original order.

    Raises:
        ValueError: If no noun phrase can be extracted from the first sentence
            (this includes empty input).
    """
    kept_lines = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line or line.startswith("----") or re.match(r"^\d+\.($|\s)", line):
            continue
        kept_lines.append(line)
    cleaned = " ".join(kept_lines)

    doc = nlp(cleaned)
    try:
        first_sent = next(iter(doc.sents))
        antecedent = next(iter(first_sent.noun_chunks)).text.strip()
    except Exception as err:
        # Keep the original cause attached for debugging while preserving the
        # ValueError contract.
        raise ValueError("No noun phrase found in first sentence.") from err

    # Compare in lower case so a caller-supplied "He" behaves like "he".
    lowered = {p.lower() for p in pronouns}

    def _rep(m):
        p = m.group(0).lower()
        if p not in lowered:
            return m.group(0)
        return antecedent + ("'s" if p == "his" else "")

    resolved = cleaned
    if pronouns:
        # Escape each pronoun so regex metacharacters in custom entries are literal.
        pattern = r"\b(" + "|".join(re.escape(p) for p in pronouns) + r")\b"
        resolved = re.sub(pattern, _rep, cleaned, flags=re.IGNORECASE)

    sents = []
    for fragment in resolved.split('.'):
        fragment = fragment.strip()
        # Test the *stripped* fragment: a leftover list number such as " 3"
        # carries leading whitespace and would otherwise slip through as "3.".
        if not fragment or re.match(r"^\d+\.?$", fragment):
            continue
        sents.append(fragment + '.')

    # dedupe
    seen = set()
    out = []
    for s in sents:
        k = s.lower()
        if k not in seen:
            seen.add(k)
            out.append(s)
    return out
EOF


# 3. Model Utils

In [2]:
%%bash
# ───────────────────────────────────────────────────────────────────────────────
# Cell 3: model.py
# (%%bash must be the very first line of the cell for IPython to treat it as a
#  cell magic; these banner lines are ordinary bash comments.)
# ───────────────────────────────────────────────────────────────────────────────
cat << 'EOF' > model.py
import os, torch, numpy as np
from safetensors.torch import load_file
from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
from PIL import Image
import ffmpeg
import tempfile

def load_pipeline(adapter_path="/content/AnimateDiff-Lightning"):
    """Build the AnimateDiff-Lightning text-to-video pipeline.

    Loads the 4-step motion-adapter checkpoint from ``adapter_path``, attaches it
    to the "emilianJR/epiCRealism" base model and configures an Euler scheduler
    with trailing timestep spacing and a linear beta schedule.

    Args:
        adapter_path: Directory containing
            ``animatediff_lightning_<step>step_diffusers.safetensors``.

    Returns:
        ``(pipe, step)``: the ready pipeline and the number of inference steps
        the loaded checkpoint is meant to be run with.

    Raises:
        FileNotFoundError: If the checkpoint file is not present in
            ``adapter_path``.
    """
    on_gpu = torch.cuda.is_available()
    device = "cuda" if on_gpu else "cpu"
    # Half precision only on GPU; CPU falls back to float32.
    dtype = torch.float16 if on_gpu else torch.float32
    step = 4

    ckpt = os.path.join(adapter_path, f"animatediff_lightning_{step}step_diffusers.safetensors")
    if not os.path.isfile(ckpt):
        raise FileNotFoundError(
            f"Motion-adapter checkpoint not found: {ckpt}. "
            "Clone ByteDance/AnimateDiff-Lightning (with git-lfs) into adapter_path first."
        )
    adapter = MotionAdapter().to(device, dtype)
    adapter.load_state_dict(load_file(ckpt, device=device))

    base = "emilianJR/epiCRealism"
    pipe = AnimateDiffPipeline.from_pretrained(base, motion_adapter=adapter, torch_dtype=dtype).to(device)
    pipe.scheduler = EulerDiscreteScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing", beta_schedule="linear")
    # Model CPU offload shuttles sub-models to an accelerator on demand, so it is
    # only enabled when a GPU is actually present; on a CPU-only machine the
    # pipeline simply stays on the CPU.
    if on_gpu:
        pipe.enable_model_cpu_offload()
    pipe.enable_attention_slicing()
    return pipe, step

def frames_to_video(frames, fps, out_path):
    """Encode a sequence of frames into an H.264 (yuv420p) video file.

    Every frame is first saved as a zero-padded, numbered PNG inside a temporary
    directory; ffmpeg then reads that numbered sequence at ``fps`` frames per
    second and writes ``out_path``, overwriting any existing file. The temporary
    directory is removed when the function returns.

    Args:
        frames: Iterable of image arrays (anything ``PIL.Image.fromarray`` accepts).
        fps: Frame rate passed to ffmpeg for the input sequence.
        out_path: Destination video path.
    """
    with tempfile.TemporaryDirectory() as workdir:
        # Dump the frames to disk so ffmpeg can consume them as an image sequence.
        for index, frame in enumerate(frames):
            png_path = f"{workdir}/frame_{index:05d}.png"
            Image.fromarray(frame).save(png_path)

        sequence = ffmpeg.input(f"{workdir}/frame_%05d.png", framerate=fps)
        encoded = sequence.output(out_path, pix_fmt="yuv420p", vcodec="libx264")
        encoded.overwrite_output().run(quiet=True)

def generate_segments(pipe, step, prompts, fps=6):
    """Render one short video clip per prompt.

    For each prompt the pipeline is run with ``guidance_scale=1.0`` and ``step``
    inference steps, the first returned frame sequence is converted to arrays and
    encoded to ``seg_<index>.mp4`` in the current working directory (index is
    zero-based within ``prompts``). The CUDA cache is emptied after every clip.

    Args:
        pipe: Callable pipeline returning an object with a ``frames`` attribute.
        step: Number of inference steps.
        prompts: Sequence of text prompts.
        fps: Frame rate of the encoded clips.

    Returns:
        A list of ``(video_path, prompt)`` tuples, in prompt order.
    """
    total = len(prompts)
    results = []
    for number, prompt in enumerate(prompts, start=1):
        print(f"→ Generating segment {number}/{total}")
        result = pipe(prompt=prompt, guidance_scale=1.0, num_inference_steps=step)
        frame_arrays = list(map(np.array, result.frames[0]))
        video_path = f"seg_{number - 1}.mp4"
        frames_to_video(frame_arrays, fps, video_path)
        results.append((video_path, prompt))
        # Release cached GPU memory between clips.
        torch.cuda.empty_cache()
    return results
EOF


# 4. Audio Generation

In [3]:
%%bash
cat << 'EOF' > audio.py
import os, tempfile, re
from TTS.api import TTS
import soundfile as sf
from pydub import AudioSegment

# Regex to split on sentence boundaries: a run of spaces that directly follows
# '.', '!' or '?'. Inside a character class these need no escaping; doubling the
# backslashes in a raw string would add a literal backslash to the class and make
# it count as a sentence terminator too.
_SENT_RE = re.compile(r'(?<=[.!?]) +')

# Pre-defined models you can choose from (display name -> Coqui TTS model name):
AVAILABLE_MODELS = {
    "LJSpeech VITS":         "tts_models/en/ljspeech/vits",
    "Tacotron2-DDC":         "tts_models/en/ljspeech/tacotron2-DDC",
    "VCTK VITS":             "tts_models/en/vctk/vits",
    # Add more display name ↔ Coqui TTS model name pairs here…
}

def list_available_models():
    """List the display names of the selectable voice models, in declaration order."""
    return [display_name for display_name in AVAILABLE_MODELS]

def generate_audio(
    text: str,
    model_key: str = "LJSpeech VITS",
    speaker: str = None,
    language: str = None
) -> (str, list):
    """
    Generate speech for the given text, one sentence at a time.

    Args:
      text: full input text.
      model_key: key from AVAILABLE_MODELS.
      speaker: optional (for multi-speaker models). When omitted and the loaded
        model reports itself as multi-speaker, the first speaker it lists is used.
      language: optional (for multilingual models).

    Returns:
      final_wav: path to the concatenated WAV file. It lives in a fresh
        temporary directory that the caller is responsible for removing.
      timeline: list of (sentence, start_time, duration), times in seconds.

    Raises:
      ValueError: if ``model_key`` is unknown or ``text`` contains no
        non-empty sentence.
    """
    if model_key not in AVAILABLE_MODELS:
        raise ValueError(f"Unknown model '{model_key}'. Choose from {list_available_models()}")

    # Split text into sentences. re.split returns [''] for empty input, so empty
    # pieces must be filtered out for the guard below to be meaningful.
    sentences = [piece.strip() for piece in _SENT_RE.split(text.strip())]
    sentences = [piece for piece in sentences if piece]
    if not sentences:
        raise ValueError("No valid sentences found in text.")

    # Initialize TTS engine (no speaker/language here)
    model_name = AVAILABLE_MODELS[model_key]
    tts = TTS(model_name=model_name)

    # Multi-speaker models need an explicit speaker; fall back to the first one
    # the model advertises so a bare call still works.
    if speaker is None and getattr(tts, "is_multi_speaker", False):
        known_speakers = getattr(tts, "speakers", None) or []
        if known_speakers:
            speaker = known_speakers[0]

    # Create the working directory only after validation so invalid calls do not
    # leave empty temp directories behind.
    tmpdir = tempfile.mkdtemp()

    segs, timeline, current = [], [], 0.0
    for i, sent in enumerate(sentences, 1):
        out_path = os.path.join(tmpdir, f"seg_{i}.wav")
        # Pass speaker/language to tts_to_file
        tts.tts_to_file(text=sent, file_path=out_path,
                        speaker=speaker, language=language)
        # Duration in seconds = number of samples / sample rate.
        data, sr = sf.read(out_path)
        dur = len(data) / sr

        segs.append(AudioSegment.from_wav(out_path))
        timeline.append((sent, current, dur))
        current += dur

    # Concatenate segments
    final_wav = os.path.join(tmpdir, "output.wav")
    combined = segs[0]
    for seg in segs[1:]:
        combined += seg
    combined.export(final_wav, format="wav")

    return final_wav, timeline
EOF


# 5. Streamlit UI

In [4]:
%%bash
cat << 'EOF' > streamlit_app.py
import streamlit as st
from PIL import Image

# Page config has to be the first Streamlit command of a script run, so set it
# once up front; this way it applies to every page, not only the home page.
st.set_page_config(page_title="🎬 Narrative Frames", layout="centered")

# Session state to manage page navigation
if "page" not in st.session_state:
    st.session_state.page = "home"

def go_to_creator():
    """Button callback: switch the app to the video-creator page."""
    st.session_state.page = "creator"

if st.session_state.page == "home":
    st.title("🎬 Narrative Frames")
    st.markdown("#### Transform your text into compelling video stories.")

    st.markdown("""
    **Narrative Frames** is an AI-powered text-to-video storytelling platform.
    Just enter a story, and we generate beautiful animated visuals, synced with lifelike narration.
    Perfect for educators, creators, and storytellers!
    """)

    st.divider()
    st.subheader("👨‍💻 Meet the Team")

    # (display name, photo path) for each team member, one column per member.
    team = [
        ("Vinit Jethwa", "/content/drive/MyDrive/Narrative_Frames/Vinit_jethwa.jpeg"),
        ("Sachin Singh", "/content/drive/MyDrive/Narrative_Frames/sachin_singh.jpeg"),
        ("Kaif Qureshi", "/content/drive/MyDrive/Narrative_Frames/kaif_qureshi.jpeg"),
    ]
    # Create the column row exactly once; a second st.columns() call would render
    # an additional, empty row above the photos.
    for column, (member_name, photo_path) in zip(st.columns(3), team):
        with column:
            st.image(photo_path, caption="", use_container_width=True)
            st.markdown(f'<div style="text-align: center;"><strong>{member_name}</strong></div>', unsafe_allow_html=True)

    st.divider()
    st.markdown("### 🚀 Ready to create your story?")
    # Use a callback: it runs *before* the rerun triggered by the click, so the
    # creator page shows immediately instead of only after a second interaction.
    st.button("Start", key="start_button", on_click=go_to_creator)

elif st.session_state.page == "creator":
    from video_creator import show_video_creator_ui
    show_video_creator_ui()
EOF


In [5]:
%%bash
cat << 'EOF' > video_creator.py


import streamlit as st
import os
from preprocessing import clear_context_only
from model import load_pipeline, generate_segments
from audio import generate_audio, list_available_models
from moviepy.editor import VideoFileClip, concatenate_videoclips, AudioFileClip

def show_video_creator_ui():
    """Render the story-to-video page and run the generation workflow.

    The page offers a voice-model selector and a story text area. When the
    "Start Generating" button is pressed, the story is split into prompts, one
    video clip and one narration track are generated per prompt and merged, all
    merged clips are concatenated into ``final_story.mp4``, and the result is
    shown with a download button. Intermediate ``merged_<i>.mp4`` files are
    removed afterwards, also when generation fails part-way.
    """
    st.title("🎬 NarrativeFrames: Create Your Story Video")

    # Voice model selection
    model_choice = st.selectbox("🎙️ Choose Voice Model", list_available_models())

    # Story input
    story = st.text_area("📝 Enter your story:", height=200)

    if not st.button("🚀 Start Generating"):
        return
    if not story.strip():
        st.warning("⚠️ Please enter a story.")
        return

    # Preprocessing
    st.info("🔍 Extracting prompts...")
    try:
        prompts = clear_context_only(story)
    except ValueError as err:
        # Preprocessing signals unusable input with ValueError; show it as a
        # message instead of a raw traceback.
        st.error(f"❌ Could not process the story: {err}")
        return
    if not prompts:
        st.error("❌ No usable sentences were found in the story.")
        return
    for i, p in enumerate(prompts):
        st.markdown(f"**{i+1}.** {p}")

    # Load animation model
    st.info("⏳ Loading animation pipeline...")
    pipe, step = load_pipeline()
    st.success("✅ Model loaded.")

    segments = []
    out = "final_story.mp4"
    try:
        # Generate segments
        for i, prompt in enumerate(prompts):
            st.markdown(f"### 🎞️ Segment {i+1}")
            st.text(f"Processing:\n{prompt}")

            # Generate video
            vid_path, _ = generate_segments(pipe, step, [prompt])[0]

            # Generate audio
            audio_path, _ = generate_audio(prompt, model_key=model_choice)

            # Merge video and audio; always release the readers so file handles
            # and ffmpeg subprocesses do not pile up across segments.
            merged = f"merged_{i}.mp4"
            vc = VideoFileClip(vid_path)
            ac = AudioFileClip(audio_path)
            try:
                final = vc.set_audio(ac)
                final.write_videofile(merged, fps=vc.fps, codec="libx264", audio_codec="aac", verbose=False, logger=None)
            finally:
                ac.close()
                vc.close()
            segments.append(merged)

        # Concatenate all segments
        st.info("🎞️ Concatenating all segments...")
        clips = [VideoFileClip(p) for p in segments]
        try:
            final_video = concatenate_videoclips(clips, method="compose")
            final_video.write_videofile(out, fps=clips[0].fps, codec="libx264", audio_codec="aac", verbose=False, logger=None)
        finally:
            for clip in clips:
                clip.close()
    finally:
        # Cleanup of the per-segment files, whether or not generation succeeded.
        for path in segments:
            if os.path.exists(path):
                os.remove(path)

    # Show results
    st.success("✅ Done! Your video is ready.")
    st.video(out)
    with open(out, "rb") as f:
        st.download_button("⬇️ Download Final Video", data=f, file_name="final_story.mp4", mime="video/mp4")

EOF




# 6. Deployment: Localhost via an ngrok Tunnel

In [None]:
# Install Streamlit & pyngrok, then verify Streamlit is installed
# (-q keeps pip's output short; neither package is version-pinned here).
!pip install -q streamlit pyngrok

# Check that import works and print version
# (runs in a separate python3 process, so it checks the installed package
#  rather than anything already imported in this kernel).
!python3 -c "import streamlit; print('✅ Streamlit version:', streamlit.__version__)"


✅ Streamlit version: 1.45.0


In [None]:
import os, time, socket
from pyngrok import ngrok

# 1) Kill any old Streamlit processes & tunnels
#    `|| true` keeps the shell command's exit status at zero when pkill finds
#    nothing to kill.
os.system("pkill -f streamlit || true")
ngrok.kill()

# 2) Launch updated Streamlit app
#    Started in the background (trailing `&`) on port 8501, with stdout and
#    stderr redirected to /content/streamlit.log.
#    NOTE(review): the captured log below reports that `server.enableCORS` is
#    overridden to 'true' because `server.enableXsrfProtection` is enabled, so
#    the `--server.enableCORS false` flag currently has no effect.
get_ipython().system_raw(
    "streamlit run streamlit_app.py "
    "--server.port 8501 --server.enableCORS false "
    "> /content/streamlit.log 2>&1 &"
)

# 3) Wait for Streamlit to boot
#    Fixed 20-second pause; this is not a readiness check — whether the server
#    is actually up is probed separately afterwards.
time.sleep(20)

# 4) Print logs for debugging
print("---- streamlit.log (first 20 lines) ----")
!head -n 20 /content/streamlit.log

# 5) Port availability check
def is_listening(port, host="127.0.0.1", timeout=5):
    """Report whether something accepts TCP connections on ``host:port``.

    Args:
        port: TCP port to probe.
        host: Host to connect to; defaults to the local loopback address.
        timeout: Maximum number of seconds to wait for the connection.

    Returns:
        True if a connection could be opened (it is closed again right away),
        False if the attempt was refused, timed out or was otherwise rejected.
    """
    try:
        with socket.create_connection((host, port), timeout=timeout):
            return True
    except (OSError, OverflowError):
        # OSError covers refused connections and timeouts; OverflowError is what
        # an out-of-range port number raises. Anything else (e.g. a
        # KeyboardInterrupt) is deliberately allowed to propagate.
        return False

# Probe once and reuse the result so the message and the decision below cannot disagree.
streamlit_up = is_listening(8501)
print(f"🔍 Listening on port 8501? → {streamlit_up}")

# 6) Create ngrok tunnel if Streamlit is up
if streamlit_up:
    # Prefer a token supplied through the NGROK_AUTH_TOKEN environment variable
    # so the real secret never has to be written into the notebook; the
    # placeholder is only a fallback to be replaced by hand.
    NGROK_AUTH_TOKEN = os.environ.get("NGROK_AUTH_TOKEN", "<YOUR_NGROK_TOKEN>")
    ngrok.set_auth_token(NGROK_AUTH_TOKEN)
    url = ngrok.connect(8501, "http")
    # Print just the public address rather than the whole tunnel object's repr.
    print("🔗 Your Narrative Frames app is live at:", url.public_url)
else:
    print("❌ Streamlit app failed to launch. Check the log above.")


---- streamlit.log (first 20 lines) ----
2025-05-08 19:51:28.272 
'server.enableXsrfProtection=true'.
As a result, 'server.enableCORS' is being overridden to 'true'.

More information:
In order to protect against CSRF attacks, we send a cookie with each request.
To do so, we must specify allowable origins, which places a restriction on
cross-origin resource sharing.

If cross origin resource sharing is required, please disable server.enableXsrfProtection.
            

Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.


  You can now view your Streamlit app in your browser.

  Local URL: http://localhost:8501
  Network URL: http://172.28.0.12:8501
🔍 Listening on port 8501? → True
🔗 Your Narrative Frames app is live at: NgrokTunnel: "https://21c5-35-230-14-7.ngrok-free.app" -> "http://localhost:8501"
