In [25]:
%pip install Pillow python-magic moviepy


[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m24.3.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


In [2]:
import boto3
import json
import base64
import time
from botocore.config import Config
# Shared botocore settings: both timeouts are raised to 1000 (botocore
# interprets these values as seconds) so long-running model calls are not cut off.
config = Config(connect_timeout=1000, read_timeout=1000)

# A single session pinned to us-east-1; the Bedrock runtime client below is
# the module-level client used by the helper functions in later cells.
session = boto3.session.Session(region_name="us-east-1")
bedrock_runtime = session.client("bedrock-runtime", config=config)

# Model identifiers passed as `modelId` to the Converse API.
PRO_MODEL_ID = "us.amazon.nova-pro-v1:0"
LITE_MODEL_ID = "us.amazon.nova-lite-v1:0"
MICRO_MODEL_ID = "us.amazon.nova-micro-v1:0"

In [3]:
from json import JSONDecodeError
import re
def parse(pattern:str, text: str) -> str:
    """Return the first capture group of `pattern` found in `text`, stripped.

    The search uses ``re.DOTALL`` so the captured span may cover several lines.

    Args:
        pattern: Regular expression containing at least one capture group.
        text: Text to search (typically raw model output).

    Returns:
        The content of capture group 1 with surrounding whitespace removed.

    Raises:
        JSONDecodeError: If `pattern` does not match anywhere in `text`.
    """
    match = re.search(pattern, text, re.DOTALL)
    if match is None:
        # JSONDecodeError requires (msg, doc, pos); raising the bare class
        # would fail with a TypeError instead of the intended exception.
        raise JSONDecodeError(f"pattern {pattern!r} not found in text", text, 0)
    return match.group(1).strip()

## 分镜头

In [7]:
system_1 = \
"""
我需要你帮我把以下场景描述拆分成一系列分镜。每个分镜都应该：
1. 包含一个清晰的画面重点
2. 描述具体的视觉元素(如构图、光线、视角等)
3. 适合用于AI图像生成
4. 使用简洁的英文描述
5. 添加关键的艺术风格和氛围标签
6. 镜头不超过3个

#注意事项
- Prompting for image generation models differs from prompting for large language models (LLMs). Image generation models do not have the ability to reason or interpret explicit commands. Therefore, it's best to phrase your prompt as if it were an image caption rather than a command or conversation.
- Consider adding modifiers like aspect ratios, image quality settings, or post-processing instructions to refine the output.
- Avoid topics such as pornography, racial discrimination, and toxic words.
- Do not use negation words like "no", "not", "without", and so on in your prompt. The model doesn't understand negation in a prompt and attempting to use negation will result in the opposite of what you intend. For example, a prompt such as "a fruit basket with no bananas" will actually signal the model to include bananas. Instead, you can use a negative prompt, via the negative prompt, to specify any objects or characteristics that you want to exclude from the image. For example "bananas".

请将以下场景描述拆分为分镜，并以精简的 JSON 格式输出：
{
    "shots": [
        {
            "id": "shot_1",
            "description": "场景描述",
            "composition": "构图说明",
            "lighting": "光线说明",
            "angle": "视角说明",
            "distance": "景别说明",
            "style_tags": ["标签1", "标签2", "标签3"],
            "prompt": "英文提示词",
            "negative_prompt": "(可选)负向提示词"
        }
    ]
}


##示例##
场景描述：一个女孩在黄昏时分走在海边的沙滩上，远处是落日和帆船。

输出：
{
    "shots": [
        {
            "id": "shot_1",
            "description": "远景镜头，展现黄昏海滩的整体氛围",
            "composition": "wide angle composition",
            "lighting": "natural sunset lighting",
            "angle": "eye level",
            "distance": "long shot",
            "style_tags": ["cinematic", "golden hour", "peaceful", "warm colors"],
            "prompt": "wide shot of a beach at sunset, golden hour, sailing boats on horizon, cinematic lighting",
            "negative_prompt":""
        },
        {
            "id": "shot_2",
            "description": "女孩的背影剪影",
            "composition": "rule of thirds",
            "lighting": "backlight",
            "angle": "side view",
            "distance": "medium shot",
            "style_tags": ["atmospheric", "moody", "dramatic", "silhouette"],
            "prompt": "silhouette of a girl walking on beach, sunset backdrop, side view, dramatic lighting",
            "negative_prompt":"wrong leg"
        },
        {
            "id": "shot_3",
            "description": "特写镜头展现女孩的表情和周围环境细节",
            "composition": "centered composition",
            "lighting": "side lighting",
            "angle": "eye level",
            "distance": "close-up",
            "style_tags": ["portrait", "emotional", "soft lighting", "intimate"],
            "prompt": "close-up shot of a girl's face, warm sunset light, beach background, soft focus",
            "negative_prompt":""
        }
    ]
}
"""

In [8]:
import json
def invoke_nova(system, messages):
    """Stream a Converse response from the Nova Pro model and parse it as JSON.

    The streamed text is echoed to stdout as it arrives. Markdown code fences
    around the payload (an opening ```json / ``` and a closing ```), as well as
    surrounding whitespace, are removed before decoding.

    Args:
        system: `system` argument forwarded to ``converse_stream``.
        messages: `messages` argument forwarded to ``converse_stream``.

    Returns:
        The object produced by ``json.loads`` on the cleaned response text.

    Raises:
        json.JSONDecodeError: If the cleaned text is not valid JSON (for
            example when the output was cut off by the token limit).
    """
    # Configure the inference parameters.
    inf_params = {"maxTokens": 2000, "topP": 0.9, "temperature": 0.8}

    model_response = bedrock_runtime.converse_stream(
        modelId=PRO_MODEL_ID, messages=messages, system=system, inferenceConfig=inf_params
    )

    chunks = []
    for event in model_response.get("stream") or []:
        # Only contentBlockDelta events carry generated text; skip the rest.
        piece = event.get("contentBlockDelta", {}).get("delta", {}).get("text")
        if piece:
            chunks.append(piece)
            print(piece, end="")

    text = "".join(chunks).strip()
    # Blindly slicing off the last three characters corrupts the JSON whenever
    # the model adds a trailing newline or omits the fence, so the fences are
    # removed only when they are really there.
    if text.startswith("```json"):
        text = text[len("```json"):]
    elif text.startswith("```"):
        text = text[3:]
    if text.endswith("```"):
        text = text[:-3]
    return json.loads(text)

In [9]:
story = "在一片广袤的科技星空下，AWS如一柄闪耀着银色光芒的利剑静静悬浮。这把利剑的剑身流转着云计算的灵动数据流，剑锋锐利如同切割黎明的第一缕阳光。当我握住剑柄的那一刻，数字化转型的荆棘丛生之路顿时豁然开朗，如同劈开浓雾见晴天。利剑所指之处，道路两旁绽放出创新的繁花，照亮了企业腾飞的征程，恰似黎明前升起的启明星指引着前行的方向。"

In [10]:
# System prompt wrapped in the list-of-text-blocks shape passed as `system`.
system = [{"text": system_1}]

# The user turn carries the story. The trailing assistant turn starts the
# reply with "```json" (presumably so the model continues straight into the
# JSON payload; invoke_nova removes the closing fence before decoding).
user_turn = {"role": "user", "content": [{"text": story}]}
assistant_prefill = {"role": "assistant", "content": [{"text": "```json"}]}
messages = [user_turn, assistant_prefill]

shots = invoke_nova(system=system, messages=messages)

{
    "shots": [
        {
            "id": "shot_1",
            "description": "AWS利剑静静悬浮在科技星空下",
            "composition": "centered composition",
            "lighting": "silver radiant glow",
            "angle": "front view",
            "distance": "medium shot",
            "style_tags": ["futuristic", "ethereal", "high-tech", "majestic"],
            "prompt": "a shining silver sword floating in a vast technology-filled sky, emitting a radiant glow, clouds of data swirling around it",
            "negative_prompt": ""
        },
        {
            "id": "shot_2",
            "description": "数字化转型的荆棘丛生之路豁然开朗",
            "composition": "dynamic composition",
            "lighting": "dawn light breaking through",
            "angle": "low angle",
            "distance": "long shot",
            "style_tags": ["transformative", "clarity", "breakthrough", "dawn"],
            "prompt": "a path through a dense thicket suddenly clears, illuminated by the first light of dawn, a

In [11]:
# Flatten every shot into one caption-style prompt string:
# "<prompt> <composition> angle:<angle> <distance> <lighting> <tag tag ...>"
prompts = []
for shot in shots['shots']:
    prompts.append(
        f"{shot['prompt']} {shot['composition']} angle:{shot['angle']} "
        f"{shot['distance']} {shot['lighting']} {' '.join(shot['style_tags'])}"
    )

In [12]:
# The system prompt marks "negative_prompt" as optional, so the model may omit
# the key or return null; both cases are normalised to an empty string.
neg_prompts = [(p.get('negative_prompt') or '') for p in shots['shots']]

In [13]:
import base64
import io
import os
import json
import logging
import time
from datetime import datetime
import boto3
from PIL import Image
from botocore.config import Config
from botocore.exceptions import ClientError

In [14]:

class ImageError(Exception):
    """Raised when the image model reports an error in its response body."""


def generate_image(body, model_id='amazon.nova-canvas-v1:0'):
    """
    Generate images with an Amazon Nova Canvas model on demand.
    Args:
        body (str): JSON-encoded request body passed to ``invoke_model``.
        model_id (str): Model to invoke; defaults to Nova Canvas v1.
    Returns:
        list[bytes]: Decoded image bytes, one entry per image in the response
        (empty when the response has no "images" field).
    Raises:
        ImageError: If the response body contains an "error" field.
    """
    accept = "application/json"
    content_type = "application/json"

    response = bedrock_runtime.invoke_model(
        body=body, modelId=model_id, accept=accept, contentType=content_type
    )
    response_body = json.loads(response.get("body").read())

    # Surface model-side errors first; ImageError used to be undefined, so
    # this path failed with a NameError instead of the intended exception.
    error = response_body.get("error")
    if error is not None:
        raise ImageError(f"Image generation error. Error is {error}")

    image_bytes_list = []
    if "images" in response_body:
        print(f"num of images:{len(response_body['images'])}")
        for base64_image in response_body["images"]:
            image_bytes_list.append(base64.b64decode(base64_image.encode('ascii')))

    return image_bytes_list

In [15]:
def generate_variations(reference_image_paths,prompt,negative_prompt,save_filepath):
    """Generate an IMAGE_VARIATION image from reference images and save it.

    Args:
        reference_image_paths: Paths of the images sent as references.
        prompt (str): Text describing the desired variation.
        negative_prompt (str): Optional text to steer away from; ignored when
            empty or None.
        save_filepath (str): Where the generated image is written.

    Returns:
        `save_filepath` when an image was written, otherwise None (generation
        errors are printed, not raised). This mirrors generate_text2img so
        callers can treat both helpers the same way.

    Raises:
        OSError: If a reference image cannot be read (this happens before the
            best-effort generation step).
    """
    # Load all reference images as base64.
    images = []
    for path in reference_image_paths:
        with open(path, "rb") as image_file:
            images.append(base64.b64encode(image_file.read()).decode("utf-8"))

    # Configure the inference parameters.
    inference_params = {
        "taskType": "IMAGE_VARIATION",
        "imageVariationParams": {
            "images": images, # Images to use as reference
            "text": prompt,
            "similarityStrength": 0.9,  # Range: 0.2 to 1.0
        },
        "imageGenerationConfig": {
            "numberOfImages": 1,  # Number of variations to generate. 1 to 5.
            "quality": "standard",  # Allowed values are "standard" and "premium"
            "width": 1280,  # See README for supported output resolutions
            "height": 720,  # See README for supported output resolutions
            "cfgScale": 4.0,  # How closely the prompt will be followed
            "seed": 0
        },
    }
    # Truthiness check instead of len() so that None is accepted as well.
    if negative_prompt:
        inference_params['imageVariationParams']["negativeText"] = negative_prompt

    body = json.dumps(inference_params)
    saved_path = None
    try:
        image_bytes_ret = generate_image( body=body)
        # Every returned image goes to the same path (the last one wins);
        # with numberOfImages == 1 there is only a single image.
        for image_bytes in image_bytes_ret:
            image = Image.open(io.BytesIO(image_bytes))
            image.save(save_filepath)
            print(f"image saved to {save_filepath}")
            saved_path = save_filepath
            # image.show()
    except Exception as err:
        # Best effort: report the problem and signal failure through None.
        print(str(err))
        return None
    return saved_path
    

In [16]:
def generate_text2img(prompt,negative_prompt,save_filepath):
    """Generate a TEXT_IMAGE image from a prompt and save it.

    Args:
        prompt (str): Caption-style description of the image.
        negative_prompt (str): Optional text to steer away from; ignored when
            empty or None.
        save_filepath (str): Where the generated image is written.

    Returns:
        `save_filepath` when an image was written, otherwise None. Generation
        errors are printed, not raised.
    """
    textToImageParams =  { "text": prompt}
    # Truthiness check instead of len() so that None is accepted as well.
    if negative_prompt:
        textToImageParams["negativeText"] = negative_prompt
    body = json.dumps({
        "taskType": "TEXT_IMAGE",
        "textToImageParams": textToImageParams,
        "imageGenerationConfig": {
            "numberOfImages": 1,
            "height": 720,
            "width": 1280,
            "cfgScale": 6.5,
            "seed": 0
        }
    })
    saved_path = None
    try:
        image_bytes_ret = generate_image( body=body)
        print(f"num:{len(image_bytes_ret)}")

        # Every returned image goes to the same path (the last one wins);
        # with numberOfImages == 1 there is only a single image.
        for image_bytes in image_bytes_ret:
            image = Image.open(io.BytesIO(image_bytes))
            image.save(save_filepath)
            print(f"image saved to {save_filepath}")
            saved_path = save_filepath
            # image.show()
    except Exception as err:
        # Best effort: report the problem and signal failure through None.
        print(str(err))
        return None
    # Only report the path when a file was really written; returning it for
    # an empty result would point callers at a file that does not exist.
    return saved_path

In [17]:

# Nova Canvas documents a limit of 5 reference images for IMAGE_VARIATION.
MAX_REFERENCE_IMAGES = 5
# Pause between consecutive generation requests (seconds).
SHOT_DELAY_SECONDS = 20

timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
output_dir = os.path.join('shot_images', timestamp)
os.makedirs(output_dir, exist_ok=True)

# image_files stays index-aligned with `prompts`/`shots`: one path per shot,
# even if generating that shot failed.
image_files = []
for idx, (prompt, neg_prompt) in enumerate(zip(prompts, neg_prompts)):
    print(f"prompt:{prompt}\nneg_prompt:{neg_prompt}")
    save_path = os.path.join(output_dir, f'shot_{idx}.png')

    # Only earlier shots that were really written can act as references; a
    # failed shot would otherwise make every later variation fail on open().
    references = [path for path in image_files if os.path.exists(path)]
    references = references[-MAX_REFERENCE_IMAGES:]

    if not references:
        # First image (or no usable reference yet): plain text-to-image.
        generate_text2img(prompt, neg_prompt, save_path)
    else:
        generate_variations(references, prompt, neg_prompt, save_path)
    image_files.append(save_path)

    # No need to wait after the final request.
    if idx < len(prompts) - 1:
        time.sleep(SHOT_DELAY_SECONDS)

prompt:a shining silver sword floating in a vast technology-filled sky, emitting a radiant glow, clouds of data swirling around it centered composition angle:front view medium shot silver radiant glow futuristic ethereal high-tech majestic
neg_prompt:
num of images:1
num:1
image saved to shot_images/20241227131000/shot_0.png
prompt:a path through a dense thicket suddenly clears, illuminated by the first light of dawn, a sword cutting through mist dynamic composition angle:low angle long shot dawn light breaking through transformative clarity breakthrough dawn
neg_prompt:
num of images:1
image saved to shot_images/20241227131000/shot_1.png
prompt:a path lined with blooming flowers of innovation, leading into the horizon under bright morning light, a guiding star above leading lines angle:eye level medium shot bright morning light flourishing innovative inspiring morning light
neg_prompt:
num of images:1
image saved to shot_images/20241227131000/shot_2.png


## 优化Reel 提示词

In [18]:
system_2 = \
"""
You are a Prompt rewriting expert for image-to-video models, with expertise in film industry knowledge and skilled at helping users output final text prompts based on input initial frame images and potentially accompanying text prompts. 
The main goal is to help other models produce better video outputs based on these prompts and initial frame images. Users may input only images or both an image and text prompt, where the text could be in Chinese or English.
Your final output should be a single paragraph of English prompt not exceeding 90 words.

##You are proficient in the knowledge mentioned in:##
-You have a comprehensive understanding of the world, knowing various physical laws and can envision video content showing interactions between all things.
-You are imaginative and can envision the most perfect, visually impactful video scenes based on user-input images and prompts.
-You possess extensive film industry knowledge as a master director, capable of supplementing the best cinematographic language and visual effects based on user-input images and simple descriptions.


##Please follow these guidelines for rewriting prompts:##
-Subject: Based on user-uploaded image content, describe the video subject's characteristics in detail, emphasizing details while adjusting according to user's text prompt.
-Scene: Detailed description of video background, including location, environment, setting, season, time, etc., emphasizing details.
-Emotion and Atmosphere: Description of emotions and overall atmosphere conveyed in the video, referencing the image and user's prompt.
-Cinematography: Specify shot types, camera angles, and perspectives, Please refer to the guideline in DocumentPDFmessages.
-Visual Effects: Description of the visual style from user-uploaded images, such as Pixar animation, film style, realistic style, 3D animation, including descriptions of color schemes, lighting types, and contrast.

##Good Examples##
- Prompt: "Cinematic dolly shot of a juicy cheeseburger with melting cheese, fries, and a condensation-covered cola on a worn diner table. Natural lighting, visible steam and droplets. 4k, photorealistic, shallow depth of field"
- Prompt: "Arc shot on a salad with dressing, olives and other vegetables; 4k; Cinematic;"
- Prompt: "First person view of a motorcycle riding through the forest road."
- Prompt: "Closeup of a large seashell in the sand. Gentle waves flow around the shell. Camera zoom in."
- Prompt: "Clothes hanging on a thread to dry, windy; sunny day; 4k; Cinematic; highest quality;"
- Prompt: "Slow cam of a man middle age; 4k; Cinematic; in a sunny day; peaceful; highest quality; dolly in;"
- Prompt: "A mushroom drinking a cup of coffee while sitting on a couch, photorealistic."

##Ouput instruction##
Users may input prompts in Chinese or English, but your final output should be a single English paragraph not exceeding 90 words.
Put your reponse in <prompt></prompt>

"""

In [22]:
system_3 = """
You are a Prompt Rewriting Expert for text-to-video models, with extensive knowledge in film and video production. 
You specialize in helping users improve their text prompts according to specific rules to achieve better model outputs, sometimes modifying the original intent if necessary.

##You excel in the following areas:##
Comprehensive understanding of the world, physical laws, and various interactive video scenarios
Rich imagination to visualize perfect, visually striking video scenes from simple prompts
Extensive film industry expertise as a master director, capable of enhancing simple video descriptions with optimal cinematography and visual effects

##Your prompt rewriting should follow these guidelines:##
Prompting for video generation models differs from prompting for large language models (LLMs).
Video generation models do not have the ability to reason or interpret explicit commands.
Therefore, it's best to phrase your prompt as if it were an image caption or summary of the video rather than a command or conversation.
You may want to include details about the subject, action, environment, lighting, style, and camera motion.

-Subject: Add detailed characteristics of video subjects
-Scene: Elaborate background details based on context
-Emotional atmosphere: Describe the mood and overall ambiance
-Visual effects: Define style (e.g., Pixar, cinematic, hyperrealistic, 3D animation) and describe lighting, color tones, and contrast.
-Cinematography: Specify shot types, camera angles, and perspectives (avoid complex camera movements),refer to 'Camera Prompt 运镜指南' in DocumentPDFmessages. 

##Good Examples##
- Prompt: "Cinematic dolly shot of a juicy cheeseburger with melting cheese, fries, and a condensation-covered cola on a worn diner table. Natural lighting, visible steam and droplets. 4k, photorealistic, shallow depth of field"
- Prompt: "Arc shot on a salad with dressing, olives and other vegetables; 4k; Cinematic;"
- Prompt: "First person view of a motorcycle riding through the forest road."
- Prompt: "Closeup of a large seashell in the sand. Gentle waves flow around the shell. Camera zoom in."
- Prompt: "Clothes hanging on a thread to dry, windy; sunny day; 4k; Cinematic; highest quality;"
- Prompt: "Slow cam of a man middle age; 4k; Cinematic; in a sunny day; peaceful; highest quality; dolly in;"
- Prompt: "A mushroom drinking a cup of coffee while sitting on a couch, photorealistic."

##Ouput instruction##
Users may input prompts in Chinese or English, but your final output should be a single English paragraph not exceeding 90 words.
Put your reponse in <prompt></prompt>
"""

In [19]:
import magic
from json import JSONDecodeError
import re
def img_mime(image_path):
    """Return the MIME type of `image_path`, e.g. "image/png".

    Content-based detection via python-magic is tried first. If that fails
    for any reason, the type is guessed from the file extension with the
    standard-library ``mimetypes`` module.

    Args:
        image_path (str): Path of the file to inspect.

    Returns:
        The MIME type string, or None when neither method can determine it.
    """
    try:
        mime = magic.Magic(mime=True)
        return mime.from_file(image_path)
    except Exception as e:
        # Best effort: report and fall through to the extension-based guess.
        print(f"python-magic detection error: {str(e)}")

    # Callers split the result on "/", so returning None here would make them
    # fail; an extension-based guess keeps the pipeline going when libmagic
    # is unavailable.
    import mimetypes
    guessed_type, _ = mimetypes.guess_type(image_path)
    return guessed_type

def parse(text: str) -> str:
    """Extract the content of the first <prompt>...</prompt> block in `text`.

    Note: this definition replaces the earlier two-argument ``parse(pattern,
    text)`` from a previous cell once this cell has been executed.

    Args:
        text: Raw model output.

    Returns:
        The text between the tags with surrounding whitespace removed.

    Raises:
        JSONDecodeError: If `text` contains no <prompt>...</prompt> block.
    """
    pattern = r"<prompt>(.*?)</prompt>"
    match = re.search(pattern, text, re.DOTALL)
    if match is None:
        # JSONDecodeError requires (msg, doc, pos); raising the bare class
        # would fail with a TypeError instead of the intended exception.
        raise JSONDecodeError("no <prompt>...</prompt> block found in model output", text, 0)
    return match.group(1).strip()

In [20]:
def optimize_reel_prompt(system,user_prompt,ref_image,doc_bytes):
    """Rewrite `user_prompt` into a video prompt using a reference image.

    Sends the PDF guide, the reference image and the user prompt to the Nova
    Lite model through ``converse_stream``, echoes the streamed reply to
    stdout and returns the text found inside <prompt>...</prompt>.

    Args:
        system (str): System prompt text.
        user_prompt (str): Initial description to be rewritten.
        ref_image (str): Path of the reference image file.
        doc_bytes (bytes): Raw bytes of the PDF attached as "DocumentPDFmessages".

    Returns:
        The rewritten prompt as returned by ``parse``.

    Raises:
        ValueError: If the image format of `ref_image` cannot be determined.
    """
    with open(ref_image, "rb") as f:
        image = f.read()

    mime_type = img_mime(ref_image)
    # img_mime may return None; fail with a clear message instead of an
    # AttributeError on None.split().
    if not mime_type or "/" not in mime_type:
        raise ValueError(
            f"Cannot determine image format for {ref_image!r} (MIME type: {mime_type!r})"
        )
    image_format = mime_type.split('/')[1]

    system = [
        {
            "text": system
        }
    ]
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "document": {
                        "format": "pdf",
                        "name": "DocumentPDFmessages",
                        "source": {
                            "bytes": doc_bytes
                        }
                    }
                },
                {"image": {"format": image_format, "source": {"bytes": image}}},
                {"text": user_prompt},
            ],
        }
    ]

    # Configure the inference parameters.
    inf_params = {"maxTokens": 2000, "topP": 0.9, "temperature": 0.5}

    model_response = bedrock_runtime.converse_stream(
        modelId=LITE_MODEL_ID, messages=messages, system=system, inferenceConfig=inf_params
    )

    text = ""
    for event in model_response.get("stream") or []:
        # Only contentBlockDelta events carry generated text.
        if "contentBlockDelta" in event:
            piece = event["contentBlockDelta"]["delta"].get("text", "")
            text += piece
            print(piece, end="")
    return parse(text)

In [23]:
def optimize_reel_prompt_no_img(system,user_prompt,doc_bytes):
    """Rewrite `user_prompt` into a video prompt without a reference image.

    Sends the PDF guide and the user prompt to the Nova Lite model through
    ``converse_stream``, echoes the streamed reply to stdout and returns the
    result of ``parse`` on the collected text.

    Args:
        system (str): System prompt text.
        user_prompt (str): Initial description to be rewritten.
        doc_bytes (bytes): Raw bytes of the PDF attached as "DocumentPDFmessages".

    Returns:
        The rewritten prompt as returned by ``parse``.
    """
    pdf_block = {
        "document": {
            "format": "pdf",
            "name": "DocumentPDFmessages",
            "source": {"bytes": doc_bytes},
        }
    }
    user_turn = {"role": "user", "content": [pdf_block, {"text": user_prompt}]}

    model_response = bedrock_runtime.converse_stream(
        modelId=LITE_MODEL_ID,
        messages=[user_turn],
        system=[{"text": system}],
        # Inference parameters.
        inferenceConfig={"maxTokens": 2000, "topP": 0.9, "temperature": 0.5},
    )

    pieces = []
    stream = model_response.get("stream")
    if stream:
        for event in stream:
            # Events without a contentBlockDelta carry no text.
            if "contentBlockDelta" not in event:
                continue
            piece = event["contentBlockDelta"]["delta"]["text"]
            pieces.append(piece)
            print(piece, end="")
    return parse("".join(pieces))

In [24]:
# Read the whole PDF into memory; the raw bytes are attached as the
# "DocumentPDFmessages" document by the prompt-optimisation helpers.
with open("Amazon_Nova_Reel.pdf", "rb") as pdf_file:
    doc_bytes = pdf_file.read()

### Option 1：全部镜头采用 image-to-video

In [None]:
# Option 1: every shot's prompt is optimised together with its reference image.
reel_prompts = []
for shot, ref_img in zip(shots['shots'], image_files):
    description = shot['description']
    print(description)
    reel_prompts.append(optimize_reel_prompt(system_2, description, ref_img, doc_bytes))
    



### Option 2：只对中间镜头使用 image-to-video，其余镜头使用 text-to-video

In [25]:
# 0-based index of the only shot whose clip is generated from its reference
# image; generate_video_batch_2 passes a ref image for exactly this index.
IMAGE_TO_VIDEO_SHOT_INDEX = 1

reel_prompts = []
for idx, (p, ref_img) in enumerate(zip(shots['shots'], image_files)):
    print(p['description'])
    if idx == IMAGE_TO_VIDEO_SHOT_INDEX:
        # Image-to-video shot: optimise with the image and the
        # image-to-video system prompt. The condition used to be inverted,
        # so the one image-driven shot was optimised without its image.
        text = optimize_reel_prompt(system_2, p['description'], ref_img, doc_bytes)
    else:
        # Text-to-video shots: no image is sent to the video model, so use
        # the text-to-video system prompt and no reference image.
        text = optimize_reel_prompt_no_img(system_3, p['description'], doc_bytes)
    reel_prompts.append(text)

AWS利剑静静悬浮在科技星空下
<prompt>A majestic sword hovers serenely in a futuristic sky filled with advanced technology and glowing clouds, creating a sense of awe and wonder. The scene is captured with a First Person View Aerial shot, zooming in slowly to highlight the intricate details of the sword and its radiant aura. The video is rendered in Ultra HD, 8K resolution, ensuring crisp and photorealistic visuals. The atmosphere is cinematic, with a harmonious blend of warm and cool tones, evoking a sense of epic adventure.</prompt>数字化转型的荆棘丛生之路豁然开朗
<prompt>First Person View Aerial, approaching a grand luxury resort complex at dusk. The camera dollys in, revealing ornate beige buildings with clock towers and domes, set against a backdrop of manicured gardens and distant mountains. The sky transitions beautifully from pink to purple as the sun sets. Warm lights from windows and pathways enhance the cinematic quality. Ultra HD, 8K resolution, with crisp details and highest resolution.</prompt>创新的繁花绽放

In [26]:
reel_prompts

['A majestic sword hovers serenely in a futuristic sky filled with advanced technology and glowing clouds, creating a sense of awe and wonder. The scene is captured with a First Person View Aerial shot, zooming in slowly to highlight the intricate details of the sword and its radiant aura. The video is rendered in Ultra HD, 8K resolution, ensuring crisp and photorealistic visuals. The atmosphere is cinematic, with a harmonious blend of warm and cool tones, evoking a sense of epic adventure.',
 'First Person View Aerial, approaching a grand luxury resort complex at dusk. The camera dollys in, revealing ornate beige buildings with clock towers and domes, set against a backdrop of manicured gardens and distant mountains. The sky transitions beautifully from pink to purple as the sun sets. Warm lights from windows and pathways enhance the cinematic quality. Ultra HD, 8K resolution, with crisp details and highest resolution.',
 'A dynamic aerial shot of a grand luxury resort complex at dusk

In [29]:
# S3 location that receives the asynchronous video generation output.
BUCKET = "s3://bedrock-video-generation-us-east-1-jlvyiv"

# This rebinds the module-level client that the earlier helper functions also
# use. Pass the shared `config` again so the raised connect/read timeouts are
# not silently lost for them once this cell has been executed.
bedrock_runtime = session.client("bedrock-runtime", config=config)

def generate_video(bucket,text_prompt,ref_image = None):
    """Start an asynchronous Nova Reel video generation job.

    Args:
        bucket (str): S3 URI that receives the job output.
        text_prompt (str): Text prompt describing the video.
        ref_image (str | None): Optional path of an image attached to the
            request; when omitted the request carries text only.

    Returns:
        The response of ``start_async_invoke`` (the callers in this notebook
        read its "invocationArn"), or None when the job could not be started.
        Failures are printed, not raised.

    Raises:
        OSError: If `ref_image` is given but cannot be read.
    """
    model_input = {
        "taskType": "TEXT_VIDEO",
        "textToVideoParams": {
            "text": text_prompt,
        },
        "videoGenerationConfig": {
            "durationSeconds": 6,
            "fps": 24,
            "dimension": "1280x720",
            "seed": 0,  # Change the seed to get a different result
        },
    }

    if ref_image:
        mime_type = img_mime(ref_image)
        # img_mime may return None; report it instead of failing on None.split().
        if not mime_type or "/" not in mime_type:
            print(f"Error: cannot determine image format for {ref_image}")
            return None
        with open(ref_image, "rb") as f:
            input_image_base64 = base64.b64encode(f.read()).decode("utf-8")
        model_input['textToVideoParams']['images'] = [
            {
                "format": mime_type.split('/')[1],
                "source": {
                    "bytes": input_image_base64
                }
            }
        ]

    try:
        # Start the asynchronous video generation job.
        invocation = bedrock_runtime.start_async_invoke(
            modelId="amazon.nova-reel-v1:0",
            modelInput=model_input,
            outputDataConfig={
                "s3OutputDataConfig": {
                    # Honour the `bucket` argument; it used to be ignored in
                    # favour of the module-level BUCKET constant.
                    "s3Uri": bucket
                }
            }
        )
        return invocation

    except Exception as e:
        # Service errors carry a structured `response`; any other exception
        # does not, so fall back to its string form instead of raising an
        # AttributeError from inside the handler.
        response = getattr(e, "response", None)
        if isinstance(response, dict):
            message = response.get("Error", {}).get("Message", str(e))
        else:
            message = str(e)
        print(f"Error: {message}")
        return None

In [63]:
def generate_video_batch(prompts, images=None):
    """Start one video job per prompt, each with its reference image.

    Args:
        prompts: Video prompts, one per shot.
        images: Reference image paths paired with `prompts` by position.
            Defaults to the module-level `image_files`.

    Returns:
        list[str]: The "invocationArn" of every started job, in prompt order.

    Raises:
        RuntimeError: If a job could not be started (generate_video returned
            None). Jobs started before the failure keep running.
    """
    if images is None:
        images = image_files
    invocation_arns = []
    for prompt, image_file in zip(prompts, images):
        invocation = generate_video(bucket=BUCKET, text_prompt=prompt, ref_image=image_file)
        # generate_video signals failure with None; subscripting it would
        # raise an opaque TypeError.
        if invocation is None:
            raise RuntimeError(f"Failed to start video generation for prompt: {prompt!r}")
        invocation_arns.append(invocation['invocationArn'])
    return invocation_arns

# Use a reference image only for the second shot.
def generate_video_batch_2(prompts, images=None, ref_image_index=1):
    """Start one video job per prompt; only one shot gets a reference image.

    Args:
        prompts: Video prompts, one per shot.
        images: Reference image paths paired with `prompts` by position.
            Defaults to the module-level `image_files`.
        ref_image_index (int): 0-based position of the only shot that is
            generated with its reference image (default 1, the second shot).
            Every other shot is generated from text alone.

    Returns:
        list[str]: The "invocationArn" of every started job, in prompt order.

    Raises:
        RuntimeError: If a job could not be started (generate_video returned
            None). Jobs started before the failure keep running.
    """
    if images is None:
        images = image_files
    invocation_arns = []
    for idx, (prompt, image_file) in enumerate(zip(prompts, images)):
        ref_image = image_file if idx == ref_image_index else None
        invocation = generate_video(bucket=BUCKET, text_prompt=prompt, ref_image=ref_image)
        # generate_video signals failure with None; subscripting it would
        # raise an opaque TypeError.
        if invocation is None:
            raise RuntimeError(f"Failed to start video generation for prompt: {prompt!r}")
        invocation_arns.append(invocation['invocationArn'])
    return invocation_arns

In [64]:
# Use a reference image for every video
# invocation_arns = generate_video_batch(reel_prompts)

# Use a reference image only for the middle video
invocation_arns = generate_video_batch_2(reel_prompts)

In [65]:
# invocation_arns=['arn:aws:bedrock:us-east-1:434444145045:async-invoke/2p9mjzs41cu7',
#  'arn:aws:bedrock:us-east-1:434444145045:async-invoke/lgnytip3nrck',
#  'arn:aws:bedrock:us-east-1:434444145045:async-invoke/7udalld4x732',
#  'arn:aws:bedrock:us-east-1:434444145045:async-invoke/3n1ujwqbltrr']
invocation_arns

['arn:aws:bedrock:us-east-1:434444145045:async-invoke/zvp0acj8yp66',
 'arn:aws:bedrock:us-east-1:434444145045:async-invoke/2whq3cn3taxz',
 'arn:aws:bedrock:us-east-1:434444145045:async-invoke/q8fdq8era4pe']

In [73]:
def fetch_job_status(invocation_arns, poll_interval=5):
    """Poll each asynchronous job until it is no longer 'InProgress'.

    Jobs are polled one after another, in the given order. Every poll prints
    "<arn>: <status>".

    Args:
        invocation_arns: Invocation ARNs to wait for.
        poll_interval: Seconds to wait between two polls of the same job.

    Returns:
        list: The final ``get_async_invoke`` response of every job, in the
        same order as `invocation_arns`.
    """
    final_responses = []
    for invocation in invocation_arns:
        while True:
            response = bedrock_runtime.get_async_invoke(
                invocationArn=invocation
            )
            status = response["status"]
            print(f"{invocation}: {status}")
            if status != 'InProgress':
                final_responses.append(response)
                break
            # Sleep only while the job is still running; a finished job no
            # longer costs an extra wait.
            time.sleep(poll_interval)
    return final_responses

In [74]:
final_responses = fetch_job_status(invocation_arns)

arn:aws:bedrock:us-east-1:434444145045:async-invoke/zvp0acj8yp66: Completed
arn:aws:bedrock:us-east-1:434444145045:async-invoke/2whq3cn3taxz: Completed
arn:aws:bedrock:us-east-1:434444145045:async-invoke/q8fdq8era4pe: Completed


In [75]:
import random
import string
from datetime import datetime
def random_string_name(length=12):
    """Return `length` random characters drawn from lowercase ASCII letters and digits."""
    alphabet = string.ascii_lowercase + string.digits
    picked = random.choices(alphabet, k=length)
    return ''.join(picked)

def download_video_from_s3(s3_uri, local_path):
    """
    Download a video file from S3 into a local directory.

    The file is stored under a generated name: a timestamp followed by a
    random suffix and the ".mp4" extension.

    Parameters:
    s3_uri (str): S3 URI in format 's3://bucket-name/path/to/video.mp4'
    local_path (str): Local directory that receives the file (created if missing)

    Returns:
    str | bool: Path of the downloaded file ("<local_path>/<generated name>"),
    or False when the URI is invalid or the download fails. Errors are
    printed, not raised.
    """
    try:
        # Validate the URI before creating clients or directories.
        if not s3_uri.startswith('s3://'):
            raise ValueError("Invalid S3 URI format. Must start with 's3://'")

        # Everything up to the first '/' is the bucket, the rest is the key;
        # both must be non-empty ('s3://bucket/' used to slip through with an
        # empty key).
        bucket_name, _, s3_key = s3_uri[len('s3://'):].partition('/')
        if not bucket_name or not s3_key:
            raise ValueError("Invalid S3 URI format")

        # Initialize S3 client
        s3_client = boto3.client('s3')

        # Create directory if it doesn't exist
        os.makedirs(local_path, exist_ok=True)
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        fname = timestamp+random_string_name()+'.mp4'
        target = f"{local_path}/{fname}"

        # Download the file
        print(f"Downloading {s3_uri} to {target}")
        s3_client.download_file(bucket_name, s3_key, target)
        print("Download completed successfully!")

        return target

    except Exception as e:
        # Best effort: report the problem and signal failure through False.
        print(f"Error downloading file: {str(e)}")
        return False

In [76]:
# Download the output of every completed job. Jobs that did not complete and
# downloads that failed are skipped, so video_files only ever contains paths
# of files that exist (download_video_from_s3 returns False on failure).
video_files = []
for response in final_responses:
    status = response.get('status')
    if status != 'Completed':
        print(f"Skipping {response.get('invocationArn')}: status is {status}")
        continue
    output_uri = response['outputDataConfig']['s3OutputDataConfig']['s3Uri']+'/output.mp4'
    file_name = download_video_from_s3(output_uri,'./generated_videos')
    if file_name:
        video_files.append(file_name)

Downloading s3://bedrock-video-generation-us-east-1-jlvyiv/zvp0acj8yp66/output.mp4 to ./generated_videos/20241227133909j7t4s82sn23r.mp4
Download completed successfully!
Downloading s3://bedrock-video-generation-us-east-1-jlvyiv/2whq3cn3taxz/output.mp4 to ./generated_videos/202412271339140fbbafuekyf9.mp4
Download completed successfully!
Downloading s3://bedrock-video-generation-us-east-1-jlvyiv/q8fdq8era4pe/output.mp4 to ./generated_videos/20241227133918jdw3vlqxa6cv.mp4
Download completed successfully!


## stitch videos

In [77]:
from moviepy import VideoFileClip, CompositeVideoClip
def stitch_videos(video1_path: str, video2_path: str, output_path: str):
    """
    Stitches two videos together and saves the result to a new file.

    The second video is scheduled to start when the first one ends, and both
    are rendered as one composite clip.

    Args:
        video1_path (str): The file path to the first video.
        video2_path (str): The file path to the second video.
        output_path (str): The file path to save the stitched video.

    Returns:
        str: `output_path`.
    """
    # Each clip is closed in a `finally` so that a failure while loading or
    # rendering does not leave the underlying readers open.
    clip1 = VideoFileClip(video1_path)
    try:
        clip2 = VideoFileClip(video2_path)
        try:
            # Play clip2 right after clip1 inside a single composite clip.
            final_clip = CompositeVideoClip([
                clip1,
                clip2.with_start(clip1.duration),
            ])
            try:
                # Write the result
                final_clip.write_videofile(output_path)
            finally:
                final_clip.close()
        finally:
            clip2.close()
    finally:
        clip1.close()

    print(f"Stitched video saved to {output_path}")
    return output_path

In [78]:
# Chain the clips pairwise: the running result is stitched with the next
# video until all are joined. With a single video there is nothing to stitch
# and that video is the final result (it used to be left as None, which broke
# the captioning step).
generated_fname = video_files[0] if video_files else None
if generated_fname:
    # Stitched files are written next to the downloaded videos.
    stitch_dir = os.path.dirname(generated_fname)
    for next_video in video_files[1:]:
        generated_fname = stitch_videos(
            generated_fname,
            next_video,
            os.path.join(stitch_dir, random_string_name()+'.mp4'),
        )
print(f"Final stitch video:{generated_fname}")

MoviePy - Building video ./generated_videos/7c26vbm53ki8.mp4.
MoviePy - Writing video ./generated_videos/7c26vbm53ki8.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready ./generated_videos/7c26vbm53ki8.mp4
Stitched video saved to ./generated_videos/7c26vbm53ki8.mp4
MoviePy - Building video ./generated_videos/nvqcbxt4qe46.mp4.
MoviePy - Writing video ./generated_videos/nvqcbxt4qe46.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready ./generated_videos/nvqcbxt4qe46.mp4
Stitched video saved to ./generated_videos/nvqcbxt4qe46.mp4
Final stitch video:./generated_videos/nvqcbxt4qe46.mp4


### add caption

In [79]:
from moviepy import TextClip
def add_timed_captions(video_path, output_path, captions,font='./yahei.ttf'):
    """Overlay timed text captions on a video and write the result.

    Args:
        video_path (str): Path of the source video.
        output_path (str): Path of the captioned video to write.
        captions: Iterable of (text, start_time, end_time) tuples; the times
            are passed to ``with_start`` / ``with_end`` of each text clip.
            Captions whose text is empty or whitespace are skipped.
        font (str): Font file used to render the caption text.
    """
    # Load the video
    video = VideoFileClip(video_path)
    final_video = None
    try:
        # Create text clips for each caption
        txt_clips = []
        for text, start_time, end_time in captions:
            # Nothing to render for blank fragments.
            if not text or not text.strip():
                continue
            txt_clip = TextClip(text=text, font_size=50, color='white', font=font,text_align='center',margin=(20,20))
            txt_clip = txt_clip.with_position('bottom').with_start(start_time).with_end(end_time)
            txt_clips.append(txt_clip)

        # Combine video and all text clips
        final_video = CompositeVideoClip([video] + txt_clips)

        # Write output video
        final_video.write_videofile(output_path)
    finally:
        # Close clips even when building or rendering fails.
        if final_video is not None:
            final_video.close()
        video.close()

In [80]:
import re

def split_caption(text):
    """Split `text` into caption fragments at punctuation and line breaks.

    Delimiters are ASCII and full-width commas, periods and semicolons, plus
    newline and tab. Spaces, '!' and '?' (either width) do not split.

    Args:
        text (str): Text to split.

    Returns:
        list[str]: The fragments in order, stripped of surrounding whitespace.
        Empty fragments (for example after a trailing delimiter) are dropped.
    """
    delimiters = [',', '，', '。', '.', ';', '；', '\n', '\t']
    pattern = '|'.join(map(re.escape, delimiters))
    # re.split yields '' around leading, trailing or consecutive delimiters;
    # those would turn into blank captions, so they are filtered out.
    return [part.strip() for part in re.split(pattern, text) if part.strip()]

In [81]:
import math
# Length of one shot in seconds; matches "durationSeconds" in generate_video.
duration = 6
captions = []
for idx, p in enumerate(shots['shots']):
    # Ignore blank fragments so they do not take up caption time.
    desc_arr = [part for part in split_caption(p['description']) if part.strip()]
    if not desc_arr:
        continue
    shot_start = idx * duration
    count = len(desc_arr)
    for idy, sub_desc in enumerate(desc_arr):
        # Divide the shot evenly between its fragments. Rounding the slice
        # length up with math.ceil made the captions overrun the shot (and
        # overlap the next shot's captions) whenever the fragment count did
        # not divide the duration.
        start = shot_start + duration * idy / count
        end = shot_start + duration * (idy + 1) / count
        captions.append((sub_desc, start, end))

captions

[('AWS利剑静静悬浮在科技星空下', 0, 6),
 ('数字化转型的荆棘丛生之路豁然开朗', 6, 12),
 ('创新的繁花绽放', 12, 15),
 ('照亮企业腾飞的征程', 15, 18)]

In [82]:
# captions = [ (p['description'],idx*duration,(idx+1)*duration) for idx, p in enumerate(shots['shots'])]
# captions = [('广袤的科技星空中', 0, 3),
#  ('AWS如一柄闪耀着银色光芒的利剑静静悬浮', 3, 6),
#  ('利剑的剑身流转着云计算的灵动数据流', 6, 12),
#  ('当我握住剑柄的那一刻', 12, 15),
#  ('数字化转型的荆棘丛生之路顿时豁然开朗', 15, 18),
#  ('利剑所指之处', 18, 20),
#  ('道路两旁绽放出创新的繁花', 20, 22),
#  ('照亮了企业腾飞的征程', 22, 24)]

In [83]:
# Write the captioned copy next to the stitched video: "<name>_caption.mp4".
stitched_stem = os.path.splitext(generated_fname)[0]
caption_video_file = f"{stitched_stem}_caption.mp4"
add_timed_captions(generated_fname, caption_video_file, captions)

MoviePy - Building video ./generated_videos/nvqcbxt4qe46_caption.mp4.
MoviePy - Writing video ./generated_videos/nvqcbxt4qe46_caption.mp4



                                                                        

MoviePy - Done !
MoviePy - video ready ./generated_videos/nvqcbxt4qe46_caption.mp4


In [84]:
from IPython.display import Video, HTML

Video(caption_video_file,width=1280, height=720)