In [None]:
import os
# full path to 30 FPS video
seq_id = '2024_08_15__19_55_49'
rgb_video_path = f"/media/zipa/Data/egowalk_30FPS/egowalk_cosmos_processed/video/rgb/{seq_id}__rgb.mp4"
dpt_video_path = f"/media/zipa/Data/egowalk_30FPS/egowalk_cosmos_processed/video/depth/{seq_id}__depth.mkv"
assert(os.path.exists(rgb_video_path))
assert(os.path.exists(dpt_video_path))

In [235]:
import numpy as np
import cv2
import json

def calc_initial_frame(src_video_path, h, m, s):
    obj = cv2.VideoCapture(src_video_path)
    fps = int(obj.get(cv2.CAP_PROP_FPS))
    width = int(obj.get(cv2.CAP_PROP_FRAME_WIDTH))
    height = int(obj.get(cv2.CAP_PROP_FRAME_HEIGHT))
    return (h * 60 * 60 + m * 60 + s ) * fps, (fps, width, height)

def load_video_fragment(src_video_path, initial_frame, frames_count, frames_step, drop_chans=False):
    frames = []
    rdr = cv2.VideoCapture(src_video_path)
    # print(fps := rdr.get(cv2.CAP_PROP_FPS))
    i_frame = 0
    while True:
        have_read, frame_data = rdr.read()
        if not have_read:
            break
        if i_frame >= initial_frame and (i_frame - initial_frame) % frames_step == 0:
            # print(f"Pushing frame {i_frame}")
            if drop_chans:
                frame_data = frame_data[:, :, 0]
            frames.append(frame_data)
            if len(frames) == frames_count:
                break
        i_frame += 1
    assert(len(frames) == frames_count)
    return frames

def save_video(output_path, frames, fps):
    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(output_path, fourcc, fps, (frames[0].shape[1], frames[0].shape[0]))
    for frame in frames:
        if len(frame.shape) == 2:
            frame = np.stack([frame, frame, frame], axis=2)
        out.write(frame)
    out.release()

def save_experiment(rgb, dpt, fps, prompt, is_full_prompt, experiment_folder, relative_root='./'):
    rgb_name = f'{experiment_folder}/rgb.mp4'
    dpt_name = f'{experiment_folder}/depth.mp4'
    json_name = f'{experiment_folder}/spec.json'
    shell_name = f'{experiment_folder}/runme.sh'

    os.makedirs(os.path.join(relative_root, experiment_folder), 0o777, True)
    save_video(os.path.join(relative_root, rgb_name), rgb, fps)
    save_video(os.path.join(relative_root, dpt_name), dpt, fps)

    with open(os.path.join(relative_root, json_name), 'w') as fp:
        json.dump({"prompt": prompt, "input_video_path": rgb_name,
                    "depth": {"input_control": dpt_name, "control_weight": 1.0},
                    "edge": {"control_weight": 1.0},
                    "vis": {"control_weight": 1.0},
                    "seg": {"control_weight": 1.0}
                    }, fp)

    # generate script to call it from shell
    shell_script_full_path = os.path.join(relative_root, shell_name)
    output_string = """
    export CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:=0}"
    export CHECKPOINT_DIR="${CHECKPOINT_DIR:=./checkpoints}"
    export NUM_GPU="${NUM_GPU:=1}"
    """

    output_string += f"PYTHONPATH=$(pwd) torchrun --nproc_per_node=$NUM_GPU --nnodes=1 --node_rank=0 cosmos_transfer1/diffusion/inference/transfer.py \
        --checkpoint_dir $CHECKPOINT_DIR \
        --video_save_folder {experiment_folder}/output \
        --controlnet_specs {json_name} \
        --offload_text_encoder_model \
        --offload_guardrail_models \
        --offload_prompt_upsampler \
        --num_gpus $NUM_GPU"
    if not is_full_prompt:
        output_string += " --upsample_prompt" + '\n'
    else:
        output_string += '\n'
    with open(shell_script_full_path, 'w') as fp:
        print(output_string, file=fp)

    os.chmod(shell_script_full_path, 0o777)

In [None]:
starting_frame, _ = calc_initial_frame(rgb_video_path, h=0, m=2, s=0)
rgb = load_video_fragment(rgb_video_path, starting_frame, 4 * 30, frames_step=1, drop_chans=False)
dpt = load_video_fragment(dpt_video_path, starting_frame, 4 * 30, frames_step=1, drop_chans=True)

In [None]:
from matplotlib import pyplot as plt
fig, axes = plt.subplots(1, 2)
fig.set_size_inches(15, 5)
for i_axis, data in enumerate([rgb[0], dpt[0]]):
    axes[i_axis].imshow(data)
    axes[i_axis].axis('off')


In [None]:
# full code for generating the experiment
fps = 30
# prompt = "The video captures a series of frames showing a street view from a pedastrian. \
#     The street is lined with buildings of various colors, including red, yellow, and white. \
#     There are no visible pedestrians, and the traffic is minimal with a few cars and a motorcycle passing by. \
#     The sky is partly cloudy with blue patches, and the overall atmosphere appears calm."
prompt = "moving through city street. buildings around, few people ahead."

is_full_prompt = False
start_time_in_minutes = 2 # minutes
fragment_length_in_seconds = 4 # seconds
    
starting_frame, _ = calc_initial_frame(rgb_video_path, h=0, m=start_time_in_minutes, s=0)

experiment_folder = f'{seq_id}_{starting_frame}_{fps}_{fragment_length_in_seconds}_{"full" if is_full_prompt else "short"}' 

rgb = load_video_fragment(rgb_video_path, starting_frame, fragment_length_in_seconds * fps, frames_step=1, drop_chans=False)
dpt = load_video_fragment(dpt_video_path, starting_frame, fragment_length_in_seconds * fps, frames_step=1, drop_chans=True)
save_experiment(rgb, dpt, fps, prompt, is_full_prompt, experiment_folder)

In [None]:
# generate several experiments
prompt = "moving through city street. buildings around, few people ahead."
is_full_prompt = False
downsampling_resolution = 4
fps_range = [4]#[4, 30]
duration_range = [2]#[2, 4, 8]
start_range = [0, 1, 2, 3]
prefix = 'small'

for fps in fps_range:
    for fragment_length_in_seconds in duration_range:
        for start_time_in_minutes in start_range:
            starting_frame, _ = calc_initial_frame(rgb_video_path, h=0, m=start_time_in_minutes, s=0)
            experiment_folder = prefix + f'{seq_id}_{starting_frame}_{fps}_{fragment_length_in_seconds}_{"full" if is_full_prompt else "short"}' 
            if fps == 30:
                frames_step = 1
            elif fps == 4:
                frames_step = 7
            rgb = load_video_fragment(rgb_video_path, starting_frame, fragment_length_in_seconds * fps, frames_step=frames_step, drop_chans=False)
            dpt = load_video_fragment(dpt_video_path, starting_frame, fragment_length_in_seconds * fps, frames_step=frames_step, drop_chans=True)
            if downsampling_resolution > 1:
                for i_frame in range(len(rgb)):
                    rgb[i_frame] = rgb[i_frame][::downsampling_resolution, ::downsampling_resolution, :]
                    dpt[i_frame] = dpt[i_frame][::downsampling_resolution, ::downsampling_resolution]
            save_experiment(rgb, dpt, fps, prompt, is_full_prompt, experiment_folder)

In [None]:
# wait for file to get ready on nabo
import time
while not os.path.exists('./small2024_08_15__19_55_49_18000_4_2_short/output/output.txt'):
    os.system("sshpass -p 'megaB0SS' scp -r kzipa@10.16.84.42:/mnt/vol2_raid/shared_data/cosmos/cosmos-transfer1/small2024_08_15__19_55_49_18000_4_2_short ..")
    time.sleep(300)

In [None]:
# update prompts in follow-up
frames = {0: 0, 1: 6000, 2: 12000, 3: 18000}
prompts_from_files = {}
for s in [0, 1, 2, 3]:
    with open(f'./small2024_08_15__19_55_49_{frames[s]}_4_2_short/output/output.txt') as fp:
        full_prompt = fp.read() 
    prompts_from_files[s] = full_prompt

In [None]:
# generate full-resolution videos with long prompt, that we got using small-resolution, small-fps model with short prompts
is_full_prompt = True
start_to_prompt = {0: "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. \
    The setting appears to be a city street with buildings on either side. \
    The sky is overcast, and the lighting is dim, suggesting it could be early morning or late afternoon. \
    The street is mostly empty, with a few pedestrians visible in the distance. \
    The buildings have a modern architectural style, with large windows and neutral-colored facades. \
    The person's hand is visible in the foreground, holding what appears to be a camera or phone.",
                   1: "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. \
    The person is walking through a city street, with buildings on either side and a few pedestrians visible in the distance. \
    The sky is overcast, and the street appears to be relatively empty with no visible traffic. \
    The person's hands are visible in the foreground, and they are wearing a dark-colored jacket. \
    The video does not show any significant action or event, but rather a routine walk through an urban environment.",
                   2: "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. \
    The environment is an urban street with buildings on either side, and the sky is visible at the top of the frame. \
    The street is paved, and there are a few pedestrians visible in the distance. \
    The color palette is dominated by the grays of the pavement and the buildings, with the sky showing a soft blue hue. \
    The pedestrians are too far to discern any specific details about them.",
                   3: "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. \
    The scene is an urban environment with buildings on either side of the street. \
    The sky is overcast, and the lighting is dim, suggesting either early morning or late afternoon. \
    The street is relatively empty, with a few pedestrians visible in the distance. \
    The buildings have a classic architectural style with large windows and detailed facades. \
    The person in the video is wearing a dark-colored jacket and appears to be walking or standing still."
}

start_to_prompt = prompts_from_files

downsampling_resolution = 1
fps_range = [4, 30]
duration_range = [2, 4, 8]
start_range = [3] #[0, 1, 2, 3]
prefix = ''
postfix = ''

experiment_folders = []
for fps in fps_range:
    for fragment_length_in_seconds in duration_range:
        for start_time_in_minutes in start_range:
            starting_frame, _ = calc_initial_frame(rgb_video_path, h=0, m=start_time_in_minutes, s=0)
            experiment_folder = prefix + f'{seq_id}_{starting_frame}_{fps}_{fragment_length_in_seconds}_{"full" if is_full_prompt else "short"}' + postfix
            if fps == 30:
                frames_step = 1
            elif fps == 4:
                frames_step = 7
            rgb = load_video_fragment(rgb_video_path, starting_frame, fragment_length_in_seconds * fps, frames_step=frames_step, drop_chans=False)
            dpt = load_video_fragment(dpt_video_path, starting_frame, fragment_length_in_seconds * fps, frames_step=frames_step, drop_chans=True)
            if downsampling_resolution > 1:
                for i_frame in range(len(rgb)):
                    rgb[i_frame] = rgb[i_frame][::downsampling_resolution, ::downsampling_resolution, :]
                    dpt[i_frame] = dpt[i_frame][::downsampling_resolution, ::downsampling_resolution]
            save_experiment(rgb, dpt, fps, start_to_prompt[start_time_in_minutes], is_full_prompt, experiment_folder)
            # print shell instructions to paste them into command line
            experiment_folders.append(experiment_folder)

In [216]:
for experiment_folder in experiment_folders:
    command = f'sshpass -p "megaB0SS" scp -r ./{experiment_folder} kzipa@10.16.84.42:/mnt/vol2_raid/shared_data/cosmos/cosmos-transfer1'
    print(command)
    os.system(command)

sshpass -p "megaB0SS" scp -r ./2024_08_15__19_55_49_18000_30_4_full00 kzipa@10.16.84.42:/mnt/vol2_raid/shared_data/cosmos/cosmos-transfer1
sshpass -p "megaB0SS" scp -r ./2024_08_15__19_55_49_18000_30_4_full01 kzipa@10.16.84.42:/mnt/vol2_raid/shared_data/cosmos/cosmos-transfer1
sshpass -p "megaB0SS" scp -r ./2024_08_15__19_55_49_18000_30_4_full02 kzipa@10.16.84.42:/mnt/vol2_raid/shared_data/cosmos/cosmos-transfer1
sshpass -p "megaB0SS" scp -r ./2024_08_15__19_55_49_18000_30_4_full10 kzipa@10.16.84.42:/mnt/vol2_raid/shared_data/cosmos/cosmos-transfer1
sshpass -p "megaB0SS" scp -r ./2024_08_15__19_55_49_18000_30_4_full11 kzipa@10.16.84.42:/mnt/vol2_raid/shared_data/cosmos/cosmos-transfer1
sshpass -p "megaB0SS" scp -r ./2024_08_15__19_55_49_18000_30_4_full12 kzipa@10.16.84.42:/mnt/vol2_raid/shared_data/cosmos/cosmos-transfer1
sshpass -p "megaB0SS" scp -r ./2024_08_15__19_55_49_18000_30_4_full20 kzipa@10.16.84.42:/mnt/vol2_raid/shared_data/cosmos/cosmos-transfer1
sshpass -p "megaB0SS" scp -

In [None]:
experiment_folders1 += experiment_folders

In [221]:
def filtered_composed_command(command_list, filter_word):
    commands = []
    for experiment_folder in command_list:
        if filter_word in experiment_folder:
            commands.append(f'./{experiment_folder}/runme.sh')
    return ' && '.join(commands)

# pass my commands to docker container
for filter in ['']:#'_2_full', '_4_full', '_8_full']:
    command = filtered_composed_command(experiment_folders, filter)
    print(command)
    # print('Pending...')
    # os.system(f"sshpass -p megaB0SS ssh kzipa@10.16.84.42 'docker exec stoic_agnesi {command}'")
    # print('...done')

./2024_08_15__19_55_49_18000_30_4_full00/runme.sh && ./2024_08_15__19_55_49_18000_30_4_full01/runme.sh && ./2024_08_15__19_55_49_18000_30_4_full02/runme.sh && ./2024_08_15__19_55_49_18000_30_4_full10/runme.sh && ./2024_08_15__19_55_49_18000_30_4_full11/runme.sh && ./2024_08_15__19_55_49_18000_30_4_full12/runme.sh && ./2024_08_15__19_55_49_18000_30_4_full20/runme.sh && ./2024_08_15__19_55_49_18000_30_4_full21/runme.sh && ./2024_08_15__19_55_49_18000_30_4_full22/runme.sh && ./2024_08_15__19_55_49_18000_30_4_full30/runme.sh && ./2024_08_15__19_55_49_18000_30_4_full31/runme.sh && ./2024_08_15__19_55_49_18000_30_4_full32/runme.sh && ./2024_08_15__19_55_49_18000_30_4_full40/runme.sh && ./2024_08_15__19_55_49_18000_30_4_full41/runme.sh && ./2024_08_15__19_55_49_18000_30_4_full42/runme.sh && ./2024_08_15__19_55_49_18000_30_4_full50/runme.sh && ./2024_08_15__19_55_49_18000_30_4_full51/runme.sh && ./2024_08_15__19_55_49_18000_30_4_full52/runme.sh && ./2024_08_15__19_55_49_18000_30_4_full60/runme

In [None]:
# copy everything on my local device
for experiment_folder in experiment_folders1:
    # command = f'sshpass -p "megaB0SS" scp -r kzipa@10.16.84.42:/mnt/vol2_raid/shared_data/cosmos/cosmos-transfer1/{experiment_folder}/output ./{experiment_folder}'
    # print(command)
    # os.system(command)
    assert(os.path.exists(f'./{experiment_folder}/output'))

In [None]:
for experiment_folder in experiment_folders1:
    assert(os.path.exists(f'./{experiment_folder}/output'))
    input_video = f'./{experiment_folder}/rgb.mp4'
    output_video = f'./{experiment_folder}/output/output.mp4'
    assert(os.path.exists(input_video))
    assert(os.path.exists(output_video))
    _, (fps1, w1, h1) = calc_initial_frame(input_video, 0, 0, 0)
    _, (fps2, w2, h2) = calc_initial_frame(output_video, 0, 0, 0)
    print(experiment_folder, (fps1, w1, h1), (fps2, w2, h2))

# Variate prompts for single fixed video

In [None]:
# generate full-resolution videos with different long prompts (modified by hand)
prompts_3 = [
    # original
    "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. \
    The scene is an urban environment with buildings on either side of the street. \
    The sky is overcast, and the lighting is dim, suggesting either early morning or late afternoon. \
    The street is relatively empty, with a few pedestrians visible in the distance. \
    The buildings have a classic architectural style with large windows and detailed facades. \
    The person in the video is wearing a dark-colored jacket and appears to be walking or standing still.",
    
    "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest.\
    The scene is an urban environment with buildings on either side of the street. \
    The sky is vivid blue without clouds and the lighting is bright. \
    The street is relatively empty, with a few pedestrians visible in the distance. \
    The buildings have a classic architectural style with large windows and detailed facades. \
    The person in the video is wearing a red jacket and appears to be walking or standing still.",
    
    "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. \
    The scene is an urban environment with buildings on either side of the street. \
    The sky is overcast, and the lighting is dim, suggesting either early morning or late afternoon. \
    The street is relatively empty, with a few pedestrians visible in the distance. \
    The buildings have a modern architectural style with glass walls like London's City district.\
    The person in the video is wearing a dark-colored jacket and appears to be walking or standing still.",
    
    "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest.\
    The scene is an urban environment with buildings on either side of the street. \
    The sky is overcast, and the lighting is dim, suggesting either early morning or late afternoon. \
    It's winter, trees without leaves, a lot of snow. \
    The street is relatively empty, with a few pedestrians visible in the distance.\
    All people in the video are wearing red wear fur coats and bunny-eared hats.",
    
    "The video is taken from a first-person perspective, with a camera mounted on the person's chest.\
    The scene is an urban environment. \
    It's winter, trees without leaves, a lot of snow. \
    Empty street, with a few pedestrians visible in the distance.\
    All people in the video are wearing red wear fur coats and bunny-eared hats.",
    
    "The video is taken from a first-person perspective, with a camera mounted on the person's chest.\
    The scene is an urban environment in LA. \
    It's hot summer, palm-trees grow on each side of the road. \
    Empty street, with a few pedestrians visible in the distance. \
    All cars are blue Nissans. \
    All people in the video are wearing colourful Hawaiian shirts.",
    
    "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. \
    The scene is an urban environment with buildings on either side of the street. \
    The sky is dark, it's late everning. Lamps shine with warm LED. \
    The street is relatively empty, with a few pedestrians visible in the distance. \
    The buildings are high-tech: glass, steel, metal. Glass is covered with golden film. \
    The person in the video is wearing a dark-colored jacket and appears to be walking or standing still.",
]

prefix = ''
postfix = ''
fps = 30
fragment_length_in_seconds = 4
start_time_in_minutes = 3
starting_frame, _ = calc_initial_frame(rgb_video_path, h=0, m=start_time_in_minutes, s=0)
rgb = load_video_fragment(rgb_video_path, starting_frame, fragment_length_in_seconds * fps, frames_step=1, drop_chans=False)
dpt = load_video_fragment(dpt_video_path, starting_frame, fragment_length_in_seconds * fps, frames_step=1, drop_chans=True)
    
experiment_folders = []
for i_prompt, prompt in enumerate(prompts_3):
    # repeat each run 3 times
    for i_attempt in range(3):
        postfix = f'full{i_prompt}{i_attempt}'
        experiment_folder = prefix + f'{seq_id}_{starting_frame}_{fps}_{postfix}'
        save_experiment(rgb, dpt, fps, start_to_prompt[start_time_in_minutes], is_full_prompt, experiment_folder)
        experiment_folders.append(experiment_folder)

In [214]:
experiment_folders

['2024_08_15__19_55_49_18000_30_4_full00',
 '2024_08_15__19_55_49_18000_30_4_full01',
 '2024_08_15__19_55_49_18000_30_4_full02',
 '2024_08_15__19_55_49_18000_30_4_full10',
 '2024_08_15__19_55_49_18000_30_4_full11',
 '2024_08_15__19_55_49_18000_30_4_full12',
 '2024_08_15__19_55_49_18000_30_4_full20',
 '2024_08_15__19_55_49_18000_30_4_full21',
 '2024_08_15__19_55_49_18000_30_4_full22',
 '2024_08_15__19_55_49_18000_30_4_full30',
 '2024_08_15__19_55_49_18000_30_4_full31',
 '2024_08_15__19_55_49_18000_30_4_full32',
 '2024_08_15__19_55_49_18000_30_4_full40',
 '2024_08_15__19_55_49_18000_30_4_full41',
 '2024_08_15__19_55_49_18000_30_4_full42',
 '2024_08_15__19_55_49_18000_30_4_full50',
 '2024_08_15__19_55_49_18000_30_4_full51',
 '2024_08_15__19_55_49_18000_30_4_full52',
 '2024_08_15__19_55_49_18000_30_4_full60',
 '2024_08_15__19_55_49_18000_30_4_full61',
 '2024_08_15__19_55_49_18000_30_4_full62']

In [210]:
# update prompts in follow-up

for i in [1, 2, 3]:
    with open(f'../2024_08_15__19_55_49_18000_30_4_full{i}/output/output.txt') as fp:
        full_prompt = fp.read() 
    print(full_prompt)

The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is an urban environment with buildings on either side of the street. The sky is vivid blue without clouds and the lighting is bright. The street is relatively empty, with a few pedestrians visible in the distance. The buildings have a classic architectural style with large windows and detailed facades. The person in the video is wearing a red jacket and appears to be walking or standing still.
The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is an urban environment with buildings on either side of the street. The sky is overcast, and the lighting is dim, suggesting either early morning or late afternoon. The street is relatively empty, with a few pedestrians visible in the distance. The buildings have a modern architectural style with glass walls like London's City district. The person in the v

# Deekseek-generated

This is a prompt for text-to-video model: "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is an urban environment with buildings on either side of the street. The sky is overcast, and the lighting is dim, suggesting either early morning or late afternoon. The street is relatively empty, with a few pedestrians visible in the distance. The buildings have a classic architectural style with large windows and detailed facades. The person in the video is wearing a dark-colored jacket and appears to be walking or standing still."

Change this description to generate different surroundings: customize buildings, trees and people accordingly. Keep the structure of the prompt intact.

I need these locations and weather:
1. LA, summer, bright day. Cars are modern and posh.
2. Moscow, winter, dim day. Cars are ordinary.
3. Paris, spring, light clouds. Cars are shiny and clean.
4. London, autumn, rain. Cars are old-fashioned.
5. Seoul, autumn, cold, people are Asians. All cars are black.
6. Pretoria, summer, flourished nature, people are black Africans. Cars are old.
7. India, summer, dry and hot weather, people are Indians. Cars are heavy-loaded and colourful, a lot of cicyles.
8. UAE, summer, palm trees, people are Arabs. All cars are white. All seems a bit yellowish.
9. Switzerland, summer, mountains, cyclists, bright and clean sky.

Here are your customized prompts for each location and weather condition, keeping the original structure intact:  

---  

### **1. LA, summer, bright day. Cars are modern and posh.**  
*The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is a bustling urban environment with sleek, modern high-rises and palm trees lining the street. The sky is clear and bright, with intense sunlight casting sharp shadows. The street is lively, with well-dressed pedestrians and luxury cars gliding past. The buildings have glass facades, reflecting the sunlight, and some have rooftop gardens. The person in the video is wearing sunglasses and a light summer outfit, appearing to walk confidently down the sidewalk.*  

---  

### **2. Moscow, winter, dim day. Cars are ordinary.**  
*The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is a snowy urban environment with imposing Soviet-era buildings and bare trees coated in frost. The sky is overcast, casting a dull gray light over the streets. The road is slushy, with ordinary sedans and compact cars moving slowly. A few bundled-up pedestrians trudge through the snow, their breath visible in the cold air. The buildings have sturdy, utilitarian designs with few decorative elements. The person in the video is wearing a thick winter coat and gloves, either walking carefully or standing still in the chilly weather.*  

---  

### **3. Paris, spring, light clouds. Cars are shiny and clean.**  
*The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is a charming Parisian street lined with elegant Haussmann-style buildings, their wrought-iron balconies adorned with flower boxes. The sky has light clouds, allowing soft sunlight to filter through. The street is lively with well-dressed locals and tourists, and the cars—mostly compact and stylish—are impeccably clean. Cafés spill out onto the sidewalks, and the scent of fresh bread lingers in the air. The person in the video is wearing a light jacket, strolling leisurely or pausing to take in the view.*  

---  

### **4. London, autumn, rain. Cars are old-fashioned.**  
*The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is a wet London street with classic red-brick townhouses and black cabs splashing through puddles. The sky is heavy with rain, and the golden leaves on the pavement are soggy. The cars are mostly vintage models or boxy sedans, their headlights glowing in the dim light. Pedestrians hurry under umbrellas, their coats damp from the drizzle. The buildings have tall windows and ornate details, some with ivy creeping up the walls. The person in the video is wearing a trench coat, walking briskly or waiting at a crosswalk.*  

---  

### **5. Seoul, autumn, cold, people are Asians. All cars are black.**  
*The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is a sleek, modern district of Seoul, with towering glass skyscrapers and neon signs reflecting off wet pavement. The air is crisp, and the trees display warm autumn hues. The streets are filled with well-dressed Asian pedestrians, and every car—luxury sedans and SUVs—is glossy black, moving in orderly lines. The buildings have sharp, futuristic designs with digital billboards flashing advertisements. The person in the video is wearing a stylish padded jacket, walking with purpose or standing at a busy intersection.*  

---  

### **6. Pretoria, summer, flourished nature, people are black Africans. Cars are old.**  
*The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is a vibrant street in Pretoria, lined with jacaranda trees in full purple bloom and lush greenery. The sky is a deep blue, and the sun beats down warmly. The road is filled with older-model cars, some slightly worn but still running. Black African pedestrians in colorful clothing chat and move leisurely. The buildings are a mix of colonial-style and modern, with some storefronts bustling with activity. The person in the video is wearing a light summer shirt, either walking casually or pausing under the shade of a tree.*  

---  

### **7. India, summer, dry and hot weather, people are Indians. Cars are heavy-loaded and colourful, a lot of cycles.**  
*The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is a lively Indian street, packed with auto-rickshaws, brightly painted trucks overloaded with goods, and countless bicycles weaving through traffic. The buildings are a mix of old concrete structures and vibrant market stalls, their facades faded by the sun. The air is thick with dust and the scent of spices. Indian pedestrians in light cotton clothes move energetically, some carrying bundles on their heads. The person in the video is wearing a loose kurta, navigating the chaotic street or standing near a chai stall.*  

---  

### **8. UAE, summer, palm trees, people are Arabs. All cars are white. All seems a bit yellowish.**  
*The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is a wide boulevard in the UAE, flanked by gleaming skyscrapers and tall palm trees swaying in the hot wind. The sky is hazy, giving everything a slightly yellowish tint from the desert sun. Every car on the road is a pristine white SUV or luxury sedan, their windows tinted dark. Arab men in crisp white kanduras and women in abayas walk calmly along shaded pathways. The buildings are ultramodern, with mirrored glass and futuristic designs. The person in the video is wearing sunglasses and light clothing, moving slowly in the heat or waiting near a grand fountain.*  

---  

### **9. Switzerland, summer, mountains, cyclists, bright and clean sky.**  
*The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is a picturesque Swiss village street, with charming wooden chalets and flower-filled window boxes. The backdrop is dominated by towering snow-capped mountains under a crystal-clear blue sky. The road is smooth and clean, with fit cyclists in sporty gear gliding past. A few pedestrians enjoy the fresh air, some carrying hiking poles. The buildings have steep roofs and intricate carvings, blending seamlessly with the alpine scenery. The person in the video is wearing breathable outdoor gear, either walking at a relaxed pace or stopping to admire the view.*  

---  

Each version maintains the original structure while adapting to the specified locations, weather, and cultural elements. Let me know if you'd like any refinements!

In [None]:
# generate full-resolution videos with different long prompts (modified by hand)
deepseek_prompts_3 = {
"LA": "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is a bustling urban environment with sleek, modern high-rises and palm trees lining the street. The sky is clear and bright, with intense sunlight casting sharp shadows. The street is lively, with well-dressed pedestrians and luxury cars gliding past. The buildings have glass facades, reflecting the sunlight, and some have rooftop gardens. The person in the video is wearing sunglasses and a light summer outfit, appearing to walk confidently down the sidewalk.",
"Moscow": "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is a snowy urban environment with imposing Soviet-era buildings and bare trees coated in frost. The sky is overcast, casting a dull gray light over the streets. The road is slushy, with ordinary sedans and compact cars moving slowly. A few bundled-up pedestrians trudge through the snow, their breath visible in the cold air. The buildings have sturdy, utilitarian designs with few decorative elements. The person in the video is wearing a thick winter coat and gloves, either walking carefully or standing still in the chilly weather.",
"Paris": "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is a charming Parisian street lined with elegant Haussmann-style buildings, their wrought-iron balconies adorned with flower boxes. The sky has light clouds, allowing soft sunlight to filter through. The street is lively with well-dressed locals and tourists, and the cars—mostly compact and stylish—are impeccably clean. Cafés spill out onto the sidewalks, and the scent of fresh bread lingers in the air. The person in the video is wearing a light jacket, strolling leisurely or pausing to take in the view.", 
"London": "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is a wet London street with classic red-brick townhouses and black cabs splashing through puddles. The sky is heavy with rain, and the golden leaves on the pavement are soggy. The cars are mostly vintage models or boxy sedans, their headlights glowing in the dim light. Pedestrians hurry under umbrellas, their coats damp from the drizzle. The buildings have tall windows and ornate details, some with ivy creeping up the walls. The person in the video is wearing a trench coat, walking briskly or waiting at a crosswalk.",
"Seoul": "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is a sleek, modern district of Seoul, with towering glass skyscrapers and neon signs reflecting off wet pavement. The air is crisp, and the trees display warm autumn hues. The streets are filled with well-dressed Asian pedestrians, and every car—luxury sedans and SUVs—is glossy black, moving in orderly lines. The buildings have sharp, futuristic designs with digital billboards flashing advertisements. The person in the video is wearing a stylish padded jacket, walking with purpose or standing at a busy intersection.",  
"Pretoria": "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is a vibrant street in Pretoria, lined with jacaranda trees in full purple bloom and lush greenery. The sky is a deep blue, and the sun beats down warmly. The road is filled with older-model cars, some slightly worn but still running. Black African pedestrians in colorful clothing chat and move leisurely. The buildings are a mix of colonial-style and modern, with some storefronts bustling with activity. The person in the video is wearing a light summer shirt, either walking casually or pausing under the shade of a tree.", 
"India": "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is a lively Indian street, packed with auto-rickshaws, brightly painted trucks overloaded with goods, and countless bicycles weaving through traffic. The buildings are a mix of old concrete structures and vibrant market stalls, their facades faded by the sun. The air is thick with dust and the scent of spices. Indian pedestrians in light cotton clothes move energetically, some carrying bundles on their heads. The person in the video is wearing a loose kurta, navigating the chaotic street or standing near a chai stall.", 
"UAE": "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is a wide boulevard in the UAE, flanked by gleaming skyscrapers and tall palm trees swaying in the hot wind. The sky is hazy, giving everything a slightly yellowish tint from the desert sun. Every car on the road is a pristine white SUV or luxury sedan, their windows tinted dark. Arab men in crisp white kanduras and women in abayas walk calmly along shaded pathways. The buildings are ultramodern, with mirrored glass and futuristic designs. The person in the video is wearing sunglasses and light clothing, moving slowly in the heat or waiting near a grand fountain.",
"Switzerland": "The video is taken from a first-person perspective, likely with a camera mounted on the person's head or chest. The scene is a picturesque Swiss village street, with charming wooden chalets and flower-filled window boxes. The backdrop is dominated by towering snow-capped mountains under a crystal-clear blue sky. The road is smooth and clean, with fit cyclists in sporty gear gliding past. A few pedestrians enjoy the fresh air, some carrying hiking poles. The buildings have steep roofs and intricate carvings, blending seamlessly with the alpine scenery. The person in the video is wearing breathable outdoor gear, either walking at a relaxed pace or stopping to admire the view."
}
fps = 30
fragment_length_in_seconds = 4
start_time_in_minutes = 3
starting_frame, _ = calc_initial_frame(rgb_video_path, h=0, m=start_time_in_minutes, s=0)
rgb = load_video_fragment(rgb_video_path, starting_frame, fragment_length_in_seconds * fps, frames_step=1, drop_chans=False)
dpt = load_video_fragment(dpt_video_path, starting_frame, fragment_length_in_seconds * fps, frames_step=1, drop_chans=True)
    
experiment_folders = []
for i_prompt, (place, prompt) in enumerate(deepseek_prompts_3.items()):
    # repeat each run 3 times
    for i_attempt in range(3):
        postfix = f'{place}{i_prompt}{i_attempt}'
        experiment_folder = f'{seq_id}_{starting_frame}_{fps}_{postfix}'
        save_experiment(rgb, dpt, fps, start_to_prompt[start_time_in_minutes], is_full_prompt, experiment_folder, relative_root='./')
        experiment_folders.append(experiment_folder)

print(experiment_folders)

for experiment_folder in experiment_folders:
    command = f'sshpass -p "megaB0SS" scp -r ./{experiment_folder} kzipa@10.16.84.42:/mnt/vol2_raid/shared_data/cosmos/cosmos-transfer1'
    print(command)
    os.system(command)
    
# pass my commands to docker container (copy this command into docker on nabo)
for filter in ['']:
    command = filtered_composed_command(experiment_folders, filter)
    print(command)
    
for experiment_folder in experiment_folders:
    command = f'sshpass -p "megaB0SS" scp -r kzipa@10.16.84.42:/mnt/vol2_raid/shared_data/cosmos/cosmos-transfer1/{experiment_folder}/output {experiment_folder}/output'
    print(command)

['2024_08_15__19_55_49_18000_30_LA00', '2024_08_15__19_55_49_18000_30_LA01', '2024_08_15__19_55_49_18000_30_LA02', '2024_08_15__19_55_49_18000_30_Moscow10', '2024_08_15__19_55_49_18000_30_Moscow11', '2024_08_15__19_55_49_18000_30_Moscow12', '2024_08_15__19_55_49_18000_30_Paris20', '2024_08_15__19_55_49_18000_30_Paris21', '2024_08_15__19_55_49_18000_30_Paris22', '2024_08_15__19_55_49_18000_30_London30', '2024_08_15__19_55_49_18000_30_London31', '2024_08_15__19_55_49_18000_30_London32', '2024_08_15__19_55_49_18000_30_Seoul40', '2024_08_15__19_55_49_18000_30_Seoul41', '2024_08_15__19_55_49_18000_30_Seoul42', '2024_08_15__19_55_49_18000_30_Pretoria50', '2024_08_15__19_55_49_18000_30_Pretoria51', '2024_08_15__19_55_49_18000_30_Pretoria52', '2024_08_15__19_55_49_18000_30_India60', '2024_08_15__19_55_49_18000_30_India61', '2024_08_15__19_55_49_18000_30_India62', '2024_08_15__19_55_49_18000_30_UAE70', '2024_08_15__19_55_49_18000_30_UAE71', '2024_08_15__19_55_49_18000_30_UAE72', '2024_08_15__19

In [240]:
with open('runme.sh', 'w') as _f:
    for experiment_folder in ['2024_08_15__19_55_49_18000_30_4_full00',
    '2024_08_15__19_55_49_18000_30_4_full01',
    '2024_08_15__19_55_49_18000_30_4_full02',
    '2024_08_15__19_55_49_18000_30_4_full10',
    '2024_08_15__19_55_49_18000_30_4_full11',
    '2024_08_15__19_55_49_18000_30_4_full12',
    '2024_08_15__19_55_49_18000_30_4_full20',
    '2024_08_15__19_55_49_18000_30_4_full21',
    '2024_08_15__19_55_49_18000_30_4_full22',
    '2024_08_15__19_55_49_18000_30_4_full30',
    '2024_08_15__19_55_49_18000_30_4_full31',
    '2024_08_15__19_55_49_18000_30_4_full32',
    '2024_08_15__19_55_49_18000_30_4_full40',
    '2024_08_15__19_55_49_18000_30_4_full41',
    '2024_08_15__19_55_49_18000_30_4_full42',
    '2024_08_15__19_55_49_18000_30_4_full50',
    '2024_08_15__19_55_49_18000_30_4_full51',
    '2024_08_15__19_55_49_18000_30_4_full52',
    '2024_08_15__19_55_49_18000_30_4_full60',
    '2024_08_15__19_55_49_18000_30_4_full61',
    '2024_08_15__19_55_49_18000_30_4_full62',
    '2024_08_15__19_55_49_18000_30_LA00',
    '2024_08_15__19_55_49_18000_30_LA01',
    '2024_08_15__19_55_49_18000_30_LA02',
    '2024_08_15__19_55_49_18000_30_Moscow10',
    '2024_08_15__19_55_49_18000_30_Moscow11',
    '2024_08_15__19_55_49_18000_30_Moscow12',
    '2024_08_15__19_55_49_18000_30_Paris20',
    '2024_08_15__19_55_49_18000_30_Paris21',
    '2024_08_15__19_55_49_18000_30_Paris22',
    '2024_08_15__19_55_49_18000_30_London30',
    '2024_08_15__19_55_49_18000_30_London31',
    '2024_08_15__19_55_49_18000_30_London32',
    '2024_08_15__19_55_49_18000_30_Seoul40',
    '2024_08_15__19_55_49_18000_30_Seoul41',
    '2024_08_15__19_55_49_18000_30_Seoul42',
    '2024_08_15__19_55_49_18000_30_Pretoria50',
    '2024_08_15__19_55_49_18000_30_Pretoria51',
    '2024_08_15__19_55_49_18000_30_Pretoria52',
    '2024_08_15__19_55_49_18000_30_India60',
    '2024_08_15__19_55_49_18000_30_India61',
    '2024_08_15__19_55_49_18000_30_India62',
    '2024_08_15__19_55_49_18000_30_UAE70',
    '2024_08_15__19_55_49_18000_30_UAE71',
    '2024_08_15__19_55_49_18000_30_UAE72',
    '2024_08_15__19_55_49_18000_30_Switzerland80',
    '2024_08_15__19_55_49_18000_30_Switzerland81',
    '2024_08_15__19_55_49_18000_30_Switzerland82']:
        command = f'sshpass -p "megaB0SS" scp -r kzipa@10.16.84.42:/mnt/vol2_raid/shared_data/cosmos/cosmos-transfer1/{experiment_folder}/output {experiment_folder}/output'
        print(command, file=_f)
os.system('chmod +x runme.sh')

0

In [None]:
# download new results from server every 10-minutes (600 s)
import time

for experiment_folder in ['2024_08_15__19_55_49_18000_30_4_full00',
    '2024_08_15__19_55_49_18000_30_4_full01',
    '2024_08_15__19_55_49_18000_30_4_full02',
    '2024_08_15__19_55_49_18000_30_4_full10',
    '2024_08_15__19_55_49_18000_30_4_full11',
    '2024_08_15__19_55_49_18000_30_4_full12',
    '2024_08_15__19_55_49_18000_30_4_full20',
    '2024_08_15__19_55_49_18000_30_4_full21',
    '2024_08_15__19_55_49_18000_30_4_full22',
    '2024_08_15__19_55_49_18000_30_4_full30',
    '2024_08_15__19_55_49_18000_30_4_full31',
    '2024_08_15__19_55_49_18000_30_4_full32',
    '2024_08_15__19_55_49_18000_30_4_full40',
    '2024_08_15__19_55_49_18000_30_4_full41',
    '2024_08_15__19_55_49_18000_30_4_full42',
    '2024_08_15__19_55_49_18000_30_4_full50',
    '2024_08_15__19_55_49_18000_30_4_full51',
    '2024_08_15__19_55_49_18000_30_4_full52',
    '2024_08_15__19_55_49_18000_30_4_full60',
    '2024_08_15__19_55_49_18000_30_4_full61',
    '2024_08_15__19_55_49_18000_30_4_full62',
    '2024_08_15__19_55_49_18000_30_LA00',
    '2024_08_15__19_55_49_18000_30_LA01',
    '2024_08_15__19_55_49_18000_30_LA02',
    '2024_08_15__19_55_49_18000_30_Moscow10',
    '2024_08_15__19_55_49_18000_30_Moscow11',
    '2024_08_15__19_55_49_18000_30_Moscow12',
    '2024_08_15__19_55_49_18000_30_Paris20',
    '2024_08_15__19_55_49_18000_30_Paris21',
    '2024_08_15__19_55_49_18000_30_Paris22',
    '2024_08_15__19_55_49_18000_30_London30',
    '2024_08_15__19_55_49_18000_30_London31',
    '2024_08_15__19_55_49_18000_30_London32',
    '2024_08_15__19_55_49_18000_30_Seoul40',
    '2024_08_15__19_55_49_18000_30_Seoul41',
    '2024_08_15__19_55_49_18000_30_Seoul42',
    '2024_08_15__19_55_49_18000_30_Pretoria50',
    '2024_08_15__19_55_49_18000_30_Pretoria51',
    '2024_08_15__19_55_49_18000_30_Pretoria52',
    '2024_08_15__19_55_49_18000_30_India60',
    '2024_08_15__19_55_49_18000_30_India61',
    '2024_08_15__19_55_49_18000_30_India62',
    '2024_08_15__19_55_49_18000_30_UAE70',
    '2024_08_15__19_55_49_18000_30_UAE71',
    '2024_08_15__19_55_49_18000_30_UAE72',
    '2024_08_15__19_55_49_18000_30_Switzerland80',
    '2024_08_15__19_55_49_18000_30_Switzerland81',
    '2024_08_15__19_55_49_18000_30_Switzerland82']:
    while not os.path.exists(f'{experiment_folder}/output/output.mp4'):
        command = f'sshpass -p "megaB0SS" scp -r kzipa@10.16.84.42:/mnt/vol2_raid/shared_data/cosmos/cosmos-transfer1/{experiment_folder}/output {experiment_folder}/output'
        os.system(command)
        time.sleep(600.0) # sleep for 10 minutes
